cassiopee 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Changelog +2 -0
- data/demo-mt.rb +28 -0
- data/lib/cassiopee-mt.rb +79 -0
- data/lib/cassiopee.rb +3 -3
- data/tests/test-suite.rb +9 -0
- metadata +6 -4
data/Changelog
CHANGED
data/demo-mt.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'lib/cassiopee-mt')
require 'rubygems'
require 'logger'

# Demo of the multi-threaded crawler: index one string, then run an exact
# search followed by an approximate search on it.

# Create the multi-threaded crawler and let it use up to 3 threads
crawler = CassiopeeMt::CrawlerMt.new
crawler.setLogLevel(Logger::INFO)
crawler.maxthread = 3
#crawler.use_store = true

# Index the string that will be searched
crawler.indexString('iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiisallou salluiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii')

# Exact search of the pattern in the indexed string
crawler.searchExact('llo')

# Print each exact match until the crawler has none left
until (match = crawler.next()) == nil
  puts("got an exact match " + match.inspect)
end

# Reset the crawler before running the second search
crawler.clear()

# Approximate search of the same pattern, called with 1 as second argument
crawler.searchApproximate('llo', 1)

# Print each approximate match until the crawler has none left
until (match = crawler.next()) == nil
  puts("got an approximate match " + match.inspect)
end
|
data/lib/cassiopee-mt.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'digest/md5'
|
2
|
+
require 'logger'
|
3
|
+
require 'zlib'
|
4
|
+
#require 'rubygems'
|
5
|
+
#require 'text'
|
6
|
+
#require 'text/util'
|
7
|
+
require File.join(File.dirname(__FILE__), 'cassiopee')
|
8
|
+
|
9
|
+
include Cassiopee
|
10
|
+
|
11
|
+
# Module managing multiple threads to search in strings, extending Cassiopee
module CassiopeeMt

  # Multi-threaded search using one Crawler per thread.
  # Position filtering is used to split the input data according to maxthread.
  # Matches of each thread are merged into the matches of the CrawlerMt.
  class CrawlerMt < Crawler

    # Max number of threads to use. A nil or non-positive value makes the
    # searches run with a single thread.
    attr_accessor :maxthread

    def initialize
      super
      @maxthread = 1
      @th = []
      @matches = Array.new
    end

    # Configure a per-thread crawler so it works on the same data as this one:
    # same log level as $log, same file suffix, index loaded, and @sequence
    # indexed.
    # * crawler is the Crawler to configure
    # * threadId is the 1-based number of the thread (currently unused)
    def setParams(crawler,threadId)
      crawler.setLogLevel($log.level)
      crawler.file_suffix = @file_suffix
      crawler.loadIndex()
      #crawler.file_suffix = @file_suffix+"."+threadId.to_s
      crawler.indexString(@sequence)
    end

    # Exact search of pattern, split across maxthread threads.
    # The matches found are appended to the matches already held by this
    # crawler (call clear between searches to start from an empty list).
    # Returns the array of matches.
    def searchExact(pattern)
      search_in_threads { |crawler| crawler.searchExact(pattern) }
    end

    # Approximate search of s, split across maxthread threads.
    # * s is the pattern to search
    # * edit is passed unchanged to Crawler#searchApproximate
    # The matches found are appended to the matches already held by this
    # crawler (call clear between searches to start from an empty list).
    # Returns the array of matches.
    def searchApproximate(s,edit)
      search_in_threads { |crawler| crawler.searchApproximate(s,edit) }
    end

    private

    # Split @sequence into one position range per thread, run the given block
    # with a dedicated Crawler in each thread, then append every thread's
    # matches to @matches. The block receives the crawler and must return its
    # array of matches.
    def search_in_threads(&search)
      count = [maxthread.to_i, 1].max
      nb = @sequence.length.div(count)
      min = 0
      # Start from an empty list so that threads of a previous search (e.g.
      # run with a higher maxthread) are never joined and merged again.
      @th = []
      (1..count).each do |i|
        crawler = Crawler.new
        setParams(crawler,i)
        # The last range absorbs the remainder of the integer division
        max = (i == count) ? @sequence.length : min + nb
        crawler.filter_position(min,max)
        $log.debug("Start new Thread between " << min.to_s << " and " << max.to_s)
        @th << Thread.new(crawler) { |c| search.call(c) }
        # NOTE(review): assumes filter_position bounds are inclusive, so the
        # next range starts right after max - confirm against Crawler.
        min = max + 1
      end
      # Thread#value joins the thread and returns the result of its block
      @th.each { |t| @matches.concat(t.value) }
      return @matches
    end

  end

end
|
data/lib/cassiopee.rb
CHANGED
@@ -136,15 +136,14 @@ module Cassiopee
|
|
136
136
|
# * ambiguous is a Hash of char/Array of char mapping
|
137
137
|
|
138
138
|
def isAmbiguousEqual(a,b,ambiguous)
|
139
|
-
if(ambiguous==nil || ambiguous[a.chr]==nil)
|
139
|
+
if(ambiguous==nil || (ambiguous[a.chr]==nil && ambiguous[b.chr]==nil ))
|
140
140
|
if(a==b)
|
141
141
|
return true
|
142
142
|
else
|
143
143
|
return false
|
144
144
|
end
|
145
145
|
end
|
146
|
-
|
147
|
-
if(ambiguous[a.chr].index(b.chr)!=nil)
|
146
|
+
if(ambiguous[a.chr].index(b.chr)!=nil || ambiguous[b.chr].index(a.chr)!=nil || a==b)
|
148
147
|
return true
|
149
148
|
else
|
150
149
|
return false
|
@@ -214,6 +213,7 @@ module Cassiopee
|
|
214
213
|
|
215
214
|
# Reset the crawler: empty the suffixes hash and the matches array, and delete
# the file named @file_suffix + FILE_SUFFIX_POS when it exists.
def clear
  @suffixes = Hash.new
  @matches = Array.new
  positions_file = @file_suffix+FILE_SUFFIX_POS
  # File.exist? is used because File.exists? was removed in Ruby 3.2
  File.delete(positions_file) if File.exist?(positions_file)
end
|
219
219
|
|
data/tests/test-suite.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), '../lib/cassiopee')
|
2
|
+
require File.join(File.dirname(__FILE__), '../lib/cassiopee-mt')
|
2
3
|
require 'rubygems'
|
3
4
|
require 'logger'
|
4
5
|
require 'test/unit'
|
@@ -40,6 +41,14 @@ class TestCrawler < Test::Unit::TestCase
|
|
40
41
|
assert_equal(1,matches.length)
|
41
42
|
end
|
42
43
|
|
44
|
+
# Exact search with a 3-thread CrawlerMt: the pattern 'exam' appears once in
# the indexed string, so exactly one match is expected.
def test_multithreadsearch
  mt_crawler = CassiopeeMt::CrawlerMt.new
  mt_crawler.maxthread = 3
  mt_crawler.indexString('iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiimy sample exampleiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii')
  found = mt_crawler.searchExact('exam')
  assert_equal(1, found.length)
end
|
51
|
+
|
43
52
|
end
|
44
53
|
|
45
54
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cassiopee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 29
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 1
|
9
|
-
-
|
10
|
-
version: 0.1.
|
9
|
+
- 3
|
10
|
+
version: 0.1.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Olivier Sallou
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-09-
|
18
|
+
date: 2011-09-19 00:00:00 +02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -47,7 +47,9 @@ files:
|
|
47
47
|
- Changelog
|
48
48
|
- LICENSE
|
49
49
|
- demo.rb
|
50
|
+
- demo-mt.rb
|
50
51
|
- lib/cassiopee.rb
|
52
|
+
- lib/cassiopee-mt.rb
|
51
53
|
- bin/cassie.rb
|
52
54
|
- tests/test-suite.rb
|
53
55
|
- tests/amb.map
|