cassiopee 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Changelog +2 -0
- data/demo-mt.rb +28 -0
- data/lib/cassiopee-mt.rb +79 -0
- data/lib/cassiopee.rb +3 -3
- data/tests/test-suite.rb +9 -0
- metadata +6 -4
data/Changelog
CHANGED
data/demo-mt.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'lib/cassiopee-mt')
require 'rubygems'
require 'logger'

# Demo of the multi-threaded crawler: index a string, then run an exact
# and an approximate search, printing every match.

# Instantiate a new crawler
crawler = CassiopeeMt::CrawlerMt.new
crawler.setLogLevel(Logger::INFO)
crawler.maxthread = 3
#crawler.use_store = true

# String to index
crawler.indexString('iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiisallou salluiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii')

# Search pattern in indexed string
crawler.searchExact('llo')

# Go through matches until the crawler has none left
loop do
  match = crawler.next()
  break if match == nil
  puts "got an exact match " + match.inspect
end

# Reset the crawler before running the second search
crawler.clear()

crawler.searchApproximate('llo',1)

# Go through matches until the crawler has none left
loop do
  match = crawler.next()
  break if match == nil
  puts "got an approximate match " + match.inspect
end
|
data/lib/cassiopee-mt.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'digest/md5'
|
2
|
+
require 'logger'
|
3
|
+
require 'zlib'
|
4
|
+
#require 'rubygems'
|
5
|
+
#require 'text'
|
6
|
+
#require 'text/util'
|
7
|
+
require File.join(File.dirname(__FILE__), 'cassiopee')
|
8
|
+
|
9
|
+
include Cassiopee
|
10
|
+
|
11
|
+
# Module managing multi-threaded searches in strings, extending Cassiopee
module CassiopeeMt

  # Multi-threaded search using one Crawler per thread.
  # Position filtering is used to split the indexed sequence into
  # maxthread ranges; each range is searched by its own Crawler in its
  # own thread, and the matches of each thread are merged into the
  # matches of CrawlerMt.
  class CrawlerMt < Crawler

    # Max number of threads to use. Must be set to a positive integer
    # before searching (the sequence length is divided by it).
    attr_accessor :maxthread

    def initialize
      super
      @th = []
      @matches = Array.new
    end

    # Configure a per-thread crawler from this crawler: same log level and
    # file suffix, then load the index and index the same sequence.
    # * crawler is the Crawler to configure
    # * threadId is the 1-based thread number (currently unused)
    def setParams(crawler,threadId)
      crawler.setLogLevel($log.level)
      crawler.file_suffix = @file_suffix
      crawler.loadIndex()
      #crawler.file_suffix = @file_suffix+"."+threadId.to_s
      crawler.indexString(@sequence)
    end

    # Exact search of pattern, split over maxthread threads.
    # Returns @matches with the matches of every thread appended.
    def searchExact(pattern)
      searchInThreads { |crawler| crawler.searchExact(pattern) }
    end

    # Approximate search of s, with the edit parameter passed through to
    # Crawler#searchApproximate, split over maxthread threads.
    # Returns @matches with the matches of every thread appended.
    def searchApproximate(s,edit)
      searchInThreads { |crawler| crawler.searchApproximate(s,edit) }
    end

    private

    # Run the given search block once per thread, each on a dedicated
    # Crawler restricted to its own position range, then merge the results.
    # The thread list is rebuilt on every call so that threads of a previous
    # search (e.g. run with a larger maxthread) are never joined and merged
    # a second time.
    def searchInThreads(&search)
      nb = @sequence.length.div(maxthread)
      min = 0
      workers = (1..maxthread).map do |i|
        crawler = Crawler.new
        setParams(crawler,i)
        # The last thread takes everything up to the end of the sequence
        max = (i == maxthread) ? @sequence.length : min + nb
        crawler.filter_position(min,max)
        $log.debug("Start new Thread between #{min} and #{max}")
        worker = Thread.new { Thread.current["matches"] = search.call(crawler) }
        min = max + 1
        worker
      end
      @th = workers
      # Thread#value joins the thread and returns the block result
      workers.each { |t| t.value.each { |m| @matches << m } }
      return @matches
    end

  end

end
|
data/lib/cassiopee.rb
CHANGED
@@ -136,15 +136,14 @@ module Cassiopee
|
|
136
136
|
# * ambiguous is a Hash of char/Array of char mapping
|
137
137
|
|
138
138
|
def isAmbiguousEqual(a,b,ambiguous)
|
139
|
-
if(ambiguous==nil || ambiguous[a.chr]==nil)
|
139
|
+
if(ambiguous==nil || (ambiguous[a.chr]==nil && ambiguous[b.chr]==nil ))
|
140
140
|
if(a==b)
|
141
141
|
return true
|
142
142
|
else
|
143
143
|
return false
|
144
144
|
end
|
145
145
|
end
|
146
|
-
|
147
|
-
if(ambiguous[a.chr].index(b.chr)!=nil)
|
146
|
+
if(ambiguous[a.chr].index(b.chr)!=nil || ambiguous[b.chr].index(a.chr)!=nil || a==b)
|
148
147
|
return true
|
149
148
|
else
|
150
149
|
return false
|
@@ -214,6 +213,7 @@ module Cassiopee
|
|
214
213
|
|
215
214
|
# Reset @suffixes to an empty Hash and @matches to an empty Array, and
# delete the file at @file_suffix + FILE_SUFFIX_POS if it exists.
def clear
  @suffixes = Hash.new
  @matches = Array.new
  stored_file = @file_suffix+FILE_SUFFIX_POS
  # File.exist? replaces File.exists?, which is deprecated and was removed in Ruby 3.2
  File.delete(stored_file) if File.exist?(stored_file)
end
|
219
219
|
|
data/tests/test-suite.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), '../lib/cassiopee')
|
2
|
+
require File.join(File.dirname(__FILE__), '../lib/cassiopee-mt')
|
2
3
|
require 'rubygems'
|
3
4
|
require 'logger'
|
4
5
|
require 'test/unit'
|
@@ -40,6 +41,14 @@ class TestCrawler < Test::Unit::TestCase
|
|
40
41
|
assert_equal(1,matches.length)
|
41
42
|
end
|
42
43
|
|
44
|
+
# An exact search split over 3 threads must report the single
# occurrence of 'exam' in the indexed string.
def test_multithreadsearch
  mt_crawler = CassiopeeMt::CrawlerMt.new
  mt_crawler.maxthread = 3
  mt_crawler.indexString('iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiimy sample exampleiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii')
  found = mt_crawler.searchExact('exam')
  assert_equal(1, found.length)
end
|
51
|
+
|
43
52
|
end
|
44
53
|
|
45
54
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cassiopee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 29
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 1
|
9
|
-
-
|
10
|
-
version: 0.1.
|
9
|
+
- 3
|
10
|
+
version: 0.1.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Olivier Sallou
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-09-
|
18
|
+
date: 2011-09-19 00:00:00 +02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -47,7 +47,9 @@ files:
|
|
47
47
|
- Changelog
|
48
48
|
- LICENSE
|
49
49
|
- demo.rb
|
50
|
+
- demo-mt.rb
|
50
51
|
- lib/cassiopee.rb
|
52
|
+
- lib/cassiopee-mt.rb
|
51
53
|
- bin/cassie.rb
|
52
54
|
- tests/test-suite.rb
|
53
55
|
- tests/amb.map
|