cassiopee 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Changelog CHANGED
@@ -1,3 +1,5 @@
1
+ v0.1.3 : 09/11 Olivier Sallou
2
+ add CrawlerMt in cassiopee-mt for multi-thread support to speed up the search
1
3
  v0.1.2 : 09/11 Olivier Sallou
2
4
  add possibility to reload an "index" instead of using index method again
3
5
  fix comment mngt (comments attribute)
data/demo-mt.rb ADDED
@@ -0,0 +1,28 @@
1
+ require File.join(File.dirname(__FILE__), 'lib/cassiopee-mt')
2
+ require 'rubygems'
3
+ require 'logger'
4
+
5
+ # Instantiate a new crawler
6
+ crawler = CassiopeeMt::CrawlerMt.new
7
+ crawler.setLogLevel(Logger::INFO)
8
+ crawler.maxthread=3
9
+ #crawler.use_store = true
10
+
11
+ # String to index
12
+ crawler.indexString('iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiisallou salluiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii')
13
+ # Search pattern in indexed string
14
+ crawler.searchExact('llo')
15
+
16
+ # Go through matches
17
+ while((match = crawler.next())!=nil)
18
+ puts "got an exact match " << match.inspect
19
+ end
20
+
21
+ crawler.clear()
22
+
23
+ crawler.searchApproximate('llo',1)
24
+
25
+ # Go through matches
26
+ while((match = crawler.next())!=nil)
27
+ puts "got an approximate match " << match.inspect
28
+ end
data/lib/cassiopee-mt.rb ADDED
@@ -0,0 +1,79 @@
1
+ require 'digest/md5'
2
+ require 'logger'
3
+ require 'zlib'
4
+ #require 'rubygems'
5
+ #require 'text'
6
+ #require 'text/util'
7
+ require File.join(File.dirname(__FILE__), 'cassiopee')
8
+
9
+ include Cassiopee
10
+
11
+ # Module managing multi threads to search in strings, extending Cassiopee
12
+ module CassiopeeMt
13
+
14
+ # Multi threaded search using a Crawler per thread
15
+ # Filtering is used to split the input data according to maxthread
16
+ # Matches of each thread are merged into the matches of CrawlerMt
17
+ class CrawlerMt < Crawler
18
+
19
+ # Max number of threads to use
20
+ attr_accessor :maxthread
21
+
22
+ @th = []
23
+
24
+ def initialize
25
+ super
26
+ @th = []
27
+ @matches = Array.new
28
+ end
29
+
30
+
31
+ def setParams(crawler,threadId)
32
+ crawler.setLogLevel($log.level)
33
+ crawler.file_suffix = @file_suffix
34
+ crawler.loadIndex()
35
+ #crawler.file_suffix = @file_suffix+"."+threadId.to_s
36
+ crawler.indexString(@sequence)
37
+ end
38
+
39
+ def searchExact(pattern)
40
+ nb = @sequence.length.div(maxthread)
41
+ min = 0
42
+ (1..maxthread).each do |i|
43
+ crawler = Crawler.new
44
+ setParams(crawler,i)
45
+ max = min + nb
46
+ if(i==maxthread)
47
+ max = @sequence.length
48
+ end
49
+ crawler.filter_position(min,max)
50
+ $log.debug("Start new Thread between " << min.to_s << " and " << max.to_s)
51
+ @th[i-1] = Thread.new{ Thread.current["matches"] = crawler.searchExact(pattern) }
52
+ min = max + 1
53
+ end
54
+ @th.each {|t| t.join; t["matches"].each { |m| @matches << m }}
55
+ return @matches
56
+ end
57
+
58
+ def searchApproximate(s,edit)
59
+ nb = @sequence.length.div(maxthread)
60
+ min = 0
61
+ (1..maxthread).each do |i|
62
+ crawler = Crawler.new
63
+ setParams(crawler,i)
64
+ max = min + nb
65
+ if(i==maxthread)
66
+ max = @sequence.length
67
+ end
68
+ crawler.filter_position(min,max)
69
+ $log.debug("Start new Thread between " << min.to_s << " and " << max.to_s)
70
+ @th[i-1] = Thread.new{ Thread.current["matches"] = crawler.searchApproximate(s,edit) }
71
+ min = max + 1
72
+ end
73
+ @th.each {|t| t.join; t["matches"].each { |m| @matches << m }}
74
+ return @matches
75
+ end
76
+
77
+ end
78
+
79
+ end
data/lib/cassiopee.rb CHANGED
@@ -136,15 +136,14 @@ module Cassiopee
136
136
  # * ambiguous is a Hash of char/Array of char mapping
137
137
 
138
138
  def isAmbiguousEqual(a,b,ambiguous)
139
- if(ambiguous==nil || ambiguous[a.chr]==nil)
139
+ if(ambiguous==nil || (ambiguous[a.chr]==nil && ambiguous[b.chr]==nil ))
140
140
  if(a==b)
141
141
  return true
142
142
  else
143
143
  return false
144
144
  end
145
145
  end
146
- vin = "" << a.chr
147
- if(ambiguous[a.chr].index(b.chr)!=nil)
146
+ if(ambiguous[a.chr].index(b.chr)!=nil || ambiguous[b.chr].index(a.chr)!=nil || a==b)
148
147
  return true
149
148
  else
150
149
  return false
@@ -214,6 +213,7 @@ module Cassiopee
214
213
 
215
214
  def clear
216
215
  @suffixes = Hash.new
216
+ @matches = Array.new
217
217
  File.delete(@file_suffix+FILE_SUFFIX_POS) unless !File.exists?(@file_suffix+FILE_SUFFIX_POS)
218
218
  end
219
219
 
data/tests/test-suite.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require File.join(File.dirname(__FILE__), '../lib/cassiopee')
2
+ require File.join(File.dirname(__FILE__), '../lib/cassiopee-mt')
2
3
  require 'rubygems'
3
4
  require 'logger'
4
5
  require 'test/unit'
@@ -40,6 +41,14 @@ class TestCrawler < Test::Unit::TestCase
40
41
  assert_equal(1,matches.length)
41
42
  end
42
43
 
44
+ def test_multithreadsearch
45
+ crawler = CassiopeeMt::CrawlerMt.new
46
+ crawler.maxthread=3
47
+ crawler.indexString('iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiimy sample exampleiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii')
48
+ matches = crawler.searchExact('exam')
49
+ assert_equal(1,matches.length)
50
+ end
51
+
43
52
  end
44
53
 
45
54
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cassiopee
3
3
  version: !ruby/object:Gem::Version
4
- hash: 31
4
+ hash: 29
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 2
10
- version: 0.1.2
9
+ - 3
10
+ version: 0.1.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Olivier Sallou
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-09-09 00:00:00 +02:00
18
+ date: 2011-09-19 00:00:00 +02:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -47,7 +47,9 @@ files:
47
47
  - Changelog
48
48
  - LICENSE
49
49
  - demo.rb
50
+ - demo-mt.rb
50
51
  - lib/cassiopee.rb
52
+ - lib/cassiopee-mt.rb
51
53
  - bin/cassie.rb
52
54
  - tests/test-suite.rb
53
55
  - tests/amb.map