cassiopee 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. data/Changelog +1 -0
  2. data/lib/cassiopee-mt.rb +44 -15
  3. metadata +3 -3
data/Changelog CHANGED
@@ -1,3 +1,4 @@
1
+ v0.1.4 : fix 0.1.3 error on index load, add filter_position management in mt
1
2
  v0.1.3 : 09/11 Olivier Sallou
2
3
  add CrawlerMT in cassiopee-mt for multi thread support to speed up the search
3
4
  v0.1.2 : 09/11 Olivier Sallou
@@ -15,6 +15,8 @@ module CassiopeeMt
15
15
  # Filtering is used to split the input data according to maxthread
16
16
  # Matches of each thread are merged into the matches of CrawlerMT
17
17
  class CrawlerMt < Crawler
18
+
19
+ MINSEQSIZE=10
18
20
 
19
21
  # Max number of threads to use
20
22
  attr_accessor :maxthread
@@ -33,42 +35,69 @@ module CassiopeeMt
33
35
  crawler.file_suffix = @file_suffix
34
36
  crawler.loadIndex()
35
37
  #crawler.file_suffix = @file_suffix+"."+threadId.to_s
36
- crawler.indexString(@sequence)
37
38
  end
38
39
 
39
40
  def searchExact(pattern)
40
- nb = @sequence.length.div(maxthread)
41
- min = 0
41
+ len = @sequence.length
42
+ if(@min_position>0)
43
+ min = @min_position
44
+ else
45
+ min = 0
46
+ end
47
+ if(@max_position>0)
48
+ max = @max_position
49
+ else
50
+ max= @sequence.length
51
+ end
52
+ len = max - min
53
+ if(len<MINSEQSIZE)
54
+ @maxthread=1
55
+ end
56
+ nb = len.div(maxthread)
42
57
  (1..maxthread).each do |i|
43
58
  crawler = Crawler.new
44
59
  setParams(crawler,i)
45
- max = min + nb
60
+ curmax = min + nb
46
61
  if(i==maxthread)
47
- max = @sequence.length
62
+ curmax = max
48
63
  end
49
- crawler.filter_position(min,max)
50
- $log.debug("Start new Thread between " << min.to_s << " and " << max.to_s)
64
+ crawler.filter_position(min,curmax)
65
+ $log.debug("Start new Thread between " << min.to_s << " and " << curmax.to_s)
51
66
  @th[i-1] = Thread.new{ Thread.current["matches"] = crawler.searchExact(pattern) }
52
- min = max + 1
67
+ min = curmax + 1
53
68
  end
54
69
  @th.each {|t| t.join; t["matches"].each { |m| @matches << m }}
55
70
  return @matches
56
71
  end
57
72
 
58
73
  def searchApproximate(s,edit)
59
- nb = @sequence.length.div(maxthread)
60
- min = 0
74
+ len = @sequence.length
75
+ if(@min_position>0)
76
+ min = @min_position
77
+ else
78
+ min = 0
79
+ end
80
+ if(@max_position>0)
81
+ max = @max_position
82
+ else
83
+ max = @sequence.length
84
+ end
85
+ len = max - min
86
+ if(len<MINSEQSIZE)
87
+ @maxthread=1
88
+ end
89
+ nb = len.div(maxthread)
61
90
  (1..maxthread).each do |i|
62
91
  crawler = Crawler.new
63
92
  setParams(crawler,i)
64
- max = min + nb
93
+ curmax = min + nb
65
94
  if(i==maxthread)
66
- max = @sequence.length
95
+ curmax = max
67
96
  end
68
- crawler.filter_position(min,max)
69
- $log.debug("Start new Thread between " << min.to_s << " and " << max.to_s)
97
+ crawler.filter_position(min,curmax)
98
+ $log.debug("Start new Thread between " << min.to_s << " and " << curmax.to_s)
70
99
  @th[i-1] = Thread.new{ Thread.current["matches"] = crawler.searchApproximate(s,edit) }
71
- min = max + 1
100
+ min = curmax + 1
72
101
  end
73
102
  @th.each {|t| t.join; t["matches"].each { |m| @matches << m }}
74
103
  return @matches
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cassiopee
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 19
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 3
10
- version: 0.1.3
9
+ - 4
10
+ version: 0.1.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - Olivier Sallou