cassiopee 0.1.3 → 0.1.4

Files changed (3)
  1. data/Changelog +1 -0
  2. data/lib/cassiopee-mt.rb +44 -15
  3. metadata +3 -3
data/Changelog CHANGED
@@ -1,3 +1,4 @@
+ v0.1.4 : fix 0.1.3 error on index load, add filter_position management in mt
  v0.1.3 : 09/11 Olivier Sallou
  add CrawlerMT in cassiopee-mt for multi thread support to speed up the search
  v0.1.2 : 09/11 Olivier Sallou
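
The "filter_position management in mt" noted above means CrawlerMt now honors a position filter set before the search, restricting the multi-threaded search to a sub-range of the indexed sequence instead of always splitting the whole sequence. A minimal usage sketch, assuming the require path is cassiopee-mt and that CrawlerMt inherits Crawler's indexString; the sequence, window and pattern values are illustrative, while maxthread, filter_position and searchExact are the names visible in the diff below:

# Hypothetical example: sequence, window and pattern values are illustrative only.
require 'cassiopee-mt'

crawler = CassiopeeMt::CrawlerMt.new
crawler.maxthread = 4                        # worker threads to split the search across
crawler.indexString("ACGTACGTGATTACAACGT")   # index an illustrative sequence
crawler.filter_position(4, 18)               # new in 0.1.4: the window is honored when splitting work
matches = crawler.searchExact("GATTACA")     # per-thread matches are merged and returned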
data/lib/cassiopee-mt.rb CHANGED
@@ -15,6 +15,8 @@ module CassiopeeMt
  # Filtering is used to split the input data according to maxthread
  # Matches of each thread are merge to matches of CrawlerMT
  class CrawlerMt < Crawler
+
+ MINSEQSIZE=10

  # Max number fo threads to use
  attr_accessor :maxthread
@@ -33,42 +35,69 @@ module CassiopeeMt
  crawler.file_suffix = @file_suffix
  crawler.loadIndex()
  #crawler.file_suffix = @file_suffix+"."+threadId.to_s
- crawler.indexString(@sequence)
  end

  def searchExact(pattern)
- nb = @sequence.length.div(maxthread)
- min = 0
+ len = @sequence.length
+ if(@min_position>0)
+ min = @min_position
+ else
+ min = 0
+ end
+ if(@max_position>0)
+ max = @max_position
+ else
+ max= @sequence.length
+ end
+ len = max - min
+ if(len<MINSEQSIZE)
+ @maxthread=1
+ end
+ nb = len.div(maxthread)
  (1..maxthread).each do |i|
  crawler = Crawler.new
  setParams(crawler,i)
- max = min + nb
+ curmax = min + nb
  if(i==maxthread)
- max = @sequence.length
+ curmax = max
  end
- crawler.filter_position(min,max)
- $log.debug("Start new Thread between " << min.to_s << " and " << max.to_s)
+ crawler.filter_position(min,curmax)
+ $log.debug("Start new Thread between " << min.to_s << " and " << curmax.to_s)
  @th[i-1] = Thread.new{ Thread.current["matches"] = crawler.searchExact(pattern) }
- min = max + 1
+ min = curmax + 1
  end
  @th.each {|t| t.join; t["matches"].each { |m| @matches << m }}
  return @matches
  end

  def searchApproximate(s,edit)
- nb = @sequence.length.div(maxthread)
- min = 0
+ len = @sequence.length
+ if(@min_position>0)
+ min = @min_position
+ else
+ min = 0
+ end
+ if(@max_position>0)
+ max = @max_position
+ else
+ max = @sequence.length
+ end
+ len = max - min
+ if(len<MINSEQSIZE)
+ @maxthread=1
+ end
+ nb = len.div(maxthread)
  (1..maxthread).each do |i|
  crawler = Crawler.new
  setParams(crawler,i)
- max = min + nb
+ curmax = min + nb
  if(i==maxthread)
- max = @sequence.length
+ curmax = max
  end
- crawler.filter_position(min,max)
- $log.debug("Start new Thread between " << min.to_s << " and " << max.to_s)
+ crawler.filter_position(min,curmax)
+ $log.debug("Start new Thread between " << min.to_s << " and " << curmax.to_s)
  @th[i-1] = Thread.new{ Thread.current["matches"] = crawler.searchApproximate(s,edit) }
- min = max + 1
+ min = curmax + 1
  end
  @th.each {|t| t.join; t["matches"].each { |m| @matches << m }}
  return @matches
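
For reference, the splitting added in both methods above divides the filtered window into maxthread roughly equal slices, with the last thread absorbing the remainder up to max, and drops back to a single thread when the window is shorter than MINSEQSIZE. A standalone sketch of that arithmetic (illustrative values, not the gem's code):

# Mirrors the window-splitting loop in searchExact/searchApproximate above.
min, max, maxthread = 0, 103, 4
maxthread = 1 if (max - min) < 10            # MINSEQSIZE guard
nb = (max - min).div(maxthread)
(1..maxthread).each do |i|
  curmax = (i == maxthread) ? max : min + nb # last slice runs to the end of the window
  puts "thread #{i}: filter_position(#{min}, #{curmax})"
  min = curmax + 1                           # next slice starts just past this one
end

With the values above this prints windows (0, 25), (26, 51), (52, 77) and (78, 103), matching how each per-thread Crawler receives its filter_position range.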
metadata CHANGED
@@ -1,13 +1,13 @@
  --- !ruby/object:Gem::Specification
  name: cassiopee
  version: !ruby/object:Gem::Version
- hash: 29
+ hash: 19
  prerelease: false
  segments:
  - 0
  - 1
- - 3
- version: 0.1.3
+ - 4
+ version: 0.1.4
  platform: ruby
  authors:
  - Olivier Sallou