code_zauker 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@
6
6
  <title>
7
7
  Class: CodeZauker::Util
8
8
 
9
- &mdash; Code Zauker 0.0.3 Documentation
9
+ &mdash; Code Zauker 0.0.4 Documentation
10
10
 
11
11
  </title>
12
12
 
@@ -108,6 +108,8 @@
108
108
 
109
109
 
110
110
 
111
+
112
+
111
113
 
112
114
  <h2>
113
115
  Instance Method Summary
@@ -141,6 +143,50 @@ This code try to "guess" the right encoding switching to ISO-8859-1 if
141
143
  UTF-8 is not valid.</p>
142
144
  </div></span>
143
145
 
146
+ </li>
147
+
148
+
149
+ <li class="public ">
150
+ <span class="summary_signature">
151
+
152
+ <a href="#get_lines-instance_method" title="#get_lines (instance method)">- (Object) <strong>get_lines</strong>(filename) </a>
153
+
154
+
155
+
156
+ </span>
157
+
158
+
159
+
160
+
161
+
162
+
163
+
164
+
165
+ <span class="summary_desc"><div class='inline'>
166
+ <p>Obtain lines from a filename It works even with pdf files.</p>
167
+ </div></span>
168
+
169
+ </li>
170
+
171
+
172
+ <li class="public ">
173
+ <span class="summary_signature">
174
+
175
+ <a href="#is_pdf%3F-instance_method" title="#is_pdf? (instance method)">- (Boolean) <strong>is_pdf?</strong>(filename) </a>
176
+
177
+
178
+
179
+ </span>
180
+
181
+
182
+
183
+
184
+
185
+
186
+
187
+
188
+ <span class="summary_desc"><div class='inline'></div></span>
189
+
144
190
  </li>
145
191
 
146
192
 
@@ -207,7 +253,6 @@ interpreted as a UTF-8 whereas it is a ISO-8859 windows code.</p>
207
253
  <pre class="lines">
208
254
 
209
255
 
210
- 56
211
256
  57
212
257
  58
213
258
  59
@@ -224,10 +269,11 @@ interpreted as a UTF-8 whereas it is a ISO-8859 windows code.</p>
224
269
  70
225
270
  71
226
271
  72
227
- 73</pre>
272
+ 73
273
+ 74</pre>
228
274
  </td>
229
275
  <td>
230
- <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 56</span>
276
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 57</span>
231
277
 
232
278
  <span class='kw'>def</span> <span class='id identifier rubyid_ensureUTF8'>ensureUTF8</span><span class='lparen'>(</span><span class='id identifier rubyid_untrusted_string'>untrusted_string</span><span class='rparen'>)</span>
233
279
  <span class='kw'>if</span> <span class='id identifier rubyid_untrusted_string'>untrusted_string</span><span class='period'>.</span><span class='id identifier rubyid_valid_encoding?'>valid_encoding?</span><span class='lparen'>(</span><span class='rparen'>)</span><span class='op'>==</span><span class='kw'>false</span>
@@ -250,6 +296,135 @@ interpreted as a UTF-8 whereas it is a ISO-8859 windows code.</p>
250
296
  </td>
251
297
  </tr>
252
298
  </table>
299
+ </div>
300
+
301
+ <div class="method_details ">
302
+ <p class="signature " id="get_lines-instance_method">
303
+
304
+ - (<tt>Object</tt>) <strong>get_lines</strong>(filename)
305
+
306
+
307
+
308
+ </p><div class="docstring">
309
+ <div class="discussion">
310
+
311
+ <p>Obtain lines from a filename It works even with pdf files</p>
312
+
313
+
314
+ </div>
315
+ </div>
316
+ <div class="tags">
317
+
318
+
319
+ </div><table class="source_code">
320
+ <tr>
321
+ <td>
322
+ <pre class="lines">
323
+
324
+
325
+ 82
326
+ 83
327
+ 84
328
+ 85
329
+ 86
330
+ 87
331
+ 88
332
+ 89
333
+ 90
334
+ 91
335
+ 92
336
+ 93
337
+ 94
338
+ 95
339
+ 96
340
+ 97
341
+ 98
342
+ 99
343
+ 100
344
+ 101
345
+ 102
346
+ 103
347
+ 104</pre>
348
+ </td>
349
+ <td>
350
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 82</span>
351
+
352
+ <span class='kw'>def</span> <span class='id identifier rubyid_get_lines'>get_lines</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='rparen'>)</span>
353
+ <span class='id identifier rubyid_lines'>lines</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span>
354
+ <span class='kw'>if</span> <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_is_pdf?'>is_pdf?</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='rparen'>)</span>
355
+ <span class='comment'># =&gt; enable pdf processing....
356
+ </span> <span class='comment'>#puts &quot;PDF...&quot;
357
+ </span> <span class='const'>File</span><span class='period'>.</span><span class='id identifier rubyid_open'>open</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>rb</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_io'>io</span><span class='op'>|</span>
358
+ <span class='id identifier rubyid_reader'>reader</span> <span class='op'>=</span> <span class='const'>PDF</span><span class='op'>::</span><span class='const'>Reader</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='id identifier rubyid_io'>io</span><span class='rparen'>)</span>
359
+ <span class='comment'>#puts &quot;PDF Scanning...#{reader.info}&quot;
360
+ </span> <span class='id identifier rubyid_reader'>reader</span><span class='period'>.</span><span class='id identifier rubyid_pages'>pages</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_page'>page</span><span class='op'>|</span>
361
+ <span class='id identifier rubyid_linesToTrim'>linesToTrim</span><span class='op'>=</span><span class='id identifier rubyid_page'>page</span><span class='period'>.</span><span class='id identifier rubyid_text'>text</span><span class='period'>.</span><span class='id identifier rubyid_split'>split</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>\n</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span>
362
+ <span class='id identifier rubyid_linesToTrim'>linesToTrim</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_l'>l</span><span class='op'>|</span>
363
+ <span class='id identifier rubyid_lines'>lines</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='id identifier rubyid_l'>l</span><span class='period'>.</span><span class='id identifier rubyid_strip'>strip</span><span class='lparen'>(</span><span class='rparen'>)</span><span class='rparen'>)</span>
364
+ <span class='kw'>end</span>
365
+ <span class='kw'>end</span>
366
+ <span class='comment'>#puts &quot;PDF Lines:#{lines.length}&quot;
367
+ </span> <span class='kw'>end</span>
368
+ <span class='kw'>else</span>
369
+ <span class='const'>File</span><span class='period'>.</span><span class='id identifier rubyid_open'>open</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='comma'>,</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>r</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span> <span class='lbrace'>{</span> <span class='op'>|</span><span class='id identifier rubyid_f'>f</span><span class='op'>|</span>
370
+ <span class='id identifier rubyid_lines'>lines</span><span class='op'>=</span><span class='id identifier rubyid_f'>f</span><span class='period'>.</span><span class='id identifier rubyid_readlines'>readlines</span><span class='lparen'>(</span><span class='rparen'>)</span>
371
+ <span class='rbrace'>}</span>
372
+ <span class='kw'>end</span>
373
+ <span class='kw'>return</span> <span class='id identifier rubyid_lines'>lines</span>
374
+ <span class='kw'>end</span></pre>
375
+ </td>
376
+ </tr>
377
+ </table>
378
+ </div>
379
+
380
+ <div class="method_details ">
381
+ <p class="signature " id="is_pdf?-instance_method">
382
+
383
+ - (<tt>Boolean</tt>) <strong>is_pdf?</strong>(filename)
384
+
385
+
386
+
387
+ </p><div class="docstring">
388
+ <div class="discussion">
389
+
390
+
391
+ </div>
392
+ </div>
393
+ <div class="tags">
394
+
395
+ <h3>Returns:</h3>
396
+ <ul class="return">
397
+
398
+ <li>
399
+
400
+
401
+ <span class='type'>(<tt>Boolean</tt>)</span>
402
+
403
+
404
+
405
+ </li>
406
+
407
+ </ul>
408
+
409
+ </div><table class="source_code">
410
+ <tr>
411
+ <td>
412
+ <pre class="lines">
413
+
414
+
415
+ 76
416
+ 77
417
+ 78</pre>
418
+ </td>
419
+ <td>
420
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 76</span>
421
+
422
+ <span class='kw'>def</span> <span class='id identifier rubyid_is_pdf?'>is_pdf?</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='rparen'>)</span>
423
+ <span class='kw'>return</span> <span class='id identifier rubyid_filename'>filename</span><span class='period'>.</span><span class='id identifier rubyid_downcase'>downcase</span><span class='lparen'>(</span><span class='rparen'>)</span><span class='period'>.</span><span class='id identifier rubyid_end_with?'>end_with?</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>.pdf</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span>
424
+ <span class='kw'>end</span></pre>
425
+ </td>
426
+ </tr>
427
+ </table>
253
428
  </div>
254
429
 
255
430
  <div class="method_details ">
@@ -277,7 +452,6 @@ TODO: Very bad implementation, need improvements</p>
277
452
  <pre class="lines">
278
453
 
279
454
 
280
- 19
281
455
  20
282
456
  21
283
457
  22
@@ -306,10 +480,11 @@ TODO: Very bad implementation, need improvements</p>
306
480
  45
307
481
  46
308
482
  47
309
- 48</pre>
483
+ 48
484
+ 49</pre>
310
485
  </td>
311
486
  <td>
312
- <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 19</span>
487
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 20</span>
313
488
 
314
489
  <span class='kw'>def</span> <span class='id identifier rubyid_mixCase'>mixCase</span><span class='lparen'>(</span><span class='id identifier rubyid_trigram'>trigram</span><span class='rparen'>)</span>
315
490
  <span class='id identifier rubyid_caseMixedElements'>caseMixedElements</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span>
@@ -351,9 +526,9 @@ TODO: Very bad implementation, need improvements</p>
351
526
  </div>
352
527
 
353
528
  <div id="footer">
354
- Generated on Fri Feb 3 17:18:43 2012 by
529
+ Generated on Sun Feb 12 19:16:26 2012 by
355
530
  <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
356
- 0.7.4 (ruby-1.9.3).
531
+ 0.7.5 (ruby-1.9.3).
357
532
  </div>
358
533
 
359
534
  </body>
data/doc/Grep.html CHANGED
@@ -6,7 +6,7 @@
6
6
  <title>
7
7
  Module: Grep
8
8
 
9
- &mdash; Code Zauker 0.0.3 Documentation
9
+ &mdash; Code Zauker 0.0.4 Documentation
10
10
 
11
11
  </title>
12
12
 
@@ -82,6 +82,8 @@
82
82
 
83
83
 
84
84
 
85
+
86
+
85
87
 
86
88
  <h2>
87
89
  Instance Method Summary
@@ -335,9 +337,9 @@ will be printed.</p>
335
337
  </div>
336
338
 
337
339
  <div id="footer">
338
- Generated on Fri Feb 3 17:18:44 2012 by
340
+ Generated on Sun Feb 12 19:16:27 2012 by
339
341
  <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
340
- 0.7.4 (ruby-1.9.3).
342
+ 0.7.5 (ruby-1.9.3).
341
343
  </div>
342
344
 
343
345
  </body>
data/doc/_index.html CHANGED
@@ -4,7 +4,7 @@
4
4
  <head>
5
5
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
6
6
  <title>
7
- Code Zauker 0.0.3 Documentation
7
+ Code Zauker 0.0.4 Documentation
8
8
 
9
9
  </title>
10
10
 
@@ -52,7 +52,7 @@
52
52
 
53
53
  <iframe id="search_frame"></iframe>
54
54
 
55
- <div id="content"><h1 class="noborder title">Code Zauker 0.0.3 Documentation</h1>
55
+ <div id="content"><h1 class="noborder title">Code Zauker 0.0.4 Documentation</h1>
56
56
  <div id="listing">
57
57
  <h1 class="alphaindex">Alphabetic Index</h1>
58
58
 
@@ -131,9 +131,9 @@
131
131
  </div>
132
132
 
133
133
  <div id="footer">
134
- Generated on Fri Feb 3 17:18:43 2012 by
134
+ Generated on Sun Feb 12 19:16:25 2012 by
135
135
  <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
136
- 0.7.4 (ruby-1.9.3).
136
+ 0.7.5 (ruby-1.9.3).
137
137
  </div>
138
138
 
139
139
  </body>
data/doc/frames.html CHANGED
@@ -4,7 +4,7 @@
4
4
  <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5
5
  <head>
6
6
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
7
- <title>Code Zauker 0.0.3 Documentation</title>
7
+ <title>Code Zauker 0.0.4 Documentation</title>
8
8
  </head>
9
9
  <frameset cols="20%,*">
10
10
  <frame name="list" src="class_list.html" />
data/doc/index.html CHANGED
@@ -4,7 +4,7 @@
4
4
  <head>
5
5
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
6
6
  <title>
7
- Code Zauker 0.0.3 Documentation
7
+ Code Zauker 0.0.4 Documentation
8
8
 
9
9
  </title>
10
10
 
@@ -52,7 +52,7 @@
52
52
 
53
53
  <iframe id="search_frame"></iframe>
54
54
 
55
- <div id="content"><h1 class="noborder title">Code Zauker 0.0.3 Documentation</h1>
55
+ <div id="content"><h1 class="noborder title">Code Zauker 0.0.4 Documentation</h1>
56
56
  <div id="listing">
57
57
  <h1 class="alphaindex">Alphabetic Index</h1>
58
58
 
@@ -131,9 +131,9 @@
131
131
  </div>
132
132
 
133
133
  <div id="footer">
134
- Generated on Fri Feb 3 17:18:43 2012 by
134
+ Generated on Sun Feb 12 19:16:25 2012 by
135
135
  <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
136
- 0.7.4 (ruby-1.9.3).
136
+ 0.7.5 (ruby-1.9.3).
137
137
  </div>
138
138
 
139
139
  </body>
data/doc/js/full_list.js CHANGED
@@ -4,6 +4,9 @@ var searchCache = [];
4
4
  var searchString = '';
5
5
  var regexSearchString = '';
6
6
  var caseSensitiveMatch = false;
7
+ var ignoreKeyCodeMin = 8;
8
+ var ignoreKeyCodeMax = 46;
9
+ var commandKey = 91;
7
10
 
8
11
  RegExp.escape = function(text) {
9
12
  return text.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
@@ -19,6 +22,9 @@ function fullListSearch() {
19
22
  });
20
23
 
21
24
  $('#search input').keyup(function() {
25
+ if ((event.keyCode > ignoreKeyCodeMin && event.keyCode < ignoreKeyCodeMax)
26
+ || event.keyCode == commandKey)
27
+ return;
22
28
  searchString = this.value;
23
29
  caseSensitiveMatch = searchString.match(/[A-Z]/) != null;
24
30
  regexSearchString = RegExp.escape(searchString);
data/doc/method_list.html CHANGED
@@ -57,6 +57,14 @@
57
57
 
58
58
 
59
59
  <li class="r1 ">
60
+ <span class='object_link'><a href="CodeZauker/Util.html#get_lines-instance_method" title="CodeZauker::Util#get_lines (method)">#get_lines</a></span>
61
+
62
+ <small>CodeZauker::Util</small>
63
+
64
+ </li>
65
+
66
+
67
+ <li class="r2 ">
60
68
  <span class='object_link'><a href="Grep.html#grep-instance_method" title="Grep#grep (method)">#grep</a></span>
61
69
 
62
70
  <small>Grep</small>
@@ -64,7 +72,7 @@
64
72
  </li>
65
73
 
66
74
 
67
- <li class="r2 ">
75
+ <li class="r1 ">
68
76
  <span class='object_link'><a href="CodeZauker/FileScanner.html#initialize-instance_method" title="CodeZauker::FileScanner#initialize (method)">#initialize</a></span>
69
77
 
70
78
  <small>CodeZauker::FileScanner</small>
@@ -72,6 +80,14 @@
72
80
  </li>
73
81
 
74
82
 
83
+ <li class="r2 ">
84
+ <span class='object_link'><a href="CodeZauker/Util.html#is_pdf%3F-instance_method" title="CodeZauker::Util#is_pdf? (method)">#is_pdf?</a></span>
85
+
86
+ <small>CodeZauker::Util</small>
87
+
88
+ </li>
89
+
90
+
75
91
  <li class="r1 ">
76
92
  <span class='object_link'><a href="CodeZauker/FileScanner.html#isearch-instance_method" title="CodeZauker::FileScanner#isearch (method)">#isearch</a></span>
77
93
 
@@ -6,7 +6,7 @@
6
6
  <title>
7
7
  Top Level Namespace
8
8
 
9
- &mdash; Code Zauker 0.0.3 Documentation
9
+ &mdash; Code Zauker 0.0.4 Documentation
10
10
 
11
11
  </title>
12
12
 
@@ -91,12 +91,14 @@
91
91
 
92
92
 
93
93
 
94
+
95
+
94
96
  </div>
95
97
 
96
98
  <div id="footer">
97
- Generated on Fri Feb 3 17:18:44 2012 by
99
+ Generated on Sun Feb 12 19:16:27 2012 by
98
100
  <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
99
- 0.7.4 (ruby-1.9.3).
101
+ 0.7.5 (ruby-1.9.3).
100
102
  </div>
101
103
 
102
104
  </body>
data/lib/code_zauker.rb CHANGED
@@ -4,6 +4,7 @@ require "code_zauker/constants"
4
4
  require 'redis/connection/hiredis'
5
5
  require 'redis'
6
6
  require 'set'
7
+ require 'pdf/reader'
7
8
  # This module implements a simple reverse indexer
8
9
  # based on Redis
9
10
  # The idea is inspired by http://swtch.com/~rsc/regexp/regexp4.html
@@ -72,6 +73,37 @@ module CodeZauker
72
73
  end
73
74
  end
74
75
 
76
+ def is_pdf?(filename)
77
+ return filename.downcase().end_with?(".pdf")
78
+ end
79
+
80
+ # Obtain lines from a filename
81
+ # It works even with pdf files
82
+ def get_lines(filename)
83
+ lines=[]
84
+ if self.is_pdf?(filename)
85
+ # => enable pdf processing....
86
+ #puts "PDF..."
87
+ File.open(filename, "rb") do |io|
88
+ reader = PDF::Reader.new(io)
89
+ #puts "PDF Scanning...#{reader.info}"
90
+ reader.pages.each do |page|
91
+ linesToTrim=page.text.split("\n")
92
+ linesToTrim.each do |l|
93
+ lines.push(l.strip())
94
+ end
95
+ end
96
+ #puts "PDF Lines:#{lines.length}"
97
+ end
98
+ else
99
+ File.open(filename,"r") { |f|
100
+ lines=f.readlines()
101
+ }
102
+ end
103
+ return lines
104
+ end
105
+
106
+
75
107
  end
76
108
 
77
109
  # Scan a file and push it inside redis...
@@ -128,7 +160,11 @@ module CodeZauker
128
160
 
129
161
  def pushTrigramsSetRecoverable(s, fid, filename)
130
162
  error=false
131
- @redis.multi do
163
+ # @redis.multi do
164
+ # From 5.8
165
+ # to 7.6 Files per sec
166
+ # changing multi into pipielined
167
+ @redis.pipelined do
132
168
  s.each do | trigram |
133
169
  @redis.sadd "trigram:#{trigram}",fid
134
170
  @redis.sadd "fscan:trigramsOnFile:#{fid}", trigram
@@ -139,7 +175,7 @@ module CodeZauker
139
175
  error=true
140
176
  end
141
177
  end
142
- end # multi
178
+ end # multi/pipelined
143
179
  return error
144
180
  end
145
181
  private :pushTrigramsSetRecoverable
@@ -169,31 +205,31 @@ module CodeZauker
169
205
  # before sending it to redis. This avoids
170
206
  # a lot of spurious work
171
207
  s=Set.new
172
- File.open(filename,"r") { |f|
173
- lines=f.readlines()
174
- adaptiveSize= TRIGRAM_DEFAULT_PUSH_SIZE
175
- util=Util.new()
176
- lines.each do |lineNotUTF8|
177
- l= util.ensureUTF8(lineNotUTF8)
178
- # Split each line into 3-char chunks, and store in a redis set
179
- i=0
180
- for istart in 0...(l.length-GRAM_SIZE)
181
- trigram = l[istart, GRAM_SIZE]
182
- # Avoid storing the 3space guy enterely
183
- if trigram==SPACE_GUY
184
- next
185
- end
186
- # push the trigram to redis (highly optimized)
187
- s.add(trigram)
188
- if s.length > adaptiveSize
189
- pushTrigramsSet(s,fid,filename)
190
- s=Set.new()
191
- end
192
- trigramScanned += 1
193
- #puts "#{istart} Trigram fscan:#{trigram}/ FileId: #{fid}"
208
+ util=Util.new()
209
+ lines=util.get_lines(filename)
210
+ adaptiveSize= TRIGRAM_DEFAULT_PUSH_SIZE
211
+
212
+ lines.each do |lineNotUTF8|
213
+ l= util.ensureUTF8(lineNotUTF8)
214
+ # Split each line into 3-char chunks, and store in a redis set
215
+ i=0
216
+ for istart in 0...(l.length-GRAM_SIZE)
217
+ trigram = l[istart, GRAM_SIZE]
218
+ # Avoid storing the 3space guy entirely
219
+ if trigram==SPACE_GUY
220
+ next
221
+ end
222
+ # push the trigram to redis (highly optimized)
223
+ s.add(trigram)
224
+ if s.length > adaptiveSize
225
+ pushTrigramsSet(s,fid,filename)
226
+ s=Set.new()
194
227
  end
228
+ trigramScanned += 1
229
+ #puts "#{istart} Trigram fscan:#{trigram}/ FileId: #{fid}"
195
230
  end
196
- }
231
+ end
232
+
197
233
 
198
234
  if s.length > 0
199
235
  pushTrigramsSet(s,fid,filename)