code_zauker 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,742 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
2
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
3
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4
+ <head>
5
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
6
+ <title>
7
+ Class: CodeZauker::FileScanner
8
+
9
+ &mdash; Code Zauker 0.0.2 Documentation
10
+
11
+ </title>
12
+
13
+ <link rel="stylesheet" href="../css/style.css" type="text/css" media="screen" charset="utf-8" />
14
+
15
+ <link rel="stylesheet" href="../css/common.css" type="text/css" media="screen" charset="utf-8" />
16
+
17
+ <script type="text/javascript" charset="utf-8">
18
+ relpath = '..';
19
+ if (relpath != '') relpath += '/';
20
+ </script>
21
+
22
+ <script type="text/javascript" charset="utf-8" src="../js/jquery.js"></script>
23
+
24
+ <script type="text/javascript" charset="utf-8" src="../js/app.js"></script>
25
+
26
+
27
+ </head>
28
+ <body>
29
+ <script type="text/javascript" charset="utf-8">
30
+ if (window.top.frames.main) document.body.className = 'frames';
31
+ </script>
32
+
33
+ <div id="header">
34
+ <div id="menu">
35
+
36
+ <a href="../_index.html">Index (F)</a> &raquo;
37
+ <span class='title'><span class='object_link'><a href="../CodeZauker.html" title="CodeZauker (module)">CodeZauker</a></span></span>
38
+ &raquo;
39
+ <span class="title">FileScanner</span>
40
+
41
+
42
+ <div class="noframes"><span class="title">(</span><a href="." target="_top">no frames</a><span class="title">)</span></div>
43
+ </div>
44
+
45
+ <div id="search">
46
+
47
+ <a id="class_list_link" href="#">Class List</a>
48
+
49
+ <a id="method_list_link" href="#">Method List</a>
50
+
51
+ <a id="file_list_link" href="#">File List</a>
52
+
53
+ </div>
54
+ <div class="clear"></div>
55
+ </div>
56
+
57
+ <iframe id="search_frame"></iframe>
58
+
59
+ <div id="content"><h1>Class: CodeZauker::FileScanner
60
+
61
+
62
+
63
+ </h1>
64
+
65
+ <dl class="box">
66
+
67
+ <dt class="r1">Inherits:</dt>
68
+ <dd class="r1">
69
+ <span class="inheritName">Object</span>
70
+
71
+ <ul class="fullTree">
72
+ <li>Object</li>
73
+
74
+ <li class="next">CodeZauker::FileScanner</li>
75
+
76
+ </ul>
77
+ <a href="#" class="inheritanceTree">show all</a>
78
+
79
+ </dd>
80
+
81
+
82
+
83
+
84
+
85
+
86
+
87
+
88
+
89
+ <dt class="r2 last">Defined in:</dt>
90
+ <dd class="r2 last">lib/code_zauker.rb</dd>
91
+
92
+ </dl>
93
+ <div class="clear"></div>
94
+
95
+ <h2>Overview</h2><div class="docstring">
96
+ <div class="discussion">
97
+
98
+ <p>Scan a file and push it inside redis... then it can provide handy methods to
99
+ find files containing the trigram...</p>
100
+
101
+
102
+ </div>
103
+ </div>
104
+ <div class="tags">
105
+
106
+
107
+ </div>
108
+
109
+
110
+
111
+
112
+
113
+ <h2>
114
+ Instance Method Summary
115
+ <small>(<a href="#" class="summary_toggle">collapse</a>)</small>
116
+ </h2>
117
+
118
+ <ul class="summary">
119
+
120
+ <li class="public ">
121
+ <span class="summary_signature">
122
+
123
+ <a href="#disconnect-instance_method" title="#disconnect (instance method)">- (Object) <strong>disconnect</strong> </a>
124
+
125
+
126
+
127
+ </span>
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+ <span class="summary_desc"><div class='inline'></div></span>
137
+
138
+ </li>
139
+
140
+
141
+ <li class="public ">
142
+ <span class="summary_signature">
143
+
144
+ <a href="#initialize-instance_method" title="#initialize (instance method)">- (FileScanner) <strong>initialize</strong>(redisConnection = nil) </a>
145
+
146
+
147
+
148
+ </span>
149
+
150
+ <span class="note title constructor">constructor</span>
151
+
152
+
153
+
154
+
155
+
156
+
157
+
158
+
159
+ <span class="summary_desc"><div class='inline'>
160
+ <p>A new instance of FileScanner.</p>
161
+ </div></span>
162
+
163
+ </li>
164
+
165
+
166
+ <li class="public ">
167
+ <span class="summary_signature">
168
+
169
+ <a href="#load-instance_method" title="#load (instance method)">- (Object) <strong>load</strong>(filename, noReload = false) </a>
170
+
171
+
172
+
173
+ </span>
174
+
175
+
176
+
177
+
178
+
179
+
180
+
181
+
182
+ <span class="summary_desc"><div class='inline'></div></span>
183
+
184
+ </li>
185
+
186
+
187
+ <li class="public ">
188
+ <span class="summary_signature">
189
+
190
+ <a href="#remove-instance_method" title="#remove (instance method)">- (Object) <strong>remove</strong>(filePaths = nil) </a>
191
+
192
+
193
+
194
+ </span>
195
+
196
+
197
+
198
+
199
+
200
+
201
+
202
+
203
+ <span class="summary_desc"><div class='inline'>
204
+ <p>Remove the files from the index, updating trigrams.</p>
205
+ </div></span>
206
+
207
+ </li>
208
+
209
+
210
+ <li class="public ">
211
+ <span class="summary_signature">
212
+
213
+ <a href="#removeAll-instance_method" title="#removeAll (instance method)">- (Object) <strong>removeAll</strong> </a>
214
+
215
+
216
+
217
+ </span>
218
+
219
+
220
+
221
+
222
+
223
+
224
+
225
+
226
+ <span class="summary_desc"><div class='inline'>
227
+ <p>Remove all the keys.</p>
228
+ </div></span>
229
+
230
+ </li>
231
+
232
+
233
+ <li class="public ">
234
+ <span class="summary_signature">
235
+
236
+ <a href="#search-instance_method" title="#search (instance method)">- (Object) <strong>search</strong>(term) </a>
237
+
238
+
239
+
240
+ </span>
241
+
242
+
243
+
244
+
245
+
246
+
247
+
248
+
249
+ <span class="summary_desc"><div class='inline'>
250
+ <h1>search</h1>
251
+
252
+ <p>Find a list of file candidates to a search string. The search string is
253
+ padded into trigrams.</p>
254
+ </div></span>
255
+
256
+ </li>
257
+
258
+
259
+ </ul>
260
+
261
+
262
+ <div id="constructor_details" class="method_details_list">
263
+ <h2>Constructor Details</h2>
264
+
265
+ <div class="method_details first">
266
+ <p class="signature first" id="initialize-instance_method">
267
+
268
+ - (<tt><span class='object_link'><a href="" title="CodeZauker::FileScanner (class)">FileScanner</a></span></tt>) <strong>initialize</strong>(redisConnection = nil)
269
+
270
+
271
+
272
+ </p><div class="docstring">
273
+ <div class="discussion">
274
+
275
+ <p>A new instance of FileScanner</p>
276
+
277
+
278
+ </div>
279
+ </div>
280
+ <div class="tags">
281
+
282
+
283
+ </div><table class="source_code">
284
+ <tr>
285
+ <td>
286
+ <pre class="lines">
287
+
288
+
289
+ 16
290
+ 17
291
+ 18
292
+ 19
293
+ 20
294
+ 21
295
+ 22</pre>
296
+ </td>
297
+ <td>
298
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 16</span>
299
+
300
+ <span class='kw'>def</span> <span class='id identifier rubyid_initialize'>initialize</span><span class='lparen'>(</span><span class='id identifier rubyid_redisConnection'>redisConnection</span><span class='op'>=</span><span class='kw'>nil</span><span class='rparen'>)</span>
301
+ <span class='kw'>if</span> <span class='id identifier rubyid_redisConnection'>redisConnection</span><span class='op'>==</span><span class='kw'>nil</span>
302
+ <span class='ivar'>@redis</span><span class='op'>=</span><span class='const'>Redis</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span>
303
+ <span class='kw'>else</span>
304
+ <span class='ivar'>@redis</span><span class='op'>=</span><span class='id identifier rubyid_redisConnection'>redisConnection</span>
305
+ <span class='kw'>end</span>
306
+ <span class='kw'>end</span></pre>
307
+ </td>
308
+ </tr>
309
+ </table>
310
+ </div>
311
+
312
+ </div>
313
+
314
+
315
+ <div id="instance_method_details" class="method_details_list">
316
+ <h2>Instance Method Details</h2>
317
+
318
+
319
+ <div class="method_details first">
320
+ <p class="signature first" id="disconnect-instance_method">
321
+
322
+ - (<tt>Object</tt>) <strong>disconnect</strong>
323
+
324
+
325
+
326
+ </p><table class="source_code">
327
+ <tr>
328
+ <td>
329
+ <pre class="lines">
330
+
331
+
332
+ 23
333
+ 24
334
+ 25</pre>
335
+ </td>
336
+ <td>
337
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 23</span>
338
+
339
+ <span class='kw'>def</span> <span class='id identifier rubyid_disconnect'>disconnect</span><span class='lparen'>(</span><span class='rparen'>)</span>
340
+ <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_quit'>quit</span>
341
+ <span class='kw'>end</span></pre>
342
+ </td>
343
+ </tr>
344
+ </table>
345
+ </div>
346
+
347
+ <div class="method_details ">
348
+ <p class="signature " id="load-instance_method">
349
+
350
+ - (<tt>Object</tt>) <strong>load</strong>(filename, noReload = false)
351
+
352
+
353
+
354
+ </p><table class="source_code">
355
+ <tr>
356
+ <td>
357
+ <pre class="lines">
358
+
359
+
360
+ 52
361
+ 53
362
+ 54
363
+ 55
364
+ 56
365
+ 57
366
+ 58
367
+ 59
368
+ 60
369
+ 61
370
+ 62
371
+ 63
372
+ 64
373
+ 65
374
+ 66
375
+ 67
376
+ 68
377
+ 69
378
+ 70
379
+ 71
380
+ 72
381
+ 73
382
+ 74
383
+ 75
384
+ 76
385
+ 77
386
+ 78
387
+ 79
388
+ 80
389
+ 81
390
+ 82
391
+ 83
392
+ 84
393
+ 85
394
+ 86
395
+ 87
396
+ 88
397
+ 89
398
+ 90
399
+ 91
400
+ 92
401
+ 93
402
+ 94
403
+ 95
404
+ 96
405
+ 97
406
+ 98
407
+ 99
408
+ 100
409
+ 101
410
+ 102
411
+ 103
412
+ 104
413
+ 105
414
+ 106
415
+ 107
416
+ 108
417
+ 109
418
+ 110
419
+ 111
420
+ 112
421
+ 113
422
+ 114</pre>
423
+ </td>
424
+ <td>
425
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 52</span>
426
+
427
+ <span class='kw'>def</span> <span class='id identifier rubyid_load'>load</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='comma'>,</span> <span class='id identifier rubyid_noReload'>noReload</span><span class='op'>=</span><span class='kw'>false</span><span class='rparen'>)</span>
428
+ <span class='comment'># Define my redis id...
429
+ </span> <span class='comment'># Already exists?...
430
+ </span> <span class='id identifier rubyid_fid'>fid</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_get'>get</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
431
+ <span class='kw'>if</span> <span class='id identifier rubyid_fid'>fid</span><span class='op'>==</span><span class='kw'>nil</span>
432
+ <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_setnx'>setnx</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:nextId</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span><span class='int'>0</span>
433
+ <span class='id identifier rubyid_fid'>fid</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_incr'>incr</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:nextId</span><span class='tstring_end'>&quot;</span></span>
434
+ <span class='comment'># BUG: Consider storing it at the END of the processing
435
+ </span> <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_set'>set</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='id identifier rubyid_fid'>fid</span>
436
+ <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_set'>set</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id2filename:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span><span class='id identifier rubyid_filename'>filename</span>
437
+ <span class='kw'>else</span>
438
+ <span class='kw'>if</span> <span class='id identifier rubyid_noReload'>noReload</span>
439
+ <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>Already found </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_content'> as id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_content'> and NOT RELOADED</span><span class='tstring_end'>&quot;</span></span>
440
+ <span class='kw'>return</span> <span class='kw'>nil</span>
441
+ <span class='kw'>end</span>
442
+ <span class='kw'>end</span>
443
+ <span class='comment'># fid is the set key!...
444
+ </span> <span class='id identifier rubyid_trigramScanned'>trigramScanned</span><span class='op'>=</span><span class='int'>0</span>
445
+ <span class='comment'># TEST_LICENSE.txt: 3290 Total Scanned: 24628
446
+ </span> <span class='comment'># The ratio is below 13% of total trigrams are unique for very big files
447
+ </span> <span class='comment'># So we avoid a huge roundtrip to redis, and store the trigram on a memory-based set
448
+ </span> <span class='comment'># before sending it to redis. This avoids
449
+ </span> <span class='comment'># a lot of spurious work
450
+ </span> <span class='id identifier rubyid_s'>s</span><span class='op'>=</span><span class='const'>Set</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span>
451
+ <span class='const'>File</span><span class='period'>.</span><span class='id identifier rubyid_open'>open</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='comma'>,</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>r</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_f'>f</span><span class='op'>|</span>
452
+ <span class='id identifier rubyid_lines'>lines</span><span class='op'>=</span><span class='id identifier rubyid_f'>f</span><span class='period'>.</span><span class='id identifier rubyid_readlines'>readlines</span><span class='lparen'>(</span><span class='rparen'>)</span>
453
+ <span class='id identifier rubyid_adaptiveSize'>adaptiveSize</span><span class='op'>=</span> <span class='int'>6000</span>
454
+ <span class='id identifier rubyid_lines'>lines</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_l'>l</span><span class='op'>|</span>
455
+ <span class='comment'># Split each line into 3-char chunks, and store in a redis set
456
+ </span> <span class='id identifier rubyid_i'>i</span><span class='op'>=</span><span class='int'>0</span>
457
+ <span class='kw'>for</span> <span class='id identifier rubyid_istart'>istart</span> <span class='kw'>in</span> <span class='int'>0</span><span class='op'>...</span><span class='lparen'>(</span><span class='id identifier rubyid_l'>l</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>-</span><span class='const'>GRAM_SIZE</span><span class='rparen'>)</span>
458
+ <span class='id identifier rubyid_trigram'>trigram</span> <span class='op'>=</span> <span class='id identifier rubyid_l'>l</span><span class='lbracket'>[</span><span class='id identifier rubyid_istart'>istart</span><span class='comma'>,</span> <span class='const'>GRAM_SIZE</span><span class='rbracket'>]</span>
459
+ <span class='comment'># Avoid storing the 3space guy entirely
460
+ </span> <span class='kw'>if</span> <span class='id identifier rubyid_trigram'>trigram</span><span class='op'>==</span><span class='const'>SPACE_GUY</span>
461
+ <span class='kw'>next</span>
462
+ <span class='kw'>end</span>
463
+ <span class='comment'># push the trigram to redis (highly optimized)
464
+ </span> <span class='id identifier rubyid_s'>s</span><span class='period'>.</span><span class='id identifier rubyid_add'>add</span><span class='lparen'>(</span><span class='id identifier rubyid_trigram'>trigram</span><span class='rparen'>)</span>
465
+ <span class='kw'>if</span> <span class='id identifier rubyid_s'>s</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>&gt;</span> <span class='id identifier rubyid_adaptiveSize'>adaptiveSize</span>
466
+ <span class='id identifier rubyid_pushTrigramsSet'>pushTrigramsSet</span><span class='lparen'>(</span><span class='id identifier rubyid_s'>s</span><span class='comma'>,</span><span class='id identifier rubyid_fid'>fid</span><span class='comma'>,</span><span class='id identifier rubyid_filename'>filename</span><span class='rparen'>)</span>
467
+ <span class='id identifier rubyid_s'>s</span><span class='op'>=</span><span class='const'>Set</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='rparen'>)</span>
468
+ <span class='kw'>end</span>
469
+ <span class='id identifier rubyid_trigramScanned'>trigramScanned</span> <span class='op'>+=</span> <span class='int'>1</span>
470
+ <span class='comment'>#puts &quot;#{istart} Trigram fscan:#{trigram}/ FileId: #{fid}&quot;
471
+ </span> <span class='kw'>end</span>
472
+ <span class='kw'>end</span>
473
+ <span class='kw'>end</span>
474
+
475
+ <span class='kw'>if</span> <span class='id identifier rubyid_s'>s</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>&gt;</span> <span class='int'>0</span>
476
+ <span class='id identifier rubyid_pushTrigramsSet'>pushTrigramsSet</span><span class='lparen'>(</span><span class='id identifier rubyid_s'>s</span><span class='comma'>,</span><span class='id identifier rubyid_fid'>fid</span><span class='comma'>,</span><span class='id identifier rubyid_filename'>filename</span><span class='rparen'>)</span>
477
+ <span class='id identifier rubyid_s'>s</span><span class='op'>=</span><span class='kw'>nil</span>
478
+ <span class='comment'>#puts &quot;Final push of #{s.length}&quot;
479
+ </span> <span class='kw'>end</span>
480
+
481
+
482
+ <span class='id identifier rubyid_trigramsOnFile'>trigramsOnFile</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_scard'>scard</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:trigramsOnFile:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
483
+ <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_sadd'>sadd</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:processedFiles</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
484
+ <span class='id identifier rubyid_trigramRatio'>trigramRatio</span><span class='op'>=</span><span class='lparen'>(</span> <span class='lparen'>(</span><span class='id identifier rubyid_trigramsOnFile'>trigramsOnFile</span><span class='op'>*</span><span class='float'>1.0</span><span class='rparen'>)</span> <span class='op'>/</span> <span class='id identifier rubyid_trigramScanned'>trigramScanned</span> <span class='rparen'>)</span><span class='op'>*</span> <span class='float'>100.0</span>
485
+ <span class='kw'>if</span> <span class='id identifier rubyid_trigramRatio'>trigramRatio</span> <span class='op'>&lt;</span> <span class='int'>10</span> <span class='kw'>or</span> <span class='id identifier rubyid_trigramRatio'>trigramRatio</span> <span class='op'>&gt;</span><span class='int'>75</span>
486
+ <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_content'>\n\tRatio:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramRatio'>trigramRatio</span><span class='period'>.</span><span class='id identifier rubyid_round'>round</span><span class='rbrace'>}</span><span class='tstring_content'>% Unique Trigrams:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramsOnFile'>trigramsOnFile</span><span class='rbrace'>}</span><span class='tstring_content'> Total Scanned: </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramScanned'>trigramScanned</span><span class='rbrace'>}</span><span class='tstring_content'> </span><span class='tstring_end'>&quot;</span></span>
487
+ <span class='kw'>end</span>
488
+ <span class='kw'>return</span> <span class='kw'>nil</span>
489
+ <span class='kw'>end</span></pre>
490
+ </td>
491
+ </tr>
492
+ </table>
493
+ </div>
494
+
495
+ <div class="method_details ">
496
+ <p class="signature " id="remove-instance_method">
497
+
498
+ - (<tt>Object</tt>) <strong>remove</strong>(filePaths = nil)
499
+
500
+
501
+
502
+ </p><div class="docstring">
503
+ <div class="discussion">
504
+
505
+ <p>Remove the files from the index, updating trigrams</p>
506
+
507
+
508
+ </div>
509
+ </div>
510
+ <div class="tags">
511
+
512
+
513
+ </div><table class="source_code">
514
+ <tr>
515
+ <td>
516
+ <pre class="lines">
517
+
518
+
519
+ 163
520
+ 164
521
+ 165
522
+ 166
523
+ 167
524
+ 168
525
+ 169
526
+ 170
527
+ 171
528
+ 172
529
+ 173
530
+ 174
531
+ 175
532
+ 176
533
+ 177
534
+ 178
535
+ 179
536
+ 180
537
+ 181
538
+ 182
539
+ 183
540
+ 184
541
+ 185
542
+ 186
543
+ 187
544
+ 188
545
+ 189
546
+ 190
547
+ 191
548
+ 192
549
+ 193
550
+ 194
551
+ 195
552
+ 196
553
+ 197</pre>
554
+ </td>
555
+ <td>
556
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 163</span>
557
+
558
+ <span class='kw'>def</span> <span class='id identifier rubyid_remove'>remove</span><span class='lparen'>(</span><span class='id identifier rubyid_filePaths'>filePaths</span><span class='op'>=</span><span class='kw'>nil</span><span class='rparen'>)</span>
559
+ <span class='kw'>if</span> <span class='id identifier rubyid_filePaths'>filePaths</span><span class='op'>==</span><span class='kw'>nil</span>
560
+ <span class='id identifier rubyid_fileList'>fileList</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span>
561
+ <span class='id identifier rubyid_storedFiles'>storedFiles</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_keys'>keys</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:*</span><span class='tstring_end'>&quot;</span></span>
562
+ <span class='id identifier rubyid_storedFiles'>storedFiles</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_fileKey'>fileKey</span><span class='op'>|</span>
563
+ <span class='id identifier rubyid_filename'>filename</span><span class='op'>=</span><span class='id identifier rubyid_fileKey'>fileKey</span><span class='period'>.</span><span class='id identifier rubyid_split'>split</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span><span class='lbracket'>[</span><span class='int'>1</span><span class='rbracket'>]</span>
564
+ <span class='id identifier rubyid_fileList'>fileList</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='rparen'>)</span>
565
+ <span class='kw'>end</span>
566
+ <span class='kw'>else</span>
567
+ <span class='id identifier rubyid_fileList'>fileList</span><span class='op'>=</span><span class='id identifier rubyid_filePaths'>filePaths</span>
568
+ <span class='kw'>end</span>
569
+ <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>Files to remove from index...</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fileList'>fileList</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
570
+
571
+ <span class='id identifier rubyid_fileList'>fileList</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_filename'>filename</span><span class='op'>|</span>
572
+ <span class='id identifier rubyid_fid'>fid</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_get'>get</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
573
+ <span class='id identifier rubyid_trigramsToExpurge'>trigramsToExpurge</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_smembers'>smembers</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:trigramsOnFile:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
574
+ <span class='kw'>if</span> <span class='id identifier rubyid_trigramsToExpurge'>trigramsToExpurge</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>==</span><span class='int'>0</span>
575
+ <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>?Nothing to do on </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
576
+ <span class='kw'>end</span>
577
+ <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_content'> id=</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_content'> Trigrams: </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramsToExpurge'>trigramsToExpurge</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
578
+ <span class='id identifier rubyid_trigramsToExpurge'>trigramsToExpurge</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span> <span class='id identifier rubyid_ts'>ts</span> <span class='op'>|</span>
579
+ <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_srem'>srem</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>trigram:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_ts'>ts</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='id identifier rubyid_fid'>fid</span>
580
+ <span class='kw'>begin</span>
581
+ <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_srem'>srem</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>trigram:ci:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_ts'>ts</span><span class='period'>.</span><span class='id identifier rubyid_downcase'>downcase</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span><span class='id identifier rubyid_fid'>fid</span>
582
+ <span class='kw'>rescue</span> <span class='const'>ArgumentError</span>
583
+ <span class='comment'># Ignore &quot;ArgumentError: invalid byte sequence in UTF-8&quot;
584
+ </span> <span class='comment'># and proceed...
585
+ </span> <span class='kw'>end</span>
586
+ <span class='kw'>end</span>
587
+
588
+ <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_del'>del</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:trigramsOnFile:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id2filename:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
589
+ <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_srem'>srem</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:processedFiles</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='id identifier rubyid_filename'>filename</span>
590
+ <span class='kw'>end</span>
591
+ <span class='kw'>return</span> <span class='kw'>nil</span>
592
+ <span class='kw'>end</span></pre>
593
+ </td>
594
+ </tr>
595
+ </table>
596
+ </div>
597
+
598
+ <div class="method_details ">
599
+ <p class="signature " id="removeAll-instance_method">
600
+
601
+ - (<tt>Object</tt>) <strong>removeAll</strong>
602
+
603
+
604
+
605
+ </p><div class="docstring">
606
+ <div class="discussion">
607
+
608
+ <p>Remove all the keys</p>
609
+
610
+
611
+ </div>
612
+ </div>
613
+ <div class="tags">
614
+
615
+
616
+ </div><table class="source_code">
617
+ <tr>
618
+ <td>
619
+ <pre class="lines">
620
+
621
+
622
+ 158
623
+ 159
624
+ 160</pre>
625
+ </td>
626
+ <td>
627
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 158</span>
628
+
629
+ <span class='kw'>def</span> <span class='id identifier rubyid_removeAll'>removeAll</span><span class='lparen'>(</span><span class='rparen'>)</span>
630
+ <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_remove'>remove</span><span class='lparen'>(</span><span class='kw'>nil</span><span class='rparen'>)</span>
631
+ <span class='kw'>end</span></pre>
632
+ </td>
633
+ </tr>
634
+ </table>
635
+ </div>
636
+
637
+ <div class="method_details ">
638
+ <p class="signature " id="search-instance_method">
639
+
640
+ - (<tt>Object</tt>) <strong>search</strong>(term)
641
+
642
+
643
+
644
+ </p><div class="docstring">
645
+ <div class="discussion">
646
+
647
+ <h1>search</h1>
648
+
649
+ <p>Find a list of file candidates for a search string. The search string is
650
+ padded into trigrams</p>
651
+
652
+
653
+ </div>
654
+ </div>
655
+ <div class="tags">
656
+
657
+
658
+ </div><table class="source_code">
659
+ <tr>
660
+ <td>
661
+ <pre class="lines">
662
+
663
+
664
+ 119
665
+ 120
666
+ 121
667
+ 122
668
+ 123
669
+ 124
670
+ 125
671
+ 126
672
+ 127
673
+ 128
674
+ 129
675
+ 130
676
+ 131
677
+ 132
678
+ 133
679
+ 134
680
+ 135
681
+ 136
682
+ 137
683
+ 138
684
+ 139
685
+ 140
686
+ 141
687
+ 142
688
+ 143
689
+ 144
690
+ 145
691
+ 146
692
+ 147</pre>
693
+ </td>
694
+ <td>
695
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 119</span>
696
+
697
+ <span class='kw'>def</span> <span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='id identifier rubyid_term'>term</span><span class='rparen'>)</span>
698
+ <span class='kw'>if</span> <span class='id identifier rubyid_term'>term</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>&lt;</span> <span class='const'>GRAM_SIZE</span>
699
+ <span class='id identifier rubyid_raise'>raise</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>FATAL: </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_term'>term</span><span class='rbrace'>}</span><span class='tstring_content'> is shorter then the minimum size of </span><span class='embexpr_beg'>#{</span><span class='const'>GRAM_SIZE</span><span class='rbrace'>}</span><span class='tstring_content'> character</span><span class='tstring_end'>&quot;</span></span>
700
+ <span class='kw'>end</span>
701
+ <span class='comment'>#puts &quot; ** Searching: #{term}&quot;
702
+ </span> <span class='comment'># split the term in a padded trigram
703
+ </span> <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span>
704
+ <span class='comment'># Search=&gt; Sea AND ear AND arc AND rch
705
+ </span> <span class='kw'>for</span> <span class='id identifier rubyid_j'>j</span> <span class='kw'>in</span> <span class='int'>0</span><span class='op'>...</span><span class='id identifier rubyid_term'>term</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span>
706
+ <span class='id identifier rubyid_currentTrigram'>currentTrigram</span><span class='op'>=</span><span class='id identifier rubyid_term'>term</span><span class='lbracket'>[</span><span class='id identifier rubyid_j'>j</span><span class='comma'>,</span><span class='const'>GRAM_SIZE</span><span class='rbracket'>]</span>
707
+ <span class='kw'>if</span> <span class='id identifier rubyid_currentTrigram'>currentTrigram</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>&lt;</span><span class='const'>GRAM_SIZE</span>
708
+ <span class='comment'># We are at the end...
709
+ </span> <span class='kw'>break</span>
710
+ <span class='kw'>end</span>
711
+ <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>trigram:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_currentTrigram'>currentTrigram</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span>
712
+ <span class='kw'>end</span>
713
+ <span class='comment'>#puts &quot;Trigam conversion /#{term}/ into #{trigramInAnd}&quot;
714
+ </span> <span class='kw'>if</span> <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>==</span><span class='int'>0</span>
715
+ <span class='kw'>return</span> <span class='lbracket'>[</span><span class='rbracket'>]</span>
716
+ <span class='kw'>end</span>
717
+ <span class='id identifier rubyid_fileIds'>fileIds</span><span class='op'>=</span> <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_sinter'>sinter</span><span class='lparen'>(</span><span class='op'>*</span><span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='rparen'>)</span>
718
+ <span class='id identifier rubyid_filenames'>filenames</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span>
719
+ <span class='comment'># fscan:id2filename:#{fid}....
720
+ </span> <span class='id identifier rubyid_fileIds'>fileIds</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span> <span class='id identifier rubyid_id'>id</span> <span class='op'>|</span>
721
+ <span class='id identifier rubyid_filenames'>filenames</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_get'>get</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id2filename:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_id'>id</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span><span class='rparen'>)</span>
722
+ <span class='kw'>end</span>
723
+ <span class='comment'>#puts &quot; ** Files found:#{filenames} from ids #{fileIds}&quot;
724
+ </span> <span class='kw'>return</span> <span class='id identifier rubyid_filenames'>filenames</span>
725
+ <span class='kw'>end</span></pre>
726
+ </td>
727
+ </tr>
728
+ </table>
729
+ </div>
730
+
731
+ </div>
732
+
733
+ </div>
734
+
735
+ <div id="footer">
736
+ Generated on Fri Jan 27 14:54:06 2012 by
737
+ <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
738
+ 0.7.4 (ruby-1.9.3).
739
+ </div>
740
+
741
+ </body>
742
+ </html>