code_zauker 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,742 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
2
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
3
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4
+ <head>
5
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
6
+ <title>
7
+ Class: CodeZauker::FileScanner
8
+
9
+ &mdash; Code Zauker 0.0.2 Documentation
10
+
11
+ </title>
12
+
13
+ <link rel="stylesheet" href="../css/style.css" type="text/css" media="screen" charset="utf-8" />
14
+
15
+ <link rel="stylesheet" href="../css/common.css" type="text/css" media="screen" charset="utf-8" />
16
+
17
+ <script type="text/javascript" charset="utf-8">
18
+ relpath = '..';
19
+ if (relpath != '') relpath += '/';
20
+ </script>
21
+
22
+ <script type="text/javascript" charset="utf-8" src="../js/jquery.js"></script>
23
+
24
+ <script type="text/javascript" charset="utf-8" src="../js/app.js"></script>
25
+
26
+
27
+ </head>
28
+ <body>
29
+ <script type="text/javascript" charset="utf-8">
30
+ if (window.top.frames.main) document.body.className = 'frames';
31
+ </script>
32
+
33
+ <div id="header">
34
+ <div id="menu">
35
+
36
+ <a href="../_index.html">Index (F)</a> &raquo;
37
+ <span class='title'><span class='object_link'><a href="../CodeZauker.html" title="CodeZauker (module)">CodeZauker</a></span></span>
38
+ &raquo;
39
+ <span class="title">FileScanner</span>
40
+
41
+
42
+ <div class="noframes"><span class="title">(</span><a href="." target="_top">no frames</a><span class="title">)</span></div>
43
+ </div>
44
+
45
+ <div id="search">
46
+
47
+ <a id="class_list_link" href="#">Class List</a>
48
+
49
+ <a id="method_list_link" href="#">Method List</a>
50
+
51
+ <a id="file_list_link" href="#">File List</a>
52
+
53
+ </div>
54
+ <div class="clear"></div>
55
+ </div>
56
+
57
+ <iframe id="search_frame"></iframe>
58
+
59
+ <div id="content"><h1>Class: CodeZauker::FileScanner
60
+
61
+
62
+
63
+ </h1>
64
+
65
+ <dl class="box">
66
+
67
+ <dt class="r1">Inherits:</dt>
68
+ <dd class="r1">
69
+ <span class="inheritName">Object</span>
70
+
71
+ <ul class="fullTree">
72
+ <li>Object</li>
73
+
74
+ <li class="next">CodeZauker::FileScanner</li>
75
+
76
+ </ul>
77
+ <a href="#" class="inheritanceTree">show all</a>
78
+
79
+ </dd>
80
+
81
+
82
+
83
+
84
+
85
+
86
+
87
+
88
+
89
+ <dt class="r2 last">Defined in:</dt>
90
+ <dd class="r2 last">lib/code_zauker.rb</dd>
91
+
92
+ </dl>
93
+ <div class="clear"></div>
94
+
95
+ <h2>Overview</h2><div class="docstring">
96
+ <div class="discussion">
97
+
98
+ <p>Scan a file and push it into Redis... then it can provide handy methods to
99
+ find files containing the trigram...</p>
100
+
101
+
102
+ </div>
103
+ </div>
104
+ <div class="tags">
105
+
106
+
107
+ </div>
108
+
109
+
110
+
111
+
112
+
113
+ <h2>
114
+ Instance Method Summary
115
+ <small>(<a href="#" class="summary_toggle">collapse</a>)</small>
116
+ </h2>
117
+
118
+ <ul class="summary">
119
+
120
+ <li class="public ">
121
+ <span class="summary_signature">
122
+
123
+ <a href="#disconnect-instance_method" title="#disconnect (instance method)">- (Object) <strong>disconnect</strong> </a>
124
+
125
+
126
+
127
+ </span>
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+ <span class="summary_desc"><div class='inline'></div></span>
137
+
138
+ </li>
139
+
140
+
141
+ <li class="public ">
142
+ <span class="summary_signature">
143
+
144
+ <a href="#initialize-instance_method" title="#initialize (instance method)">- (FileScanner) <strong>initialize</strong>(redisConnection = nil) </a>
145
+
146
+
147
+
148
+ </span>
149
+
150
+ <span class="note title constructor">constructor</span>
151
+
152
+
153
+
154
+
155
+
156
+
157
+
158
+
159
+ <span class="summary_desc"><div class='inline'>
160
+ <p>A new instance of FileScanner.</p>
161
+ </div></span>
162
+
163
+ </li>
164
+
165
+
166
+ <li class="public ">
167
+ <span class="summary_signature">
168
+
169
+ <a href="#load-instance_method" title="#load (instance method)">- (Object) <strong>load</strong>(filename, noReload = false) </a>
170
+
171
+
172
+
173
+ </span>
174
+
175
+
176
+
177
+
178
+
179
+
180
+
181
+
182
+ <span class="summary_desc"><div class='inline'></div></span>
183
+
184
+ </li>
185
+
186
+
187
+ <li class="public ">
188
+ <span class="summary_signature">
189
+
190
+ <a href="#remove-instance_method" title="#remove (instance method)">- (Object) <strong>remove</strong>(filePaths = nil) </a>
191
+
192
+
193
+
194
+ </span>
195
+
196
+
197
+
198
+
199
+
200
+
201
+
202
+
203
+ <span class="summary_desc"><div class='inline'>
204
+ <p>Remove the files from the index, updating trigrams.</p>
205
+ </div></span>
206
+
207
+ </li>
208
+
209
+
210
+ <li class="public ">
211
+ <span class="summary_signature">
212
+
213
+ <a href="#removeAll-instance_method" title="#removeAll (instance method)">- (Object) <strong>removeAll</strong> </a>
214
+
215
+
216
+
217
+ </span>
218
+
219
+
220
+
221
+
222
+
223
+
224
+
225
+
226
+ <span class="summary_desc"><div class='inline'>
227
+ <p>Remove all the keys.</p>
228
+ </div></span>
229
+
230
+ </li>
231
+
232
+
233
+ <li class="public ">
234
+ <span class="summary_signature">
235
+
236
+ <a href="#search-instance_method" title="#search (instance method)">- (Object) <strong>search</strong>(term) </a>
237
+
238
+
239
+
240
+ </span>
241
+
242
+
243
+
244
+
245
+
246
+
247
+
248
+
249
+ <span class="summary_desc"><div class='inline'>
250
+ <h1>search</h1>
251
+
252
+ <p>Find a list of file candidates for a search string. The search string is
253
+ padded into trigrams.</p>
254
+ </div></span>
255
+
256
+ </li>
257
+
258
+
259
+ </ul>
260
+
261
+
262
+ <div id="constructor_details" class="method_details_list">
263
+ <h2>Constructor Details</h2>
264
+
265
+ <div class="method_details first">
266
+ <p class="signature first" id="initialize-instance_method">
267
+
268
+ - (<tt><span class='object_link'><a href="" title="CodeZauker::FileScanner (class)">FileScanner</a></span></tt>) <strong>initialize</strong>(redisConnection = nil)
269
+
270
+
271
+
272
+ </p><div class="docstring">
273
+ <div class="discussion">
274
+
275
+ <p>A new instance of FileScanner</p>
276
+
277
+
278
+ </div>
279
+ </div>
280
+ <div class="tags">
281
+
282
+
283
+ </div><table class="source_code">
284
+ <tr>
285
+ <td>
286
+ <pre class="lines">
287
+
288
+
289
+ 16
290
+ 17
291
+ 18
292
+ 19
293
+ 20
294
+ 21
295
+ 22</pre>
296
+ </td>
297
+ <td>
298
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 16</span>
299
+
300
+ <span class='kw'>def</span> <span class='id identifier rubyid_initialize'>initialize</span><span class='lparen'>(</span><span class='id identifier rubyid_redisConnection'>redisConnection</span><span class='op'>=</span><span class='kw'>nil</span><span class='rparen'>)</span>
301
+ <span class='kw'>if</span> <span class='id identifier rubyid_redisConnection'>redisConnection</span><span class='op'>==</span><span class='kw'>nil</span>
302
+ <span class='ivar'>@redis</span><span class='op'>=</span><span class='const'>Redis</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span>
303
+ <span class='kw'>else</span>
304
+ <span class='ivar'>@redis</span><span class='op'>=</span><span class='id identifier rubyid_redisConnection'>redisConnection</span>
305
+ <span class='kw'>end</span>
306
+ <span class='kw'>end</span></pre>
307
+ </td>
308
+ </tr>
309
+ </table>
310
+ </div>
311
+
312
+ </div>
313
+
314
+
315
+ <div id="instance_method_details" class="method_details_list">
316
+ <h2>Instance Method Details</h2>
317
+
318
+
319
+ <div class="method_details first">
320
+ <p class="signature first" id="disconnect-instance_method">
321
+
322
+ - (<tt>Object</tt>) <strong>disconnect</strong>
323
+
324
+
325
+
326
+ </p><table class="source_code">
327
+ <tr>
328
+ <td>
329
+ <pre class="lines">
330
+
331
+
332
+ 23
333
+ 24
334
+ 25</pre>
335
+ </td>
336
+ <td>
337
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 23</span>
338
+
339
+ <span class='kw'>def</span> <span class='id identifier rubyid_disconnect'>disconnect</span><span class='lparen'>(</span><span class='rparen'>)</span>
340
+ <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_quit'>quit</span>
341
+ <span class='kw'>end</span></pre>
342
+ </td>
343
+ </tr>
344
+ </table>
345
+ </div>
346
+
347
+ <div class="method_details ">
348
+ <p class="signature " id="load-instance_method">
349
+
350
+ - (<tt>Object</tt>) <strong>load</strong>(filename, noReload = false)
351
+
352
+
353
+
354
+ </p><table class="source_code">
355
+ <tr>
356
+ <td>
357
+ <pre class="lines">
358
+
359
+
360
+ 52
361
+ 53
362
+ 54
363
+ 55
364
+ 56
365
+ 57
366
+ 58
367
+ 59
368
+ 60
369
+ 61
370
+ 62
371
+ 63
372
+ 64
373
+ 65
374
+ 66
375
+ 67
376
+ 68
377
+ 69
378
+ 70
379
+ 71
380
+ 72
381
+ 73
382
+ 74
383
+ 75
384
+ 76
385
+ 77
386
+ 78
387
+ 79
388
+ 80
389
+ 81
390
+ 82
391
+ 83
392
+ 84
393
+ 85
394
+ 86
395
+ 87
396
+ 88
397
+ 89
398
+ 90
399
+ 91
400
+ 92
401
+ 93
402
+ 94
403
+ 95
404
+ 96
405
+ 97
406
+ 98
407
+ 99
408
+ 100
409
+ 101
410
+ 102
411
+ 103
412
+ 104
413
+ 105
414
+ 106
415
+ 107
416
+ 108
417
+ 109
418
+ 110
419
+ 111
420
+ 112
421
+ 113
422
+ 114</pre>
423
+ </td>
424
+ <td>
425
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 52</span>
426
+
427
+ <span class='kw'>def</span> <span class='id identifier rubyid_load'>load</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='comma'>,</span> <span class='id identifier rubyid_noReload'>noReload</span><span class='op'>=</span><span class='kw'>false</span><span class='rparen'>)</span>
428
+ <span class='comment'># Define my redis id...
429
+ </span> <span class='comment'># Already exists?...
430
+ </span> <span class='id identifier rubyid_fid'>fid</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_get'>get</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
431
+ <span class='kw'>if</span> <span class='id identifier rubyid_fid'>fid</span><span class='op'>==</span><span class='kw'>nil</span>
432
+ <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_setnx'>setnx</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:nextId</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span><span class='int'>0</span>
433
+ <span class='id identifier rubyid_fid'>fid</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_incr'>incr</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:nextId</span><span class='tstring_end'>&quot;</span></span>
434
+ <span class='comment'># BUG: Consider storing it at the END of the processing
435
+ </span> <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_set'>set</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='id identifier rubyid_fid'>fid</span>
436
+ <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_set'>set</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id2filename:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span><span class='id identifier rubyid_filename'>filename</span>
437
+ <span class='kw'>else</span>
438
+ <span class='kw'>if</span> <span class='id identifier rubyid_noReload'>noReload</span>
439
+ <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>Already found </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_content'> as id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_content'> and NOT RELOADED</span><span class='tstring_end'>&quot;</span></span>
440
+ <span class='kw'>return</span> <span class='kw'>nil</span>
441
+ <span class='kw'>end</span>
442
+ <span class='kw'>end</span>
443
+ <span class='comment'># fid is the set key!...
444
+ </span> <span class='id identifier rubyid_trigramScanned'>trigramScanned</span><span class='op'>=</span><span class='int'>0</span>
445
+ <span class='comment'># TEST_LICENSE.txt: 3290 Total Scanned: 24628
446
+ </span> <span class='comment'># The ratio is below 13% of total trigrams are unique for very big files
447
+ </span> <span class='comment'># So we avoid a huge roundtrip to redis, and store the trigram on a memory-based set
448
+ </span> <span class='comment'># before sending it to redis. This avoid
449
+ </span> <span class='comment'># a lot of spourios work
450
+ </span> <span class='id identifier rubyid_s'>s</span><span class='op'>=</span><span class='const'>Set</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span>
451
+ <span class='const'>File</span><span class='period'>.</span><span class='id identifier rubyid_open'>open</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='comma'>,</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>r</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_f'>f</span><span class='op'>|</span>
452
+ <span class='id identifier rubyid_lines'>lines</span><span class='op'>=</span><span class='id identifier rubyid_f'>f</span><span class='period'>.</span><span class='id identifier rubyid_readlines'>readlines</span><span class='lparen'>(</span><span class='rparen'>)</span>
453
+ <span class='id identifier rubyid_adaptiveSize'>adaptiveSize</span><span class='op'>=</span> <span class='int'>6000</span>
454
+ <span class='id identifier rubyid_lines'>lines</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_l'>l</span><span class='op'>|</span>
455
+ <span class='comment'># Split each line into 3-char chunks, and store in a redis set
456
+ </span> <span class='id identifier rubyid_i'>i</span><span class='op'>=</span><span class='int'>0</span>
457
+ <span class='kw'>for</span> <span class='id identifier rubyid_istart'>istart</span> <span class='kw'>in</span> <span class='int'>0</span><span class='op'>...</span><span class='lparen'>(</span><span class='id identifier rubyid_l'>l</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>-</span><span class='const'>GRAM_SIZE</span><span class='rparen'>)</span>
458
+ <span class='id identifier rubyid_trigram'>trigram</span> <span class='op'>=</span> <span class='id identifier rubyid_l'>l</span><span class='lbracket'>[</span><span class='id identifier rubyid_istart'>istart</span><span class='comma'>,</span> <span class='const'>GRAM_SIZE</span><span class='rbracket'>]</span>
459
+ <span class='comment'># Avoid storing the 3space guy enterely
460
+ </span> <span class='kw'>if</span> <span class='id identifier rubyid_trigram'>trigram</span><span class='op'>==</span><span class='const'>SPACE_GUY</span>
461
+ <span class='kw'>next</span>
462
+ <span class='kw'>end</span>
463
+ <span class='comment'># push the trigram to redis (highly optimized)
464
+ </span> <span class='id identifier rubyid_s'>s</span><span class='period'>.</span><span class='id identifier rubyid_add'>add</span><span class='lparen'>(</span><span class='id identifier rubyid_trigram'>trigram</span><span class='rparen'>)</span>
465
+ <span class='kw'>if</span> <span class='id identifier rubyid_s'>s</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>&gt;</span> <span class='id identifier rubyid_adaptiveSize'>adaptiveSize</span>
466
+ <span class='id identifier rubyid_pushTrigramsSet'>pushTrigramsSet</span><span class='lparen'>(</span><span class='id identifier rubyid_s'>s</span><span class='comma'>,</span><span class='id identifier rubyid_fid'>fid</span><span class='comma'>,</span><span class='id identifier rubyid_filename'>filename</span><span class='rparen'>)</span>
467
+ <span class='id identifier rubyid_s'>s</span><span class='op'>=</span><span class='const'>Set</span><span class='period'>.</span><span class='id identifier rubyid_new'>new</span><span class='lparen'>(</span><span class='rparen'>)</span>
468
+ <span class='kw'>end</span>
469
+ <span class='id identifier rubyid_trigramScanned'>trigramScanned</span> <span class='op'>+=</span> <span class='int'>1</span>
470
+ <span class='comment'>#puts &quot;#{istart} Trigram fscan:#{trigram}/ FileId: #{fid}&quot;
471
+ </span> <span class='kw'>end</span>
472
+ <span class='kw'>end</span>
473
+ <span class='kw'>end</span>
474
+
475
+ <span class='kw'>if</span> <span class='id identifier rubyid_s'>s</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>&gt;</span> <span class='int'>0</span>
476
+ <span class='id identifier rubyid_pushTrigramsSet'>pushTrigramsSet</span><span class='lparen'>(</span><span class='id identifier rubyid_s'>s</span><span class='comma'>,</span><span class='id identifier rubyid_fid'>fid</span><span class='comma'>,</span><span class='id identifier rubyid_filename'>filename</span><span class='rparen'>)</span>
477
+ <span class='id identifier rubyid_s'>s</span><span class='op'>=</span><span class='kw'>nil</span>
478
+ <span class='comment'>#puts &quot;Final push of #{s.length}&quot;
479
+ </span> <span class='kw'>end</span>
480
+
481
+
482
+ <span class='id identifier rubyid_trigramsOnFile'>trigramsOnFile</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_scard'>scard</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:trigramsOnFile:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
483
+ <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_sadd'>sadd</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:processedFiles</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
484
+ <span class='id identifier rubyid_trigramRatio'>trigramRatio</span><span class='op'>=</span><span class='lparen'>(</span> <span class='lparen'>(</span><span class='id identifier rubyid_trigramsOnFile'>trigramsOnFile</span><span class='op'>*</span><span class='float'>1.0</span><span class='rparen'>)</span> <span class='op'>/</span> <span class='id identifier rubyid_trigramScanned'>trigramScanned</span> <span class='rparen'>)</span><span class='op'>*</span> <span class='float'>100.0</span>
485
+ <span class='kw'>if</span> <span class='id identifier rubyid_trigramRatio'>trigramRatio</span> <span class='op'>&lt;</span> <span class='int'>10</span> <span class='kw'>or</span> <span class='id identifier rubyid_trigramRatio'>trigramRatio</span> <span class='op'>&gt;</span><span class='int'>75</span>
486
+ <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_content'>\n\tRatio:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramRatio'>trigramRatio</span><span class='period'>.</span><span class='id identifier rubyid_round'>round</span><span class='rbrace'>}</span><span class='tstring_content'>% Unique Trigrams:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramsOnFile'>trigramsOnFile</span><span class='rbrace'>}</span><span class='tstring_content'> Total Scanned: </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramScanned'>trigramScanned</span><span class='rbrace'>}</span><span class='tstring_content'> </span><span class='tstring_end'>&quot;</span></span>
487
+ <span class='kw'>end</span>
488
+ <span class='kw'>return</span> <span class='kw'>nil</span>
489
+ <span class='kw'>end</span></pre>
490
+ </td>
491
+ </tr>
492
+ </table>
493
+ </div>
494
+
495
+ <div class="method_details ">
496
+ <p class="signature " id="remove-instance_method">
497
+
498
+ - (<tt>Object</tt>) <strong>remove</strong>(filePaths = nil)
499
+
500
+
501
+
502
+ </p><div class="docstring">
503
+ <div class="discussion">
504
+
505
+ <p>Remove the files from the index, updating trigrams</p>
506
+
507
+
508
+ </div>
509
+ </div>
510
+ <div class="tags">
511
+
512
+
513
+ </div><table class="source_code">
514
+ <tr>
515
+ <td>
516
+ <pre class="lines">
517
+
518
+
519
+ 163
520
+ 164
521
+ 165
522
+ 166
523
+ 167
524
+ 168
525
+ 169
526
+ 170
527
+ 171
528
+ 172
529
+ 173
530
+ 174
531
+ 175
532
+ 176
533
+ 177
534
+ 178
535
+ 179
536
+ 180
537
+ 181
538
+ 182
539
+ 183
540
+ 184
541
+ 185
542
+ 186
543
+ 187
544
+ 188
545
+ 189
546
+ 190
547
+ 191
548
+ 192
549
+ 193
550
+ 194
551
+ 195
552
+ 196
553
+ 197</pre>
554
+ </td>
555
+ <td>
556
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 163</span>
557
+
558
+ <span class='kw'>def</span> <span class='id identifier rubyid_remove'>remove</span><span class='lparen'>(</span><span class='id identifier rubyid_filePaths'>filePaths</span><span class='op'>=</span><span class='kw'>nil</span><span class='rparen'>)</span>
559
+ <span class='kw'>if</span> <span class='id identifier rubyid_filePaths'>filePaths</span><span class='op'>==</span><span class='kw'>nil</span>
560
+ <span class='id identifier rubyid_fileList'>fileList</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span>
561
+ <span class='id identifier rubyid_storedFiles'>storedFiles</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_keys'>keys</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:*</span><span class='tstring_end'>&quot;</span></span>
562
+ <span class='id identifier rubyid_storedFiles'>storedFiles</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_fileKey'>fileKey</span><span class='op'>|</span>
563
+ <span class='id identifier rubyid_filename'>filename</span><span class='op'>=</span><span class='id identifier rubyid_fileKey'>fileKey</span><span class='period'>.</span><span class='id identifier rubyid_split'>split</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span><span class='lbracket'>[</span><span class='int'>1</span><span class='rbracket'>]</span>
564
+ <span class='id identifier rubyid_fileList'>fileList</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='id identifier rubyid_filename'>filename</span><span class='rparen'>)</span>
565
+ <span class='kw'>end</span>
566
+ <span class='kw'>else</span>
567
+ <span class='id identifier rubyid_fileList'>fileList</span><span class='op'>=</span><span class='id identifier rubyid_filePaths'>filePaths</span>
568
+ <span class='kw'>end</span>
569
+ <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>Files to remove from index...</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fileList'>fileList</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
570
+
571
+ <span class='id identifier rubyid_fileList'>fileList</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span><span class='id identifier rubyid_filename'>filename</span><span class='op'>|</span>
572
+ <span class='id identifier rubyid_fid'>fid</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_get'>get</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
573
+ <span class='id identifier rubyid_trigramsToExpurge'>trigramsToExpurge</span><span class='op'>=</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_smembers'>smembers</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:trigramsOnFile:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
574
+ <span class='kw'>if</span> <span class='id identifier rubyid_trigramsToExpurge'>trigramsToExpurge</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>==</span><span class='int'>0</span>
575
+ <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>?Nothing to do on </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
576
+ <span class='kw'>end</span>
577
+ <span class='id identifier rubyid_puts'>puts</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_content'> id=</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_content'> Trigrams: </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_trigramsToExpurge'>trigramsToExpurge</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
578
+ <span class='id identifier rubyid_trigramsToExpurge'>trigramsToExpurge</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span> <span class='id identifier rubyid_ts'>ts</span> <span class='op'>|</span>
579
+ <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_srem'>srem</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>trigram:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_ts'>ts</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='id identifier rubyid_fid'>fid</span>
580
+ <span class='kw'>begin</span>
581
+ <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_srem'>srem</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>trigram:ci:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_ts'>ts</span><span class='period'>.</span><span class='id identifier rubyid_downcase'>downcase</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span><span class='id identifier rubyid_fid'>fid</span>
582
+ <span class='kw'>rescue</span> <span class='const'>ArgumentError</span>
583
+ <span class='comment'># Ignore &quot;ArgumentError: invalid byte sequence in UTF-8&quot;
584
+ </span> <span class='comment'># and proceed...
585
+ </span> <span class='kw'>end</span>
586
+ <span class='kw'>end</span>
587
+
588
+ <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_del'>del</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_filename'>filename</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:trigramsOnFile:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id2filename:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_fid'>fid</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span>
589
+ <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_srem'>srem</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:processedFiles</span><span class='tstring_end'>&quot;</span></span><span class='comma'>,</span> <span class='id identifier rubyid_filename'>filename</span>
590
+ <span class='kw'>end</span>
591
+ <span class='kw'>return</span> <span class='kw'>nil</span>
592
+ <span class='kw'>end</span></pre>
593
+ </td>
594
+ </tr>
595
+ </table>
596
+ </div>
597
+
598
+ <div class="method_details ">
599
+ <p class="signature " id="removeAll-instance_method">
600
+
601
+ - (<tt>Object</tt>) <strong>removeAll</strong>
602
+
603
+
604
+
605
+ </p><div class="docstring">
606
+ <div class="discussion">
607
+
608
+ <p>Remove all the keys</p>
609
+
610
+
611
+ </div>
612
+ </div>
613
+ <div class="tags">
614
+
615
+
616
+ </div><table class="source_code">
617
+ <tr>
618
+ <td>
619
+ <pre class="lines">
620
+
621
+
622
+ 158
623
+ 159
624
+ 160</pre>
625
+ </td>
626
+ <td>
627
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 158</span>
628
+
629
+ <span class='kw'>def</span> <span class='id identifier rubyid_removeAll'>removeAll</span><span class='lparen'>(</span><span class='rparen'>)</span>
630
+ <span class='kw'>self</span><span class='period'>.</span><span class='id identifier rubyid_remove'>remove</span><span class='lparen'>(</span><span class='kw'>nil</span><span class='rparen'>)</span>
631
+ <span class='kw'>end</span></pre>
632
+ </td>
633
+ </tr>
634
+ </table>
635
+ </div>
636
+
637
+ <div class="method_details ">
638
+ <p class="signature " id="search-instance_method">
639
+
640
+ - (<tt>Object</tt>) <strong>search</strong>(term)
641
+
642
+
643
+
644
+ </p><div class="docstring">
645
+ <div class="discussion">
646
+
647
+ <h1>search</h1>
648
+
649
+ <p>Find a list of candidate files for a search string. The search string is
650
+ split into overlapping trigrams.</p>
651
+
652
+
653
+ </div>
654
+ </div>
655
+ <div class="tags">
656
+
657
+
658
+ </div><table class="source_code">
659
+ <tr>
660
+ <td>
661
+ <pre class="lines">
662
+
663
+
664
+ 119
665
+ 120
666
+ 121
667
+ 122
668
+ 123
669
+ 124
670
+ 125
671
+ 126
672
+ 127
673
+ 128
674
+ 129
675
+ 130
676
+ 131
677
+ 132
678
+ 133
679
+ 134
680
+ 135
681
+ 136
682
+ 137
683
+ 138
684
+ 139
685
+ 140
686
+ 141
687
+ 142
688
+ 143
689
+ 144
690
+ 145
691
+ 146
692
+ 147</pre>
693
+ </td>
694
+ <td>
695
+ <pre class="code"><span class="info file"># File 'lib/code_zauker.rb', line 119</span>
696
+
697
+ <span class='kw'>def</span> <span class='id identifier rubyid_search'>search</span><span class='lparen'>(</span><span class='id identifier rubyid_term'>term</span><span class='rparen'>)</span>
698
+ <span class='kw'>if</span> <span class='id identifier rubyid_term'>term</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>&lt;</span> <span class='const'>GRAM_SIZE</span>
699
+ <span class='id identifier rubyid_raise'>raise</span> <span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>FATAL: </span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_term'>term</span><span class='rbrace'>}</span><span class='tstring_content'> is shorter then the minimum size of </span><span class='embexpr_beg'>#{</span><span class='const'>GRAM_SIZE</span><span class='rbrace'>}</span><span class='tstring_content'> character</span><span class='tstring_end'>&quot;</span></span>
700
+ <span class='kw'>end</span>
701
+ <span class='comment'>#puts &quot; ** Searching: #{term}&quot;
702
+ </span> <span class='comment'># split the term in a padded trigram
703
+ </span> <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span>
704
+ <span class='comment'># Search=&gt; Sea AND ear AND arc AND rch
705
+ </span> <span class='kw'>for</span> <span class='id identifier rubyid_j'>j</span> <span class='kw'>in</span> <span class='int'>0</span><span class='op'>...</span><span class='id identifier rubyid_term'>term</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span>
706
+ <span class='id identifier rubyid_currentTrigram'>currentTrigram</span><span class='op'>=</span><span class='id identifier rubyid_term'>term</span><span class='lbracket'>[</span><span class='id identifier rubyid_j'>j</span><span class='comma'>,</span><span class='const'>GRAM_SIZE</span><span class='rbracket'>]</span>
707
+ <span class='kw'>if</span> <span class='id identifier rubyid_currentTrigram'>currentTrigram</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span> <span class='op'>&lt;</span><span class='const'>GRAM_SIZE</span>
708
+ <span class='comment'># We are at the end...
709
+ </span> <span class='kw'>break</span>
710
+ <span class='kw'>end</span>
711
+ <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>trigram:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_currentTrigram'>currentTrigram</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span>
712
+ <span class='kw'>end</span>
713
+ <span class='comment'>#puts &quot;Trigam conversion /#{term}/ into #{trigramInAnd}&quot;
714
+ </span> <span class='kw'>if</span> <span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='period'>.</span><span class='id identifier rubyid_length'>length</span><span class='op'>==</span><span class='int'>0</span>
715
+ <span class='kw'>return</span> <span class='lbracket'>[</span><span class='rbracket'>]</span>
716
+ <span class='kw'>end</span>
717
+ <span class='id identifier rubyid_fileIds'>fileIds</span><span class='op'>=</span> <span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_sinter'>sinter</span><span class='lparen'>(</span><span class='op'>*</span><span class='id identifier rubyid_trigramInAnd'>trigramInAnd</span><span class='rparen'>)</span>
718
+ <span class='id identifier rubyid_filenames'>filenames</span><span class='op'>=</span><span class='lbracket'>[</span><span class='rbracket'>]</span>
719
+ <span class='comment'># fscan:id2filename:#{fid}....
720
+ </span> <span class='id identifier rubyid_fileIds'>fileIds</span><span class='period'>.</span><span class='id identifier rubyid_each'>each</span> <span class='kw'>do</span> <span class='op'>|</span> <span class='id identifier rubyid_id'>id</span> <span class='op'>|</span>
721
+ <span class='id identifier rubyid_filenames'>filenames</span><span class='period'>.</span><span class='id identifier rubyid_push'>push</span><span class='lparen'>(</span><span class='ivar'>@redis</span><span class='period'>.</span><span class='id identifier rubyid_get'>get</span><span class='lparen'>(</span><span class='tstring'><span class='tstring_beg'>&quot;</span><span class='tstring_content'>fscan:id2filename:</span><span class='embexpr_beg'>#{</span><span class='id identifier rubyid_id'>id</span><span class='rbrace'>}</span><span class='tstring_end'>&quot;</span></span><span class='rparen'>)</span><span class='rparen'>)</span>
722
+ <span class='kw'>end</span>
723
+ <span class='comment'>#puts &quot; ** Files found:#{filenames} from ids #{fileIds}&quot;
724
+ </span> <span class='kw'>return</span> <span class='id identifier rubyid_filenames'>filenames</span>
725
+ <span class='kw'>end</span></pre>
726
+ </td>
727
+ </tr>
728
+ </table>
729
+ </div>
730
+
731
+ </div>
732
+
733
+ </div>
734
+
735
+ <div id="footer">
736
+ Generated on Fri Jan 27 14:54:06 2012 by
737
+ <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool" target="_parent">yard</a>
738
+ 0.7.4 (ruby-1.9.3).
739
+ </div>
740
+
741
+ </body>
742
+ </html>