ankusa 0.0.13 → 0.0.14

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. data/Gemfile +4 -0
  2. data/Gemfile.lock +16 -0
  3. data/README.rdoc +5 -3
  4. data/Rakefile +5 -5
  5. data/lib/ankusa/naive_bayes.rb +3 -3
  6. data/lib/ankusa/version.rb +1 -1
  7. metadata +36 -100
  8. data/docs/Ankusa.html +0 -229
  9. data/docs/Ankusa/CassandraStorage.html +0 -801
  10. data/docs/Ankusa/Classifier.html +0 -440
  11. data/docs/Ankusa/FileSystemStorage.html +0 -376
  12. data/docs/Ankusa/HBaseStorage.html +0 -845
  13. data/docs/Ankusa/KLDivergenceClassifier.html +0 -265
  14. data/docs/Ankusa/MemoryStorage.html +0 -672
  15. data/docs/Ankusa/NaiveBayesClassifier.html +0 -313
  16. data/docs/Ankusa/TextHash.html +0 -390
  17. data/docs/README_rdoc.html +0 -268
  18. data/docs/String.html +0 -241
  19. data/docs/created.rid +0 -14
  20. data/docs/images/brick.png +0 -0
  21. data/docs/images/brick_link.png +0 -0
  22. data/docs/images/bug.png +0 -0
  23. data/docs/images/bullet_black.png +0 -0
  24. data/docs/images/bullet_toggle_minus.png +0 -0
  25. data/docs/images/bullet_toggle_plus.png +0 -0
  26. data/docs/images/date.png +0 -0
  27. data/docs/images/find.png +0 -0
  28. data/docs/images/loadingAnimation.gif +0 -0
  29. data/docs/images/macFFBgHack.png +0 -0
  30. data/docs/images/package.png +0 -0
  31. data/docs/images/page_green.png +0 -0
  32. data/docs/images/page_white_text.png +0 -0
  33. data/docs/images/page_white_width.png +0 -0
  34. data/docs/images/plugin.png +0 -0
  35. data/docs/images/ruby.png +0 -0
  36. data/docs/images/tag_green.png +0 -0
  37. data/docs/images/wrench.png +0 -0
  38. data/docs/images/wrench_orange.png +0 -0
  39. data/docs/images/zoom.png +0 -0
  40. data/docs/index.html +0 -212
  41. data/docs/js/darkfish.js +0 -116
  42. data/docs/js/jquery.js +0 -32
  43. data/docs/js/quicksearch.js +0 -114
  44. data/docs/js/thickbox-compressed.js +0 -10
  45. data/docs/lib/ankusa/cassandra_storage_rb.html +0 -54
  46. data/docs/lib/ankusa/classifier_rb.html +0 -52
  47. data/docs/lib/ankusa/extensions_rb.html +0 -54
  48. data/docs/lib/ankusa/file_system_storage_rb.html +0 -54
  49. data/docs/lib/ankusa/hasher_rb.html +0 -56
  50. data/docs/lib/ankusa/hbase_storage_rb.html +0 -54
  51. data/docs/lib/ankusa/kl_divergence_rb.html +0 -52
  52. data/docs/lib/ankusa/memory_storage_rb.html +0 -52
  53. data/docs/lib/ankusa/naive_bayes_rb.html +0 -52
  54. data/docs/lib/ankusa/stopwords_rb.html +0 -52
  55. data/docs/lib/ankusa/version_rb.html +0 -52
  56. data/docs/lib/ankusa_rb.html +0 -64
  57. data/docs/rdoc.css +0 -759
@@ -1,801 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
- <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5
- <head>
6
- <meta content="text/html; charset=UTF-8" http-equiv="Content-Type" />
7
-
8
- <title>Class: Ankusa::CassandraStorage</title>
9
-
10
- <link rel="stylesheet" href="../rdoc.css" type="text/css" media="screen" />
11
-
12
- <script src="../js/jquery.js" type="text/javascript" charset="utf-8"></script>
13
- <script src="../js/thickbox-compressed.js" type="text/javascript" charset="utf-8"></script>
14
- <script src="../js/quicksearch.js" type="text/javascript" charset="utf-8"></script>
15
- <script src="../js/darkfish.js" type="text/javascript" charset="utf-8"></script>
16
-
17
- </head>
18
- <body id="top" class="class">
19
-
20
- <div id="metadata">
21
- <div id="home-metadata">
22
- <div id="home-section" class="section">
23
- <h3 class="section-header">
24
- <a href="../index.html">Home</a>
25
- <a href="../index.html#classes">Classes</a>
26
- <a href="../index.html#methods">Methods</a>
27
- </h3>
28
- </div>
29
- </div>
30
-
31
- <div id="file-metadata">
32
- <div id="file-list-section" class="section">
33
- <h3 class="section-header">In Files</h3>
34
- <div class="section-body">
35
- <ul>
36
-
37
- <li><a href="../lib/ankusa/cassandra_storage_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
38
- class="thickbox" title="lib/ankusa/cassandra_storage.rb">lib/ankusa/cassandra_storage.rb</a></li>
39
-
40
- </ul>
41
- </div>
42
- </div>
43
-
44
-
45
- </div>
46
-
47
- <div id="class-metadata">
48
-
49
- <!-- Parent Class -->
50
- <div id="parent-class-section" class="section">
51
- <h3 class="section-header">Parent</h3>
52
-
53
- <p class="link">Object</p>
54
-
55
- </div>
56
-
57
-
58
-
59
-
60
-
61
-
62
-
63
- <!-- Method Quickref -->
64
- <div id="method-list-section" class="section">
65
- <h3 class="section-header">Methods</h3>
66
- <ul class="link-list">
67
-
68
- <li><a href="#method-c-new">::new</a></li>
69
-
70
- <li><a href="#method-i-classnames">#classnames</a></li>
71
-
72
- <li><a href="#method-i-close">#close</a></li>
73
-
74
- <li><a href="#method-i-doc_count_totals">#doc_count_totals</a></li>
75
-
76
- <li><a href="#method-i-drop_tables">#drop_tables</a></li>
77
-
78
- <li><a href="#method-i-get_doc_count">#get_doc_count</a></li>
79
-
80
- <li><a href="#method-i-get_summary">#get_summary</a></li>
81
-
82
- <li><a href="#method-i-get_total_word_count">#get_total_word_count</a></li>
83
-
84
- <li><a href="#method-i-get_vocabulary_sizes">#get_vocabulary_sizes</a></li>
85
-
86
- <li><a href="#method-i-get_word_counts">#get_word_counts</a></li>
87
-
88
- <li><a href="#method-i-incr_doc_count">#incr_doc_count</a></li>
89
-
90
- <li><a href="#method-i-incr_total_word_count">#incr_total_word_count</a></li>
91
-
92
- <li><a href="#method-i-incr_word_count">#incr_word_count</a></li>
93
-
94
- <li><a href="#method-i-init_tables">#init_tables</a></li>
95
-
96
- <li><a href="#method-i-reset">#reset</a></li>
97
-
98
- </ul>
99
- </div>
100
-
101
-
102
-
103
- </div>
104
-
105
- <div id="project-metadata">
106
-
107
-
108
- <div id="fileindex-section" class="section project-section">
109
- <h3 class="section-header">Files</h3>
110
- <ul>
111
-
112
- <li class="file"><a href="../README_rdoc.html">README.rdoc</a></li>
113
-
114
- </ul>
115
- </div>
116
-
117
-
118
- <div id="classindex-section" class="section project-section">
119
- <h3 class="section-header">Class/Module Index
120
- <span class="search-toggle"><img src="../images/find.png"
121
- height="16" width="16" alt="[+]"
122
- title="show/hide quicksearch" /></span></h3>
123
- <form action="#" method="get" accept-charset="utf-8" class="initially-hidden">
124
- <fieldset>
125
- <legend>Quicksearch</legend>
126
- <input type="text" name="quicksearch" value=""
127
- class="quicksearch-field" />
128
- </fieldset>
129
- </form>
130
-
131
- <ul class="link-list">
132
-
133
- <li><a href="../Ankusa.html">Ankusa</a></li>
134
-
135
- <li><a href="../Ankusa/CassandraStorage.html">Ankusa::CassandraStorage</a></li>
136
-
137
- <li><a href="../Ankusa/Classifier.html">Ankusa::Classifier</a></li>
138
-
139
- <li><a href="../Ankusa/FileSystemStorage.html">Ankusa::FileSystemStorage</a></li>
140
-
141
- <li><a href="../Ankusa/HBaseStorage.html">Ankusa::HBaseStorage</a></li>
142
-
143
- <li><a href="../Ankusa/KLDivergenceClassifier.html">Ankusa::KLDivergenceClassifier</a></li>
144
-
145
- <li><a href="../Ankusa/MemoryStorage.html">Ankusa::MemoryStorage</a></li>
146
-
147
- <li><a href="../Ankusa/NaiveBayesClassifier.html">Ankusa::NaiveBayesClassifier</a></li>
148
-
149
- <li><a href="../Ankusa/TextHash.html">Ankusa::TextHash</a></li>
150
-
151
- <li><a href="../String.html">String</a></li>
152
-
153
- </ul>
154
- <div id="no-class-search-results" style="display: none;">No matching classes.</div>
155
- </div>
156
-
157
-
158
- </div>
159
- </div>
160
-
161
- <div id="documentation">
162
- <h1 class="class">Ankusa::CassandraStorage</h1>
163
-
164
- <div id="description" class="description">
165
-
166
- </div><!-- description -->
167
-
168
-
169
- <div id="5Buntitled-5D" class="documentation-section">
170
-
171
-
172
-
173
-
174
-
175
-
176
-
177
- <!-- Attributes -->
178
- <div id="attribute-method-details" class="method-section section">
179
- <h3 class="section-header">Attributes</h3>
180
-
181
-
182
- <div id="cassandra-attribute-method" class="method-detail">
183
- <a name="cassandra"></a>
184
-
185
- <div class="method-heading attribute-method-heading">
186
- <span class="method-name">cassandra</span><span
187
- class="attribute-access-type">[R]</span>
188
- </div>
189
-
190
- <div class="method-description">
191
-
192
-
193
-
194
- </div>
195
- </div>
196
-
197
- </div><!-- attribute-method-details -->
198
-
199
-
200
- <!-- Methods -->
201
-
202
- <div id="public-class-method-details" class="method-section section">
203
- <h3 class="section-header">Public Class Methods</h3>
204
-
205
-
206
- <div id="new-method" class="method-detail ">
207
- <a name="method-c-new"></a>
208
-
209
-
210
- <div class="method-heading">
211
- <span class="method-name">new</span><span
212
- class="method-args">(host='127.0.0.1', port=9160, keyspace = 'ankusa', max_classes = 100)</span>
213
- <span class="method-click-advice">click to toggle source</span>
214
- </div>
215
-
216
-
217
- <div class="method-description">
218
-
219
- <p>Necessary to set max classes since current implementation of ruby cassandra
220
- client doesn’t support table scans. Using crufty get_range method at the
221
- moment.</p>
222
-
223
-
224
-
225
- <div class="method-source-code" id="new-source">
226
- <pre>
227
- <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 21</span>
228
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">host</span>=<span class="ruby-string">'127.0.0.1'</span>, <span class="ruby-identifier">port</span>=<span class="ruby-value">9160</span>, <span class="ruby-identifier">keyspace</span> = <span class="ruby-string">'ankusa'</span>, <span class="ruby-identifier">max_classes</span> = <span class="ruby-value">100</span>)
229
- <span class="ruby-ivar">@cassandra</span> = <span class="ruby-constant">Cassandra</span>.<span class="ruby-identifier">new</span>(<span class="ruby-string">'system'</span>, <span class="ruby-node">&quot;#{host}:#{port}&quot;</span>)
230
- <span class="ruby-ivar">@klass_word_counts</span>, <span class="ruby-ivar">@klass_doc_counts</span> = {}
231
- <span class="ruby-ivar">@keyspace</span> = <span class="ruby-identifier">keyspace</span>
232
- <span class="ruby-ivar">@max_classes</span> = <span class="ruby-identifier">max_classes</span>
233
- <span class="ruby-identifier">init_tables</span>
234
- <span class="ruby-keyword">end</span></pre>
235
- </div><!-- new-source -->
236
-
237
- </div>
238
-
239
-
240
-
241
-
242
- </div><!-- new-method -->
243
-
244
-
245
- </div><!-- public-class-method-details -->
246
-
247
- <div id="public-instance-method-details" class="method-section section">
248
- <h3 class="section-header">Public Instance Methods</h3>
249
-
250
-
251
- <div id="classnames-method" class="method-detail ">
252
- <a name="method-i-classnames"></a>
253
-
254
-
255
- <div class="method-heading">
256
- <span class="method-name">classnames</span><span
257
- class="method-args">()</span>
258
- <span class="method-click-advice">click to toggle source</span>
259
- </div>
260
-
261
-
262
- <div class="method-description">
263
-
264
- <p>Fetch the names of the distinct classes for classification: eg. :spam,
265
- :good, etc</p>
266
-
267
-
268
-
269
- <div class="method-source-code" id="classnames-source">
270
- <pre>
271
- <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 33</span>
272
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">classnames</span>
273
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get_range</span>(<span class="ruby-value">:totals</span>, {<span class="ruby-value">:start</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-string">''</span>, <span class="ruby-value">:finish</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-string">''</span>, <span class="ruby-value">:count</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-ivar">@max_classes</span>}).<span class="ruby-identifier">inject</span>([]) <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">cs</span>, <span class="ruby-identifier">key_slice</span><span class="ruby-operator">|</span>
274
- <span class="ruby-identifier">cs</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-identifier">key_slice</span>.<span class="ruby-identifier">key</span>.<span class="ruby-identifier">to_sym</span>
275
- <span class="ruby-keyword">end</span>
276
- <span class="ruby-keyword">end</span></pre>
277
- </div><!-- classnames-source -->
278
-
279
- </div>
280
-
281
-
282
-
283
-
284
- </div><!-- classnames-method -->
285
-
286
-
287
- <div id="close-method" class="method-detail ">
288
- <a name="method-i-close"></a>
289
-
290
-
291
- <div class="method-heading">
292
- <span class="method-name">close</span><span
293
- class="method-args">()</span>
294
- <span class="method-click-advice">click to toggle source</span>
295
- </div>
296
-
297
-
298
- <div class="method-description">
299
-
300
- <p>Doesn’t do anything</p>
301
-
302
-
303
-
304
- <div class="method-source-code" id="close-source">
305
- <pre>
306
- <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 172</span>
307
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">close</span>
308
- <span class="ruby-keyword">end</span></pre>
309
- </div><!-- close-source -->
310
-
311
- </div>
312
-
313
-
314
-
315
-
316
- </div><!-- close-method -->
317
-
318
-
319
- <div id="doc_count_totals-method" class="method-detail ">
320
- <a name="method-i-doc_count_totals"></a>
321
-
322
-
323
- <div class="method-heading">
324
- <span class="method-name">doc_count_totals</span><span
325
- class="method-args">()</span>
326
- <span class="method-click-advice">click to toggle source</span>
327
- </div>
328
-
329
-
330
- <div class="method-description">
331
-
332
-
333
-
334
-
335
-
336
- <div class="method-source-code" id="doc_count_totals-source">
337
- <pre>
338
- <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 165</span>
339
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">doc_count_totals</span>
340
- <span class="ruby-identifier">get_summary</span> <span class="ruby-string">&quot;doc_count&quot;</span>
341
- <span class="ruby-keyword">end</span></pre>
342
- </div><!-- doc_count_totals-source -->
343
-
344
- </div>
345
-
346
-
347
-
348
-
349
- </div><!-- doc_count_totals-method -->
350
-
351
-
352
- <div id="drop_tables-method" class="method-detail ">
353
- <a name="method-i-drop_tables"></a>
354
-
355
-
356
- <div class="method-heading">
357
- <span class="method-name">drop_tables</span><span
358
- class="method-args">()</span>
359
- <span class="method-click-advice">click to toggle source</span>
360
- </div>
361
-
362
-
363
- <div class="method-description">
364
-
365
- <p>Drop ankusa keyspace, reset internal caches</p>
366
-
367
- <p>FIXME: truncate doesn’t work with cassandra-beta2</p>
368
-
369
-
370
-
371
- <div class="method-source-code" id="drop_tables-source">
372
- <pre>
373
- <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 49</span>
374
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">drop_tables</span>
375
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">truncate!</span>(<span class="ruby-string">'classes'</span>)
376
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">truncate!</span>(<span class="ruby-string">'totals'</span>)
377
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">drop_keyspace</span>(<span class="ruby-ivar">@keyspace</span>)
378
- <span class="ruby-ivar">@klass_word_counts</span>, <span class="ruby-ivar">@klass_doc_counts</span> = {}
379
- <span class="ruby-keyword">end</span></pre>
380
- </div><!-- drop_tables-source -->
381
-
382
- </div>
383
-
384
-
385
-
386
-
387
- </div><!-- drop_tables-method -->
388
-
389
-
390
- <div id="get_doc_count-method" class="method-detail ">
391
- <a name="method-i-get_doc_count"></a>
392
-
393
-
394
- <div class="method-heading">
395
- <span class="method-name">get_doc_count</span><span
396
- class="method-args">(klass)</span>
397
- <span class="method-click-advice">click to toggle source</span>
398
- </div>
399
-
400
-
401
- <div class="method-description">
402
-
403
- <p>Fetch total documents for a given class and cache it</p>
404
-
405
-
406
-
407
- <div class="method-source-code" id="get_doc_count-source">
408
- <pre>
409
- <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 107</span>
410
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">get_doc_count</span>(<span class="ruby-identifier">klass</span>)
411
- <span class="ruby-ivar">@klass_doc_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>, <span class="ruby-string">&quot;doc_count&quot;</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_f</span>
412
- <span class="ruby-keyword">end</span></pre>
413
- </div><!-- get_doc_count-source -->
414
-
415
- </div>
416
-
417
-
418
-
419
-
420
- </div><!-- get_doc_count-method -->
421
-
422
-
423
- <div id="get_total_word_count-method" class="method-detail ">
424
- <a name="method-i-get_total_word_count"></a>
425
-
426
-
427
- <div class="method-heading">
428
- <span class="method-name">get_total_word_count</span><span
429
- class="method-args">(klass)</span>
430
- <span class="method-click-advice">click to toggle source</span>
431
- </div>
432
-
433
-
434
- <div class="method-description">
435
-
436
- <p>Fetch total word count for a given class and cache it</p>
437
-
438
-
439
-
440
- <div class="method-source-code" id="get_total_word_count-source">
441
- <pre>
442
- <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 100</span>
443
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">get_total_word_count</span>(<span class="ruby-identifier">klass</span>)
444
- <span class="ruby-ivar">@klass_word_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>, <span class="ruby-string">&quot;wordcount&quot;</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_f</span>
445
- <span class="ruby-keyword">end</span></pre>
446
- </div><!-- get_total_word_count-source -->
447
-
448
- </div>
449
-
450
-
451
-
452
-
453
- </div><!-- get_total_word_count-method -->
454
-
455
-
456
- <div id="get_vocabulary_sizes-method" class="method-detail ">
457
- <a name="method-i-get_vocabulary_sizes"></a>
458
-
459
-
460
- <div class="method-heading">
461
- <span class="method-name">get_vocabulary_sizes</span><span
462
- class="method-args">()</span>
463
- <span class="method-click-advice">click to toggle source</span>
464
- </div>
465
-
466
-
467
- <div class="method-description">
468
-
469
- <p>Does a table ‘scan’ of summary table pulling out the ‘vocabsize’
470
- column from each row. Generates a hash of (class, vocab_size) key value
471
- pairs</p>
472
-
473
-
474
-
475
- <div class="method-source-code" id="get_vocabulary_sizes-source">
476
- <pre>
477
- <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 93</span>
478
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">get_vocabulary_sizes</span>
479
- <span class="ruby-identifier">get_summary</span> <span class="ruby-string">&quot;vocabsize&quot;</span>
480
- <span class="ruby-keyword">end</span></pre>
481
- </div><!-- get_vocabulary_sizes-source -->
482
-
483
- </div>
484
-
485
-
486
-
487
-
488
- </div><!-- get_vocabulary_sizes-method -->
489
-
490
-
491
- <div id="get_word_counts-method" class="method-detail ">
492
- <a name="method-i-get_word_counts"></a>
493
-
494
-
495
- <div class="method-heading">
496
- <span class="method-name">get_word_counts</span><span
497
- class="method-args">(word)</span>
498
- <span class="method-click-advice">click to toggle source</span>
499
- </div>
500
-
501
-
502
- <div class="method-description">
503
-
504
- <p>Fetch hash of word counts as a single row from cassandra. Here column_name
505
- is the class and column value is the count</p>
506
-
507
-
508
-
509
- <div class="method-source-code" id="get_word_counts-source">
510
- <pre>
511
- <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 82</span>
512
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">get_word_counts</span>(<span class="ruby-identifier">word</span>)
513
- <span class="ruby-comment"># fetch all (class,count) pairs for a given word</span>
514
- <span class="ruby-identifier">row</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:classes</span>, <span class="ruby-identifier">word</span>.<span class="ruby-identifier">to_s</span>)
515
- <span class="ruby-keyword">return</span> <span class="ruby-identifier">row</span>.<span class="ruby-identifier">to_hash</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">row</span>.<span class="ruby-identifier">empty?</span>
516
- <span class="ruby-identifier">row</span>.<span class="ruby-identifier">inject</span>({}){<span class="ruby-operator">|</span><span class="ruby-identifier">counts</span>, <span class="ruby-identifier">col</span><span class="ruby-operator">|</span> <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">col</span>.<span class="ruby-identifier">first</span>.<span class="ruby-identifier">to_sym</span>] = [<span class="ruby-identifier">col</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_f</span>,<span class="ruby-value">0</span>].<span class="ruby-identifier">max</span>; <span class="ruby-identifier">counts</span>}
517
- <span class="ruby-keyword">end</span></pre>
518
- </div><!-- get_word_counts-source -->
519
-
520
- </div>
521
-
522
-
523
-
524
-
525
- </div><!-- get_word_counts-method -->
526
-
527
-
528
- <div id="incr_doc_count-method" class="method-detail ">
529
- <a name="method-i-incr_doc_count"></a>
530
-
531
-
532
- <div class="method-heading">
533
- <span class="method-name">incr_doc_count</span><span
534
- class="method-args">(klass, count)</span>
535
- <span class="method-click-advice">click to toggle source</span>
536
- </div>
537
-
538
-
539
- <div class="method-description">
540
-
541
- <p>Increment total document count for a given class by ‘count’</p>
542
-
543
-
544
-
545
- <div class="method-source-code" id="incr_doc_count-source">
546
- <pre>
547
- <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 157</span>
548
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">incr_doc_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">count</span>)
549
- <span class="ruby-identifier">klass</span> = <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>
550
- <span class="ruby-identifier">doc_count</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, <span class="ruby-string">&quot;doc_count&quot;</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
551
- <span class="ruby-identifier">doc_count</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
552
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, {<span class="ruby-string">&quot;doc_count&quot;</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">doc_count</span>.<span class="ruby-identifier">to_s</span>})
553
- <span class="ruby-ivar">@klass_doc_counts</span>[<span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">doc_count</span>
554
- <span class="ruby-keyword">end</span></pre>
555
- </div><!-- incr_doc_count-source -->
556
-
557
- </div>
558
-
559
-
560
-
561
-
562
- </div><!-- incr_doc_count-method -->
563
-
564
-
565
- <div id="incr_total_word_count-method" class="method-detail ">
566
- <a name="method-i-incr_total_word_count"></a>
567
-
568
-
569
- <div class="method-heading">
570
- <span class="method-name">incr_total_word_count</span><span
571
- class="method-args">(klass, count)</span>
572
- <span class="method-click-advice">click to toggle source</span>
573
- </div>
574
-
575
-
576
- <div class="method-description">
577
-
578
- <p>Increment total word count for a given class by ‘count’</p>
579
-
580
-
581
-
582
- <div class="method-source-code" id="incr_total_word_count-source">
583
- <pre>
584
- <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 146</span>
585
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">incr_total_word_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">count</span>)
586
- <span class="ruby-identifier">klass</span> = <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>
587
- <span class="ruby-identifier">wordcount</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, <span class="ruby-string">&quot;wordcount&quot;</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
588
- <span class="ruby-identifier">wordcount</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
589
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, {<span class="ruby-string">&quot;wordcount&quot;</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">wordcount</span>.<span class="ruby-identifier">to_s</span>})
590
- <span class="ruby-ivar">@klass_word_counts</span>[<span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">wordcount</span>
591
- <span class="ruby-keyword">end</span></pre>
592
- </div><!-- incr_total_word_count-source -->
593
-
594
- </div>
595
-
596
-
597
-
598
-
599
- </div><!-- incr_total_word_count-method -->
600
-
601
-
602
- <div id="incr_word_count-method" class="method-detail ">
603
- <a name="method-i-incr_word_count"></a>
604
-
605
-
606
- <div class="method-heading">
607
- <span class="method-name">incr_word_count</span><span
608
- class="method-args">(klass, word, count)</span>
609
- <span class="method-click-advice">click to toggle source</span>
610
- </div>
611
-
612
-
613
- <div class="method-description">
614
-
615
- <p>Increment the count for a given (word,class) pair. Evidently, cassandra
616
- does not support atomic increment/decrement. Psh. HBase uses ZooKeeper to
617
- implement atomic operations, ain’t it special?</p>
618
-
619
-
620
-
621
- <div class="method-source-code" id="incr_word_count-source">
622
- <pre>
623
- <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 116</span>
624
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">incr_word_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span>)
625
- <span class="ruby-comment"># Only wants strings</span>
626
- <span class="ruby-identifier">klass</span> = <span class="ruby-identifier">klass</span>.<span class="ruby-identifier">to_s</span>
627
- <span class="ruby-identifier">word</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">to_s</span>
628
-
629
- <span class="ruby-identifier">prior_count</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:classes</span>, <span class="ruby-identifier">word</span>, <span class="ruby-identifier">klass</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
630
- <span class="ruby-identifier">new_count</span> = <span class="ruby-identifier">prior_count</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">count</span>
631
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-value">:classes</span>, <span class="ruby-identifier">word</span>, {<span class="ruby-identifier">klass</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">new_count</span>.<span class="ruby-identifier">to_s</span>})
632
-
633
- <span class="ruby-keyword">if</span> (<span class="ruby-identifier">prior_count</span> <span class="ruby-operator">==</span> <span class="ruby-value">0</span> <span class="ruby-operator">&amp;&amp;</span> <span class="ruby-identifier">count</span> <span class="ruby-operator">&gt;</span> <span class="ruby-value">0</span>)
634
- <span class="ruby-comment">#</span>
635
- <span class="ruby-comment"># we've never seen this word before and we're not trying to unlearn it</span>
636
- <span class="ruby-comment">#</span>
637
- <span class="ruby-identifier">vocab_size</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, <span class="ruby-string">&quot;vocabsize&quot;</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
638
- <span class="ruby-identifier">vocab_size</span> <span class="ruby-operator">+=</span> <span class="ruby-value">1</span>
639
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, {<span class="ruby-string">&quot;vocabsize&quot;</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">vocab_size</span>.<span class="ruby-identifier">to_s</span>})
640
- <span class="ruby-keyword">elsif</span> <span class="ruby-identifier">new_count</span> <span class="ruby-operator">==</span> <span class="ruby-value">0</span>
641
- <span class="ruby-comment">#</span>
642
- <span class="ruby-comment"># we've seen this word before but we're trying to unlearn it</span>
643
- <span class="ruby-comment">#</span>
644
- <span class="ruby-identifier">vocab_size</span> = <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, <span class="ruby-string">&quot;vocabsize&quot;</span>).<span class="ruby-identifier">values</span>.<span class="ruby-identifier">last</span>.<span class="ruby-identifier">to_i</span>
645
- <span class="ruby-identifier">vocab_size</span> <span class="ruby-operator">-=</span> <span class="ruby-value">1</span>
646
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">insert</span>(<span class="ruby-value">:totals</span>, <span class="ruby-identifier">klass</span>, {<span class="ruby-string">&quot;vocabsize&quot;</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">vocab_size</span>.<span class="ruby-identifier">to_s</span>})
647
- <span class="ruby-keyword">end</span>
648
- <span class="ruby-identifier">new_count</span>
649
- <span class="ruby-keyword">end</span></pre>
650
- </div><!-- incr_word_count-source -->
651
-
652
- </div>
653
-
654
-
655
-
656
-
657
- </div><!-- incr_word_count-method -->
658
-
659
-
660
- <div id="init_tables-method" class="method-detail ">
661
- <a name="method-i-init_tables"></a>
662
-
663
-
664
- <div class="method-heading">
665
- <span class="method-name">init_tables</span><span
666
- class="method-args">()</span>
667
- <span class="method-click-advice">click to toggle source</span>
668
- </div>
669
-
670
-
671
- <div class="method-description">
672
-
673
- <p>Create required keyspace and column families</p>
674
-
675
-
676
-
677
- <div class="method-source-code" id="init_tables-source">
678
- <pre>
679
- <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 60</span>
680
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">init_tables</span>
681
- <span class="ruby-comment"># Do nothing if keyspace already exists</span>
682
- <span class="ruby-keyword">if</span> <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">keyspaces</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-ivar">@keyspace</span>)
683
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">keyspace</span> = <span class="ruby-ivar">@keyspace</span>
684
- <span class="ruby-keyword">else</span>
685
- <span class="ruby-identifier">freq_table</span> = <span class="ruby-constant">Cassandra</span><span class="ruby-operator">::</span><span class="ruby-constant">ColumnFamily</span>.<span class="ruby-identifier">new</span>({<span class="ruby-value">:keyspace</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-ivar">@keyspace</span>, <span class="ruby-value">:name</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-string">&quot;classes&quot;</span>}) <span class="ruby-comment"># word =&gt; {classname =&gt; count}</span>
686
- <span class="ruby-identifier">summary_table</span> = <span class="ruby-constant">Cassandra</span><span class="ruby-operator">::</span><span class="ruby-constant">ColumnFamily</span>.<span class="ruby-identifier">new</span>({<span class="ruby-value">:keyspace</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-ivar">@keyspace</span>, <span class="ruby-value">:name</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-string">&quot;totals&quot;</span>}) <span class="ruby-comment"># class =&gt; {wordcount =&gt; count}</span>
687
- <span class="ruby-identifier">ks_def</span> = <span class="ruby-constant">Cassandra</span><span class="ruby-operator">::</span><span class="ruby-constant">Keyspace</span>.<span class="ruby-identifier">new</span>({
688
- <span class="ruby-value">:name</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-ivar">@keyspace</span>,
689
- <span class="ruby-value">:strategy_class</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-string">'org.apache.cassandra.locator.SimpleStrategy'</span>,
690
- <span class="ruby-value">:replication_factor</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-value">1</span>,
691
- <span class="ruby-value">:cf_defs</span> =<span class="ruby-operator">&gt;</span> [<span class="ruby-identifier">freq_table</span>, <span class="ruby-identifier">summary_table</span>]
692
- })
693
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">add_keyspace</span> <span class="ruby-identifier">ks_def</span>
694
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">keyspace</span> = <span class="ruby-ivar">@keyspace</span>
695
- <span class="ruby-keyword">end</span>
696
- <span class="ruby-keyword">end</span></pre>
697
- </div><!-- init_tables-source -->
698
-
699
- </div>
700
-
701
-
702
-
703
-
704
- </div><!-- init_tables-method -->
705
-
706
-
707
- <div id="reset-method" class="method-detail ">
708
- <a name="method-i-reset"></a>
709
-
710
-
711
- <div class="method-heading">
712
- <span class="method-name">reset</span><span
713
- class="method-args">()</span>
714
- <span class="method-click-advice">click to toggle source</span>
715
- </div>
716
-
717
-
718
- <div class="method-description">
719
-
720
-
721
-
722
-
723
-
724
- <div class="method-source-code" id="reset-source">
725
- <pre>
726
- <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 39</span>
727
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">reset</span>
728
- <span class="ruby-identifier">drop_tables</span>
729
- <span class="ruby-identifier">init_tables</span>
730
- <span class="ruby-keyword">end</span></pre>
731
- </div><!-- reset-source -->
732
-
733
- </div>
734
-
735
-
736
-
737
-
738
- </div><!-- reset-method -->
739
-
740
-
741
- </div><!-- public-instance-method-details -->
742
-
743
- <div id="protected-instance-method-details" class="method-section section">
744
- <h3 class="section-header">Protected Instance Methods</h3>
745
-
746
-
747
- <div id="get_summary-method" class="method-detail ">
748
- <a name="method-i-get_summary"></a>
749
-
750
-
751
- <div class="method-heading">
752
- <span class="method-name">get_summary</span><span
753
- class="method-args">(name)</span>
754
- <span class="method-click-advice">click to toggle source</span>
755
- </div>
756
-
757
-
758
- <div class="method-description">
759
-
760
- <p>Fetch 100 rows from summary table, yes, increase if necessary</p>
761
-
762
-
763
-
764
- <div class="method-source-code" id="get_summary-source">
765
- <pre>
766
- <span class="ruby-comment"># File lib/ankusa/cassandra_storage.rb, line 180</span>
767
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">get_summary</span>(<span class="ruby-identifier">name</span>)
768
- <span class="ruby-identifier">counts</span> = {}
769
- <span class="ruby-ivar">@cassandra</span>.<span class="ruby-identifier">get_range</span>(<span class="ruby-value">:totals</span>, {<span class="ruby-value">:start</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-string">''</span>, <span class="ruby-value">:finish</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-string">''</span>, <span class="ruby-value">:count</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-ivar">@max_classes</span>}).<span class="ruby-identifier">each</span> <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">key_slice</span><span class="ruby-operator">|</span>
770
- <span class="ruby-comment"># keyslice is a clunky thrift object, map into a ruby hash</span>
771
- <span class="ruby-identifier">row</span> = <span class="ruby-identifier">key_slice</span>.<span class="ruby-identifier">columns</span>.<span class="ruby-identifier">inject</span>({}){<span class="ruby-operator">|</span><span class="ruby-identifier">hsh</span>, <span class="ruby-identifier">c</span><span class="ruby-operator">|</span> <span class="ruby-identifier">hsh</span>[<span class="ruby-identifier">c</span>.<span class="ruby-identifier">column</span>.<span class="ruby-identifier">name</span>] = <span class="ruby-identifier">c</span>.<span class="ruby-identifier">column</span>.<span class="ruby-identifier">value</span>; <span class="ruby-identifier">hsh</span>}
772
- <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">key_slice</span>.<span class="ruby-identifier">key</span>.<span class="ruby-identifier">to_sym</span>] = <span class="ruby-identifier">row</span>[<span class="ruby-identifier">name</span>].<span class="ruby-identifier">to_f</span>
773
- <span class="ruby-keyword">end</span>
774
- <span class="ruby-identifier">counts</span>
775
- <span class="ruby-keyword">end</span></pre>
776
- </div><!-- get_summary-source -->
777
-
778
- </div>
779
-
780
-
781
-
782
-
783
- </div><!-- get_summary-method -->
784
-
785
-
786
- </div><!-- protected-instance-method-details -->
787
-
788
- </div><!-- 5Buntitled-5D -->
789
-
790
-
791
- </div><!-- documentation -->
792
-
793
- <div id="validator-badges">
794
- <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
795
- <p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish
796
- Rdoc Generator</a> 2</small>.</p>
797
- </div>
798
-
799
- </body>
800
- </html>
801
-