ankusa 0.0.13 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. data/Gemfile +4 -0
  2. data/Gemfile.lock +16 -0
  3. data/README.rdoc +5 -3
  4. data/Rakefile +5 -5
  5. data/lib/ankusa/naive_bayes.rb +3 -3
  6. data/lib/ankusa/version.rb +1 -1
  7. metadata +36 -100
  8. data/docs/Ankusa.html +0 -229
  9. data/docs/Ankusa/CassandraStorage.html +0 -801
  10. data/docs/Ankusa/Classifier.html +0 -440
  11. data/docs/Ankusa/FileSystemStorage.html +0 -376
  12. data/docs/Ankusa/HBaseStorage.html +0 -845
  13. data/docs/Ankusa/KLDivergenceClassifier.html +0 -265
  14. data/docs/Ankusa/MemoryStorage.html +0 -672
  15. data/docs/Ankusa/NaiveBayesClassifier.html +0 -313
  16. data/docs/Ankusa/TextHash.html +0 -390
  17. data/docs/README_rdoc.html +0 -268
  18. data/docs/String.html +0 -241
  19. data/docs/created.rid +0 -14
  20. data/docs/images/brick.png +0 -0
  21. data/docs/images/brick_link.png +0 -0
  22. data/docs/images/bug.png +0 -0
  23. data/docs/images/bullet_black.png +0 -0
  24. data/docs/images/bullet_toggle_minus.png +0 -0
  25. data/docs/images/bullet_toggle_plus.png +0 -0
  26. data/docs/images/date.png +0 -0
  27. data/docs/images/find.png +0 -0
  28. data/docs/images/loadingAnimation.gif +0 -0
  29. data/docs/images/macFFBgHack.png +0 -0
  30. data/docs/images/package.png +0 -0
  31. data/docs/images/page_green.png +0 -0
  32. data/docs/images/page_white_text.png +0 -0
  33. data/docs/images/page_white_width.png +0 -0
  34. data/docs/images/plugin.png +0 -0
  35. data/docs/images/ruby.png +0 -0
  36. data/docs/images/tag_green.png +0 -0
  37. data/docs/images/wrench.png +0 -0
  38. data/docs/images/wrench_orange.png +0 -0
  39. data/docs/images/zoom.png +0 -0
  40. data/docs/index.html +0 -212
  41. data/docs/js/darkfish.js +0 -116
  42. data/docs/js/jquery.js +0 -32
  43. data/docs/js/quicksearch.js +0 -114
  44. data/docs/js/thickbox-compressed.js +0 -10
  45. data/docs/lib/ankusa/cassandra_storage_rb.html +0 -54
  46. data/docs/lib/ankusa/classifier_rb.html +0 -52
  47. data/docs/lib/ankusa/extensions_rb.html +0 -54
  48. data/docs/lib/ankusa/file_system_storage_rb.html +0 -54
  49. data/docs/lib/ankusa/hasher_rb.html +0 -56
  50. data/docs/lib/ankusa/hbase_storage_rb.html +0 -54
  51. data/docs/lib/ankusa/kl_divergence_rb.html +0 -52
  52. data/docs/lib/ankusa/memory_storage_rb.html +0 -52
  53. data/docs/lib/ankusa/naive_bayes_rb.html +0 -52
  54. data/docs/lib/ankusa/stopwords_rb.html +0 -52
  55. data/docs/lib/ankusa/version_rb.html +0 -52
  56. data/docs/lib/ankusa_rb.html +0 -64
  57. data/docs/rdoc.css +0 -759
@@ -1,845 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
- <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5
- <head>
6
- <meta content="text/html; charset=UTF-8" http-equiv="Content-Type" />
7
-
8
- <title>Class: Ankusa::HBaseStorage</title>
9
-
10
- <link rel="stylesheet" href="../rdoc.css" type="text/css" media="screen" />
11
-
12
- <script src="../js/jquery.js" type="text/javascript" charset="utf-8"></script>
13
- <script src="../js/thickbox-compressed.js" type="text/javascript" charset="utf-8"></script>
14
- <script src="../js/quicksearch.js" type="text/javascript" charset="utf-8"></script>
15
- <script src="../js/darkfish.js" type="text/javascript" charset="utf-8"></script>
16
-
17
- </head>
18
- <body id="top" class="class">
19
-
20
- <div id="metadata">
21
- <div id="home-metadata">
22
- <div id="home-section" class="section">
23
- <h3 class="section-header">
24
- <a href="../index.html">Home</a>
25
- <a href="../index.html#classes">Classes</a>
26
- <a href="../index.html#methods">Methods</a>
27
- </h3>
28
- </div>
29
- </div>
30
-
31
- <div id="file-metadata">
32
- <div id="file-list-section" class="section">
33
- <h3 class="section-header">In Files</h3>
34
- <div class="section-body">
35
- <ul>
36
-
37
- <li><a href="../lib/ankusa/hbase_storage_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
38
- class="thickbox" title="lib/ankusa/hbase_storage.rb">lib/ankusa/hbase_storage.rb</a></li>
39
-
40
- </ul>
41
- </div>
42
- </div>
43
-
44
-
45
- </div>
46
-
47
- <div id="class-metadata">
48
-
49
- <!-- Parent Class -->
50
- <div id="parent-class-section" class="section">
51
- <h3 class="section-header">Parent</h3>
52
-
53
- <p class="link">Object</p>
54
-
55
- </div>
56
-
57
-
58
-
59
-
60
-
61
-
62
-
63
- <!-- Method Quickref -->
64
- <div id="method-list-section" class="section">
65
- <h3 class="section-header">Methods</h3>
66
- <ul class="link-list">
67
-
68
- <li><a href="#method-c-new">::new</a></li>
69
-
70
- <li><a href="#method-i-classnames">#classnames</a></li>
71
-
72
- <li><a href="#method-i-close">#close</a></li>
73
-
74
- <li><a href="#method-i-doc_count_totals">#doc_count_totals</a></li>
75
-
76
- <li><a href="#method-i-drop_tables">#drop_tables</a></li>
77
-
78
- <li><a href="#method-i-freq_table">#freq_table</a></li>
79
-
80
- <li><a href="#method-i-get_doc_count">#get_doc_count</a></li>
81
-
82
- <li><a href="#method-i-get_summary">#get_summary</a></li>
83
-
84
- <li><a href="#method-i-get_total_word_count">#get_total_word_count</a></li>
85
-
86
- <li><a href="#method-i-get_vocabulary_sizes">#get_vocabulary_sizes</a></li>
87
-
88
- <li><a href="#method-i-get_word_counts">#get_word_counts</a></li>
89
-
90
- <li><a href="#method-i-incr_doc_count">#incr_doc_count</a></li>
91
-
92
- <li><a href="#method-i-incr_total_word_count">#incr_total_word_count</a></li>
93
-
94
- <li><a href="#method-i-incr_word_count">#incr_word_count</a></li>
95
-
96
- <li><a href="#method-i-init_tables">#init_tables</a></li>
97
-
98
- <li><a href="#method-i-reset">#reset</a></li>
99
-
100
- <li><a href="#method-i-summary_table">#summary_table</a></li>
101
-
102
- </ul>
103
- </div>
104
-
105
-
106
-
107
- </div>
108
-
109
- <div id="project-metadata">
110
-
111
-
112
- <div id="fileindex-section" class="section project-section">
113
- <h3 class="section-header">Files</h3>
114
- <ul>
115
-
116
- <li class="file"><a href="../README_rdoc.html">README.rdoc</a></li>
117
-
118
- </ul>
119
- </div>
120
-
121
-
122
- <div id="classindex-section" class="section project-section">
123
- <h3 class="section-header">Class/Module Index
124
- <span class="search-toggle"><img src="../images/find.png"
125
- height="16" width="16" alt="[+]"
126
- title="show/hide quicksearch" /></span></h3>
127
- <form action="#" method="get" accept-charset="utf-8" class="initially-hidden">
128
- <fieldset>
129
- <legend>Quicksearch</legend>
130
- <input type="text" name="quicksearch" value=""
131
- class="quicksearch-field" />
132
- </fieldset>
133
- </form>
134
-
135
- <ul class="link-list">
136
-
137
- <li><a href="../Ankusa.html">Ankusa</a></li>
138
-
139
- <li><a href="../Ankusa/CassandraStorage.html">Ankusa::CassandraStorage</a></li>
140
-
141
- <li><a href="../Ankusa/Classifier.html">Ankusa::Classifier</a></li>
142
-
143
- <li><a href="../Ankusa/FileSystemStorage.html">Ankusa::FileSystemStorage</a></li>
144
-
145
- <li><a href="../Ankusa/HBaseStorage.html">Ankusa::HBaseStorage</a></li>
146
-
147
- <li><a href="../Ankusa/KLDivergenceClassifier.html">Ankusa::KLDivergenceClassifier</a></li>
148
-
149
- <li><a href="../Ankusa/MemoryStorage.html">Ankusa::MemoryStorage</a></li>
150
-
151
- <li><a href="../Ankusa/NaiveBayesClassifier.html">Ankusa::NaiveBayesClassifier</a></li>
152
-
153
- <li><a href="../Ankusa/TextHash.html">Ankusa::TextHash</a></li>
154
-
155
- <li><a href="../String.html">String</a></li>
156
-
157
- </ul>
158
- <div id="no-class-search-results" style="display: none;">No matching classes.</div>
159
- </div>
160
-
161
-
162
- </div>
163
- </div>
164
-
165
- <div id="documentation">
166
- <h1 class="class">Ankusa::HBaseStorage</h1>
167
-
168
- <div id="description" class="description">
169
-
170
- </div><!-- description -->
171
-
172
-
173
- <div id="5Buntitled-5D" class="documentation-section">
174
-
175
-
176
-
177
-
178
-
179
-
180
-
181
- <!-- Attributes -->
182
- <div id="attribute-method-details" class="method-section section">
183
- <h3 class="section-header">Attributes</h3>
184
-
185
-
186
- <div id="hbase-attribute-method" class="method-detail">
187
- <a name="hbase"></a>
188
-
189
- <div class="method-heading attribute-method-heading">
190
- <span class="method-name">hbase</span><span
191
- class="attribute-access-type">[R]</span>
192
- </div>
193
-
194
- <div class="method-description">
195
-
196
-
197
-
198
- </div>
199
- </div>
200
-
201
- </div><!-- attribute-method-details -->
202
-
203
-
204
- <!-- Methods -->
205
-
206
- <div id="public-class-method-details" class="method-section section">
207
- <h3 class="section-header">Public Class Methods</h3>
208
-
209
-
210
- <div id="new-method" class="method-detail ">
211
- <a name="method-c-new"></a>
212
-
213
-
214
- <div class="method-heading">
215
- <span class="method-name">new</span><span
216
- class="method-args">(host='localhost', port=9090, frequency_tablename="ankusa_word_frequencies", summary_tablename="ankusa_summary")</span>
217
- <span class="method-click-advice">click to toggle source</span>
218
- </div>
219
-
220
-
221
- <div class="method-description">
222
-
223
-
224
-
225
-
226
-
227
- <div class="method-source-code" id="new-source">
228
- <pre>
229
- <span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 8</span>
230
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">host</span>=<span class="ruby-string">'localhost'</span>, <span class="ruby-identifier">port</span>=<span class="ruby-value">9090</span>, <span class="ruby-identifier">frequency_tablename</span>=<span class="ruby-string">&quot;ankusa_word_frequencies&quot;</span>, <span class="ruby-identifier">summary_tablename</span>=<span class="ruby-string">&quot;ankusa_summary&quot;</span>)
231
- <span class="ruby-ivar">@hbase</span> = <span class="ruby-constant">HBaseRb</span><span class="ruby-operator">::</span><span class="ruby-constant">Client</span>.<span class="ruby-identifier">new</span> <span class="ruby-identifier">host</span>, <span class="ruby-identifier">port</span>
232
- <span class="ruby-ivar">@ftablename</span> = <span class="ruby-identifier">frequency_tablename</span>
233
- <span class="ruby-ivar">@stablename</span> = <span class="ruby-identifier">summary_tablename</span>
234
- <span class="ruby-ivar">@klass_word_counts</span> = {}
235
- <span class="ruby-ivar">@klass_doc_counts</span> = {}
236
- <span class="ruby-identifier">init_tables</span>
237
- <span class="ruby-keyword">end</span></pre>
238
- </div><!-- new-source -->
239
-
240
- </div>
241
-
242
-
243
-
244
-
245
- </div><!-- new-method -->
246
-
247
-
248
- </div><!-- public-class-method-details -->
249
-
250
- <div id="public-instance-method-details" class="method-section section">
251
- <h3 class="section-header">Public Instance Methods</h3>
252
-
253
-
254
- <div id="classnames-method" class="method-detail ">
255
- <a name="method-i-classnames"></a>
256
-
257
-
258
- <div class="method-heading">
259
- <span class="method-name">classnames</span><span
260
- class="method-args">()</span>
261
- <span class="method-click-advice">click to toggle source</span>
262
- </div>
263
-
264
-
265
- <div class="method-description">
266
-
267
-
268
-
269
-
270
-
271
- <div class="method-source-code" id="classnames-source">
272
- <pre>
273
- <span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 17</span>
274
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">classnames</span>
275
- <span class="ruby-identifier">cs</span> = []
276
- <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">create_scanner</span>(<span class="ruby-string">&quot;&quot;</span>, <span class="ruby-string">&quot;totals&quot;</span>) { <span class="ruby-operator">|</span><span class="ruby-identifier">row</span><span class="ruby-operator">|</span>
277
- <span class="ruby-identifier">cs</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-identifier">row</span>.<span class="ruby-identifier">row</span>.<span class="ruby-identifier">intern</span>
278
- }
279
- <span class="ruby-identifier">cs</span>
280
- <span class="ruby-keyword">end</span></pre>
281
- </div><!-- classnames-source -->
282
-
283
- </div>
284
-
285
-
286
-
287
-
288
- </div><!-- classnames-method -->
289
-
290
-
291
- <div id="close-method" class="method-detail ">
292
- <a name="method-i-close"></a>
293
-
294
-
295
- <div class="method-heading">
296
- <span class="method-name">close</span><span
297
- class="method-args">()</span>
298
- <span class="method-click-advice">click to toggle source</span>
299
- </div>
300
-
301
-
302
- <div class="method-description">
303
-
304
-
305
-
306
-
307
-
308
- <div class="method-source-code" id="close-source">
309
- <pre>
310
- <span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 103</span>
311
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">close</span>
312
- <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">close</span>
313
- <span class="ruby-keyword">end</span></pre>
314
- </div><!-- close-source -->
315
-
316
- </div>
317
-
318
-
319
-
320
-
321
- </div><!-- close-method -->
322
-
323
-
324
- <div id="doc_count_totals-method" class="method-detail ">
325
- <a name="method-i-doc_count_totals"></a>
326
-
327
-
328
- <div class="method-heading">
329
- <span class="method-name">doc_count_totals</span><span
330
- class="method-args">()</span>
331
- <span class="method-click-advice">click to toggle source</span>
332
- </div>
333
-
334
-
335
- <div class="method-description">
336
-
337
-
338
-
339
-
340
-
341
- <div class="method-source-code" id="doc_count_totals-source">
342
- <pre>
343
- <span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 99</span>
344
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">doc_count_totals</span>
345
- <span class="ruby-identifier">get_summary</span> <span class="ruby-string">&quot;totals:doccount&quot;</span>
346
- <span class="ruby-keyword">end</span></pre>
347
- </div><!-- doc_count_totals-source -->
348
-
349
- </div>
350
-
351
-
352
-
353
-
354
- </div><!-- doc_count_totals-method -->
355
-
356
-
357
- <div id="drop_tables-method" class="method-detail ">
358
- <a name="method-i-drop_tables"></a>
359
-
360
-
361
- <div class="method-heading">
362
- <span class="method-name">drop_tables</span><span
363
- class="method-args">()</span>
364
- <span class="method-click-advice">click to toggle source</span>
365
- </div>
366
-
367
-
368
- <div class="method-description">
369
-
370
-
371
-
372
-
373
-
374
- <div class="method-source-code" id="drop_tables-source">
375
- <pre>
376
- <span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 30</span>
377
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">drop_tables</span>
378
- <span class="ruby-identifier">freq_table</span>.<span class="ruby-identifier">delete</span>
379
- <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">delete</span>
380
- <span class="ruby-ivar">@stable</span> = <span class="ruby-keyword">nil</span>
381
- <span class="ruby-ivar">@ftable</span> = <span class="ruby-keyword">nil</span>
382
- <span class="ruby-ivar">@klass_word_counts</span> = {}
383
- <span class="ruby-ivar">@klass_doc_counts</span> = {}
384
- <span class="ruby-keyword">end</span></pre>
385
- </div><!-- drop_tables-source -->
386
-
387
- </div>
388
-
389
-
390
-
391
-
392
- </div><!-- drop_tables-method -->
393
-
394
-
395
- <div id="get_doc_count-method" class="method-detail ">
396
- <a name="method-i-get_doc_count"></a>
397
-
398
-
399
- <div class="method-heading">
400
- <span class="method-name">get_doc_count</span><span
401
- class="method-args">(klass)</span>
402
- <span class="method-click-advice">click to toggle source</span>
403
- </div>
404
-
405
-
406
- <div class="method-description">
407
-
408
-
409
-
410
-
411
-
412
- <div class="method-source-code" id="get_doc_count-source">
413
- <pre>
414
- <span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 73</span>
415
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">get_doc_count</span>(<span class="ruby-identifier">klass</span>)
416
- <span class="ruby-ivar">@klass_doc_counts</span>.<span class="ruby-identifier">fetch</span>(<span class="ruby-identifier">klass</span>) {
417
- <span class="ruby-ivar">@klass_doc_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-string">&quot;totals:doccount&quot;</span>).<span class="ruby-identifier">first</span>.<span class="ruby-identifier">to_i64</span>.<span class="ruby-identifier">to_f</span>
418
- }
419
- <span class="ruby-keyword">end</span></pre>
420
- </div><!-- get_doc_count-source -->
421
-
422
- </div>
423
-
424
-
425
-
426
-
427
- </div><!-- get_doc_count-method -->
428
-
429
-
430
- <div id="get_total_word_count-method" class="method-detail ">
431
- <a name="method-i-get_total_word_count"></a>
432
-
433
-
434
- <div class="method-heading">
435
- <span class="method-name">get_total_word_count</span><span
436
- class="method-args">(klass)</span>
437
- <span class="method-click-advice">click to toggle source</span>
438
- </div>
439
-
440
-
441
- <div class="method-description">
442
-
443
-
444
-
445
-
446
-
447
- <div class="method-source-code" id="get_total_word_count-source">
448
- <pre>
449
- <span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 67</span>
450
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">get_total_word_count</span>(<span class="ruby-identifier">klass</span>)
451
- <span class="ruby-ivar">@klass_word_counts</span>.<span class="ruby-identifier">fetch</span>(<span class="ruby-identifier">klass</span>) {
452
- <span class="ruby-ivar">@klass_word_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-string">&quot;totals:wordcount&quot;</span>).<span class="ruby-identifier">first</span>.<span class="ruby-identifier">to_i64</span>.<span class="ruby-identifier">to_f</span>
453
- }
454
- <span class="ruby-keyword">end</span></pre>
455
- </div><!-- get_total_word_count-source -->
456
-
457
- </div>
458
-
459
-
460
-
461
-
462
- </div><!-- get_total_word_count-method -->
463
-
464
-
465
- <div id="get_vocabulary_sizes-method" class="method-detail ">
466
- <a name="method-i-get_vocabulary_sizes"></a>
467
-
468
-
469
- <div class="method-heading">
470
- <span class="method-name">get_vocabulary_sizes</span><span
471
- class="method-args">()</span>
472
- <span class="method-click-advice">click to toggle source</span>
473
- </div>
474
-
475
-
476
- <div class="method-description">
477
-
478
-
479
-
480
-
481
-
482
- <div class="method-source-code" id="get_vocabulary_sizes-source">
483
- <pre>
484
- <span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 63</span>
485
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">get_vocabulary_sizes</span>
486
- <span class="ruby-identifier">get_summary</span> <span class="ruby-string">&quot;totals:vocabsize&quot;</span>
487
- <span class="ruby-keyword">end</span></pre>
488
- </div><!-- get_vocabulary_sizes-source -->
489
-
490
- </div>
491
-
492
-
493
-
494
-
495
- </div><!-- get_vocabulary_sizes-method -->
496
-
497
-
498
- <div id="get_word_counts-method" class="method-detail ">
499
- <a name="method-i-get_word_counts"></a>
500
-
501
-
502
- <div class="method-heading">
503
- <span class="method-name">get_word_counts</span><span
504
- class="method-args">(word)</span>
505
- <span class="method-click-advice">click to toggle source</span>
506
- </div>
507
-
508
-
509
- <div class="method-description">
510
-
511
-
512
-
513
-
514
-
515
- <div class="method-source-code" id="get_word_counts-source">
516
- <pre>
517
- <span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 49</span>
518
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">get_word_counts</span>(<span class="ruby-identifier">word</span>)
519
- <span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value">0</span>)
520
- <span class="ruby-identifier">row</span> = <span class="ruby-identifier">freq_table</span>.<span class="ruby-identifier">get_row</span>(<span class="ruby-identifier">word</span>)
521
- <span class="ruby-keyword">return</span> <span class="ruby-identifier">counts</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">row</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">==</span> <span class="ruby-value">0</span>
522
-
523
- <span class="ruby-identifier">row</span>.<span class="ruby-identifier">first</span>.<span class="ruby-identifier">columns</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">colname</span>, <span class="ruby-identifier">cell</span><span class="ruby-operator">|</span>
524
- <span class="ruby-identifier">classname</span> = <span class="ruby-identifier">colname</span>.<span class="ruby-identifier">split</span>(<span class="ruby-string">':'</span>)[<span class="ruby-value">1</span>].<span class="ruby-identifier">intern</span>
525
- <span class="ruby-comment"># in case untrain has been called too many times</span>
526
- <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">classname</span>] = [<span class="ruby-identifier">cell</span>.<span class="ruby-identifier">to_i64</span>.<span class="ruby-identifier">to_f</span>, <span class="ruby-value">0</span>].<span class="ruby-identifier">max</span>
527
- }
528
-
529
- <span class="ruby-identifier">counts</span>
530
- <span class="ruby-keyword">end</span></pre>
531
- </div><!-- get_word_counts-source -->
532
-
533
- </div>
534
-
535
-
536
-
537
-
538
- </div><!-- get_word_counts-method -->
539
-
540
-
541
- <div id="incr_doc_count-method" class="method-detail ">
542
- <a name="method-i-incr_doc_count"></a>
543
-
544
-
545
- <div class="method-heading">
546
- <span class="method-name">incr_doc_count</span><span
547
- class="method-args">(klass, count)</span>
548
- <span class="method-click-advice">click to toggle source</span>
549
- </div>
550
-
551
-
552
- <div class="method-description">
553
-
554
-
555
-
556
-
557
-
558
- <div class="method-source-code" id="incr_doc_count-source">
559
- <pre>
560
- <span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 95</span>
561
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">incr_doc_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">count</span>)
562
- <span class="ruby-ivar">@klass_doc_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">klass</span>, <span class="ruby-string">&quot;totals:doccount&quot;</span>, <span class="ruby-identifier">count</span>
563
- <span class="ruby-keyword">end</span></pre>
564
- </div><!-- incr_doc_count-source -->
565
-
566
- </div>
567
-
568
-
569
-
570
-
571
- </div><!-- incr_doc_count-method -->
572
-
573
-
574
- <div id="incr_total_word_count-method" class="method-detail ">
575
- <a name="method-i-incr_total_word_count"></a>
576
-
577
-
578
- <div class="method-heading">
579
- <span class="method-name">incr_total_word_count</span><span
580
- class="method-args">(klass, count)</span>
581
- <span class="method-click-advice">click to toggle source</span>
582
- </div>
583
-
584
-
585
- <div class="method-description">
586
-
587
-
588
-
589
-
590
-
591
- <div class="method-source-code" id="incr_total_word_count-source">
592
- <pre>
593
- <span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 91</span>
594
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">incr_total_word_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">count</span>)
595
- <span class="ruby-ivar">@klass_word_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">klass</span>, <span class="ruby-string">&quot;totals:wordcount&quot;</span>, <span class="ruby-identifier">count</span>
596
- <span class="ruby-keyword">end</span></pre>
597
- </div><!-- incr_total_word_count-source -->
598
-
599
- </div>
600
-
601
-
602
-
603
-
604
- </div><!-- incr_total_word_count-method -->
605
-
606
-
607
- <div id="incr_word_count-method" class="method-detail ">
608
- <a name="method-i-incr_word_count"></a>
609
-
610
-
611
- <div class="method-heading">
612
- <span class="method-name">incr_word_count</span><span
613
- class="method-args">(klass, word, count)</span>
614
- <span class="method-click-advice">click to toggle source</span>
615
- </div>
616
-
617
-
618
- <div class="method-description">
619
-
620
-
621
-
622
-
623
-
624
- <div class="method-source-code" id="incr_word_count-source">
625
- <pre>
626
- <span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 79</span>
627
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">incr_word_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span>)
628
- <span class="ruby-identifier">size</span> = <span class="ruby-identifier">freq_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">word</span>, <span class="ruby-node">&quot;classes:#{klass.to_s}&quot;</span>, <span class="ruby-identifier">count</span>
629
- <span class="ruby-comment"># if this is a new word, increase the klass's vocab size. If the new word</span>
630
- <span class="ruby-comment"># count is 0, then we need to decrement our vocab size</span>
631
- <span class="ruby-keyword">if</span> <span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">count</span>
632
- <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">klass</span>, <span class="ruby-string">&quot;totals:vocabsize&quot;</span>
633
- <span class="ruby-keyword">elsif</span> <span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-value">0</span>
634
- <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">klass</span>, <span class="ruby-string">&quot;totals:vocabsize&quot;</span>, <span class="ruby-value">-1</span>
635
- <span class="ruby-keyword">end</span>
636
- <span class="ruby-identifier">size</span>
637
- <span class="ruby-keyword">end</span></pre>
638
- </div><!-- incr_word_count-source -->
639
-
640
- </div>
641
-
642
-
643
-
644
-
645
- </div><!-- incr_word_count-method -->
646
-
647
-
648
- <div id="init_tables-method" class="method-detail ">
649
- <a name="method-i-init_tables"></a>
650
-
651
-
652
- <div class="method-heading">
653
- <span class="method-name">init_tables</span><span
654
- class="method-args">()</span>
655
- <span class="method-click-advice">click to toggle source</span>
656
- </div>
657
-
658
-
659
- <div class="method-description">
660
-
661
-
662
-
663
-
664
-
665
- <div class="method-source-code" id="init_tables-source">
666
- <pre>
667
- <span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 39</span>
668
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">init_tables</span>
669
- <span class="ruby-keyword">unless</span> <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">has_table?</span> <span class="ruby-ivar">@ftablename</span>
670
- <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">create_table</span> <span class="ruby-ivar">@ftablename</span>, <span class="ruby-string">&quot;classes&quot;</span>, <span class="ruby-string">&quot;total&quot;</span>
671
- <span class="ruby-keyword">end</span>
672
-
673
- <span class="ruby-keyword">unless</span> <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">has_table?</span> <span class="ruby-ivar">@stablename</span>
674
- <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">create_table</span> <span class="ruby-ivar">@stablename</span>, <span class="ruby-string">&quot;totals&quot;</span>
675
- <span class="ruby-keyword">end</span>
676
- <span class="ruby-keyword">end</span></pre>
677
- </div><!-- init_tables-source -->
678
-
679
- </div>
680
-
681
-
682
-
683
-
684
- </div><!-- init_tables-method -->
685
-
686
-
687
- <div id="reset-method" class="method-detail ">
688
- <a name="method-i-reset"></a>
689
-
690
-
691
- <div class="method-heading">
692
- <span class="method-name">reset</span><span
693
- class="method-args">()</span>
694
- <span class="method-click-advice">click to toggle source</span>
695
- </div>
696
-
697
-
698
- <div class="method-description">
699
-
700
-
701
-
702
-
703
-
704
- <div class="method-source-code" id="reset-source">
705
- <pre>
706
- <span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 25</span>
707
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">reset</span>
708
- <span class="ruby-identifier">drop_tables</span>
709
- <span class="ruby-identifier">init_tables</span>
710
- <span class="ruby-keyword">end</span></pre>
711
- </div><!-- reset-source -->
712
-
713
- </div>
714
-
715
-
716
-
717
-
718
- </div><!-- reset-method -->
719
-
720
-
721
- </div><!-- public-instance-method-details -->
722
-
723
- <div id="protected-instance-method-details" class="method-section section">
724
- <h3 class="section-header">Protected Instance Methods</h3>
725
-
726
-
727
- <div id="freq_table-method" class="method-detail ">
728
- <a name="method-i-freq_table"></a>
729
-
730
-
731
- <div class="method-heading">
732
- <span class="method-name">freq_table</span><span
733
- class="method-args">()</span>
734
- <span class="method-click-advice">click to toggle source</span>
735
- </div>
736
-
737
-
738
- <div class="method-description">
739
-
740
-
741
-
742
-
743
-
744
- <div class="method-source-code" id="freq_table-source">
745
- <pre>
746
- <span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 120</span>
747
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">freq_table</span>
748
- <span class="ruby-ivar">@ftable</span> <span class="ruby-operator">||=</span> <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">get_table</span> <span class="ruby-ivar">@ftablename</span>
749
- <span class="ruby-keyword">end</span></pre>
750
- </div><!-- freq_table-source -->
751
-
752
- </div>
753
-
754
-
755
-
756
-
757
- </div><!-- freq_table-method -->
758
-
759
-
760
- <div id="get_summary-method" class="method-detail ">
761
- <a name="method-i-get_summary"></a>
762
-
763
-
764
- <div class="method-heading">
765
- <span class="method-name">get_summary</span><span
766
- class="method-args">(name)</span>
767
- <span class="method-click-advice">click to toggle source</span>
768
- </div>
769
-
770
-
771
- <div class="method-description">
772
-
773
-
774
-
775
-
776
-
777
- <div class="method-source-code" id="get_summary-source">
778
- <pre>
779
- <span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 108</span>
780
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">get_summary</span>(<span class="ruby-identifier">name</span>)
781
- <span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">0</span>
782
- <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">create_scanner</span>(<span class="ruby-string">&quot;&quot;</span>, <span class="ruby-identifier">name</span>) { <span class="ruby-operator">|</span><span class="ruby-identifier">row</span><span class="ruby-operator">|</span>
783
- <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">row</span>.<span class="ruby-identifier">row</span>.<span class="ruby-identifier">intern</span>] = <span class="ruby-identifier">row</span>.<span class="ruby-identifier">columns</span>[<span class="ruby-identifier">name</span>].<span class="ruby-identifier">to_i64</span>
784
- }
785
- <span class="ruby-identifier">counts</span>
786
- <span class="ruby-keyword">end</span></pre>
787
- </div><!-- get_summary-source -->
788
-
789
- </div>
790
-
791
-
792
-
793
-
794
- </div><!-- get_summary-method -->
795
-
796
-
797
- <div id="summary_table-method" class="method-detail ">
798
- <a name="method-i-summary_table"></a>
799
-
800
-
801
- <div class="method-heading">
802
- <span class="method-name">summary_table</span><span
803
- class="method-args">()</span>
804
- <span class="method-click-advice">click to toggle source</span>
805
- </div>
806
-
807
-
808
- <div class="method-description">
809
-
810
-
811
-
812
-
813
-
814
- <div class="method-source-code" id="summary_table-source">
815
- <pre>
816
- <span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 116</span>
817
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">summary_table</span>
818
- <span class="ruby-ivar">@stable</span> <span class="ruby-operator">||=</span> <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">get_table</span> <span class="ruby-ivar">@stablename</span>
819
- <span class="ruby-keyword">end</span></pre>
820
- </div><!-- summary_table-source -->
821
-
822
- </div>
823
-
824
-
825
-
826
-
827
- </div><!-- summary_table-method -->
828
-
829
-
830
- </div><!-- protected-instance-method-details -->
831
-
832
- </div><!-- 5Buntitled-5D -->
833
-
834
-
835
- </div><!-- documentation -->
836
-
837
- <div id="validator-badges">
838
- <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
839
- <p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish
840
- Rdoc Generator</a> 2</small>.</p>
841
- </div>
842
-
843
- </body>
844
- </html>
845
-