ankusa 0.0.8 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. data/README.rdoc +2 -1
  2. data/Rakefile +4 -26
  3. data/lib/ankusa.rb +1 -0
  4. data/lib/ankusa/classifier.rb +3 -0
  5. data/lib/ankusa/naive_bayes.rb +8 -4
  6. data/lib/ankusa/version.rb +3 -0
  7. metadata +6 -33
  8. data/docs/classes/Ankusa.html +0 -182
  9. data/docs/classes/Ankusa/CassandraStorage.html +0 -615
  10. data/docs/classes/Ankusa/Classifier.html +0 -315
  11. data/docs/classes/Ankusa/FileSystemStorage.html +0 -272
  12. data/docs/classes/Ankusa/HBaseStorage.html +0 -594
  13. data/docs/classes/Ankusa/KLDivergenceClassifier.html +0 -194
  14. data/docs/classes/Ankusa/MemoryStorage.html +0 -467
  15. data/docs/classes/Ankusa/NaiveBayesClassifier.html +0 -231
  16. data/docs/classes/Ankusa/TextHash.html +0 -275
  17. data/docs/classes/String.html +0 -172
  18. data/docs/created.rid +0 -1
  19. data/docs/files/README_rdoc.html +0 -294
  20. data/docs/files/lib/ankusa/cassandra_storage_rb.html +0 -108
  21. data/docs/files/lib/ankusa/classifier_rb.html +0 -101
  22. data/docs/files/lib/ankusa/extensions_rb.html +0 -108
  23. data/docs/files/lib/ankusa/file_system_storage_rb.html +0 -108
  24. data/docs/files/lib/ankusa/hasher_rb.html +0 -109
  25. data/docs/files/lib/ankusa/hbase_storage_rb.html +0 -108
  26. data/docs/files/lib/ankusa/kl_divergence_rb.html +0 -101
  27. data/docs/files/lib/ankusa/memory_storage_rb.html +0 -101
  28. data/docs/files/lib/ankusa/naive_bayes_rb.html +0 -101
  29. data/docs/files/lib/ankusa/stopwords_rb.html +0 -101
  30. data/docs/files/lib/ankusa_rb.html +0 -112
  31. data/docs/fr_class_index.html +0 -36
  32. data/docs/fr_file_index.html +0 -38
  33. data/docs/fr_method_index.html +0 -95
  34. data/docs/index.html +0 -24
  35. data/docs/rdoc-style.css +0 -208
@@ -1,594 +0,0 @@
1
- <?xml version="1.0" encoding="iso-8859-1"?>
2
- <!DOCTYPE html
3
- PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
-
6
- <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
- <head>
8
- <title>Class: Ankusa::HBaseStorage</title>
9
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
- <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
- <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
12
- <script type="text/javascript">
13
- // <![CDATA[
14
-
15
- function popupCode( url ) {
16
- window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
- }
18
-
19
- function toggleCode( id ) {
20
- if ( document.getElementById )
21
- elem = document.getElementById( id );
22
- else if ( document.all )
23
- elem = eval( "document.all." + id );
24
- else
25
- return false;
26
-
27
- elemStyle = elem.style;
28
-
29
- if ( elemStyle.display != "block" ) {
30
- elemStyle.display = "block"
31
- } else {
32
- elemStyle.display = "none"
33
- }
34
-
35
- return true;
36
- }
37
-
38
- // Make codeblocks hidden by default
39
- document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
-
41
- // ]]>
42
- </script>
43
-
44
- </head>
45
- <body>
46
-
47
-
48
-
49
- <div id="classHeader">
50
- <table class="header-table">
51
- <tr class="top-aligned-row">
52
- <td><strong>Class</strong></td>
53
- <td class="class-name-in-header">Ankusa::HBaseStorage</td>
54
- </tr>
55
- <tr class="top-aligned-row">
56
- <td><strong>In:</strong></td>
57
- <td>
58
- <a href="../../files/lib/ankusa/hbase_storage_rb.html">
59
- lib/ankusa/hbase_storage.rb
60
- </a>
61
- <br />
62
- </td>
63
- </tr>
64
-
65
- <tr class="top-aligned-row">
66
- <td><strong>Parent:</strong></td>
67
- <td>
68
- Object
69
- </td>
70
- </tr>
71
- </table>
72
- </div>
73
- <!-- banner header -->
74
-
75
- <div id="bodyContent">
76
-
77
-
78
-
79
- <div id="contextContent">
80
-
81
-
82
-
83
- </div>
84
-
85
- <div id="method-list">
86
- <h3 class="section-bar">Methods</h3>
87
-
88
- <div class="name-list">
89
- <a href="#M000044">classnames</a>&nbsp;&nbsp;
90
- <a href="#M000056">close</a>&nbsp;&nbsp;
91
- <a href="#M000055">doc_count_totals</a>&nbsp;&nbsp;
92
- <a href="#M000046">drop_tables</a>&nbsp;&nbsp;
93
- <a href="#M000059">freq_table</a>&nbsp;&nbsp;
94
- <a href="#M000051">get_doc_count</a>&nbsp;&nbsp;
95
- <a href="#M000057">get_summary</a>&nbsp;&nbsp;
96
- <a href="#M000050">get_total_word_count</a>&nbsp;&nbsp;
97
- <a href="#M000049">get_vocabulary_sizes</a>&nbsp;&nbsp;
98
- <a href="#M000048">get_word_counts</a>&nbsp;&nbsp;
99
- <a href="#M000054">incr_doc_count</a>&nbsp;&nbsp;
100
- <a href="#M000053">incr_total_word_count</a>&nbsp;&nbsp;
101
- <a href="#M000052">incr_word_count</a>&nbsp;&nbsp;
102
- <a href="#M000047">init_tables</a>&nbsp;&nbsp;
103
- <a href="#M000043">new</a>&nbsp;&nbsp;
104
- <a href="#M000045">reset</a>&nbsp;&nbsp;
105
- <a href="#M000058">summary_table</a>&nbsp;&nbsp;
106
- </div>
107
- </div>
108
-
109
- </div>
110
-
111
-
112
- <!-- if includes -->
113
-
114
- <div id="section">
115
-
116
-
117
-
118
-
119
-
120
- <div id="attribute-list">
121
- <h3 class="section-bar">Attributes</h3>
122
-
123
- <div class="name-list">
124
- <table>
125
- <tr class="top-aligned-row context-row">
126
- <td class="context-item-name">hbase</td>
127
- <td class="context-item-value">&nbsp;[R]&nbsp;</td>
128
- <td class="context-item-desc"></td>
129
- </tr>
130
- </table>
131
- </div>
132
- </div>
133
-
134
-
135
-
136
- <!-- if method_list -->
137
- <div id="methods">
138
- <h3 class="section-bar">Public Class methods</h3>
139
-
140
- <div id="method-M000043" class="method-detail">
141
- <a name="M000043"></a>
142
-
143
- <div class="method-heading">
144
- <a href="#M000043" class="method-signature">
145
- <span class="method-name">new</span><span class="method-args">(host='localhost', port=9090, frequency_tablename=&quot;ankusa_word_frequencies&quot;, summary_tablename=&quot;ankusa_summary&quot;)</span>
146
- </a>
147
- </div>
148
-
149
- <div class="method-description">
150
- <p><a class="source-toggle" href="#"
151
- onclick="toggleCode('M000043-source');return false;">[Source]</a></p>
152
- <div class="method-source-code" id="M000043-source">
153
- <pre>
154
- <span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 8</span>
155
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">host</span>=<span class="ruby-value str">'localhost'</span>, <span class="ruby-identifier">port</span>=<span class="ruby-value">9090</span>, <span class="ruby-identifier">frequency_tablename</span>=<span class="ruby-value str">&quot;ankusa_word_frequencies&quot;</span>, <span class="ruby-identifier">summary_tablename</span>=<span class="ruby-value str">&quot;ankusa_summary&quot;</span>)
156
- <span class="ruby-ivar">@hbase</span> = <span class="ruby-constant">HBaseRb</span><span class="ruby-operator">::</span><span class="ruby-constant">Client</span>.<span class="ruby-identifier">new</span> <span class="ruby-identifier">host</span>, <span class="ruby-identifier">port</span>
157
- <span class="ruby-ivar">@ftablename</span> = <span class="ruby-identifier">frequency_tablename</span>
158
- <span class="ruby-ivar">@stablename</span> = <span class="ruby-identifier">summary_tablename</span>
159
- <span class="ruby-ivar">@klass_word_counts</span> = {}
160
- <span class="ruby-ivar">@klass_doc_counts</span> = {}
161
- <span class="ruby-identifier">init_tables</span>
162
- <span class="ruby-keyword kw">end</span>
163
- </pre>
164
- </div>
165
- </div>
166
- </div>
167
-
168
- <h3 class="section-bar">Public Instance methods</h3>
169
-
170
- <div id="method-M000044" class="method-detail">
171
- <a name="M000044"></a>
172
-
173
- <div class="method-heading">
174
- <a href="#M000044" class="method-signature">
175
- <span class="method-name">classnames</span><span class="method-args">()</span>
176
- </a>
177
- </div>
178
-
179
- <div class="method-description">
180
- <p><a class="source-toggle" href="#"
181
- onclick="toggleCode('M000044-source');return false;">[Source]</a></p>
182
- <div class="method-source-code" id="M000044-source">
183
- <pre>
184
- <span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 17</span>
185
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classnames</span>
186
- <span class="ruby-identifier">cs</span> = []
187
- <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">create_scanner</span>(<span class="ruby-value str">&quot;&quot;</span>, <span class="ruby-value str">&quot;totals&quot;</span>) { <span class="ruby-operator">|</span><span class="ruby-identifier">row</span><span class="ruby-operator">|</span>
188
- <span class="ruby-identifier">cs</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-identifier">row</span>.<span class="ruby-identifier">row</span>.<span class="ruby-identifier">intern</span>
189
- }
190
- <span class="ruby-identifier">cs</span>
191
- <span class="ruby-keyword kw">end</span>
192
- </pre>
193
- </div>
194
- </div>
195
- </div>
196
-
197
- <div id="method-M000056" class="method-detail">
198
- <a name="M000056"></a>
199
-
200
- <div class="method-heading">
201
- <a href="#M000056" class="method-signature">
202
- <span class="method-name">close</span><span class="method-args">()</span>
203
- </a>
204
- </div>
205
-
206
- <div class="method-description">
207
- <p><a class="source-toggle" href="#"
208
- onclick="toggleCode('M000056-source');return false;">[Source]</a></p>
209
- <div class="method-source-code" id="M000056-source">
210
- <pre>
211
- <span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 103</span>
212
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">close</span>
213
- <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">close</span>
214
- <span class="ruby-keyword kw">end</span>
215
- </pre>
216
- </div>
217
- </div>
218
- </div>
219
-
220
- <div id="method-M000055" class="method-detail">
221
- <a name="M000055"></a>
222
-
223
- <div class="method-heading">
224
- <a href="#M000055" class="method-signature">
225
- <span class="method-name">doc_count_totals</span><span class="method-args">()</span>
226
- </a>
227
- </div>
228
-
229
- <div class="method-description">
230
- <p><a class="source-toggle" href="#"
231
- onclick="toggleCode('M000055-source');return false;">[Source]</a></p>
232
- <div class="method-source-code" id="M000055-source">
233
- <pre>
234
- <span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 99</span>
235
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">doc_count_totals</span>
236
- <span class="ruby-identifier">get_summary</span> <span class="ruby-value str">&quot;totals:doccount&quot;</span>
237
- <span class="ruby-keyword kw">end</span>
238
- </pre>
239
- </div>
240
- </div>
241
- </div>
242
-
243
- <div id="method-M000046" class="method-detail">
244
- <a name="M000046"></a>
245
-
246
- <div class="method-heading">
247
- <a href="#M000046" class="method-signature">
248
- <span class="method-name">drop_tables</span><span class="method-args">()</span>
249
- </a>
250
- </div>
251
-
252
- <div class="method-description">
253
- <p><a class="source-toggle" href="#"
254
- onclick="toggleCode('M000046-source');return false;">[Source]</a></p>
255
- <div class="method-source-code" id="M000046-source">
256
- <pre>
257
- <span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 30</span>
258
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">drop_tables</span>
259
- <span class="ruby-identifier">freq_table</span>.<span class="ruby-identifier">delete</span>
260
- <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">delete</span>
261
- <span class="ruby-ivar">@stable</span> = <span class="ruby-keyword kw">nil</span>
262
- <span class="ruby-ivar">@ftable</span> = <span class="ruby-keyword kw">nil</span>
263
- <span class="ruby-ivar">@klass_word_counts</span> = {}
264
- <span class="ruby-ivar">@klass_doc_counts</span> = {}
265
- <span class="ruby-keyword kw">end</span>
266
- </pre>
267
- </div>
268
- </div>
269
- </div>
270
-
271
- <div id="method-M000051" class="method-detail">
272
- <a name="M000051"></a>
273
-
274
- <div class="method-heading">
275
- <a href="#M000051" class="method-signature">
276
- <span class="method-name">get_doc_count</span><span class="method-args">(klass)</span>
277
- </a>
278
- </div>
279
-
280
- <div class="method-description">
281
- <p><a class="source-toggle" href="#"
282
- onclick="toggleCode('M000051-source');return false;">[Source]</a></p>
283
- <div class="method-source-code" id="M000051-source">
284
- <pre>
285
- <span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 73</span>
286
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_doc_count</span>(<span class="ruby-identifier">klass</span>)
287
- <span class="ruby-ivar">@klass_doc_counts</span>.<span class="ruby-identifier">fetch</span>(<span class="ruby-identifier">klass</span>) {
288
- <span class="ruby-ivar">@klass_doc_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-value str">&quot;totals:doccount&quot;</span>).<span class="ruby-identifier">first</span>.<span class="ruby-identifier">to_i64</span>.<span class="ruby-identifier">to_f</span>
289
- }
290
- <span class="ruby-keyword kw">end</span>
291
- </pre>
292
- </div>
293
- </div>
294
- </div>
295
-
296
- <div id="method-M000050" class="method-detail">
297
- <a name="M000050"></a>
298
-
299
- <div class="method-heading">
300
- <a href="#M000050" class="method-signature">
301
- <span class="method-name">get_total_word_count</span><span class="method-args">(klass)</span>
302
- </a>
303
- </div>
304
-
305
- <div class="method-description">
306
- <p><a class="source-toggle" href="#"
307
- onclick="toggleCode('M000050-source');return false;">[Source]</a></p>
308
- <div class="method-source-code" id="M000050-source">
309
- <pre>
310
- <span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 67</span>
311
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_total_word_count</span>(<span class="ruby-identifier">klass</span>)
312
- <span class="ruby-ivar">@klass_word_counts</span>.<span class="ruby-identifier">fetch</span>(<span class="ruby-identifier">klass</span>) {
313
- <span class="ruby-ivar">@klass_word_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-value str">&quot;totals:wordcount&quot;</span>).<span class="ruby-identifier">first</span>.<span class="ruby-identifier">to_i64</span>.<span class="ruby-identifier">to_f</span>
314
- }
315
- <span class="ruby-keyword kw">end</span>
316
- </pre>
317
- </div>
318
- </div>
319
- </div>
320
-
321
- <div id="method-M000049" class="method-detail">
322
- <a name="M000049"></a>
323
-
324
- <div class="method-heading">
325
- <a href="#M000049" class="method-signature">
326
- <span class="method-name">get_vocabulary_sizes</span><span class="method-args">()</span>
327
- </a>
328
- </div>
329
-
330
- <div class="method-description">
331
- <p><a class="source-toggle" href="#"
332
- onclick="toggleCode('M000049-source');return false;">[Source]</a></p>
333
- <div class="method-source-code" id="M000049-source">
334
- <pre>
335
- <span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 63</span>
336
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_vocabulary_sizes</span>
337
- <span class="ruby-identifier">get_summary</span> <span class="ruby-value str">&quot;totals:vocabsize&quot;</span>
338
- <span class="ruby-keyword kw">end</span>
339
- </pre>
340
- </div>
341
- </div>
342
- </div>
343
-
344
- <div id="method-M000048" class="method-detail">
345
- <a name="M000048"></a>
346
-
347
- <div class="method-heading">
348
- <a href="#M000048" class="method-signature">
349
- <span class="method-name">get_word_counts</span><span class="method-args">(word)</span>
350
- </a>
351
- </div>
352
-
353
- <div class="method-description">
354
- <p><a class="source-toggle" href="#"
355
- onclick="toggleCode('M000048-source');return false;">[Source]</a></p>
356
- <div class="method-source-code" id="M000048-source">
357
- <pre>
358
- <span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 49</span>
359
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_word_counts</span>(<span class="ruby-identifier">word</span>)
360
- <span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value">0</span>)
361
- <span class="ruby-identifier">row</span> = <span class="ruby-identifier">freq_table</span>.<span class="ruby-identifier">get_row</span>(<span class="ruby-identifier">word</span>)
362
- <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">counts</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">row</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">==</span> <span class="ruby-value">0</span>
363
-
364
- <span class="ruby-identifier">row</span>.<span class="ruby-identifier">first</span>.<span class="ruby-identifier">columns</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">colname</span>, <span class="ruby-identifier">cell</span><span class="ruby-operator">|</span>
365
- <span class="ruby-identifier">classname</span> = <span class="ruby-identifier">colname</span>.<span class="ruby-identifier">split</span>(<span class="ruby-value str">':'</span>)[<span class="ruby-value">1</span>].<span class="ruby-identifier">intern</span>
366
- <span class="ruby-comment cmt"># in case untrain has been called too many times</span>
367
- <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">classname</span>] = [<span class="ruby-identifier">cell</span>.<span class="ruby-identifier">to_i64</span>.<span class="ruby-identifier">to_f</span>, <span class="ruby-value">0</span>].<span class="ruby-identifier">max</span>
368
- }
369
-
370
- <span class="ruby-identifier">counts</span>
371
- <span class="ruby-keyword kw">end</span>
372
- </pre>
373
- </div>
374
- </div>
375
- </div>
376
-
377
- <div id="method-M000054" class="method-detail">
378
- <a name="M000054"></a>
379
-
380
- <div class="method-heading">
381
- <a href="#M000054" class="method-signature">
382
- <span class="method-name">incr_doc_count</span><span class="method-args">(klass, count)</span>
383
- </a>
384
- </div>
385
-
386
- <div class="method-description">
387
- <p><a class="source-toggle" href="#"
388
- onclick="toggleCode('M000054-source');return false;">[Source]</a></p>
389
- <div class="method-source-code" id="M000054-source">
390
- <pre>
391
- <span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 95</span>
392
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">incr_doc_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">count</span>)
393
- <span class="ruby-ivar">@klass_doc_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">klass</span>, <span class="ruby-value str">&quot;totals:doccount&quot;</span>, <span class="ruby-identifier">count</span>
394
- <span class="ruby-keyword kw">end</span>
395
- </pre>
396
- </div>
397
- </div>
398
- </div>
399
-
400
- <div id="method-M000053" class="method-detail">
401
- <a name="M000053"></a>
402
-
403
- <div class="method-heading">
404
- <a href="#M000053" class="method-signature">
405
- <span class="method-name">incr_total_word_count</span><span class="method-args">(klass, count)</span>
406
- </a>
407
- </div>
408
-
409
- <div class="method-description">
410
- <p><a class="source-toggle" href="#"
411
- onclick="toggleCode('M000053-source');return false;">[Source]</a></p>
412
- <div class="method-source-code" id="M000053-source">
413
- <pre>
414
- <span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 91</span>
415
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">incr_total_word_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">count</span>)
416
- <span class="ruby-ivar">@klass_word_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">klass</span>, <span class="ruby-value str">&quot;totals:wordcount&quot;</span>, <span class="ruby-identifier">count</span>
417
- <span class="ruby-keyword kw">end</span>
418
- </pre>
419
- </div>
420
- </div>
421
- </div>
422
-
423
- <div id="method-M000052" class="method-detail">
424
- <a name="M000052"></a>
425
-
426
- <div class="method-heading">
427
- <a href="#M000052" class="method-signature">
428
- <span class="method-name">incr_word_count</span><span class="method-args">(klass, word, count)</span>
429
- </a>
430
- </div>
431
-
432
- <div class="method-description">
433
- <p><a class="source-toggle" href="#"
434
- onclick="toggleCode('M000052-source');return false;">[Source]</a></p>
435
- <div class="method-source-code" id="M000052-source">
436
- <pre>
437
- <span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 79</span>
438
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">incr_word_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span>)
439
- <span class="ruby-identifier">size</span> = <span class="ruby-identifier">freq_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">word</span>, <span class="ruby-node">&quot;classes:#{klass.to_s}&quot;</span>, <span class="ruby-identifier">count</span>
440
- <span class="ruby-comment cmt"># if this is a new word, increase the klass's vocab size. If the new word</span>
441
- <span class="ruby-comment cmt"># count is 0, then we need to decrement our vocab size</span>
442
- <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">count</span>
443
- <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">klass</span>, <span class="ruby-value str">&quot;totals:vocabsize&quot;</span>
444
- <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-value">0</span>
445
- <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">klass</span>, <span class="ruby-value str">&quot;totals:vocabsize&quot;</span>, <span class="ruby-value">-1</span>
446
- <span class="ruby-keyword kw">end</span>
447
- <span class="ruby-identifier">size</span>
448
- <span class="ruby-keyword kw">end</span>
449
- </pre>
450
- </div>
451
- </div>
452
- </div>
453
-
454
- <div id="method-M000047" class="method-detail">
455
- <a name="M000047"></a>
456
-
457
- <div class="method-heading">
458
- <a href="#M000047" class="method-signature">
459
- <span class="method-name">init_tables</span><span class="method-args">()</span>
460
- </a>
461
- </div>
462
-
463
- <div class="method-description">
464
- <p><a class="source-toggle" href="#"
465
- onclick="toggleCode('M000047-source');return false;">[Source]</a></p>
466
- <div class="method-source-code" id="M000047-source">
467
- <pre>
468
- <span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 39</span>
469
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">init_tables</span>
470
- <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">not</span> <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">has_table?</span> <span class="ruby-ivar">@ftablename</span>
471
- <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">create_table</span> <span class="ruby-ivar">@ftablename</span>, <span class="ruby-value str">&quot;classes&quot;</span>, <span class="ruby-value str">&quot;total&quot;</span>
472
- <span class="ruby-keyword kw">end</span>
473
-
474
- <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">not</span> <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">has_table?</span> <span class="ruby-ivar">@stablename</span>
475
- <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">create_table</span> <span class="ruby-ivar">@stablename</span>, <span class="ruby-value str">&quot;totals&quot;</span>
476
- <span class="ruby-keyword kw">end</span>
477
- <span class="ruby-keyword kw">end</span>
478
- </pre>
479
- </div>
480
- </div>
481
- </div>
482
-
483
- <div id="method-M000045" class="method-detail">
484
- <a name="M000045"></a>
485
-
486
- <div class="method-heading">
487
- <a href="#M000045" class="method-signature">
488
- <span class="method-name">reset</span><span class="method-args">()</span>
489
- </a>
490
- </div>
491
-
492
- <div class="method-description">
493
- <p><a class="source-toggle" href="#"
494
- onclick="toggleCode('M000045-source');return false;">[Source]</a></p>
495
- <div class="method-source-code" id="M000045-source">
496
- <pre>
497
- <span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 25</span>
498
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">reset</span>
499
- <span class="ruby-identifier">drop_tables</span>
500
- <span class="ruby-identifier">init_tables</span>
501
- <span class="ruby-keyword kw">end</span>
502
- </pre>
503
- </div>
504
- </div>
505
- </div>
506
-
507
- <h3 class="section-bar">Protected Instance methods</h3>
508
-
509
- <div id="method-M000059" class="method-detail">
510
- <a name="M000059"></a>
511
-
512
- <div class="method-heading">
513
- <a href="#M000059" class="method-signature">
514
- <span class="method-name">freq_table</span><span class="method-args">()</span>
515
- </a>
516
- </div>
517
-
518
- <div class="method-description">
519
- <p><a class="source-toggle" href="#"
520
- onclick="toggleCode('M000059-source');return false;">[Source]</a></p>
521
- <div class="method-source-code" id="M000059-source">
522
- <pre>
523
- <span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 120</span>
524
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">freq_table</span>
525
- <span class="ruby-ivar">@ftable</span> <span class="ruby-operator">||=</span> <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">get_table</span> <span class="ruby-ivar">@ftablename</span>
526
- <span class="ruby-keyword kw">end</span>
527
- </pre>
528
- </div>
529
- </div>
530
- </div>
531
-
532
- <div id="method-M000057" class="method-detail">
533
- <a name="M000057"></a>
534
-
535
- <div class="method-heading">
536
- <a href="#M000057" class="method-signature">
537
- <span class="method-name">get_summary</span><span class="method-args">(name)</span>
538
- </a>
539
- </div>
540
-
541
- <div class="method-description">
542
- <p><a class="source-toggle" href="#"
543
- onclick="toggleCode('M000057-source');return false;">[Source]</a></p>
544
- <div class="method-source-code" id="M000057-source">
545
- <pre>
546
- <span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 108</span>
547
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_summary</span>(<span class="ruby-identifier">name</span>)
548
- <span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">0</span>
549
- <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">create_scanner</span>(<span class="ruby-value str">&quot;&quot;</span>, <span class="ruby-identifier">name</span>) { <span class="ruby-operator">|</span><span class="ruby-identifier">row</span><span class="ruby-operator">|</span>
550
- <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">row</span>.<span class="ruby-identifier">row</span>.<span class="ruby-identifier">intern</span>] = <span class="ruby-identifier">row</span>.<span class="ruby-identifier">columns</span>[<span class="ruby-identifier">name</span>].<span class="ruby-identifier">to_i64</span>
551
- }
552
- <span class="ruby-identifier">counts</span>
553
- <span class="ruby-keyword kw">end</span>
554
- </pre>
555
- </div>
556
- </div>
557
- </div>
558
-
559
- <div id="method-M000058" class="method-detail">
560
- <a name="M000058"></a>
561
-
562
- <div class="method-heading">
563
- <a href="#M000058" class="method-signature">
564
- <span class="method-name">summary_table</span><span class="method-args">()</span>
565
- </a>
566
- </div>
567
-
568
- <div class="method-description">
569
- <p><a class="source-toggle" href="#"
570
- onclick="toggleCode('M000058-source');return false;">[Source]</a></p>
571
- <div class="method-source-code" id="M000058-source">
572
- <pre>
573
- <span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 116</span>
574
- <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">summary_table</span>
575
- <span class="ruby-ivar">@stable</span> <span class="ruby-operator">||=</span> <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">get_table</span> <span class="ruby-ivar">@stablename</span>
576
- <span class="ruby-keyword kw">end</span>
577
- </pre>
578
- </div>
579
- </div>
580
- </div>
581
-
582
-
583
- </div>
584
-
585
-
586
- </div>
587
-
588
-
589
- <div id="validator-badges">
590
- <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
591
- </div>
592
-
593
- </body>
594
- </html>