ankusa 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +2 -1
- data/Rakefile +4 -26
- data/lib/ankusa.rb +1 -0
- data/lib/ankusa/classifier.rb +3 -0
- data/lib/ankusa/naive_bayes.rb +8 -4
- data/lib/ankusa/version.rb +3 -0
- metadata +6 -33
- data/docs/classes/Ankusa.html +0 -182
- data/docs/classes/Ankusa/CassandraStorage.html +0 -615
- data/docs/classes/Ankusa/Classifier.html +0 -315
- data/docs/classes/Ankusa/FileSystemStorage.html +0 -272
- data/docs/classes/Ankusa/HBaseStorage.html +0 -594
- data/docs/classes/Ankusa/KLDivergenceClassifier.html +0 -194
- data/docs/classes/Ankusa/MemoryStorage.html +0 -467
- data/docs/classes/Ankusa/NaiveBayesClassifier.html +0 -231
- data/docs/classes/Ankusa/TextHash.html +0 -275
- data/docs/classes/String.html +0 -172
- data/docs/created.rid +0 -1
- data/docs/files/README_rdoc.html +0 -294
- data/docs/files/lib/ankusa/cassandra_storage_rb.html +0 -108
- data/docs/files/lib/ankusa/classifier_rb.html +0 -101
- data/docs/files/lib/ankusa/extensions_rb.html +0 -108
- data/docs/files/lib/ankusa/file_system_storage_rb.html +0 -108
- data/docs/files/lib/ankusa/hasher_rb.html +0 -109
- data/docs/files/lib/ankusa/hbase_storage_rb.html +0 -108
- data/docs/files/lib/ankusa/kl_divergence_rb.html +0 -101
- data/docs/files/lib/ankusa/memory_storage_rb.html +0 -101
- data/docs/files/lib/ankusa/naive_bayes_rb.html +0 -101
- data/docs/files/lib/ankusa/stopwords_rb.html +0 -101
- data/docs/files/lib/ankusa_rb.html +0 -112
- data/docs/fr_class_index.html +0 -36
- data/docs/fr_file_index.html +0 -38
- data/docs/fr_method_index.html +0 -95
- data/docs/index.html +0 -24
- data/docs/rdoc-style.css +0 -208
@@ -1,594 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
-
<!DOCTYPE html
|
3
|
-
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
-
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
-
|
6
|
-
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
-
<head>
|
8
|
-
<title>Class: Ankusa::HBaseStorage</title>
|
9
|
-
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
-
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
-
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
-
<script type="text/javascript">
|
13
|
-
// <![CDATA[
|
14
|
-
|
15
|
-
function popupCode( url ) {
|
16
|
-
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
-
}
|
18
|
-
|
19
|
-
function toggleCode( id ) {
|
20
|
-
if ( document.getElementById )
|
21
|
-
elem = document.getElementById( id );
|
22
|
-
else if ( document.all )
|
23
|
-
elem = eval( "document.all." + id );
|
24
|
-
else
|
25
|
-
return false;
|
26
|
-
|
27
|
-
elemStyle = elem.style;
|
28
|
-
|
29
|
-
if ( elemStyle.display != "block" ) {
|
30
|
-
elemStyle.display = "block"
|
31
|
-
} else {
|
32
|
-
elemStyle.display = "none"
|
33
|
-
}
|
34
|
-
|
35
|
-
return true;
|
36
|
-
}
|
37
|
-
|
38
|
-
// Make codeblocks hidden by default
|
39
|
-
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
-
|
41
|
-
// ]]>
|
42
|
-
</script>
|
43
|
-
|
44
|
-
</head>
|
45
|
-
<body>
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
<div id="classHeader">
|
50
|
-
<table class="header-table">
|
51
|
-
<tr class="top-aligned-row">
|
52
|
-
<td><strong>Class</strong></td>
|
53
|
-
<td class="class-name-in-header">Ankusa::HBaseStorage</td>
|
54
|
-
</tr>
|
55
|
-
<tr class="top-aligned-row">
|
56
|
-
<td><strong>In:</strong></td>
|
57
|
-
<td>
|
58
|
-
<a href="../../files/lib/ankusa/hbase_storage_rb.html">
|
59
|
-
lib/ankusa/hbase_storage.rb
|
60
|
-
</a>
|
61
|
-
<br />
|
62
|
-
</td>
|
63
|
-
</tr>
|
64
|
-
|
65
|
-
<tr class="top-aligned-row">
|
66
|
-
<td><strong>Parent:</strong></td>
|
67
|
-
<td>
|
68
|
-
Object
|
69
|
-
</td>
|
70
|
-
</tr>
|
71
|
-
</table>
|
72
|
-
</div>
|
73
|
-
<!-- banner header -->
|
74
|
-
|
75
|
-
<div id="bodyContent">
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
<div id="contextContent">
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
</div>
|
84
|
-
|
85
|
-
<div id="method-list">
|
86
|
-
<h3 class="section-bar">Methods</h3>
|
87
|
-
|
88
|
-
<div class="name-list">
|
89
|
-
<a href="#M000044">classnames</a>
|
90
|
-
<a href="#M000056">close</a>
|
91
|
-
<a href="#M000055">doc_count_totals</a>
|
92
|
-
<a href="#M000046">drop_tables</a>
|
93
|
-
<a href="#M000059">freq_table</a>
|
94
|
-
<a href="#M000051">get_doc_count</a>
|
95
|
-
<a href="#M000057">get_summary</a>
|
96
|
-
<a href="#M000050">get_total_word_count</a>
|
97
|
-
<a href="#M000049">get_vocabulary_sizes</a>
|
98
|
-
<a href="#M000048">get_word_counts</a>
|
99
|
-
<a href="#M000054">incr_doc_count</a>
|
100
|
-
<a href="#M000053">incr_total_word_count</a>
|
101
|
-
<a href="#M000052">incr_word_count</a>
|
102
|
-
<a href="#M000047">init_tables</a>
|
103
|
-
<a href="#M000043">new</a>
|
104
|
-
<a href="#M000045">reset</a>
|
105
|
-
<a href="#M000058">summary_table</a>
|
106
|
-
</div>
|
107
|
-
</div>
|
108
|
-
|
109
|
-
</div>
|
110
|
-
|
111
|
-
|
112
|
-
<!-- if includes -->
|
113
|
-
|
114
|
-
<div id="section">
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
<div id="attribute-list">
|
121
|
-
<h3 class="section-bar">Attributes</h3>
|
122
|
-
|
123
|
-
<div class="name-list">
|
124
|
-
<table>
|
125
|
-
<tr class="top-aligned-row context-row">
|
126
|
-
<td class="context-item-name">hbase</td>
|
127
|
-
<td class="context-item-value"> [R] </td>
|
128
|
-
<td class="context-item-desc"></td>
|
129
|
-
</tr>
|
130
|
-
</table>
|
131
|
-
</div>
|
132
|
-
</div>
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
<!-- if method_list -->
|
137
|
-
<div id="methods">
|
138
|
-
<h3 class="section-bar">Public Class methods</h3>
|
139
|
-
|
140
|
-
<div id="method-M000043" class="method-detail">
|
141
|
-
<a name="M000043"></a>
|
142
|
-
|
143
|
-
<div class="method-heading">
|
144
|
-
<a href="#M000043" class="method-signature">
|
145
|
-
<span class="method-name">new</span><span class="method-args">(host='localhost', port=9090, frequency_tablename="ankusa_word_frequencies", summary_tablename="ankusa_summary")</span>
|
146
|
-
</a>
|
147
|
-
</div>
|
148
|
-
|
149
|
-
<div class="method-description">
|
150
|
-
<p><a class="source-toggle" href="#"
|
151
|
-
onclick="toggleCode('M000043-source');return false;">[Source]</a></p>
|
152
|
-
<div class="method-source-code" id="M000043-source">
|
153
|
-
<pre>
|
154
|
-
<span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 8</span>
|
155
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">host</span>=<span class="ruby-value str">'localhost'</span>, <span class="ruby-identifier">port</span>=<span class="ruby-value">9090</span>, <span class="ruby-identifier">frequency_tablename</span>=<span class="ruby-value str">"ankusa_word_frequencies"</span>, <span class="ruby-identifier">summary_tablename</span>=<span class="ruby-value str">"ankusa_summary"</span>)
|
156
|
-
<span class="ruby-ivar">@hbase</span> = <span class="ruby-constant">HBaseRb</span><span class="ruby-operator">::</span><span class="ruby-constant">Client</span>.<span class="ruby-identifier">new</span> <span class="ruby-identifier">host</span>, <span class="ruby-identifier">port</span>
|
157
|
-
<span class="ruby-ivar">@ftablename</span> = <span class="ruby-identifier">frequency_tablename</span>
|
158
|
-
<span class="ruby-ivar">@stablename</span> = <span class="ruby-identifier">summary_tablename</span>
|
159
|
-
<span class="ruby-ivar">@klass_word_counts</span> = {}
|
160
|
-
<span class="ruby-ivar">@klass_doc_counts</span> = {}
|
161
|
-
<span class="ruby-identifier">init_tables</span>
|
162
|
-
<span class="ruby-keyword kw">end</span>
|
163
|
-
</pre>
|
164
|
-
</div>
|
165
|
-
</div>
|
166
|
-
</div>
|
167
|
-
|
168
|
-
<h3 class="section-bar">Public Instance methods</h3>
|
169
|
-
|
170
|
-
<div id="method-M000044" class="method-detail">
|
171
|
-
<a name="M000044"></a>
|
172
|
-
|
173
|
-
<div class="method-heading">
|
174
|
-
<a href="#M000044" class="method-signature">
|
175
|
-
<span class="method-name">classnames</span><span class="method-args">()</span>
|
176
|
-
</a>
|
177
|
-
</div>
|
178
|
-
|
179
|
-
<div class="method-description">
|
180
|
-
<p><a class="source-toggle" href="#"
|
181
|
-
onclick="toggleCode('M000044-source');return false;">[Source]</a></p>
|
182
|
-
<div class="method-source-code" id="M000044-source">
|
183
|
-
<pre>
|
184
|
-
<span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 17</span>
|
185
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classnames</span>
|
186
|
-
<span class="ruby-identifier">cs</span> = []
|
187
|
-
<span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">create_scanner</span>(<span class="ruby-value str">""</span>, <span class="ruby-value str">"totals"</span>) { <span class="ruby-operator">|</span><span class="ruby-identifier">row</span><span class="ruby-operator">|</span>
|
188
|
-
<span class="ruby-identifier">cs</span> <span class="ruby-operator"><<</span> <span class="ruby-identifier">row</span>.<span class="ruby-identifier">row</span>.<span class="ruby-identifier">intern</span>
|
189
|
-
}
|
190
|
-
<span class="ruby-identifier">cs</span>
|
191
|
-
<span class="ruby-keyword kw">end</span>
|
192
|
-
</pre>
|
193
|
-
</div>
|
194
|
-
</div>
|
195
|
-
</div>
|
196
|
-
|
197
|
-
<div id="method-M000056" class="method-detail">
|
198
|
-
<a name="M000056"></a>
|
199
|
-
|
200
|
-
<div class="method-heading">
|
201
|
-
<a href="#M000056" class="method-signature">
|
202
|
-
<span class="method-name">close</span><span class="method-args">()</span>
|
203
|
-
</a>
|
204
|
-
</div>
|
205
|
-
|
206
|
-
<div class="method-description">
|
207
|
-
<p><a class="source-toggle" href="#"
|
208
|
-
onclick="toggleCode('M000056-source');return false;">[Source]</a></p>
|
209
|
-
<div class="method-source-code" id="M000056-source">
|
210
|
-
<pre>
|
211
|
-
<span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 103</span>
|
212
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">close</span>
|
213
|
-
<span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">close</span>
|
214
|
-
<span class="ruby-keyword kw">end</span>
|
215
|
-
</pre>
|
216
|
-
</div>
|
217
|
-
</div>
|
218
|
-
</div>
|
219
|
-
|
220
|
-
<div id="method-M000055" class="method-detail">
|
221
|
-
<a name="M000055"></a>
|
222
|
-
|
223
|
-
<div class="method-heading">
|
224
|
-
<a href="#M000055" class="method-signature">
|
225
|
-
<span class="method-name">doc_count_totals</span><span class="method-args">()</span>
|
226
|
-
</a>
|
227
|
-
</div>
|
228
|
-
|
229
|
-
<div class="method-description">
|
230
|
-
<p><a class="source-toggle" href="#"
|
231
|
-
onclick="toggleCode('M000055-source');return false;">[Source]</a></p>
|
232
|
-
<div class="method-source-code" id="M000055-source">
|
233
|
-
<pre>
|
234
|
-
<span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 99</span>
|
235
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">doc_count_totals</span>
|
236
|
-
<span class="ruby-identifier">get_summary</span> <span class="ruby-value str">"totals:doccount"</span>
|
237
|
-
<span class="ruby-keyword kw">end</span>
|
238
|
-
</pre>
|
239
|
-
</div>
|
240
|
-
</div>
|
241
|
-
</div>
|
242
|
-
|
243
|
-
<div id="method-M000046" class="method-detail">
|
244
|
-
<a name="M000046"></a>
|
245
|
-
|
246
|
-
<div class="method-heading">
|
247
|
-
<a href="#M000046" class="method-signature">
|
248
|
-
<span class="method-name">drop_tables</span><span class="method-args">()</span>
|
249
|
-
</a>
|
250
|
-
</div>
|
251
|
-
|
252
|
-
<div class="method-description">
|
253
|
-
<p><a class="source-toggle" href="#"
|
254
|
-
onclick="toggleCode('M000046-source');return false;">[Source]</a></p>
|
255
|
-
<div class="method-source-code" id="M000046-source">
|
256
|
-
<pre>
|
257
|
-
<span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 30</span>
|
258
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">drop_tables</span>
|
259
|
-
<span class="ruby-identifier">freq_table</span>.<span class="ruby-identifier">delete</span>
|
260
|
-
<span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">delete</span>
|
261
|
-
<span class="ruby-ivar">@stable</span> = <span class="ruby-keyword kw">nil</span>
|
262
|
-
<span class="ruby-ivar">@ftable</span> = <span class="ruby-keyword kw">nil</span>
|
263
|
-
<span class="ruby-ivar">@klass_word_counts</span> = {}
|
264
|
-
<span class="ruby-ivar">@klass_doc_counts</span> = {}
|
265
|
-
<span class="ruby-keyword kw">end</span>
|
266
|
-
</pre>
|
267
|
-
</div>
|
268
|
-
</div>
|
269
|
-
</div>
|
270
|
-
|
271
|
-
<div id="method-M000051" class="method-detail">
|
272
|
-
<a name="M000051"></a>
|
273
|
-
|
274
|
-
<div class="method-heading">
|
275
|
-
<a href="#M000051" class="method-signature">
|
276
|
-
<span class="method-name">get_doc_count</span><span class="method-args">(klass)</span>
|
277
|
-
</a>
|
278
|
-
</div>
|
279
|
-
|
280
|
-
<div class="method-description">
|
281
|
-
<p><a class="source-toggle" href="#"
|
282
|
-
onclick="toggleCode('M000051-source');return false;">[Source]</a></p>
|
283
|
-
<div class="method-source-code" id="M000051-source">
|
284
|
-
<pre>
|
285
|
-
<span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 73</span>
|
286
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_doc_count</span>(<span class="ruby-identifier">klass</span>)
|
287
|
-
<span class="ruby-ivar">@klass_doc_counts</span>.<span class="ruby-identifier">fetch</span>(<span class="ruby-identifier">klass</span>) {
|
288
|
-
<span class="ruby-ivar">@klass_doc_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-value str">"totals:doccount"</span>).<span class="ruby-identifier">first</span>.<span class="ruby-identifier">to_i64</span>.<span class="ruby-identifier">to_f</span>
|
289
|
-
}
|
290
|
-
<span class="ruby-keyword kw">end</span>
|
291
|
-
</pre>
|
292
|
-
</div>
|
293
|
-
</div>
|
294
|
-
</div>
|
295
|
-
|
296
|
-
<div id="method-M000050" class="method-detail">
|
297
|
-
<a name="M000050"></a>
|
298
|
-
|
299
|
-
<div class="method-heading">
|
300
|
-
<a href="#M000050" class="method-signature">
|
301
|
-
<span class="method-name">get_total_word_count</span><span class="method-args">(klass)</span>
|
302
|
-
</a>
|
303
|
-
</div>
|
304
|
-
|
305
|
-
<div class="method-description">
|
306
|
-
<p><a class="source-toggle" href="#"
|
307
|
-
onclick="toggleCode('M000050-source');return false;">[Source]</a></p>
|
308
|
-
<div class="method-source-code" id="M000050-source">
|
309
|
-
<pre>
|
310
|
-
<span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 67</span>
|
311
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_total_word_count</span>(<span class="ruby-identifier">klass</span>)
|
312
|
-
<span class="ruby-ivar">@klass_word_counts</span>.<span class="ruby-identifier">fetch</span>(<span class="ruby-identifier">klass</span>) {
|
313
|
-
<span class="ruby-ivar">@klass_word_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-value str">"totals:wordcount"</span>).<span class="ruby-identifier">first</span>.<span class="ruby-identifier">to_i64</span>.<span class="ruby-identifier">to_f</span>
|
314
|
-
}
|
315
|
-
<span class="ruby-keyword kw">end</span>
|
316
|
-
</pre>
|
317
|
-
</div>
|
318
|
-
</div>
|
319
|
-
</div>
|
320
|
-
|
321
|
-
<div id="method-M000049" class="method-detail">
|
322
|
-
<a name="M000049"></a>
|
323
|
-
|
324
|
-
<div class="method-heading">
|
325
|
-
<a href="#M000049" class="method-signature">
|
326
|
-
<span class="method-name">get_vocabulary_sizes</span><span class="method-args">()</span>
|
327
|
-
</a>
|
328
|
-
</div>
|
329
|
-
|
330
|
-
<div class="method-description">
|
331
|
-
<p><a class="source-toggle" href="#"
|
332
|
-
onclick="toggleCode('M000049-source');return false;">[Source]</a></p>
|
333
|
-
<div class="method-source-code" id="M000049-source">
|
334
|
-
<pre>
|
335
|
-
<span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 63</span>
|
336
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_vocabulary_sizes</span>
|
337
|
-
<span class="ruby-identifier">get_summary</span> <span class="ruby-value str">"totals:vocabsize"</span>
|
338
|
-
<span class="ruby-keyword kw">end</span>
|
339
|
-
</pre>
|
340
|
-
</div>
|
341
|
-
</div>
|
342
|
-
</div>
|
343
|
-
|
344
|
-
<div id="method-M000048" class="method-detail">
|
345
|
-
<a name="M000048"></a>
|
346
|
-
|
347
|
-
<div class="method-heading">
|
348
|
-
<a href="#M000048" class="method-signature">
|
349
|
-
<span class="method-name">get_word_counts</span><span class="method-args">(word)</span>
|
350
|
-
</a>
|
351
|
-
</div>
|
352
|
-
|
353
|
-
<div class="method-description">
|
354
|
-
<p><a class="source-toggle" href="#"
|
355
|
-
onclick="toggleCode('M000048-source');return false;">[Source]</a></p>
|
356
|
-
<div class="method-source-code" id="M000048-source">
|
357
|
-
<pre>
|
358
|
-
<span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 49</span>
|
359
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_word_counts</span>(<span class="ruby-identifier">word</span>)
|
360
|
-
<span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value">0</span>)
|
361
|
-
<span class="ruby-identifier">row</span> = <span class="ruby-identifier">freq_table</span>.<span class="ruby-identifier">get_row</span>(<span class="ruby-identifier">word</span>)
|
362
|
-
<span class="ruby-keyword kw">return</span> <span class="ruby-identifier">counts</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">row</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">==</span> <span class="ruby-value">0</span>
|
363
|
-
|
364
|
-
<span class="ruby-identifier">row</span>.<span class="ruby-identifier">first</span>.<span class="ruby-identifier">columns</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">colname</span>, <span class="ruby-identifier">cell</span><span class="ruby-operator">|</span>
|
365
|
-
<span class="ruby-identifier">classname</span> = <span class="ruby-identifier">colname</span>.<span class="ruby-identifier">split</span>(<span class="ruby-value str">':'</span>)[<span class="ruby-value">1</span>].<span class="ruby-identifier">intern</span>
|
366
|
-
<span class="ruby-comment cmt"># in case untrain has been called too many times</span>
|
367
|
-
<span class="ruby-identifier">counts</span>[<span class="ruby-identifier">classname</span>] = [<span class="ruby-identifier">cell</span>.<span class="ruby-identifier">to_i64</span>.<span class="ruby-identifier">to_f</span>, <span class="ruby-value">0</span>].<span class="ruby-identifier">max</span>
|
368
|
-
}
|
369
|
-
|
370
|
-
<span class="ruby-identifier">counts</span>
|
371
|
-
<span class="ruby-keyword kw">end</span>
|
372
|
-
</pre>
|
373
|
-
</div>
|
374
|
-
</div>
|
375
|
-
</div>
|
376
|
-
|
377
|
-
<div id="method-M000054" class="method-detail">
|
378
|
-
<a name="M000054"></a>
|
379
|
-
|
380
|
-
<div class="method-heading">
|
381
|
-
<a href="#M000054" class="method-signature">
|
382
|
-
<span class="method-name">incr_doc_count</span><span class="method-args">(klass, count)</span>
|
383
|
-
</a>
|
384
|
-
</div>
|
385
|
-
|
386
|
-
<div class="method-description">
|
387
|
-
<p><a class="source-toggle" href="#"
|
388
|
-
onclick="toggleCode('M000054-source');return false;">[Source]</a></p>
|
389
|
-
<div class="method-source-code" id="M000054-source">
|
390
|
-
<pre>
|
391
|
-
<span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 95</span>
|
392
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">incr_doc_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">count</span>)
|
393
|
-
<span class="ruby-ivar">@klass_doc_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">klass</span>, <span class="ruby-value str">"totals:doccount"</span>, <span class="ruby-identifier">count</span>
|
394
|
-
<span class="ruby-keyword kw">end</span>
|
395
|
-
</pre>
|
396
|
-
</div>
|
397
|
-
</div>
|
398
|
-
</div>
|
399
|
-
|
400
|
-
<div id="method-M000053" class="method-detail">
|
401
|
-
<a name="M000053"></a>
|
402
|
-
|
403
|
-
<div class="method-heading">
|
404
|
-
<a href="#M000053" class="method-signature">
|
405
|
-
<span class="method-name">incr_total_word_count</span><span class="method-args">(klass, count)</span>
|
406
|
-
</a>
|
407
|
-
</div>
|
408
|
-
|
409
|
-
<div class="method-description">
|
410
|
-
<p><a class="source-toggle" href="#"
|
411
|
-
onclick="toggleCode('M000053-source');return false;">[Source]</a></p>
|
412
|
-
<div class="method-source-code" id="M000053-source">
|
413
|
-
<pre>
|
414
|
-
<span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 91</span>
|
415
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">incr_total_word_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">count</span>)
|
416
|
-
<span class="ruby-ivar">@klass_word_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">klass</span>, <span class="ruby-value str">"totals:wordcount"</span>, <span class="ruby-identifier">count</span>
|
417
|
-
<span class="ruby-keyword kw">end</span>
|
418
|
-
</pre>
|
419
|
-
</div>
|
420
|
-
</div>
|
421
|
-
</div>
|
422
|
-
|
423
|
-
<div id="method-M000052" class="method-detail">
|
424
|
-
<a name="M000052"></a>
|
425
|
-
|
426
|
-
<div class="method-heading">
|
427
|
-
<a href="#M000052" class="method-signature">
|
428
|
-
<span class="method-name">incr_word_count</span><span class="method-args">(klass, word, count)</span>
|
429
|
-
</a>
|
430
|
-
</div>
|
431
|
-
|
432
|
-
<div class="method-description">
|
433
|
-
<p><a class="source-toggle" href="#"
|
434
|
-
onclick="toggleCode('M000052-source');return false;">[Source]</a></p>
|
435
|
-
<div class="method-source-code" id="M000052-source">
|
436
|
-
<pre>
|
437
|
-
<span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 79</span>
|
438
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">incr_word_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span>)
|
439
|
-
<span class="ruby-identifier">size</span> = <span class="ruby-identifier">freq_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">word</span>, <span class="ruby-node">"classes:#{klass.to_s}"</span>, <span class="ruby-identifier">count</span>
|
440
|
-
<span class="ruby-comment cmt"># if this is a new word, increase the klass's vocab size. If the new word</span>
|
441
|
-
<span class="ruby-comment cmt"># count is 0, then we need to decrement our vocab size</span>
|
442
|
-
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">count</span>
|
443
|
-
<span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">klass</span>, <span class="ruby-value str">"totals:vocabsize"</span>
|
444
|
-
<span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-value">0</span>
|
445
|
-
<span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">klass</span>, <span class="ruby-value str">"totals:vocabsize"</span>, <span class="ruby-value">-1</span>
|
446
|
-
<span class="ruby-keyword kw">end</span>
|
447
|
-
<span class="ruby-identifier">size</span>
|
448
|
-
<span class="ruby-keyword kw">end</span>
|
449
|
-
</pre>
|
450
|
-
</div>
|
451
|
-
</div>
|
452
|
-
</div>
|
453
|
-
|
454
|
-
<div id="method-M000047" class="method-detail">
|
455
|
-
<a name="M000047"></a>
|
456
|
-
|
457
|
-
<div class="method-heading">
|
458
|
-
<a href="#M000047" class="method-signature">
|
459
|
-
<span class="method-name">init_tables</span><span class="method-args">()</span>
|
460
|
-
</a>
|
461
|
-
</div>
|
462
|
-
|
463
|
-
<div class="method-description">
|
464
|
-
<p><a class="source-toggle" href="#"
|
465
|
-
onclick="toggleCode('M000047-source');return false;">[Source]</a></p>
|
466
|
-
<div class="method-source-code" id="M000047-source">
|
467
|
-
<pre>
|
468
|
-
<span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 39</span>
|
469
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">init_tables</span>
|
470
|
-
<span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">not</span> <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">has_table?</span> <span class="ruby-ivar">@ftablename</span>
|
471
|
-
<span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">create_table</span> <span class="ruby-ivar">@ftablename</span>, <span class="ruby-value str">"classes"</span>, <span class="ruby-value str">"total"</span>
|
472
|
-
<span class="ruby-keyword kw">end</span>
|
473
|
-
|
474
|
-
<span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">not</span> <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">has_table?</span> <span class="ruby-ivar">@stablename</span>
|
475
|
-
<span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">create_table</span> <span class="ruby-ivar">@stablename</span>, <span class="ruby-value str">"totals"</span>
|
476
|
-
<span class="ruby-keyword kw">end</span>
|
477
|
-
<span class="ruby-keyword kw">end</span>
|
478
|
-
</pre>
|
479
|
-
</div>
|
480
|
-
</div>
|
481
|
-
</div>
|
482
|
-
|
483
|
-
<div id="method-M000045" class="method-detail">
|
484
|
-
<a name="M000045"></a>
|
485
|
-
|
486
|
-
<div class="method-heading">
|
487
|
-
<a href="#M000045" class="method-signature">
|
488
|
-
<span class="method-name">reset</span><span class="method-args">()</span>
|
489
|
-
</a>
|
490
|
-
</div>
|
491
|
-
|
492
|
-
<div class="method-description">
|
493
|
-
<p><a class="source-toggle" href="#"
|
494
|
-
onclick="toggleCode('M000045-source');return false;">[Source]</a></p>
|
495
|
-
<div class="method-source-code" id="M000045-source">
|
496
|
-
<pre>
|
497
|
-
<span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 25</span>
|
498
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">reset</span>
|
499
|
-
<span class="ruby-identifier">drop_tables</span>
|
500
|
-
<span class="ruby-identifier">init_tables</span>
|
501
|
-
<span class="ruby-keyword kw">end</span>
|
502
|
-
</pre>
|
503
|
-
</div>
|
504
|
-
</div>
|
505
|
-
</div>
|
506
|
-
|
507
|
-
<h3 class="section-bar">Protected Instance methods</h3>
|
508
|
-
|
509
|
-
<div id="method-M000059" class="method-detail">
|
510
|
-
<a name="M000059"></a>
|
511
|
-
|
512
|
-
<div class="method-heading">
|
513
|
-
<a href="#M000059" class="method-signature">
|
514
|
-
<span class="method-name">freq_table</span><span class="method-args">()</span>
|
515
|
-
</a>
|
516
|
-
</div>
|
517
|
-
|
518
|
-
<div class="method-description">
|
519
|
-
<p><a class="source-toggle" href="#"
|
520
|
-
onclick="toggleCode('M000059-source');return false;">[Source]</a></p>
|
521
|
-
<div class="method-source-code" id="M000059-source">
|
522
|
-
<pre>
|
523
|
-
<span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 120</span>
|
524
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">freq_table</span>
|
525
|
-
<span class="ruby-ivar">@ftable</span> <span class="ruby-operator">||=</span> <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">get_table</span> <span class="ruby-ivar">@ftablename</span>
|
526
|
-
<span class="ruby-keyword kw">end</span>
|
527
|
-
</pre>
|
528
|
-
</div>
|
529
|
-
</div>
|
530
|
-
</div>
|
531
|
-
|
532
|
-
<div id="method-M000057" class="method-detail">
|
533
|
-
<a name="M000057"></a>
|
534
|
-
|
535
|
-
<div class="method-heading">
|
536
|
-
<a href="#M000057" class="method-signature">
|
537
|
-
<span class="method-name">get_summary</span><span class="method-args">(name)</span>
|
538
|
-
</a>
|
539
|
-
</div>
|
540
|
-
|
541
|
-
<div class="method-description">
|
542
|
-
<p><a class="source-toggle" href="#"
|
543
|
-
onclick="toggleCode('M000057-source');return false;">[Source]</a></p>
|
544
|
-
<div class="method-source-code" id="M000057-source">
|
545
|
-
<pre>
|
546
|
-
<span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 108</span>
|
547
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_summary</span>(<span class="ruby-identifier">name</span>)
|
548
|
-
<span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">0</span>
|
549
|
-
<span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">create_scanner</span>(<span class="ruby-value str">""</span>, <span class="ruby-identifier">name</span>) { <span class="ruby-operator">|</span><span class="ruby-identifier">row</span><span class="ruby-operator">|</span>
|
550
|
-
<span class="ruby-identifier">counts</span>[<span class="ruby-identifier">row</span>.<span class="ruby-identifier">row</span>.<span class="ruby-identifier">intern</span>] = <span class="ruby-identifier">row</span>.<span class="ruby-identifier">columns</span>[<span class="ruby-identifier">name</span>].<span class="ruby-identifier">to_i64</span>
|
551
|
-
}
|
552
|
-
<span class="ruby-identifier">counts</span>
|
553
|
-
<span class="ruby-keyword kw">end</span>
|
554
|
-
</pre>
|
555
|
-
</div>
|
556
|
-
</div>
|
557
|
-
</div>
|
558
|
-
|
559
|
-
<div id="method-M000058" class="method-detail">
|
560
|
-
<a name="M000058"></a>
|
561
|
-
|
562
|
-
<div class="method-heading">
|
563
|
-
<a href="#M000058" class="method-signature">
|
564
|
-
<span class="method-name">summary_table</span><span class="method-args">()</span>
|
565
|
-
</a>
|
566
|
-
</div>
|
567
|
-
|
568
|
-
<div class="method-description">
|
569
|
-
<p><a class="source-toggle" href="#"
|
570
|
-
onclick="toggleCode('M000058-source');return false;">[Source]</a></p>
|
571
|
-
<div class="method-source-code" id="M000058-source">
|
572
|
-
<pre>
|
573
|
-
<span class="ruby-comment cmt"># File lib/ankusa/hbase_storage.rb, line 116</span>
|
574
|
-
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">summary_table</span>
|
575
|
-
<span class="ruby-ivar">@stable</span> <span class="ruby-operator">||=</span> <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">get_table</span> <span class="ruby-ivar">@stablename</span>
|
576
|
-
<span class="ruby-keyword kw">end</span>
|
577
|
-
</pre>
|
578
|
-
</div>
|
579
|
-
</div>
|
580
|
-
</div>
|
581
|
-
|
582
|
-
|
583
|
-
</div>
|
584
|
-
|
585
|
-
|
586
|
-
</div>
|
587
|
-
|
588
|
-
|
589
|
-
<div id="validator-badges">
|
590
|
-
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
591
|
-
</div>
|
592
|
-
|
593
|
-
</body>
|
594
|
-
</html>
|