ankusa 0.0.13 → 0.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -0
- data/Gemfile.lock +16 -0
- data/README.rdoc +5 -3
- data/Rakefile +5 -5
- data/lib/ankusa/naive_bayes.rb +3 -3
- data/lib/ankusa/version.rb +1 -1
- metadata +36 -100
- data/docs/Ankusa.html +0 -229
- data/docs/Ankusa/CassandraStorage.html +0 -801
- data/docs/Ankusa/Classifier.html +0 -440
- data/docs/Ankusa/FileSystemStorage.html +0 -376
- data/docs/Ankusa/HBaseStorage.html +0 -845
- data/docs/Ankusa/KLDivergenceClassifier.html +0 -265
- data/docs/Ankusa/MemoryStorage.html +0 -672
- data/docs/Ankusa/NaiveBayesClassifier.html +0 -313
- data/docs/Ankusa/TextHash.html +0 -390
- data/docs/README_rdoc.html +0 -268
- data/docs/String.html +0 -241
- data/docs/created.rid +0 -14
- data/docs/images/brick.png +0 -0
- data/docs/images/brick_link.png +0 -0
- data/docs/images/bug.png +0 -0
- data/docs/images/bullet_black.png +0 -0
- data/docs/images/bullet_toggle_minus.png +0 -0
- data/docs/images/bullet_toggle_plus.png +0 -0
- data/docs/images/date.png +0 -0
- data/docs/images/find.png +0 -0
- data/docs/images/loadingAnimation.gif +0 -0
- data/docs/images/macFFBgHack.png +0 -0
- data/docs/images/package.png +0 -0
- data/docs/images/page_green.png +0 -0
- data/docs/images/page_white_text.png +0 -0
- data/docs/images/page_white_width.png +0 -0
- data/docs/images/plugin.png +0 -0
- data/docs/images/ruby.png +0 -0
- data/docs/images/tag_green.png +0 -0
- data/docs/images/wrench.png +0 -0
- data/docs/images/wrench_orange.png +0 -0
- data/docs/images/zoom.png +0 -0
- data/docs/index.html +0 -212
- data/docs/js/darkfish.js +0 -116
- data/docs/js/jquery.js +0 -32
- data/docs/js/quicksearch.js +0 -114
- data/docs/js/thickbox-compressed.js +0 -10
- data/docs/lib/ankusa/cassandra_storage_rb.html +0 -54
- data/docs/lib/ankusa/classifier_rb.html +0 -52
- data/docs/lib/ankusa/extensions_rb.html +0 -54
- data/docs/lib/ankusa/file_system_storage_rb.html +0 -54
- data/docs/lib/ankusa/hasher_rb.html +0 -56
- data/docs/lib/ankusa/hbase_storage_rb.html +0 -54
- data/docs/lib/ankusa/kl_divergence_rb.html +0 -52
- data/docs/lib/ankusa/memory_storage_rb.html +0 -52
- data/docs/lib/ankusa/naive_bayes_rb.html +0 -52
- data/docs/lib/ankusa/stopwords_rb.html +0 -52
- data/docs/lib/ankusa/version_rb.html +0 -52
- data/docs/lib/ankusa_rb.html +0 -64
- data/docs/rdoc.css +0 -759
@@ -1,845 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
-
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
4
|
-
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
5
|
-
<head>
|
6
|
-
<meta content="text/html; charset=UTF-8" http-equiv="Content-Type" />
|
7
|
-
|
8
|
-
<title>Class: Ankusa::HBaseStorage</title>
|
9
|
-
|
10
|
-
<link rel="stylesheet" href="../rdoc.css" type="text/css" media="screen" />
|
11
|
-
|
12
|
-
<script src="../js/jquery.js" type="text/javascript" charset="utf-8"></script>
|
13
|
-
<script src="../js/thickbox-compressed.js" type="text/javascript" charset="utf-8"></script>
|
14
|
-
<script src="../js/quicksearch.js" type="text/javascript" charset="utf-8"></script>
|
15
|
-
<script src="../js/darkfish.js" type="text/javascript" charset="utf-8"></script>
|
16
|
-
|
17
|
-
</head>
|
18
|
-
<body id="top" class="class">
|
19
|
-
|
20
|
-
<div id="metadata">
|
21
|
-
<div id="home-metadata">
|
22
|
-
<div id="home-section" class="section">
|
23
|
-
<h3 class="section-header">
|
24
|
-
<a href="../index.html">Home</a>
|
25
|
-
<a href="../index.html#classes">Classes</a>
|
26
|
-
<a href="../index.html#methods">Methods</a>
|
27
|
-
</h3>
|
28
|
-
</div>
|
29
|
-
</div>
|
30
|
-
|
31
|
-
<div id="file-metadata">
|
32
|
-
<div id="file-list-section" class="section">
|
33
|
-
<h3 class="section-header">In Files</h3>
|
34
|
-
<div class="section-body">
|
35
|
-
<ul>
|
36
|
-
|
37
|
-
<li><a href="../lib/ankusa/hbase_storage_rb.html?TB_iframe=true&height=550&width=785"
|
38
|
-
class="thickbox" title="lib/ankusa/hbase_storage.rb">lib/ankusa/hbase_storage.rb</a></li>
|
39
|
-
|
40
|
-
</ul>
|
41
|
-
</div>
|
42
|
-
</div>
|
43
|
-
|
44
|
-
|
45
|
-
</div>
|
46
|
-
|
47
|
-
<div id="class-metadata">
|
48
|
-
|
49
|
-
<!-- Parent Class -->
|
50
|
-
<div id="parent-class-section" class="section">
|
51
|
-
<h3 class="section-header">Parent</h3>
|
52
|
-
|
53
|
-
<p class="link">Object</p>
|
54
|
-
|
55
|
-
</div>
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
<!-- Method Quickref -->
|
64
|
-
<div id="method-list-section" class="section">
|
65
|
-
<h3 class="section-header">Methods</h3>
|
66
|
-
<ul class="link-list">
|
67
|
-
|
68
|
-
<li><a href="#method-c-new">::new</a></li>
|
69
|
-
|
70
|
-
<li><a href="#method-i-classnames">#classnames</a></li>
|
71
|
-
|
72
|
-
<li><a href="#method-i-close">#close</a></li>
|
73
|
-
|
74
|
-
<li><a href="#method-i-doc_count_totals">#doc_count_totals</a></li>
|
75
|
-
|
76
|
-
<li><a href="#method-i-drop_tables">#drop_tables</a></li>
|
77
|
-
|
78
|
-
<li><a href="#method-i-freq_table">#freq_table</a></li>
|
79
|
-
|
80
|
-
<li><a href="#method-i-get_doc_count">#get_doc_count</a></li>
|
81
|
-
|
82
|
-
<li><a href="#method-i-get_summary">#get_summary</a></li>
|
83
|
-
|
84
|
-
<li><a href="#method-i-get_total_word_count">#get_total_word_count</a></li>
|
85
|
-
|
86
|
-
<li><a href="#method-i-get_vocabulary_sizes">#get_vocabulary_sizes</a></li>
|
87
|
-
|
88
|
-
<li><a href="#method-i-get_word_counts">#get_word_counts</a></li>
|
89
|
-
|
90
|
-
<li><a href="#method-i-incr_doc_count">#incr_doc_count</a></li>
|
91
|
-
|
92
|
-
<li><a href="#method-i-incr_total_word_count">#incr_total_word_count</a></li>
|
93
|
-
|
94
|
-
<li><a href="#method-i-incr_word_count">#incr_word_count</a></li>
|
95
|
-
|
96
|
-
<li><a href="#method-i-init_tables">#init_tables</a></li>
|
97
|
-
|
98
|
-
<li><a href="#method-i-reset">#reset</a></li>
|
99
|
-
|
100
|
-
<li><a href="#method-i-summary_table">#summary_table</a></li>
|
101
|
-
|
102
|
-
</ul>
|
103
|
-
</div>
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
</div>
|
108
|
-
|
109
|
-
<div id="project-metadata">
|
110
|
-
|
111
|
-
|
112
|
-
<div id="fileindex-section" class="section project-section">
|
113
|
-
<h3 class="section-header">Files</h3>
|
114
|
-
<ul>
|
115
|
-
|
116
|
-
<li class="file"><a href="../README_rdoc.html">README.rdoc</a></li>
|
117
|
-
|
118
|
-
</ul>
|
119
|
-
</div>
|
120
|
-
|
121
|
-
|
122
|
-
<div id="classindex-section" class="section project-section">
|
123
|
-
<h3 class="section-header">Class/Module Index
|
124
|
-
<span class="search-toggle"><img src="../images/find.png"
|
125
|
-
height="16" width="16" alt="[+]"
|
126
|
-
title="show/hide quicksearch" /></span></h3>
|
127
|
-
<form action="#" method="get" accept-charset="utf-8" class="initially-hidden">
|
128
|
-
<fieldset>
|
129
|
-
<legend>Quicksearch</legend>
|
130
|
-
<input type="text" name="quicksearch" value=""
|
131
|
-
class="quicksearch-field" />
|
132
|
-
</fieldset>
|
133
|
-
</form>
|
134
|
-
|
135
|
-
<ul class="link-list">
|
136
|
-
|
137
|
-
<li><a href="../Ankusa.html">Ankusa</a></li>
|
138
|
-
|
139
|
-
<li><a href="../Ankusa/CassandraStorage.html">Ankusa::CassandraStorage</a></li>
|
140
|
-
|
141
|
-
<li><a href="../Ankusa/Classifier.html">Ankusa::Classifier</a></li>
|
142
|
-
|
143
|
-
<li><a href="../Ankusa/FileSystemStorage.html">Ankusa::FileSystemStorage</a></li>
|
144
|
-
|
145
|
-
<li><a href="../Ankusa/HBaseStorage.html">Ankusa::HBaseStorage</a></li>
|
146
|
-
|
147
|
-
<li><a href="../Ankusa/KLDivergenceClassifier.html">Ankusa::KLDivergenceClassifier</a></li>
|
148
|
-
|
149
|
-
<li><a href="../Ankusa/MemoryStorage.html">Ankusa::MemoryStorage</a></li>
|
150
|
-
|
151
|
-
<li><a href="../Ankusa/NaiveBayesClassifier.html">Ankusa::NaiveBayesClassifier</a></li>
|
152
|
-
|
153
|
-
<li><a href="../Ankusa/TextHash.html">Ankusa::TextHash</a></li>
|
154
|
-
|
155
|
-
<li><a href="../String.html">String</a></li>
|
156
|
-
|
157
|
-
</ul>
|
158
|
-
<div id="no-class-search-results" style="display: none;">No matching classes.</div>
|
159
|
-
</div>
|
160
|
-
|
161
|
-
|
162
|
-
</div>
|
163
|
-
</div>
|
164
|
-
|
165
|
-
<div id="documentation">
|
166
|
-
<h1 class="class">Ankusa::HBaseStorage</h1>
|
167
|
-
|
168
|
-
<div id="description" class="description">
|
169
|
-
|
170
|
-
</div><!-- description -->
|
171
|
-
|
172
|
-
|
173
|
-
<div id="5Buntitled-5D" class="documentation-section">
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
<!-- Attributes -->
|
182
|
-
<div id="attribute-method-details" class="method-section section">
|
183
|
-
<h3 class="section-header">Attributes</h3>
|
184
|
-
|
185
|
-
|
186
|
-
<div id="hbase-attribute-method" class="method-detail">
|
187
|
-
<a name="hbase"></a>
|
188
|
-
|
189
|
-
<div class="method-heading attribute-method-heading">
|
190
|
-
<span class="method-name">hbase</span><span
|
191
|
-
class="attribute-access-type">[R]</span>
|
192
|
-
</div>
|
193
|
-
|
194
|
-
<div class="method-description">
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
</div>
|
199
|
-
</div>
|
200
|
-
|
201
|
-
</div><!-- attribute-method-details -->
|
202
|
-
|
203
|
-
|
204
|
-
<!-- Methods -->
|
205
|
-
|
206
|
-
<div id="public-class-method-details" class="method-section section">
|
207
|
-
<h3 class="section-header">Public Class Methods</h3>
|
208
|
-
|
209
|
-
|
210
|
-
<div id="new-method" class="method-detail ">
|
211
|
-
<a name="method-c-new"></a>
|
212
|
-
|
213
|
-
|
214
|
-
<div class="method-heading">
|
215
|
-
<span class="method-name">new</span><span
|
216
|
-
class="method-args">(host='localhost', port=9090, frequency_tablename="ankusa_word_frequencies", summary_tablename="ankusa_summary")</span>
|
217
|
-
<span class="method-click-advice">click to toggle source</span>
|
218
|
-
</div>
|
219
|
-
|
220
|
-
|
221
|
-
<div class="method-description">
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
<div class="method-source-code" id="new-source">
|
228
|
-
<pre>
|
229
|
-
<span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 8</span>
|
230
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">host</span>=<span class="ruby-string">'localhost'</span>, <span class="ruby-identifier">port</span>=<span class="ruby-value">9090</span>, <span class="ruby-identifier">frequency_tablename</span>=<span class="ruby-string">"ankusa_word_frequencies"</span>, <span class="ruby-identifier">summary_tablename</span>=<span class="ruby-string">"ankusa_summary"</span>)
|
231
|
-
<span class="ruby-ivar">@hbase</span> = <span class="ruby-constant">HBaseRb</span><span class="ruby-operator">::</span><span class="ruby-constant">Client</span>.<span class="ruby-identifier">new</span> <span class="ruby-identifier">host</span>, <span class="ruby-identifier">port</span>
|
232
|
-
<span class="ruby-ivar">@ftablename</span> = <span class="ruby-identifier">frequency_tablename</span>
|
233
|
-
<span class="ruby-ivar">@stablename</span> = <span class="ruby-identifier">summary_tablename</span>
|
234
|
-
<span class="ruby-ivar">@klass_word_counts</span> = {}
|
235
|
-
<span class="ruby-ivar">@klass_doc_counts</span> = {}
|
236
|
-
<span class="ruby-identifier">init_tables</span>
|
237
|
-
<span class="ruby-keyword">end</span></pre>
|
238
|
-
</div><!-- new-source -->
|
239
|
-
|
240
|
-
</div>
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
</div><!-- new-method -->
|
246
|
-
|
247
|
-
|
248
|
-
</div><!-- public-class-method-details -->
|
249
|
-
|
250
|
-
<div id="public-instance-method-details" class="method-section section">
|
251
|
-
<h3 class="section-header">Public Instance Methods</h3>
|
252
|
-
|
253
|
-
|
254
|
-
<div id="classnames-method" class="method-detail ">
|
255
|
-
<a name="method-i-classnames"></a>
|
256
|
-
|
257
|
-
|
258
|
-
<div class="method-heading">
|
259
|
-
<span class="method-name">classnames</span><span
|
260
|
-
class="method-args">()</span>
|
261
|
-
<span class="method-click-advice">click to toggle source</span>
|
262
|
-
</div>
|
263
|
-
|
264
|
-
|
265
|
-
<div class="method-description">
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
<div class="method-source-code" id="classnames-source">
|
272
|
-
<pre>
|
273
|
-
<span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 17</span>
|
274
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">classnames</span>
|
275
|
-
<span class="ruby-identifier">cs</span> = []
|
276
|
-
<span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">create_scanner</span>(<span class="ruby-string">""</span>, <span class="ruby-string">"totals"</span>) { <span class="ruby-operator">|</span><span class="ruby-identifier">row</span><span class="ruby-operator">|</span>
|
277
|
-
<span class="ruby-identifier">cs</span> <span class="ruby-operator"><<</span> <span class="ruby-identifier">row</span>.<span class="ruby-identifier">row</span>.<span class="ruby-identifier">intern</span>
|
278
|
-
}
|
279
|
-
<span class="ruby-identifier">cs</span>
|
280
|
-
<span class="ruby-keyword">end</span></pre>
|
281
|
-
</div><!-- classnames-source -->
|
282
|
-
|
283
|
-
</div>
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
</div><!-- classnames-method -->
|
289
|
-
|
290
|
-
|
291
|
-
<div id="close-method" class="method-detail ">
|
292
|
-
<a name="method-i-close"></a>
|
293
|
-
|
294
|
-
|
295
|
-
<div class="method-heading">
|
296
|
-
<span class="method-name">close</span><span
|
297
|
-
class="method-args">()</span>
|
298
|
-
<span class="method-click-advice">click to toggle source</span>
|
299
|
-
</div>
|
300
|
-
|
301
|
-
|
302
|
-
<div class="method-description">
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
<div class="method-source-code" id="close-source">
|
309
|
-
<pre>
|
310
|
-
<span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 103</span>
|
311
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">close</span>
|
312
|
-
<span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">close</span>
|
313
|
-
<span class="ruby-keyword">end</span></pre>
|
314
|
-
</div><!-- close-source -->
|
315
|
-
|
316
|
-
</div>
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
</div><!-- close-method -->
|
322
|
-
|
323
|
-
|
324
|
-
<div id="doc_count_totals-method" class="method-detail ">
|
325
|
-
<a name="method-i-doc_count_totals"></a>
|
326
|
-
|
327
|
-
|
328
|
-
<div class="method-heading">
|
329
|
-
<span class="method-name">doc_count_totals</span><span
|
330
|
-
class="method-args">()</span>
|
331
|
-
<span class="method-click-advice">click to toggle source</span>
|
332
|
-
</div>
|
333
|
-
|
334
|
-
|
335
|
-
<div class="method-description">
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
<div class="method-source-code" id="doc_count_totals-source">
|
342
|
-
<pre>
|
343
|
-
<span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 99</span>
|
344
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">doc_count_totals</span>
|
345
|
-
<span class="ruby-identifier">get_summary</span> <span class="ruby-string">"totals:doccount"</span>
|
346
|
-
<span class="ruby-keyword">end</span></pre>
|
347
|
-
</div><!-- doc_count_totals-source -->
|
348
|
-
|
349
|
-
</div>
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
</div><!-- doc_count_totals-method -->
|
355
|
-
|
356
|
-
|
357
|
-
<div id="drop_tables-method" class="method-detail ">
|
358
|
-
<a name="method-i-drop_tables"></a>
|
359
|
-
|
360
|
-
|
361
|
-
<div class="method-heading">
|
362
|
-
<span class="method-name">drop_tables</span><span
|
363
|
-
class="method-args">()</span>
|
364
|
-
<span class="method-click-advice">click to toggle source</span>
|
365
|
-
</div>
|
366
|
-
|
367
|
-
|
368
|
-
<div class="method-description">
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
<div class="method-source-code" id="drop_tables-source">
|
375
|
-
<pre>
|
376
|
-
<span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 30</span>
|
377
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">drop_tables</span>
|
378
|
-
<span class="ruby-identifier">freq_table</span>.<span class="ruby-identifier">delete</span>
|
379
|
-
<span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">delete</span>
|
380
|
-
<span class="ruby-ivar">@stable</span> = <span class="ruby-keyword">nil</span>
|
381
|
-
<span class="ruby-ivar">@ftable</span> = <span class="ruby-keyword">nil</span>
|
382
|
-
<span class="ruby-ivar">@klass_word_counts</span> = {}
|
383
|
-
<span class="ruby-ivar">@klass_doc_counts</span> = {}
|
384
|
-
<span class="ruby-keyword">end</span></pre>
|
385
|
-
</div><!-- drop_tables-source -->
|
386
|
-
|
387
|
-
</div>
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
</div><!-- drop_tables-method -->
|
393
|
-
|
394
|
-
|
395
|
-
<div id="get_doc_count-method" class="method-detail ">
|
396
|
-
<a name="method-i-get_doc_count"></a>
|
397
|
-
|
398
|
-
|
399
|
-
<div class="method-heading">
|
400
|
-
<span class="method-name">get_doc_count</span><span
|
401
|
-
class="method-args">(klass)</span>
|
402
|
-
<span class="method-click-advice">click to toggle source</span>
|
403
|
-
</div>
|
404
|
-
|
405
|
-
|
406
|
-
<div class="method-description">
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
<div class="method-source-code" id="get_doc_count-source">
|
413
|
-
<pre>
|
414
|
-
<span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 73</span>
|
415
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">get_doc_count</span>(<span class="ruby-identifier">klass</span>)
|
416
|
-
<span class="ruby-ivar">@klass_doc_counts</span>.<span class="ruby-identifier">fetch</span>(<span class="ruby-identifier">klass</span>) {
|
417
|
-
<span class="ruby-ivar">@klass_doc_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-string">"totals:doccount"</span>).<span class="ruby-identifier">first</span>.<span class="ruby-identifier">to_i64</span>.<span class="ruby-identifier">to_f</span>
|
418
|
-
}
|
419
|
-
<span class="ruby-keyword">end</span></pre>
|
420
|
-
</div><!-- get_doc_count-source -->
|
421
|
-
|
422
|
-
</div>
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
</div><!-- get_doc_count-method -->
|
428
|
-
|
429
|
-
|
430
|
-
<div id="get_total_word_count-method" class="method-detail ">
|
431
|
-
<a name="method-i-get_total_word_count"></a>
|
432
|
-
|
433
|
-
|
434
|
-
<div class="method-heading">
|
435
|
-
<span class="method-name">get_total_word_count</span><span
|
436
|
-
class="method-args">(klass)</span>
|
437
|
-
<span class="method-click-advice">click to toggle source</span>
|
438
|
-
</div>
|
439
|
-
|
440
|
-
|
441
|
-
<div class="method-description">
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
<div class="method-source-code" id="get_total_word_count-source">
|
448
|
-
<pre>
|
449
|
-
<span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 67</span>
|
450
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">get_total_word_count</span>(<span class="ruby-identifier">klass</span>)
|
451
|
-
<span class="ruby-ivar">@klass_word_counts</span>.<span class="ruby-identifier">fetch</span>(<span class="ruby-identifier">klass</span>) {
|
452
|
-
<span class="ruby-ivar">@klass_word_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">get</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-string">"totals:wordcount"</span>).<span class="ruby-identifier">first</span>.<span class="ruby-identifier">to_i64</span>.<span class="ruby-identifier">to_f</span>
|
453
|
-
}
|
454
|
-
<span class="ruby-keyword">end</span></pre>
|
455
|
-
</div><!-- get_total_word_count-source -->
|
456
|
-
|
457
|
-
</div>
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
</div><!-- get_total_word_count-method -->
|
463
|
-
|
464
|
-
|
465
|
-
<div id="get_vocabulary_sizes-method" class="method-detail ">
|
466
|
-
<a name="method-i-get_vocabulary_sizes"></a>
|
467
|
-
|
468
|
-
|
469
|
-
<div class="method-heading">
|
470
|
-
<span class="method-name">get_vocabulary_sizes</span><span
|
471
|
-
class="method-args">()</span>
|
472
|
-
<span class="method-click-advice">click to toggle source</span>
|
473
|
-
</div>
|
474
|
-
|
475
|
-
|
476
|
-
<div class="method-description">
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
<div class="method-source-code" id="get_vocabulary_sizes-source">
|
483
|
-
<pre>
|
484
|
-
<span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 63</span>
|
485
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">get_vocabulary_sizes</span>
|
486
|
-
<span class="ruby-identifier">get_summary</span> <span class="ruby-string">"totals:vocabsize"</span>
|
487
|
-
<span class="ruby-keyword">end</span></pre>
|
488
|
-
</div><!-- get_vocabulary_sizes-source -->
|
489
|
-
|
490
|
-
</div>
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
</div><!-- get_vocabulary_sizes-method -->
|
496
|
-
|
497
|
-
|
498
|
-
<div id="get_word_counts-method" class="method-detail ">
|
499
|
-
<a name="method-i-get_word_counts"></a>
|
500
|
-
|
501
|
-
|
502
|
-
<div class="method-heading">
|
503
|
-
<span class="method-name">get_word_counts</span><span
|
504
|
-
class="method-args">(word)</span>
|
505
|
-
<span class="method-click-advice">click to toggle source</span>
|
506
|
-
</div>
|
507
|
-
|
508
|
-
|
509
|
-
<div class="method-description">
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
<div class="method-source-code" id="get_word_counts-source">
|
516
|
-
<pre>
|
517
|
-
<span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 49</span>
|
518
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">get_word_counts</span>(<span class="ruby-identifier">word</span>)
|
519
|
-
<span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value">0</span>)
|
520
|
-
<span class="ruby-identifier">row</span> = <span class="ruby-identifier">freq_table</span>.<span class="ruby-identifier">get_row</span>(<span class="ruby-identifier">word</span>)
|
521
|
-
<span class="ruby-keyword">return</span> <span class="ruby-identifier">counts</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">row</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">==</span> <span class="ruby-value">0</span>
|
522
|
-
|
523
|
-
<span class="ruby-identifier">row</span>.<span class="ruby-identifier">first</span>.<span class="ruby-identifier">columns</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">colname</span>, <span class="ruby-identifier">cell</span><span class="ruby-operator">|</span>
|
524
|
-
<span class="ruby-identifier">classname</span> = <span class="ruby-identifier">colname</span>.<span class="ruby-identifier">split</span>(<span class="ruby-string">':'</span>)[<span class="ruby-value">1</span>].<span class="ruby-identifier">intern</span>
|
525
|
-
<span class="ruby-comment"># in case untrain has been called too many times</span>
|
526
|
-
<span class="ruby-identifier">counts</span>[<span class="ruby-identifier">classname</span>] = [<span class="ruby-identifier">cell</span>.<span class="ruby-identifier">to_i64</span>.<span class="ruby-identifier">to_f</span>, <span class="ruby-value">0</span>].<span class="ruby-identifier">max</span>
|
527
|
-
}
|
528
|
-
|
529
|
-
<span class="ruby-identifier">counts</span>
|
530
|
-
<span class="ruby-keyword">end</span></pre>
|
531
|
-
</div><!-- get_word_counts-source -->
|
532
|
-
|
533
|
-
</div>
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
</div><!-- get_word_counts-method -->
|
539
|
-
|
540
|
-
|
541
|
-
<div id="incr_doc_count-method" class="method-detail ">
|
542
|
-
<a name="method-i-incr_doc_count"></a>
|
543
|
-
|
544
|
-
|
545
|
-
<div class="method-heading">
|
546
|
-
<span class="method-name">incr_doc_count</span><span
|
547
|
-
class="method-args">(klass, count)</span>
|
548
|
-
<span class="method-click-advice">click to toggle source</span>
|
549
|
-
</div>
|
550
|
-
|
551
|
-
|
552
|
-
<div class="method-description">
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
<div class="method-source-code" id="incr_doc_count-source">
|
559
|
-
<pre>
|
560
|
-
<span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 95</span>
|
561
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">incr_doc_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">count</span>)
|
562
|
-
<span class="ruby-ivar">@klass_doc_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">klass</span>, <span class="ruby-string">"totals:doccount"</span>, <span class="ruby-identifier">count</span>
|
563
|
-
<span class="ruby-keyword">end</span></pre>
|
564
|
-
</div><!-- incr_doc_count-source -->
|
565
|
-
|
566
|
-
</div>
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
</div><!-- incr_doc_count-method -->
|
572
|
-
|
573
|
-
|
574
|
-
<div id="incr_total_word_count-method" class="method-detail ">
|
575
|
-
<a name="method-i-incr_total_word_count"></a>
|
576
|
-
|
577
|
-
|
578
|
-
<div class="method-heading">
|
579
|
-
<span class="method-name">incr_total_word_count</span><span
|
580
|
-
class="method-args">(klass, count)</span>
|
581
|
-
<span class="method-click-advice">click to toggle source</span>
|
582
|
-
</div>
|
583
|
-
|
584
|
-
|
585
|
-
<div class="method-description">
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
<div class="method-source-code" id="incr_total_word_count-source">
|
592
|
-
<pre>
|
593
|
-
<span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 91</span>
|
594
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">incr_total_word_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">count</span>)
|
595
|
-
<span class="ruby-ivar">@klass_word_counts</span>[<span class="ruby-identifier">klass</span>] = <span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">klass</span>, <span class="ruby-string">"totals:wordcount"</span>, <span class="ruby-identifier">count</span>
|
596
|
-
<span class="ruby-keyword">end</span></pre>
|
597
|
-
</div><!-- incr_total_word_count-source -->
|
598
|
-
|
599
|
-
</div>
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
</div><!-- incr_total_word_count-method -->
|
605
|
-
|
606
|
-
|
607
|
-
<div id="incr_word_count-method" class="method-detail ">
|
608
|
-
<a name="method-i-incr_word_count"></a>
|
609
|
-
|
610
|
-
|
611
|
-
<div class="method-heading">
|
612
|
-
<span class="method-name">incr_word_count</span><span
|
613
|
-
class="method-args">(klass, word, count)</span>
|
614
|
-
<span class="method-click-advice">click to toggle source</span>
|
615
|
-
</div>
|
616
|
-
|
617
|
-
|
618
|
-
<div class="method-description">
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
<div class="method-source-code" id="incr_word_count-source">
|
625
|
-
<pre>
|
626
|
-
<span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 79</span>
|
627
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">incr_word_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span>)
|
628
|
-
<span class="ruby-identifier">size</span> = <span class="ruby-identifier">freq_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">word</span>, <span class="ruby-node">"classes:#{klass.to_s}"</span>, <span class="ruby-identifier">count</span>
|
629
|
-
<span class="ruby-comment"># if this is a new word, increase the klass's vocab size. If the new word</span>
|
630
|
-
<span class="ruby-comment"># count is 0, then we need to decrement our vocab size</span>
|
631
|
-
<span class="ruby-keyword">if</span> <span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">count</span>
|
632
|
-
<span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">klass</span>, <span class="ruby-string">"totals:vocabsize"</span>
|
633
|
-
<span class="ruby-keyword">elsif</span> <span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-value">0</span>
|
634
|
-
<span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">atomic_increment</span> <span class="ruby-identifier">klass</span>, <span class="ruby-string">"totals:vocabsize"</span>, <span class="ruby-value">-1</span>
|
635
|
-
<span class="ruby-keyword">end</span>
|
636
|
-
<span class="ruby-identifier">size</span>
|
637
|
-
<span class="ruby-keyword">end</span></pre>
|
638
|
-
</div><!-- incr_word_count-source -->
|
639
|
-
|
640
|
-
</div>
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
</div><!-- incr_word_count-method -->
|
646
|
-
|
647
|
-
|
648
|
-
<div id="init_tables-method" class="method-detail ">
|
649
|
-
<a name="method-i-init_tables"></a>
|
650
|
-
|
651
|
-
|
652
|
-
<div class="method-heading">
|
653
|
-
<span class="method-name">init_tables</span><span
|
654
|
-
class="method-args">()</span>
|
655
|
-
<span class="method-click-advice">click to toggle source</span>
|
656
|
-
</div>
|
657
|
-
|
658
|
-
|
659
|
-
<div class="method-description">
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
<div class="method-source-code" id="init_tables-source">
|
666
|
-
<pre>
|
667
|
-
<span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 39</span>
|
668
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">init_tables</span>
|
669
|
-
<span class="ruby-keyword">unless</span> <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">has_table?</span> <span class="ruby-ivar">@ftablename</span>
|
670
|
-
<span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">create_table</span> <span class="ruby-ivar">@ftablename</span>, <span class="ruby-string">"classes"</span>, <span class="ruby-string">"total"</span>
|
671
|
-
<span class="ruby-keyword">end</span>
|
672
|
-
|
673
|
-
<span class="ruby-keyword">unless</span> <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">has_table?</span> <span class="ruby-ivar">@stablename</span>
|
674
|
-
<span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">create_table</span> <span class="ruby-ivar">@stablename</span>, <span class="ruby-string">"totals"</span>
|
675
|
-
<span class="ruby-keyword">end</span>
|
676
|
-
<span class="ruby-keyword">end</span></pre>
|
677
|
-
</div><!-- init_tables-source -->
|
678
|
-
|
679
|
-
</div>
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
</div><!-- init_tables-method -->
|
685
|
-
|
686
|
-
|
687
|
-
<div id="reset-method" class="method-detail ">
|
688
|
-
<a name="method-i-reset"></a>
|
689
|
-
|
690
|
-
|
691
|
-
<div class="method-heading">
|
692
|
-
<span class="method-name">reset</span><span
|
693
|
-
class="method-args">()</span>
|
694
|
-
<span class="method-click-advice">click to toggle source</span>
|
695
|
-
</div>
|
696
|
-
|
697
|
-
|
698
|
-
<div class="method-description">
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
<div class="method-source-code" id="reset-source">
|
705
|
-
<pre>
|
706
|
-
<span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 25</span>
|
707
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">reset</span>
|
708
|
-
<span class="ruby-identifier">drop_tables</span>
|
709
|
-
<span class="ruby-identifier">init_tables</span>
|
710
|
-
<span class="ruby-keyword">end</span></pre>
|
711
|
-
</div><!-- reset-source -->
|
712
|
-
|
713
|
-
</div>
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
</div><!-- reset-method -->
|
719
|
-
|
720
|
-
|
721
|
-
</div><!-- public-instance-method-details -->
|
722
|
-
|
723
|
-
<div id="protected-instance-method-details" class="method-section section">
|
724
|
-
<h3 class="section-header">Protected Instance Methods</h3>
|
725
|
-
|
726
|
-
|
727
|
-
<div id="freq_table-method" class="method-detail ">
|
728
|
-
<a name="method-i-freq_table"></a>
|
729
|
-
|
730
|
-
|
731
|
-
<div class="method-heading">
|
732
|
-
<span class="method-name">freq_table</span><span
|
733
|
-
class="method-args">()</span>
|
734
|
-
<span class="method-click-advice">click to toggle source</span>
|
735
|
-
</div>
|
736
|
-
|
737
|
-
|
738
|
-
<div class="method-description">
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
<div class="method-source-code" id="freq_table-source">
|
745
|
-
<pre>
|
746
|
-
<span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 120</span>
|
747
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">freq_table</span>
|
748
|
-
<span class="ruby-ivar">@ftable</span> <span class="ruby-operator">||=</span> <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">get_table</span> <span class="ruby-ivar">@ftablename</span>
|
749
|
-
<span class="ruby-keyword">end</span></pre>
|
750
|
-
</div><!-- freq_table-source -->
|
751
|
-
|
752
|
-
</div>
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
</div><!-- freq_table-method -->
|
758
|
-
|
759
|
-
|
760
|
-
<div id="get_summary-method" class="method-detail ">
|
761
|
-
<a name="method-i-get_summary"></a>
|
762
|
-
|
763
|
-
|
764
|
-
<div class="method-heading">
|
765
|
-
<span class="method-name">get_summary</span><span
|
766
|
-
class="method-args">(name)</span>
|
767
|
-
<span class="method-click-advice">click to toggle source</span>
|
768
|
-
</div>
|
769
|
-
|
770
|
-
|
771
|
-
<div class="method-description">
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
<div class="method-source-code" id="get_summary-source">
|
778
|
-
<pre>
|
779
|
-
<span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 108</span>
|
780
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">get_summary</span>(<span class="ruby-identifier">name</span>)
|
781
|
-
<span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">0</span>
|
782
|
-
<span class="ruby-identifier">summary_table</span>.<span class="ruby-identifier">create_scanner</span>(<span class="ruby-string">""</span>, <span class="ruby-identifier">name</span>) { <span class="ruby-operator">|</span><span class="ruby-identifier">row</span><span class="ruby-operator">|</span>
|
783
|
-
<span class="ruby-identifier">counts</span>[<span class="ruby-identifier">row</span>.<span class="ruby-identifier">row</span>.<span class="ruby-identifier">intern</span>] = <span class="ruby-identifier">row</span>.<span class="ruby-identifier">columns</span>[<span class="ruby-identifier">name</span>].<span class="ruby-identifier">to_i64</span>
|
784
|
-
}
|
785
|
-
<span class="ruby-identifier">counts</span>
|
786
|
-
<span class="ruby-keyword">end</span></pre>
|
787
|
-
</div><!-- get_summary-source -->
|
788
|
-
|
789
|
-
</div>
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
</div><!-- get_summary-method -->
|
795
|
-
|
796
|
-
|
797
|
-
<div id="summary_table-method" class="method-detail ">
|
798
|
-
<a name="method-i-summary_table"></a>
|
799
|
-
|
800
|
-
|
801
|
-
<div class="method-heading">
|
802
|
-
<span class="method-name">summary_table</span><span
|
803
|
-
class="method-args">()</span>
|
804
|
-
<span class="method-click-advice">click to toggle source</span>
|
805
|
-
</div>
|
806
|
-
|
807
|
-
|
808
|
-
<div class="method-description">
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
<div class="method-source-code" id="summary_table-source">
|
815
|
-
<pre>
|
816
|
-
<span class="ruby-comment"># File lib/ankusa/hbase_storage.rb, line 116</span>
|
817
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">summary_table</span>
|
818
|
-
<span class="ruby-ivar">@stable</span> <span class="ruby-operator">||=</span> <span class="ruby-ivar">@hbase</span>.<span class="ruby-identifier">get_table</span> <span class="ruby-ivar">@stablename</span>
|
819
|
-
<span class="ruby-keyword">end</span></pre>
|
820
|
-
</div><!-- summary_table-source -->
|
821
|
-
|
822
|
-
</div>
|
823
|
-
|
824
|
-
|
825
|
-
|
826
|
-
|
827
|
-
</div><!-- summary_table-method -->
|
828
|
-
|
829
|
-
|
830
|
-
</div><!-- protected-instance-method-details -->
|
831
|
-
|
832
|
-
</div><!-- 5Buntitled-5D -->
|
833
|
-
|
834
|
-
|
835
|
-
</div><!-- documentation -->
|
836
|
-
|
837
|
-
<div id="validator-badges">
|
838
|
-
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
839
|
-
<p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish
|
840
|
-
Rdoc Generator</a> 2</small>.</p>
|
841
|
-
</div>
|
842
|
-
|
843
|
-
</body>
|
844
|
-
</html>
|
845
|
-
|