ankusa 0.0.13 → 0.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -0
- data/Gemfile.lock +16 -0
- data/README.rdoc +5 -3
- data/Rakefile +5 -5
- data/lib/ankusa/naive_bayes.rb +3 -3
- data/lib/ankusa/version.rb +1 -1
- metadata +36 -100
- data/docs/Ankusa.html +0 -229
- data/docs/Ankusa/CassandraStorage.html +0 -801
- data/docs/Ankusa/Classifier.html +0 -440
- data/docs/Ankusa/FileSystemStorage.html +0 -376
- data/docs/Ankusa/HBaseStorage.html +0 -845
- data/docs/Ankusa/KLDivergenceClassifier.html +0 -265
- data/docs/Ankusa/MemoryStorage.html +0 -672
- data/docs/Ankusa/NaiveBayesClassifier.html +0 -313
- data/docs/Ankusa/TextHash.html +0 -390
- data/docs/README_rdoc.html +0 -268
- data/docs/String.html +0 -241
- data/docs/created.rid +0 -14
- data/docs/images/brick.png +0 -0
- data/docs/images/brick_link.png +0 -0
- data/docs/images/bug.png +0 -0
- data/docs/images/bullet_black.png +0 -0
- data/docs/images/bullet_toggle_minus.png +0 -0
- data/docs/images/bullet_toggle_plus.png +0 -0
- data/docs/images/date.png +0 -0
- data/docs/images/find.png +0 -0
- data/docs/images/loadingAnimation.gif +0 -0
- data/docs/images/macFFBgHack.png +0 -0
- data/docs/images/package.png +0 -0
- data/docs/images/page_green.png +0 -0
- data/docs/images/page_white_text.png +0 -0
- data/docs/images/page_white_width.png +0 -0
- data/docs/images/plugin.png +0 -0
- data/docs/images/ruby.png +0 -0
- data/docs/images/tag_green.png +0 -0
- data/docs/images/wrench.png +0 -0
- data/docs/images/wrench_orange.png +0 -0
- data/docs/images/zoom.png +0 -0
- data/docs/index.html +0 -212
- data/docs/js/darkfish.js +0 -116
- data/docs/js/jquery.js +0 -32
- data/docs/js/quicksearch.js +0 -114
- data/docs/js/thickbox-compressed.js +0 -10
- data/docs/lib/ankusa/cassandra_storage_rb.html +0 -54
- data/docs/lib/ankusa/classifier_rb.html +0 -52
- data/docs/lib/ankusa/extensions_rb.html +0 -54
- data/docs/lib/ankusa/file_system_storage_rb.html +0 -54
- data/docs/lib/ankusa/hasher_rb.html +0 -56
- data/docs/lib/ankusa/hbase_storage_rb.html +0 -54
- data/docs/lib/ankusa/kl_divergence_rb.html +0 -52
- data/docs/lib/ankusa/memory_storage_rb.html +0 -52
- data/docs/lib/ankusa/naive_bayes_rb.html +0 -52
- data/docs/lib/ankusa/stopwords_rb.html +0 -52
- data/docs/lib/ankusa/version_rb.html +0 -52
- data/docs/lib/ankusa_rb.html +0 -64
- data/docs/rdoc.css +0 -759
data/docs/Ankusa/Classifier.html
DELETED
@@ -1,440 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
-
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
4
|
-
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
5
|
-
<head>
|
6
|
-
<meta content="text/html; charset=UTF-8" http-equiv="Content-Type" />
|
7
|
-
|
8
|
-
<title>Module: Ankusa::Classifier</title>
|
9
|
-
|
10
|
-
<link rel="stylesheet" href="../rdoc.css" type="text/css" media="screen" />
|
11
|
-
|
12
|
-
<script src="../js/jquery.js" type="text/javascript" charset="utf-8"></script>
|
13
|
-
<script src="../js/thickbox-compressed.js" type="text/javascript" charset="utf-8"></script>
|
14
|
-
<script src="../js/quicksearch.js" type="text/javascript" charset="utf-8"></script>
|
15
|
-
<script src="../js/darkfish.js" type="text/javascript" charset="utf-8"></script>
|
16
|
-
|
17
|
-
</head>
|
18
|
-
<body id="top" class="module">
|
19
|
-
|
20
|
-
<div id="metadata">
|
21
|
-
<div id="home-metadata">
|
22
|
-
<div id="home-section" class="section">
|
23
|
-
<h3 class="section-header">
|
24
|
-
<a href="../index.html">Home</a>
|
25
|
-
<a href="../index.html#classes">Classes</a>
|
26
|
-
<a href="../index.html#methods">Methods</a>
|
27
|
-
</h3>
|
28
|
-
</div>
|
29
|
-
</div>
|
30
|
-
|
31
|
-
<div id="file-metadata">
|
32
|
-
<div id="file-list-section" class="section">
|
33
|
-
<h3 class="section-header">In Files</h3>
|
34
|
-
<div class="section-body">
|
35
|
-
<ul>
|
36
|
-
|
37
|
-
<li><a href="../lib/ankusa/classifier_rb.html?TB_iframe=true&height=550&width=785"
|
38
|
-
class="thickbox" title="lib/ankusa/classifier.rb">lib/ankusa/classifier.rb</a></li>
|
39
|
-
|
40
|
-
</ul>
|
41
|
-
</div>
|
42
|
-
</div>
|
43
|
-
|
44
|
-
|
45
|
-
</div>
|
46
|
-
|
47
|
-
<div id="class-metadata">
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
<!-- Method Quickref -->
|
56
|
-
<div id="method-list-section" class="section">
|
57
|
-
<h3 class="section-header">Methods</h3>
|
58
|
-
<ul class="link-list">
|
59
|
-
|
60
|
-
<li><a href="#method-c-new">::new</a></li>
|
61
|
-
|
62
|
-
<li><a href="#method-i-doc_count_totals">#doc_count_totals</a></li>
|
63
|
-
|
64
|
-
<li><a href="#method-i-get_word_probs">#get_word_probs</a></li>
|
65
|
-
|
66
|
-
<li><a href="#method-i-train">#train</a></li>
|
67
|
-
|
68
|
-
<li><a href="#method-i-untrain">#untrain</a></li>
|
69
|
-
|
70
|
-
<li><a href="#method-i-vocab_sizes">#vocab_sizes</a></li>
|
71
|
-
|
72
|
-
</ul>
|
73
|
-
</div>
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
</div>
|
78
|
-
|
79
|
-
<div id="project-metadata">
|
80
|
-
|
81
|
-
|
82
|
-
<div id="fileindex-section" class="section project-section">
|
83
|
-
<h3 class="section-header">Files</h3>
|
84
|
-
<ul>
|
85
|
-
|
86
|
-
<li class="file"><a href="../README_rdoc.html">README.rdoc</a></li>
|
87
|
-
|
88
|
-
</ul>
|
89
|
-
</div>
|
90
|
-
|
91
|
-
|
92
|
-
<div id="classindex-section" class="section project-section">
|
93
|
-
<h3 class="section-header">Class/Module Index
|
94
|
-
<span class="search-toggle"><img src="../images/find.png"
|
95
|
-
height="16" width="16" alt="[+]"
|
96
|
-
title="show/hide quicksearch" /></span></h3>
|
97
|
-
<form action="#" method="get" accept-charset="utf-8" class="initially-hidden">
|
98
|
-
<fieldset>
|
99
|
-
<legend>Quicksearch</legend>
|
100
|
-
<input type="text" name="quicksearch" value=""
|
101
|
-
class="quicksearch-field" />
|
102
|
-
</fieldset>
|
103
|
-
</form>
|
104
|
-
|
105
|
-
<ul class="link-list">
|
106
|
-
|
107
|
-
<li><a href="../Ankusa.html">Ankusa</a></li>
|
108
|
-
|
109
|
-
<li><a href="../Ankusa/CassandraStorage.html">Ankusa::CassandraStorage</a></li>
|
110
|
-
|
111
|
-
<li><a href="../Ankusa/Classifier.html">Ankusa::Classifier</a></li>
|
112
|
-
|
113
|
-
<li><a href="../Ankusa/FileSystemStorage.html">Ankusa::FileSystemStorage</a></li>
|
114
|
-
|
115
|
-
<li><a href="../Ankusa/HBaseStorage.html">Ankusa::HBaseStorage</a></li>
|
116
|
-
|
117
|
-
<li><a href="../Ankusa/KLDivergenceClassifier.html">Ankusa::KLDivergenceClassifier</a></li>
|
118
|
-
|
119
|
-
<li><a href="../Ankusa/MemoryStorage.html">Ankusa::MemoryStorage</a></li>
|
120
|
-
|
121
|
-
<li><a href="../Ankusa/NaiveBayesClassifier.html">Ankusa::NaiveBayesClassifier</a></li>
|
122
|
-
|
123
|
-
<li><a href="../Ankusa/TextHash.html">Ankusa::TextHash</a></li>
|
124
|
-
|
125
|
-
<li><a href="../String.html">String</a></li>
|
126
|
-
|
127
|
-
</ul>
|
128
|
-
<div id="no-class-search-results" style="display: none;">No matching classes.</div>
|
129
|
-
</div>
|
130
|
-
|
131
|
-
|
132
|
-
</div>
|
133
|
-
</div>
|
134
|
-
|
135
|
-
<div id="documentation">
|
136
|
-
<h1 class="module">Ankusa::Classifier</h1>
|
137
|
-
|
138
|
-
<div id="description" class="description">
|
139
|
-
|
140
|
-
</div><!-- description -->
|
141
|
-
|
142
|
-
|
143
|
-
<div id="5Buntitled-5D" class="documentation-section">
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
<!-- Attributes -->
|
152
|
-
<div id="attribute-method-details" class="method-section section">
|
153
|
-
<h3 class="section-header">Attributes</h3>
|
154
|
-
|
155
|
-
|
156
|
-
<div id="classnames-attribute-method" class="method-detail">
|
157
|
-
<a name="classnames"></a>
|
158
|
-
|
159
|
-
<div class="method-heading attribute-method-heading">
|
160
|
-
<span class="method-name">classnames</span><span
|
161
|
-
class="attribute-access-type">[R]</span>
|
162
|
-
</div>
|
163
|
-
|
164
|
-
<div class="method-description">
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
</div>
|
169
|
-
</div>
|
170
|
-
|
171
|
-
</div><!-- attribute-method-details -->
|
172
|
-
|
173
|
-
|
174
|
-
<!-- Methods -->
|
175
|
-
|
176
|
-
<div id="public-class-method-details" class="method-section section">
|
177
|
-
<h3 class="section-header">Public Class Methods</h3>
|
178
|
-
|
179
|
-
|
180
|
-
<div id="new-method" class="method-detail ">
|
181
|
-
<a name="method-c-new"></a>
|
182
|
-
|
183
|
-
|
184
|
-
<div class="method-heading">
|
185
|
-
<span class="method-name">new</span><span
|
186
|
-
class="method-args">(storage)</span>
|
187
|
-
<span class="method-click-advice">click to toggle source</span>
|
188
|
-
</div>
|
189
|
-
|
190
|
-
|
191
|
-
<div class="method-description">
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
<div class="method-source-code" id="new-source">
|
198
|
-
<pre>
|
199
|
-
<span class="ruby-comment"># File lib/ankusa/classifier.rb, line 6</span>
|
200
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">storage</span>)
|
201
|
-
<span class="ruby-ivar">@storage</span> = <span class="ruby-identifier">storage</span>
|
202
|
-
<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">init_tables</span>
|
203
|
-
<span class="ruby-ivar">@classnames</span> = <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">classnames</span>
|
204
|
-
<span class="ruby-keyword">end</span></pre>
|
205
|
-
</div><!-- new-source -->
|
206
|
-
|
207
|
-
</div>
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
</div><!-- new-method -->
|
213
|
-
|
214
|
-
|
215
|
-
</div><!-- public-class-method-details -->
|
216
|
-
|
217
|
-
<div id="public-instance-method-details" class="method-section section">
|
218
|
-
<h3 class="section-header">Public Instance Methods</h3>
|
219
|
-
|
220
|
-
|
221
|
-
<div id="train-method" class="method-detail ">
|
222
|
-
<a name="method-i-train"></a>
|
223
|
-
|
224
|
-
|
225
|
-
<div class="method-heading">
|
226
|
-
<span class="method-name">train</span><span
|
227
|
-
class="method-args">(klass, text)</span>
|
228
|
-
<span class="method-click-advice">click to toggle source</span>
|
229
|
-
</div>
|
230
|
-
|
231
|
-
|
232
|
-
<div class="method-description">
|
233
|
-
|
234
|
-
<p>text can be either an array of strings or a string klass is a symbol</p>
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
<div class="method-source-code" id="train-source">
|
239
|
-
<pre>
|
240
|
-
<span class="ruby-comment"># File lib/ankusa/classifier.rb, line 14</span>
|
241
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">train</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">text</span>)
|
242
|
-
<span class="ruby-identifier">th</span> = <span class="ruby-constant">TextHash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">text</span>)
|
243
|
-
<span class="ruby-identifier">th</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
|
244
|
-
<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_word_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span>
|
245
|
-
<span class="ruby-keyword">yield</span> <span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">block_given?</span>
|
246
|
-
}
|
247
|
-
<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_total_word_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-identifier">th</span>.<span class="ruby-identifier">word_count</span>
|
248
|
-
<span class="ruby-identifier">doccount</span> = (<span class="ruby-identifier">text</span>.<span class="ruby-identifier">kind_of?</span> <span class="ruby-constant">Array</span>) <span class="ruby-operator">?</span> <span class="ruby-identifier">text</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">:</span> <span class="ruby-value">1</span>
|
249
|
-
<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_doc_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-identifier">doccount</span>
|
250
|
-
<span class="ruby-ivar">@classnames</span> <span class="ruby-operator"><<</span> <span class="ruby-identifier">klass</span> <span class="ruby-keyword">unless</span> <span class="ruby-ivar">@classnames</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">klass</span>
|
251
|
-
<span class="ruby-comment"># cache is now dirty of these vars</span>
|
252
|
-
<span class="ruby-ivar">@doc_count_totals</span> = <span class="ruby-keyword">nil</span>
|
253
|
-
<span class="ruby-ivar">@vocab_sizes</span> = <span class="ruby-keyword">nil</span>
|
254
|
-
<span class="ruby-identifier">th</span>
|
255
|
-
<span class="ruby-keyword">end</span></pre>
|
256
|
-
</div><!-- train-source -->
|
257
|
-
|
258
|
-
</div>
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
</div><!-- train-method -->
|
264
|
-
|
265
|
-
|
266
|
-
<div id="untrain-method" class="method-detail ">
|
267
|
-
<a name="method-i-untrain"></a>
|
268
|
-
|
269
|
-
|
270
|
-
<div class="method-heading">
|
271
|
-
<span class="method-name">untrain</span><span
|
272
|
-
class="method-args">(klass, text)</span>
|
273
|
-
<span class="method-click-advice">click to toggle source</span>
|
274
|
-
</div>
|
275
|
-
|
276
|
-
|
277
|
-
<div class="method-description">
|
278
|
-
|
279
|
-
<p>text can be either an array of strings or a string klass is a symbol</p>
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
<div class="method-source-code" id="untrain-source">
|
284
|
-
<pre>
|
285
|
-
<span class="ruby-comment"># File lib/ankusa/classifier.rb, line 32</span>
|
286
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">untrain</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">text</span>)
|
287
|
-
<span class="ruby-identifier">th</span> = <span class="ruby-constant">TextHash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">text</span>)
|
288
|
-
<span class="ruby-identifier">th</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
|
289
|
-
<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_word_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-identifier">word</span>, <span class="ruby-operator">-</span><span class="ruby-identifier">count</span>
|
290
|
-
<span class="ruby-keyword">yield</span> <span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">block_given?</span>
|
291
|
-
}
|
292
|
-
<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_total_word_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-operator">-</span><span class="ruby-identifier">th</span>.<span class="ruby-identifier">word_count</span>
|
293
|
-
<span class="ruby-identifier">doccount</span> = (<span class="ruby-identifier">text</span>.<span class="ruby-identifier">kind_of?</span> <span class="ruby-constant">Array</span>) <span class="ruby-operator">?</span> <span class="ruby-identifier">text</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">:</span> <span class="ruby-value">1</span>
|
294
|
-
<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">incr_doc_count</span> <span class="ruby-identifier">klass</span>, <span class="ruby-operator">-</span><span class="ruby-identifier">doccount</span>
|
295
|
-
<span class="ruby-comment"># cache is now dirty of these vars</span>
|
296
|
-
<span class="ruby-ivar">@doc_count_totals</span> = <span class="ruby-keyword">nil</span>
|
297
|
-
<span class="ruby-ivar">@vocab_sizes</span> = <span class="ruby-keyword">nil</span>
|
298
|
-
<span class="ruby-identifier">th</span>
|
299
|
-
<span class="ruby-keyword">end</span></pre>
|
300
|
-
</div><!-- untrain-source -->
|
301
|
-
|
302
|
-
</div>
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
</div><!-- untrain-method -->
|
308
|
-
|
309
|
-
|
310
|
-
</div><!-- public-instance-method-details -->
|
311
|
-
|
312
|
-
<div id="protected-instance-method-details" class="method-section section">
|
313
|
-
<h3 class="section-header">Protected Instance Methods</h3>
|
314
|
-
|
315
|
-
|
316
|
-
<div id="doc_count_totals-method" class="method-detail ">
|
317
|
-
<a name="method-i-doc_count_totals"></a>
|
318
|
-
|
319
|
-
|
320
|
-
<div class="method-heading">
|
321
|
-
<span class="method-name">doc_count_totals</span><span
|
322
|
-
class="method-args">()</span>
|
323
|
-
<span class="method-click-advice">click to toggle source</span>
|
324
|
-
</div>
|
325
|
-
|
326
|
-
|
327
|
-
<div class="method-description">
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
<div class="method-source-code" id="doc_count_totals-source">
|
334
|
-
<pre>
|
335
|
-
<span class="ruby-comment"># File lib/ankusa/classifier.rb, line 62</span>
|
336
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">doc_count_totals</span>
|
337
|
-
<span class="ruby-ivar">@doc_count_totals</span> <span class="ruby-operator">||=</span> <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">doc_count_totals</span>
|
338
|
-
<span class="ruby-keyword">end</span></pre>
|
339
|
-
</div><!-- doc_count_totals-source -->
|
340
|
-
|
341
|
-
</div>
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
</div><!-- doc_count_totals-method -->
|
347
|
-
|
348
|
-
|
349
|
-
<div id="get_word_probs-method" class="method-detail ">
|
350
|
-
<a name="method-i-get_word_probs"></a>
|
351
|
-
|
352
|
-
|
353
|
-
<div class="method-heading">
|
354
|
-
<span class="method-name">get_word_probs</span><span
|
355
|
-
class="method-args">(word, classnames)</span>
|
356
|
-
<span class="method-click-advice">click to toggle source</span>
|
357
|
-
</div>
|
358
|
-
|
359
|
-
|
360
|
-
<div class="method-description">
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
<div class="method-source-code" id="get_word_probs-source">
|
367
|
-
<pre>
|
368
|
-
<span class="ruby-comment"># File lib/ankusa/classifier.rb, line 48</span>
|
369
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">get_word_probs</span>(<span class="ruby-identifier">word</span>, <span class="ruby-identifier">classnames</span>)
|
370
|
-
<span class="ruby-identifier">probs</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">0</span>
|
371
|
-
<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">get_word_counts</span>(<span class="ruby-identifier">word</span>).<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span>,<span class="ruby-identifier">v</span><span class="ruby-operator">|</span> <span class="ruby-identifier">probs</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-identifier">v</span> <span class="ruby-keyword">if</span> <span class="ruby-identifier">classnames</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">k</span> }
|
372
|
-
<span class="ruby-identifier">vs</span> = <span class="ruby-identifier">vocab_sizes</span>
|
373
|
-
<span class="ruby-identifier">classnames</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">cn</span><span class="ruby-operator">|</span>
|
374
|
-
<span class="ruby-comment"># if we've never seen the class, the word prob is 0</span>
|
375
|
-
<span class="ruby-keyword">next</span> <span class="ruby-keyword">unless</span> <span class="ruby-identifier">vs</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">cn</span>
|
376
|
-
|
377
|
-
<span class="ruby-comment"># use a laplacian smoother</span>
|
378
|
-
<span class="ruby-identifier">probs</span>[<span class="ruby-identifier">cn</span>] = (<span class="ruby-identifier">probs</span>[<span class="ruby-identifier">cn</span>] <span class="ruby-operator">+</span> <span class="ruby-value">1</span>).<span class="ruby-identifier">to_f</span> <span class="ruby-operator">/</span> (<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">get_total_word_count</span>(<span class="ruby-identifier">cn</span>) <span class="ruby-operator">+</span> <span class="ruby-identifier">vs</span>[<span class="ruby-identifier">cn</span>]).<span class="ruby-identifier">to_f</span>
|
379
|
-
}
|
380
|
-
<span class="ruby-identifier">probs</span>
|
381
|
-
<span class="ruby-keyword">end</span></pre>
|
382
|
-
</div><!-- get_word_probs-source -->
|
383
|
-
|
384
|
-
</div>
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
</div><!-- get_word_probs-method -->
|
390
|
-
|
391
|
-
|
392
|
-
<div id="vocab_sizes-method" class="method-detail ">
|
393
|
-
<a name="method-i-vocab_sizes"></a>
|
394
|
-
|
395
|
-
|
396
|
-
<div class="method-heading">
|
397
|
-
<span class="method-name">vocab_sizes</span><span
|
398
|
-
class="method-args">()</span>
|
399
|
-
<span class="method-click-advice">click to toggle source</span>
|
400
|
-
</div>
|
401
|
-
|
402
|
-
|
403
|
-
<div class="method-description">
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
<div class="method-source-code" id="vocab_sizes-source">
|
410
|
-
<pre>
|
411
|
-
<span class="ruby-comment"># File lib/ankusa/classifier.rb, line 66</span>
|
412
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">vocab_sizes</span>
|
413
|
-
<span class="ruby-ivar">@vocab_sizes</span> <span class="ruby-operator">||=</span> <span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">get_vocabulary_sizes</span>
|
414
|
-
<span class="ruby-keyword">end</span></pre>
|
415
|
-
</div><!-- vocab_sizes-source -->
|
416
|
-
|
417
|
-
</div>
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
</div><!-- vocab_sizes-method -->
|
423
|
-
|
424
|
-
|
425
|
-
</div><!-- protected-instance-method-details -->
|
426
|
-
|
427
|
-
</div><!-- 5Buntitled-5D -->
|
428
|
-
|
429
|
-
|
430
|
-
</div><!-- documentation -->
|
431
|
-
|
432
|
-
<div id="validator-badges">
|
433
|
-
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
434
|
-
<p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish
|
435
|
-
Rdoc Generator</a> 2</small>.</p>
|
436
|
-
</div>
|
437
|
-
|
438
|
-
</body>
|
439
|
-
</html>
|
440
|
-
|
@@ -1,376 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
-
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
4
|
-
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
5
|
-
<head>
|
6
|
-
<meta content="text/html; charset=UTF-8" http-equiv="Content-Type" />
|
7
|
-
|
8
|
-
<title>Class: Ankusa::FileSystemStorage</title>
|
9
|
-
|
10
|
-
<link rel="stylesheet" href="../rdoc.css" type="text/css" media="screen" />
|
11
|
-
|
12
|
-
<script src="../js/jquery.js" type="text/javascript" charset="utf-8"></script>
|
13
|
-
<script src="../js/thickbox-compressed.js" type="text/javascript" charset="utf-8"></script>
|
14
|
-
<script src="../js/quicksearch.js" type="text/javascript" charset="utf-8"></script>
|
15
|
-
<script src="../js/darkfish.js" type="text/javascript" charset="utf-8"></script>
|
16
|
-
|
17
|
-
</head>
|
18
|
-
<body id="top" class="class">
|
19
|
-
|
20
|
-
<div id="metadata">
|
21
|
-
<div id="home-metadata">
|
22
|
-
<div id="home-section" class="section">
|
23
|
-
<h3 class="section-header">
|
24
|
-
<a href="../index.html">Home</a>
|
25
|
-
<a href="../index.html#classes">Classes</a>
|
26
|
-
<a href="../index.html#methods">Methods</a>
|
27
|
-
</h3>
|
28
|
-
</div>
|
29
|
-
</div>
|
30
|
-
|
31
|
-
<div id="file-metadata">
|
32
|
-
<div id="file-list-section" class="section">
|
33
|
-
<h3 class="section-header">In Files</h3>
|
34
|
-
<div class="section-body">
|
35
|
-
<ul>
|
36
|
-
|
37
|
-
<li><a href="../lib/ankusa/file_system_storage_rb.html?TB_iframe=true&height=550&width=785"
|
38
|
-
class="thickbox" title="lib/ankusa/file_system_storage.rb">lib/ankusa/file_system_storage.rb</a></li>
|
39
|
-
|
40
|
-
</ul>
|
41
|
-
</div>
|
42
|
-
</div>
|
43
|
-
|
44
|
-
|
45
|
-
</div>
|
46
|
-
|
47
|
-
<div id="class-metadata">
|
48
|
-
|
49
|
-
<!-- Parent Class -->
|
50
|
-
<div id="parent-class-section" class="section">
|
51
|
-
<h3 class="section-header">Parent</h3>
|
52
|
-
|
53
|
-
<p class="link">MemoryStorage</p>
|
54
|
-
|
55
|
-
</div>
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
<!-- Method Quickref -->
|
64
|
-
<div id="method-list-section" class="section">
|
65
|
-
<h3 class="section-header">Methods</h3>
|
66
|
-
<ul class="link-list">
|
67
|
-
|
68
|
-
<li><a href="#method-c-new">::new</a></li>
|
69
|
-
|
70
|
-
<li><a href="#method-i-drop_tables">#drop_tables</a></li>
|
71
|
-
|
72
|
-
<li><a href="#method-i-init_tables">#init_tables</a></li>
|
73
|
-
|
74
|
-
<li><a href="#method-i-reset">#reset</a></li>
|
75
|
-
|
76
|
-
<li><a href="#method-i-save">#save</a></li>
|
77
|
-
|
78
|
-
</ul>
|
79
|
-
</div>
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
</div>
|
84
|
-
|
85
|
-
<div id="project-metadata">
|
86
|
-
|
87
|
-
|
88
|
-
<div id="fileindex-section" class="section project-section">
|
89
|
-
<h3 class="section-header">Files</h3>
|
90
|
-
<ul>
|
91
|
-
|
92
|
-
<li class="file"><a href="../README_rdoc.html">README.rdoc</a></li>
|
93
|
-
|
94
|
-
</ul>
|
95
|
-
</div>
|
96
|
-
|
97
|
-
|
98
|
-
<div id="classindex-section" class="section project-section">
|
99
|
-
<h3 class="section-header">Class/Module Index
|
100
|
-
<span class="search-toggle"><img src="../images/find.png"
|
101
|
-
height="16" width="16" alt="[+]"
|
102
|
-
title="show/hide quicksearch" /></span></h3>
|
103
|
-
<form action="#" method="get" accept-charset="utf-8" class="initially-hidden">
|
104
|
-
<fieldset>
|
105
|
-
<legend>Quicksearch</legend>
|
106
|
-
<input type="text" name="quicksearch" value=""
|
107
|
-
class="quicksearch-field" />
|
108
|
-
</fieldset>
|
109
|
-
</form>
|
110
|
-
|
111
|
-
<ul class="link-list">
|
112
|
-
|
113
|
-
<li><a href="../Ankusa.html">Ankusa</a></li>
|
114
|
-
|
115
|
-
<li><a href="../Ankusa/CassandraStorage.html">Ankusa::CassandraStorage</a></li>
|
116
|
-
|
117
|
-
<li><a href="../Ankusa/Classifier.html">Ankusa::Classifier</a></li>
|
118
|
-
|
119
|
-
<li><a href="../Ankusa/FileSystemStorage.html">Ankusa::FileSystemStorage</a></li>
|
120
|
-
|
121
|
-
<li><a href="../Ankusa/HBaseStorage.html">Ankusa::HBaseStorage</a></li>
|
122
|
-
|
123
|
-
<li><a href="../Ankusa/KLDivergenceClassifier.html">Ankusa::KLDivergenceClassifier</a></li>
|
124
|
-
|
125
|
-
<li><a href="../Ankusa/MemoryStorage.html">Ankusa::MemoryStorage</a></li>
|
126
|
-
|
127
|
-
<li><a href="../Ankusa/NaiveBayesClassifier.html">Ankusa::NaiveBayesClassifier</a></li>
|
128
|
-
|
129
|
-
<li><a href="../Ankusa/TextHash.html">Ankusa::TextHash</a></li>
|
130
|
-
|
131
|
-
<li><a href="../String.html">String</a></li>
|
132
|
-
|
133
|
-
</ul>
|
134
|
-
<div id="no-class-search-results" style="display: none;">No matching classes.</div>
|
135
|
-
</div>
|
136
|
-
|
137
|
-
|
138
|
-
</div>
|
139
|
-
</div>
|
140
|
-
|
141
|
-
<div id="documentation">
|
142
|
-
<h1 class="class">Ankusa::FileSystemStorage</h1>
|
143
|
-
|
144
|
-
<div id="description" class="description">
|
145
|
-
|
146
|
-
</div><!-- description -->
|
147
|
-
|
148
|
-
|
149
|
-
<div id="5Buntitled-5D" class="documentation-section">
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
<!-- Methods -->
|
159
|
-
|
160
|
-
<div id="public-class-method-details" class="method-section section">
|
161
|
-
<h3 class="section-header">Public Class Methods</h3>
|
162
|
-
|
163
|
-
|
164
|
-
<div id="new-method" class="method-detail ">
|
165
|
-
<a name="method-c-new"></a>
|
166
|
-
|
167
|
-
|
168
|
-
<div class="method-heading">
|
169
|
-
<span class="method-name">new</span><span
|
170
|
-
class="method-args">(file)</span>
|
171
|
-
<span class="method-click-advice">click to toggle source</span>
|
172
|
-
</div>
|
173
|
-
|
174
|
-
|
175
|
-
<div class="method-description">
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
<div class="method-source-code" id="new-source">
|
182
|
-
<pre>
|
183
|
-
<span class="ruby-comment"># File lib/ankusa/file_system_storage.rb, line 7</span>
|
184
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">file</span>)
|
185
|
-
<span class="ruby-ivar">@file</span> = <span class="ruby-identifier">file</span>
|
186
|
-
<span class="ruby-identifier">init_tables</span>
|
187
|
-
<span class="ruby-keyword">end</span></pre>
|
188
|
-
</div><!-- new-source -->
|
189
|
-
|
190
|
-
</div>
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
</div><!-- new-method -->
|
196
|
-
|
197
|
-
|
198
|
-
</div><!-- public-class-method-details -->
|
199
|
-
|
200
|
-
<div id="public-instance-method-details" class="method-section section">
|
201
|
-
<h3 class="section-header">Public Instance Methods</h3>
|
202
|
-
|
203
|
-
|
204
|
-
<div id="drop_tables-method" class="method-detail ">
|
205
|
-
<a name="method-i-drop_tables"></a>
|
206
|
-
|
207
|
-
|
208
|
-
<div class="method-heading">
|
209
|
-
<span class="method-name">drop_tables</span><span
|
210
|
-
class="method-args">()</span>
|
211
|
-
<span class="method-click-advice">click to toggle source</span>
|
212
|
-
</div>
|
213
|
-
|
214
|
-
|
215
|
-
<div class="method-description">
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
<div class="method-source-code" id="drop_tables-source">
|
222
|
-
<pre>
|
223
|
-
<span class="ruby-comment"># File lib/ankusa/file_system_storage.rb, line 20</span>
|
224
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">drop_tables</span>
|
225
|
-
<span class="ruby-constant">File</span>.<span class="ruby-identifier">delete</span>(<span class="ruby-ivar">@file</span>) <span class="ruby-keyword">rescue</span> <span class="ruby-constant">Errno</span><span class="ruby-operator">::</span><span class="ruby-constant">ENOENT</span>
|
226
|
-
<span class="ruby-identifier">reset</span>
|
227
|
-
<span class="ruby-keyword">end</span></pre>
|
228
|
-
</div><!-- drop_tables-source -->
|
229
|
-
|
230
|
-
</div>
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
</div><!-- drop_tables-method -->
|
236
|
-
|
237
|
-
|
238
|
-
<div id="init_tables-method" class="method-detail ">
|
239
|
-
<a name="method-i-init_tables"></a>
|
240
|
-
|
241
|
-
|
242
|
-
<div class="method-heading">
|
243
|
-
<span class="method-name">init_tables</span><span
|
244
|
-
class="method-args">()</span>
|
245
|
-
<span class="method-click-advice">click to toggle source</span>
|
246
|
-
</div>
|
247
|
-
|
248
|
-
|
249
|
-
<div class="method-description">
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
<div class="method-source-code" id="init_tables-source">
|
256
|
-
<pre>
|
257
|
-
<span class="ruby-comment"># File lib/ankusa/file_system_storage.rb, line 25</span>
|
258
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">init_tables</span>
|
259
|
-
<span class="ruby-identifier">data</span> = {}
|
260
|
-
<span class="ruby-keyword">begin</span>
|
261
|
-
<span class="ruby-constant">File</span>.<span class="ruby-identifier">open</span>(<span class="ruby-ivar">@file</span>) <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">f</span><span class="ruby-operator">|</span>
|
262
|
-
<span class="ruby-identifier">data</span> = <span class="ruby-constant">Marshal</span>.<span class="ruby-identifier">load</span>(<span class="ruby-identifier">f</span>)
|
263
|
-
<span class="ruby-keyword">end</span>
|
264
|
-
<span class="ruby-ivar">@freqs</span> = <span class="ruby-identifier">data</span>[<span class="ruby-value">:freqs</span>]
|
265
|
-
<span class="ruby-ivar">@total_word_counts</span> = <span class="ruby-identifier">data</span>[<span class="ruby-value">:total_word_counts</span>]
|
266
|
-
<span class="ruby-ivar">@total_doc_counts</span> = <span class="ruby-identifier">data</span>[<span class="ruby-value">:total_doc_counts</span>]
|
267
|
-
<span class="ruby-ivar">@klass_word_counts</span> = <span class="ruby-identifier">data</span>[<span class="ruby-value">:klass_word_counts</span>]
|
268
|
-
<span class="ruby-ivar">@klass_doc_counts</span> = <span class="ruby-identifier">data</span>[<span class="ruby-value">:klass_word_counts</span>]
|
269
|
-
<span class="ruby-keyword">rescue</span> <span class="ruby-constant">Errno</span><span class="ruby-operator">::</span><span class="ruby-constant">ENOENT</span>
|
270
|
-
<span class="ruby-identifier">reset</span>
|
271
|
-
<span class="ruby-keyword">end</span>
|
272
|
-
<span class="ruby-keyword">end</span></pre>
|
273
|
-
</div><!-- init_tables-source -->
|
274
|
-
|
275
|
-
</div>
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
</div><!-- init_tables-method -->
|
281
|
-
|
282
|
-
|
283
|
-
<div id="reset-method" class="method-detail ">
|
284
|
-
<a name="method-i-reset"></a>
|
285
|
-
|
286
|
-
|
287
|
-
<div class="method-heading">
|
288
|
-
<span class="method-name">reset</span><span
|
289
|
-
class="method-args">()</span>
|
290
|
-
<span class="method-click-advice">click to toggle source</span>
|
291
|
-
</div>
|
292
|
-
|
293
|
-
|
294
|
-
<div class="method-description">
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
<div class="method-source-code" id="reset-source">
|
301
|
-
<pre>
|
302
|
-
<span class="ruby-comment"># File lib/ankusa/file_system_storage.rb, line 12</span>
|
303
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">reset</span>
|
304
|
-
<span class="ruby-ivar">@freqs</span> = {}
|
305
|
-
<span class="ruby-ivar">@total_word_counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value">0</span>)
|
306
|
-
<span class="ruby-ivar">@total_doc_counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value">0</span>)
|
307
|
-
<span class="ruby-ivar">@klass_word_counts</span> = {}
|
308
|
-
<span class="ruby-ivar">@klass_doc_counts</span> = {}
|
309
|
-
<span class="ruby-keyword">end</span></pre>
|
310
|
-
</div><!-- reset-source -->
|
311
|
-
|
312
|
-
</div>
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
</div><!-- reset-method -->
|
318
|
-
|
319
|
-
|
320
|
-
<div id="save-method" class="method-detail ">
|
321
|
-
<a name="method-i-save"></a>
|
322
|
-
|
323
|
-
|
324
|
-
<div class="method-heading">
|
325
|
-
<span class="method-name">save</span><span
|
326
|
-
class="method-args">(file = nil)</span>
|
327
|
-
<span class="method-click-advice">click to toggle source</span>
|
328
|
-
</div>
|
329
|
-
|
330
|
-
|
331
|
-
<div class="method-description">
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
<div class="method-source-code" id="save-source">
|
338
|
-
<pre>
|
339
|
-
<span class="ruby-comment"># File lib/ankusa/file_system_storage.rb, line 41</span>
|
340
|
-
<span class="ruby-keyword">def</span> <span class="ruby-identifier">save</span>(<span class="ruby-identifier">file</span> = <span class="ruby-keyword">nil</span>)
|
341
|
-
<span class="ruby-identifier">file</span> <span class="ruby-operator">||=</span> <span class="ruby-ivar">@file</span>
|
342
|
-
<span class="ruby-identifier">data</span> = { <span class="ruby-value">:freqs</span> =<span class="ruby-operator">></span> <span class="ruby-ivar">@freqs</span>,
|
343
|
-
<span class="ruby-value">:total_word_counts</span> =<span class="ruby-operator">></span> <span class="ruby-ivar">@total_word_counts</span>,
|
344
|
-
<span class="ruby-value">:total_doc_counts</span> =<span class="ruby-operator">></span> <span class="ruby-ivar">@total_doc_counts</span>,
|
345
|
-
<span class="ruby-value">:klass_word_counts</span> =<span class="ruby-operator">></span> <span class="ruby-ivar">@klass_word_counts</span>,
|
346
|
-
<span class="ruby-value">:klass_doc_counts</span> =<span class="ruby-operator">></span> <span class="ruby-ivar">@klass_doc_counts</span> }
|
347
|
-
<span class="ruby-constant">File</span>.<span class="ruby-identifier">open</span>(<span class="ruby-identifier">file</span>, <span class="ruby-string">'w+'</span>) <span class="ruby-keyword">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">f</span><span class="ruby-operator">|</span>
|
348
|
-
<span class="ruby-constant">Marshal</span>.<span class="ruby-identifier">dump</span>(<span class="ruby-identifier">data</span>, <span class="ruby-identifier">f</span>)
|
349
|
-
<span class="ruby-keyword">end</span>
|
350
|
-
<span class="ruby-keyword">end</span></pre>
|
351
|
-
</div><!-- save-source -->
|
352
|
-
|
353
|
-
</div>
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
</div><!-- save-method -->
|
359
|
-
|
360
|
-
|
361
|
-
</div><!-- public-instance-method-details -->
|
362
|
-
|
363
|
-
</div><!-- 5Buntitled-5D -->
|
364
|
-
|
365
|
-
|
366
|
-
</div><!-- documentation -->
|
367
|
-
|
368
|
-
<div id="validator-badges">
|
369
|
-
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
370
|
-
<p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish
|
371
|
-
Rdoc Generator</a> 2</small>.</p>
|
372
|
-
</div>
|
373
|
-
|
374
|
-
</body>
|
375
|
-
</html>
|
376
|
-
|