ankusa 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +6 -4
- data/Rakefile +2 -2
- data/docs/classes/Ankusa.html +8 -8
- data/docs/classes/Ankusa/Classifier.html +82 -271
- data/docs/classes/Ankusa/HBaseStorage.html +537 -0
- data/docs/classes/Ankusa/MemoryStorage.html +439 -0
- data/docs/classes/Ankusa/TextHash.html +84 -29
- data/docs/classes/String.html +172 -0
- data/docs/created.rid +1 -1
- data/docs/files/README_rdoc.html +6 -4
- data/docs/files/lib/ankusa/classifier_rb.html +1 -1
- data/docs/files/lib/ankusa/extensions_rb.html +108 -0
- data/docs/files/lib/ankusa/hasher_rb.html +1 -1
- data/docs/files/lib/ankusa/hbase_storage_rb.html +108 -0
- data/docs/files/lib/ankusa/{nbclass_rb.html → memory_storage_rb.html} +4 -4
- data/docs/files/lib/ankusa_rb.html +4 -2
- data/docs/fr_class_index.html +3 -1
- data/docs/fr_file_index.html +3 -1
- data/docs/fr_method_index.html +41 -17
- data/lib/ankusa.rb +3 -1
- data/lib/ankusa/classifier.rb +37 -86
- data/lib/ankusa/extensions.rb +13 -0
- data/lib/ankusa/hasher.rb +24 -10
- data/lib/ankusa/hbase_storage.rb +109 -0
- data/lib/ankusa/memory_storage.rb +61 -0
- metadata +13 -7
- data/docs/classes/Ankusa/NBClass.html +0 -168
- data/lib/ankusa/nbclass.rb +0 -15
@@ -0,0 +1,439 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>Class: Ankusa::MemoryStorage</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="classHeader">
|
50
|
+
<table class="header-table">
|
51
|
+
<tr class="top-aligned-row">
|
52
|
+
<td><strong>Class</strong></td>
|
53
|
+
<td class="class-name-in-header">Ankusa::MemoryStorage</td>
|
54
|
+
</tr>
|
55
|
+
<tr class="top-aligned-row">
|
56
|
+
<td><strong>In:</strong></td>
|
57
|
+
<td>
|
58
|
+
<a href="../../files/lib/ankusa/memory_storage_rb.html">
|
59
|
+
lib/ankusa/memory_storage.rb
|
60
|
+
</a>
|
61
|
+
<br />
|
62
|
+
</td>
|
63
|
+
</tr>
|
64
|
+
|
65
|
+
<tr class="top-aligned-row">
|
66
|
+
<td><strong>Parent:</strong></td>
|
67
|
+
<td>
|
68
|
+
Object
|
69
|
+
</td>
|
70
|
+
</tr>
|
71
|
+
</table>
|
72
|
+
</div>
|
73
|
+
<!-- banner header -->
|
74
|
+
|
75
|
+
<div id="bodyContent">
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
<div id="contextContent">
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
</div>
|
84
|
+
|
85
|
+
<div id="method-list">
|
86
|
+
<h3 class="section-bar">Methods</h3>
|
87
|
+
|
88
|
+
<div class="name-list">
|
89
|
+
<a href="#M000010">classnames</a>
|
90
|
+
<a href="#M000021">close</a>
|
91
|
+
<a href="#M000020">doc_count_total</a>
|
92
|
+
<a href="#M000012">drop_tables</a>
|
93
|
+
<a href="#M000016">get_doc_count</a>
|
94
|
+
<a href="#M000015">get_total_word_count</a>
|
95
|
+
<a href="#M000014">get_word_counts</a>
|
96
|
+
<a href="#M000019">incr_doc_count</a>
|
97
|
+
<a href="#M000018">incr_total_word_count</a>
|
98
|
+
<a href="#M000017">incr_word_count</a>
|
99
|
+
<a href="#M000013">init_tables</a>
|
100
|
+
<a href="#M000009">new</a>
|
101
|
+
<a href="#M000011">reset</a>
|
102
|
+
</div>
|
103
|
+
</div>
|
104
|
+
|
105
|
+
</div>
|
106
|
+
|
107
|
+
|
108
|
+
<!-- if includes -->
|
109
|
+
|
110
|
+
<div id="section">
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
|
115
|
+
|
116
|
+
|
117
|
+
|
118
|
+
|
119
|
+
<!-- if method_list -->
|
120
|
+
<div id="methods">
|
121
|
+
<h3 class="section-bar">Public Class methods</h3>
|
122
|
+
|
123
|
+
<div id="method-M000009" class="method-detail">
|
124
|
+
<a name="M000009"></a>
|
125
|
+
|
126
|
+
<div class="method-heading">
|
127
|
+
<a href="#M000009" class="method-signature">
|
128
|
+
<span class="method-name">new</span><span class="method-args">()</span>
|
129
|
+
</a>
|
130
|
+
</div>
|
131
|
+
|
132
|
+
<div class="method-description">
|
133
|
+
<p><a class="source-toggle" href="#"
|
134
|
+
onclick="toggleCode('M000009-source');return false;">[Source]</a></p>
|
135
|
+
<div class="method-source-code" id="M000009-source">
|
136
|
+
<pre>
|
137
|
+
<span class="ruby-comment cmt"># File lib/ankusa/memory_storage.rb, line 4</span>
|
138
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>
|
139
|
+
<span class="ruby-identifier">init_tables</span>
|
140
|
+
<span class="ruby-keyword kw">end</span>
|
141
|
+
</pre>
|
142
|
+
</div>
|
143
|
+
</div>
|
144
|
+
</div>
|
145
|
+
|
146
|
+
<h3 class="section-bar">Public Instance methods</h3>
|
147
|
+
|
148
|
+
<div id="method-M000010" class="method-detail">
|
149
|
+
<a name="M000010"></a>
|
150
|
+
|
151
|
+
<div class="method-heading">
|
152
|
+
<a href="#M000010" class="method-signature">
|
153
|
+
<span class="method-name">classnames</span><span class="method-args">()</span>
|
154
|
+
</a>
|
155
|
+
</div>
|
156
|
+
|
157
|
+
<div class="method-description">
|
158
|
+
<p><a class="source-toggle" href="#"
|
159
|
+
onclick="toggleCode('M000010-source');return false;">[Source]</a></p>
|
160
|
+
<div class="method-source-code" id="M000010-source">
|
161
|
+
<pre>
|
162
|
+
<span class="ruby-comment cmt"># File lib/ankusa/memory_storage.rb, line 8</span>
|
163
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classnames</span>
|
164
|
+
<span class="ruby-ivar">@total_doc_counts</span>.<span class="ruby-identifier">keys</span>
|
165
|
+
<span class="ruby-keyword kw">end</span>
|
166
|
+
</pre>
|
167
|
+
</div>
|
168
|
+
</div>
|
169
|
+
</div>
|
170
|
+
|
171
|
+
<div id="method-M000021" class="method-detail">
|
172
|
+
<a name="M000021"></a>
|
173
|
+
|
174
|
+
<div class="method-heading">
|
175
|
+
<a href="#M000021" class="method-signature">
|
176
|
+
<span class="method-name">close</span><span class="method-args">()</span>
|
177
|
+
</a>
|
178
|
+
</div>
|
179
|
+
|
180
|
+
<div class="method-description">
|
181
|
+
<p><a class="source-toggle" href="#"
|
182
|
+
onclick="toggleCode('M000021-source');return false;">[Source]</a></p>
|
183
|
+
<div class="method-source-code" id="M000021-source">
|
184
|
+
<pre>
|
185
|
+
<span class="ruby-comment cmt"># File lib/ankusa/memory_storage.rb, line 56</span>
|
186
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">close</span>
|
187
|
+
<span class="ruby-keyword kw">end</span>
|
188
|
+
</pre>
|
189
|
+
</div>
|
190
|
+
</div>
|
191
|
+
</div>
|
192
|
+
|
193
|
+
<div id="method-M000020" class="method-detail">
|
194
|
+
<a name="M000020"></a>
|
195
|
+
|
196
|
+
<div class="method-heading">
|
197
|
+
<a href="#M000020" class="method-signature">
|
198
|
+
<span class="method-name">doc_count_total</span><span class="method-args">()</span>
|
199
|
+
</a>
|
200
|
+
</div>
|
201
|
+
|
202
|
+
<div class="method-description">
|
203
|
+
<p><a class="source-toggle" href="#"
|
204
|
+
onclick="toggleCode('M000020-source');return false;">[Source]</a></p>
|
205
|
+
<div class="method-source-code" id="M000020-source">
|
206
|
+
<pre>
|
207
|
+
<span class="ruby-comment cmt"># File lib/ankusa/memory_storage.rb, line 52</span>
|
208
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">doc_count_total</span>
|
209
|
+
<span class="ruby-ivar">@total_doc_counts</span>.<span class="ruby-identifier">values</span>.<span class="ruby-identifier">inject</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">x</span>,<span class="ruby-identifier">y</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span><span class="ruby-operator">+</span><span class="ruby-identifier">y</span> }
|
210
|
+
<span class="ruby-keyword kw">end</span>
|
211
|
+
</pre>
|
212
|
+
</div>
|
213
|
+
</div>
|
214
|
+
</div>
|
215
|
+
|
216
|
+
<div id="method-M000012" class="method-detail">
|
217
|
+
<a name="M000012"></a>
|
218
|
+
|
219
|
+
<div class="method-heading">
|
220
|
+
<a href="#M000012" class="method-signature">
|
221
|
+
<span class="method-name">drop_tables</span><span class="method-args">()</span>
|
222
|
+
</a>
|
223
|
+
</div>
|
224
|
+
|
225
|
+
<div class="method-description">
|
226
|
+
<p><a class="source-toggle" href="#"
|
227
|
+
onclick="toggleCode('M000012-source');return false;">[Source]</a></p>
|
228
|
+
<div class="method-source-code" id="M000012-source">
|
229
|
+
<pre>
|
230
|
+
<span class="ruby-comment cmt"># File lib/ankusa/memory_storage.rb, line 16</span>
|
231
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">drop_tables</span>
|
232
|
+
<span class="ruby-keyword kw">end</span>
|
233
|
+
</pre>
|
234
|
+
</div>
|
235
|
+
</div>
|
236
|
+
</div>
|
237
|
+
|
238
|
+
<div id="method-M000016" class="method-detail">
|
239
|
+
<a name="M000016"></a>
|
240
|
+
|
241
|
+
<div class="method-heading">
|
242
|
+
<a href="#M000016" class="method-signature">
|
243
|
+
<span class="method-name">get_doc_count</span><span class="method-args">(klass)</span>
|
244
|
+
</a>
|
245
|
+
</div>
|
246
|
+
|
247
|
+
<div class="method-description">
|
248
|
+
<p><a class="source-toggle" href="#"
|
249
|
+
onclick="toggleCode('M000016-source');return false;">[Source]</a></p>
|
250
|
+
<div class="method-source-code" id="M000016-source">
|
251
|
+
<pre>
|
252
|
+
<span class="ruby-comment cmt"># File lib/ankusa/memory_storage.rb, line 35</span>
|
253
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_doc_count</span>(<span class="ruby-identifier">klass</span>)
|
254
|
+
<span class="ruby-ivar">@total_doc_counts</span>[<span class="ruby-identifier">klass</span>]
|
255
|
+
<span class="ruby-keyword kw">end</span>
|
256
|
+
</pre>
|
257
|
+
</div>
|
258
|
+
</div>
|
259
|
+
</div>
|
260
|
+
|
261
|
+
<div id="method-M000015" class="method-detail">
|
262
|
+
<a name="M000015"></a>
|
263
|
+
|
264
|
+
<div class="method-heading">
|
265
|
+
<a href="#M000015" class="method-signature">
|
266
|
+
<span class="method-name">get_total_word_count</span><span class="method-args">(klass)</span>
|
267
|
+
</a>
|
268
|
+
</div>
|
269
|
+
|
270
|
+
<div class="method-description">
|
271
|
+
<p><a class="source-toggle" href="#"
|
272
|
+
onclick="toggleCode('M000015-source');return false;">[Source]</a></p>
|
273
|
+
<div class="method-source-code" id="M000015-source">
|
274
|
+
<pre>
|
275
|
+
<span class="ruby-comment cmt"># File lib/ankusa/memory_storage.rb, line 31</span>
|
276
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_total_word_count</span>(<span class="ruby-identifier">klass</span>)
|
277
|
+
<span class="ruby-ivar">@total_word_counts</span>[<span class="ruby-identifier">klass</span>]
|
278
|
+
<span class="ruby-keyword kw">end</span>
|
279
|
+
</pre>
|
280
|
+
</div>
|
281
|
+
</div>
|
282
|
+
</div>
|
283
|
+
|
284
|
+
<div id="method-M000014" class="method-detail">
|
285
|
+
<a name="M000014"></a>
|
286
|
+
|
287
|
+
<div class="method-heading">
|
288
|
+
<a href="#M000014" class="method-signature">
|
289
|
+
<span class="method-name">get_word_counts</span><span class="method-args">(word)</span>
|
290
|
+
</a>
|
291
|
+
</div>
|
292
|
+
|
293
|
+
<div class="method-description">
|
294
|
+
<p><a class="source-toggle" href="#"
|
295
|
+
onclick="toggleCode('M000014-source');return false;">[Source]</a></p>
|
296
|
+
<div class="method-source-code" id="M000014-source">
|
297
|
+
<pre>
|
298
|
+
<span class="ruby-comment cmt"># File lib/ankusa/memory_storage.rb, line 27</span>
|
299
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">get_word_counts</span>(<span class="ruby-identifier">word</span>)
|
300
|
+
<span class="ruby-ivar">@freqs</span>.<span class="ruby-identifier">fetch</span> <span class="ruby-identifier">word</span>, <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value">0</span>)
|
301
|
+
<span class="ruby-keyword kw">end</span>
|
302
|
+
</pre>
|
303
|
+
</div>
|
304
|
+
</div>
|
305
|
+
</div>
|
306
|
+
|
307
|
+
<div id="method-M000019" class="method-detail">
|
308
|
+
<a name="M000019"></a>
|
309
|
+
|
310
|
+
<div class="method-heading">
|
311
|
+
<a href="#M000019" class="method-signature">
|
312
|
+
<span class="method-name">incr_doc_count</span><span class="method-args">(klass, count)</span>
|
313
|
+
</a>
|
314
|
+
</div>
|
315
|
+
|
316
|
+
<div class="method-description">
|
317
|
+
<p><a class="source-toggle" href="#"
|
318
|
+
onclick="toggleCode('M000019-source');return false;">[Source]</a></p>
|
319
|
+
<div class="method-source-code" id="M000019-source">
|
320
|
+
<pre>
|
321
|
+
<span class="ruby-comment cmt"># File lib/ankusa/memory_storage.rb, line 48</span>
|
322
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">incr_doc_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">count</span>)
|
323
|
+
<span class="ruby-ivar">@total_doc_counts</span>[<span class="ruby-identifier">klass</span>] <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
|
324
|
+
<span class="ruby-keyword kw">end</span>
|
325
|
+
</pre>
|
326
|
+
</div>
|
327
|
+
</div>
|
328
|
+
</div>
|
329
|
+
|
330
|
+
<div id="method-M000018" class="method-detail">
|
331
|
+
<a name="M000018"></a>
|
332
|
+
|
333
|
+
<div class="method-heading">
|
334
|
+
<a href="#M000018" class="method-signature">
|
335
|
+
<span class="method-name">incr_total_word_count</span><span class="method-args">(klass, count)</span>
|
336
|
+
</a>
|
337
|
+
</div>
|
338
|
+
|
339
|
+
<div class="method-description">
|
340
|
+
<p><a class="source-toggle" href="#"
|
341
|
+
onclick="toggleCode('M000018-source');return false;">[Source]</a></p>
|
342
|
+
<div class="method-source-code" id="M000018-source">
|
343
|
+
<pre>
|
344
|
+
<span class="ruby-comment cmt"># File lib/ankusa/memory_storage.rb, line 44</span>
|
345
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">incr_total_word_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">count</span>)
|
346
|
+
<span class="ruby-ivar">@total_word_counts</span>[<span class="ruby-identifier">klass</span>] <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
|
347
|
+
<span class="ruby-keyword kw">end</span>
|
348
|
+
</pre>
|
349
|
+
</div>
|
350
|
+
</div>
|
351
|
+
</div>
|
352
|
+
|
353
|
+
<div id="method-M000017" class="method-detail">
|
354
|
+
<a name="M000017"></a>
|
355
|
+
|
356
|
+
<div class="method-heading">
|
357
|
+
<a href="#M000017" class="method-signature">
|
358
|
+
<span class="method-name">incr_word_count</span><span class="method-args">(klass, word, count)</span>
|
359
|
+
</a>
|
360
|
+
</div>
|
361
|
+
|
362
|
+
<div class="method-description">
|
363
|
+
<p><a class="source-toggle" href="#"
|
364
|
+
onclick="toggleCode('M000017-source');return false;">[Source]</a></p>
|
365
|
+
<div class="method-source-code" id="M000017-source">
|
366
|
+
<pre>
|
367
|
+
<span class="ruby-comment cmt"># File lib/ankusa/memory_storage.rb, line 39</span>
|
368
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">incr_word_count</span>(<span class="ruby-identifier">klass</span>, <span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span>)
|
369
|
+
<span class="ruby-ivar">@freqs</span>[<span class="ruby-identifier">word</span>] <span class="ruby-operator">||=</span> <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value">0</span>)
|
370
|
+
<span class="ruby-ivar">@freqs</span>[<span class="ruby-identifier">word</span>][<span class="ruby-identifier">klass</span>] <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
|
371
|
+
<span class="ruby-keyword kw">end</span>
|
372
|
+
</pre>
|
373
|
+
</div>
|
374
|
+
</div>
|
375
|
+
</div>
|
376
|
+
|
377
|
+
<div id="method-M000013" class="method-detail">
|
378
|
+
<a name="M000013"></a>
|
379
|
+
|
380
|
+
<div class="method-heading">
|
381
|
+
<a href="#M000013" class="method-signature">
|
382
|
+
<span class="method-name">init_tables</span><span class="method-args">()</span>
|
383
|
+
</a>
|
384
|
+
</div>
|
385
|
+
|
386
|
+
<div class="method-description">
|
387
|
+
<p><a class="source-toggle" href="#"
|
388
|
+
onclick="toggleCode('M000013-source');return false;">[Source]</a></p>
|
389
|
+
<div class="method-source-code" id="M000013-source">
|
390
|
+
<pre>
|
391
|
+
<span class="ruby-comment cmt"># File lib/ankusa/memory_storage.rb, line 19</span>
|
392
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">init_tables</span>
|
393
|
+
<span class="ruby-ivar">@freqs</span> = {}
|
394
|
+
<span class="ruby-ivar">@total_word_counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value">0</span>)
|
395
|
+
<span class="ruby-ivar">@total_doc_counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value">0</span>)
|
396
|
+
<span class="ruby-ivar">@klass_word_counts</span> = {}
|
397
|
+
<span class="ruby-ivar">@klass_doc_counts</span> = {}
|
398
|
+
<span class="ruby-keyword kw">end</span>
|
399
|
+
</pre>
|
400
|
+
</div>
|
401
|
+
</div>
|
402
|
+
</div>
|
403
|
+
|
404
|
+
<div id="method-M000011" class="method-detail">
|
405
|
+
<a name="M000011"></a>
|
406
|
+
|
407
|
+
<div class="method-heading">
|
408
|
+
<a href="#M000011" class="method-signature">
|
409
|
+
<span class="method-name">reset</span><span class="method-args">()</span>
|
410
|
+
</a>
|
411
|
+
</div>
|
412
|
+
|
413
|
+
<div class="method-description">
|
414
|
+
<p><a class="source-toggle" href="#"
|
415
|
+
onclick="toggleCode('M000011-source');return false;">[Source]</a></p>
|
416
|
+
<div class="method-source-code" id="M000011-source">
|
417
|
+
<pre>
|
418
|
+
<span class="ruby-comment cmt"># File lib/ankusa/memory_storage.rb, line 12</span>
|
419
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">reset</span>
|
420
|
+
<span class="ruby-identifier">init_tables</span>
|
421
|
+
<span class="ruby-keyword kw">end</span>
|
422
|
+
</pre>
|
423
|
+
</div>
|
424
|
+
</div>
|
425
|
+
</div>
|
426
|
+
|
427
|
+
|
428
|
+
</div>
|
429
|
+
|
430
|
+
|
431
|
+
</div>
|
432
|
+
|
433
|
+
|
434
|
+
<div id="validator-badges">
|
435
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
436
|
+
</div>
|
437
|
+
|
438
|
+
</body>
|
439
|
+
</html>
|
@@ -86,9 +86,11 @@
|
|
86
86
|
<h3 class="section-bar">Methods</h3>
|
87
87
|
|
88
88
|
<div class="name-list">
|
89
|
-
<a href="#
|
90
|
-
<a href="#
|
91
|
-
<a href="#
|
89
|
+
<a href="#M000038">add_text</a>
|
90
|
+
<a href="#M000039">add_word</a>
|
91
|
+
<a href="#M000040">atomize</a>
|
92
|
+
<a href="#M000037">new</a>
|
93
|
+
<a href="#M000041">valid_word?</a>
|
92
94
|
</div>
|
93
95
|
</div>
|
94
96
|
|
@@ -123,19 +125,42 @@
|
|
123
125
|
<div id="methods">
|
124
126
|
<h3 class="section-bar">Public Class methods</h3>
|
125
127
|
|
126
|
-
<div id="method-
|
127
|
-
<a name="
|
128
|
+
<div id="method-M000040" class="method-detail">
|
129
|
+
<a name="M000040"></a>
|
128
130
|
|
129
131
|
<div class="method-heading">
|
130
|
-
<a href="#
|
132
|
+
<a href="#M000040" class="method-signature">
|
133
|
+
<span class="method-name">atomize</span><span class="method-args">(text)</span>
|
134
|
+
</a>
|
135
|
+
</div>
|
136
|
+
|
137
|
+
<div class="method-description">
|
138
|
+
<p><a class="source-toggle" href="#"
|
139
|
+
onclick="toggleCode('M000040-source');return false;">[Source]</a></p>
|
140
|
+
<div class="method-source-code" id="M000040-source">
|
141
|
+
<pre>
|
142
|
+
<span class="ruby-comment cmt"># File lib/ankusa/hasher.rb, line 33</span>
|
143
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">atomize</span>(<span class="ruby-identifier">text</span>)
|
144
|
+
<span class="ruby-identifier">text</span>.<span class="ruby-identifier">to_ascii</span>.<span class="ruby-identifier">tr</span>(<span class="ruby-value str">'-'</span>, <span class="ruby-value str">' '</span>).<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/[^\w\s]/</span>,<span class="ruby-value str">" "</span>).<span class="ruby-identifier">split</span>.<span class="ruby-identifier">map</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">downcase</span> }
|
145
|
+
<span class="ruby-keyword kw">end</span>
|
146
|
+
</pre>
|
147
|
+
</div>
|
148
|
+
</div>
|
149
|
+
</div>
|
150
|
+
|
151
|
+
<div id="method-M000037" class="method-detail">
|
152
|
+
<a name="M000037"></a>
|
153
|
+
|
154
|
+
<div class="method-heading">
|
155
|
+
<a href="#M000037" class="method-signature">
|
131
156
|
<span class="method-name">new</span><span class="method-args">(text=nil)</span>
|
132
157
|
</a>
|
133
158
|
</div>
|
134
159
|
|
135
160
|
<div class="method-description">
|
136
161
|
<p><a class="source-toggle" href="#"
|
137
|
-
onclick="toggleCode('
|
138
|
-
<div class="method-source-code" id="
|
162
|
+
onclick="toggleCode('M000037-source');return false;">[Source]</a></p>
|
163
|
+
<div class="method-source-code" id="M000037-source">
|
139
164
|
<pre>
|
140
165
|
<span class="ruby-comment cmt"># File lib/ankusa/hasher.rb, line 9</span>
|
141
166
|
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">text</span>=<span class="ruby-keyword kw">nil</span>)
|
@@ -148,28 +173,61 @@
|
|
148
173
|
</div>
|
149
174
|
</div>
|
150
175
|
|
176
|
+
<div id="method-M000041" class="method-detail">
|
177
|
+
<a name="M000041"></a>
|
178
|
+
|
179
|
+
<div class="method-heading">
|
180
|
+
<a href="#M000041" class="method-signature">
|
181
|
+
<span class="method-name">valid_word?</span><span class="method-args">(word)</span>
|
182
|
+
</a>
|
183
|
+
</div>
|
184
|
+
|
185
|
+
<div class="method-description">
|
186
|
+
<p>
|
187
|
+
word should be only alphanum chars at this point
|
188
|
+
</p>
|
189
|
+
<p><a class="source-toggle" href="#"
|
190
|
+
onclick="toggleCode('M000041-source');return false;">[Source]</a></p>
|
191
|
+
<div class="method-source-code" id="M000041-source">
|
192
|
+
<pre>
|
193
|
+
<span class="ruby-comment cmt"># File lib/ankusa/hasher.rb, line 38</span>
|
194
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">valid_word?</span>(<span class="ruby-identifier">word</span>)
|
195
|
+
<span class="ruby-keyword kw">return</span> <span class="ruby-keyword kw">false</span> <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Ankusa</span><span class="ruby-operator">::</span><span class="ruby-constant">STOPWORDS</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">word</span>
|
196
|
+
<span class="ruby-keyword kw">return</span> <span class="ruby-keyword kw">false</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">word</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator"><</span> <span class="ruby-value">3</span>
|
197
|
+
<span class="ruby-keyword kw">return</span> <span class="ruby-keyword kw">false</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">word</span>.<span class="ruby-identifier">numeric?</span>
|
198
|
+
<span class="ruby-keyword kw">true</span>
|
199
|
+
<span class="ruby-keyword kw">end</span>
|
200
|
+
</pre>
|
201
|
+
</div>
|
202
|
+
</div>
|
203
|
+
</div>
|
204
|
+
|
151
205
|
<h3 class="section-bar">Public Instance methods</h3>
|
152
206
|
|
153
|
-
<div id="method-
|
154
|
-
<a name="
|
207
|
+
<div id="method-M000038" class="method-detail">
|
208
|
+
<a name="M000038"></a>
|
155
209
|
|
156
210
|
<div class="method-heading">
|
157
|
-
<a href="#
|
211
|
+
<a href="#M000038" class="method-signature">
|
158
212
|
<span class="method-name">add_text</span><span class="method-args">(text)</span>
|
159
213
|
</a>
|
160
214
|
</div>
|
161
215
|
|
162
216
|
<div class="method-description">
|
163
217
|
<p><a class="source-toggle" href="#"
|
164
|
-
onclick="toggleCode('
|
165
|
-
<div class="method-source-code" id="
|
218
|
+
onclick="toggleCode('M000038-source');return false;">[Source]</a></p>
|
219
|
+
<div class="method-source-code" id="M000038-source">
|
166
220
|
<pre>
|
167
221
|
<span class="ruby-comment cmt"># File lib/ankusa/hasher.rb, line 15</span>
|
168
222
|
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_text</span>(<span class="ruby-identifier">text</span>)
|
169
|
-
<span class="ruby-
|
170
|
-
|
171
|
-
<span class="ruby-
|
172
|
-
|
223
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">text</span>.<span class="ruby-identifier">kind_of?</span> <span class="ruby-constant">Array</span>
|
224
|
+
<span class="ruby-identifier">text</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">t</span><span class="ruby-operator">|</span> <span class="ruby-identifier">add_text</span> <span class="ruby-identifier">t</span> }
|
225
|
+
<span class="ruby-keyword kw">else</span>
|
226
|
+
<span class="ruby-comment cmt"># replace dashes with spaces, then get rid of non-word/non-space characters, </span>
|
227
|
+
<span class="ruby-comment cmt"># then split by space to get words</span>
|
228
|
+
<span class="ruby-identifier">words</span> = <span class="ruby-constant">TextHash</span>.<span class="ruby-identifier">atomize</span> <span class="ruby-identifier">text</span>
|
229
|
+
<span class="ruby-identifier">words</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">word</span><span class="ruby-operator">|</span> <span class="ruby-identifier">add_word</span>(<span class="ruby-identifier">word</span>) <span class="ruby-keyword kw">if</span> <span class="ruby-constant">TextHash</span>.<span class="ruby-identifier">valid_word?</span>(<span class="ruby-identifier">word</span>) }
|
230
|
+
<span class="ruby-keyword kw">end</span>
|
173
231
|
<span class="ruby-keyword kw">self</span>
|
174
232
|
<span class="ruby-keyword kw">end</span>
|
175
233
|
</pre>
|
@@ -177,28 +235,25 @@
|
|
177
235
|
</div>
|
178
236
|
</div>
|
179
237
|
|
180
|
-
<div id="method-
|
181
|
-
<a name="
|
238
|
+
<div id="method-M000039" class="method-detail">
|
239
|
+
<a name="M000039"></a>
|
182
240
|
|
183
241
|
<div class="method-heading">
|
184
|
-
<a href="#
|
242
|
+
<a href="#M000039" class="method-signature">
|
185
243
|
<span class="method-name">add_word</span><span class="method-args">(word)</span>
|
186
244
|
</a>
|
187
245
|
</div>
|
188
246
|
|
189
247
|
<div class="method-description">
|
190
248
|
<p><a class="source-toggle" href="#"
|
191
|
-
onclick="toggleCode('
|
192
|
-
<div class="method-source-code" id="
|
249
|
+
onclick="toggleCode('M000039-source');return false;">[Source]</a></p>
|
250
|
+
<div class="method-source-code" id="M000039-source">
|
193
251
|
<pre>
|
194
|
-
<span class="ruby-comment cmt"># File lib/ankusa/hasher.rb, line
|
252
|
+
<span class="ruby-comment cmt"># File lib/ankusa/hasher.rb, line 27</span>
|
195
253
|
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_word</span>(<span class="ruby-identifier">word</span>)
|
196
|
-
<span class="ruby-
|
197
|
-
<span class="ruby-
|
198
|
-
|
199
|
-
<span class="ruby-identifier">key</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">stem</span>.<span class="ruby-identifier">intern</span>
|
200
|
-
<span class="ruby-identifier">store</span> <span class="ruby-identifier">key</span>, <span class="ruby-identifier">fetch</span>(<span class="ruby-identifier">key</span>, <span class="ruby-value">0</span>)<span class="ruby-operator">+</span><span class="ruby-value">1</span>
|
201
|
-
<span class="ruby-keyword kw">end</span>
|
254
|
+
<span class="ruby-ivar">@word_count</span> <span class="ruby-operator">+=</span> <span class="ruby-value">1</span>
|
255
|
+
<span class="ruby-identifier">key</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">stem</span>.<span class="ruby-identifier">intern</span>
|
256
|
+
<span class="ruby-identifier">store</span> <span class="ruby-identifier">key</span>, <span class="ruby-identifier">fetch</span>(<span class="ruby-identifier">key</span>, <span class="ruby-value">0</span>)<span class="ruby-operator">+</span><span class="ruby-value">1</span>
|
202
257
|
<span class="ruby-keyword kw">end</span>
|
203
258
|
</pre>
|
204
259
|
</div>
|