ankusa 0.0.13 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. data/Gemfile +4 -0
  2. data/Gemfile.lock +16 -0
  3. data/README.rdoc +5 -3
  4. data/Rakefile +5 -5
  5. data/lib/ankusa/naive_bayes.rb +3 -3
  6. data/lib/ankusa/version.rb +1 -1
  7. metadata +36 -100
  8. data/docs/Ankusa.html +0 -229
  9. data/docs/Ankusa/CassandraStorage.html +0 -801
  10. data/docs/Ankusa/Classifier.html +0 -440
  11. data/docs/Ankusa/FileSystemStorage.html +0 -376
  12. data/docs/Ankusa/HBaseStorage.html +0 -845
  13. data/docs/Ankusa/KLDivergenceClassifier.html +0 -265
  14. data/docs/Ankusa/MemoryStorage.html +0 -672
  15. data/docs/Ankusa/NaiveBayesClassifier.html +0 -313
  16. data/docs/Ankusa/TextHash.html +0 -390
  17. data/docs/README_rdoc.html +0 -268
  18. data/docs/String.html +0 -241
  19. data/docs/created.rid +0 -14
  20. data/docs/images/brick.png +0 -0
  21. data/docs/images/brick_link.png +0 -0
  22. data/docs/images/bug.png +0 -0
  23. data/docs/images/bullet_black.png +0 -0
  24. data/docs/images/bullet_toggle_minus.png +0 -0
  25. data/docs/images/bullet_toggle_plus.png +0 -0
  26. data/docs/images/date.png +0 -0
  27. data/docs/images/find.png +0 -0
  28. data/docs/images/loadingAnimation.gif +0 -0
  29. data/docs/images/macFFBgHack.png +0 -0
  30. data/docs/images/package.png +0 -0
  31. data/docs/images/page_green.png +0 -0
  32. data/docs/images/page_white_text.png +0 -0
  33. data/docs/images/page_white_width.png +0 -0
  34. data/docs/images/plugin.png +0 -0
  35. data/docs/images/ruby.png +0 -0
  36. data/docs/images/tag_green.png +0 -0
  37. data/docs/images/wrench.png +0 -0
  38. data/docs/images/wrench_orange.png +0 -0
  39. data/docs/images/zoom.png +0 -0
  40. data/docs/index.html +0 -212
  41. data/docs/js/darkfish.js +0 -116
  42. data/docs/js/jquery.js +0 -32
  43. data/docs/js/quicksearch.js +0 -114
  44. data/docs/js/thickbox-compressed.js +0 -10
  45. data/docs/lib/ankusa/cassandra_storage_rb.html +0 -54
  46. data/docs/lib/ankusa/classifier_rb.html +0 -52
  47. data/docs/lib/ankusa/extensions_rb.html +0 -54
  48. data/docs/lib/ankusa/file_system_storage_rb.html +0 -54
  49. data/docs/lib/ankusa/hasher_rb.html +0 -56
  50. data/docs/lib/ankusa/hbase_storage_rb.html +0 -54
  51. data/docs/lib/ankusa/kl_divergence_rb.html +0 -52
  52. data/docs/lib/ankusa/memory_storage_rb.html +0 -52
  53. data/docs/lib/ankusa/naive_bayes_rb.html +0 -52
  54. data/docs/lib/ankusa/stopwords_rb.html +0 -52
  55. data/docs/lib/ankusa/version_rb.html +0 -52
  56. data/docs/lib/ankusa_rb.html +0 -64
  57. data/docs/rdoc.css +0 -759
@@ -1,313 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
- <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5
- <head>
6
- <meta content="text/html; charset=UTF-8" http-equiv="Content-Type" />
7
-
8
- <title>Class: Ankusa::NaiveBayesClassifier</title>
9
-
10
- <link rel="stylesheet" href="../rdoc.css" type="text/css" media="screen" />
11
-
12
- <script src="../js/jquery.js" type="text/javascript" charset="utf-8"></script>
13
- <script src="../js/thickbox-compressed.js" type="text/javascript" charset="utf-8"></script>
14
- <script src="../js/quicksearch.js" type="text/javascript" charset="utf-8"></script>
15
- <script src="../js/darkfish.js" type="text/javascript" charset="utf-8"></script>
16
-
17
- </head>
18
- <body id="top" class="class">
19
-
20
- <div id="metadata">
21
- <div id="home-metadata">
22
- <div id="home-section" class="section">
23
- <h3 class="section-header">
24
- <a href="../index.html">Home</a>
25
- <a href="../index.html#classes">Classes</a>
26
- <a href="../index.html#methods">Methods</a>
27
- </h3>
28
- </div>
29
- </div>
30
-
31
- <div id="file-metadata">
32
- <div id="file-list-section" class="section">
33
- <h3 class="section-header">In Files</h3>
34
- <div class="section-body">
35
- <ul>
36
-
37
- <li><a href="../lib/ankusa/naive_bayes_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
38
- class="thickbox" title="lib/ankusa/naive_bayes.rb">lib/ankusa/naive_bayes.rb</a></li>
39
-
40
- </ul>
41
- </div>
42
- </div>
43
-
44
-
45
- </div>
46
-
47
- <div id="class-metadata">
48
-
49
- <!-- Parent Class -->
50
- <div id="parent-class-section" class="section">
51
- <h3 class="section-header">Parent</h3>
52
-
53
- <p class="link">Object</p>
54
-
55
- </div>
56
-
57
-
58
-
59
-
60
-
61
-
62
-
63
- <!-- Method Quickref -->
64
- <div id="method-list-section" class="section">
65
- <h3 class="section-header">Methods</h3>
66
- <ul class="link-list">
67
-
68
- <li><a href="#method-i-classifications">#classifications</a></li>
69
-
70
- <li><a href="#method-i-classify">#classify</a></li>
71
-
72
- <li><a href="#method-i-log_likelihoods">#log_likelihoods</a></li>
73
-
74
- </ul>
75
- </div>
76
-
77
-
78
-
79
- <!-- Included Modules -->
80
- <div id="includes-section" class="section">
81
- <h3 class="section-header">Included Modules</h3>
82
- <ul class="link-list">
83
-
84
-
85
- <li><a class="include" href="Classifier.html">Ankusa::Classifier</a></li>
86
-
87
-
88
- </ul>
89
- </div>
90
-
91
- </div>
92
-
93
- <div id="project-metadata">
94
-
95
-
96
- <div id="fileindex-section" class="section project-section">
97
- <h3 class="section-header">Files</h3>
98
- <ul>
99
-
100
- <li class="file"><a href="../README_rdoc.html">README.rdoc</a></li>
101
-
102
- </ul>
103
- </div>
104
-
105
-
106
- <div id="classindex-section" class="section project-section">
107
- <h3 class="section-header">Class/Module Index
108
- <span class="search-toggle"><img src="../images/find.png"
109
- height="16" width="16" alt="[+]"
110
- title="show/hide quicksearch" /></span></h3>
111
- <form action="#" method="get" accept-charset="utf-8" class="initially-hidden">
112
- <fieldset>
113
- <legend>Quicksearch</legend>
114
- <input type="text" name="quicksearch" value=""
115
- class="quicksearch-field" />
116
- </fieldset>
117
- </form>
118
-
119
- <ul class="link-list">
120
-
121
- <li><a href="../Ankusa.html">Ankusa</a></li>
122
-
123
- <li><a href="../Ankusa/CassandraStorage.html">Ankusa::CassandraStorage</a></li>
124
-
125
- <li><a href="../Ankusa/Classifier.html">Ankusa::Classifier</a></li>
126
-
127
- <li><a href="../Ankusa/FileSystemStorage.html">Ankusa::FileSystemStorage</a></li>
128
-
129
- <li><a href="../Ankusa/HBaseStorage.html">Ankusa::HBaseStorage</a></li>
130
-
131
- <li><a href="../Ankusa/KLDivergenceClassifier.html">Ankusa::KLDivergenceClassifier</a></li>
132
-
133
- <li><a href="../Ankusa/MemoryStorage.html">Ankusa::MemoryStorage</a></li>
134
-
135
- <li><a href="../Ankusa/NaiveBayesClassifier.html">Ankusa::NaiveBayesClassifier</a></li>
136
-
137
- <li><a href="../Ankusa/TextHash.html">Ankusa::TextHash</a></li>
138
-
139
- <li><a href="../String.html">String</a></li>
140
-
141
- </ul>
142
- <div id="no-class-search-results" style="display: none;">No matching classes.</div>
143
- </div>
144
-
145
-
146
- </div>
147
- </div>
148
-
149
- <div id="documentation">
150
- <h1 class="class">Ankusa::NaiveBayesClassifier</h1>
151
-
152
- <div id="description" class="description">
153
-
154
- </div><!-- description -->
155
-
156
-
157
- <div id="5Buntitled-5D" class="documentation-section">
158
-
159
-
160
-
161
-
162
-
163
-
164
-
165
-
166
- <!-- Methods -->
167
-
168
- <div id="public-instance-method-details" class="method-section section">
169
- <h3 class="section-header">Public Instance Methods</h3>
170
-
171
-
172
- <div id="classifications-method" class="method-detail ">
173
- <a name="method-i-classifications"></a>
174
-
175
-
176
- <div class="method-heading">
177
- <span class="method-name">classifications</span><span
178
- class="method-args">(text, classnames=nil)</span>
179
- <span class="method-click-advice">click to toggle source</span>
180
- </div>
181
-
182
-
183
- <div class="method-description">
184
-
185
- <p>Classes is an array of classes to look at</p>
186
-
187
-
188
-
189
- <div class="method-source-code" id="classifications-source">
190
- <pre>
191
- <span class="ruby-comment"># File lib/ankusa/naive_bayes.rb, line 13</span>
192
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>, <span class="ruby-identifier">classnames</span>=<span class="ruby-keyword">nil</span>)
193
- <span class="ruby-identifier">result</span> = <span class="ruby-identifier">log_likelihoods</span> <span class="ruby-identifier">text</span>, <span class="ruby-identifier">classnames</span>
194
- <span class="ruby-identifier">result</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span><span class="ruby-operator">|</span>
195
- <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] = (<span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] <span class="ruby-operator">==</span> <span class="ruby-constant">INFTY</span>) <span class="ruby-operator">?</span> <span class="ruby-value">0</span> <span class="ruby-operator">:</span> <span class="ruby-constant">Math</span>.<span class="ruby-identifier">exp</span>(<span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>])
196
- }
197
-
198
- <span class="ruby-comment"># normalize to get probs</span>
199
- <span class="ruby-identifier">sum</span> = <span class="ruby-identifier">result</span>.<span class="ruby-identifier">values</span>.<span class="ruby-identifier">inject</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">x</span>,<span class="ruby-identifier">y</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span><span class="ruby-operator">+</span><span class="ruby-identifier">y</span> }
200
- <span class="ruby-identifier">result</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] <span class="ruby-operator">/</span> <span class="ruby-identifier">sum</span> }
201
- <span class="ruby-identifier">result</span>
202
- <span class="ruby-keyword">end</span></pre>
203
- </div><!-- classifications-source -->
204
-
205
- </div>
206
-
207
-
208
-
209
-
210
- </div><!-- classifications-method -->
211
-
212
-
213
- <div id="classify-method" class="method-detail ">
214
- <a name="method-i-classify"></a>
215
-
216
-
217
- <div class="method-heading">
218
- <span class="method-name">classify</span><span
219
- class="method-args">(text, classes=nil)</span>
220
- <span class="method-click-advice">click to toggle source</span>
221
- </div>
222
-
223
-
224
- <div class="method-description">
225
-
226
-
227
-
228
-
229
-
230
- <div class="method-source-code" id="classify-source">
231
- <pre>
232
- <span class="ruby-comment"># File lib/ankusa/naive_bayes.rb, line 7</span>
233
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">classify</span>(<span class="ruby-identifier">text</span>, <span class="ruby-identifier">classes</span>=<span class="ruby-keyword">nil</span>)
234
- <span class="ruby-comment"># return the most probable class</span>
235
- <span class="ruby-identifier">log_likelihoods</span>(<span class="ruby-identifier">text</span>, <span class="ruby-identifier">classes</span>).<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">c</span><span class="ruby-operator">|</span> <span class="ruby-operator">-</span><span class="ruby-identifier">c</span>[<span class="ruby-value">1</span>] }.<span class="ruby-identifier">first</span>.<span class="ruby-identifier">first</span>
236
- <span class="ruby-keyword">end</span></pre>
237
- </div><!-- classify-source -->
238
-
239
- </div>
240
-
241
-
242
-
243
-
244
- </div><!-- classify-method -->
245
-
246
-
247
- <div id="log_likelihoods-method" class="method-detail ">
248
- <a name="method-i-log_likelihoods"></a>
249
-
250
-
251
- <div class="method-heading">
252
- <span class="method-name">log_likelihoods</span><span
253
- class="method-args">(text, classnames=nil)</span>
254
- <span class="method-click-advice">click to toggle source</span>
255
- </div>
256
-
257
-
258
- <div class="method-description">
259
-
260
- <p>Classes is an array of classes to look at</p>
261
-
262
-
263
-
264
- <div class="method-source-code" id="log_likelihoods-source">
265
- <pre>
266
- <span class="ruby-comment"># File lib/ankusa/naive_bayes.rb, line 26</span>
267
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">log_likelihoods</span>(<span class="ruby-identifier">text</span>, <span class="ruby-identifier">classnames</span>=<span class="ruby-keyword">nil</span>)
268
- <span class="ruby-identifier">classnames</span> <span class="ruby-operator">||=</span> <span class="ruby-ivar">@classnames</span>
269
- <span class="ruby-identifier">result</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">0</span>
270
-
271
- <span class="ruby-constant">TextHash</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">text</span>).<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
272
- <span class="ruby-identifier">probs</span> = <span class="ruby-identifier">get_word_probs</span>(<span class="ruby-identifier">word</span>, <span class="ruby-identifier">classnames</span>)
273
- <span class="ruby-identifier">classnames</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span><span class="ruby-operator">|</span>
274
- <span class="ruby-comment"># log likelihood should be infinity if we've never seen the klass</span>
275
- <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] <span class="ruby-operator">+=</span> <span class="ruby-identifier">probs</span>[<span class="ruby-identifier">k</span>] <span class="ruby-operator">&gt;</span> <span class="ruby-value">0</span> <span class="ruby-operator">?</span> (<span class="ruby-constant">Math</span>.<span class="ruby-identifier">log</span>(<span class="ruby-identifier">probs</span>[<span class="ruby-identifier">k</span>]) * <span class="ruby-identifier">count</span>) <span class="ruby-operator">:</span> <span class="ruby-constant">INFTY</span>
276
- }
277
- }
278
-
279
- <span class="ruby-comment"># add the prior</span>
280
- <span class="ruby-identifier">doc_counts</span> = <span class="ruby-identifier">doc_count_totals</span>.<span class="ruby-identifier">select</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span>,<span class="ruby-identifier">v</span><span class="ruby-operator">|</span> <span class="ruby-identifier">classnames</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">k</span> }.<span class="ruby-identifier">map</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span>,<span class="ruby-identifier">v</span><span class="ruby-operator">|</span> <span class="ruby-identifier">v</span> }
281
- <span class="ruby-identifier">doc_count_total</span> = (<span class="ruby-identifier">doc_counts</span>.<span class="ruby-identifier">inject</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">x</span>,<span class="ruby-identifier">y</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span><span class="ruby-operator">+</span><span class="ruby-identifier">y</span> } <span class="ruby-operator">+</span> <span class="ruby-identifier">classnames</span>.<span class="ruby-identifier">length</span>).<span class="ruby-identifier">to_f</span>
282
- <span class="ruby-identifier">classnames</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">k</span><span class="ruby-operator">|</span>
283
- <span class="ruby-identifier">result</span>[<span class="ruby-identifier">k</span>] <span class="ruby-operator">+=</span> <span class="ruby-constant">Math</span>.<span class="ruby-identifier">log</span>((<span class="ruby-ivar">@storage</span>.<span class="ruby-identifier">get_doc_count</span>(<span class="ruby-identifier">k</span>) <span class="ruby-operator">+</span> <span class="ruby-value">1</span>).<span class="ruby-identifier">to_f</span> <span class="ruby-operator">/</span> <span class="ruby-identifier">doc_count_total</span>)
284
- }
285
-
286
- <span class="ruby-identifier">result</span>
287
- <span class="ruby-keyword">end</span></pre>
288
- </div><!-- log_likelihoods-source -->
289
-
290
- </div>
291
-
292
-
293
-
294
-
295
- </div><!-- log_likelihoods-method -->
296
-
297
-
298
- </div><!-- public-instance-method-details -->
299
-
300
- </div><!-- 5Buntitled-5D -->
301
-
302
-
303
- </div><!-- documentation -->
304
-
305
- <div id="validator-badges">
306
- <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
307
- <p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish
308
- Rdoc Generator</a> 2</small>.</p>
309
- </div>
310
-
311
- </body>
312
- </html>
313
-
@@ -1,390 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
- <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5
- <head>
6
- <meta content="text/html; charset=UTF-8" http-equiv="Content-Type" />
7
-
8
- <title>Class: Ankusa::TextHash</title>
9
-
10
- <link rel="stylesheet" href="../rdoc.css" type="text/css" media="screen" />
11
-
12
- <script src="../js/jquery.js" type="text/javascript" charset="utf-8"></script>
13
- <script src="../js/thickbox-compressed.js" type="text/javascript" charset="utf-8"></script>
14
- <script src="../js/quicksearch.js" type="text/javascript" charset="utf-8"></script>
15
- <script src="../js/darkfish.js" type="text/javascript" charset="utf-8"></script>
16
-
17
- </head>
18
- <body id="top" class="class">
19
-
20
- <div id="metadata">
21
- <div id="home-metadata">
22
- <div id="home-section" class="section">
23
- <h3 class="section-header">
24
- <a href="../index.html">Home</a>
25
- <a href="../index.html#classes">Classes</a>
26
- <a href="../index.html#methods">Methods</a>
27
- </h3>
28
- </div>
29
- </div>
30
-
31
- <div id="file-metadata">
32
- <div id="file-list-section" class="section">
33
- <h3 class="section-header">In Files</h3>
34
- <div class="section-body">
35
- <ul>
36
-
37
- <li><a href="../lib/ankusa/hasher_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
38
- class="thickbox" title="lib/ankusa/hasher.rb">lib/ankusa/hasher.rb</a></li>
39
-
40
- </ul>
41
- </div>
42
- </div>
43
-
44
-
45
- </div>
46
-
47
- <div id="class-metadata">
48
-
49
- <!-- Parent Class -->
50
- <div id="parent-class-section" class="section">
51
- <h3 class="section-header">Parent</h3>
52
-
53
- <p class="link">Hash</p>
54
-
55
- </div>
56
-
57
-
58
-
59
-
60
-
61
-
62
-
63
- <!-- Method Quickref -->
64
- <div id="method-list-section" class="section">
65
- <h3 class="section-header">Methods</h3>
66
- <ul class="link-list">
67
-
68
- <li><a href="#method-c-atomize">::atomize</a></li>
69
-
70
- <li><a href="#method-c-new">::new</a></li>
71
-
72
- <li><a href="#method-c-valid_word-3F">::valid_word?</a></li>
73
-
74
- <li><a href="#method-i-add_text">#add_text</a></li>
75
-
76
- <li><a href="#method-i-add_word">#add_word</a></li>
77
-
78
- </ul>
79
- </div>
80
-
81
-
82
-
83
- </div>
84
-
85
- <div id="project-metadata">
86
-
87
-
88
- <div id="fileindex-section" class="section project-section">
89
- <h3 class="section-header">Files</h3>
90
- <ul>
91
-
92
- <li class="file"><a href="../README_rdoc.html">README.rdoc</a></li>
93
-
94
- </ul>
95
- </div>
96
-
97
-
98
- <div id="classindex-section" class="section project-section">
99
- <h3 class="section-header">Class/Module Index
100
- <span class="search-toggle"><img src="../images/find.png"
101
- height="16" width="16" alt="[+]"
102
- title="show/hide quicksearch" /></span></h3>
103
- <form action="#" method="get" accept-charset="utf-8" class="initially-hidden">
104
- <fieldset>
105
- <legend>Quicksearch</legend>
106
- <input type="text" name="quicksearch" value=""
107
- class="quicksearch-field" />
108
- </fieldset>
109
- </form>
110
-
111
- <ul class="link-list">
112
-
113
- <li><a href="../Ankusa.html">Ankusa</a></li>
114
-
115
- <li><a href="../Ankusa/CassandraStorage.html">Ankusa::CassandraStorage</a></li>
116
-
117
- <li><a href="../Ankusa/Classifier.html">Ankusa::Classifier</a></li>
118
-
119
- <li><a href="../Ankusa/FileSystemStorage.html">Ankusa::FileSystemStorage</a></li>
120
-
121
- <li><a href="../Ankusa/HBaseStorage.html">Ankusa::HBaseStorage</a></li>
122
-
123
- <li><a href="../Ankusa/KLDivergenceClassifier.html">Ankusa::KLDivergenceClassifier</a></li>
124
-
125
- <li><a href="../Ankusa/MemoryStorage.html">Ankusa::MemoryStorage</a></li>
126
-
127
- <li><a href="../Ankusa/NaiveBayesClassifier.html">Ankusa::NaiveBayesClassifier</a></li>
128
-
129
- <li><a href="../Ankusa/TextHash.html">Ankusa::TextHash</a></li>
130
-
131
- <li><a href="../String.html">String</a></li>
132
-
133
- </ul>
134
- <div id="no-class-search-results" style="display: none;">No matching classes.</div>
135
- </div>
136
-
137
-
138
- </div>
139
- </div>
140
-
141
- <div id="documentation">
142
- <h1 class="class">Ankusa::TextHash</h1>
143
-
144
- <div id="description" class="description">
145
-
146
- </div><!-- description -->
147
-
148
-
149
- <div id="5Buntitled-5D" class="documentation-section">
150
-
151
-
152
-
153
-
154
-
155
-
156
-
157
- <!-- Attributes -->
158
- <div id="attribute-method-details" class="method-section section">
159
- <h3 class="section-header">Attributes</h3>
160
-
161
-
162
- <div id="word_count-attribute-method" class="method-detail">
163
- <a name="word_count"></a>
164
-
165
- <div class="method-heading attribute-method-heading">
166
- <span class="method-name">word_count</span><span
167
- class="attribute-access-type">[R]</span>
168
- </div>
169
-
170
- <div class="method-description">
171
-
172
-
173
-
174
- </div>
175
- </div>
176
-
177
- </div><!-- attribute-method-details -->
178
-
179
-
180
- <!-- Methods -->
181
-
182
- <div id="public-class-method-details" class="method-section section">
183
- <h3 class="section-header">Public Class Methods</h3>
184
-
185
-
186
- <div id="atomize-method" class="method-detail ">
187
- <a name="method-c-atomize"></a>
188
-
189
-
190
- <div class="method-heading">
191
- <span class="method-name">atomize</span><span
192
- class="method-args">(text)</span>
193
- <span class="method-click-advice">click to toggle source</span>
194
- </div>
195
-
196
-
197
- <div class="method-description">
198
-
199
-
200
-
201
-
202
-
203
- <div class="method-source-code" id="atomize-source">
204
- <pre>
205
- <span class="ruby-comment"># File lib/ankusa/hasher.rb, line 15</span>
206
- <span class="ruby-keyword">def</span> <span class="ruby-keyword">self</span>.<span class="ruby-identifier">atomize</span>(<span class="ruby-identifier">text</span>)
207
- <span class="ruby-identifier">text</span>.<span class="ruby-identifier">downcase</span>.<span class="ruby-identifier">to_ascii</span>.<span class="ruby-identifier">tr</span>(<span class="ruby-string">'-'</span>, <span class="ruby-string">' '</span>).<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp">/[^\w\s]/</span>,<span class="ruby-string">&quot; &quot;</span>).<span class="ruby-identifier">split</span>
208
- <span class="ruby-keyword">end</span></pre>
209
- </div><!-- atomize-source -->
210
-
211
- </div>
212
-
213
-
214
-
215
-
216
- </div><!-- atomize-method -->
217
-
218
-
219
- <div id="new-method" class="method-detail ">
220
- <a name="method-c-new"></a>
221
-
222
-
223
- <div class="method-heading">
224
- <span class="method-name">new</span><span
225
- class="method-args">(text=nil)</span>
226
- <span class="method-click-advice">click to toggle source</span>
227
- </div>
228
-
229
-
230
- <div class="method-description">
231
-
232
-
233
-
234
-
235
-
236
- <div class="method-source-code" id="new-source">
237
- <pre>
238
- <span class="ruby-comment"># File lib/ankusa/hasher.rb, line 9</span>
239
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">text</span>=<span class="ruby-keyword">nil</span>)
240
- <span class="ruby-keyword">super</span> <span class="ruby-value">0</span>
241
- <span class="ruby-ivar">@word_count</span> = <span class="ruby-value">0</span>
242
- <span class="ruby-identifier">add_text</span>(<span class="ruby-identifier">text</span>) <span class="ruby-keyword">unless</span> <span class="ruby-identifier">text</span>.<span class="ruby-identifier">nil?</span>
243
- <span class="ruby-keyword">end</span></pre>
244
- </div><!-- new-source -->
245
-
246
- </div>
247
-
248
-
249
-
250
-
251
- </div><!-- new-method -->
252
-
253
-
254
- <div id="valid_word-3F-method" class="method-detail ">
255
- <a name="method-c-valid_word-3F"></a>
256
-
257
-
258
- <div class="method-heading">
259
- <span class="method-name">valid_word?</span><span
260
- class="method-args">(word)</span>
261
- <span class="method-click-advice">click to toggle source</span>
262
- </div>
263
-
264
-
265
- <div class="method-description">
266
-
267
- <p>word should be only alphanum chars at this point</p>
268
-
269
-
270
-
271
- <div class="method-source-code" id="valid_word-3F-source">
272
- <pre>
273
- <span class="ruby-comment"># File lib/ankusa/hasher.rb, line 20</span>
274
- <span class="ruby-keyword">def</span> <span class="ruby-keyword">self</span>.<span class="ruby-identifier">valid_word?</span>(<span class="ruby-identifier">word</span>)
275
- <span class="ruby-keyword">return</span> <span class="ruby-keyword">true</span> <span class="ruby-keyword">unless</span> <span class="ruby-constant">Ankusa</span><span class="ruby-operator">::</span><span class="ruby-constant">STOPWORDS</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">word</span> <span class="ruby-operator">||</span> <span class="ruby-identifier">word</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">&lt;</span> <span class="ruby-value">3</span> <span class="ruby-operator">||</span> <span class="ruby-identifier">word</span>.<span class="ruby-identifier">numeric?</span>
276
- <span class="ruby-keyword">end</span></pre>
277
- </div><!-- valid_word-3F-source -->
278
-
279
- </div>
280
-
281
-
282
-
283
-
284
- </div><!-- valid_word-3F-method -->
285
-
286
-
287
- </div><!-- public-class-method-details -->
288
-
289
- <div id="public-instance-method-details" class="method-section section">
290
- <h3 class="section-header">Public Instance Methods</h3>
291
-
292
-
293
- <div id="add_text-method" class="method-detail ">
294
- <a name="method-i-add_text"></a>
295
-
296
-
297
- <div class="method-heading">
298
- <span class="method-name">add_text</span><span
299
- class="method-args">(text)</span>
300
- <span class="method-click-advice">click to toggle source</span>
301
- </div>
302
-
303
-
304
- <div class="method-description">
305
-
306
-
307
-
308
-
309
-
310
- <div class="method-source-code" id="add_text-source">
311
- <pre>
312
- <span class="ruby-comment"># File lib/ankusa/hasher.rb, line 24</span>
313
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">add_text</span>(<span class="ruby-identifier">text</span>)
314
- <span class="ruby-keyword">if</span> <span class="ruby-identifier">text</span>.<span class="ruby-identifier">instance_of?</span> <span class="ruby-constant">Array</span>
315
- <span class="ruby-identifier">text</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">t</span><span class="ruby-operator">|</span> <span class="ruby-identifier">add_text</span> <span class="ruby-identifier">t</span> }
316
- <span class="ruby-keyword">else</span>
317
- <span class="ruby-comment"># replace dashes with spaces, then get rid of non-word/non-space characters, </span>
318
- <span class="ruby-comment"># then split by space to get words</span>
319
- <span class="ruby-identifier">words</span> = <span class="ruby-constant">TextHash</span>.<span class="ruby-identifier">atomize</span> <span class="ruby-identifier">text</span>
320
- <span class="ruby-identifier">words</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">word</span><span class="ruby-operator">|</span> <span class="ruby-identifier">add_word</span>(<span class="ruby-identifier">word</span>) <span class="ruby-keyword">if</span> <span class="ruby-constant">TextHash</span>.<span class="ruby-identifier">valid_word?</span>(<span class="ruby-identifier">word</span>) }
321
- <span class="ruby-keyword">end</span>
322
- <span class="ruby-keyword">self</span>
323
- <span class="ruby-keyword">end</span></pre>
324
- </div><!-- add_text-source -->
325
-
326
- </div>
327
-
328
-
329
-
330
-
331
- </div><!-- add_text-method -->
332
-
333
-
334
- </div><!-- public-instance-method-details -->
335
-
336
- <div id="protected-instance-method-details" class="method-section section">
337
- <h3 class="section-header">Protected Instance Methods</h3>
338
-
339
-
340
- <div id="add_word-method" class="method-detail ">
341
- <a name="method-i-add_word"></a>
342
-
343
-
344
- <div class="method-heading">
345
- <span class="method-name">add_word</span><span
346
- class="method-args">(word)</span>
347
- <span class="method-click-advice">click to toggle source</span>
348
- </div>
349
-
350
-
351
- <div class="method-description">
352
-
353
-
354
-
355
-
356
-
357
- <div class="method-source-code" id="add_word-source">
358
- <pre>
359
- <span class="ruby-comment"># File lib/ankusa/hasher.rb, line 38</span>
360
- <span class="ruby-keyword">def</span> <span class="ruby-identifier">add_word</span>(<span class="ruby-identifier">word</span>)
361
- <span class="ruby-ivar">@word_count</span> <span class="ruby-operator">+=</span> <span class="ruby-value">1</span>
362
- <span class="ruby-identifier">key</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">stem</span>.<span class="ruby-identifier">intern</span>
363
- <span class="ruby-identifier">store</span> <span class="ruby-identifier">key</span>, <span class="ruby-identifier">fetch</span>(<span class="ruby-identifier">key</span>, <span class="ruby-value">0</span>)<span class="ruby-operator">+</span><span class="ruby-value">1</span>
364
- <span class="ruby-keyword">end</span></pre>
365
- </div><!-- add_word-source -->
366
-
367
- </div>
368
-
369
-
370
-
371
-
372
- </div><!-- add_word-method -->
373
-
374
-
375
- </div><!-- protected-instance-method-details -->
376
-
377
- </div><!-- 5Buntitled-5D -->
378
-
379
-
380
- </div><!-- documentation -->
381
-
382
- <div id="validator-badges">
383
- <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
384
- <p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish
385
- Rdoc Generator</a> 2</small>.</p>
386
- </div>
387
-
388
- </body>
389
- </html>
390
-