classifier 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. data/LICENSE +341 -0
  2. data/README +59 -6
  3. data/Rakefile +16 -4
  4. data/bin/bayes.rb +8 -2
  5. data/doc/classes/Classifier.html +15 -10
  6. data/doc/classes/Classifier/Bayes.html +68 -38
  7. data/doc/classes/Classifier/Bayes.src/{M000005.html → M000023.html} +1 -1
  8. data/doc/classes/Classifier/Bayes.src/{M000006.html → M000024.html} +1 -1
  9. data/doc/classes/Classifier/Bayes.src/M000025.html +30 -0
  10. data/doc/classes/Classifier/Bayes.src/{M000007.html → M000026.html} +1 -1
  11. data/doc/classes/Classifier/Bayes.src/{M000008.html → M000027.html} +1 -1
  12. data/doc/classes/Classifier/Bayes.src/{M000009.html → M000028.html} +4 -4
  13. data/doc/classes/Classifier/Bayes.src/{M000010.html → M000029.html} +2 -2
  14. data/doc/classes/Classifier/ContentNode.html +252 -0
  15. data/doc/classes/Classifier/ContentNode.src/M000031.html +21 -0
  16. data/doc/classes/Classifier/ContentNode.src/M000032.html +18 -0
  17. data/doc/classes/Classifier/ContentNode.src/M000033.html +18 -0
  18. data/doc/classes/Classifier/ContentNode.src/M000034.html +41 -0
  19. data/doc/classes/Classifier/LSI.html +449 -0
  20. data/doc/classes/Classifier/LSI.src/M000011.html +20 -0
  21. data/doc/classes/Classifier/LSI.src/M000012.html +18 -0
  22. data/doc/classes/Classifier/LSI.src/M000013.html +20 -0
  23. data/doc/classes/Classifier/LSI.src/M000014.html +18 -0
  24. data/doc/classes/Classifier/LSI.src/M000015.html +21 -0
  25. data/doc/classes/Classifier/LSI.src/M000016.html +18 -0
  26. data/doc/classes/Classifier/LSI.src/M000017.html +32 -0
  27. data/doc/classes/Classifier/LSI.src/M000018.html +26 -0
  28. data/doc/classes/Classifier/LSI.src/M000019.html +26 -0
  29. data/doc/classes/Classifier/LSI.src/M000020.html +23 -0
  30. data/doc/classes/Classifier/LSI.src/M000021.html +21 -0
  31. data/doc/classes/Classifier/LSI.src/M000022.html +31 -0
  32. data/doc/classes/Classifier/WordList.html +202 -0
  33. data/doc/classes/Classifier/WordList.src/M000007.html +18 -0
  34. data/doc/classes/Classifier/WordList.src/M000008.html +19 -0
  35. data/doc/classes/Classifier/WordList.src/M000009.html +19 -0
  36. data/doc/classes/Classifier/WordList.src/M000010.html +18 -0
  37. data/doc/classes/GSL.html +111 -0
  38. data/doc/classes/GSL/Vector.html +156 -0
  39. data/doc/classes/GSL/Vector.src/M000005.html +18 -0
  40. data/doc/classes/GSL/Vector.src/M000006.html +19 -0
  41. data/doc/classes/Object.html +139 -0
  42. data/doc/classes/Object.src/M000001.html +16 -0
  43. data/doc/classes/String.html +95 -9
  44. data/doc/classes/{Classifier/WordHash.src/M000001.html → String.src/M000002.html} +3 -3
  45. data/doc/classes/String.src/M000003.html +18 -0
  46. data/doc/classes/String.src/M000004.html +18 -0
  47. data/doc/created.rid +1 -1
  48. data/doc/files/README.html +102 -12
  49. data/doc/files/lib/classifier/bayes_rb.html +1 -1
  50. data/doc/files/lib/classifier/{string_extensions/porter_stemmer_rb.html → extensions/vector_serialize_rb.html} +4 -15
  51. data/doc/files/lib/classifier/{string_extensions → extensions}/word_hash_rb.html +2 -2
  52. data/doc/files/lib/classifier/extensions/word_list_rb.html +115 -0
  53. data/doc/files/lib/classifier/lsi/content_node_rb.html +115 -0
  54. data/doc/files/lib/classifier/lsi_rb.html +125 -0
  55. data/doc/files/lib/classifier/string_extensions_rb.html +2 -3
  56. data/doc/files/lib/classifier_rb.html +3 -1
  57. data/doc/fr_class_index.html +6 -2
  58. data/doc/fr_file_index.html +5 -2
  59. data/doc/fr_method_index.html +34 -11
  60. data/lib/classifier.rb +3 -1
  61. data/lib/classifier/bayes.rb +34 -9
  62. data/lib/classifier/extensions/vector_serialize.rb +14 -0
  63. data/lib/classifier/extensions/word_hash.rb +125 -0
  64. data/lib/classifier/extensions/word_list.rb +31 -0
  65. data/lib/classifier/lsi.rb +248 -0
  66. data/lib/classifier/lsi/content_node.rb +67 -0
  67. data/lib/classifier/string_extensions.rb +10 -5
  68. data/test/bayes/bayesian_test.rb +2 -2
  69. data/test/lsi/lsi_test.rb +88 -0
  70. data/test/string_extensions/word_hash_test.rb +7 -5
  71. metadata +79 -24
  72. data/doc/classes/Classifier/Stemmable.html +0 -243
  73. data/doc/classes/Classifier/Stemmable.src/M000003.html +0 -102
  74. data/doc/classes/Classifier/WordHash.html +0 -178
  75. data/doc/classes/Classifier/WordHash.src/M000002.html +0 -28
  76. data/lib/classifier/string_extensions/porter_stemmer.rb +0 -199
  77. data/lib/classifier/string_extensions/word_hash.rb +0 -119
data/bin/bayes.rb CHANGED
@@ -1,6 +1,12 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'classifier'
3
+ begin
4
+ require 'rubygems'
5
+ require_gem 'classifier'
6
+ rescue
7
+ require 'classifier'
8
+ end
9
+
4
10
  require 'madeleine'
5
11
 
6
12
  m = SnapshotMadeleine.new(File.expand_path("~/.bayes_data")) {
@@ -24,7 +30,7 @@ when "classify"
24
30
  puts m.system.classify(File.open(ARGV[1]).read)
25
31
  else
26
32
  puts "Invalid option: choose add [category] [file] or clasify [file]"
27
- exit(1)
33
+ exit(-1)
28
34
  end
29
35
 
30
36
  m.take_snapshot
@@ -59,12 +59,16 @@
59
59
  lib/classifier/bayes.rb
60
60
  </a>
61
61
  <br />
62
- <a href="../files/lib/classifier/string_extensions/porter_stemmer_rb.html">
63
- lib/classifier/string_extensions/porter_stemmer.rb
62
+ <a href="../files/lib/classifier/lsi_rb.html">
63
+ lib/classifier/lsi.rb
64
64
  </a>
65
65
  <br />
66
- <a href="../files/lib/classifier/string_extensions/word_hash_rb.html">
67
- lib/classifier/string_extensions/word_hash.rb
66
+ <a href="../files/lib/classifier/extensions/word_list_rb.html">
67
+ lib/classifier/extensions/word_list.rb
68
+ </a>
69
+ <br />
70
+ <a href="../files/lib/classifier/lsi/content_node_rb.html">
71
+ lib/classifier/lsi/content_node.rb
68
72
  </a>
69
73
  <br />
70
74
  </td>
@@ -82,13 +86,13 @@
82
86
 
83
87
  <div id="description">
84
88
  <table>
85
- <tr><td valign="top">Author:</td><td>Lucas Carlson (<a href="mailto:lucas@rufy.com">lucas@rufy.com</a>)
89
+ <tr><td valign="top">Author:</td><td>David Fayram (<a href="mailto:dfayram@lensmen.net">dfayram@lensmen.net</a>)
86
90
 
87
91
  </td></tr>
88
- <tr><td valign="top">Copyright:</td><td>Copyright &#169; 2005 Lucas Carlson
92
+ <tr><td valign="top">Copyright:</td><td>Copyright &#169; 2005 David Fayram II
89
93
 
90
94
  </td></tr>
91
- <tr><td valign="top">License:</td><td>LGPL
95
+ <tr><td valign="top">License:</td><td>GPL
92
96
 
93
97
  </td></tr>
94
98
  </table>
@@ -109,9 +113,10 @@
109
113
  <div id="class-list">
110
114
  <h3 class="section-bar">Classes and Modules</h3>
111
115
 
112
- Module <a href="Classifier/Stemmable.html" class="link">Classifier::Stemmable</a><br />
113
- Module <a href="Classifier/WordHash.html" class="link">Classifier::WordHash</a><br />
114
- Class <a href="Classifier/Bayes.html" class="link">Classifier::Bayes</a><br />
116
+ Class <a href="Classifier/Bayes.html" class="link">Classifier::Bayes</a><br />
117
+ Class <a href="Classifier/ContentNode.html" class="link">Classifier::ContentNode</a><br />
118
+ Class <a href="Classifier/LSI.html" class="link">Classifier::LSI</a><br />
119
+ Class <a href="Classifier/WordList.html" class="link">Classifier::WordList</a><br />
115
120
 
116
121
  </div>
117
122
 
@@ -65,7 +65,9 @@
65
65
  <tr class="top-aligned-row">
66
66
  <td><strong>Parent:</strong></td>
67
67
  <td>
68
+ <a href="../Object.html">
68
69
  Object
70
+ </a>
69
71
  </td>
70
72
  </tr>
71
73
  </table>
@@ -86,13 +88,14 @@
86
88
  <h3 class="section-bar">Methods</h3>
87
89
 
88
90
  <div class="name-list">
89
- <a href="#M000010">add_category</a>&nbsp;&nbsp;
90
- <a href="#M000011">append_category</a>&nbsp;&nbsp;
91
- <a href="#M000007">classifications</a>&nbsp;&nbsp;
92
- <a href="#M000008">classify</a>&nbsp;&nbsp;
93
- <a href="#M000009">method_missing</a>&nbsp;&nbsp;
94
- <a href="#M000005">new</a>&nbsp;&nbsp;
95
- <a href="#M000006">train</a>&nbsp;&nbsp;
91
+ <a href="#M000029">add_category</a>&nbsp;&nbsp;
92
+ <a href="#M000030">append_category</a>&nbsp;&nbsp;
93
+ <a href="#M000026">classifications</a>&nbsp;&nbsp;
94
+ <a href="#M000027">classify</a>&nbsp;&nbsp;
95
+ <a href="#M000028">method_missing</a>&nbsp;&nbsp;
96
+ <a href="#M000023">new</a>&nbsp;&nbsp;
97
+ <a href="#M000024">train</a>&nbsp;&nbsp;
98
+ <a href="#M000025">untrain</a>&nbsp;&nbsp;
96
99
  </div>
97
100
  </div>
98
101
 
@@ -114,12 +117,12 @@
114
117
  <div id="methods">
115
118
  <h3 class="section-bar">Public Class methods</h3>
116
119
 
117
- <div id="method-M000005" class="method-detail">
118
- <a name="M000005"></a>
120
+ <div id="method-M000023" class="method-detail">
121
+ <a name="M000023"></a>
119
122
 
120
123
  <div class="method-heading">
121
- <a href="Bayes.src/M000005.html" target="Code" class="method-signature"
122
- onclick="popupCode('Bayes.src/M000005.html');return false;">
124
+ <a href="Bayes.src/M000023.html" target="Code" class="method-signature"
125
+ onclick="popupCode('Bayes.src/M000023.html');return false;">
123
126
  <span class="method-name">new</span><span class="method-args">(*categories)</span>
124
127
  </a>
125
128
  </div>
@@ -137,12 +140,12 @@ initialized and given a training method. E.g.,
137
140
 
138
141
  <h3 class="section-bar">Public Instance methods</h3>
139
142
 
140
- <div id="method-M000010" class="method-detail">
141
- <a name="M000010"></a>
143
+ <div id="method-M000029" class="method-detail">
144
+ <a name="M000029"></a>
142
145
 
143
146
  <div class="method-heading">
144
- <a href="Bayes.src/M000010.html" target="Code" class="method-signature"
145
- onclick="popupCode('Bayes.src/M000010.html');return false;">
147
+ <a href="Bayes.src/M000029.html" target="Code" class="method-signature"
148
+ onclick="popupCode('Bayes.src/M000029.html');return false;">
146
149
  <span class="method-name">add_category</span><span class="method-args">(category)</span>
147
150
  </a>
148
151
  </div>
@@ -163,8 +166,8 @@ at initialization.
163
166
  </div>
164
167
  </div>
165
168
 
166
- <div id="method-M000011" class="method-detail">
167
- <a name="M000011"></a>
169
+ <div id="method-M000030" class="method-detail">
170
+ <a name="M000030"></a>
168
171
 
169
172
  <div class="method-heading">
170
173
  <span class="method-name">append_category</span><span class="method-args">(category)</span>
@@ -172,17 +175,17 @@ at initialization.
172
175
 
173
176
  <div class="method-description">
174
177
  <p>
175
- Alias for <a href="Bayes.html#M000010">add_category</a>
178
+ Alias for <a href="Bayes.html#M000029">add_category</a>
176
179
  </p>
177
180
  </div>
178
181
  </div>
179
182
 
180
- <div id="method-M000007" class="method-detail">
181
- <a name="M000007"></a>
183
+ <div id="method-M000026" class="method-detail">
184
+ <a name="M000026"></a>
182
185
 
183
186
  <div class="method-heading">
184
- <a href="Bayes.src/M000007.html" target="Code" class="method-signature"
185
- onclick="popupCode('Bayes.src/M000007.html');return false;">
187
+ <a href="Bayes.src/M000026.html" target="Code" class="method-signature"
188
+ onclick="popupCode('Bayes.src/M000026.html');return false;">
186
189
  <span class="method-name">classifications</span><span class="method-args">(text)</span>
187
190
  </a>
188
191
  </div>
@@ -197,17 +200,17 @@ Returns the scores in each category the provided <tt>text</tt>. E.g.,
197
200
  </pre>
198
201
  <p>
199
202
  The largest of these scores (the one closest to 0) is the one picked out by
200
- <a href="Bayes.html#M000008">classify</a>
203
+ <a href="Bayes.html#M000027">classify</a>
201
204
  </p>
202
205
  </div>
203
206
  </div>
204
207
 
205
- <div id="method-M000008" class="method-detail">
206
- <a name="M000008"></a>
208
+ <div id="method-M000027" class="method-detail">
209
+ <a name="M000027"></a>
207
210
 
208
211
  <div class="method-heading">
209
- <a href="Bayes.src/M000008.html" target="Code" class="method-signature"
210
- onclick="popupCode('Bayes.src/M000008.html');return false;">
212
+ <a href="Bayes.src/M000027.html" target="Code" class="method-signature"
213
+ onclick="popupCode('Bayes.src/M000027.html');return false;">
211
214
  <span class="method-name">classify</span><span class="method-args">(text)</span>
212
215
  </a>
213
216
  </div>
@@ -224,36 +227,37 @@ the categories given in the initializer. E.g.,
224
227
  </div>
225
228
  </div>
226
229
 
227
- <div id="method-M000009" class="method-detail">
228
- <a name="M000009"></a>
230
+ <div id="method-M000028" class="method-detail">
231
+ <a name="M000028"></a>
229
232
 
230
233
  <div class="method-heading">
231
- <a href="Bayes.src/M000009.html" target="Code" class="method-signature"
232
- onclick="popupCode('Bayes.src/M000009.html');return false;">
234
+ <a href="Bayes.src/M000028.html" target="Code" class="method-signature"
235
+ onclick="popupCode('Bayes.src/M000028.html');return false;">
233
236
  <span class="method-name">method_missing</span><span class="method-args">(name, *args)</span>
234
237
  </a>
235
238
  </div>
236
239
 
237
240
  <div class="method-description">
238
241
  <p>
239
- Provides training methods for the categories specified in <a
240
- href="Bayes.html#M000005">Bayes#new</a> For example:
242
+ Provides training and untraining methods for the categories specified in <a
243
+ href="Bayes.html#M000023">Bayes#new</a> For example:
241
244
  </p>
242
245
  <pre>
243
246
  b = Classifier::Bayes.new 'This', 'That', 'the_other'
244
247
  b.train_this &quot;This text&quot;
245
248
  b.train_that &quot;That text&quot;
249
+ b.untrain_that &quot;That text&quot;
246
250
  b.train_the_other &quot;The other text&quot;
247
251
  </pre>
248
252
  </div>
249
253
  </div>
250
254
 
251
- <div id="method-M000006" class="method-detail">
252
- <a name="M000006"></a>
255
+ <div id="method-M000024" class="method-detail">
256
+ <a name="M000024"></a>
253
257
 
254
258
  <div class="method-heading">
255
- <a href="Bayes.src/M000006.html" target="Code" class="method-signature"
256
- onclick="popupCode('Bayes.src/M000006.html');return false;">
259
+ <a href="Bayes.src/M000024.html" target="Code" class="method-signature"
260
+ onclick="popupCode('Bayes.src/M000024.html');return false;">
257
261
  <span class="method-name">train</span><span class="method-args">(category, text)</span>
258
262
  </a>
259
263
  </div>
@@ -261,7 +265,7 @@ href="Bayes.html#M000005">Bayes#new</a> For example:
261
265
  <div class="method-description">
262
266
  <p>
263
267
  Provides a general training method for all categories specified in <a
264
- href="Bayes.html#M000005">Bayes#new</a> For example:
268
+ href="Bayes.html#M000023">Bayes#new</a> For example:
265
269
  </p>
266
270
  <pre>
267
271
  b = Classifier::Bayes.new 'This', 'That', 'the_other'
@@ -272,6 +276,32 @@ href="Bayes.html#M000005">Bayes#new</a> For example:
272
276
  </div>
273
277
  </div>
274
278
 
279
+ <div id="method-M000025" class="method-detail">
280
+ <a name="M000025"></a>
281
+
282
+ <div class="method-heading">
283
+ <a href="Bayes.src/M000025.html" target="Code" class="method-signature"
284
+ onclick="popupCode('Bayes.src/M000025.html');return false;">
285
+ <span class="method-name">untrain</span><span class="method-args">(category, text)</span>
286
+ </a>
287
+ </div>
288
+
289
+ <div class="method-description">
290
+ <p>
291
+ Provides an untraining method for all categories specified in <a
292
+ href="Bayes.html#M000023">Bayes#new</a>. Be very careful with this method.
293
+ </p>
294
+ <p>
295
+ For example:
296
+ </p>
297
+ <pre>
298
+ b = Classifier::Bayes.new 'This', 'That', 'the_other'
299
+ b.train :this, &quot;This text&quot;
300
+ b.untrain :this, &quot;This text&quot;
301
+ </pre>
302
+ </div>
303
+ </div>
304
+
275
305
 
276
306
  </div>
277
307
 
@@ -13,7 +13,7 @@
13
13
  <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 11</span>
14
14
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-operator">*</span><span class="ruby-identifier">categories</span>)
15
15
  <span class="ruby-ivar">@categories</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
16
- <span class="ruby-identifier">categories</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">category</span><span class="ruby-operator">|</span> <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">&quot;_&quot;</span>,<span class="ruby-value str">&quot; &quot;</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>] = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> }
16
+ <span class="ruby-identifier">categories</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">category</span><span class="ruby-operator">|</span> <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">prepare_category_name</span>] = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> }
17
17
  <span class="ruby-ivar">@total_words</span> = <span class="ruby-value">0</span>
18
18
  <span class="ruby-keyword kw">end</span></pre>
19
19
  </body>
@@ -12,7 +12,7 @@
12
12
  <body class="standalone-code">
13
13
  <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 24</span>
14
14
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">train</span>(<span class="ruby-identifier">category</span>, <span class="ruby-identifier">text</span>)
15
- <span class="ruby-identifier">category</span> = <span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">&quot;_&quot;</span>,<span class="ruby-value str">&quot; &quot;</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>
15
+ <span class="ruby-identifier">category</span> = <span class="ruby-identifier">category</span>.<span class="ruby-identifier">prepare_category_name</span>
16
16
  <span class="ruby-identifier">text</span>.<span class="ruby-identifier">word_hash</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
17
17
  <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">||=</span> <span class="ruby-value">0</span>
18
18
  <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
@@ -0,0 +1,30 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>untrain (Classifier::Bayes)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 41</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">untrain</span>(<span class="ruby-identifier">category</span>, <span class="ruby-identifier">text</span>)
15
+ <span class="ruby-identifier">category</span> = <span class="ruby-identifier">category</span>.<span class="ruby-identifier">prepare_category_name</span>
16
+ <span class="ruby-identifier">text</span>.<span class="ruby-identifier">word_hash</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
17
+ <span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@total_words</span> <span class="ruby-operator">&gt;=</span> <span class="ruby-value">0</span>
18
+ <span class="ruby-identifier">orig</span> = <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>]
19
+ <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">||=</span> <span class="ruby-value">0</span>
20
+ <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">-=</span> <span class="ruby-identifier">count</span>
21
+ <span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">&lt;=</span> <span class="ruby-value">0</span>
22
+ <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>].<span class="ruby-identifier">delete</span>(<span class="ruby-identifier">word</span>)
23
+ <span class="ruby-identifier">count</span> = <span class="ruby-identifier">orig</span>
24
+ <span class="ruby-keyword kw">end</span>
25
+ <span class="ruby-ivar">@total_words</span> <span class="ruby-operator">-=</span> <span class="ruby-identifier">count</span>
26
+ <span class="ruby-keyword kw">end</span>
27
+ <span class="ruby-keyword kw">end</span>
28
+ <span class="ruby-keyword kw">end</span></pre>
29
+ </body>
30
+ </html>
@@ -10,7 +10,7 @@
10
10
  <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
11
  </head>
12
12
  <body class="standalone-code">
13
- <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 38</span>
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 62</span>
14
14
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>)
15
15
  <span class="ruby-identifier">score</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
16
16
  <span class="ruby-ivar">@categories</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">category</span>, <span class="ruby-identifier">category_words</span><span class="ruby-operator">|</span>
@@ -10,7 +10,7 @@
10
10
  <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
11
  </head>
12
12
  <body class="standalone-code">
13
- <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 56</span>
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 80</span>
14
14
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classify</span>(<span class="ruby-identifier">text</span>)
15
15
  (<span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>).<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">a</span><span class="ruby-operator">|</span> <span class="ruby-operator">-</span><span class="ruby-identifier">a</span>[<span class="ruby-value">1</span>] })[<span class="ruby-value">0</span>][<span class="ruby-value">0</span>]
16
16
  <span class="ruby-keyword kw">end</span></pre>
@@ -10,12 +10,12 @@
10
10
  <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
11
  </head>
12
12
  <body class="standalone-code">
13
- <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 67</span>
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 92</span>
14
14
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">method_missing</span>(<span class="ruby-identifier">name</span>, <span class="ruby-operator">*</span><span class="ruby-identifier">args</span>)
15
- <span class="ruby-identifier">category</span> = <span class="ruby-identifier">name</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/train_([\w]+)/</span>, <span class="ruby-value str">'\1'</span>).<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">&quot;_&quot;</span>,<span class="ruby-value str">&quot; &quot;</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>
15
+ <span class="ruby-identifier">category</span> = <span class="ruby-identifier">name</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/(un)?train_([\w]+)/</span>, <span class="ruby-value str">'\2'</span>).<span class="ruby-identifier">prepare_category_name</span>
16
16
  <span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@categories</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">category</span>
17
- <span class="ruby-identifier">args</span>.<span class="ruby-identifier">each</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">text</span><span class="ruby-operator">|</span> <span class="ruby-identifier">train</span> <span class="ruby-identifier">category</span>, <span class="ruby-identifier">text</span>}
18
- <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">name</span>.<span class="ruby-identifier">to_s</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/train_([\w]+)/</span>
17
+ <span class="ruby-identifier">args</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">text</span><span class="ruby-operator">|</span> <span class="ruby-identifier">eval</span>(<span class="ruby-node">&quot;#{$1}train(category, text)&quot;</span>) }
18
+ <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">name</span>.<span class="ruby-identifier">to_s</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(un)?train_([\w]+)/</span>
19
19
  <span class="ruby-identifier">raise</span> <span class="ruby-constant">StandardError</span>, <span class="ruby-node">&quot;No such category: #{category}&quot;</span>
20
20
  <span class="ruby-keyword kw">else</span>
21
21
  <span class="ruby-keyword kw">super</span> <span class="ruby-comment cmt">#raise StandardError, &quot;No such method: #{name}&quot;</span>
@@ -10,9 +10,9 @@
10
10
  <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
11
  </head>
12
12
  <body class="standalone-code">
13
- <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 96</span>
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 121</span>
14
14
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_category</span>(<span class="ruby-identifier">category</span>)
15
- <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">&quot;_&quot;</span>,<span class="ruby-value str">&quot; &quot;</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>] = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
15
+ <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">prepare_category_name</span>] = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
16
16
  <span class="ruby-keyword kw">end</span></pre>
17
17
  </body>
18
18
  </html>
@@ -0,0 +1,252 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>Class: Classifier::ContentNode</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="classHeader">
50
+ <table class="header-table">
51
+ <tr class="top-aligned-row">
52
+ <td><strong>Class</strong></td>
53
+ <td class="class-name-in-header">Classifier::ContentNode</td>
54
+ </tr>
55
+ <tr class="top-aligned-row">
56
+ <td><strong>In:</strong></td>
57
+ <td>
58
+ <a href="../../files/lib/classifier/lsi/content_node_rb.html">
59
+ lib/classifier/lsi/content_node.rb
60
+ </a>
61
+ <br />
62
+ </td>
63
+ </tr>
64
+
65
+ <tr class="top-aligned-row">
66
+ <td><strong>Parent:</strong></td>
67
+ <td>
68
+ <a href="../Object.html">
69
+ Object
70
+ </a>
71
+ </td>
72
+ </tr>
73
+ </table>
74
+ </div>
75
+ <!-- banner header -->
76
+
77
+ <div id="bodyContent">
78
+
79
+
80
+
81
+ <div id="contextContent">
82
+
83
+ <div id="description">
84
+ <p>
85
+ This is an internal data structure class for the <a href="LSI.html">LSI</a>
86
+ node. Save for <a href="ContentNode.html#M000034">raw_vector_with</a>, it
87
+ should be fairly straightforward to understand. You should never have to
88
+ use it directly.
89
+ </p>
90
+
91
+ </div>
92
+
93
+
94
+ </div>
95
+
96
+ <div id="method-list">
97
+ <h3 class="section-bar">Methods</h3>
98
+
99
+ <div class="name-list">
100
+ <a href="#M000031">new</a>&nbsp;&nbsp;
101
+ <a href="#M000034">raw_vector_with</a>&nbsp;&nbsp;
102
+ <a href="#M000033">search_norm</a>&nbsp;&nbsp;
103
+ <a href="#M000032">search_vector</a>&nbsp;&nbsp;
104
+ </div>
105
+ </div>
106
+
107
+ </div>
108
+
109
+
110
+ <!-- if includes -->
111
+
112
+ <div id="section">
113
+
114
+
115
+
116
+
117
+
118
+ <div id="attribute-list">
119
+ <h3 class="section-bar">Attributes</h3>
120
+
121
+ <div class="name-list">
122
+ <table>
123
+ <tr class="top-aligned-row context-row">
124
+ <td class="context-item-name">categories</td>
125
+ <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
126
+ <td class="context-item-desc"></td>
127
+ </tr>
128
+ <tr class="top-aligned-row context-row">
129
+ <td class="context-item-name">lsi_norm</td>
130
+ <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
131
+ <td class="context-item-desc"></td>
132
+ </tr>
133
+ <tr class="top-aligned-row context-row">
134
+ <td class="context-item-name">lsi_vector</td>
135
+ <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
136
+ <td class="context-item-desc"></td>
137
+ </tr>
138
+ <tr class="top-aligned-row context-row">
139
+ <td class="context-item-name">raw_norm</td>
140
+ <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
141
+ <td class="context-item-desc"></td>
142
+ </tr>
143
+ <tr class="top-aligned-row context-row">
144
+ <td class="context-item-name">raw_vector</td>
145
+ <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
146
+ <td class="context-item-desc"></td>
147
+ </tr>
148
+ <tr class="top-aligned-row context-row">
149
+ <td class="context-item-name">source</td>
150
+ <td class="context-item-value">&nbsp;[R]&nbsp;</td>
151
+ <td class="context-item-desc"></td>
152
+ </tr>
153
+ <tr class="top-aligned-row context-row">
154
+ <td class="context-item-name">word_hash</td>
155
+ <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
156
+ <td class="context-item-desc"></td>
157
+ </tr>
158
+ </table>
159
+ </div>
160
+ </div>
161
+
162
+
163
+
164
+ <!-- if method_list -->
165
+ <div id="methods">
166
+ <h3 class="section-bar">Public Class methods</h3>
167
+
168
+ <div id="method-M000031" class="method-detail">
169
+ <a name="M000031"></a>
170
+
171
+ <div class="method-heading">
172
+ <a href="ContentNode.src/M000031.html" target="Code" class="method-signature"
173
+ onclick="popupCode('ContentNode.src/M000031.html');return false;">
174
+ <span class="method-name">new</span><span class="method-args">( source, categories=nil, text_proc=nil )</span>
175
+ </a>
176
+ </div>
177
+
178
+ <div class="method-description">
179
+ <p>
180
+ If text_proc is not specified, the source will be duck-typed via
181
+ source.to_s
182
+ </p>
183
+ </div>
184
+ </div>
185
+
186
+ <h3 class="section-bar">Public Instance methods</h3>
187
+
188
+ <div id="method-M000034" class="method-detail">
189
+ <a name="M000034"></a>
190
+
191
+ <div class="method-heading">
192
+ <a href="ContentNode.src/M000034.html" target="Code" class="method-signature"
193
+ onclick="popupCode('ContentNode.src/M000034.html');return false;">
194
+ <span class="method-name">raw_vector_with</span><span class="method-args">( word_list )</span>
195
+ </a>
196
+ </div>
197
+
198
+ <div class="method-description">
199
+ <p>
200
+ Creates the raw vector out of word_hash using word_list as the key for
201
+ mapping the vector space.
202
+ </p>
203
+ </div>
204
+ </div>
205
+
206
+ <div id="method-M000033" class="method-detail">
207
+ <a name="M000033"></a>
208
+
209
+ <div class="method-heading">
210
+ <a href="ContentNode.src/M000033.html" target="Code" class="method-signature"
211
+ onclick="popupCode('ContentNode.src/M000033.html');return false;">
212
+ <span class="method-name">search_norm</span><span class="method-args">()</span>
213
+ </a>
214
+ </div>
215
+
216
+ <div class="method-description">
217
+ <p>
218
+ Use this to fetch the appropriate search vector in normalized form.
219
+ </p>
220
+ </div>
221
+ </div>
222
+
223
+ <div id="method-M000032" class="method-detail">
224
+ <a name="M000032"></a>
225
+
226
+ <div class="method-heading">
227
+ <a href="ContentNode.src/M000032.html" target="Code" class="method-signature"
228
+ onclick="popupCode('ContentNode.src/M000032.html');return false;">
229
+ <span class="method-name">search_vector</span><span class="method-args">()</span>
230
+ </a>
231
+ </div>
232
+
233
+ <div class="method-description">
234
+ <p>
235
+ Use this to fetch the appropriate search vector.
236
+ </p>
237
+ </div>
238
+ </div>
239
+
240
+
241
+ </div>
242
+
243
+
244
+ </div>
245
+
246
+
247
+ <div id="validator-badges">
248
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
249
+ </div>
250
+
251
+ </body>
252
+ </html>