classifier 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (77) hide show
  1. data/LICENSE +341 -0
  2. data/README +59 -6
  3. data/Rakefile +16 -4
  4. data/bin/bayes.rb +8 -2
  5. data/doc/classes/Classifier.html +15 -10
  6. data/doc/classes/Classifier/Bayes.html +68 -38
  7. data/doc/classes/Classifier/Bayes.src/{M000005.html → M000023.html} +1 -1
  8. data/doc/classes/Classifier/Bayes.src/{M000006.html → M000024.html} +1 -1
  9. data/doc/classes/Classifier/Bayes.src/M000025.html +30 -0
  10. data/doc/classes/Classifier/Bayes.src/{M000007.html → M000026.html} +1 -1
  11. data/doc/classes/Classifier/Bayes.src/{M000008.html → M000027.html} +1 -1
  12. data/doc/classes/Classifier/Bayes.src/{M000009.html → M000028.html} +4 -4
  13. data/doc/classes/Classifier/Bayes.src/{M000010.html → M000029.html} +2 -2
  14. data/doc/classes/Classifier/ContentNode.html +252 -0
  15. data/doc/classes/Classifier/ContentNode.src/M000031.html +21 -0
  16. data/doc/classes/Classifier/ContentNode.src/M000032.html +18 -0
  17. data/doc/classes/Classifier/ContentNode.src/M000033.html +18 -0
  18. data/doc/classes/Classifier/ContentNode.src/M000034.html +41 -0
  19. data/doc/classes/Classifier/LSI.html +449 -0
  20. data/doc/classes/Classifier/LSI.src/M000011.html +20 -0
  21. data/doc/classes/Classifier/LSI.src/M000012.html +18 -0
  22. data/doc/classes/Classifier/LSI.src/M000013.html +20 -0
  23. data/doc/classes/Classifier/LSI.src/M000014.html +18 -0
  24. data/doc/classes/Classifier/LSI.src/M000015.html +21 -0
  25. data/doc/classes/Classifier/LSI.src/M000016.html +18 -0
  26. data/doc/classes/Classifier/LSI.src/M000017.html +32 -0
  27. data/doc/classes/Classifier/LSI.src/M000018.html +26 -0
  28. data/doc/classes/Classifier/LSI.src/M000019.html +26 -0
  29. data/doc/classes/Classifier/LSI.src/M000020.html +23 -0
  30. data/doc/classes/Classifier/LSI.src/M000021.html +21 -0
  31. data/doc/classes/Classifier/LSI.src/M000022.html +31 -0
  32. data/doc/classes/Classifier/WordList.html +202 -0
  33. data/doc/classes/Classifier/WordList.src/M000007.html +18 -0
  34. data/doc/classes/Classifier/WordList.src/M000008.html +19 -0
  35. data/doc/classes/Classifier/WordList.src/M000009.html +19 -0
  36. data/doc/classes/Classifier/WordList.src/M000010.html +18 -0
  37. data/doc/classes/GSL.html +111 -0
  38. data/doc/classes/GSL/Vector.html +156 -0
  39. data/doc/classes/GSL/Vector.src/M000005.html +18 -0
  40. data/doc/classes/GSL/Vector.src/M000006.html +19 -0
  41. data/doc/classes/Object.html +139 -0
  42. data/doc/classes/Object.src/M000001.html +16 -0
  43. data/doc/classes/String.html +95 -9
  44. data/doc/classes/{Classifier/WordHash.src/M000001.html → String.src/M000002.html} +3 -3
  45. data/doc/classes/String.src/M000003.html +18 -0
  46. data/doc/classes/String.src/M000004.html +18 -0
  47. data/doc/created.rid +1 -1
  48. data/doc/files/README.html +102 -12
  49. data/doc/files/lib/classifier/bayes_rb.html +1 -1
  50. data/doc/files/lib/classifier/{string_extensions/porter_stemmer_rb.html → extensions/vector_serialize_rb.html} +4 -15
  51. data/doc/files/lib/classifier/{string_extensions → extensions}/word_hash_rb.html +2 -2
  52. data/doc/files/lib/classifier/extensions/word_list_rb.html +115 -0
  53. data/doc/files/lib/classifier/lsi/content_node_rb.html +115 -0
  54. data/doc/files/lib/classifier/lsi_rb.html +125 -0
  55. data/doc/files/lib/classifier/string_extensions_rb.html +2 -3
  56. data/doc/files/lib/classifier_rb.html +3 -1
  57. data/doc/fr_class_index.html +6 -2
  58. data/doc/fr_file_index.html +5 -2
  59. data/doc/fr_method_index.html +34 -11
  60. data/lib/classifier.rb +3 -1
  61. data/lib/classifier/bayes.rb +34 -9
  62. data/lib/classifier/extensions/vector_serialize.rb +14 -0
  63. data/lib/classifier/extensions/word_hash.rb +125 -0
  64. data/lib/classifier/extensions/word_list.rb +31 -0
  65. data/lib/classifier/lsi.rb +248 -0
  66. data/lib/classifier/lsi/content_node.rb +67 -0
  67. data/lib/classifier/string_extensions.rb +10 -5
  68. data/test/bayes/bayesian_test.rb +2 -2
  69. data/test/lsi/lsi_test.rb +88 -0
  70. data/test/string_extensions/word_hash_test.rb +7 -5
  71. metadata +79 -24
  72. data/doc/classes/Classifier/Stemmable.html +0 -243
  73. data/doc/classes/Classifier/Stemmable.src/M000003.html +0 -102
  74. data/doc/classes/Classifier/WordHash.html +0 -178
  75. data/doc/classes/Classifier/WordHash.src/M000002.html +0 -28
  76. data/lib/classifier/string_extensions/porter_stemmer.rb +0 -199
  77. data/lib/classifier/string_extensions/word_hash.rb +0 -119
data/bin/bayes.rb CHANGED
@@ -1,6 +1,12 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'classifier'
3
+ begin
4
+ require 'rubygems'
5
+ require_gem 'classifier'
6
+ rescue
7
+ require 'classifier'
8
+ end
9
+
4
10
  require 'madeleine'
5
11
 
6
12
  m = SnapshotMadeleine.new(File.expand_path("~/.bayes_data")) {
@@ -24,7 +30,7 @@ when "classify"
24
30
  puts m.system.classify(File.open(ARGV[1]).read)
25
31
  else
26
32
  puts "Invalid option: choose add [category] [file] or clasify [file]"
27
- exit(1)
33
+ exit(-1)
28
34
  end
29
35
 
30
36
  m.take_snapshot
@@ -59,12 +59,16 @@
59
59
  lib/classifier/bayes.rb
60
60
  </a>
61
61
  <br />
62
- <a href="../files/lib/classifier/string_extensions/porter_stemmer_rb.html">
63
- lib/classifier/string_extensions/porter_stemmer.rb
62
+ <a href="../files/lib/classifier/lsi_rb.html">
63
+ lib/classifier/lsi.rb
64
64
  </a>
65
65
  <br />
66
- <a href="../files/lib/classifier/string_extensions/word_hash_rb.html">
67
- lib/classifier/string_extensions/word_hash.rb
66
+ <a href="../files/lib/classifier/extensions/word_list_rb.html">
67
+ lib/classifier/extensions/word_list.rb
68
+ </a>
69
+ <br />
70
+ <a href="../files/lib/classifier/lsi/content_node_rb.html">
71
+ lib/classifier/lsi/content_node.rb
68
72
  </a>
69
73
  <br />
70
74
  </td>
@@ -82,13 +86,13 @@
82
86
 
83
87
  <div id="description">
84
88
  <table>
85
- <tr><td valign="top">Author:</td><td>Lucas Carlson (<a href="mailto:lucas@rufy.com">lucas@rufy.com</a>)
89
+ <tr><td valign="top">Author:</td><td>David Fayram (<a href="mailto:dfayram@lensmen.net">dfayram@lensmen.net</a>)
86
90
 
87
91
  </td></tr>
88
- <tr><td valign="top">Copyright:</td><td>Copyright &#169; 2005 Lucas Carlson
92
+ <tr><td valign="top">Copyright:</td><td>Copyright &#169; 2005 David Fayram II
89
93
 
90
94
  </td></tr>
91
- <tr><td valign="top">License:</td><td>LGPL
95
+ <tr><td valign="top">License:</td><td>GPL
92
96
 
93
97
  </td></tr>
94
98
  </table>
@@ -109,9 +113,10 @@
109
113
  <div id="class-list">
110
114
  <h3 class="section-bar">Classes and Modules</h3>
111
115
 
112
- Module <a href="Classifier/Stemmable.html" class="link">Classifier::Stemmable</a><br />
113
- Module <a href="Classifier/WordHash.html" class="link">Classifier::WordHash</a><br />
114
- Class <a href="Classifier/Bayes.html" class="link">Classifier::Bayes</a><br />
116
+ Class <a href="Classifier/Bayes.html" class="link">Classifier::Bayes</a><br />
117
+ Class <a href="Classifier/ContentNode.html" class="link">Classifier::ContentNode</a><br />
118
+ Class <a href="Classifier/LSI.html" class="link">Classifier::LSI</a><br />
119
+ Class <a href="Classifier/WordList.html" class="link">Classifier::WordList</a><br />
115
120
 
116
121
  </div>
117
122
 
@@ -65,7 +65,9 @@
65
65
  <tr class="top-aligned-row">
66
66
  <td><strong>Parent:</strong></td>
67
67
  <td>
68
+ <a href="../Object.html">
68
69
  Object
70
+ </a>
69
71
  </td>
70
72
  </tr>
71
73
  </table>
@@ -86,13 +88,14 @@
86
88
  <h3 class="section-bar">Methods</h3>
87
89
 
88
90
  <div class="name-list">
89
- <a href="#M000010">add_category</a>&nbsp;&nbsp;
90
- <a href="#M000011">append_category</a>&nbsp;&nbsp;
91
- <a href="#M000007">classifications</a>&nbsp;&nbsp;
92
- <a href="#M000008">classify</a>&nbsp;&nbsp;
93
- <a href="#M000009">method_missing</a>&nbsp;&nbsp;
94
- <a href="#M000005">new</a>&nbsp;&nbsp;
95
- <a href="#M000006">train</a>&nbsp;&nbsp;
91
+ <a href="#M000029">add_category</a>&nbsp;&nbsp;
92
+ <a href="#M000030">append_category</a>&nbsp;&nbsp;
93
+ <a href="#M000026">classifications</a>&nbsp;&nbsp;
94
+ <a href="#M000027">classify</a>&nbsp;&nbsp;
95
+ <a href="#M000028">method_missing</a>&nbsp;&nbsp;
96
+ <a href="#M000023">new</a>&nbsp;&nbsp;
97
+ <a href="#M000024">train</a>&nbsp;&nbsp;
98
+ <a href="#M000025">untrain</a>&nbsp;&nbsp;
96
99
  </div>
97
100
  </div>
98
101
 
@@ -114,12 +117,12 @@
114
117
  <div id="methods">
115
118
  <h3 class="section-bar">Public Class methods</h3>
116
119
 
117
- <div id="method-M000005" class="method-detail">
118
- <a name="M000005"></a>
120
+ <div id="method-M000023" class="method-detail">
121
+ <a name="M000023"></a>
119
122
 
120
123
  <div class="method-heading">
121
- <a href="Bayes.src/M000005.html" target="Code" class="method-signature"
122
- onclick="popupCode('Bayes.src/M000005.html');return false;">
124
+ <a href="Bayes.src/M000023.html" target="Code" class="method-signature"
125
+ onclick="popupCode('Bayes.src/M000023.html');return false;">
123
126
  <span class="method-name">new</span><span class="method-args">(*categories)</span>
124
127
  </a>
125
128
  </div>
@@ -137,12 +140,12 @@ initialized and given a training method. E.g.,
137
140
 
138
141
  <h3 class="section-bar">Public Instance methods</h3>
139
142
 
140
- <div id="method-M000010" class="method-detail">
141
- <a name="M000010"></a>
143
+ <div id="method-M000029" class="method-detail">
144
+ <a name="M000029"></a>
142
145
 
143
146
  <div class="method-heading">
144
- <a href="Bayes.src/M000010.html" target="Code" class="method-signature"
145
- onclick="popupCode('Bayes.src/M000010.html');return false;">
147
+ <a href="Bayes.src/M000029.html" target="Code" class="method-signature"
148
+ onclick="popupCode('Bayes.src/M000029.html');return false;">
146
149
  <span class="method-name">add_category</span><span class="method-args">(category)</span>
147
150
  </a>
148
151
  </div>
@@ -163,8 +166,8 @@ at initialization.
163
166
  </div>
164
167
  </div>
165
168
 
166
- <div id="method-M000011" class="method-detail">
167
- <a name="M000011"></a>
169
+ <div id="method-M000030" class="method-detail">
170
+ <a name="M000030"></a>
168
171
 
169
172
  <div class="method-heading">
170
173
  <span class="method-name">append_category</span><span class="method-args">(category)</span>
@@ -172,17 +175,17 @@ at initialization.
172
175
 
173
176
  <div class="method-description">
174
177
  <p>
175
- Alias for <a href="Bayes.html#M000010">add_category</a>
178
+ Alias for <a href="Bayes.html#M000029">add_category</a>
176
179
  </p>
177
180
  </div>
178
181
  </div>
179
182
 
180
- <div id="method-M000007" class="method-detail">
181
- <a name="M000007"></a>
183
+ <div id="method-M000026" class="method-detail">
184
+ <a name="M000026"></a>
182
185
 
183
186
  <div class="method-heading">
184
- <a href="Bayes.src/M000007.html" target="Code" class="method-signature"
185
- onclick="popupCode('Bayes.src/M000007.html');return false;">
187
+ <a href="Bayes.src/M000026.html" target="Code" class="method-signature"
188
+ onclick="popupCode('Bayes.src/M000026.html');return false;">
186
189
  <span class="method-name">classifications</span><span class="method-args">(text)</span>
187
190
  </a>
188
191
  </div>
@@ -197,17 +200,17 @@ Returns the scores in each category the provided <tt>text</tt>. E.g.,
197
200
  </pre>
198
201
  <p>
199
202
  The largest of these scores (the one closest to 0) is the one picked out by
200
- <a href="Bayes.html#M000008">classify</a>
203
+ <a href="Bayes.html#M000027">classify</a>
201
204
  </p>
202
205
  </div>
203
206
  </div>
204
207
 
205
- <div id="method-M000008" class="method-detail">
206
- <a name="M000008"></a>
208
+ <div id="method-M000027" class="method-detail">
209
+ <a name="M000027"></a>
207
210
 
208
211
  <div class="method-heading">
209
- <a href="Bayes.src/M000008.html" target="Code" class="method-signature"
210
- onclick="popupCode('Bayes.src/M000008.html');return false;">
212
+ <a href="Bayes.src/M000027.html" target="Code" class="method-signature"
213
+ onclick="popupCode('Bayes.src/M000027.html');return false;">
211
214
  <span class="method-name">classify</span><span class="method-args">(text)</span>
212
215
  </a>
213
216
  </div>
@@ -224,36 +227,37 @@ the categories given in the initializer. E.g.,
224
227
  </div>
225
228
  </div>
226
229
 
227
- <div id="method-M000009" class="method-detail">
228
- <a name="M000009"></a>
230
+ <div id="method-M000028" class="method-detail">
231
+ <a name="M000028"></a>
229
232
 
230
233
  <div class="method-heading">
231
- <a href="Bayes.src/M000009.html" target="Code" class="method-signature"
232
- onclick="popupCode('Bayes.src/M000009.html');return false;">
234
+ <a href="Bayes.src/M000028.html" target="Code" class="method-signature"
235
+ onclick="popupCode('Bayes.src/M000028.html');return false;">
233
236
  <span class="method-name">method_missing</span><span class="method-args">(name, *args)</span>
234
237
  </a>
235
238
  </div>
236
239
 
237
240
  <div class="method-description">
238
241
  <p>
239
- Provides training methods for the categories specified in <a
240
- href="Bayes.html#M000005">Bayes#new</a> For example:
242
+ Provides training and untraining methods for the categories specified in <a
243
+ href="Bayes.html#M000023">Bayes#new</a> For example:
241
244
  </p>
242
245
  <pre>
243
246
  b = Classifier::Bayes.new 'This', 'That', 'the_other'
244
247
  b.train_this &quot;This text&quot;
245
248
  b.train_that &quot;That text&quot;
249
+ b.untrain_that &quot;That text&quot;
246
250
  b.train_the_other &quot;The other text&quot;
247
251
  </pre>
248
252
  </div>
249
253
  </div>
250
254
 
251
- <div id="method-M000006" class="method-detail">
252
- <a name="M000006"></a>
255
+ <div id="method-M000024" class="method-detail">
256
+ <a name="M000024"></a>
253
257
 
254
258
  <div class="method-heading">
255
- <a href="Bayes.src/M000006.html" target="Code" class="method-signature"
256
- onclick="popupCode('Bayes.src/M000006.html');return false;">
259
+ <a href="Bayes.src/M000024.html" target="Code" class="method-signature"
260
+ onclick="popupCode('Bayes.src/M000024.html');return false;">
257
261
  <span class="method-name">train</span><span class="method-args">(category, text)</span>
258
262
  </a>
259
263
  </div>
@@ -261,7 +265,7 @@ href="Bayes.html#M000005">Bayes#new</a> For example:
261
265
  <div class="method-description">
262
266
  <p>
263
267
  Provides a general training method for all categories specified in <a
264
- href="Bayes.html#M000005">Bayes#new</a> For example:
268
+ href="Bayes.html#M000023">Bayes#new</a> For example:
265
269
  </p>
266
270
  <pre>
267
271
  b = Classifier::Bayes.new 'This', 'That', 'the_other'
@@ -272,6 +276,32 @@ href="Bayes.html#M000005">Bayes#new</a> For example:
272
276
  </div>
273
277
  </div>
274
278
 
279
+ <div id="method-M000025" class="method-detail">
280
+ <a name="M000025"></a>
281
+
282
+ <div class="method-heading">
283
+ <a href="Bayes.src/M000025.html" target="Code" class="method-signature"
284
+ onclick="popupCode('Bayes.src/M000025.html');return false;">
285
+ <span class="method-name">untrain</span><span class="method-args">(category, text)</span>
286
+ </a>
287
+ </div>
288
+
289
+ <div class="method-description">
290
+ <p>
291
+ Provides a untraining method for all categories specified in <a
292
+ href="Bayes.html#M000023">Bayes#new</a> Be very careful with this method.
293
+ </p>
294
+ <p>
295
+ For example:
296
+ </p>
297
+ <pre>
298
+ b = Classifier::Bayes.new 'This', 'That', 'the_other'
299
+ b.train :this, &quot;This text&quot;
300
+ b.untrain :this, &quot;This text&quot;
301
+ </pre>
302
+ </div>
303
+ </div>
304
+
275
305
 
276
306
  </div>
277
307
 
@@ -13,7 +13,7 @@
13
13
  <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 11</span>
14
14
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-operator">*</span><span class="ruby-identifier">categories</span>)
15
15
  <span class="ruby-ivar">@categories</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
16
- <span class="ruby-identifier">categories</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">category</span><span class="ruby-operator">|</span> <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">&quot;_&quot;</span>,<span class="ruby-value str">&quot; &quot;</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>] = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> }
16
+ <span class="ruby-identifier">categories</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">category</span><span class="ruby-operator">|</span> <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">prepare_category_name</span>] = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> }
17
17
  <span class="ruby-ivar">@total_words</span> = <span class="ruby-value">0</span>
18
18
  <span class="ruby-keyword kw">end</span></pre>
19
19
  </body>
@@ -12,7 +12,7 @@
12
12
  <body class="standalone-code">
13
13
  <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 24</span>
14
14
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">train</span>(<span class="ruby-identifier">category</span>, <span class="ruby-identifier">text</span>)
15
- <span class="ruby-identifier">category</span> = <span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">&quot;_&quot;</span>,<span class="ruby-value str">&quot; &quot;</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>
15
+ <span class="ruby-identifier">category</span> = <span class="ruby-identifier">category</span>.<span class="ruby-identifier">prepare_category_name</span>
16
16
  <span class="ruby-identifier">text</span>.<span class="ruby-identifier">word_hash</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
17
17
  <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">||=</span> <span class="ruby-value">0</span>
18
18
  <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
@@ -0,0 +1,30 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>untrain (Classifier::Bayes)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 41</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">untrain</span>(<span class="ruby-identifier">category</span>, <span class="ruby-identifier">text</span>)
15
+ <span class="ruby-identifier">category</span> = <span class="ruby-identifier">category</span>.<span class="ruby-identifier">prepare_category_name</span>
16
+ <span class="ruby-identifier">text</span>.<span class="ruby-identifier">word_hash</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
17
+ <span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@total_words</span> <span class="ruby-operator">&gt;=</span> <span class="ruby-value">0</span>
18
+ <span class="ruby-identifier">orig</span> = <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>]
19
+ <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">||=</span> <span class="ruby-value">0</span>
20
+ <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">-=</span> <span class="ruby-identifier">count</span>
21
+ <span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">&lt;=</span> <span class="ruby-value">0</span>
22
+ <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>].<span class="ruby-identifier">delete</span>(<span class="ruby-identifier">word</span>)
23
+ <span class="ruby-identifier">count</span> = <span class="ruby-identifier">orig</span>
24
+ <span class="ruby-keyword kw">end</span>
25
+ <span class="ruby-ivar">@total_words</span> <span class="ruby-operator">-=</span> <span class="ruby-identifier">count</span>
26
+ <span class="ruby-keyword kw">end</span>
27
+ <span class="ruby-keyword kw">end</span>
28
+ <span class="ruby-keyword kw">end</span></pre>
29
+ </body>
30
+ </html>
@@ -10,7 +10,7 @@
10
10
  <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
11
  </head>
12
12
  <body class="standalone-code">
13
- <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 38</span>
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 62</span>
14
14
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>)
15
15
  <span class="ruby-identifier">score</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
16
16
  <span class="ruby-ivar">@categories</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">category</span>, <span class="ruby-identifier">category_words</span><span class="ruby-operator">|</span>
@@ -10,7 +10,7 @@
10
10
  <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
11
  </head>
12
12
  <body class="standalone-code">
13
- <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 56</span>
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 80</span>
14
14
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classify</span>(<span class="ruby-identifier">text</span>)
15
15
  (<span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>).<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">a</span><span class="ruby-operator">|</span> <span class="ruby-operator">-</span><span class="ruby-identifier">a</span>[<span class="ruby-value">1</span>] })[<span class="ruby-value">0</span>][<span class="ruby-value">0</span>]
16
16
  <span class="ruby-keyword kw">end</span></pre>
@@ -10,12 +10,12 @@
10
10
  <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
11
  </head>
12
12
  <body class="standalone-code">
13
- <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 67</span>
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 92</span>
14
14
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">method_missing</span>(<span class="ruby-identifier">name</span>, <span class="ruby-operator">*</span><span class="ruby-identifier">args</span>)
15
- <span class="ruby-identifier">category</span> = <span class="ruby-identifier">name</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/train_([\w]+)/</span>, <span class="ruby-value str">'\1'</span>).<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">&quot;_&quot;</span>,<span class="ruby-value str">&quot; &quot;</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>
15
+ <span class="ruby-identifier">category</span> = <span class="ruby-identifier">name</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/(un)?train_([\w]+)/</span>, <span class="ruby-value str">'\2'</span>).<span class="ruby-identifier">prepare_category_name</span>
16
16
  <span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@categories</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">category</span>
17
- <span class="ruby-identifier">args</span>.<span class="ruby-identifier">each</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">text</span><span class="ruby-operator">|</span> <span class="ruby-identifier">train</span> <span class="ruby-identifier">category</span>, <span class="ruby-identifier">text</span>}
18
- <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">name</span>.<span class="ruby-identifier">to_s</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/train_([\w]+)/</span>
17
+ <span class="ruby-identifier">args</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">text</span><span class="ruby-operator">|</span> <span class="ruby-identifier">eval</span>(<span class="ruby-node">&quot;#{$1}train(category, text)&quot;</span>) }
18
+ <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">name</span>.<span class="ruby-identifier">to_s</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(un)?train_([\w]+)/</span>
19
19
  <span class="ruby-identifier">raise</span> <span class="ruby-constant">StandardError</span>, <span class="ruby-node">&quot;No such category: #{category}&quot;</span>
20
20
  <span class="ruby-keyword kw">else</span>
21
21
  <span class="ruby-keyword kw">super</span> <span class="ruby-comment cmt">#raise StandardError, &quot;No such method: #{name}&quot;</span>
@@ -10,9 +10,9 @@
10
10
  <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
11
  </head>
12
12
  <body class="standalone-code">
13
- <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 96</span>
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 121</span>
14
14
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_category</span>(<span class="ruby-identifier">category</span>)
15
- <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">&quot;_&quot;</span>,<span class="ruby-value str">&quot; &quot;</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>] = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
15
+ <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">prepare_category_name</span>] = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
16
16
  <span class="ruby-keyword kw">end</span></pre>
17
17
  </body>
18
18
  </html>
@@ -0,0 +1,252 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>Class: Classifier::ContentNode</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="classHeader">
50
+ <table class="header-table">
51
+ <tr class="top-aligned-row">
52
+ <td><strong>Class</strong></td>
53
+ <td class="class-name-in-header">Classifier::ContentNode</td>
54
+ </tr>
55
+ <tr class="top-aligned-row">
56
+ <td><strong>In:</strong></td>
57
+ <td>
58
+ <a href="../../files/lib/classifier/lsi/content_node_rb.html">
59
+ lib/classifier/lsi/content_node.rb
60
+ </a>
61
+ <br />
62
+ </td>
63
+ </tr>
64
+
65
+ <tr class="top-aligned-row">
66
+ <td><strong>Parent:</strong></td>
67
+ <td>
68
+ <a href="../Object.html">
69
+ Object
70
+ </a>
71
+ </td>
72
+ </tr>
73
+ </table>
74
+ </div>
75
+ <!-- banner header -->
76
+
77
+ <div id="bodyContent">
78
+
79
+
80
+
81
+ <div id="contextContent">
82
+
83
+ <div id="description">
84
+ <p>
85
+ This is an internal data structure class for the <a href="LSI.html">LSI</a>
86
+ node. Save for <a href="ContentNode.html#M000034">raw_vector_with</a>, it
87
+ should be fairly straightforward to understand. You should never have to
88
+ use it directly.
89
+ </p>
90
+
91
+ </div>
92
+
93
+
94
+ </div>
95
+
96
+ <div id="method-list">
97
+ <h3 class="section-bar">Methods</h3>
98
+
99
+ <div class="name-list">
100
+ <a href="#M000031">new</a>&nbsp;&nbsp;
101
+ <a href="#M000034">raw_vector_with</a>&nbsp;&nbsp;
102
+ <a href="#M000033">search_norm</a>&nbsp;&nbsp;
103
+ <a href="#M000032">search_vector</a>&nbsp;&nbsp;
104
+ </div>
105
+ </div>
106
+
107
+ </div>
108
+
109
+
110
+ <!-- if includes -->
111
+
112
+ <div id="section">
113
+
114
+
115
+
116
+
117
+
118
+ <div id="attribute-list">
119
+ <h3 class="section-bar">Attributes</h3>
120
+
121
+ <div class="name-list">
122
+ <table>
123
+ <tr class="top-aligned-row context-row">
124
+ <td class="context-item-name">categories</td>
125
+ <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
126
+ <td class="context-item-desc"></td>
127
+ </tr>
128
+ <tr class="top-aligned-row context-row">
129
+ <td class="context-item-name">lsi_norm</td>
130
+ <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
131
+ <td class="context-item-desc"></td>
132
+ </tr>
133
+ <tr class="top-aligned-row context-row">
134
+ <td class="context-item-name">lsi_vector</td>
135
+ <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
136
+ <td class="context-item-desc"></td>
137
+ </tr>
138
+ <tr class="top-aligned-row context-row">
139
+ <td class="context-item-name">raw_norm</td>
140
+ <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
141
+ <td class="context-item-desc"></td>
142
+ </tr>
143
+ <tr class="top-aligned-row context-row">
144
+ <td class="context-item-name">raw_vector</td>
145
+ <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
146
+ <td class="context-item-desc"></td>
147
+ </tr>
148
+ <tr class="top-aligned-row context-row">
149
+ <td class="context-item-name">source</td>
150
+ <td class="context-item-value">&nbsp;[R]&nbsp;</td>
151
+ <td class="context-item-desc"></td>
152
+ </tr>
153
+ <tr class="top-aligned-row context-row">
154
+ <td class="context-item-name">word_hash</td>
155
+ <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
156
+ <td class="context-item-desc"></td>
157
+ </tr>
158
+ </table>
159
+ </div>
160
+ </div>
161
+
162
+
163
+
164
+ <!-- if method_list -->
165
+ <div id="methods">
166
+ <h3 class="section-bar">Public Class methods</h3>
167
+
168
+ <div id="method-M000031" class="method-detail">
169
+ <a name="M000031"></a>
170
+
171
+ <div class="method-heading">
172
+ <a href="ContentNode.src/M000031.html" target="Code" class="method-signature"
173
+ onclick="popupCode('ContentNode.src/M000031.html');return false;">
174
+ <span class="method-name">new</span><span class="method-args">( source, categories=nil, text_proc=nil )</span>
175
+ </a>
176
+ </div>
177
+
178
+ <div class="method-description">
179
+ <p>
180
+ If text_proc is not specified, the source will be duck-typed via
181
+ source.to_s
182
+ </p>
183
+ </div>
184
+ </div>
185
+
186
+ <h3 class="section-bar">Public Instance methods</h3>
187
+
188
+ <div id="method-M000034" class="method-detail">
189
+ <a name="M000034"></a>
190
+
191
+ <div class="method-heading">
192
+ <a href="ContentNode.src/M000034.html" target="Code" class="method-signature"
193
+ onclick="popupCode('ContentNode.src/M000034.html');return false;">
194
+ <span class="method-name">raw_vector_with</span><span class="method-args">( word_list )</span>
195
+ </a>
196
+ </div>
197
+
198
+ <div class="method-description">
199
+ <p>
200
+ Creates the raw vector out of word_hash using word_list as the key for
201
+ mapping the vector space.
202
+ </p>
203
+ </div>
204
+ </div>
205
+
206
+ <div id="method-M000033" class="method-detail">
207
+ <a name="M000033"></a>
208
+
209
+ <div class="method-heading">
210
+ <a href="ContentNode.src/M000033.html" target="Code" class="method-signature"
211
+ onclick="popupCode('ContentNode.src/M000033.html');return false;">
212
+ <span class="method-name">search_norm</span><span class="method-args">()</span>
213
+ </a>
214
+ </div>
215
+
216
+ <div class="method-description">
217
+ <p>
218
+ Use this to fetch the appropriate search vector in normalized form.
219
+ </p>
220
+ </div>
221
+ </div>
222
+
223
+ <div id="method-M000032" class="method-detail">
224
+ <a name="M000032"></a>
225
+
226
+ <div class="method-heading">
227
+ <a href="ContentNode.src/M000032.html" target="Code" class="method-signature"
228
+ onclick="popupCode('ContentNode.src/M000032.html');return false;">
229
+ <span class="method-name">search_vector</span><span class="method-args">()</span>
230
+ </a>
231
+ </div>
232
+
233
+ <div class="method-description">
234
+ <p>
235
+ Use this to fetch the appropriate search vector.
236
+ </p>
237
+ </div>
238
+ </div>
239
+
240
+
241
+ </div>
242
+
243
+
244
+ </div>
245
+
246
+
247
+ <div id="validator-badges">
248
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
249
+ </div>
250
+
251
+ </body>
252
+ </html>