classifier 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +341 -0
- data/README +59 -6
- data/Rakefile +16 -4
- data/bin/bayes.rb +8 -2
- data/doc/classes/Classifier.html +15 -10
- data/doc/classes/Classifier/Bayes.html +68 -38
- data/doc/classes/Classifier/Bayes.src/{M000005.html → M000023.html} +1 -1
- data/doc/classes/Classifier/Bayes.src/{M000006.html → M000024.html} +1 -1
- data/doc/classes/Classifier/Bayes.src/M000025.html +30 -0
- data/doc/classes/Classifier/Bayes.src/{M000007.html → M000026.html} +1 -1
- data/doc/classes/Classifier/Bayes.src/{M000008.html → M000027.html} +1 -1
- data/doc/classes/Classifier/Bayes.src/{M000009.html → M000028.html} +4 -4
- data/doc/classes/Classifier/Bayes.src/{M000010.html → M000029.html} +2 -2
- data/doc/classes/Classifier/ContentNode.html +252 -0
- data/doc/classes/Classifier/ContentNode.src/M000031.html +21 -0
- data/doc/classes/Classifier/ContentNode.src/M000032.html +18 -0
- data/doc/classes/Classifier/ContentNode.src/M000033.html +18 -0
- data/doc/classes/Classifier/ContentNode.src/M000034.html +41 -0
- data/doc/classes/Classifier/LSI.html +449 -0
- data/doc/classes/Classifier/LSI.src/M000011.html +20 -0
- data/doc/classes/Classifier/LSI.src/M000012.html +18 -0
- data/doc/classes/Classifier/LSI.src/M000013.html +20 -0
- data/doc/classes/Classifier/LSI.src/M000014.html +18 -0
- data/doc/classes/Classifier/LSI.src/M000015.html +21 -0
- data/doc/classes/Classifier/LSI.src/M000016.html +18 -0
- data/doc/classes/Classifier/LSI.src/M000017.html +32 -0
- data/doc/classes/Classifier/LSI.src/M000018.html +26 -0
- data/doc/classes/Classifier/LSI.src/M000019.html +26 -0
- data/doc/classes/Classifier/LSI.src/M000020.html +23 -0
- data/doc/classes/Classifier/LSI.src/M000021.html +21 -0
- data/doc/classes/Classifier/LSI.src/M000022.html +31 -0
- data/doc/classes/Classifier/WordList.html +202 -0
- data/doc/classes/Classifier/WordList.src/M000007.html +18 -0
- data/doc/classes/Classifier/WordList.src/M000008.html +19 -0
- data/doc/classes/Classifier/WordList.src/M000009.html +19 -0
- data/doc/classes/Classifier/WordList.src/M000010.html +18 -0
- data/doc/classes/GSL.html +111 -0
- data/doc/classes/GSL/Vector.html +156 -0
- data/doc/classes/GSL/Vector.src/M000005.html +18 -0
- data/doc/classes/GSL/Vector.src/M000006.html +19 -0
- data/doc/classes/Object.html +139 -0
- data/doc/classes/Object.src/M000001.html +16 -0
- data/doc/classes/String.html +95 -9
- data/doc/classes/{Classifier/WordHash.src/M000001.html → String.src/M000002.html} +3 -3
- data/doc/classes/String.src/M000003.html +18 -0
- data/doc/classes/String.src/M000004.html +18 -0
- data/doc/created.rid +1 -1
- data/doc/files/README.html +102 -12
- data/doc/files/lib/classifier/bayes_rb.html +1 -1
- data/doc/files/lib/classifier/{string_extensions/porter_stemmer_rb.html → extensions/vector_serialize_rb.html} +4 -15
- data/doc/files/lib/classifier/{string_extensions → extensions}/word_hash_rb.html +2 -2
- data/doc/files/lib/classifier/extensions/word_list_rb.html +115 -0
- data/doc/files/lib/classifier/lsi/content_node_rb.html +115 -0
- data/doc/files/lib/classifier/lsi_rb.html +125 -0
- data/doc/files/lib/classifier/string_extensions_rb.html +2 -3
- data/doc/files/lib/classifier_rb.html +3 -1
- data/doc/fr_class_index.html +6 -2
- data/doc/fr_file_index.html +5 -2
- data/doc/fr_method_index.html +34 -11
- data/lib/classifier.rb +3 -1
- data/lib/classifier/bayes.rb +34 -9
- data/lib/classifier/extensions/vector_serialize.rb +14 -0
- data/lib/classifier/extensions/word_hash.rb +125 -0
- data/lib/classifier/extensions/word_list.rb +31 -0
- data/lib/classifier/lsi.rb +248 -0
- data/lib/classifier/lsi/content_node.rb +67 -0
- data/lib/classifier/string_extensions.rb +10 -5
- data/test/bayes/bayesian_test.rb +2 -2
- data/test/lsi/lsi_test.rb +88 -0
- data/test/string_extensions/word_hash_test.rb +7 -5
- metadata +79 -24
- data/doc/classes/Classifier/Stemmable.html +0 -243
- data/doc/classes/Classifier/Stemmable.src/M000003.html +0 -102
- data/doc/classes/Classifier/WordHash.html +0 -178
- data/doc/classes/Classifier/WordHash.src/M000002.html +0 -28
- data/lib/classifier/string_extensions/porter_stemmer.rb +0 -199
- data/lib/classifier/string_extensions/word_hash.rb +0 -119
data/bin/bayes.rb
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
|
3
|
+
begin
|
4
|
+
require 'rubygems'
|
5
|
+
require_gem 'classifier'
|
6
|
+
rescue
|
7
|
+
require 'classifier'
|
8
|
+
end
|
9
|
+
|
4
10
|
require 'madeleine'
|
5
11
|
|
6
12
|
m = SnapshotMadeleine.new(File.expand_path("~/.bayes_data")) {
|
@@ -24,7 +30,7 @@ when "classify"
|
|
24
30
|
puts m.system.classify(File.open(ARGV[1]).read)
|
25
31
|
else
|
26
32
|
puts "Invalid option: choose add [category] [file] or clasify [file]"
|
27
|
-
exit(1)
|
33
|
+
exit(-1)
|
28
34
|
end
|
29
35
|
|
30
36
|
m.take_snapshot
|
data/doc/classes/Classifier.html
CHANGED
@@ -59,12 +59,16 @@
|
|
59
59
|
lib/classifier/bayes.rb
|
60
60
|
</a>
|
61
61
|
<br />
|
62
|
-
<a href="../files/lib/classifier/
|
63
|
-
lib/classifier/
|
62
|
+
<a href="../files/lib/classifier/lsi_rb.html">
|
63
|
+
lib/classifier/lsi.rb
|
64
64
|
</a>
|
65
65
|
<br />
|
66
|
-
<a href="../files/lib/classifier/
|
67
|
-
lib/classifier/
|
66
|
+
<a href="../files/lib/classifier/extensions/word_list_rb.html">
|
67
|
+
lib/classifier/extensions/word_list.rb
|
68
|
+
</a>
|
69
|
+
<br />
|
70
|
+
<a href="../files/lib/classifier/lsi/content_node_rb.html">
|
71
|
+
lib/classifier/lsi/content_node.rb
|
68
72
|
</a>
|
69
73
|
<br />
|
70
74
|
</td>
|
@@ -82,13 +86,13 @@
|
|
82
86
|
|
83
87
|
<div id="description">
|
84
88
|
<table>
|
85
|
-
<tr><td valign="top">Author:</td><td>
|
89
|
+
<tr><td valign="top">Author:</td><td>David Fayram (<a href="mailto:dfayram@lensmen.net">dfayram@lensmen.net</a>)
|
86
90
|
|
87
91
|
</td></tr>
|
88
|
-
<tr><td valign="top">Copyright:</td><td>Copyright © 2005
|
92
|
+
<tr><td valign="top">Copyright:</td><td>Copyright © 2005 David Fayram II
|
89
93
|
|
90
94
|
</td></tr>
|
91
|
-
<tr><td valign="top">License:</td><td>
|
95
|
+
<tr><td valign="top">License:</td><td>GPL
|
92
96
|
|
93
97
|
</td></tr>
|
94
98
|
</table>
|
@@ -109,9 +113,10 @@
|
|
109
113
|
<div id="class-list">
|
110
114
|
<h3 class="section-bar">Classes and Modules</h3>
|
111
115
|
|
112
|
-
|
113
|
-
|
114
|
-
Class <a href="Classifier/
|
116
|
+
Class <a href="Classifier/Bayes.html" class="link">Classifier::Bayes</a><br />
|
117
|
+
Class <a href="Classifier/ContentNode.html" class="link">Classifier::ContentNode</a><br />
|
118
|
+
Class <a href="Classifier/LSI.html" class="link">Classifier::LSI</a><br />
|
119
|
+
Class <a href="Classifier/WordList.html" class="link">Classifier::WordList</a><br />
|
115
120
|
|
116
121
|
</div>
|
117
122
|
|
@@ -65,7 +65,9 @@
|
|
65
65
|
<tr class="top-aligned-row">
|
66
66
|
<td><strong>Parent:</strong></td>
|
67
67
|
<td>
|
68
|
+
<a href="../Object.html">
|
68
69
|
Object
|
70
|
+
</a>
|
69
71
|
</td>
|
70
72
|
</tr>
|
71
73
|
</table>
|
@@ -86,13 +88,14 @@
|
|
86
88
|
<h3 class="section-bar">Methods</h3>
|
87
89
|
|
88
90
|
<div class="name-list">
|
89
|
-
<a href="#
|
90
|
-
<a href="#
|
91
|
-
<a href="#
|
92
|
-
<a href="#
|
93
|
-
<a href="#
|
94
|
-
<a href="#
|
95
|
-
<a href="#
|
91
|
+
<a href="#M000029">add_category</a>
|
92
|
+
<a href="#M000030">append_category</a>
|
93
|
+
<a href="#M000026">classifications</a>
|
94
|
+
<a href="#M000027">classify</a>
|
95
|
+
<a href="#M000028">method_missing</a>
|
96
|
+
<a href="#M000023">new</a>
|
97
|
+
<a href="#M000024">train</a>
|
98
|
+
<a href="#M000025">untrain</a>
|
96
99
|
</div>
|
97
100
|
</div>
|
98
101
|
|
@@ -114,12 +117,12 @@
|
|
114
117
|
<div id="methods">
|
115
118
|
<h3 class="section-bar">Public Class methods</h3>
|
116
119
|
|
117
|
-
<div id="method-
|
118
|
-
<a name="
|
120
|
+
<div id="method-M000023" class="method-detail">
|
121
|
+
<a name="M000023"></a>
|
119
122
|
|
120
123
|
<div class="method-heading">
|
121
|
-
<a href="Bayes.src/
|
122
|
-
onclick="popupCode('Bayes.src/
|
124
|
+
<a href="Bayes.src/M000023.html" target="Code" class="method-signature"
|
125
|
+
onclick="popupCode('Bayes.src/M000023.html');return false;">
|
123
126
|
<span class="method-name">new</span><span class="method-args">(*categories)</span>
|
124
127
|
</a>
|
125
128
|
</div>
|
@@ -137,12 +140,12 @@ initialized and given a training method. E.g.,
|
|
137
140
|
|
138
141
|
<h3 class="section-bar">Public Instance methods</h3>
|
139
142
|
|
140
|
-
<div id="method-
|
141
|
-
<a name="
|
143
|
+
<div id="method-M000029" class="method-detail">
|
144
|
+
<a name="M000029"></a>
|
142
145
|
|
143
146
|
<div class="method-heading">
|
144
|
-
<a href="Bayes.src/
|
145
|
-
onclick="popupCode('Bayes.src/
|
147
|
+
<a href="Bayes.src/M000029.html" target="Code" class="method-signature"
|
148
|
+
onclick="popupCode('Bayes.src/M000029.html');return false;">
|
146
149
|
<span class="method-name">add_category</span><span class="method-args">(category)</span>
|
147
150
|
</a>
|
148
151
|
</div>
|
@@ -163,8 +166,8 @@ at initialization.
|
|
163
166
|
</div>
|
164
167
|
</div>
|
165
168
|
|
166
|
-
<div id="method-
|
167
|
-
<a name="
|
169
|
+
<div id="method-M000030" class="method-detail">
|
170
|
+
<a name="M000030"></a>
|
168
171
|
|
169
172
|
<div class="method-heading">
|
170
173
|
<span class="method-name">append_category</span><span class="method-args">(category)</span>
|
@@ -172,17 +175,17 @@ at initialization.
|
|
172
175
|
|
173
176
|
<div class="method-description">
|
174
177
|
<p>
|
175
|
-
Alias for <a href="Bayes.html#
|
178
|
+
Alias for <a href="Bayes.html#M000029">add_category</a>
|
176
179
|
</p>
|
177
180
|
</div>
|
178
181
|
</div>
|
179
182
|
|
180
|
-
<div id="method-
|
181
|
-
<a name="
|
183
|
+
<div id="method-M000026" class="method-detail">
|
184
|
+
<a name="M000026"></a>
|
182
185
|
|
183
186
|
<div class="method-heading">
|
184
|
-
<a href="Bayes.src/
|
185
|
-
onclick="popupCode('Bayes.src/
|
187
|
+
<a href="Bayes.src/M000026.html" target="Code" class="method-signature"
|
188
|
+
onclick="popupCode('Bayes.src/M000026.html');return false;">
|
186
189
|
<span class="method-name">classifications</span><span class="method-args">(text)</span>
|
187
190
|
</a>
|
188
191
|
</div>
|
@@ -197,17 +200,17 @@ Returns the scores in each category the provided <tt>text</tt>. E.g.,
|
|
197
200
|
</pre>
|
198
201
|
<p>
|
199
202
|
The largest of these scores (the one closest to 0) is the one picked out by
|
200
|
-
<a href="Bayes.html#
|
203
|
+
<a href="Bayes.html#M000027">classify</a>
|
201
204
|
</p>
|
202
205
|
</div>
|
203
206
|
</div>
|
204
207
|
|
205
|
-
<div id="method-
|
206
|
-
<a name="
|
208
|
+
<div id="method-M000027" class="method-detail">
|
209
|
+
<a name="M000027"></a>
|
207
210
|
|
208
211
|
<div class="method-heading">
|
209
|
-
<a href="Bayes.src/
|
210
|
-
onclick="popupCode('Bayes.src/
|
212
|
+
<a href="Bayes.src/M000027.html" target="Code" class="method-signature"
|
213
|
+
onclick="popupCode('Bayes.src/M000027.html');return false;">
|
211
214
|
<span class="method-name">classify</span><span class="method-args">(text)</span>
|
212
215
|
</a>
|
213
216
|
</div>
|
@@ -224,36 +227,37 @@ the categories given in the initializer. E.g.,
|
|
224
227
|
</div>
|
225
228
|
</div>
|
226
229
|
|
227
|
-
<div id="method-
|
228
|
-
<a name="
|
230
|
+
<div id="method-M000028" class="method-detail">
|
231
|
+
<a name="M000028"></a>
|
229
232
|
|
230
233
|
<div class="method-heading">
|
231
|
-
<a href="Bayes.src/
|
232
|
-
onclick="popupCode('Bayes.src/
|
234
|
+
<a href="Bayes.src/M000028.html" target="Code" class="method-signature"
|
235
|
+
onclick="popupCode('Bayes.src/M000028.html');return false;">
|
233
236
|
<span class="method-name">method_missing</span><span class="method-args">(name, *args)</span>
|
234
237
|
</a>
|
235
238
|
</div>
|
236
239
|
|
237
240
|
<div class="method-description">
|
238
241
|
<p>
|
239
|
-
Provides training methods for the categories specified in <a
|
240
|
-
href="Bayes.html#
|
242
|
+
Provides training and untraining methods for the categories specified in <a
|
243
|
+
href="Bayes.html#M000023">Bayes#new</a> For example:
|
241
244
|
</p>
|
242
245
|
<pre>
|
243
246
|
b = Classifier::Bayes.new 'This', 'That', 'the_other'
|
244
247
|
b.train_this "This text"
|
245
248
|
b.train_that "That text"
|
249
|
+
b.untrain_that "That text"
|
246
250
|
b.train_the_other "The other text"
|
247
251
|
</pre>
|
248
252
|
</div>
|
249
253
|
</div>
|
250
254
|
|
251
|
-
<div id="method-
|
252
|
-
<a name="
|
255
|
+
<div id="method-M000024" class="method-detail">
|
256
|
+
<a name="M000024"></a>
|
253
257
|
|
254
258
|
<div class="method-heading">
|
255
|
-
<a href="Bayes.src/
|
256
|
-
onclick="popupCode('Bayes.src/
|
259
|
+
<a href="Bayes.src/M000024.html" target="Code" class="method-signature"
|
260
|
+
onclick="popupCode('Bayes.src/M000024.html');return false;">
|
257
261
|
<span class="method-name">train</span><span class="method-args">(category, text)</span>
|
258
262
|
</a>
|
259
263
|
</div>
|
@@ -261,7 +265,7 @@ href="Bayes.html#M000005">Bayes#new</a> For example:
|
|
261
265
|
<div class="method-description">
|
262
266
|
<p>
|
263
267
|
Provides a general training method for all categories specified in <a
|
264
|
-
href="Bayes.html#
|
268
|
+
href="Bayes.html#M000023">Bayes#new</a> For example:
|
265
269
|
</p>
|
266
270
|
<pre>
|
267
271
|
b = Classifier::Bayes.new 'This', 'That', 'the_other'
|
@@ -272,6 +276,32 @@ href="Bayes.html#M000005">Bayes#new</a> For example:
|
|
272
276
|
</div>
|
273
277
|
</div>
|
274
278
|
|
279
|
+
<div id="method-M000025" class="method-detail">
|
280
|
+
<a name="M000025"></a>
|
281
|
+
|
282
|
+
<div class="method-heading">
|
283
|
+
<a href="Bayes.src/M000025.html" target="Code" class="method-signature"
|
284
|
+
onclick="popupCode('Bayes.src/M000025.html');return false;">
|
285
|
+
<span class="method-name">untrain</span><span class="method-args">(category, text)</span>
|
286
|
+
</a>
|
287
|
+
</div>
|
288
|
+
|
289
|
+
<div class="method-description">
|
290
|
+
<p>
|
291
|
+
Provides a untraining method for all categories specified in <a
|
292
|
+
href="Bayes.html#M000023">Bayes#new</a> Be very careful with this method.
|
293
|
+
</p>
|
294
|
+
<p>
|
295
|
+
For example:
|
296
|
+
</p>
|
297
|
+
<pre>
|
298
|
+
b = Classifier::Bayes.new 'This', 'That', 'the_other'
|
299
|
+
b.train :this, "This text"
|
300
|
+
b.untrain :this, "This text"
|
301
|
+
</pre>
|
302
|
+
</div>
|
303
|
+
</div>
|
304
|
+
|
275
305
|
|
276
306
|
</div>
|
277
307
|
|
@@ -13,7 +13,7 @@
|
|
13
13
|
<pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 11</span>
|
14
14
|
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-operator">*</span><span class="ruby-identifier">categories</span>)
|
15
15
|
<span class="ruby-ivar">@categories</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
|
16
|
-
<span class="ruby-identifier">categories</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">category</span><span class="ruby-operator">|</span> <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">
|
16
|
+
<span class="ruby-identifier">categories</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">category</span><span class="ruby-operator">|</span> <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">prepare_category_name</span>] = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> }
|
17
17
|
<span class="ruby-ivar">@total_words</span> = <span class="ruby-value">0</span>
|
18
18
|
<span class="ruby-keyword kw">end</span></pre>
|
19
19
|
</body>
|
@@ -12,7 +12,7 @@
|
|
12
12
|
<body class="standalone-code">
|
13
13
|
<pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 24</span>
|
14
14
|
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">train</span>(<span class="ruby-identifier">category</span>, <span class="ruby-identifier">text</span>)
|
15
|
-
<span class="ruby-identifier">category</span> = <span class="ruby-identifier">category</span>.<span class="ruby-identifier">
|
15
|
+
<span class="ruby-identifier">category</span> = <span class="ruby-identifier">category</span>.<span class="ruby-identifier">prepare_category_name</span>
|
16
16
|
<span class="ruby-identifier">text</span>.<span class="ruby-identifier">word_hash</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
|
17
17
|
<span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">||=</span> <span class="ruby-value">0</span>
|
18
18
|
<span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
|
@@ -0,0 +1,30 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>untrain (Classifier::Bayes)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 41</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">untrain</span>(<span class="ruby-identifier">category</span>, <span class="ruby-identifier">text</span>)
|
15
|
+
<span class="ruby-identifier">category</span> = <span class="ruby-identifier">category</span>.<span class="ruby-identifier">prepare_category_name</span>
|
16
|
+
<span class="ruby-identifier">text</span>.<span class="ruby-identifier">word_hash</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
|
17
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@total_words</span> <span class="ruby-operator">>=</span> <span class="ruby-value">0</span>
|
18
|
+
<span class="ruby-identifier">orig</span> = <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>]
|
19
|
+
<span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">||=</span> <span class="ruby-value">0</span>
|
20
|
+
<span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">-=</span> <span class="ruby-identifier">count</span>
|
21
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator"><=</span> <span class="ruby-value">0</span>
|
22
|
+
<span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>].<span class="ruby-identifier">delete</span>(<span class="ruby-identifier">word</span>)
|
23
|
+
<span class="ruby-identifier">count</span> = <span class="ruby-identifier">orig</span>
|
24
|
+
<span class="ruby-keyword kw">end</span>
|
25
|
+
<span class="ruby-ivar">@total_words</span> <span class="ruby-operator">-=</span> <span class="ruby-identifier">count</span>
|
26
|
+
<span class="ruby-keyword kw">end</span>
|
27
|
+
<span class="ruby-keyword kw">end</span>
|
28
|
+
<span class="ruby-keyword kw">end</span></pre>
|
29
|
+
</body>
|
30
|
+
</html>
|
@@ -10,7 +10,7 @@
|
|
10
10
|
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
11
11
|
</head>
|
12
12
|
<body class="standalone-code">
|
13
|
-
<pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 62</span>
|
14
14
|
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>)
|
15
15
|
<span class="ruby-identifier">score</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
|
16
16
|
<span class="ruby-ivar">@categories</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">category</span>, <span class="ruby-identifier">category_words</span><span class="ruby-operator">|</span>
|
@@ -10,7 +10,7 @@
|
|
10
10
|
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
11
11
|
</head>
|
12
12
|
<body class="standalone-code">
|
13
|
-
<pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 80</span>
|
14
14
|
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classify</span>(<span class="ruby-identifier">text</span>)
|
15
15
|
(<span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>).<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">a</span><span class="ruby-operator">|</span> <span class="ruby-operator">-</span><span class="ruby-identifier">a</span>[<span class="ruby-value">1</span>] })[<span class="ruby-value">0</span>][<span class="ruby-value">0</span>]
|
16
16
|
<span class="ruby-keyword kw">end</span></pre>
|
@@ -10,12 +10,12 @@
|
|
10
10
|
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
11
11
|
</head>
|
12
12
|
<body class="standalone-code">
|
13
|
-
<pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 92</span>
|
14
14
|
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">method_missing</span>(<span class="ruby-identifier">name</span>, <span class="ruby-operator">*</span><span class="ruby-identifier">args</span>)
|
15
|
-
<span class="ruby-identifier">category</span> = <span class="ruby-identifier">name</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/train_([\w]+)/</span>, <span class="ruby-value str">'\
|
15
|
+
<span class="ruby-identifier">category</span> = <span class="ruby-identifier">name</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/(un)?train_([\w]+)/</span>, <span class="ruby-value str">'\2'</span>).<span class="ruby-identifier">prepare_category_name</span>
|
16
16
|
<span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@categories</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">category</span>
|
17
|
-
<span class="ruby-identifier">args</span>.<span class="ruby-identifier">each</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">text</span><span class="ruby-operator">|</span> <span class="ruby-identifier">
|
18
|
-
<span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">name</span>.<span class="ruby-identifier">to_s</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/train_([\w]+)/</span>
|
17
|
+
<span class="ruby-identifier">args</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">text</span><span class="ruby-operator">|</span> <span class="ruby-identifier">eval</span>(<span class="ruby-node">"#{$1}train(category, text)"</span>) }
|
18
|
+
<span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">name</span>.<span class="ruby-identifier">to_s</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(un)?train_([\w]+)/</span>
|
19
19
|
<span class="ruby-identifier">raise</span> <span class="ruby-constant">StandardError</span>, <span class="ruby-node">"No such category: #{category}"</span>
|
20
20
|
<span class="ruby-keyword kw">else</span>
|
21
21
|
<span class="ruby-keyword kw">super</span> <span class="ruby-comment cmt">#raise StandardError, "No such method: #{name}"</span>
|
@@ -10,9 +10,9 @@
|
|
10
10
|
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
11
11
|
</head>
|
12
12
|
<body class="standalone-code">
|
13
|
-
<pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 121</span>
|
14
14
|
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_category</span>(<span class="ruby-identifier">category</span>)
|
15
|
-
<span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">
|
15
|
+
<span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">prepare_category_name</span>] = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
|
16
16
|
<span class="ruby-keyword kw">end</span></pre>
|
17
17
|
</body>
|
18
18
|
</html>
|
@@ -0,0 +1,252 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>Class: Classifier::ContentNode</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="classHeader">
|
50
|
+
<table class="header-table">
|
51
|
+
<tr class="top-aligned-row">
|
52
|
+
<td><strong>Class</strong></td>
|
53
|
+
<td class="class-name-in-header">Classifier::ContentNode</td>
|
54
|
+
</tr>
|
55
|
+
<tr class="top-aligned-row">
|
56
|
+
<td><strong>In:</strong></td>
|
57
|
+
<td>
|
58
|
+
<a href="../../files/lib/classifier/lsi/content_node_rb.html">
|
59
|
+
lib/classifier/lsi/content_node.rb
|
60
|
+
</a>
|
61
|
+
<br />
|
62
|
+
</td>
|
63
|
+
</tr>
|
64
|
+
|
65
|
+
<tr class="top-aligned-row">
|
66
|
+
<td><strong>Parent:</strong></td>
|
67
|
+
<td>
|
68
|
+
<a href="../Object.html">
|
69
|
+
Object
|
70
|
+
</a>
|
71
|
+
</td>
|
72
|
+
</tr>
|
73
|
+
</table>
|
74
|
+
</div>
|
75
|
+
<!-- banner header -->
|
76
|
+
|
77
|
+
<div id="bodyContent">
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
<div id="contextContent">
|
82
|
+
|
83
|
+
<div id="description">
|
84
|
+
<p>
|
85
|
+
This is an internal data structure class for the <a href="LSI.html">LSI</a>
|
86
|
+
node. Save for <a href="ContentNode.html#M000034">raw_vector_with</a>, it
|
87
|
+
should be fairly straightforward to understand. You should never have to
|
88
|
+
use it directly.
|
89
|
+
</p>
|
90
|
+
|
91
|
+
</div>
|
92
|
+
|
93
|
+
|
94
|
+
</div>
|
95
|
+
|
96
|
+
<div id="method-list">
|
97
|
+
<h3 class="section-bar">Methods</h3>
|
98
|
+
|
99
|
+
<div class="name-list">
|
100
|
+
<a href="#M000031">new</a>
|
101
|
+
<a href="#M000034">raw_vector_with</a>
|
102
|
+
<a href="#M000033">search_norm</a>
|
103
|
+
<a href="#M000032">search_vector</a>
|
104
|
+
</div>
|
105
|
+
</div>
|
106
|
+
|
107
|
+
</div>
|
108
|
+
|
109
|
+
|
110
|
+
<!-- if includes -->
|
111
|
+
|
112
|
+
<div id="section">
|
113
|
+
|
114
|
+
|
115
|
+
|
116
|
+
|
117
|
+
|
118
|
+
<div id="attribute-list">
|
119
|
+
<h3 class="section-bar">Attributes</h3>
|
120
|
+
|
121
|
+
<div class="name-list">
|
122
|
+
<table>
|
123
|
+
<tr class="top-aligned-row context-row">
|
124
|
+
<td class="context-item-name">categories</td>
|
125
|
+
<td class="context-item-value"> [RW] </td>
|
126
|
+
<td class="context-item-desc"></td>
|
127
|
+
</tr>
|
128
|
+
<tr class="top-aligned-row context-row">
|
129
|
+
<td class="context-item-name">lsi_norm</td>
|
130
|
+
<td class="context-item-value"> [RW] </td>
|
131
|
+
<td class="context-item-desc"></td>
|
132
|
+
</tr>
|
133
|
+
<tr class="top-aligned-row context-row">
|
134
|
+
<td class="context-item-name">lsi_vector</td>
|
135
|
+
<td class="context-item-value"> [RW] </td>
|
136
|
+
<td class="context-item-desc"></td>
|
137
|
+
</tr>
|
138
|
+
<tr class="top-aligned-row context-row">
|
139
|
+
<td class="context-item-name">raw_norm</td>
|
140
|
+
<td class="context-item-value"> [RW] </td>
|
141
|
+
<td class="context-item-desc"></td>
|
142
|
+
</tr>
|
143
|
+
<tr class="top-aligned-row context-row">
|
144
|
+
<td class="context-item-name">raw_vector</td>
|
145
|
+
<td class="context-item-value"> [RW] </td>
|
146
|
+
<td class="context-item-desc"></td>
|
147
|
+
</tr>
|
148
|
+
<tr class="top-aligned-row context-row">
|
149
|
+
<td class="context-item-name">source</td>
|
150
|
+
<td class="context-item-value"> [R] </td>
|
151
|
+
<td class="context-item-desc"></td>
|
152
|
+
</tr>
|
153
|
+
<tr class="top-aligned-row context-row">
|
154
|
+
<td class="context-item-name">word_hash</td>
|
155
|
+
<td class="context-item-value"> [RW] </td>
|
156
|
+
<td class="context-item-desc"></td>
|
157
|
+
</tr>
|
158
|
+
</table>
|
159
|
+
</div>
|
160
|
+
</div>
|
161
|
+
|
162
|
+
|
163
|
+
|
164
|
+
<!-- if method_list -->
|
165
|
+
<div id="methods">
|
166
|
+
<h3 class="section-bar">Public Class methods</h3>
|
167
|
+
|
168
|
+
<div id="method-M000031" class="method-detail">
|
169
|
+
<a name="M000031"></a>
|
170
|
+
|
171
|
+
<div class="method-heading">
|
172
|
+
<a href="ContentNode.src/M000031.html" target="Code" class="method-signature"
|
173
|
+
onclick="popupCode('ContentNode.src/M000031.html');return false;">
|
174
|
+
<span class="method-name">new</span><span class="method-args">( source, categories=nil, text_proc=nil )</span>
|
175
|
+
</a>
|
176
|
+
</div>
|
177
|
+
|
178
|
+
<div class="method-description">
|
179
|
+
<p>
|
180
|
+
If text_proc is not specified, the source will be duck-typed via
|
181
|
+
source.to_s
|
182
|
+
</p>
|
183
|
+
</div>
|
184
|
+
</div>
|
185
|
+
|
186
|
+
<h3 class="section-bar">Public Instance methods</h3>
|
187
|
+
|
188
|
+
<div id="method-M000034" class="method-detail">
|
189
|
+
<a name="M000034"></a>
|
190
|
+
|
191
|
+
<div class="method-heading">
|
192
|
+
<a href="ContentNode.src/M000034.html" target="Code" class="method-signature"
|
193
|
+
onclick="popupCode('ContentNode.src/M000034.html');return false;">
|
194
|
+
<span class="method-name">raw_vector_with</span><span class="method-args">( word_list )</span>
|
195
|
+
</a>
|
196
|
+
</div>
|
197
|
+
|
198
|
+
<div class="method-description">
|
199
|
+
<p>
|
200
|
+
Creates the raw vector out of word_hash using word_list as the key for
|
201
|
+
mapping the vector space.
|
202
|
+
</p>
|
203
|
+
</div>
|
204
|
+
</div>
|
205
|
+
|
206
|
+
<div id="method-M000033" class="method-detail">
|
207
|
+
<a name="M000033"></a>
|
208
|
+
|
209
|
+
<div class="method-heading">
|
210
|
+
<a href="ContentNode.src/M000033.html" target="Code" class="method-signature"
|
211
|
+
onclick="popupCode('ContentNode.src/M000033.html');return false;">
|
212
|
+
<span class="method-name">search_norm</span><span class="method-args">()</span>
|
213
|
+
</a>
|
214
|
+
</div>
|
215
|
+
|
216
|
+
<div class="method-description">
|
217
|
+
<p>
|
218
|
+
Use this to fetch the appropriate search vector in normalized form.
|
219
|
+
</p>
|
220
|
+
</div>
|
221
|
+
</div>
|
222
|
+
|
223
|
+
<div id="method-M000032" class="method-detail">
|
224
|
+
<a name="M000032"></a>
|
225
|
+
|
226
|
+
<div class="method-heading">
|
227
|
+
<a href="ContentNode.src/M000032.html" target="Code" class="method-signature"
|
228
|
+
onclick="popupCode('ContentNode.src/M000032.html');return false;">
|
229
|
+
<span class="method-name">search_vector</span><span class="method-args">()</span>
|
230
|
+
</a>
|
231
|
+
</div>
|
232
|
+
|
233
|
+
<div class="method-description">
|
234
|
+
<p>
|
235
|
+
Use this to fetch the appropriate search vector.
|
236
|
+
</p>
|
237
|
+
</div>
|
238
|
+
</div>
|
239
|
+
|
240
|
+
|
241
|
+
</div>
|
242
|
+
|
243
|
+
|
244
|
+
</div>
|
245
|
+
|
246
|
+
|
247
|
+
<div id="validator-badges">
|
248
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
249
|
+
</div>
|
250
|
+
|
251
|
+
</body>
|
252
|
+
</html>
|