classifier 1.1 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +2 -2
- data/Rakefile +2 -4
- data/doc/classes/Classifier.html +135 -0
- data/doc/classes/Classifier/Bayes.html +287 -0
- data/doc/classes/Classifier/Bayes.src/M000005.html +20 -0
- data/doc/classes/Classifier/Bayes.src/M000006.html +23 -0
- data/doc/classes/Classifier/Bayes.src/M000007.html +27 -0
- data/doc/classes/Classifier/Bayes.src/M000008.html +18 -0
- data/doc/classes/Classifier/Bayes.src/M000009.html +25 -0
- data/doc/classes/Classifier/Bayes.src/M000010.html +18 -0
- data/doc/classes/Classifier/Stemmable.html +243 -0
- data/doc/classes/Classifier/Stemmable.src/M000003.html +102 -0
- data/doc/classes/Classifier/WordHash.html +178 -0
- data/doc/classes/Classifier/WordHash.src/M000001.html +18 -0
- data/doc/classes/Classifier/WordHash.src/M000002.html +28 -0
- data/doc/classes/String.html +119 -0
- data/doc/created.rid +1 -0
- data/doc/files/README.html +156 -0
- data/doc/files/lib/classifier/bayes_rb.html +115 -0
- data/doc/files/lib/classifier/string_extensions/porter_stemmer_rb.html +112 -0
- data/doc/files/lib/classifier/string_extensions/word_hash_rb.html +115 -0
- data/doc/files/lib/classifier/string_extensions_rb.html +123 -0
- data/doc/files/lib/classifier_rb.html +123 -0
- data/doc/fr_class_index.html +31 -0
- data/doc/fr_file_index.html +32 -0
- data/doc/fr_method_index.html +37 -0
- data/doc/index.html +24 -0
- data/doc/rdoc-style.css +208 -0
- data/lib/classifier/bayes.rb +63 -12
- data/lib/classifier/string_extensions/porter_stemmer.rb +18 -15
- data/lib/classifier/string_extensions/word_hash.rb +96 -3
- data/test/bayes/bayesian_test.rb +13 -0
- data/test/string_extensions/word_hash_test.rb +7 -3
- metadata +36 -1
data/README
CHANGED
@@ -7,7 +7,7 @@ Classifier is a general module to allow Bayesian and other types of classificati
|
|
7
7
|
b = Classifier::Bayes.new 'Interesting', 'Uninteresting'
|
8
8
|
b.train_interesting "here are some good words. I hope you love them"
|
9
9
|
b.train_uninteresting "here are some bad words, I hate you"
|
10
|
-
b.classify "I hate bad words and you" # returns '
|
10
|
+
b.classify "I hate bad words and you" # returns 'Uninteresting'
|
11
11
|
|
12
12
|
require 'madeleine'
|
13
13
|
m = SnapshotMadeleine.new("bayes_data") {
|
@@ -30,4 +30,4 @@ Using Madeleine, your application can persist the learned data over time.
|
|
30
30
|
|
31
31
|
Author:: Lucas Carlson (mailto:lucas@rufy.com)
|
32
32
|
Copyright:: Copyright (c) 2005 Lucas Carlson
|
33
|
-
License::
|
33
|
+
License:: LGPL
|
data/Rakefile
CHANGED
@@ -5,10 +5,10 @@ require 'rake/rdoctask'
|
|
5
5
|
require 'rake/gempackagetask'
|
6
6
|
require 'rake/contrib/rubyforgepublisher'
|
7
7
|
|
8
|
-
PKG_VERSION = "1.1"
|
8
|
+
PKG_VERSION = "1.1.1"
|
9
9
|
|
10
10
|
PKG_FILES = FileList[
|
11
|
-
"lib/**/*", "bin/*", "test/**/*", "[A-Z]*", "Rakefile"
|
11
|
+
"lib/**/*", "bin/*", "test/**/*", "[A-Z]*", "Rakefile", "doc/**/*"
|
12
12
|
]
|
13
13
|
|
14
14
|
desc "Default Task"
|
@@ -26,8 +26,6 @@ Rake::TestTask.new("test") { |t|
|
|
26
26
|
desc "Create documentation"
|
27
27
|
Rake::RDocTask.new("doc") { |rdoc|
|
28
28
|
rdoc.rdoc_dir = 'doc'
|
29
|
-
rdoc.title = "Classifier library"
|
30
|
-
rdoc.options << '--line-numbers --inline-source --accessor'
|
31
29
|
rdoc.rdoc_files.include('README')
|
32
30
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
33
31
|
}
|
@@ -0,0 +1,135 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>Module: Classifier</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="classHeader">
|
50
|
+
<table class="header-table">
|
51
|
+
<tr class="top-aligned-row">
|
52
|
+
<td><strong>Module</strong></td>
|
53
|
+
<td class="class-name-in-header">Classifier</td>
|
54
|
+
</tr>
|
55
|
+
<tr class="top-aligned-row">
|
56
|
+
<td><strong>In:</strong></td>
|
57
|
+
<td>
|
58
|
+
<a href="../files/lib/classifier/bayes_rb.html">
|
59
|
+
lib/classifier/bayes.rb
|
60
|
+
</a>
|
61
|
+
<br />
|
62
|
+
<a href="../files/lib/classifier/string_extensions/porter_stemmer_rb.html">
|
63
|
+
lib/classifier/string_extensions/porter_stemmer.rb
|
64
|
+
</a>
|
65
|
+
<br />
|
66
|
+
<a href="../files/lib/classifier/string_extensions/word_hash_rb.html">
|
67
|
+
lib/classifier/string_extensions/word_hash.rb
|
68
|
+
</a>
|
69
|
+
<br />
|
70
|
+
</td>
|
71
|
+
</tr>
|
72
|
+
|
73
|
+
</table>
|
74
|
+
</div>
|
75
|
+
<!-- banner header -->
|
76
|
+
|
77
|
+
<div id="bodyContent">
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
<div id="contextContent">
|
82
|
+
|
83
|
+
<div id="description">
|
84
|
+
<table>
|
85
|
+
<tr><td valign="top">Author:</td><td>Lucas Carlson (<a href="mailto:lucas@rufy.com">lucas@rufy.com</a>)
|
86
|
+
|
87
|
+
</td></tr>
|
88
|
+
<tr><td valign="top">Copyright:</td><td>Copyright © 2005 Lucas Carlson
|
89
|
+
|
90
|
+
</td></tr>
|
91
|
+
<tr><td valign="top">License:</td><td>LGPL
|
92
|
+
|
93
|
+
</td></tr>
|
94
|
+
</table>
|
95
|
+
|
96
|
+
</div>
|
97
|
+
|
98
|
+
|
99
|
+
</div>
|
100
|
+
|
101
|
+
|
102
|
+
</div>
|
103
|
+
|
104
|
+
|
105
|
+
<!-- if includes -->
|
106
|
+
|
107
|
+
<div id="section">
|
108
|
+
|
109
|
+
<div id="class-list">
|
110
|
+
<h3 class="section-bar">Classes and Modules</h3>
|
111
|
+
|
112
|
+
Module <a href="Classifier/Stemmable.html" class="link">Classifier::Stemmable</a><br />
|
113
|
+
Module <a href="Classifier/WordHash.html" class="link">Classifier::WordHash</a><br />
|
114
|
+
Class <a href="Classifier/Bayes.html" class="link">Classifier::Bayes</a><br />
|
115
|
+
|
116
|
+
</div>
|
117
|
+
|
118
|
+
|
119
|
+
|
120
|
+
|
121
|
+
|
122
|
+
|
123
|
+
|
124
|
+
<!-- if method_list -->
|
125
|
+
|
126
|
+
|
127
|
+
</div>
|
128
|
+
|
129
|
+
|
130
|
+
<div id="validator-badges">
|
131
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
132
|
+
</div>
|
133
|
+
|
134
|
+
</body>
|
135
|
+
</html>
|
@@ -0,0 +1,287 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>Class: Classifier::Bayes</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="classHeader">
|
50
|
+
<table class="header-table">
|
51
|
+
<tr class="top-aligned-row">
|
52
|
+
<td><strong>Class</strong></td>
|
53
|
+
<td class="class-name-in-header">Classifier::Bayes</td>
|
54
|
+
</tr>
|
55
|
+
<tr class="top-aligned-row">
|
56
|
+
<td><strong>In:</strong></td>
|
57
|
+
<td>
|
58
|
+
<a href="../../files/lib/classifier/bayes_rb.html">
|
59
|
+
lib/classifier/bayes.rb
|
60
|
+
</a>
|
61
|
+
<br />
|
62
|
+
</td>
|
63
|
+
</tr>
|
64
|
+
|
65
|
+
<tr class="top-aligned-row">
|
66
|
+
<td><strong>Parent:</strong></td>
|
67
|
+
<td>
|
68
|
+
Object
|
69
|
+
</td>
|
70
|
+
</tr>
|
71
|
+
</table>
|
72
|
+
</div>
|
73
|
+
<!-- banner header -->
|
74
|
+
|
75
|
+
<div id="bodyContent">
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
<div id="contextContent">
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
</div>
|
84
|
+
|
85
|
+
<div id="method-list">
|
86
|
+
<h3 class="section-bar">Methods</h3>
|
87
|
+
|
88
|
+
<div class="name-list">
|
89
|
+
<a href="#M000010">add_category</a>
|
90
|
+
<a href="#M000011">append_category</a>
|
91
|
+
<a href="#M000007">classifications</a>
|
92
|
+
<a href="#M000008">classify</a>
|
93
|
+
<a href="#M000009">method_missing</a>
|
94
|
+
<a href="#M000005">new</a>
|
95
|
+
<a href="#M000006">train</a>
|
96
|
+
</div>
|
97
|
+
</div>
|
98
|
+
|
99
|
+
</div>
|
100
|
+
|
101
|
+
|
102
|
+
<!-- if includes -->
|
103
|
+
|
104
|
+
<div id="section">
|
105
|
+
|
106
|
+
|
107
|
+
|
108
|
+
|
109
|
+
|
110
|
+
|
111
|
+
|
112
|
+
|
113
|
+
<!-- if method_list -->
|
114
|
+
<div id="methods">
|
115
|
+
<h3 class="section-bar">Public Class methods</h3>
|
116
|
+
|
117
|
+
<div id="method-M000005" class="method-detail">
|
118
|
+
<a name="M000005"></a>
|
119
|
+
|
120
|
+
<div class="method-heading">
|
121
|
+
<a href="Bayes.src/M000005.html" target="Code" class="method-signature"
|
122
|
+
onclick="popupCode('Bayes.src/M000005.html');return false;">
|
123
|
+
<span class="method-name">new</span><span class="method-args">(*categories)</span>
|
124
|
+
</a>
|
125
|
+
</div>
|
126
|
+
|
127
|
+
<div class="method-description">
|
128
|
+
<p>
|
129
|
+
The class can be created with one or more categories, each of which will be
|
130
|
+
initialized and given a training method. E.g.,
|
131
|
+
</p>
|
132
|
+
<pre>
|
133
|
+
b = Classifier::Bayes.new 'Interesting', 'Uninteresting', 'Spam'
|
134
|
+
</pre>
|
135
|
+
</div>
|
136
|
+
</div>
|
137
|
+
|
138
|
+
<h3 class="section-bar">Public Instance methods</h3>
|
139
|
+
|
140
|
+
<div id="method-M000010" class="method-detail">
|
141
|
+
<a name="M000010"></a>
|
142
|
+
|
143
|
+
<div class="method-heading">
|
144
|
+
<a href="Bayes.src/M000010.html" target="Code" class="method-signature"
|
145
|
+
onclick="popupCode('Bayes.src/M000010.html');return false;">
|
146
|
+
<span class="method-name">add_category</span><span class="method-args">(category)</span>
|
147
|
+
</a>
|
148
|
+
</div>
|
149
|
+
|
150
|
+
<div class="method-description">
|
151
|
+
<p>
|
152
|
+
Allows you to add categories to the classifier. For example:
|
153
|
+
</p>
|
154
|
+
<pre>
|
155
|
+
b.add_category "Not spam"
|
156
|
+
</pre>
|
157
|
+
<p>
|
158
|
+
WARNING: Adding categories to a trained classifier will result in an
|
159
|
+
undertrained category that will tend to match more criteria than the
|
160
|
+
trained selective categories. In short, try to define all of your categories
|
161
|
+
at initialization.
|
162
|
+
</p>
|
163
|
+
</div>
|
164
|
+
</div>
|
165
|
+
|
166
|
+
<div id="method-M000011" class="method-detail">
|
167
|
+
<a name="M000011"></a>
|
168
|
+
|
169
|
+
<div class="method-heading">
|
170
|
+
<span class="method-name">append_category</span><span class="method-args">(category)</span>
|
171
|
+
</div>
|
172
|
+
|
173
|
+
<div class="method-description">
|
174
|
+
<p>
|
175
|
+
Alias for <a href="Bayes.html#M000010">add_category</a>
|
176
|
+
</p>
|
177
|
+
</div>
|
178
|
+
</div>
|
179
|
+
|
180
|
+
<div id="method-M000007" class="method-detail">
|
181
|
+
<a name="M000007"></a>
|
182
|
+
|
183
|
+
<div class="method-heading">
|
184
|
+
<a href="Bayes.src/M000007.html" target="Code" class="method-signature"
|
185
|
+
onclick="popupCode('Bayes.src/M000007.html');return false;">
|
186
|
+
<span class="method-name">classifications</span><span class="method-args">(text)</span>
|
187
|
+
</a>
|
188
|
+
</div>
|
189
|
+
|
190
|
+
<div class="method-description">
|
191
|
+
<p>
|
192
|
+
Returns the scores in each category for the provided <tt>text</tt>. E.g.,
|
193
|
+
</p>
|
194
|
+
<pre>
|
195
|
+
b.classifications "I hate bad words and you"
|
196
|
+
=> {"Uninteresting"=>-12.6997928013932, "Interesting"=>-18.4206807439524}
|
197
|
+
</pre>
|
198
|
+
<p>
|
199
|
+
The largest of these scores (the one closest to 0) is the one picked out by
|
200
|
+
<a href="Bayes.html#M000008">classify</a>
|
201
|
+
</p>
|
202
|
+
</div>
|
203
|
+
</div>
|
204
|
+
|
205
|
+
<div id="method-M000008" class="method-detail">
|
206
|
+
<a name="M000008"></a>
|
207
|
+
|
208
|
+
<div class="method-heading">
|
209
|
+
<a href="Bayes.src/M000008.html" target="Code" class="method-signature"
|
210
|
+
onclick="popupCode('Bayes.src/M000008.html');return false;">
|
211
|
+
<span class="method-name">classify</span><span class="method-args">(text)</span>
|
212
|
+
</a>
|
213
|
+
</div>
|
214
|
+
|
215
|
+
<div class="method-description">
|
216
|
+
<p>
|
217
|
+
Returns the classification of the provided <tt>text</tt>, which is one of
|
218
|
+
the categories given in the initializer. E.g.,
|
219
|
+
</p>
|
220
|
+
<pre>
|
221
|
+
b.classify "I hate bad words and you"
|
222
|
+
=> 'Uninteresting'
|
223
|
+
</pre>
|
224
|
+
</div>
|
225
|
+
</div>
|
226
|
+
|
227
|
+
<div id="method-M000009" class="method-detail">
|
228
|
+
<a name="M000009"></a>
|
229
|
+
|
230
|
+
<div class="method-heading">
|
231
|
+
<a href="Bayes.src/M000009.html" target="Code" class="method-signature"
|
232
|
+
onclick="popupCode('Bayes.src/M000009.html');return false;">
|
233
|
+
<span class="method-name">method_missing</span><span class="method-args">(name, *args)</span>
|
234
|
+
</a>
|
235
|
+
</div>
|
236
|
+
|
237
|
+
<div class="method-description">
|
238
|
+
<p>
|
239
|
+
Provides training methods for the categories specified in <a
|
240
|
+
href="Bayes.html#M000005">Bayes#new</a>. For example:
|
241
|
+
</p>
|
242
|
+
<pre>
|
243
|
+
b = Classifier::Bayes.new 'This', 'That', 'the_other'
|
244
|
+
b.train_this "This text"
|
245
|
+
b.train_that "That text"
|
246
|
+
b.train_the_other "The other text"
|
247
|
+
</pre>
|
248
|
+
</div>
|
249
|
+
</div>
|
250
|
+
|
251
|
+
<div id="method-M000006" class="method-detail">
|
252
|
+
<a name="M000006"></a>
|
253
|
+
|
254
|
+
<div class="method-heading">
|
255
|
+
<a href="Bayes.src/M000006.html" target="Code" class="method-signature"
|
256
|
+
onclick="popupCode('Bayes.src/M000006.html');return false;">
|
257
|
+
<span class="method-name">train</span><span class="method-args">(category, text)</span>
|
258
|
+
</a>
|
259
|
+
</div>
|
260
|
+
|
261
|
+
<div class="method-description">
|
262
|
+
<p>
|
263
|
+
Provides a general training method for all categories specified in <a
|
264
|
+
href="Bayes.html#M000005">Bayes#new</a>. For example:
|
265
|
+
</p>
|
266
|
+
<pre>
|
267
|
+
b = Classifier::Bayes.new 'This', 'That', 'the_other'
|
268
|
+
b.train :this, "This text"
|
269
|
+
b.train "that", "That text"
|
270
|
+
b.train "The other", "The other text"
|
271
|
+
</pre>
|
272
|
+
</div>
|
273
|
+
</div>
|
274
|
+
|
275
|
+
|
276
|
+
</div>
|
277
|
+
|
278
|
+
|
279
|
+
</div>
|
280
|
+
|
281
|
+
|
282
|
+
<div id="validator-badges">
|
283
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
284
|
+
</div>
|
285
|
+
|
286
|
+
</body>
|
287
|
+
</html>
|