classifier 1.1 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. data/README +2 -2
  2. data/Rakefile +2 -4
  3. data/doc/classes/Classifier.html +135 -0
  4. data/doc/classes/Classifier/Bayes.html +287 -0
  5. data/doc/classes/Classifier/Bayes.src/M000005.html +20 -0
  6. data/doc/classes/Classifier/Bayes.src/M000006.html +23 -0
  7. data/doc/classes/Classifier/Bayes.src/M000007.html +27 -0
  8. data/doc/classes/Classifier/Bayes.src/M000008.html +18 -0
  9. data/doc/classes/Classifier/Bayes.src/M000009.html +25 -0
  10. data/doc/classes/Classifier/Bayes.src/M000010.html +18 -0
  11. data/doc/classes/Classifier/Stemmable.html +243 -0
  12. data/doc/classes/Classifier/Stemmable.src/M000003.html +102 -0
  13. data/doc/classes/Classifier/WordHash.html +178 -0
  14. data/doc/classes/Classifier/WordHash.src/M000001.html +18 -0
  15. data/doc/classes/Classifier/WordHash.src/M000002.html +28 -0
  16. data/doc/classes/String.html +119 -0
  17. data/doc/created.rid +1 -0
  18. data/doc/files/README.html +156 -0
  19. data/doc/files/lib/classifier/bayes_rb.html +115 -0
  20. data/doc/files/lib/classifier/string_extensions/porter_stemmer_rb.html +112 -0
  21. data/doc/files/lib/classifier/string_extensions/word_hash_rb.html +115 -0
  22. data/doc/files/lib/classifier/string_extensions_rb.html +123 -0
  23. data/doc/files/lib/classifier_rb.html +123 -0
  24. data/doc/fr_class_index.html +31 -0
  25. data/doc/fr_file_index.html +32 -0
  26. data/doc/fr_method_index.html +37 -0
  27. data/doc/index.html +24 -0
  28. data/doc/rdoc-style.css +208 -0
  29. data/lib/classifier/bayes.rb +63 -12
  30. data/lib/classifier/string_extensions/porter_stemmer.rb +18 -15
  31. data/lib/classifier/string_extensions/word_hash.rb +96 -3
  32. data/test/bayes/bayesian_test.rb +13 -0
  33. data/test/string_extensions/word_hash_test.rb +7 -3
  34. metadata +36 -1
data/README CHANGED
@@ -7,7 +7,7 @@ Classifier is a general module to allow Bayesian and other types of classificati
7
7
  b = Classifier::Bayes.new 'Interesting', 'Uninteresting'
8
8
  b.train_interesting "here are some good words. I hope you love them"
9
9
  b.train_uninteresting "here are some bad words, I hate you"
10
- b.classify "I hate bad words and you" # returns 'Uninsteresting'
10
+ b.classify "I hate bad words and you" # returns 'Uninteresting'
11
11
 
12
12
  require 'madeleine'
13
13
  m = SnapshotMadeleine.new("bayes_data") {
@@ -30,4 +30,4 @@ Using Madeleine, your application can persist the learned data over time.
30
30
 
31
31
  Author:: Lucas Carlson (mailto:lucas@rufy.com)
32
32
  Copyright:: Copyright (c) 2005 Lucas Carlson
33
- License:: GPL
33
+ License:: LGPL
data/Rakefile CHANGED
@@ -5,10 +5,10 @@ require 'rake/rdoctask'
5
5
  require 'rake/gempackagetask'
6
6
  require 'rake/contrib/rubyforgepublisher'
7
7
 
8
- PKG_VERSION = "1.1"
8
+ PKG_VERSION = "1.1.1"
9
9
 
10
10
  PKG_FILES = FileList[
11
- "lib/**/*", "bin/*", "test/**/*", "[A-Z]*", "Rakefile"
11
+ "lib/**/*", "bin/*", "test/**/*", "[A-Z]*", "Rakefile", "doc/**/*"
12
12
  ]
13
13
 
14
14
  desc "Default Task"
@@ -26,8 +26,6 @@ Rake::TestTask.new("test") { |t|
26
26
  desc "Create documentation"
27
27
  Rake::RDocTask.new("doc") { |rdoc|
28
28
  rdoc.rdoc_dir = 'doc'
29
- rdoc.title = "Classifier library"
30
- rdoc.options << '--line-numbers --inline-source --accessor'
31
29
  rdoc.rdoc_files.include('README')
32
30
  rdoc.rdoc_files.include('lib/**/*.rb')
33
31
  }
@@ -0,0 +1,135 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>Module: Classifier</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="classHeader">
50
+ <table class="header-table">
51
+ <tr class="top-aligned-row">
52
+ <td><strong>Module</strong></td>
53
+ <td class="class-name-in-header">Classifier</td>
54
+ </tr>
55
+ <tr class="top-aligned-row">
56
+ <td><strong>In:</strong></td>
57
+ <td>
58
+ <a href="../files/lib/classifier/bayes_rb.html">
59
+ lib/classifier/bayes.rb
60
+ </a>
61
+ <br />
62
+ <a href="../files/lib/classifier/string_extensions/porter_stemmer_rb.html">
63
+ lib/classifier/string_extensions/porter_stemmer.rb
64
+ </a>
65
+ <br />
66
+ <a href="../files/lib/classifier/string_extensions/word_hash_rb.html">
67
+ lib/classifier/string_extensions/word_hash.rb
68
+ </a>
69
+ <br />
70
+ </td>
71
+ </tr>
72
+
73
+ </table>
74
+ </div>
75
+ <!-- banner header -->
76
+
77
+ <div id="bodyContent">
78
+
79
+
80
+
81
+ <div id="contextContent">
82
+
83
+ <div id="description">
84
+ <table>
85
+ <tr><td valign="top">Author:</td><td>Lucas Carlson (<a href="mailto:lucas@rufy.com">lucas@rufy.com</a>)
86
+
87
+ </td></tr>
88
+ <tr><td valign="top">Copyright:</td><td>Copyright &#169; 2005 Lucas Carlson
89
+
90
+ </td></tr>
91
+ <tr><td valign="top">License:</td><td>LGPL
92
+
93
+ </td></tr>
94
+ </table>
95
+
96
+ </div>
97
+
98
+
99
+ </div>
100
+
101
+
102
+ </div>
103
+
104
+
105
+ <!-- if includes -->
106
+
107
+ <div id="section">
108
+
109
+ <div id="class-list">
110
+ <h3 class="section-bar">Classes and Modules</h3>
111
+
112
+ Module <a href="Classifier/Stemmable.html" class="link">Classifier::Stemmable</a><br />
113
+ Module <a href="Classifier/WordHash.html" class="link">Classifier::WordHash</a><br />
114
+ Class <a href="Classifier/Bayes.html" class="link">Classifier::Bayes</a><br />
115
+
116
+ </div>
117
+
118
+
119
+
120
+
121
+
122
+
123
+
124
+ <!-- if method_list -->
125
+
126
+
127
+ </div>
128
+
129
+
130
+ <div id="validator-badges">
131
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
132
+ </div>
133
+
134
+ </body>
135
+ </html>
@@ -0,0 +1,287 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>Class: Classifier::Bayes</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="classHeader">
50
+ <table class="header-table">
51
+ <tr class="top-aligned-row">
52
+ <td><strong>Class</strong></td>
53
+ <td class="class-name-in-header">Classifier::Bayes</td>
54
+ </tr>
55
+ <tr class="top-aligned-row">
56
+ <td><strong>In:</strong></td>
57
+ <td>
58
+ <a href="../../files/lib/classifier/bayes_rb.html">
59
+ lib/classifier/bayes.rb
60
+ </a>
61
+ <br />
62
+ </td>
63
+ </tr>
64
+
65
+ <tr class="top-aligned-row">
66
+ <td><strong>Parent:</strong></td>
67
+ <td>
68
+ Object
69
+ </td>
70
+ </tr>
71
+ </table>
72
+ </div>
73
+ <!-- banner header -->
74
+
75
+ <div id="bodyContent">
76
+
77
+
78
+
79
+ <div id="contextContent">
80
+
81
+
82
+
83
+ </div>
84
+
85
+ <div id="method-list">
86
+ <h3 class="section-bar">Methods</h3>
87
+
88
+ <div class="name-list">
89
+ <a href="#M000010">add_category</a>&nbsp;&nbsp;
90
+ <a href="#M000011">append_category</a>&nbsp;&nbsp;
91
+ <a href="#M000007">classifications</a>&nbsp;&nbsp;
92
+ <a href="#M000008">classify</a>&nbsp;&nbsp;
93
+ <a href="#M000009">method_missing</a>&nbsp;&nbsp;
94
+ <a href="#M000005">new</a>&nbsp;&nbsp;
95
+ <a href="#M000006">train</a>&nbsp;&nbsp;
96
+ </div>
97
+ </div>
98
+
99
+ </div>
100
+
101
+
102
+ <!-- if includes -->
103
+
104
+ <div id="section">
105
+
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+ <!-- if method_list -->
114
+ <div id="methods">
115
+ <h3 class="section-bar">Public Class methods</h3>
116
+
117
+ <div id="method-M000005" class="method-detail">
118
+ <a name="M000005"></a>
119
+
120
+ <div class="method-heading">
121
+ <a href="Bayes.src/M000005.html" target="Code" class="method-signature"
122
+ onclick="popupCode('Bayes.src/M000005.html');return false;">
123
+ <span class="method-name">new</span><span class="method-args">(*categories)</span>
124
+ </a>
125
+ </div>
126
+
127
+ <div class="method-description">
128
+ <p>
129
+ The class can be created with one or more categories, each of which will be
130
+ initialized and given a training method. E.g.,
131
+ </p>
132
+ <pre>
133
+ b = Classifier::Bayes.new 'Interesting', 'Uninteresting', 'Spam'
134
+ </pre>
135
+ </div>
136
+ </div>
137
+
138
+ <h3 class="section-bar">Public Instance methods</h3>
139
+
140
+ <div id="method-M000010" class="method-detail">
141
+ <a name="M000010"></a>
142
+
143
+ <div class="method-heading">
144
+ <a href="Bayes.src/M000010.html" target="Code" class="method-signature"
145
+ onclick="popupCode('Bayes.src/M000010.html');return false;">
146
+ <span class="method-name">add_category</span><span class="method-args">(category)</span>
147
+ </a>
148
+ </div>
149
+
150
+ <div class="method-description">
151
+ <p>
152
+ Allows you to add categories to the classifier. For example:
153
+ </p>
154
+ <pre>
155
+ b.add_category &quot;Not spam&quot;
156
+ </pre>
157
+ <p>
158
+ WARNING: Adding categories to a trained classifier will result in an
159
+ undertrained category that will tend to match more criteria than the
160
+ trained selective categories. In short, try to define all of your categories
161
+ at initialization.
162
+ </p>
163
+ </div>
164
+ </div>
165
+
166
+ <div id="method-M000011" class="method-detail">
167
+ <a name="M000011"></a>
168
+
169
+ <div class="method-heading">
170
+ <span class="method-name">append_category</span><span class="method-args">(category)</span>
171
+ </div>
172
+
173
+ <div class="method-description">
174
+ <p>
175
+ Alias for <a href="Bayes.html#M000010">add_category</a>
176
+ </p>
177
+ </div>
178
+ </div>
179
+
180
+ <div id="method-M000007" class="method-detail">
181
+ <a name="M000007"></a>
182
+
183
+ <div class="method-heading">
184
+ <a href="Bayes.src/M000007.html" target="Code" class="method-signature"
185
+ onclick="popupCode('Bayes.src/M000007.html');return false;">
186
+ <span class="method-name">classifications</span><span class="method-args">(text)</span>
187
+ </a>
188
+ </div>
189
+
190
+ <div class="method-description">
191
+ <p>
192
+ Returns the scores in each category for the provided <tt>text</tt>. E.g.,
193
+ </p>
194
+ <pre>
195
+ b.classifications &quot;I hate bad words and you&quot;
196
+ =&gt; {&quot;Uninteresting&quot;=&gt;-12.6997928013932, &quot;Interesting&quot;=&gt;-18.4206807439524}
197
+ </pre>
198
+ <p>
199
+ The largest of these scores (the one closest to 0) is the one picked out by
200
+ <a href="Bayes.html#M000008">classify</a>.
201
+ </p>
202
+ </div>
203
+ </div>
204
+
205
+ <div id="method-M000008" class="method-detail">
206
+ <a name="M000008"></a>
207
+
208
+ <div class="method-heading">
209
+ <a href="Bayes.src/M000008.html" target="Code" class="method-signature"
210
+ onclick="popupCode('Bayes.src/M000008.html');return false;">
211
+ <span class="method-name">classify</span><span class="method-args">(text)</span>
212
+ </a>
213
+ </div>
214
+
215
+ <div class="method-description">
216
+ <p>
217
+ Returns the classification of the provided <tt>text</tt>, which is one of
218
+ the categories given in the initializer. E.g.,
219
+ </p>
220
+ <pre>
221
+ b.classify &quot;I hate bad words and you&quot;
222
+ =&gt; 'Uninteresting'
223
+ </pre>
224
+ </div>
225
+ </div>
226
+
227
+ <div id="method-M000009" class="method-detail">
228
+ <a name="M000009"></a>
229
+
230
+ <div class="method-heading">
231
+ <a href="Bayes.src/M000009.html" target="Code" class="method-signature"
232
+ onclick="popupCode('Bayes.src/M000009.html');return false;">
233
+ <span class="method-name">method_missing</span><span class="method-args">(name, *args)</span>
234
+ </a>
235
+ </div>
236
+
237
+ <div class="method-description">
238
+ <p>
239
+ Provides training methods for the categories specified in <a
240
+ href="Bayes.html#M000005">Bayes#new</a>. For example:
241
+ </p>
242
+ <pre>
243
+ b = Classifier::Bayes.new 'This', 'That', 'the_other'
244
+ b.train_this &quot;This text&quot;
245
+ b.train_that &quot;That text&quot;
246
+ b.train_the_other &quot;The other text&quot;
247
+ </pre>
248
+ </div>
249
+ </div>
250
+
251
+ <div id="method-M000006" class="method-detail">
252
+ <a name="M000006"></a>
253
+
254
+ <div class="method-heading">
255
+ <a href="Bayes.src/M000006.html" target="Code" class="method-signature"
256
+ onclick="popupCode('Bayes.src/M000006.html');return false;">
257
+ <span class="method-name">train</span><span class="method-args">(category, text)</span>
258
+ </a>
259
+ </div>
260
+
261
+ <div class="method-description">
262
+ <p>
263
+ Provides a general training method for all categories specified in <a
264
+ href="Bayes.html#M000005">Bayes#new</a>. For example:
265
+ </p>
266
+ <pre>
267
+ b = Classifier::Bayes.new 'This', 'That', 'the_other'
268
+ b.train :this, &quot;This text&quot;
269
+ b.train &quot;that&quot;, &quot;That text&quot;
270
+ b.train &quot;The other&quot;, &quot;The other text&quot;
271
+ </pre>
272
+ </div>
273
+ </div>
274
+
275
+
276
+ </div>
277
+
278
+
279
+ </div>
280
+
281
+
282
+ <div id="validator-badges">
283
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
284
+ </div>
285
+
286
+ </body>
287
+ </html>