classifier 1.1 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. data/README +2 -2
  2. data/Rakefile +2 -4
  3. data/doc/classes/Classifier.html +135 -0
  4. data/doc/classes/Classifier/Bayes.html +287 -0
  5. data/doc/classes/Classifier/Bayes.src/M000005.html +20 -0
  6. data/doc/classes/Classifier/Bayes.src/M000006.html +23 -0
  7. data/doc/classes/Classifier/Bayes.src/M000007.html +27 -0
  8. data/doc/classes/Classifier/Bayes.src/M000008.html +18 -0
  9. data/doc/classes/Classifier/Bayes.src/M000009.html +25 -0
  10. data/doc/classes/Classifier/Bayes.src/M000010.html +18 -0
  11. data/doc/classes/Classifier/Stemmable.html +243 -0
  12. data/doc/classes/Classifier/Stemmable.src/M000003.html +102 -0
  13. data/doc/classes/Classifier/WordHash.html +178 -0
  14. data/doc/classes/Classifier/WordHash.src/M000001.html +18 -0
  15. data/doc/classes/Classifier/WordHash.src/M000002.html +28 -0
  16. data/doc/classes/String.html +119 -0
  17. data/doc/created.rid +1 -0
  18. data/doc/files/README.html +156 -0
  19. data/doc/files/lib/classifier/bayes_rb.html +115 -0
  20. data/doc/files/lib/classifier/string_extensions/porter_stemmer_rb.html +112 -0
  21. data/doc/files/lib/classifier/string_extensions/word_hash_rb.html +115 -0
  22. data/doc/files/lib/classifier/string_extensions_rb.html +123 -0
  23. data/doc/files/lib/classifier_rb.html +123 -0
  24. data/doc/fr_class_index.html +31 -0
  25. data/doc/fr_file_index.html +32 -0
  26. data/doc/fr_method_index.html +37 -0
  27. data/doc/index.html +24 -0
  28. data/doc/rdoc-style.css +208 -0
  29. data/lib/classifier/bayes.rb +63 -12
  30. data/lib/classifier/string_extensions/porter_stemmer.rb +18 -15
  31. data/lib/classifier/string_extensions/word_hash.rb +96 -3
  32. data/test/bayes/bayesian_test.rb +13 -0
  33. data/test/string_extensions/word_hash_test.rb +7 -3
  34. metadata +36 -1
data/README CHANGED
@@ -7,7 +7,7 @@ Classifier is a general module to allow Bayesian and other types of classificati
7
7
  b = Classifier::Bayes.new 'Interesting', 'Uninteresting'
8
8
  b.train_interesting "here are some good words. I hope you love them"
9
9
  b.train_uninteresting "here are some bad words, I hate you"
10
- b.classify "I hate bad words and you" # returns 'Uninsteresting'
10
+ b.classify "I hate bad words and you" # returns 'Uninteresting'
11
11
 
12
12
  require 'madeleine'
13
13
  m = SnapshotMadeleine.new("bayes_data") {
@@ -30,4 +30,4 @@ Using Madeleine, your application can persist the learned data over time.
30
30
 
31
31
  Author:: Lucas Carlson (mailto:lucas@rufy.com)
32
32
  Copyright:: Copyright (c) 2005 Lucas Carlson
33
- License:: GPL
33
+ License:: LGPL
data/Rakefile CHANGED
@@ -5,10 +5,10 @@ require 'rake/rdoctask'
5
5
  require 'rake/gempackagetask'
6
6
  require 'rake/contrib/rubyforgepublisher'
7
7
 
8
- PKG_VERSION = "1.1"
8
+ PKG_VERSION = "1.1.1"
9
9
 
10
10
  PKG_FILES = FileList[
11
- "lib/**/*", "bin/*", "test/**/*", "[A-Z]*", "Rakefile"
11
+ "lib/**/*", "bin/*", "test/**/*", "[A-Z]*", "Rakefile", "doc/**/*"
12
12
  ]
13
13
 
14
14
  desc "Default Task"
@@ -26,8 +26,6 @@ Rake::TestTask.new("test") { |t|
26
26
  desc "Create documentation"
27
27
  Rake::RDocTask.new("doc") { |rdoc|
28
28
  rdoc.rdoc_dir = 'doc'
29
- rdoc.title = "Classifier library"
30
- rdoc.options << '--line-numbers --inline-source --accessor'
31
29
  rdoc.rdoc_files.include('README')
32
30
  rdoc.rdoc_files.include('lib/**/*.rb')
33
31
  }
@@ -0,0 +1,135 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>Module: Classifier</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="classHeader">
50
+ <table class="header-table">
51
+ <tr class="top-aligned-row">
52
+ <td><strong>Module</strong></td>
53
+ <td class="class-name-in-header">Classifier</td>
54
+ </tr>
55
+ <tr class="top-aligned-row">
56
+ <td><strong>In:</strong></td>
57
+ <td>
58
+ <a href="../files/lib/classifier/bayes_rb.html">
59
+ lib/classifier/bayes.rb
60
+ </a>
61
+ <br />
62
+ <a href="../files/lib/classifier/string_extensions/porter_stemmer_rb.html">
63
+ lib/classifier/string_extensions/porter_stemmer.rb
64
+ </a>
65
+ <br />
66
+ <a href="../files/lib/classifier/string_extensions/word_hash_rb.html">
67
+ lib/classifier/string_extensions/word_hash.rb
68
+ </a>
69
+ <br />
70
+ </td>
71
+ </tr>
72
+
73
+ </table>
74
+ </div>
75
+ <!-- banner header -->
76
+
77
+ <div id="bodyContent">
78
+
79
+
80
+
81
+ <div id="contextContent">
82
+
83
+ <div id="description">
84
+ <table>
85
+ <tr><td valign="top">Author:</td><td>Lucas Carlson (<a href="mailto:lucas@rufy.com">lucas@rufy.com</a>)
86
+
87
+ </td></tr>
88
+ <tr><td valign="top">Copyright:</td><td>Copyright &#169; 2005 Lucas Carlson
89
+
90
+ </td></tr>
91
+ <tr><td valign="top">License:</td><td>LGPL
92
+
93
+ </td></tr>
94
+ </table>
95
+
96
+ </div>
97
+
98
+
99
+ </div>
100
+
101
+
102
+ </div>
103
+
104
+
105
+ <!-- if includes -->
106
+
107
+ <div id="section">
108
+
109
+ <div id="class-list">
110
+ <h3 class="section-bar">Classes and Modules</h3>
111
+
112
+ Module <a href="Classifier/Stemmable.html" class="link">Classifier::Stemmable</a><br />
113
+ Module <a href="Classifier/WordHash.html" class="link">Classifier::WordHash</a><br />
114
+ Class <a href="Classifier/Bayes.html" class="link">Classifier::Bayes</a><br />
115
+
116
+ </div>
117
+
118
+
119
+
120
+
121
+
122
+
123
+
124
+ <!-- if method_list -->
125
+
126
+
127
+ </div>
128
+
129
+
130
+ <div id="validator-badges">
131
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
132
+ </div>
133
+
134
+ </body>
135
+ </html>
@@ -0,0 +1,287 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>Class: Classifier::Bayes</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="classHeader">
50
+ <table class="header-table">
51
+ <tr class="top-aligned-row">
52
+ <td><strong>Class</strong></td>
53
+ <td class="class-name-in-header">Classifier::Bayes</td>
54
+ </tr>
55
+ <tr class="top-aligned-row">
56
+ <td><strong>In:</strong></td>
57
+ <td>
58
+ <a href="../../files/lib/classifier/bayes_rb.html">
59
+ lib/classifier/bayes.rb
60
+ </a>
61
+ <br />
62
+ </td>
63
+ </tr>
64
+
65
+ <tr class="top-aligned-row">
66
+ <td><strong>Parent:</strong></td>
67
+ <td>
68
+ Object
69
+ </td>
70
+ </tr>
71
+ </table>
72
+ </div>
73
+ <!-- banner header -->
74
+
75
+ <div id="bodyContent">
76
+
77
+
78
+
79
+ <div id="contextContent">
80
+
81
+
82
+
83
+ </div>
84
+
85
+ <div id="method-list">
86
+ <h3 class="section-bar">Methods</h3>
87
+
88
+ <div class="name-list">
89
+ <a href="#M000010">add_category</a>&nbsp;&nbsp;
90
+ <a href="#M000011">append_category</a>&nbsp;&nbsp;
91
+ <a href="#M000007">classifications</a>&nbsp;&nbsp;
92
+ <a href="#M000008">classify</a>&nbsp;&nbsp;
93
+ <a href="#M000009">method_missing</a>&nbsp;&nbsp;
94
+ <a href="#M000005">new</a>&nbsp;&nbsp;
95
+ <a href="#M000006">train</a>&nbsp;&nbsp;
96
+ </div>
97
+ </div>
98
+
99
+ </div>
100
+
101
+
102
+ <!-- if includes -->
103
+
104
+ <div id="section">
105
+
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+ <!-- if method_list -->
114
+ <div id="methods">
115
+ <h3 class="section-bar">Public Class methods</h3>
116
+
117
+ <div id="method-M000005" class="method-detail">
118
+ <a name="M000005"></a>
119
+
120
+ <div class="method-heading">
121
+ <a href="Bayes.src/M000005.html" target="Code" class="method-signature"
122
+ onclick="popupCode('Bayes.src/M000005.html');return false;">
123
+ <span class="method-name">new</span><span class="method-args">(*categories)</span>
124
+ </a>
125
+ </div>
126
+
127
+ <div class="method-description">
128
+ <p>
129
+ The class can be created with one or more categories, each of which will be
130
+ initialized and given a training method. E.g.,
131
+ </p>
132
+ <pre>
133
+ b = Classifier::Bayes.new 'Interesting', 'Uninteresting', 'Spam'
134
+ </pre>
135
+ </div>
136
+ </div>
137
+
138
+ <h3 class="section-bar">Public Instance methods</h3>
139
+
140
+ <div id="method-M000010" class="method-detail">
141
+ <a name="M000010"></a>
142
+
143
+ <div class="method-heading">
144
+ <a href="Bayes.src/M000010.html" target="Code" class="method-signature"
145
+ onclick="popupCode('Bayes.src/M000010.html');return false;">
146
+ <span class="method-name">add_category</span><span class="method-args">(category)</span>
147
+ </a>
148
+ </div>
149
+
150
+ <div class="method-description">
151
+ <p>
152
+ Allows you to add categories to the classifier. For example:
153
+ </p>
154
+ <pre>
155
+ b.add_category &quot;Not spam&quot;
156
+ </pre>
157
+ <p>
158
+ WARNING: Adding categories to a trained classifier will result in an
159
+ undertrained category that will tend to match more criteria than the
160
+ trained selective categories. In short, try to initialize your categories
161
+ at initialization.
162
+ </p>
163
+ </div>
164
+ </div>
165
+
166
+ <div id="method-M000011" class="method-detail">
167
+ <a name="M000011"></a>
168
+
169
+ <div class="method-heading">
170
+ <span class="method-name">append_category</span><span class="method-args">(category)</span>
171
+ </div>
172
+
173
+ <div class="method-description">
174
+ <p>
175
+ Alias for <a href="Bayes.html#M000010">add_category</a>
176
+ </p>
177
+ </div>
178
+ </div>
179
+
180
+ <div id="method-M000007" class="method-detail">
181
+ <a name="M000007"></a>
182
+
183
+ <div class="method-heading">
184
+ <a href="Bayes.src/M000007.html" target="Code" class="method-signature"
185
+ onclick="popupCode('Bayes.src/M000007.html');return false;">
186
+ <span class="method-name">classifications</span><span class="method-args">(text)</span>
187
+ </a>
188
+ </div>
189
+
190
+ <div class="method-description">
191
+ <p>
192
+ Returns the scores in each category for the provided <tt>text</tt>. E.g.,
193
+ </p>
194
+ <pre>
195
+ b.classifications &quot;I hate bad words and you&quot;
196
+ =&gt; {&quot;Uninteresting&quot;=&gt;-12.6997928013932, &quot;Interesting&quot;=&gt;-18.4206807439524}
197
+ </pre>
198
+ <p>
199
+ The largest of these scores (the one closest to 0) is the one picked out by
200
+ <a href="Bayes.html#M000008">classify</a>.
201
+ </p>
202
+ </div>
203
+ </div>
204
+
205
+ <div id="method-M000008" class="method-detail">
206
+ <a name="M000008"></a>
207
+
208
+ <div class="method-heading">
209
+ <a href="Bayes.src/M000008.html" target="Code" class="method-signature"
210
+ onclick="popupCode('Bayes.src/M000008.html');return false;">
211
+ <span class="method-name">classify</span><span class="method-args">(text)</span>
212
+ </a>
213
+ </div>
214
+
215
+ <div class="method-description">
216
+ <p>
217
+ Returns the classification of the provided <tt>text</tt>, which is one of
218
+ the categories given in the initializer. E.g.,
219
+ </p>
220
+ <pre>
221
+ b.classify &quot;I hate bad words and you&quot;
222
+ =&gt; 'Uninteresting'
223
+ </pre>
224
+ </div>
225
+ </div>
226
+
227
+ <div id="method-M000009" class="method-detail">
228
+ <a name="M000009"></a>
229
+
230
+ <div class="method-heading">
231
+ <a href="Bayes.src/M000009.html" target="Code" class="method-signature"
232
+ onclick="popupCode('Bayes.src/M000009.html');return false;">
233
+ <span class="method-name">method_missing</span><span class="method-args">(name, *args)</span>
234
+ </a>
235
+ </div>
236
+
237
+ <div class="method-description">
238
+ <p>
239
+ Provides training methods for the categories specified in <a
240
+ href="Bayes.html#M000005">Bayes#new</a>. For example:
241
+ </p>
242
+ <pre>
243
+ b = Classifier::Bayes.new 'This', 'That', 'the_other'
244
+ b.train_this &quot;This text&quot;
245
+ b.train_that &quot;That text&quot;
246
+ b.train_the_other &quot;The other text&quot;
247
+ </pre>
248
+ </div>
249
+ </div>
250
+
251
+ <div id="method-M000006" class="method-detail">
252
+ <a name="M000006"></a>
253
+
254
+ <div class="method-heading">
255
+ <a href="Bayes.src/M000006.html" target="Code" class="method-signature"
256
+ onclick="popupCode('Bayes.src/M000006.html');return false;">
257
+ <span class="method-name">train</span><span class="method-args">(category, text)</span>
258
+ </a>
259
+ </div>
260
+
261
+ <div class="method-description">
262
+ <p>
263
+ Provides a general training method for all categories specified in <a
264
+ href="Bayes.html#M000005">Bayes#new</a>. For example:
265
+ </p>
266
+ <pre>
267
+ b = Classifier::Bayes.new 'This', 'That', 'the_other'
268
+ b.train :this, &quot;This text&quot;
269
+ b.train &quot;that&quot;, &quot;That text&quot;
270
+ b.train &quot;The other&quot;, &quot;The other text&quot;
271
+ </pre>
272
+ </div>
273
+ </div>
274
+
275
+
276
+ </div>
277
+
278
+
279
+ </div>
280
+
281
+
282
+ <div id="validator-badges">
283
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
284
+ </div>
285
+
286
+ </body>
287
+ </html>