classifier 1.1 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +2 -2
- data/Rakefile +2 -4
- data/doc/classes/Classifier.html +135 -0
- data/doc/classes/Classifier/Bayes.html +287 -0
- data/doc/classes/Classifier/Bayes.src/M000005.html +20 -0
- data/doc/classes/Classifier/Bayes.src/M000006.html +23 -0
- data/doc/classes/Classifier/Bayes.src/M000007.html +27 -0
- data/doc/classes/Classifier/Bayes.src/M000008.html +18 -0
- data/doc/classes/Classifier/Bayes.src/M000009.html +25 -0
- data/doc/classes/Classifier/Bayes.src/M000010.html +18 -0
- data/doc/classes/Classifier/Stemmable.html +243 -0
- data/doc/classes/Classifier/Stemmable.src/M000003.html +102 -0
- data/doc/classes/Classifier/WordHash.html +178 -0
- data/doc/classes/Classifier/WordHash.src/M000001.html +18 -0
- data/doc/classes/Classifier/WordHash.src/M000002.html +28 -0
- data/doc/classes/String.html +119 -0
- data/doc/created.rid +1 -0
- data/doc/files/README.html +156 -0
- data/doc/files/lib/classifier/bayes_rb.html +115 -0
- data/doc/files/lib/classifier/string_extensions/porter_stemmer_rb.html +112 -0
- data/doc/files/lib/classifier/string_extensions/word_hash_rb.html +115 -0
- data/doc/files/lib/classifier/string_extensions_rb.html +123 -0
- data/doc/files/lib/classifier_rb.html +123 -0
- data/doc/fr_class_index.html +31 -0
- data/doc/fr_file_index.html +32 -0
- data/doc/fr_method_index.html +37 -0
- data/doc/index.html +24 -0
- data/doc/rdoc-style.css +208 -0
- data/lib/classifier/bayes.rb +63 -12
- data/lib/classifier/string_extensions/porter_stemmer.rb +18 -15
- data/lib/classifier/string_extensions/word_hash.rb +96 -3
- data/test/bayes/bayesian_test.rb +13 -0
- data/test/string_extensions/word_hash_test.rb +7 -3
- metadata +36 -1
@@ -0,0 +1,178 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>Module: Classifier::WordHash</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="classHeader">
|
50
|
+
<table class="header-table">
|
51
|
+
<tr class="top-aligned-row">
|
52
|
+
<td><strong>Module</strong></td>
|
53
|
+
<td class="class-name-in-header">Classifier::WordHash</td>
|
54
|
+
</tr>
|
55
|
+
<tr class="top-aligned-row">
|
56
|
+
<td><strong>In:</strong></td>
|
57
|
+
<td>
|
58
|
+
<a href="../../files/lib/classifier/string_extensions/word_hash_rb.html">
|
59
|
+
lib/classifier/string_extensions/word_hash.rb
|
60
|
+
</a>
|
61
|
+
<br />
|
62
|
+
</td>
|
63
|
+
</tr>
|
64
|
+
|
65
|
+
</table>
|
66
|
+
</div>
|
67
|
+
<!-- banner header -->
|
68
|
+
|
69
|
+
<div id="bodyContent">
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
<div id="contextContent">
|
74
|
+
|
75
|
+
<div id="description">
|
76
|
+
<p>
|
77
|
+
This module is mixed into <a href="../String.html">String</a> to provide
|
78
|
+
convenience methods for the <a href="../Classifier.html">Classifier</a>
|
79
|
+
package.
|
80
|
+
</p>
|
81
|
+
|
82
|
+
</div>
|
83
|
+
|
84
|
+
|
85
|
+
</div>
|
86
|
+
|
87
|
+
<div id="method-list">
|
88
|
+
<h3 class="section-bar">Methods</h3>
|
89
|
+
|
90
|
+
<div class="name-list">
|
91
|
+
<a href="#M000001">without_punctuation</a>
|
92
|
+
<a href="#M000002">word_hash</a>
|
93
|
+
</div>
|
94
|
+
</div>
|
95
|
+
|
96
|
+
</div>
|
97
|
+
|
98
|
+
|
99
|
+
<!-- if includes -->
|
100
|
+
|
101
|
+
<div id="section">
|
102
|
+
|
103
|
+
|
104
|
+
<div id="constants-list">
|
105
|
+
<h3 class="section-bar">Constants</h3>
|
106
|
+
|
107
|
+
<div class="name-list">
|
108
|
+
<table summary="Constants">
|
109
|
+
<tr class="top-aligned-row context-row">
|
110
|
+
<td class="context-item-name">CORPUS_SKIP_WORDS</td>
|
111
|
+
<td>=</td>
|
112
|
+
<td class="context-item-value">{ "a" => 1, "again" => 1, "all" => 1, "along" => 1, "are" => 1, "also" => 1, "an" => 1, "and" => 1, "as" => 1, "at" => 1, "but" => 1, "by" => 1, "came" => 1, "can" => 1, "cant" => 1, "couldnt" => 1, "did" => 1, "didn" => 1, "didnt" => 1, "do" => 1, "doesnt" => 1, "dont" => 1, "ever" => 1, "first" => 1, "from" => 1, "have" => 1, "her" => 1, "here" => 1, "him" => 1, "how" => 1, "i" => 1, "if" => 1, "in" => 1, "into" => 1, "is" => 1, "isnt" => 1, "it" => 1, "itll" => 1, "just" => 1, "last" => 1, "least" => 1, "like" => 1, "most" => 1, "my" => 1, "new" => 1, "no" => 1, "not" => 1, "now" => 1, "of" => 1, "on" => 1, "or" => 1, "should" => 1, "sinc" => 1, "so" => 1, "some" => 1, "th" => 1, "than" => 1, "this" => 1, "that" => 1, "the" => 1, "their" => 1, "then" => 1, "those" => 1, "to" => 1, "told" => 1, "too" => 1, "true" => 1, "try" => 1, "until" => 1, "url" => 1, "us" => 1, "were" => 1, "when" => 1, "whether" => 1, "while" => 1, "with" => 1, "within" => 1, "yes" => 1, "you" => 1, "youll" => 1, }</td>
|
113
|
+
</tr>
|
114
|
+
</table>
|
115
|
+
</div>
|
116
|
+
</div>
|
117
|
+
|
118
|
+
|
119
|
+
|
120
|
+
|
121
|
+
|
122
|
+
|
123
|
+
<!-- if method_list -->
|
124
|
+
<div id="methods">
|
125
|
+
<h3 class="section-bar">Public Instance methods</h3>
|
126
|
+
|
127
|
+
<div id="method-M000001" class="method-detail">
|
128
|
+
<a name="M000001"></a>
|
129
|
+
|
130
|
+
<div class="method-heading">
|
131
|
+
<a href="WordHash.src/M000001.html" target="Code" class="method-signature"
|
132
|
+
onclick="popupCode('WordHash.src/M000001.html');return false;">
|
133
|
+
<span class="method-name">without_punctuation</span><span class="method-args">()</span>
|
134
|
+
</a>
|
135
|
+
</div>
|
136
|
+
|
137
|
+
<div class="method-description">
|
138
|
+
<p>
|
139
|
+
Removes common punctuation symbols, returning a new string. E.g.,
|
140
|
+
</p>
|
141
|
+
<pre>
|
142
|
+
"Hello (greeting's), with {braces} &lt; &gt;...?".without_punctuation
|
143
|
+
=> "Hello greetings with braces "
|
144
|
+
</pre>
|
145
|
+
</div>
|
146
|
+
</div>
|
147
|
+
|
148
|
+
<div id="method-M000002" class="method-detail">
|
149
|
+
<a name="M000002"></a>
|
150
|
+
|
151
|
+
<div class="method-heading">
|
152
|
+
<a href="WordHash.src/M000002.html" target="Code" class="method-signature"
|
153
|
+
onclick="popupCode('WordHash.src/M000002.html');return false;">
|
154
|
+
<span class="method-name">word_hash</span><span class="method-args">()</span>
|
155
|
+
</a>
|
156
|
+
</div>
|
157
|
+
|
158
|
+
<div class="method-description">
|
159
|
+
<p>
|
160
|
+
Return a Hash of symbols => ints. Each word in the string is stemmed,
|
161
|
+
interned, and mapped to its frequency in the document.
|
162
|
+
</p>
|
163
|
+
</div>
|
164
|
+
</div>
|
165
|
+
|
166
|
+
|
167
|
+
</div>
|
168
|
+
|
169
|
+
|
170
|
+
</div>
|
171
|
+
|
172
|
+
|
173
|
+
<div id="validator-badges">
|
174
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
175
|
+
</div>
|
176
|
+
|
177
|
+
</body>
|
178
|
+
</html>
|
@@ -0,0 +1,18 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>without_punctuation (Classifier::WordHash)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/classifier/string_extensions/word_hash.rb, line 14</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">without_punctuation</span>
|
15
|
+
<span class="ruby-identifier">tr</span>( <span class="ruby-value str">',?.!;:"@#$%^&amp;*()_=+[]{}\|&lt;&gt;/`~'</span>, <span class="ruby-value str">" "</span> ) .<span class="ruby-identifier">tr</span>( <span class="ruby-value str">"'\-"</span>, <span class="ruby-value str">""</span>)
|
16
|
+
<span class="ruby-keyword kw">end</span></pre>
|
17
|
+
</body>
|
18
|
+
</html>
|
@@ -0,0 +1,28 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>word_hash (Classifier::WordHash)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/classifier/string_extensions/word_hash.rb, line 20</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">word_hash</span>
|
15
|
+
<span class="ruby-identifier">d</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
|
16
|
+
<span class="ruby-identifier">corpus</span> = <span class="ruby-identifier">without_punctuation</span>
|
17
|
+
(<span class="ruby-identifier">corpus</span>.<span class="ruby-identifier">split</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/[\w+]/</span>,<span class="ruby-value str">""</span>).<span class="ruby-identifier">split</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">word</span><span class="ruby-operator">|</span>
|
18
|
+
<span class="ruby-identifier">item</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">downcase</span>
|
19
|
+
<span class="ruby-identifier">key</span> = <span class="ruby-identifier">item</span>.<span class="ruby-identifier">stem</span>.<span class="ruby-identifier">intern</span>
|
20
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-operator">!</span>(<span class="ruby-identifier">word</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/[\w+]/</span>) <span class="ruby-operator">||</span> <span class="ruby-identifier">word</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">></span> <span class="ruby-value">2</span>
|
21
|
+
<span class="ruby-identifier">d</span>[<span class="ruby-identifier">key</span>] <span class="ruby-operator">||=</span> <span class="ruby-value">0</span>
|
22
|
+
<span class="ruby-identifier">d</span>[<span class="ruby-identifier">key</span>] <span class="ruby-operator">+=</span> <span class="ruby-value">1</span>
|
23
|
+
<span class="ruby-keyword kw">end</span> <span class="ruby-keyword kw">unless</span> <span class="ruby-constant">CORPUS_SKIP_WORDS</span>[<span class="ruby-identifier">item</span>]
|
24
|
+
<span class="ruby-keyword kw">end</span>
|
25
|
+
<span class="ruby-keyword kw">return</span> <span class="ruby-identifier">d</span>
|
26
|
+
<span class="ruby-keyword kw">end</span></pre>
|
27
|
+
</body>
|
28
|
+
</html>
|
@@ -0,0 +1,119 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>Class: String</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="classHeader">
|
50
|
+
<table class="header-table">
|
51
|
+
<tr class="top-aligned-row">
|
52
|
+
<td><strong>Class</strong></td>
|
53
|
+
<td class="class-name-in-header">String</td>
|
54
|
+
</tr>
|
55
|
+
<tr class="top-aligned-row">
|
56
|
+
<td><strong>In:</strong></td>
|
57
|
+
<td>
|
58
|
+
<a href="../files/lib/classifier/string_extensions_rb.html">
|
59
|
+
lib/classifier/string_extensions.rb
|
60
|
+
</a>
|
61
|
+
<br />
|
62
|
+
</td>
|
63
|
+
</tr>
|
64
|
+
|
65
|
+
<tr class="top-aligned-row">
|
66
|
+
<td><strong>Parent:</strong></td>
|
67
|
+
<td>
|
68
|
+
Object
|
69
|
+
</td>
|
70
|
+
</tr>
|
71
|
+
</table>
|
72
|
+
</div>
|
73
|
+
<!-- banner header -->
|
74
|
+
|
75
|
+
<div id="bodyContent">
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
<div id="contextContent">
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
</div>
|
84
|
+
|
85
|
+
|
86
|
+
</div>
|
87
|
+
|
88
|
+
|
89
|
+
<!-- if includes -->
|
90
|
+
<div id="includes">
|
91
|
+
<h3 class="section-bar">Included Modules</h3>
|
92
|
+
|
93
|
+
<div id="includes-list">
|
94
|
+
<span class="include-name"><a href="Classifier/Stemmable.html">Classifier::Stemmable</a></span>
|
95
|
+
<span class="include-name"><a href="Classifier/WordHash.html">Classifier::WordHash</a></span>
|
96
|
+
</div>
|
97
|
+
</div>
|
98
|
+
|
99
|
+
<div id="section">
|
100
|
+
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
|
105
|
+
|
106
|
+
|
107
|
+
|
108
|
+
<!-- if method_list -->
|
109
|
+
|
110
|
+
|
111
|
+
</div>
|
112
|
+
|
113
|
+
|
114
|
+
<div id="validator-badges">
|
115
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
116
|
+
</div>
|
117
|
+
|
118
|
+
</body>
|
119
|
+
</html>
|
data/doc/created.rid
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Mon Apr 11 18:45:03 PDT 2005
|
@@ -0,0 +1,156 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>File: README</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="fileHeader">
|
50
|
+
<h1>README</h1>
|
51
|
+
<table class="header-table">
|
52
|
+
<tr class="top-aligned-row">
|
53
|
+
<td><strong>Path:</strong></td>
|
54
|
+
<td>README
|
55
|
+
</td>
|
56
|
+
</tr>
|
57
|
+
<tr class="top-aligned-row">
|
58
|
+
<td><strong>Last Update:</strong></td>
|
59
|
+
<td>Mon Apr 11 17:22:25 PDT 2005</td>
|
60
|
+
</tr>
|
61
|
+
</table>
|
62
|
+
</div>
|
63
|
+
<!-- banner header -->
|
64
|
+
|
65
|
+
<div id="bodyContent">
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
<div id="contextContent">
|
70
|
+
|
71
|
+
<div id="description">
|
72
|
+
<h2>Welcome to <a href="../classes/Classifier.html">Classifier</a></h2>
|
73
|
+
<p>
|
74
|
+
<a href="../classes/Classifier.html">Classifier</a> is a general module to
|
75
|
+
allow Bayesian and other types of classifications.
|
76
|
+
</p>
|
77
|
+
<h2>Usage</h2>
|
78
|
+
<pre>
|
79
|
+
require 'classifier'
|
80
|
+
b = Classifier::Bayes.new 'Interesting', 'Uninteresting'
|
81
|
+
b.train_interesting "here are some good words. I hope you love them"
|
82
|
+
b.train_uninteresting "here are some bad words, I hate you"
|
83
|
+
b.classify "I hate bad words and you" # returns 'Uninteresting'
|
84
|
+
|
85
|
+
require 'madeleine'
|
86
|
+
m = SnapshotMadeleine.new("bayes_data") {
|
87
|
+
Classifier::Bayes.new 'Interesting', 'Uninteresting'
|
88
|
+
}
|
89
|
+
m.system.train_interesting "here are some good words. I hope you love them"
|
90
|
+
m.system.train_uninteresting "here are some bad words, I hate you"
|
91
|
+
m.take_snapshot
|
92
|
+
m.system.classify "I love you" # returns 'Interesting'
|
93
|
+
</pre>
|
94
|
+
<p>
|
95
|
+
Using Madeleine, your application can persist the learned data over time.
|
96
|
+
</p>
|
97
|
+
<h2>Bayesian Classification</h2>
|
98
|
+
<ul>
|
99
|
+
<li><a
|
100
|
+
href="http://www.process.com/precisemail/bayesian_filtering.htm">www.process.com/precisemail/bayesian_filtering.htm</a>
|
101
|
+
|
102
|
+
</li>
|
103
|
+
<li><a
|
104
|
+
href="http://en.wikipedia.org/wiki/Bayesian_filtering">en.wikipedia.org/wiki/Bayesian_filtering</a>
|
105
|
+
|
106
|
+
</li>
|
107
|
+
<li><a
|
108
|
+
href="http://www.paulgraham.com/spam.html">www.paulgraham.com/spam.html</a>
|
109
|
+
|
110
|
+
</li>
|
111
|
+
</ul>
|
112
|
+
<h2>About</h2>
|
113
|
+
<table>
|
114
|
+
<tr><td valign="top">Author:</td><td>Lucas Carlson (<a href="mailto:lucas@rufy.com">lucas@rufy.com</a>)
|
115
|
+
|
116
|
+
</td></tr>
|
117
|
+
<tr><td valign="top">Copyright:</td><td>Copyright © 2005 Lucas Carlson
|
118
|
+
|
119
|
+
</td></tr>
|
120
|
+
<tr><td valign="top">License:</td><td>LGPL
|
121
|
+
|
122
|
+
</td></tr>
|
123
|
+
</table>
|
124
|
+
|
125
|
+
</div>
|
126
|
+
|
127
|
+
|
128
|
+
</div>
|
129
|
+
|
130
|
+
|
131
|
+
</div>
|
132
|
+
|
133
|
+
|
134
|
+
<!-- if includes -->
|
135
|
+
|
136
|
+
<div id="section">
|
137
|
+
|
138
|
+
|
139
|
+
|
140
|
+
|
141
|
+
|
142
|
+
|
143
|
+
|
144
|
+
|
145
|
+
<!-- if method_list -->
|
146
|
+
|
147
|
+
|
148
|
+
</div>
|
149
|
+
|
150
|
+
|
151
|
+
<div id="validator-badges">
|
152
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
153
|
+
</div>
|
154
|
+
|
155
|
+
</body>
|
156
|
+
</html>
|