classifier 1.1.1 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +341 -0
- data/README +59 -6
- data/Rakefile +16 -4
- data/bin/bayes.rb +8 -2
- data/doc/classes/Classifier.html +15 -10
- data/doc/classes/Classifier/Bayes.html +68 -38
- data/doc/classes/Classifier/Bayes.src/{M000005.html → M000023.html} +1 -1
- data/doc/classes/Classifier/Bayes.src/{M000006.html → M000024.html} +1 -1
- data/doc/classes/Classifier/Bayes.src/M000025.html +30 -0
- data/doc/classes/Classifier/Bayes.src/{M000007.html → M000026.html} +1 -1
- data/doc/classes/Classifier/Bayes.src/{M000008.html → M000027.html} +1 -1
- data/doc/classes/Classifier/Bayes.src/{M000009.html → M000028.html} +4 -4
- data/doc/classes/Classifier/Bayes.src/{M000010.html → M000029.html} +2 -2
- data/doc/classes/Classifier/ContentNode.html +252 -0
- data/doc/classes/Classifier/ContentNode.src/M000031.html +21 -0
- data/doc/classes/Classifier/ContentNode.src/M000032.html +18 -0
- data/doc/classes/Classifier/ContentNode.src/M000033.html +18 -0
- data/doc/classes/Classifier/ContentNode.src/M000034.html +41 -0
- data/doc/classes/Classifier/LSI.html +449 -0
- data/doc/classes/Classifier/LSI.src/M000011.html +20 -0
- data/doc/classes/Classifier/LSI.src/M000012.html +18 -0
- data/doc/classes/Classifier/LSI.src/M000013.html +20 -0
- data/doc/classes/Classifier/LSI.src/M000014.html +18 -0
- data/doc/classes/Classifier/LSI.src/M000015.html +21 -0
- data/doc/classes/Classifier/LSI.src/M000016.html +18 -0
- data/doc/classes/Classifier/LSI.src/M000017.html +32 -0
- data/doc/classes/Classifier/LSI.src/M000018.html +26 -0
- data/doc/classes/Classifier/LSI.src/M000019.html +26 -0
- data/doc/classes/Classifier/LSI.src/M000020.html +23 -0
- data/doc/classes/Classifier/LSI.src/M000021.html +21 -0
- data/doc/classes/Classifier/LSI.src/M000022.html +31 -0
- data/doc/classes/Classifier/WordList.html +202 -0
- data/doc/classes/Classifier/WordList.src/M000007.html +18 -0
- data/doc/classes/Classifier/WordList.src/M000008.html +19 -0
- data/doc/classes/Classifier/WordList.src/M000009.html +19 -0
- data/doc/classes/Classifier/WordList.src/M000010.html +18 -0
- data/doc/classes/GSL.html +111 -0
- data/doc/classes/GSL/Vector.html +156 -0
- data/doc/classes/GSL/Vector.src/M000005.html +18 -0
- data/doc/classes/GSL/Vector.src/M000006.html +19 -0
- data/doc/classes/Object.html +139 -0
- data/doc/classes/Object.src/M000001.html +16 -0
- data/doc/classes/String.html +95 -9
- data/doc/classes/{Classifier/WordHash.src/M000001.html → String.src/M000002.html} +3 -3
- data/doc/classes/String.src/M000003.html +18 -0
- data/doc/classes/String.src/M000004.html +18 -0
- data/doc/created.rid +1 -1
- data/doc/files/README.html +102 -12
- data/doc/files/lib/classifier/bayes_rb.html +1 -1
- data/doc/files/lib/classifier/{string_extensions/porter_stemmer_rb.html → extensions/vector_serialize_rb.html} +4 -15
- data/doc/files/lib/classifier/{string_extensions → extensions}/word_hash_rb.html +2 -2
- data/doc/files/lib/classifier/extensions/word_list_rb.html +115 -0
- data/doc/files/lib/classifier/lsi/content_node_rb.html +115 -0
- data/doc/files/lib/classifier/lsi_rb.html +125 -0
- data/doc/files/lib/classifier/string_extensions_rb.html +2 -3
- data/doc/files/lib/classifier_rb.html +3 -1
- data/doc/fr_class_index.html +6 -2
- data/doc/fr_file_index.html +5 -2
- data/doc/fr_method_index.html +34 -11
- data/lib/classifier.rb +3 -1
- data/lib/classifier/bayes.rb +34 -9
- data/lib/classifier/extensions/vector_serialize.rb +14 -0
- data/lib/classifier/extensions/word_hash.rb +125 -0
- data/lib/classifier/extensions/word_list.rb +31 -0
- data/lib/classifier/lsi.rb +248 -0
- data/lib/classifier/lsi/content_node.rb +67 -0
- data/lib/classifier/string_extensions.rb +10 -5
- data/test/bayes/bayesian_test.rb +2 -2
- data/test/lsi/lsi_test.rb +88 -0
- data/test/string_extensions/word_hash_test.rb +7 -5
- metadata +79 -24
- data/doc/classes/Classifier/Stemmable.html +0 -243
- data/doc/classes/Classifier/Stemmable.src/M000003.html +0 -102
- data/doc/classes/Classifier/WordHash.html +0 -178
- data/doc/classes/Classifier/WordHash.src/M000002.html +0 -28
- data/lib/classifier/string_extensions/porter_stemmer.rb +0 -199
- data/lib/classifier/string_extensions/word_hash.rb +0 -119
@@ -0,0 +1,115 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>File: content_node.rb</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="fileHeader">
|
50
|
+
<h1>content_node.rb</h1>
|
51
|
+
<table class="header-table">
|
52
|
+
<tr class="top-aligned-row">
|
53
|
+
<td><strong>Path:</strong></td>
|
54
|
+
<td>lib/classifier/lsi/content_node.rb
|
55
|
+
</td>
|
56
|
+
</tr>
|
57
|
+
<tr class="top-aligned-row">
|
58
|
+
<td><strong>Last Update:</strong></td>
|
59
|
+
<td>Sun Apr 24 21:35:57 PDT 2005</td>
|
60
|
+
</tr>
|
61
|
+
</table>
|
62
|
+
</div>
|
63
|
+
<!-- banner header -->
|
64
|
+
|
65
|
+
<div id="bodyContent">
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
<div id="contextContent">
|
70
|
+
|
71
|
+
<div id="description">
|
72
|
+
<table>
|
73
|
+
<tr><td valign="top">Author:</td><td>David Fayram (<a href="mailto:dfayram@lensmen.net">dfayram@lensmen.net</a>)
|
74
|
+
|
75
|
+
</td></tr>
|
76
|
+
<tr><td valign="top">Copyright:</td><td>Copyright © 2005 David Fayram II
|
77
|
+
|
78
|
+
</td></tr>
|
79
|
+
<tr><td valign="top">License:</td><td>GPL
|
80
|
+
|
81
|
+
</td></tr>
|
82
|
+
</table>
|
83
|
+
|
84
|
+
</div>
|
85
|
+
|
86
|
+
|
87
|
+
</div>
|
88
|
+
|
89
|
+
|
90
|
+
</div>
|
91
|
+
|
92
|
+
|
93
|
+
<!-- if includes -->
|
94
|
+
|
95
|
+
<div id="section">
|
96
|
+
|
97
|
+
|
98
|
+
|
99
|
+
|
100
|
+
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
<!-- if method_list -->
|
105
|
+
|
106
|
+
|
107
|
+
</div>
|
108
|
+
|
109
|
+
|
110
|
+
<div id="validator-badges">
|
111
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
112
|
+
</div>
|
113
|
+
|
114
|
+
</body>
|
115
|
+
</html>
|
@@ -0,0 +1,125 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>File: lsi.rb</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="fileHeader">
|
50
|
+
<h1>lsi.rb</h1>
|
51
|
+
<table class="header-table">
|
52
|
+
<tr class="top-aligned-row">
|
53
|
+
<td><strong>Path:</strong></td>
|
54
|
+
<td>lib/classifier/lsi.rb
|
55
|
+
</td>
|
56
|
+
</tr>
|
57
|
+
<tr class="top-aligned-row">
|
58
|
+
<td><strong>Last Update:</strong></td>
|
59
|
+
<td>Sun Apr 24 21:34:06 PDT 2005</td>
|
60
|
+
</tr>
|
61
|
+
</table>
|
62
|
+
</div>
|
63
|
+
<!-- banner header -->
|
64
|
+
|
65
|
+
<div id="bodyContent">
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
<div id="contextContent">
|
70
|
+
|
71
|
+
<div id="description">
|
72
|
+
<table>
|
73
|
+
<tr><td valign="top">Author:</td><td>David Fayram (<a href="mailto:dfayram@lensmen.net">dfayram@lensmen.net</a>)
|
74
|
+
|
75
|
+
</td></tr>
|
76
|
+
<tr><td valign="top">Copyright:</td><td>Copyright © 2005 David Fayram II
|
77
|
+
|
78
|
+
</td></tr>
|
79
|
+
<tr><td valign="top">License:</td><td>GPL
|
80
|
+
|
81
|
+
</td></tr>
|
82
|
+
</table>
|
83
|
+
|
84
|
+
</div>
|
85
|
+
|
86
|
+
<div id="requires-list">
|
87
|
+
<h3 class="section-bar">Required files</h3>
|
88
|
+
|
89
|
+
<div class="name-list">
|
90
|
+
gsl
|
91
|
+
classifier/extensions/word_list
|
92
|
+
classifier/extensions/vector_serialize
|
93
|
+
classifier/lsi/content_node
|
94
|
+
</div>
|
95
|
+
</div>
|
96
|
+
|
97
|
+
</div>
|
98
|
+
|
99
|
+
|
100
|
+
</div>
|
101
|
+
|
102
|
+
|
103
|
+
<!-- if includes -->
|
104
|
+
|
105
|
+
<div id="section">
|
106
|
+
|
107
|
+
|
108
|
+
|
109
|
+
|
110
|
+
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
<!-- if method_list -->
|
115
|
+
|
116
|
+
|
117
|
+
</div>
|
118
|
+
|
119
|
+
|
120
|
+
<div id="validator-badges">
|
121
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
122
|
+
</div>
|
123
|
+
|
124
|
+
</body>
|
125
|
+
</html>
|
@@ -56,7 +56,7 @@
|
|
56
56
|
</tr>
|
57
57
|
<tr class="top-aligned-row">
|
58
58
|
<td><strong>Last Update:</strong></td>
|
59
|
-
<td>
|
59
|
+
<td>Sun Apr 24 02:08:49 PDT 2005</td>
|
60
60
|
</tr>
|
61
61
|
</table>
|
62
62
|
</div>
|
@@ -87,8 +87,7 @@
|
|
87
87
|
<h3 class="section-bar">Required files</h3>
|
88
88
|
|
89
89
|
<div class="name-list">
|
90
|
-
classifier/
|
91
|
-
classifier/string_extensions/word_hash
|
90
|
+
classifier/extensions/word_hash
|
92
91
|
</div>
|
93
92
|
</div>
|
94
93
|
|
@@ -56,7 +56,7 @@
|
|
56
56
|
</tr>
|
57
57
|
<tr class="top-aligned-row">
|
58
58
|
<td><strong>Last Update:</strong></td>
|
59
|
-
<td>
|
59
|
+
<td>Sun Apr 24 02:08:49 PDT 2005</td>
|
60
60
|
</tr>
|
61
61
|
</table>
|
62
62
|
</div>
|
@@ -87,8 +87,10 @@
|
|
87
87
|
<h3 class="section-bar">Required files</h3>
|
88
88
|
|
89
89
|
<div class="name-list">
|
90
|
+
rubygems
|
90
91
|
classifier/string_extensions
|
91
92
|
classifier/bayes
|
93
|
+
classifier/lsi
|
92
94
|
</div>
|
93
95
|
</div>
|
94
96
|
|
data/doc/fr_class_index.html
CHANGED
@@ -22,8 +22,12 @@
|
|
22
22
|
<div id="index-entries">
|
23
23
|
<a href="classes/Classifier.html">Classifier</a><br />
|
24
24
|
<a href="classes/Classifier/Bayes.html">Classifier::Bayes</a><br />
|
25
|
-
<a href="classes/Classifier/
|
26
|
-
<a href="classes/Classifier/
|
25
|
+
<a href="classes/Classifier/ContentNode.html">Classifier::ContentNode</a><br />
|
26
|
+
<a href="classes/Classifier/LSI.html">Classifier::LSI</a><br />
|
27
|
+
<a href="classes/Classifier/WordList.html">Classifier::WordList</a><br />
|
28
|
+
<a href="classes/GSL.html">GSL</a><br />
|
29
|
+
<a href="classes/GSL/Vector.html">GSL::Vector</a><br />
|
30
|
+
<a href="classes/Object.html">Object</a><br />
|
27
31
|
<a href="classes/String.html">String</a><br />
|
28
32
|
</div>
|
29
33
|
</div>
|
data/doc/fr_file_index.html
CHANGED
@@ -23,9 +23,12 @@
|
|
23
23
|
<a href="files/README.html">README</a><br />
|
24
24
|
<a href="files/lib/classifier_rb.html">lib/classifier.rb</a><br />
|
25
25
|
<a href="files/lib/classifier/bayes_rb.html">lib/classifier/bayes.rb</a><br />
|
26
|
+
<a href="files/lib/classifier/extensions/vector_serialize_rb.html">lib/classifier/extensions/vector_serialize.rb</a><br />
|
27
|
+
<a href="files/lib/classifier/extensions/word_hash_rb.html">lib/classifier/extensions/word_hash.rb</a><br />
|
28
|
+
<a href="files/lib/classifier/extensions/word_list_rb.html">lib/classifier/extensions/word_list.rb</a><br />
|
29
|
+
<a href="files/lib/classifier/lsi_rb.html">lib/classifier/lsi.rb</a><br />
|
30
|
+
<a href="files/lib/classifier/lsi/content_node_rb.html">lib/classifier/lsi/content_node.rb</a><br />
|
26
31
|
<a href="files/lib/classifier/string_extensions_rb.html">lib/classifier/string_extensions.rb</a><br />
|
27
|
-
<a href="files/lib/classifier/string_extensions/porter_stemmer_rb.html">lib/classifier/string_extensions/porter_stemmer.rb</a><br />
|
28
|
-
<a href="files/lib/classifier/string_extensions/word_hash_rb.html">lib/classifier/string_extensions/word_hash.rb</a><br />
|
29
32
|
</div>
|
30
33
|
</div>
|
31
34
|
</body>
|
data/doc/fr_method_index.html
CHANGED
@@ -20,17 +20,40 @@
|
|
20
20
|
<div id="index">
|
21
21
|
<h1 class="section-bar">Methods</h1>
|
22
22
|
<div id="index-entries">
|
23
|
-
<a href="classes/Classifier/
|
24
|
-
<a href="classes/Classifier/
|
25
|
-
<a href="classes/
|
26
|
-
<a href="classes/
|
27
|
-
<a href="classes/Classifier/Bayes.html#
|
28
|
-
<a href="classes/Classifier/
|
29
|
-
<a href="classes/Classifier/
|
30
|
-
<a href="classes/Classifier/
|
31
|
-
<a href="classes/Classifier/
|
32
|
-
<a href="classes/Classifier/
|
33
|
-
<a href="classes/Classifier/
|
23
|
+
<a href="classes/Classifier/LSI.html#M000014"><< (Classifier::LSI)</a><br />
|
24
|
+
<a href="classes/Classifier/WordList.html#M000009">[] (Classifier::WordList)</a><br />
|
25
|
+
<a href="classes/GSL/Vector.html#M000005">_dump (GSL::Vector)</a><br />
|
26
|
+
<a href="classes/GSL/Vector.html#M000006">_load (GSL::Vector)</a><br />
|
27
|
+
<a href="classes/Classifier/Bayes.html#M000029">add_category (Classifier::Bayes)</a><br />
|
28
|
+
<a href="classes/Classifier/LSI.html#M000013">add_item (Classifier::LSI)</a><br />
|
29
|
+
<a href="classes/Classifier/WordList.html#M000008">add_word (Classifier::WordList)</a><br />
|
30
|
+
<a href="classes/Classifier/Bayes.html#M000030">append_category (Classifier::Bayes)</a><br />
|
31
|
+
<a href="classes/Classifier/LSI.html#M000017">build_index (Classifier::LSI)</a><br />
|
32
|
+
<a href="classes/Classifier/Bayes.html#M000026">classifications (Classifier::Bayes)</a><br />
|
33
|
+
<a href="classes/Classifier/LSI.html#M000022">classify (Classifier::LSI)</a><br />
|
34
|
+
<a href="classes/Classifier/Bayes.html#M000027">classify (Classifier::Bayes)</a><br />
|
35
|
+
<a href="classes/String.html#M000004">clean_word_hash (String)</a><br />
|
36
|
+
<a href="classes/Classifier/LSI.html#M000021">find_related (Classifier::LSI)</a><br />
|
37
|
+
<a href="classes/Classifier/LSI.html#M000016">items (Classifier::LSI)</a><br />
|
38
|
+
<a href="classes/Classifier/Bayes.html#M000028">method_missing (Classifier::Bayes)</a><br />
|
39
|
+
<a href="classes/Classifier/LSI.html#M000012">needs_rebuild? (Classifier::LSI)</a><br />
|
40
|
+
<a href="classes/Classifier/Bayes.html#M000023">new (Classifier::Bayes)</a><br />
|
41
|
+
<a href="classes/Classifier/LSI.html#M000011">new (Classifier::LSI)</a><br />
|
42
|
+
<a href="classes/Classifier/ContentNode.html#M000031">new (Classifier::ContentNode)</a><br />
|
43
|
+
<a href="classes/Classifier/WordList.html#M000007">new (Classifier::WordList)</a><br />
|
44
|
+
<a href="classes/Object.html#M000001">prepare_category_name (Object)</a><br />
|
45
|
+
<a href="classes/Classifier/LSI.html#M000018">proximity_array_for_content (Classifier::LSI)</a><br />
|
46
|
+
<a href="classes/Classifier/LSI.html#M000019">proximity_norms_for_content (Classifier::LSI)</a><br />
|
47
|
+
<a href="classes/Classifier/ContentNode.html#M000034">raw_vector_with (Classifier::ContentNode)</a><br />
|
48
|
+
<a href="classes/Classifier/LSI.html#M000015">remove_item (Classifier::LSI)</a><br />
|
49
|
+
<a href="classes/Classifier/LSI.html#M000020">search (Classifier::LSI)</a><br />
|
50
|
+
<a href="classes/Classifier/ContentNode.html#M000033">search_norm (Classifier::ContentNode)</a><br />
|
51
|
+
<a href="classes/Classifier/ContentNode.html#M000032">search_vector (Classifier::ContentNode)</a><br />
|
52
|
+
<a href="classes/Classifier/WordList.html#M000010">size (Classifier::WordList)</a><br />
|
53
|
+
<a href="classes/Classifier/Bayes.html#M000024">train (Classifier::Bayes)</a><br />
|
54
|
+
<a href="classes/Classifier/Bayes.html#M000025">untrain (Classifier::Bayes)</a><br />
|
55
|
+
<a href="classes/String.html#M000002">without_punctuation (String)</a><br />
|
56
|
+
<a href="classes/String.html#M000003">word_hash (String)</a><br />
|
34
57
|
</div>
|
35
58
|
</div>
|
36
59
|
</body>
|
data/lib/classifier.rb
CHANGED
data/lib/classifier/bayes.rb
CHANGED
@@ -10,7 +10,7 @@ class Bayes
|
|
10
10
|
# b = Classifier::Bayes.new 'Interesting', 'Uninteresting', 'Spam'
|
11
11
|
def initialize(*categories)
|
12
12
|
@categories = Hash.new
|
13
|
-
categories.each { |category| @categories[category.
|
13
|
+
categories.each { |category| @categories[category.prepare_category_name] = Hash.new }
|
14
14
|
@total_words = 0
|
15
15
|
end
|
16
16
|
|
@@ -22,14 +22,38 @@ class Bayes
|
|
22
22
|
# b.train "that", "That text"
|
23
23
|
# b.train "The other", "The other text"
|
24
24
|
def train(category, text)
|
25
|
-
category = category.
|
25
|
+
category = category.prepare_category_name
|
26
26
|
text.word_hash.each do |word, count|
|
27
27
|
@categories[category][word] ||= 0
|
28
28
|
@categories[category][word] += count
|
29
29
|
@total_words += count
|
30
30
|
end
|
31
31
|
end
|
32
|
-
|
32
|
+
|
33
|
+
#
|
34
|
+
# Provides an untraining method for all categories specified in Bayes#new
|
35
|
+
# Be very careful with this method.
|
36
|
+
#
|
37
|
+
# For example:
|
38
|
+
# b = Classifier::Bayes.new 'This', 'That', 'the_other'
|
39
|
+
# b.train :this, "This text"
|
40
|
+
# b.untrain :this, "This text"
|
41
|
+
def untrain(category, text)
|
42
|
+
category = category.prepare_category_name
|
43
|
+
text.word_hash.each do |word, count|
|
44
|
+
if @total_words >= 0
|
45
|
+
orig = @categories[category][word]
|
46
|
+
@categories[category][word] ||= 0
|
47
|
+
@categories[category][word] -= count
|
48
|
+
if @categories[category][word] <= 0
|
49
|
+
@categories[category].delete(word)
|
50
|
+
count = orig
|
51
|
+
end
|
52
|
+
@total_words -= count
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
33
57
|
#
|
34
58
|
# Returns the scores in each category for the provided +text+. E.g.,
|
35
59
|
# b.classifications "I hate bad words and you"
|
@@ -58,17 +82,18 @@ class Bayes
|
|
58
82
|
end
|
59
83
|
|
60
84
|
#
|
61
|
-
# Provides training methods for the categories specified in Bayes#new
|
85
|
+
# Provides training and untraining methods for the categories specified in Bayes#new
|
62
86
|
# For example:
|
63
87
|
# b = Classifier::Bayes.new 'This', 'That', 'the_other'
|
64
88
|
# b.train_this "This text"
|
65
89
|
# b.train_that "That text"
|
90
|
+
# b.untrain_that "That text"
|
66
91
|
# b.train_the_other "The other text"
|
67
92
|
def method_missing(name, *args)
|
68
|
-
category = name.to_s.gsub(/train_([\w]+)/, '\
|
93
|
+
category = name.to_s.gsub(/(un)?train_([\w]+)/, '\2').prepare_category_name
|
69
94
|
if @categories.has_key? category
|
70
|
-
args.each {|text| train
|
71
|
-
elsif name.to_s =~ /train_([\w]+)/
|
95
|
+
args.each { |text| eval("#{$1}train(category, text)") }
|
96
|
+
elsif name.to_s =~ /(un)?train_([\w]+)/
|
72
97
|
raise StandardError, "No such category: #{category}"
|
73
98
|
else
|
74
99
|
super #raise StandardError, "No such method: #{name}"
|
@@ -94,10 +119,10 @@ class Bayes
|
|
94
119
|
# more criteria than the trained selective categories. In short,
|
95
120
|
# try to initialize your categories at initialization.
|
96
121
|
def add_category(category)
|
97
|
-
@categories[category.
|
122
|
+
@categories[category.prepare_category_name] = Hash.new
|
98
123
|
end
|
99
124
|
|
100
|
-
alias append_category add_category
|
125
|
+
alias append_category add_category
|
101
126
|
end
|
102
127
|
|
103
128
|
end
|