classifier 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. data/LICENSE +341 -0
  2. data/README +59 -6
  3. data/Rakefile +16 -4
  4. data/bin/bayes.rb +8 -2
  5. data/doc/classes/Classifier.html +15 -10
  6. data/doc/classes/Classifier/Bayes.html +68 -38
  7. data/doc/classes/Classifier/Bayes.src/{M000005.html → M000023.html} +1 -1
  8. data/doc/classes/Classifier/Bayes.src/{M000006.html → M000024.html} +1 -1
  9. data/doc/classes/Classifier/Bayes.src/M000025.html +30 -0
  10. data/doc/classes/Classifier/Bayes.src/{M000007.html → M000026.html} +1 -1
  11. data/doc/classes/Classifier/Bayes.src/{M000008.html → M000027.html} +1 -1
  12. data/doc/classes/Classifier/Bayes.src/{M000009.html → M000028.html} +4 -4
  13. data/doc/classes/Classifier/Bayes.src/{M000010.html → M000029.html} +2 -2
  14. data/doc/classes/Classifier/ContentNode.html +252 -0
  15. data/doc/classes/Classifier/ContentNode.src/M000031.html +21 -0
  16. data/doc/classes/Classifier/ContentNode.src/M000032.html +18 -0
  17. data/doc/classes/Classifier/ContentNode.src/M000033.html +18 -0
  18. data/doc/classes/Classifier/ContentNode.src/M000034.html +41 -0
  19. data/doc/classes/Classifier/LSI.html +449 -0
  20. data/doc/classes/Classifier/LSI.src/M000011.html +20 -0
  21. data/doc/classes/Classifier/LSI.src/M000012.html +18 -0
  22. data/doc/classes/Classifier/LSI.src/M000013.html +20 -0
  23. data/doc/classes/Classifier/LSI.src/M000014.html +18 -0
  24. data/doc/classes/Classifier/LSI.src/M000015.html +21 -0
  25. data/doc/classes/Classifier/LSI.src/M000016.html +18 -0
  26. data/doc/classes/Classifier/LSI.src/M000017.html +32 -0
  27. data/doc/classes/Classifier/LSI.src/M000018.html +26 -0
  28. data/doc/classes/Classifier/LSI.src/M000019.html +26 -0
  29. data/doc/classes/Classifier/LSI.src/M000020.html +23 -0
  30. data/doc/classes/Classifier/LSI.src/M000021.html +21 -0
  31. data/doc/classes/Classifier/LSI.src/M000022.html +31 -0
  32. data/doc/classes/Classifier/WordList.html +202 -0
  33. data/doc/classes/Classifier/WordList.src/M000007.html +18 -0
  34. data/doc/classes/Classifier/WordList.src/M000008.html +19 -0
  35. data/doc/classes/Classifier/WordList.src/M000009.html +19 -0
  36. data/doc/classes/Classifier/WordList.src/M000010.html +18 -0
  37. data/doc/classes/GSL.html +111 -0
  38. data/doc/classes/GSL/Vector.html +156 -0
  39. data/doc/classes/GSL/Vector.src/M000005.html +18 -0
  40. data/doc/classes/GSL/Vector.src/M000006.html +19 -0
  41. data/doc/classes/Object.html +139 -0
  42. data/doc/classes/Object.src/M000001.html +16 -0
  43. data/doc/classes/String.html +95 -9
  44. data/doc/classes/{Classifier/WordHash.src/M000001.html → String.src/M000002.html} +3 -3
  45. data/doc/classes/String.src/M000003.html +18 -0
  46. data/doc/classes/String.src/M000004.html +18 -0
  47. data/doc/created.rid +1 -1
  48. data/doc/files/README.html +102 -12
  49. data/doc/files/lib/classifier/bayes_rb.html +1 -1
  50. data/doc/files/lib/classifier/{string_extensions/porter_stemmer_rb.html → extensions/vector_serialize_rb.html} +4 -15
  51. data/doc/files/lib/classifier/{string_extensions → extensions}/word_hash_rb.html +2 -2
  52. data/doc/files/lib/classifier/extensions/word_list_rb.html +115 -0
  53. data/doc/files/lib/classifier/lsi/content_node_rb.html +115 -0
  54. data/doc/files/lib/classifier/lsi_rb.html +125 -0
  55. data/doc/files/lib/classifier/string_extensions_rb.html +2 -3
  56. data/doc/files/lib/classifier_rb.html +3 -1
  57. data/doc/fr_class_index.html +6 -2
  58. data/doc/fr_file_index.html +5 -2
  59. data/doc/fr_method_index.html +34 -11
  60. data/lib/classifier.rb +3 -1
  61. data/lib/classifier/bayes.rb +34 -9
  62. data/lib/classifier/extensions/vector_serialize.rb +14 -0
  63. data/lib/classifier/extensions/word_hash.rb +125 -0
  64. data/lib/classifier/extensions/word_list.rb +31 -0
  65. data/lib/classifier/lsi.rb +248 -0
  66. data/lib/classifier/lsi/content_node.rb +67 -0
  67. data/lib/classifier/string_extensions.rb +10 -5
  68. data/test/bayes/bayesian_test.rb +2 -2
  69. data/test/lsi/lsi_test.rb +88 -0
  70. data/test/string_extensions/word_hash_test.rb +7 -5
  71. metadata +79 -24
  72. data/doc/classes/Classifier/Stemmable.html +0 -243
  73. data/doc/classes/Classifier/Stemmable.src/M000003.html +0 -102
  74. data/doc/classes/Classifier/WordHash.html +0 -178
  75. data/doc/classes/Classifier/WordHash.src/M000002.html +0 -28
  76. data/lib/classifier/string_extensions/porter_stemmer.rb +0 -199
  77. data/lib/classifier/string_extensions/word_hash.rb +0 -119
@@ -0,0 +1,115 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>File: content_node.rb</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="fileHeader">
50
+ <h1>content_node.rb</h1>
51
+ <table class="header-table">
52
+ <tr class="top-aligned-row">
53
+ <td><strong>Path:</strong></td>
54
+ <td>lib/classifier/lsi/content_node.rb
55
+ </td>
56
+ </tr>
57
+ <tr class="top-aligned-row">
58
+ <td><strong>Last Update:</strong></td>
59
+ <td>Sun Apr 24 21:35:57 PDT 2005</td>
60
+ </tr>
61
+ </table>
62
+ </div>
63
+ <!-- banner header -->
64
+
65
+ <div id="bodyContent">
66
+
67
+
68
+
69
+ <div id="contextContent">
70
+
71
+ <div id="description">
72
+ <table>
73
+ <tr><td valign="top">Author:</td><td>David Fayram (<a href="mailto:dfayram@lensmen.net">dfayram@lensmen.net</a>)
74
+
75
+ </td></tr>
76
+ <tr><td valign="top">Copyright:</td><td>Copyright &#169; 2005 David Fayram II
77
+
78
+ </td></tr>
79
+ <tr><td valign="top">License:</td><td>GPL
80
+
81
+ </td></tr>
82
+ </table>
83
+
84
+ </div>
85
+
86
+
87
+ </div>
88
+
89
+
90
+ </div>
91
+
92
+
93
+ <!-- if includes -->
94
+
95
+ <div id="section">
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+ <!-- if method_list -->
105
+
106
+
107
+ </div>
108
+
109
+
110
+ <div id="validator-badges">
111
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
112
+ </div>
113
+
114
+ </body>
115
+ </html>
@@ -0,0 +1,125 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>File: lsi.rb</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="fileHeader">
50
+ <h1>lsi.rb</h1>
51
+ <table class="header-table">
52
+ <tr class="top-aligned-row">
53
+ <td><strong>Path:</strong></td>
54
+ <td>lib/classifier/lsi.rb
55
+ </td>
56
+ </tr>
57
+ <tr class="top-aligned-row">
58
+ <td><strong>Last Update:</strong></td>
59
+ <td>Sun Apr 24 21:34:06 PDT 2005</td>
60
+ </tr>
61
+ </table>
62
+ </div>
63
+ <!-- banner header -->
64
+
65
+ <div id="bodyContent">
66
+
67
+
68
+
69
+ <div id="contextContent">
70
+
71
+ <div id="description">
72
+ <table>
73
+ <tr><td valign="top">Author:</td><td>David Fayram (<a href="mailto:dfayram@lensmen.net">dfayram@lensmen.net</a>)
74
+
75
+ </td></tr>
76
+ <tr><td valign="top">Copyright:</td><td>Copyright &#169; 2005 David Fayram II
77
+
78
+ </td></tr>
79
+ <tr><td valign="top">License:</td><td>GPL
80
+
81
+ </td></tr>
82
+ </table>
83
+
84
+ </div>
85
+
86
+ <div id="requires-list">
87
+ <h3 class="section-bar">Required files</h3>
88
+
89
+ <div class="name-list">
90
+ gsl&nbsp;&nbsp;
91
+ classifier/extensions/word_list&nbsp;&nbsp;
92
+ classifier/extensions/vector_serialize&nbsp;&nbsp;
93
+ classifier/lsi/content_node&nbsp;&nbsp;
94
+ </div>
95
+ </div>
96
+
97
+ </div>
98
+
99
+
100
+ </div>
101
+
102
+
103
+ <!-- if includes -->
104
+
105
+ <div id="section">
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+ <!-- if method_list -->
115
+
116
+
117
+ </div>
118
+
119
+
120
+ <div id="validator-badges">
121
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
122
+ </div>
123
+
124
+ </body>
125
+ </html>
@@ -56,7 +56,7 @@
56
56
  </tr>
57
57
  <tr class="top-aligned-row">
58
58
  <td><strong>Last Update:</strong></td>
59
- <td>Mon Apr 11 12:27:41 PDT 2005</td>
59
+ <td>Sun Apr 24 02:08:49 PDT 2005</td>
60
60
  </tr>
61
61
  </table>
62
62
  </div>
@@ -87,8 +87,7 @@
87
87
  <h3 class="section-bar">Required files</h3>
88
88
 
89
89
  <div class="name-list">
90
- classifier/string_extensions/porter_stemmer&nbsp;&nbsp;
91
- classifier/string_extensions/word_hash&nbsp;&nbsp;
90
+ classifier/extensions/word_hash&nbsp;&nbsp;
92
91
  </div>
93
92
  </div>
94
93
 
@@ -56,7 +56,7 @@
56
56
  </tr>
57
57
  <tr class="top-aligned-row">
58
58
  <td><strong>Last Update:</strong></td>
59
- <td>Mon Apr 11 12:27:41 PDT 2005</td>
59
+ <td>Sun Apr 24 02:08:49 PDT 2005</td>
60
60
  </tr>
61
61
  </table>
62
62
  </div>
@@ -87,8 +87,10 @@
87
87
  <h3 class="section-bar">Required files</h3>
88
88
 
89
89
  <div class="name-list">
90
+ rubygems&nbsp;&nbsp;
90
91
  classifier/string_extensions&nbsp;&nbsp;
91
92
  classifier/bayes&nbsp;&nbsp;
93
+ classifier/lsi&nbsp;&nbsp;
92
94
  </div>
93
95
  </div>
94
96
 
@@ -22,8 +22,12 @@
22
22
  <div id="index-entries">
23
23
  <a href="classes/Classifier.html">Classifier</a><br />
24
24
  <a href="classes/Classifier/Bayes.html">Classifier::Bayes</a><br />
25
- <a href="classes/Classifier/Stemmable.html">Classifier::Stemmable</a><br />
26
- <a href="classes/Classifier/WordHash.html">Classifier::WordHash</a><br />
25
+ <a href="classes/Classifier/ContentNode.html">Classifier::ContentNode</a><br />
26
+ <a href="classes/Classifier/LSI.html">Classifier::LSI</a><br />
27
+ <a href="classes/Classifier/WordList.html">Classifier::WordList</a><br />
28
+ <a href="classes/GSL.html">GSL</a><br />
29
+ <a href="classes/GSL/Vector.html">GSL::Vector</a><br />
30
+ <a href="classes/Object.html">Object</a><br />
27
31
  <a href="classes/String.html">String</a><br />
28
32
  </div>
29
33
  </div>
@@ -23,9 +23,12 @@
23
23
  <a href="files/README.html">README</a><br />
24
24
  <a href="files/lib/classifier_rb.html">lib/classifier.rb</a><br />
25
25
  <a href="files/lib/classifier/bayes_rb.html">lib/classifier/bayes.rb</a><br />
26
+ <a href="files/lib/classifier/extensions/vector_serialize_rb.html">lib/classifier/extensions/vector_serialize.rb</a><br />
27
+ <a href="files/lib/classifier/extensions/word_hash_rb.html">lib/classifier/extensions/word_hash.rb</a><br />
28
+ <a href="files/lib/classifier/extensions/word_list_rb.html">lib/classifier/extensions/word_list.rb</a><br />
29
+ <a href="files/lib/classifier/lsi_rb.html">lib/classifier/lsi.rb</a><br />
30
+ <a href="files/lib/classifier/lsi/content_node_rb.html">lib/classifier/lsi/content_node.rb</a><br />
26
31
  <a href="files/lib/classifier/string_extensions_rb.html">lib/classifier/string_extensions.rb</a><br />
27
- <a href="files/lib/classifier/string_extensions/porter_stemmer_rb.html">lib/classifier/string_extensions/porter_stemmer.rb</a><br />
28
- <a href="files/lib/classifier/string_extensions/word_hash_rb.html">lib/classifier/string_extensions/word_hash.rb</a><br />
29
32
  </div>
30
33
  </div>
31
34
  </body>
@@ -20,17 +20,40 @@
20
20
  <div id="index">
21
21
  <h1 class="section-bar">Methods</h1>
22
22
  <div id="index-entries">
23
- <a href="classes/Classifier/Bayes.html#M000010">add_category (Classifier::Bayes)</a><br />
24
- <a href="classes/Classifier/Bayes.html#M000011">append_category (Classifier::Bayes)</a><br />
25
- <a href="classes/Classifier/Bayes.html#M000007">classifications (Classifier::Bayes)</a><br />
26
- <a href="classes/Classifier/Bayes.html#M000008">classify (Classifier::Bayes)</a><br />
27
- <a href="classes/Classifier/Bayes.html#M000009">method_missing (Classifier::Bayes)</a><br />
28
- <a href="classes/Classifier/Bayes.html#M000005">new (Classifier::Bayes)</a><br />
29
- <a href="classes/Classifier/Stemmable.html#M000004">stem (Classifier::Stemmable)</a><br />
30
- <a href="classes/Classifier/Stemmable.html#M000003">stem_porter (Classifier::Stemmable)</a><br />
31
- <a href="classes/Classifier/Bayes.html#M000006">train (Classifier::Bayes)</a><br />
32
- <a href="classes/Classifier/WordHash.html#M000001">without_punctuation (Classifier::WordHash)</a><br />
33
- <a href="classes/Classifier/WordHash.html#M000002">word_hash (Classifier::WordHash)</a><br />
23
+ <a href="classes/Classifier/LSI.html#M000014"><< (Classifier::LSI)</a><br />
24
+ <a href="classes/Classifier/WordList.html#M000009">[] (Classifier::WordList)</a><br />
25
+ <a href="classes/GSL/Vector.html#M000005">_dump (GSL::Vector)</a><br />
26
+ <a href="classes/GSL/Vector.html#M000006">_load (GSL::Vector)</a><br />
27
+ <a href="classes/Classifier/Bayes.html#M000029">add_category (Classifier::Bayes)</a><br />
28
+ <a href="classes/Classifier/LSI.html#M000013">add_item (Classifier::LSI)</a><br />
29
+ <a href="classes/Classifier/WordList.html#M000008">add_word (Classifier::WordList)</a><br />
30
+ <a href="classes/Classifier/Bayes.html#M000030">append_category (Classifier::Bayes)</a><br />
31
+ <a href="classes/Classifier/LSI.html#M000017">build_index (Classifier::LSI)</a><br />
32
+ <a href="classes/Classifier/Bayes.html#M000026">classifications (Classifier::Bayes)</a><br />
33
+ <a href="classes/Classifier/LSI.html#M000022">classify (Classifier::LSI)</a><br />
34
+ <a href="classes/Classifier/Bayes.html#M000027">classify (Classifier::Bayes)</a><br />
35
+ <a href="classes/String.html#M000004">clean_word_hash (String)</a><br />
36
+ <a href="classes/Classifier/LSI.html#M000021">find_related (Classifier::LSI)</a><br />
37
+ <a href="classes/Classifier/LSI.html#M000016">items (Classifier::LSI)</a><br />
38
+ <a href="classes/Classifier/Bayes.html#M000028">method_missing (Classifier::Bayes)</a><br />
39
+ <a href="classes/Classifier/LSI.html#M000012">needs_rebuild? (Classifier::LSI)</a><br />
40
+ <a href="classes/Classifier/Bayes.html#M000023">new (Classifier::Bayes)</a><br />
41
+ <a href="classes/Classifier/LSI.html#M000011">new (Classifier::LSI)</a><br />
42
+ <a href="classes/Classifier/ContentNode.html#M000031">new (Classifier::ContentNode)</a><br />
43
+ <a href="classes/Classifier/WordList.html#M000007">new (Classifier::WordList)</a><br />
44
+ <a href="classes/Object.html#M000001">prepare_category_name (Object)</a><br />
45
+ <a href="classes/Classifier/LSI.html#M000018">proximity_array_for_content (Classifier::LSI)</a><br />
46
+ <a href="classes/Classifier/LSI.html#M000019">proximity_norms_for_content (Classifier::LSI)</a><br />
47
+ <a href="classes/Classifier/ContentNode.html#M000034">raw_vector_with (Classifier::ContentNode)</a><br />
48
+ <a href="classes/Classifier/LSI.html#M000015">remove_item (Classifier::LSI)</a><br />
49
+ <a href="classes/Classifier/LSI.html#M000020">search (Classifier::LSI)</a><br />
50
+ <a href="classes/Classifier/ContentNode.html#M000033">search_norm (Classifier::ContentNode)</a><br />
51
+ <a href="classes/Classifier/ContentNode.html#M000032">search_vector (Classifier::ContentNode)</a><br />
52
+ <a href="classes/Classifier/WordList.html#M000010">size (Classifier::WordList)</a><br />
53
+ <a href="classes/Classifier/Bayes.html#M000024">train (Classifier::Bayes)</a><br />
54
+ <a href="classes/Classifier/Bayes.html#M000025">untrain (Classifier::Bayes)</a><br />
55
+ <a href="classes/String.html#M000002">without_punctuation (String)</a><br />
56
+ <a href="classes/String.html#M000003">word_hash (String)</a><br />
34
57
  </div>
35
58
  </div>
36
59
  </body>
data/lib/classifier.rb CHANGED
@@ -24,5 +24,7 @@
24
24
  # Copyright:: Copyright (c) 2005 Lucas Carlson
25
25
  # License:: LGPL
26
26
 
27
+ require 'rubygems'
27
28
  require 'classifier/string_extensions'
28
- require 'classifier/bayes'
29
+ require 'classifier/bayes'
30
+ require 'classifier/lsi'
@@ -10,7 +10,7 @@ class Bayes
10
10
  # b = Classifier::Bayes.new 'Interesting', 'Uninteresting', 'Spam'
11
11
  def initialize(*categories)
12
12
  @categories = Hash.new
13
- categories.each { |category| @categories[category.to_s.gsub("_"," ").capitalize.intern] = Hash.new }
13
+ categories.each { |category| @categories[category.prepare_category_name] = Hash.new }
14
14
  @total_words = 0
15
15
  end
16
16
 
@@ -22,14 +22,38 @@ class Bayes
22
22
  # b.train "that", "That text"
23
23
  # b.train "The other", "The other text"
24
24
  def train(category, text)
25
- category = category.to_s.gsub("_"," ").capitalize.intern
25
+ category = category.prepare_category_name
26
26
  text.word_hash.each do |word, count|
27
27
  @categories[category][word] ||= 0
28
28
  @categories[category][word] += count
29
29
  @total_words += count
30
30
  end
31
31
  end
32
-
32
+
33
+ #
34
+ # Provides an untraining method for all categories specified in Bayes#new
35
+ # Be very careful with this method.
36
+ #
37
+ # For example:
38
+ # b = Classifier::Bayes.new 'This', 'That', 'the_other'
39
+ # b.train :this, "This text"
40
+ # b.untrain :this, "This text"
41
+ def untrain(category, text)
42
+ category = category.prepare_category_name
43
+ text.word_hash.each do |word, count|
44
+ if @total_words >= 0
45
+ orig = @categories[category][word]
46
+ @categories[category][word] ||= 0
47
+ @categories[category][word] -= count
48
+ if @categories[category][word] <= 0
49
+ @categories[category].delete(word)
50
+ count = orig
51
+ end
52
+ @total_words -= count
53
+ end
54
+ end
55
+ end
56
+
33
57
  #
34
58
  # Returns the scores in each category for the provided +text+. E.g.,
35
59
  # b.classifications "I hate bad words and you"
@@ -58,17 +82,18 @@ class Bayes
58
82
  end
59
83
 
60
84
  #
61
- # Provides training methods for the categories specified in Bayes#new
85
+ # Provides training and untraining methods for the categories specified in Bayes#new
62
86
  # For example:
63
87
  # b = Classifier::Bayes.new 'This', 'That', 'the_other'
64
88
  # b.train_this "This text"
65
89
  # b.train_that "That text"
90
+ # b.untrain_that "That text"
66
91
  # b.train_the_other "The other text"
67
92
  def method_missing(name, *args)
68
- category = name.to_s.gsub(/train_([\w]+)/, '\1').gsub("_"," ").capitalize.intern
93
+ category = name.to_s.gsub(/(un)?train_([\w]+)/, '\2').prepare_category_name
69
94
  if @categories.has_key? category
70
- args.each {|text| train category, text}
71
- elsif name.to_s =~ /train_([\w]+)/
95
+ args.each { |text| eval("#{$1}train(category, text)") }
96
+ elsif name.to_s =~ /(un)?train_([\w]+)/
72
97
  raise StandardError, "No such category: #{category}"
73
98
  else
74
99
  super #raise StandardError, "No such method: #{name}"
@@ -94,10 +119,10 @@ class Bayes
94
119
  # more criteria than the trained selective categories. In short,
95
120
  # try to initialize your categories at initialization.
96
121
  def add_category(category)
97
- @categories[category.to_s.gsub("_"," ").capitalize.intern] = Hash.new
122
+ @categories[category.prepare_category_name] = Hash.new
98
123
  end
99
124
 
100
- alias append_category add_category # :nodoc:
125
+ alias append_category add_category
101
126
  end
102
127
 
103
128
  end