classifier 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (77) hide show
  1. data/LICENSE +341 -0
  2. data/README +59 -6
  3. data/Rakefile +16 -4
  4. data/bin/bayes.rb +8 -2
  5. data/doc/classes/Classifier.html +15 -10
  6. data/doc/classes/Classifier/Bayes.html +68 -38
  7. data/doc/classes/Classifier/Bayes.src/{M000005.html → M000023.html} +1 -1
  8. data/doc/classes/Classifier/Bayes.src/{M000006.html → M000024.html} +1 -1
  9. data/doc/classes/Classifier/Bayes.src/M000025.html +30 -0
  10. data/doc/classes/Classifier/Bayes.src/{M000007.html → M000026.html} +1 -1
  11. data/doc/classes/Classifier/Bayes.src/{M000008.html → M000027.html} +1 -1
  12. data/doc/classes/Classifier/Bayes.src/{M000009.html → M000028.html} +4 -4
  13. data/doc/classes/Classifier/Bayes.src/{M000010.html → M000029.html} +2 -2
  14. data/doc/classes/Classifier/ContentNode.html +252 -0
  15. data/doc/classes/Classifier/ContentNode.src/M000031.html +21 -0
  16. data/doc/classes/Classifier/ContentNode.src/M000032.html +18 -0
  17. data/doc/classes/Classifier/ContentNode.src/M000033.html +18 -0
  18. data/doc/classes/Classifier/ContentNode.src/M000034.html +41 -0
  19. data/doc/classes/Classifier/LSI.html +449 -0
  20. data/doc/classes/Classifier/LSI.src/M000011.html +20 -0
  21. data/doc/classes/Classifier/LSI.src/M000012.html +18 -0
  22. data/doc/classes/Classifier/LSI.src/M000013.html +20 -0
  23. data/doc/classes/Classifier/LSI.src/M000014.html +18 -0
  24. data/doc/classes/Classifier/LSI.src/M000015.html +21 -0
  25. data/doc/classes/Classifier/LSI.src/M000016.html +18 -0
  26. data/doc/classes/Classifier/LSI.src/M000017.html +32 -0
  27. data/doc/classes/Classifier/LSI.src/M000018.html +26 -0
  28. data/doc/classes/Classifier/LSI.src/M000019.html +26 -0
  29. data/doc/classes/Classifier/LSI.src/M000020.html +23 -0
  30. data/doc/classes/Classifier/LSI.src/M000021.html +21 -0
  31. data/doc/classes/Classifier/LSI.src/M000022.html +31 -0
  32. data/doc/classes/Classifier/WordList.html +202 -0
  33. data/doc/classes/Classifier/WordList.src/M000007.html +18 -0
  34. data/doc/classes/Classifier/WordList.src/M000008.html +19 -0
  35. data/doc/classes/Classifier/WordList.src/M000009.html +19 -0
  36. data/doc/classes/Classifier/WordList.src/M000010.html +18 -0
  37. data/doc/classes/GSL.html +111 -0
  38. data/doc/classes/GSL/Vector.html +156 -0
  39. data/doc/classes/GSL/Vector.src/M000005.html +18 -0
  40. data/doc/classes/GSL/Vector.src/M000006.html +19 -0
  41. data/doc/classes/Object.html +139 -0
  42. data/doc/classes/Object.src/M000001.html +16 -0
  43. data/doc/classes/String.html +95 -9
  44. data/doc/classes/{Classifier/WordHash.src/M000001.html → String.src/M000002.html} +3 -3
  45. data/doc/classes/String.src/M000003.html +18 -0
  46. data/doc/classes/String.src/M000004.html +18 -0
  47. data/doc/created.rid +1 -1
  48. data/doc/files/README.html +102 -12
  49. data/doc/files/lib/classifier/bayes_rb.html +1 -1
  50. data/doc/files/lib/classifier/{string_extensions/porter_stemmer_rb.html → extensions/vector_serialize_rb.html} +4 -15
  51. data/doc/files/lib/classifier/{string_extensions → extensions}/word_hash_rb.html +2 -2
  52. data/doc/files/lib/classifier/extensions/word_list_rb.html +115 -0
  53. data/doc/files/lib/classifier/lsi/content_node_rb.html +115 -0
  54. data/doc/files/lib/classifier/lsi_rb.html +125 -0
  55. data/doc/files/lib/classifier/string_extensions_rb.html +2 -3
  56. data/doc/files/lib/classifier_rb.html +3 -1
  57. data/doc/fr_class_index.html +6 -2
  58. data/doc/fr_file_index.html +5 -2
  59. data/doc/fr_method_index.html +34 -11
  60. data/lib/classifier.rb +3 -1
  61. data/lib/classifier/bayes.rb +34 -9
  62. data/lib/classifier/extensions/vector_serialize.rb +14 -0
  63. data/lib/classifier/extensions/word_hash.rb +125 -0
  64. data/lib/classifier/extensions/word_list.rb +31 -0
  65. data/lib/classifier/lsi.rb +248 -0
  66. data/lib/classifier/lsi/content_node.rb +67 -0
  67. data/lib/classifier/string_extensions.rb +10 -5
  68. data/test/bayes/bayesian_test.rb +2 -2
  69. data/test/lsi/lsi_test.rb +88 -0
  70. data/test/string_extensions/word_hash_test.rb +7 -5
  71. metadata +79 -24
  72. data/doc/classes/Classifier/Stemmable.html +0 -243
  73. data/doc/classes/Classifier/Stemmable.src/M000003.html +0 -102
  74. data/doc/classes/Classifier/WordHash.html +0 -178
  75. data/doc/classes/Classifier/WordHash.src/M000002.html +0 -28
  76. data/lib/classifier/string_extensions/porter_stemmer.rb +0 -199
  77. data/lib/classifier/string_extensions/word_hash.rb +0 -119
@@ -0,0 +1,115 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>File: content_node.rb</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="fileHeader">
50
+ <h1>content_node.rb</h1>
51
+ <table class="header-table">
52
+ <tr class="top-aligned-row">
53
+ <td><strong>Path:</strong></td>
54
+ <td>lib/classifier/lsi/content_node.rb
55
+ </td>
56
+ </tr>
57
+ <tr class="top-aligned-row">
58
+ <td><strong>Last Update:</strong></td>
59
+ <td>Sun Apr 24 21:35:57 PDT 2005</td>
60
+ </tr>
61
+ </table>
62
+ </div>
63
+ <!-- banner header -->
64
+
65
+ <div id="bodyContent">
66
+
67
+
68
+
69
+ <div id="contextContent">
70
+
71
+ <div id="description">
72
+ <table>
73
+ <tr><td valign="top">Author:</td><td>David Fayram (<a href="mailto:dfayram@lensmen.net">dfayram@lensmen.net</a>)
74
+
75
+ </td></tr>
76
+ <tr><td valign="top">Copyright:</td><td>Copyright &#169; 2005 David Fayram II
77
+
78
+ </td></tr>
79
+ <tr><td valign="top">License:</td><td>GPL
80
+
81
+ </td></tr>
82
+ </table>
83
+
84
+ </div>
85
+
86
+
87
+ </div>
88
+
89
+
90
+ </div>
91
+
92
+
93
+ <!-- if includes -->
94
+
95
+ <div id="section">
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+ <!-- if method_list -->
105
+
106
+
107
+ </div>
108
+
109
+
110
+ <div id="validator-badges">
111
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
112
+ </div>
113
+
114
+ </body>
115
+ </html>
@@ -0,0 +1,125 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>File: lsi.rb</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="fileHeader">
50
+ <h1>lsi.rb</h1>
51
+ <table class="header-table">
52
+ <tr class="top-aligned-row">
53
+ <td><strong>Path:</strong></td>
54
+ <td>lib/classifier/lsi.rb
55
+ </td>
56
+ </tr>
57
+ <tr class="top-aligned-row">
58
+ <td><strong>Last Update:</strong></td>
59
+ <td>Sun Apr 24 21:34:06 PDT 2005</td>
60
+ </tr>
61
+ </table>
62
+ </div>
63
+ <!-- banner header -->
64
+
65
+ <div id="bodyContent">
66
+
67
+
68
+
69
+ <div id="contextContent">
70
+
71
+ <div id="description">
72
+ <table>
73
+ <tr><td valign="top">Author:</td><td>David Fayram (<a href="mailto:dfayram@lensmen.net">dfayram@lensmen.net</a>)
74
+
75
+ </td></tr>
76
+ <tr><td valign="top">Copyright:</td><td>Copyright &#169; 2005 David Fayram II
77
+
78
+ </td></tr>
79
+ <tr><td valign="top">License:</td><td>GPL
80
+
81
+ </td></tr>
82
+ </table>
83
+
84
+ </div>
85
+
86
+ <div id="requires-list">
87
+ <h3 class="section-bar">Required files</h3>
88
+
89
+ <div class="name-list">
90
+ gsl&nbsp;&nbsp;
91
+ classifier/extensions/word_list&nbsp;&nbsp;
92
+ classifier/extensions/vector_serialize&nbsp;&nbsp;
93
+ classifier/lsi/content_node&nbsp;&nbsp;
94
+ </div>
95
+ </div>
96
+
97
+ </div>
98
+
99
+
100
+ </div>
101
+
102
+
103
+ <!-- if includes -->
104
+
105
+ <div id="section">
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+ <!-- if method_list -->
115
+
116
+
117
+ </div>
118
+
119
+
120
+ <div id="validator-badges">
121
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
122
+ </div>
123
+
124
+ </body>
125
+ </html>
@@ -56,7 +56,7 @@
56
56
  </tr>
57
57
  <tr class="top-aligned-row">
58
58
  <td><strong>Last Update:</strong></td>
59
- <td>Mon Apr 11 12:27:41 PDT 2005</td>
59
+ <td>Sun Apr 24 02:08:49 PDT 2005</td>
60
60
  </tr>
61
61
  </table>
62
62
  </div>
@@ -87,8 +87,7 @@
87
87
  <h3 class="section-bar">Required files</h3>
88
88
 
89
89
  <div class="name-list">
90
- classifier/string_extensions/porter_stemmer&nbsp;&nbsp;
91
- classifier/string_extensions/word_hash&nbsp;&nbsp;
90
+ classifier/extensions/word_hash&nbsp;&nbsp;
92
91
  </div>
93
92
  </div>
94
93
 
@@ -56,7 +56,7 @@
56
56
  </tr>
57
57
  <tr class="top-aligned-row">
58
58
  <td><strong>Last Update:</strong></td>
59
- <td>Mon Apr 11 12:27:41 PDT 2005</td>
59
+ <td>Sun Apr 24 02:08:49 PDT 2005</td>
60
60
  </tr>
61
61
  </table>
62
62
  </div>
@@ -87,8 +87,10 @@
87
87
  <h3 class="section-bar">Required files</h3>
88
88
 
89
89
  <div class="name-list">
90
+ rubygems&nbsp;&nbsp;
90
91
  classifier/string_extensions&nbsp;&nbsp;
91
92
  classifier/bayes&nbsp;&nbsp;
93
+ classifier/lsi&nbsp;&nbsp;
92
94
  </div>
93
95
  </div>
94
96
 
@@ -22,8 +22,12 @@
22
22
  <div id="index-entries">
23
23
  <a href="classes/Classifier.html">Classifier</a><br />
24
24
  <a href="classes/Classifier/Bayes.html">Classifier::Bayes</a><br />
25
- <a href="classes/Classifier/Stemmable.html">Classifier::Stemmable</a><br />
26
- <a href="classes/Classifier/WordHash.html">Classifier::WordHash</a><br />
25
+ <a href="classes/Classifier/ContentNode.html">Classifier::ContentNode</a><br />
26
+ <a href="classes/Classifier/LSI.html">Classifier::LSI</a><br />
27
+ <a href="classes/Classifier/WordList.html">Classifier::WordList</a><br />
28
+ <a href="classes/GSL.html">GSL</a><br />
29
+ <a href="classes/GSL/Vector.html">GSL::Vector</a><br />
30
+ <a href="classes/Object.html">Object</a><br />
27
31
  <a href="classes/String.html">String</a><br />
28
32
  </div>
29
33
  </div>
@@ -23,9 +23,12 @@
23
23
  <a href="files/README.html">README</a><br />
24
24
  <a href="files/lib/classifier_rb.html">lib/classifier.rb</a><br />
25
25
  <a href="files/lib/classifier/bayes_rb.html">lib/classifier/bayes.rb</a><br />
26
+ <a href="files/lib/classifier/extensions/vector_serialize_rb.html">lib/classifier/extensions/vector_serialize.rb</a><br />
27
+ <a href="files/lib/classifier/extensions/word_hash_rb.html">lib/classifier/extensions/word_hash.rb</a><br />
28
+ <a href="files/lib/classifier/extensions/word_list_rb.html">lib/classifier/extensions/word_list.rb</a><br />
29
+ <a href="files/lib/classifier/lsi_rb.html">lib/classifier/lsi.rb</a><br />
30
+ <a href="files/lib/classifier/lsi/content_node_rb.html">lib/classifier/lsi/content_node.rb</a><br />
26
31
  <a href="files/lib/classifier/string_extensions_rb.html">lib/classifier/string_extensions.rb</a><br />
27
- <a href="files/lib/classifier/string_extensions/porter_stemmer_rb.html">lib/classifier/string_extensions/porter_stemmer.rb</a><br />
28
- <a href="files/lib/classifier/string_extensions/word_hash_rb.html">lib/classifier/string_extensions/word_hash.rb</a><br />
29
32
  </div>
30
33
  </div>
31
34
  </body>
@@ -20,17 +20,40 @@
20
20
  <div id="index">
21
21
  <h1 class="section-bar">Methods</h1>
22
22
  <div id="index-entries">
23
- <a href="classes/Classifier/Bayes.html#M000010">add_category (Classifier::Bayes)</a><br />
24
- <a href="classes/Classifier/Bayes.html#M000011">append_category (Classifier::Bayes)</a><br />
25
- <a href="classes/Classifier/Bayes.html#M000007">classifications (Classifier::Bayes)</a><br />
26
- <a href="classes/Classifier/Bayes.html#M000008">classify (Classifier::Bayes)</a><br />
27
- <a href="classes/Classifier/Bayes.html#M000009">method_missing (Classifier::Bayes)</a><br />
28
- <a href="classes/Classifier/Bayes.html#M000005">new (Classifier::Bayes)</a><br />
29
- <a href="classes/Classifier/Stemmable.html#M000004">stem (Classifier::Stemmable)</a><br />
30
- <a href="classes/Classifier/Stemmable.html#M000003">stem_porter (Classifier::Stemmable)</a><br />
31
- <a href="classes/Classifier/Bayes.html#M000006">train (Classifier::Bayes)</a><br />
32
- <a href="classes/Classifier/WordHash.html#M000001">without_punctuation (Classifier::WordHash)</a><br />
33
- <a href="classes/Classifier/WordHash.html#M000002">word_hash (Classifier::WordHash)</a><br />
23
+ <a href="classes/Classifier/LSI.html#M000014">&lt;&lt; (Classifier::LSI)</a><br />
24
+ <a href="classes/Classifier/WordList.html#M000009">[] (Classifier::WordList)</a><br />
25
+ <a href="classes/GSL/Vector.html#M000005">_dump (GSL::Vector)</a><br />
26
+ <a href="classes/GSL/Vector.html#M000006">_load (GSL::Vector)</a><br />
27
+ <a href="classes/Classifier/Bayes.html#M000029">add_category (Classifier::Bayes)</a><br />
28
+ <a href="classes/Classifier/LSI.html#M000013">add_item (Classifier::LSI)</a><br />
29
+ <a href="classes/Classifier/WordList.html#M000008">add_word (Classifier::WordList)</a><br />
30
+ <a href="classes/Classifier/Bayes.html#M000030">append_category (Classifier::Bayes)</a><br />
31
+ <a href="classes/Classifier/LSI.html#M000017">build_index (Classifier::LSI)</a><br />
32
+ <a href="classes/Classifier/Bayes.html#M000026">classifications (Classifier::Bayes)</a><br />
33
+ <a href="classes/Classifier/LSI.html#M000022">classify (Classifier::LSI)</a><br />
34
+ <a href="classes/Classifier/Bayes.html#M000027">classify (Classifier::Bayes)</a><br />
35
+ <a href="classes/String.html#M000004">clean_word_hash (String)</a><br />
36
+ <a href="classes/Classifier/LSI.html#M000021">find_related (Classifier::LSI)</a><br />
37
+ <a href="classes/Classifier/LSI.html#M000016">items (Classifier::LSI)</a><br />
38
+ <a href="classes/Classifier/Bayes.html#M000028">method_missing (Classifier::Bayes)</a><br />
39
+ <a href="classes/Classifier/LSI.html#M000012">needs_rebuild? (Classifier::LSI)</a><br />
40
+ <a href="classes/Classifier/Bayes.html#M000023">new (Classifier::Bayes)</a><br />
41
+ <a href="classes/Classifier/LSI.html#M000011">new (Classifier::LSI)</a><br />
42
+ <a href="classes/Classifier/ContentNode.html#M000031">new (Classifier::ContentNode)</a><br />
43
+ <a href="classes/Classifier/WordList.html#M000007">new (Classifier::WordList)</a><br />
44
+ <a href="classes/Object.html#M000001">prepare_category_name (Object)</a><br />
45
+ <a href="classes/Classifier/LSI.html#M000018">proximity_array_for_content (Classifier::LSI)</a><br />
46
+ <a href="classes/Classifier/LSI.html#M000019">proximity_norms_for_content (Classifier::LSI)</a><br />
47
+ <a href="classes/Classifier/ContentNode.html#M000034">raw_vector_with (Classifier::ContentNode)</a><br />
48
+ <a href="classes/Classifier/LSI.html#M000015">remove_item (Classifier::LSI)</a><br />
49
+ <a href="classes/Classifier/LSI.html#M000020">search (Classifier::LSI)</a><br />
50
+ <a href="classes/Classifier/ContentNode.html#M000033">search_norm (Classifier::ContentNode)</a><br />
51
+ <a href="classes/Classifier/ContentNode.html#M000032">search_vector (Classifier::ContentNode)</a><br />
52
+ <a href="classes/Classifier/WordList.html#M000010">size (Classifier::WordList)</a><br />
53
+ <a href="classes/Classifier/Bayes.html#M000024">train (Classifier::Bayes)</a><br />
54
+ <a href="classes/Classifier/Bayes.html#M000025">untrain (Classifier::Bayes)</a><br />
55
+ <a href="classes/String.html#M000002">without_punctuation (String)</a><br />
56
+ <a href="classes/String.html#M000003">word_hash (String)</a><br />
34
57
  </div>
35
58
  </div>
36
59
  </body>
data/lib/classifier.rb CHANGED
@@ -24,5 +24,7 @@
24
24
  # Copyright:: Copyright (c) 2005 Lucas Carlson
25
25
  # License:: LGPL
26
26
 
27
+ require 'rubygems'
27
28
  require 'classifier/string_extensions'
28
- require 'classifier/bayes'
29
+ require 'classifier/bayes'
30
+ require 'classifier/lsi'
@@ -10,7 +10,7 @@ class Bayes
10
10
  # b = Classifier::Bayes.new 'Interesting', 'Uninteresting', 'Spam'
11
11
  def initialize(*categories)
12
12
  @categories = Hash.new
13
- categories.each { |category| @categories[category.to_s.gsub("_"," ").capitalize.intern] = Hash.new }
13
+ categories.each { |category| @categories[category.prepare_category_name] = Hash.new }
14
14
  @total_words = 0
15
15
  end
16
16
 
@@ -22,14 +22,38 @@ class Bayes
22
22
  # b.train "that", "That text"
23
23
  # b.train "The other", "The other text"
24
24
  def train(category, text)
25
- category = category.to_s.gsub("_"," ").capitalize.intern
25
+ category = category.prepare_category_name
26
26
  text.word_hash.each do |word, count|
27
27
  @categories[category][word] ||= 0
28
28
  @categories[category][word] += count
29
29
  @total_words += count
30
30
  end
31
31
  end
32
-
32
+
33
+ #
34
+ # Provides an untraining method for all categories specified in Bayes#new
35
+ # Be very careful with this method.
36
+ #
37
+ # For example:
38
+ # b = Classifier::Bayes.new 'This', 'That', 'the_other'
39
+ # b.train :this, "This text"
40
+ # b.untrain :this, "This text"
41
+ def untrain(category, text)
42
+ category = category.prepare_category_name
43
+ text.word_hash.each do |word, count|
44
+ if @total_words >= 0
45
+ orig = @categories[category][word]
46
+ @categories[category][word] ||= 0
47
+ @categories[category][word] -= count
48
+ if @categories[category][word] <= 0
49
+ @categories[category].delete(word)
50
+ count = orig
51
+ end
52
+ @total_words -= count
53
+ end
54
+ end
55
+ end
56
+
33
57
  #
34
58
  # Returns the scores in each category for the provided +text+. E.g.,
35
59
  # b.classifications "I hate bad words and you"
@@ -58,17 +82,18 @@ class Bayes
58
82
  end
59
83
 
60
84
  #
61
- # Provides training methods for the categories specified in Bayes#new
85
+ # Provides training and untraining methods for the categories specified in Bayes#new
62
86
  # For example:
63
87
  # b = Classifier::Bayes.new 'This', 'That', 'the_other'
64
88
  # b.train_this "This text"
65
89
  # b.train_that "That text"
90
+ # b.untrain_that "That text"
66
91
  # b.train_the_other "The other text"
67
92
  def method_missing(name, *args)
68
- category = name.to_s.gsub(/train_([\w]+)/, '\1').gsub("_"," ").capitalize.intern
93
+ category = name.to_s.gsub(/(un)?train_([\w]+)/, '\2').prepare_category_name
69
94
  if @categories.has_key? category
70
- args.each {|text| train category, text}
71
- elsif name.to_s =~ /train_([\w]+)/
95
+ args.each { |text| eval("#{$1}train(category, text)") }
96
+ elsif name.to_s =~ /(un)?train_([\w]+)/
72
97
  raise StandardError, "No such category: #{category}"
73
98
  else
74
99
  super #raise StandardError, "No such method: #{name}"
@@ -94,10 +119,10 @@ class Bayes
94
119
  # more criteria than the trained selective categories. In short,
95
120
  # try to initialize your categories at initialization.
96
121
  def add_category(category)
97
- @categories[category.to_s.gsub("_"," ").capitalize.intern] = Hash.new
122
+ @categories[category.prepare_category_name] = Hash.new
98
123
  end
99
124
 
100
- alias append_category add_category # :nodoc:
125
+ alias append_category add_category
101
126
  end
102
127
 
103
128
  end