reclassifier 0.4.7 → 0.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/reclassifier/bayes.rb +36 -32
- data/lib/reclassifier/version.rb +1 -1
- metadata +1 -1
data/lib/reclassifier/bayes.rb
CHANGED
@@ -18,7 +18,8 @@ class Reclassifier::Bayes
|
|
18
18
|
# Options:
|
19
19
|
# * :clean - If false, punctuation will be included in the classifier. Otherwise, punctuation will be omitted. Default is true.
|
20
20
|
#
|
21
|
-
#
|
21
|
+
#
|
22
|
+
# b = Reclassifier::Bayes.new([:interesting, :uninteresting, :spam], :clean => true)
|
22
23
|
#
|
23
24
|
def initialize(classifications = [], options = {})
|
24
25
|
@classifications = {}
|
@@ -31,9 +32,9 @@ class Reclassifier::Bayes
|
|
31
32
|
#
|
32
33
|
# Provides a general training method for all classifications specified in Bayes#new
|
33
34
|
#
|
34
|
-
#
|
35
|
-
#
|
36
|
-
#
|
35
|
+
# b = Reclassifier::Bayes.new([:this, :that])
|
36
|
+
# b.train(:this, "This text")
|
37
|
+
# b.train(:that, "That text")
|
37
38
|
#
|
38
39
|
def train(classification, text)
|
39
40
|
ensure_classification_exists(classification)
|
@@ -51,9 +52,9 @@ class Reclassifier::Bayes
|
|
51
52
|
# Untrain a (classification, text) pair.
|
52
53
|
# Be very careful with this method.
|
53
54
|
#
|
54
|
-
#
|
55
|
-
#
|
56
|
-
#
|
55
|
+
# b = Reclassifier::Bayes.new([:this, :that])
|
56
|
+
# b.train(:this, "This text")
|
57
|
+
# b.untrain(:this, "This text")
|
57
58
|
#
|
58
59
|
def untrain(classification, text)
|
59
60
|
ensure_classification_exists(classification)
|
@@ -67,8 +68,8 @@ class Reclassifier::Bayes
|
|
67
68
|
|
68
69
|
# Returns the scores of the specified text for each classification.
|
69
70
|
#
|
70
|
-
#
|
71
|
-
#
|
71
|
+
# b.calculate_scores("I hate bad words and you")
|
72
|
+
# => {:uninteresting=>-12.6997928013932, :interesting=>-18.4206807439524}
|
72
73
|
#
|
73
74
|
# The largest of these scores (the one closest to 0) is the one picked out by #classify
|
74
75
|
#
|
@@ -100,8 +101,8 @@ class Reclassifier::Bayes
|
|
100
101
|
# Returns the classification of the specified text, which is one of the
|
101
102
|
# classifications given in the initializer.
|
102
103
|
#
|
103
|
-
#
|
104
|
-
#
|
104
|
+
# b.classify("I hate bad words and you")
|
105
|
+
# => :uninteresting
|
105
106
|
#
|
106
107
|
def classify(text)
|
107
108
|
calculate_scores(text).max_by {|classification| classification[1]}[0]
|
@@ -109,8 +110,8 @@ class Reclassifier::Bayes
|
|
109
110
|
|
110
111
|
# Provides a list of classification names
|
111
112
|
#
|
112
|
-
#
|
113
|
-
#
|
113
|
+
# b.classifications
|
114
|
+
# => [:this, :that, :the_other]
|
114
115
|
#
|
115
116
|
def classifications
|
116
117
|
@classifications.keys
|
@@ -120,7 +121,8 @@ class Reclassifier::Bayes
|
|
120
121
|
# Has no effect if the classification already existed.
|
121
122
|
# Returns the classification.
|
122
123
|
#
|
123
|
-
#
|
124
|
+
# b.add_classification(:not_spam)
|
125
|
+
# => :not_spam
|
124
126
|
#
|
125
127
|
def add_classification(classification)
|
126
128
|
@classifications[classification] ||= {}
|
@@ -134,7 +136,8 @@ class Reclassifier::Bayes
|
|
134
136
|
# Removes the classification from the classifier.
|
135
137
|
# Returns the classification if it existed, else nil.
|
136
138
|
#
|
137
|
-
#
|
139
|
+
# b.remove_classification(:not_spam)
|
140
|
+
# => :not_spam
|
138
141
|
#
|
139
142
|
def remove_classification(classification)
|
140
143
|
return_value = if @classifications.include?(classification)
|
@@ -150,20 +153,20 @@ class Reclassifier::Bayes
|
|
150
153
|
|
151
154
|
# Invalidates the cache.
|
152
155
|
#
|
153
|
-
#
|
156
|
+
# classifier = Reclassifier::Bayes.new([:one, :other])
|
154
157
|
#
|
155
|
-
#
|
156
|
-
#
|
158
|
+
# classifier.train(:one, 'bbb')
|
159
|
+
# classifier.train(:other, 'aaa')
|
157
160
|
#
|
158
|
-
#
|
161
|
+
# classifier.classify('aaa')
|
159
162
|
#
|
160
|
-
#
|
161
|
-
#
|
163
|
+
# classifier.cache_set?
|
164
|
+
# => true
|
162
165
|
#
|
163
|
-
#
|
166
|
+
# classifier.invalidate_cache
|
164
167
|
#
|
165
|
-
#
|
166
|
-
#
|
168
|
+
# classifier.cache_set?
|
169
|
+
# => false
|
167
170
|
#
|
168
171
|
def invalidate_cache
|
169
172
|
@cache = {}
|
@@ -171,18 +174,19 @@ class Reclassifier::Bayes
|
|
171
174
|
|
172
175
|
# Returns true if the cache has been set (i.e. #classify has been run).
|
173
176
|
# Returns false otherwise.
|
174
|
-
# classifier = Reclassifier::Bayes.new([:one, :other])
|
175
177
|
#
|
176
|
-
#
|
177
|
-
#
|
178
|
+
# classifier = Reclassifier::Bayes.new([:one, :other])
|
179
|
+
#
|
180
|
+
# classifier.cache_set?
|
181
|
+
# => false
|
178
182
|
#
|
179
|
-
#
|
180
|
-
#
|
183
|
+
# classifier.train(:one, 'bbb')
|
184
|
+
# classifier.train(:other, 'aaa')
|
181
185
|
#
|
182
|
-
#
|
186
|
+
# classifier.classify('aaa')
|
183
187
|
#
|
184
|
-
#
|
185
|
-
#
|
188
|
+
# classifier.cache_set?
|
189
|
+
# => true
|
186
190
|
#
|
187
191
|
def cache_set?
|
188
192
|
@cache.present?
|
data/lib/reclassifier/version.rb
CHANGED