reclassifier 0.4.7 → 0.4.8
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/reclassifier/bayes.rb +36 -32
- data/lib/reclassifier/version.rb +1 -1
- metadata +1 -1
data/lib/reclassifier/bayes.rb
CHANGED
@@ -18,7 +18,8 @@ class Reclassifier::Bayes
|
|
18
18
|
# Options:
|
19
19
|
# * :clean - If false, punctuation will be included in the classifier. Otherwise, punctuation will be omitted. Default is true.
|
20
20
|
#
|
21
|
-
#
|
21
|
+
#
|
22
|
+
# b = Reclassifier::Bayes.new([:interesting, :uninteresting, :spam], :clean => true)
|
22
23
|
#
|
23
24
|
def initialize(classifications = [], options = {})
|
24
25
|
@classifications = {}
|
@@ -31,9 +32,9 @@ class Reclassifier::Bayes
|
|
31
32
|
#
|
32
33
|
# Provides a general training method for all classifications specified in Bayes#new
|
33
34
|
#
|
34
|
-
#
|
35
|
-
#
|
36
|
-
#
|
35
|
+
# b = Reclassifier::Bayes.new([:this, :that])
|
36
|
+
# b.train(:this, "This text")
|
37
|
+
# b.train(:that, "That text")
|
37
38
|
#
|
38
39
|
def train(classification, text)
|
39
40
|
ensure_classification_exists(classification)
|
@@ -51,9 +52,9 @@ class Reclassifier::Bayes
|
|
51
52
|
# Untrain a (classification, text) pair.
|
52
53
|
# Be very careful with this method.
|
53
54
|
#
|
54
|
-
#
|
55
|
-
#
|
56
|
-
#
|
55
|
+
# b = Reclassifier::Bayes.new([:this, :that])
|
56
|
+
# b.train(:this, "This text")
|
57
|
+
# b.untrain(:this, "This text")
|
57
58
|
#
|
58
59
|
def untrain(classification, text)
|
59
60
|
ensure_classification_exists(classification)
|
@@ -67,8 +68,8 @@ class Reclassifier::Bayes
|
|
67
68
|
|
68
69
|
# Returns the scores of the specified text for each classification.
|
69
70
|
#
|
70
|
-
#
|
71
|
-
#
|
71
|
+
# b.calculate_scores("I hate bad words and you")
|
72
|
+
# => {"Uninteresting"=>-12.6997928013932, "Interesting"=>-18.4206807439524}
|
72
73
|
#
|
73
74
|
# The largest of these scores (the one closest to 0) is the one picked out by #classify
|
74
75
|
#
|
@@ -100,8 +101,8 @@ class Reclassifier::Bayes
|
|
100
101
|
# Returns the classification of the specified text, which is one of the
|
101
102
|
# classifications given in the initializer.
|
102
103
|
#
|
103
|
-
#
|
104
|
-
#
|
104
|
+
# b.classify("I hate bad words and you")
|
105
|
+
# => :uninteresting
|
105
106
|
#
|
106
107
|
def classify(text)
|
107
108
|
calculate_scores(text).max_by {|classification| classification[1]}[0]
|
@@ -109,8 +110,8 @@ class Reclassifier::Bayes
|
|
109
110
|
|
110
111
|
# Provides a list of classification names
|
111
112
|
#
|
112
|
-
#
|
113
|
-
#
|
113
|
+
# b.classifications
|
114
|
+
# => [:this, :that, :the_other]
|
114
115
|
#
|
115
116
|
def classifications
|
116
117
|
@classifications.keys
|
@@ -120,7 +121,8 @@ class Reclassifier::Bayes
|
|
120
121
|
# Has no effect if the classification already existed.
|
121
122
|
# Returns the classification.
|
122
123
|
#
|
123
|
-
#
|
124
|
+
# b.add_classification(:not_spam)
|
125
|
+
# => :not_spam
|
124
126
|
#
|
125
127
|
def add_classification(classification)
|
126
128
|
@classifications[classification] ||= {}
|
@@ -134,7 +136,8 @@ class Reclassifier::Bayes
|
|
134
136
|
# Removes the classification from the classifier.
|
135
137
|
# Returns the removed classification if it existed, else nil.
|
136
138
|
#
|
137
|
-
#
|
139
|
+
# b.remove_classification(:not_spam)
|
140
|
+
# => :not_spam
|
138
141
|
#
|
139
142
|
def remove_classification(classification)
|
140
143
|
return_value = if @classifications.include?(classification)
|
@@ -150,20 +153,20 @@ class Reclassifier::Bayes
|
|
150
153
|
|
151
154
|
# Invalidates the cache.
|
152
155
|
#
|
153
|
-
#
|
156
|
+
# classifier = Reclassifier::Bayes.new([:one, :other])
|
154
157
|
#
|
155
|
-
#
|
156
|
-
#
|
158
|
+
# classifier.train(:one, 'bbb')
|
159
|
+
# classifier.train(:other, 'aaa')
|
157
160
|
#
|
158
|
-
#
|
161
|
+
# classifier.classify('aaa')
|
159
162
|
#
|
160
|
-
#
|
161
|
-
#
|
163
|
+
# classifier.cache_set?
|
164
|
+
# => true
|
162
165
|
#
|
163
|
-
#
|
166
|
+
# classifier.invalidate_cache
|
164
167
|
#
|
165
|
-
#
|
166
|
-
#
|
168
|
+
# classifier.cache_set?
|
169
|
+
# => false
|
167
170
|
#
|
168
171
|
def invalidate_cache
|
169
172
|
@cache = {}
|
@@ -171,18 +174,19 @@ class Reclassifier::Bayes
|
|
171
174
|
|
172
175
|
# Returns true if the cache has been set (i.e. #classify has been run).
|
173
176
|
# Returns false otherwise.
|
174
|
-
# classifier = Reclassifier::Bayes.new([:one, :other])
|
175
177
|
#
|
176
|
-
#
|
177
|
-
#
|
178
|
+
# classifier = Reclassifier::Bayes.new([:one, :other])
|
179
|
+
#
|
180
|
+
# classifier.cache_set?
|
181
|
+
# => false
|
178
182
|
#
|
179
|
-
#
|
180
|
-
#
|
183
|
+
# classifier.train(:one, 'bbb')
|
184
|
+
# classifier.train(:other, 'aaa')
|
181
185
|
#
|
182
|
-
#
|
186
|
+
# classifier.classify('aaa')
|
183
187
|
#
|
184
|
-
#
|
185
|
-
#
|
188
|
+
# classifier.cache_set?
|
189
|
+
# => true
|
186
190
|
#
|
187
191
|
def cache_set?
|
188
192
|
@cache.present?
|
data/lib/reclassifier/version.rb
CHANGED