omnicat-bayes 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +20 -0
- data/Rakefile +1 -1
- data/lib/omnicat/bayes/version.rb +1 -1
- data/lib/omnicat/classifiers/bayes.rb +65 -3
- data/omnicat-bayes.gemspec +1 -1
- data/{lib/test → test}/test_helper.rb +1 -1
- data/{lib/test → test}/unit/bayes_test.rb +0 -0
- data/{lib/test → test}/unit/classifiers/bayes_test.rb +50 -0
- metadata +11 -8
data/README.md
CHANGED
@@ -57,6 +57,12 @@ Train category with a document.
|
|
57
57
|
|
58
58
|
bayes.train('positive', 'great if you are in a slap happy mood .')
|
59
59
|
bayes.train('negative', 'bad tracking issue')
|
60
|
+
|
61
|
+
### Untrain
|
62
|
+
Untrain category with a document.
|
63
|
+
|
64
|
+
bayes.untrain('positive', 'great if you are in a slap happy mood .')
|
65
|
+
bayes.untrain('negative', 'bad tracking issue')
|
60
66
|
|
61
67
|
### Train batch
|
62
68
|
Train category with multiple documents.
|
@@ -71,6 +77,20 @@ Train category with multiple documents.
|
|
71
77
|
'interesting , but not compelling . ',
|
72
78
|
'seems clever but not especially compelling'
|
73
79
|
])
|
80
|
+
|
81
|
+
### Untrain batch
|
82
|
+
Untrain category with multiple documents.
|
83
|
+
|
84
|
+
bayes.untrain_batch('positive', [
|
85
|
+
'a feel-good picture in the best sense of the term...',
|
86
|
+
'it is a feel-good movie about which you can actually feel good.',
|
87
|
+
'love and money both of them are good choises'
|
88
|
+
])
|
89
|
+
bayes.untrain_batch('negative', [
|
90
|
+
'simplistic , silly and tedious .',
|
91
|
+
'interesting , but not compelling . ',
|
92
|
+
'seems clever but not especially compelling'
|
93
|
+
])
|
74
94
|
|
75
95
|
### Classify
|
76
96
|
Classify a document.
|
data/Rakefile
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'digest'
|
1
2
|
require 'omnicat/classifiers/strategy'
|
2
3
|
|
3
4
|
module OmniCat
|
@@ -54,7 +55,13 @@ module OmniCat
|
|
54
55
|
if category_exists?(category_name)
|
55
56
|
increment_doc_counts(category_name)
|
56
57
|
update_priors
|
57
|
-
|
58
|
+
doc_key = Digest::MD5.hexdigest(doc_content)
|
59
|
+
if doc = @categories[category_name].docs[doc_key]
|
60
|
+
doc.increment_count
|
61
|
+
else
|
62
|
+
doc = OmniCat::Doc.new(content: doc_content)
|
63
|
+
end
|
64
|
+
@categories[category_name].docs[doc_key] = doc
|
58
65
|
doc.tokens.each do |token, count|
|
59
66
|
increment_token_counts(category_name, token, count)
|
60
67
|
@categories[category_name].tokens[token] = @categories[category_name].tokens[token].to_i + count
|
@@ -65,6 +72,42 @@ module OmniCat
|
|
65
72
|
end
|
66
73
|
end
|
67
74
|
|
75
|
+
# Untrain the desired category with a document
|
76
|
+
#
|
77
|
+
# ==== Parameters
|
78
|
+
#
|
79
|
+
# * +category_name+ - Name of the category from added categories list
|
80
|
+
# * +doc_content+ - Document text
|
81
|
+
#
|
82
|
+
# ==== Examples
|
83
|
+
#
|
84
|
+
# # Untrain the desired category
|
85
|
+
# bayes.untrain("positive", "clear documentation")
|
86
|
+
# bayes.untrain("positive", "good, very well")
|
87
|
+
# bayes.untrain("negative", "bad dog")
|
88
|
+
# bayes.untrain("neutral", "how is the management gui")
|
89
|
+
def untrain(category_name, doc_content)
|
90
|
+
if category_exists?(category_name)
|
91
|
+
doc_key = Digest::MD5.hexdigest(doc_content)
|
92
|
+
if doc = @categories[category_name].docs[doc_key]
|
93
|
+
@categories[category_name].docs[doc_key].decrement_count
|
94
|
+
else
|
95
|
+
raise StandardError,
|
96
|
+
"Document is not found in #{category_name} documents!"
|
97
|
+
end
|
98
|
+
doc.tokens.each do |token, count|
|
99
|
+
decrement_token_counts(category_name, token, count)
|
100
|
+
@categories[category_name].tokens[token] = @categories[category_name].tokens[token].to_i - count
|
101
|
+
end
|
102
|
+
@categories[category_name].docs.delete(doc_key) if @categories[category_name].docs[doc_key].count == 0
|
103
|
+
decrement_doc_counts(category_name)
|
104
|
+
update_priors
|
105
|
+
else
|
106
|
+
raise StandardError,
|
107
|
+
"Category with name '#{category_name}' does not exist!"
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
68
111
|
# Classify the given document
|
69
112
|
#
|
70
113
|
# ==== Parameters
|
@@ -110,14 +153,33 @@ module OmniCat
|
|
110
153
|
|
111
154
|
# nodoc
|
112
155
|
def increment_token_counts(category_name, token, count)
|
113
|
-
|
156
|
+
modify_token_counts(category_name, token, count)
|
157
|
+
end
|
158
|
+
|
159
|
+
# nodoc
|
160
|
+
def decrement_token_counts(category_name, token, count)
|
161
|
+
modify_token_counts(category_name, token, -1 * count)
|
162
|
+
end
|
163
|
+
|
164
|
+
# nodoc
|
165
|
+
def modify_token_counts(category_name, token, count)
|
166
|
+
modify_uniq_token_count(token, count < 0 ? -1 : 1)
|
114
167
|
@token_count += count
|
115
168
|
@categories[category_name].token_count += count
|
116
169
|
end
|
117
170
|
|
118
171
|
# nodoc
|
119
172
|
def increment_uniq_token_count(token)
|
120
|
-
|
173
|
+
modify_uniq_token_count(token, 1)
|
174
|
+
end
|
175
|
+
|
176
|
+
# nodoc
|
177
|
+
def decrement_uniq_token_count(token)
|
178
|
+
modify_uniq_token_count(token, -1)
|
179
|
+
end
|
180
|
+
|
181
|
+
# nodoc
|
182
|
+
def modify_uniq_token_count(token, uniq_token_addition)
|
121
183
|
categories.each do |_, category|
|
122
184
|
if category.tokens.has_key?(token)
|
123
185
|
uniq_token_addition = 0
|
data/omnicat-bayes.gemspec
CHANGED
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ['lib']
|
20
20
|
|
21
|
-
spec.add_dependency 'omnicat', '~> 0.2.0'
|
21
|
+
spec.add_dependency 'omnicat', '~> 0.2.1'
|
22
22
|
spec.add_development_dependency 'bundler', '~> 1.3'
|
23
23
|
spec.add_development_dependency 'rake'
|
24
24
|
end
|
@@ -1,2 +1,2 @@
|
|
1
1
|
require 'test/unit'
|
2
|
-
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'omnicat', 'bayes'))
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'omnicat', 'bayes'))
|
File without changes
|
@@ -65,6 +65,56 @@ class TestBayes < Test::Unit::TestCase
|
|
65
65
|
)
|
66
66
|
end
|
67
67
|
|
68
|
+
def test_untrain_valid_category
|
69
|
+
@bayes.add_category 'neutral'
|
70
|
+
@bayes.train 'neutral', 'how are you?? : :| :) ;-) :('
|
71
|
+
@bayes.untrain 'neutral', 'how are you?? : :| :) ;-) :('
|
72
|
+
assert_equal(
|
73
|
+
0,
|
74
|
+
@bayes.categories['neutral'].doc_count
|
75
|
+
)
|
76
|
+
assert_equal(
|
77
|
+
0,
|
78
|
+
@bayes.categories['neutral'].docs.count
|
79
|
+
)
|
80
|
+
assert_equal(
|
81
|
+
0,
|
82
|
+
@bayes.categories['neutral'].token_count
|
83
|
+
)
|
84
|
+
end
|
85
|
+
|
86
|
+
def test_untrain_with_doc_count_2
|
87
|
+
@bayes.add_category 'neutral'
|
88
|
+
@bayes.train 'neutral', 'how are you?? : :| :) ;-) :('
|
89
|
+
@bayes.train 'neutral', 'how are you?? : :| :) ;-) :('
|
90
|
+
@bayes.untrain 'neutral', 'how are you?? : :| :) ;-) :('
|
91
|
+
assert_equal(
|
92
|
+
1,
|
93
|
+
@bayes.categories['neutral'].doc_count
|
94
|
+
)
|
95
|
+
assert_equal(
|
96
|
+
{'how' => 1, 'you' => 1, '?' => 2, ':|' => 1, ':)' => 1, ';-)' => 1, ':(' => 1},
|
97
|
+
@bayes.categories['neutral'].tokens
|
98
|
+
)
|
99
|
+
assert_equal(
|
100
|
+
8,
|
101
|
+
@bayes.categories['neutral'].token_count
|
102
|
+
)
|
103
|
+
end
|
104
|
+
|
105
|
+
def test_untrain_invalid_category
|
106
|
+
assert_equal(false, @bayes.categories.has_key?('neutral'))
|
107
|
+
assert_raise(StandardError) {
|
108
|
+
@bayes.untrain 'neutral', 'how are you?? : :| :) ;-) :('
|
109
|
+
}
|
110
|
+
end
|
111
|
+
|
112
|
+
def test_untrain_with_missing_doc
|
113
|
+
@bayes.add_category 'neutral'
|
114
|
+
assert_raise(StandardError) {
|
115
|
+
@bayes.untrain 'neutral', 'how are you?? : :| :) ;-) :(' }
|
116
|
+
end
|
117
|
+
|
68
118
|
def test_train_batch
|
69
119
|
@bayes.add_category 'positive'
|
70
120
|
@bayes.train_batch 'positive', ['good job ever', 'valid syntax',
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: omnicat-bayes
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-07-
|
12
|
+
date: 2013-07-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: omnicat
|
@@ -18,7 +18,7 @@ dependencies:
|
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: 0.2.0
|
21
|
+
version: 0.2.1
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -26,7 +26,7 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ~>
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: 0.2.0
|
29
|
+
version: 0.2.1
|
30
30
|
- !ruby/object:Gem::Dependency
|
31
31
|
name: bundler
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
@@ -75,10 +75,10 @@ files:
|
|
75
75
|
- lib/omnicat/bayes/version.rb
|
76
76
|
- lib/omnicat/classifiers/bayes.rb
|
77
77
|
- lib/omnicat/classifiers/bayes_internals/category.rb
|
78
|
-
- lib/test/test_helper.rb
|
79
|
-
- lib/test/unit/bayes_test.rb
|
80
|
-
- lib/test/unit/classifiers/bayes_test.rb
|
81
78
|
- omnicat-bayes.gemspec
|
79
|
+
- test/test_helper.rb
|
80
|
+
- test/unit/bayes_test.rb
|
81
|
+
- test/unit/classifiers/bayes_test.rb
|
82
82
|
homepage: https://github.com/mustafaturan/omnicat-bayes
|
83
83
|
licenses:
|
84
84
|
- MIT
|
@@ -104,4 +104,7 @@ rubygems_version: 1.8.23
|
|
104
104
|
signing_key:
|
105
105
|
specification_version: 3
|
106
106
|
summary: Naive Bayes text classification implementation as an OmniCat classifier strategy.
|
107
|
-
test_files:
|
107
|
+
test_files:
|
108
|
+
- test/test_helper.rb
|
109
|
+
- test/unit/bayes_test.rb
|
110
|
+
- test/unit/classifiers/bayes_test.rb
|