omnicat-bayes 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +20 -0
- data/Rakefile +1 -1
- data/lib/omnicat/bayes/version.rb +1 -1
- data/lib/omnicat/classifiers/bayes.rb +65 -3
- data/omnicat-bayes.gemspec +1 -1
- data/{lib/test → test}/test_helper.rb +1 -1
- data/{lib/test → test}/unit/bayes_test.rb +0 -0
- data/{lib/test → test}/unit/classifiers/bayes_test.rb +50 -0
- metadata +11 -8
data/README.md
CHANGED
@@ -57,6 +57,12 @@ Train category with a document.
|
|
57
57
|
|
58
58
|
bayes.train('positive', 'great if you are in a slap happy mood .')
|
59
59
|
bayes.train('negative', 'bad tracking issue')
|
60
|
+
|
61
|
+
### Untrain
|
62
|
+
Untrain category with a document.
|
63
|
+
|
64
|
+
bayes.untrain('positive', 'great if you are in a slap happy mood .')
|
65
|
+
bayes.untrain('negative', 'bad tracking issue')
|
60
66
|
|
61
67
|
### Train batch
|
62
68
|
Train category with multiple documents.
|
@@ -71,6 +77,20 @@ Train category with multiple documents.
|
|
71
77
|
'interesting , but not compelling . ',
|
72
78
|
'seems clever but not especially compelling'
|
73
79
|
])
|
80
|
+
|
81
|
+
### Untrain batch
|
82
|
+
Untrain category with multiple documents.
|
83
|
+
|
84
|
+
bayes.untrain_batch('positive', [
|
85
|
+
'a feel-good picture in the best sense of the term...',
|
86
|
+
'it is a feel-good movie about which you can actually feel good.',
|
87
|
+
'love and money both of them are good choises'
|
88
|
+
])
|
89
|
+
bayes.untrain_batch('negative', [
|
90
|
+
'simplistic , silly and tedious .',
|
91
|
+
'interesting , but not compelling . ',
|
92
|
+
'seems clever but not especially compelling'
|
93
|
+
])
|
74
94
|
|
75
95
|
### Classify
|
76
96
|
Classify a document.
|
data/Rakefile
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'digest'
|
1
2
|
require 'omnicat/classifiers/strategy'
|
2
3
|
|
3
4
|
module OmniCat
|
@@ -54,7 +55,13 @@ module OmniCat
|
|
54
55
|
if category_exists?(category_name)
|
55
56
|
increment_doc_counts(category_name)
|
56
57
|
update_priors
|
57
|
-
|
58
|
+
doc_key = Digest::MD5.hexdigest(doc_content)
|
59
|
+
if doc = @categories[category_name].docs[doc_key]
|
60
|
+
doc.increment_count
|
61
|
+
else
|
62
|
+
doc = OmniCat::Doc.new(content: doc_content)
|
63
|
+
end
|
64
|
+
@categories[category_name].docs[doc_key] = doc
|
58
65
|
doc.tokens.each do |token, count|
|
59
66
|
increment_token_counts(category_name, token, count)
|
60
67
|
@categories[category_name].tokens[token] = @categories[category_name].tokens[token].to_i + count
|
@@ -65,6 +72,42 @@ module OmniCat
|
|
65
72
|
end
|
66
73
|
end
|
67
74
|
|
75
|
+
# Untrain the desired category with a document
|
76
|
+
#
|
77
|
+
# ==== Parameters
|
78
|
+
#
|
79
|
+
# * +category_name+ - Name of the category from added categories list
|
80
|
+
# * +doc_content+ - Document text
|
81
|
+
#
|
82
|
+
# ==== Examples
|
83
|
+
#
|
84
|
+
# # Untrain the desired category
|
85
|
+
# bayes.untrain("positive", "clear documentation")
|
86
|
+
# bayes.untrain("positive", "good, very well")
|
87
|
+
# bayes.untrain("negative", "bad dog")
|
88
|
+
# bayes.untrain("neutral", "how is the management gui")
|
89
|
+
def untrain(category_name, doc_content)
|
90
|
+
if category_exists?(category_name)
|
91
|
+
doc_key = Digest::MD5.hexdigest(doc_content)
|
92
|
+
if doc = @categories[category_name].docs[doc_key]
|
93
|
+
@categories[category_name].docs[doc_key].decrement_count
|
94
|
+
else
|
95
|
+
raise StandardError,
|
96
|
+
"Document is not found in #{category_name} documents!"
|
97
|
+
end
|
98
|
+
doc.tokens.each do |token, count|
|
99
|
+
decrement_token_counts(category_name, token, count)
|
100
|
+
@categories[category_name].tokens[token] = @categories[category_name].tokens[token].to_i - count
|
101
|
+
end
|
102
|
+
@categories[category_name].docs.delete(doc_key) if @categories[category_name].docs[doc_key].count == 0
|
103
|
+
decrement_doc_counts(category_name)
|
104
|
+
update_priors
|
105
|
+
else
|
106
|
+
raise StandardError,
|
107
|
+
"Category with name '#{category_name}' does not exist!"
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
68
111
|
# Classify the given document
|
69
112
|
#
|
70
113
|
# ==== Parameters
|
@@ -110,14 +153,33 @@ module OmniCat
|
|
110
153
|
|
111
154
|
# nodoc
|
112
155
|
def increment_token_counts(category_name, token, count)
|
113
|
-
|
156
|
+
modify_token_counts(category_name, token, count)
|
157
|
+
end
|
158
|
+
|
159
|
+
# nodoc
|
160
|
+
def decrement_token_counts(category_name, token, count)
|
161
|
+
modify_token_counts(category_name, token, -1 * count)
|
162
|
+
end
|
163
|
+
|
164
|
+
# nodoc
|
165
|
+
def modify_token_counts(category_name, token, count)
|
166
|
+
modify_uniq_token_count(token, count < 0 ? -1 : 1)
|
114
167
|
@token_count += count
|
115
168
|
@categories[category_name].token_count += count
|
116
169
|
end
|
117
170
|
|
118
171
|
# nodoc
|
119
172
|
def increment_uniq_token_count(token)
|
120
|
-
|
173
|
+
modify_uniq_token_count(token, 1)
|
174
|
+
end
|
175
|
+
|
176
|
+
# nodoc
|
177
|
+
def decrement_uniq_token_count(token)
|
178
|
+
modify_uniq_token_count(token, -1)
|
179
|
+
end
|
180
|
+
|
181
|
+
# nodoc
|
182
|
+
def modify_uniq_token_count(token, uniq_token_addition)
|
121
183
|
categories.each do |_, category|
|
122
184
|
if category.tokens.has_key?(token)
|
123
185
|
uniq_token_addition = 0
|
data/omnicat-bayes.gemspec
CHANGED
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ['lib']
|
20
20
|
|
21
|
-
spec.add_dependency 'omnicat', '~> 0.2.0'
|
21
|
+
spec.add_dependency 'omnicat', '~> 0.2.1'
|
22
22
|
spec.add_development_dependency 'bundler', '~> 1.3'
|
23
23
|
spec.add_development_dependency 'rake'
|
24
24
|
end
|
@@ -1,2 +1,2 @@
|
|
1
1
|
require 'test/unit'
|
2
|
-
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'omnicat', 'bayes'))
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'omnicat', 'bayes'))
|
File without changes
|
@@ -65,6 +65,56 @@ class TestBayes < Test::Unit::TestCase
|
|
65
65
|
)
|
66
66
|
end
|
67
67
|
|
68
|
+
def test_untrain_valid_category
|
69
|
+
@bayes.add_category 'neutral'
|
70
|
+
@bayes.train 'neutral', 'how are you?? : :| :) ;-) :('
|
71
|
+
@bayes.untrain 'neutral', 'how are you?? : :| :) ;-) :('
|
72
|
+
assert_equal(
|
73
|
+
0,
|
74
|
+
@bayes.categories['neutral'].doc_count
|
75
|
+
)
|
76
|
+
assert_equal(
|
77
|
+
0,
|
78
|
+
@bayes.categories['neutral'].docs.count
|
79
|
+
)
|
80
|
+
assert_equal(
|
81
|
+
0,
|
82
|
+
@bayes.categories['neutral'].token_count
|
83
|
+
)
|
84
|
+
end
|
85
|
+
|
86
|
+
def test_untrain_with_doc_count_2
|
87
|
+
@bayes.add_category 'neutral'
|
88
|
+
@bayes.train 'neutral', 'how are you?? : :| :) ;-) :('
|
89
|
+
@bayes.train 'neutral', 'how are you?? : :| :) ;-) :('
|
90
|
+
@bayes.untrain 'neutral', 'how are you?? : :| :) ;-) :('
|
91
|
+
assert_equal(
|
92
|
+
1,
|
93
|
+
@bayes.categories['neutral'].doc_count
|
94
|
+
)
|
95
|
+
assert_equal(
|
96
|
+
{'how' => 1, 'you' => 1, '?' => 2, ':|' => 1, ':)' => 1, ';-)' => 1, ':(' => 1},
|
97
|
+
@bayes.categories['neutral'].tokens
|
98
|
+
)
|
99
|
+
assert_equal(
|
100
|
+
8,
|
101
|
+
@bayes.categories['neutral'].token_count
|
102
|
+
)
|
103
|
+
end
|
104
|
+
|
105
|
+
def test_untrain_invalid_category
|
106
|
+
assert_equal(false, @bayes.categories.has_key?('neutral'))
|
107
|
+
assert_raise(StandardError) {
|
108
|
+
@bayes.untrain 'neutral', 'how are you?? : :| :) ;-) :('
|
109
|
+
}
|
110
|
+
end
|
111
|
+
|
112
|
+
def test_untrain_with_missing_doc
|
113
|
+
@bayes.add_category 'neutral'
|
114
|
+
assert_raise(StandardError) {
|
115
|
+
@bayes.untrain 'neutral', 'how are you?? : :| :) ;-) :(' }
|
116
|
+
end
|
117
|
+
|
68
118
|
def test_train_batch
|
69
119
|
@bayes.add_category 'positive'
|
70
120
|
@bayes.train_batch 'positive', ['good job ever', 'valid syntax',
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: omnicat-bayes
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-07-
|
12
|
+
date: 2013-07-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: omnicat
|
@@ -18,7 +18,7 @@ dependencies:
|
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: 0.2.0
|
21
|
+
version: 0.2.1
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -26,7 +26,7 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ~>
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: 0.2.0
|
29
|
+
version: 0.2.1
|
30
30
|
- !ruby/object:Gem::Dependency
|
31
31
|
name: bundler
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
@@ -75,10 +75,10 @@ files:
|
|
75
75
|
- lib/omnicat/bayes/version.rb
|
76
76
|
- lib/omnicat/classifiers/bayes.rb
|
77
77
|
- lib/omnicat/classifiers/bayes_internals/category.rb
|
78
|
-
- lib/test/test_helper.rb
|
79
|
-
- lib/test/unit/bayes_test.rb
|
80
|
-
- lib/test/unit/classifiers/bayes_test.rb
|
81
78
|
- omnicat-bayes.gemspec
|
79
|
+
- test/test_helper.rb
|
80
|
+
- test/unit/bayes_test.rb
|
81
|
+
- test/unit/classifiers/bayes_test.rb
|
82
82
|
homepage: https://github.com/mustafaturan/omnicat-bayes
|
83
83
|
licenses:
|
84
84
|
- MIT
|
@@ -104,4 +104,7 @@ rubygems_version: 1.8.23
|
|
104
104
|
signing_key:
|
105
105
|
specification_version: 3
|
106
106
|
summary: Naive Bayes text classification implementation as an OmniCat classifier strategy.
|
107
|
-
test_files:
|
107
|
+
test_files:
|
108
|
+
- test/test_helper.rb
|
109
|
+
- test/unit/bayes_test.rb
|
110
|
+
- test/unit/classifiers/bayes_test.rb
|