omnicat-bayes 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -57,6 +57,12 @@ Train category with a document.
57
57
 
58
58
  bayes.train('positive', 'great if you are in a slap happy mood .')
59
59
  bayes.train('negative', 'bad tracking issue')
60
+
61
+ ### Untrain
62
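+ Untrain a category with a document that was previously trained into it. The text must match the trained document exactly; untraining a document that was never trained for that category raises an error.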
63
+
64
+ bayes.untrain('positive', 'great if you are in a slap happy mood .')
65
+ bayes.untrain('negative', 'bad tracking issue')
60
66
 
61
67
  ### Train batch
62
68
  Train category with multiple documents.
@@ -71,6 +77,20 @@ Train category with multiple documents.
71
77
  'interesting , but not compelling . ',
72
78
  'seems clever but not especially compelling'
73
79
  ])
80
+
81
+ ### Untrain batch
82
+ Untrain a category with multiple previously trained documents.
83
+
84
+ bayes.untrain_batch('positive', [
85
+ 'a feel-good picture in the best sense of the term...',
86
+ 'it is a feel-good movie about which you can actually feel good.',
87
+ 'love and money both of them are good choises'
88
+ ])
89
+ bayes.untrain_batch('negative', [
90
+ 'simplistic , silly and tedious .',
91
+ 'interesting , but not compelling . ',
92
+ 'seems clever but not especially compelling'
93
+ ])
74
94
 
75
95
  ### Classify
76
96
  Classify a document.
data/Rakefile CHANGED
@@ -9,7 +9,7 @@ task :default => [ :test ]
9
9
  desc "Run all unit tests"
10
10
  Rake::TestTask.new do |t|
11
11
  t.libs << 'lib'
12
- t.test_files = FileList['lib/test/unit/*_test.rb']
12
+ t.test_files = FileList['test/unit/*_test.rb']
13
13
  t.verbose = true
14
14
  end
15
15
 
@@ -1,5 +1,5 @@
1
1
  module Omnicat
2
2
  module Bayes
3
- VERSION = '0.2.0'
3
+ VERSION = '0.2.1'
4
4
  end
5
5
  end
@@ -1,3 +1,4 @@
1
+ require 'digest'
1
2
  require 'omnicat/classifiers/strategy'
2
3
 
3
4
  module OmniCat
@@ -54,7 +55,13 @@ module OmniCat
54
55
  if category_exists?(category_name)
55
56
  increment_doc_counts(category_name)
56
57
  update_priors
57
- doc = OmniCat::Doc.new(content: doc_content)
58
+ doc_key = Digest::MD5.hexdigest(doc_content)
59
+ if doc = @categories[category_name].docs[doc_key]
60
+ doc.increment_count
61
+ else
62
+ doc = OmniCat::Doc.new(content: doc_content)
63
+ end
64
+ @categories[category_name].docs[doc_key] = doc
58
65
  doc.tokens.each do |token, count|
59
66
  increment_token_counts(category_name, token, count)
60
67
  @categories[category_name].tokens[token] = @categories[category_name].tokens[token].to_i + count
@@ -65,6 +72,42 @@ module OmniCat
65
72
  end
66
73
  end
67
74
 
75
+ # Untrain the desired category with a document
76
+ #
77
+ # ==== Parameters
78
+ #
79
+ # * +category_name+ - Name of the category from added categories list
80
+ # * +doc_content+ - Document text
81
+ #
82
+ # ==== Examples
83
+ #
84
+ # # Untrain the desired category
85
+ # bayes.untrain("positive", "clear documentation")
86
+ # bayes.untrain("positive", "good, very well")
87
+ # bayes.untrain("negative", "bad dog")
88
+ # bayes.untrain("neutral", "how is the management gui")
89
+ def untrain(category_name, doc_content)
90
+ if category_exists?(category_name)
91
+ doc_key = Digest::MD5.hexdigest(doc_content)
92
+ if doc = @categories[category_name].docs[doc_key]
93
+ @categories[category_name].docs[doc_key].decrement_count
94
+ else
95
+ raise StandardError,
96
+ "Document is not found in #{category_name} documents!"
97
+ end
98
+ doc.tokens.each do |token, count|
99
+ decrement_token_counts(category_name, token, count)
100
+ @categories[category_name].tokens[token] = @categories[category_name].tokens[token].to_i - count
101
+ end
102
+ @categories[category_name].docs.delete(doc_key) if @categories[category_name].docs[doc_key].count == 0
103
+ decrement_doc_counts(category_name)
104
+ update_priors
105
+ else
106
+ raise StandardError,
107
+ "Category with name '#{category_name}' does not exist!"
108
+ end
109
+ end
110
+
68
111
  # Classify the given document
69
112
  #
70
113
  # ==== Parameters
@@ -110,14 +153,33 @@ module OmniCat
110
153
 
111
154
  # nodoc
112
155
  def increment_token_counts(category_name, token, count)
113
- increment_uniq_token_count(token)
156
+ modify_token_counts(category_name, token, count)
157
+ end
158
+
159
+ # nodoc
160
+ def decrement_token_counts(category_name, token, count)
161
+ modify_token_counts(category_name, token, -1 * count)
162
+ end
163
+
164
+ # nodoc
165
+ def modify_token_counts(category_name, token, count)
166
+ modify_uniq_token_count(token, count < 0 ? -1 : 1)
114
167
  @token_count += count
115
168
  @categories[category_name].token_count += count
116
169
  end
117
170
 
118
171
  # nodoc
119
172
  def increment_uniq_token_count(token)
120
- uniq_token_addition = 1
173
+ modify_uniq_token_count(token, 1)
174
+ end
175
+
176
+ # nodoc
177
+ def decrement_uniq_token_count(token)
178
+ modify_uniq_token_count(token, -1)
179
+ end
180
+
181
+ # nodoc
182
+ def modify_uniq_token_count(token, uniq_token_addition)
121
183
  categories.each do |_, category|
122
184
  if category.tokens.has_key?(token)
123
185
  uniq_token_addition = 0
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ['lib']
20
20
 
21
- spec.add_dependency 'omnicat', '~> 0.2.0'
21
+ spec.add_dependency 'omnicat', '~> 0.2.1'
22
22
  spec.add_development_dependency 'bundler', '~> 1.3'
23
23
  spec.add_development_dependency 'rake'
24
24
  end
@@ -1,2 +1,2 @@
1
1
  require 'test/unit'
2
- require File.expand_path(File.join(File.dirname(__FILE__), '..', 'omnicat', 'bayes'))
2
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'omnicat', 'bayes'))
File without changes
@@ -65,6 +65,56 @@ class TestBayes < Test::Unit::TestCase
65
65
  )
66
66
  end
67
67
 
68
+ def test_untrain_valid_category
69
+ @bayes.add_category 'neutral'
70
+ @bayes.train 'neutral', 'how are you?? : :| :) ;-) :('
71
+ @bayes.untrain 'neutral', 'how are you?? : :| :) ;-) :('
72
+ assert_equal(
73
+ 0,
74
+ @bayes.categories['neutral'].doc_count
75
+ )
76
+ assert_equal(
77
+ 0,
78
+ @bayes.categories['neutral'].docs.count
79
+ )
80
+ assert_equal(
81
+ 0,
82
+ @bayes.categories['neutral'].token_count
83
+ )
84
+ end
85
+
86
+ def test_untrain_with_doc_count_2
87
+ @bayes.add_category 'neutral'
88
+ @bayes.train 'neutral', 'how are you?? : :| :) ;-) :('
89
+ @bayes.train 'neutral', 'how are you?? : :| :) ;-) :('
90
+ @bayes.untrain 'neutral', 'how are you?? : :| :) ;-) :('
91
+ assert_equal(
92
+ 1,
93
+ @bayes.categories['neutral'].doc_count
94
+ )
95
+ assert_equal(
96
+ {'how' => 1, 'you' => 1, '?' => 2, ':|' => 1, ':)' => 1, ';-)' => 1, ':(' => 1},
97
+ @bayes.categories['neutral'].tokens
98
+ )
99
+ assert_equal(
100
+ 8,
101
+ @bayes.categories['neutral'].token_count
102
+ )
103
+ end
104
+
105
+ def test_untrain_invalid_category
106
+ assert_equal(false, @bayes.categories.has_key?('neutral'))
107
+ assert_raise(StandardError) {
108
+ @bayes.untrain 'neutral', 'how are you?? : :| :) ;-) :('
109
+ }
110
+ end
111
+
112
+ def test_untrain_with_missing_doc
113
+ @bayes.add_category 'neutral'
114
+ assert_raise(StandardError) {
115
+ @bayes.untrain 'neutral', 'how are you?? : :| :) ;-) :(' }
116
+ end
117
+
68
118
  def test_train_batch
69
119
  @bayes.add_category 'positive'
70
120
  @bayes.train_batch 'positive', ['good job ever', 'valid syntax',
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: omnicat-bayes
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-07-06 00:00:00.000000000 Z
12
+ date: 2013-07-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: omnicat
@@ -18,7 +18,7 @@ dependencies:
18
18
  requirements:
19
19
  - - ~>
20
20
  - !ruby/object:Gem::Version
21
- version: 0.2.0
21
+ version: 0.2.1
22
22
  type: :runtime
23
23
  prerelease: false
24
24
  version_requirements: !ruby/object:Gem::Requirement
@@ -26,7 +26,7 @@ dependencies:
26
26
  requirements:
27
27
  - - ~>
28
28
  - !ruby/object:Gem::Version
29
- version: 0.2.0
29
+ version: 0.2.1
30
30
  - !ruby/object:Gem::Dependency
31
31
  name: bundler
32
32
  requirement: !ruby/object:Gem::Requirement
@@ -75,10 +75,10 @@ files:
75
75
  - lib/omnicat/bayes/version.rb
76
76
  - lib/omnicat/classifiers/bayes.rb
77
77
  - lib/omnicat/classifiers/bayes_internals/category.rb
78
- - lib/test/test_helper.rb
79
- - lib/test/unit/bayes_test.rb
80
- - lib/test/unit/classifiers/bayes_test.rb
81
78
  - omnicat-bayes.gemspec
79
+ - test/test_helper.rb
80
+ - test/unit/bayes_test.rb
81
+ - test/unit/classifiers/bayes_test.rb
82
82
  homepage: https://github.com/mustafaturan/omnicat-bayes
83
83
  licenses:
84
84
  - MIT
@@ -104,4 +104,7 @@ rubygems_version: 1.8.23
104
104
  signing_key:
105
105
  specification_version: 3
106
106
  summary: Naive Bayes text classification implementation as an OmniCat classifier strategy.
107
- test_files: []
107
+ test_files:
108
+ - test/test_helper.rb
109
+ - test/unit/bayes_test.rb
110
+ - test/unit/classifiers/bayes_test.rb