omnicat-bayes 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -57,6 +57,12 @@ Train category with a document.
57
57
 
58
58
  bayes.train('positive', 'great if you are in a slap happy mood .')
59
59
  bayes.train('negative', 'bad tracking issue')
60
+
61
+ ### Untrain
62
+ Untrain a category with a document that was previously used to train it.
63
+
64
+ bayes.untrain('positive', 'great if you are in a slap happy mood .')
65
+ bayes.untrain('negative', 'bad tracking issue')
60
66
 
61
67
  ### Train batch
62
68
  Train category with multiple documents.
@@ -71,6 +77,20 @@ Train category with multiple documents.
71
77
  'interesting , but not compelling . ',
72
78
  'seems clever but not especially compelling'
73
79
  ])
80
+
81
+ ### Untrain batch
82
+ Untrain a category with multiple documents that were previously used to train it.
83
+
84
+ bayes.untrain_batch('positive', [
85
+ 'a feel-good picture in the best sense of the term...',
86
+ 'it is a feel-good movie about which you can actually feel good.',
87
+ 'love and money both of them are good choises'
88
+ ])
89
+ bayes.untrain_batch('negative', [
90
+ 'simplistic , silly and tedious .',
91
+ 'interesting , but not compelling . ',
92
+ 'seems clever but not especially compelling'
93
+ ])
74
94
 
75
95
  ### Classify
76
96
  Classify a document.
data/Rakefile CHANGED
@@ -9,7 +9,7 @@ task :default => [ :test ]
9
9
  desc "Run all unit tests"
10
10
  Rake::TestTask.new do |t|
11
11
  t.libs << 'lib'
12
- t.test_files = FileList['lib/test/unit/*_test.rb']
12
+ t.test_files = FileList['test/unit/*_test.rb']
13
13
  t.verbose = true
14
14
  end
15
15
 
@@ -1,5 +1,5 @@
1
1
  module Omnicat
2
2
  module Bayes
3
- VERSION = '0.2.0'
3
+ VERSION = '0.2.1'
4
4
  end
5
5
  end
@@ -1,3 +1,4 @@
1
+ require 'digest'
1
2
  require 'omnicat/classifiers/strategy'
2
3
 
3
4
  module OmniCat
@@ -54,7 +55,13 @@ module OmniCat
54
55
  if category_exists?(category_name)
55
56
  increment_doc_counts(category_name)
56
57
  update_priors
57
- doc = OmniCat::Doc.new(content: doc_content)
58
+ doc_key = Digest::MD5.hexdigest(doc_content)
59
+ if doc = @categories[category_name].docs[doc_key]
60
+ doc.increment_count
61
+ else
62
+ doc = OmniCat::Doc.new(content: doc_content)
63
+ end
64
+ @categories[category_name].docs[doc_key] = doc
58
65
  doc.tokens.each do |token, count|
59
66
  increment_token_counts(category_name, token, count)
60
67
  @categories[category_name].tokens[token] = @categories[category_name].tokens[token].to_i + count
@@ -65,6 +72,42 @@ module OmniCat
65
72
  end
66
73
  end
67
74
 
75
+ # Untrain the desired category with a document
76
+ #
77
+ # ==== Parameters
78
+ #
79
+ # * +category_name+ - Name of the category from added categories list
80
+ # * +doc_content+ - Document text
81
+ #
82
+ # ==== Examples
83
+ #
84
+ # # Untrain the desired category
85
+ # bayes.untrain("positive", "clear documentation")
86
+ # bayes.untrain("positive", "good, very well")
87
+ # bayes.untrain("negative", "bad dog")
88
+ # bayes.untrain("neutral", "how is the management gui")
89
+ def untrain(category_name, doc_content)
90
+ if category_exists?(category_name)
91
+ doc_key = Digest::MD5.hexdigest(doc_content)
92
+ if doc = @categories[category_name].docs[doc_key]
93
+ @categories[category_name].docs[doc_key].decrement_count
94
+ else
95
+ raise StandardError,
96
+ "Document is not found in #{category_name} documents!"
97
+ end
98
+ doc.tokens.each do |token, count|
99
+ decrement_token_counts(category_name, token, count)
100
+ @categories[category_name].tokens[token] = @categories[category_name].tokens[token].to_i - count
101
+ end
102
+ @categories[category_name].docs.delete(doc_key) if @categories[category_name].docs[doc_key].count == 0
103
+ decrement_doc_counts(category_name)
104
+ update_priors
105
+ else
106
+ raise StandardError,
107
+ "Category with name '#{category_name}' does not exist!"
108
+ end
109
+ end
110
+
68
111
  # Classify the given document
69
112
  #
70
113
  # ==== Parameters
@@ -110,14 +153,33 @@ module OmniCat
110
153
 
111
154
  # nodoc
112
155
  def increment_token_counts(category_name, token, count)
113
- increment_uniq_token_count(token)
156
+ modify_token_counts(category_name, token, count)
157
+ end
158
+
159
+ # nodoc
160
+ def decrement_token_counts(category_name, token, count)
161
+ modify_token_counts(category_name, token, -1 * count)
162
+ end
163
+
164
+ # nodoc
165
+ def modify_token_counts(category_name, token, count)
166
+ modify_uniq_token_count(token, count < 0 ? -1 : 1)
114
167
  @token_count += count
115
168
  @categories[category_name].token_count += count
116
169
  end
117
170
 
118
171
  # nodoc
119
172
  def increment_uniq_token_count(token)
120
- uniq_token_addition = 1
173
+ modify_uniq_token_count(token, 1)
174
+ end
175
+
176
+ # nodoc
177
+ def decrement_uniq_token_count(token)
178
+ modify_uniq_token_count(token, -1)
179
+ end
180
+
181
+ # nodoc
182
+ def modify_uniq_token_count(token, uniq_token_addition)
121
183
  categories.each do |_, category|
122
184
  if category.tokens.has_key?(token)
123
185
  uniq_token_addition = 0
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ['lib']
20
20
 
21
- spec.add_dependency 'omnicat', '~> 0.2.0'
21
+ spec.add_dependency 'omnicat', '~> 0.2.1'
22
22
  spec.add_development_dependency 'bundler', '~> 1.3'
23
23
  spec.add_development_dependency 'rake'
24
24
  end
@@ -1,2 +1,2 @@
1
1
  require 'test/unit'
2
- require File.expand_path(File.join(File.dirname(__FILE__), '..', 'omnicat', 'bayes'))
2
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'omnicat', 'bayes'))
File without changes
@@ -65,6 +65,56 @@ class TestBayes < Test::Unit::TestCase
65
65
  )
66
66
  end
67
67
 
68
+ def test_untrain_valid_category
69
+ @bayes.add_category 'neutral'
70
+ @bayes.train 'neutral', 'how are you?? : :| :) ;-) :('
71
+ @bayes.untrain 'neutral', 'how are you?? : :| :) ;-) :('
72
+ assert_equal(
73
+ 0,
74
+ @bayes.categories['neutral'].doc_count
75
+ )
76
+ assert_equal(
77
+ 0,
78
+ @bayes.categories['neutral'].docs.count
79
+ )
80
+ assert_equal(
81
+ 0,
82
+ @bayes.categories['neutral'].token_count
83
+ )
84
+ end
85
+
86
+ def test_untrain_with_doc_count_2
87
+ @bayes.add_category 'neutral'
88
+ @bayes.train 'neutral', 'how are you?? : :| :) ;-) :('
89
+ @bayes.train 'neutral', 'how are you?? : :| :) ;-) :('
90
+ @bayes.untrain 'neutral', 'how are you?? : :| :) ;-) :('
91
+ assert_equal(
92
+ 1,
93
+ @bayes.categories['neutral'].doc_count
94
+ )
95
+ assert_equal(
96
+ {'how' => 1, 'you' => 1, '?' => 2, ':|' => 1, ':)' => 1, ';-)' => 1, ':(' => 1},
97
+ @bayes.categories['neutral'].tokens
98
+ )
99
+ assert_equal(
100
+ 8,
101
+ @bayes.categories['neutral'].token_count
102
+ )
103
+ end
104
+
105
+ def test_untrain_invalid_category
106
+ assert_equal(false, @bayes.categories.has_key?('neutral'))
107
+ assert_raise(StandardError) {
108
+ @bayes.untrain 'neutral', 'how are you?? : :| :) ;-) :('
109
+ }
110
+ end
111
+
112
+ def test_untrain_with_missing_doc
113
+ @bayes.add_category 'neutral'
114
+ assert_raise(StandardError) {
115
+ @bayes.untrain 'neutral', 'how are you?? : :| :) ;-) :(' }
116
+ end
117
+
68
118
  def test_train_batch
69
119
  @bayes.add_category 'positive'
70
120
  @bayes.train_batch 'positive', ['good job ever', 'valid syntax',
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: omnicat-bayes
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-07-06 00:00:00.000000000 Z
12
+ date: 2013-07-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: omnicat
@@ -18,7 +18,7 @@ dependencies:
18
18
  requirements:
19
19
  - - ~>
20
20
  - !ruby/object:Gem::Version
21
- version: 0.2.0
21
+ version: 0.2.1
22
22
  type: :runtime
23
23
  prerelease: false
24
24
  version_requirements: !ruby/object:Gem::Requirement
@@ -26,7 +26,7 @@ dependencies:
26
26
  requirements:
27
27
  - - ~>
28
28
  - !ruby/object:Gem::Version
29
- version: 0.2.0
29
+ version: 0.2.1
30
30
  - !ruby/object:Gem::Dependency
31
31
  name: bundler
32
32
  requirement: !ruby/object:Gem::Requirement
@@ -75,10 +75,10 @@ files:
75
75
  - lib/omnicat/bayes/version.rb
76
76
  - lib/omnicat/classifiers/bayes.rb
77
77
  - lib/omnicat/classifiers/bayes_internals/category.rb
78
- - lib/test/test_helper.rb
79
- - lib/test/unit/bayes_test.rb
80
- - lib/test/unit/classifiers/bayes_test.rb
81
78
  - omnicat-bayes.gemspec
79
+ - test/test_helper.rb
80
+ - test/unit/bayes_test.rb
81
+ - test/unit/classifiers/bayes_test.rb
82
82
  homepage: https://github.com/mustafaturan/omnicat-bayes
83
83
  licenses:
84
84
  - MIT
@@ -104,4 +104,7 @@ rubygems_version: 1.8.23
104
104
  signing_key:
105
105
  specification_version: 3
106
106
  summary: Naive Bayes text classification implementation as an OmniCat classifier strategy.
107
- test_files: []
107
+ test_files:
108
+ - test/test_helper.rb
109
+ - test/unit/bayes_test.rb
110
+ - test/unit/classifiers/bayes_test.rb