naivebayes 0.0.3 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6bbec8539369ff2f707fd59440b444cbd2a71e9f
4
- data.tar.gz: 8b0fa15a8dec7d4e314c60cff086074f48f37115
3
+ metadata.gz: ba428b87232e40d36c9c97f615a0e865bef57dfc
4
+ data.tar.gz: 8e195ed2e429a661743dba99236efe6f1529c706
5
5
  SHA512:
6
- metadata.gz: a3210aff3d52771134877dd65b4a9cf524dfd6a754f523eb708b85da4e651e00435b97be6fcca66e63fb2aa37313706012eab99405e940f2d20cd316e8ec43cc
7
- data.tar.gz: d25b10280650406e9e33566641d9c5c9846a59a2fd81f8da9d38d147774174d348e2a43624946154b962c6b6aa98bade8aaebd2c35fcafb3cc54f4f6c075139f
6
+ metadata.gz: 9ca9adf60295e6765bbec1968b486592c5cdc855bc868e0b8a4de982a28c91537985b3521bf28992377f8e03714218db48af7857093ed2dde3e5eeda490cae27
7
+ data.tar.gz: 64e04a038114c4bef08073f55320b7291ee5edb1327e6f2caed868de9edc4559154e87159ebaf125c5c9078d8fd19a6a3736fff624cfa5e46039a0f5486789b9
data/README.md CHANGED
@@ -40,6 +40,20 @@ result = classifier.classify({"aaa" => 1, "bbb" => 1})
40
40
  p result # => {"positive" => 0.9411764705882353,"negative" => 0.05882352941176469}
41
41
  ```
42
42
 
43
+ Complement Naive Bayes.
44
+
45
+ + http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.13.8572
46
+
47
+ ``` ruby
48
+ require 'naivebayes'
49
+ classifier = NaiveBayes::Classifier.new(:model => "complement", :smoothing_parameter => 1)
50
+ classifier.train("positive", {"aaa" => 3, "bbb" => 1, "ccc" => 2})
51
+ classifier.train("negative", {"aaa" => 1, "bbb" => 4, "ccc" => 2})
52
+ classifier.train("neutral", {"aaa" => 2, "bbb" => 3, "ccc" => 5})
53
+ result = classifier.classify({"aaa" => 4, "bbb" => 3, "ccc" => 3})
54
+ p result #=> {"neutral"=>9.985931139006835, "negative"=>10.112101263742268, "positive"=>10.836883752313222}
55
+ ```
56
+
43
57
 
44
58
  ChangeLog
45
59
  ---------
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.3
1
+ 0.1.0
data/demo.rb CHANGED
@@ -23,3 +23,13 @@ classifier.train("negative", {"ccc" => 2, "ddd" => 3})
23
23
  result = classifier.classify({"aaa" => 1, "bbb" => 1})
24
24
 
25
25
  p result # => {"positive" => 0.9411764705882353,"negative" => 0.05882352941176469}
26
+
27
+ classifier = NaiveBayes::Classifier.new(:model => "complement", :smoothing_parameter => 1)
28
+
29
+ classifier.train("positive", {"aaa" => 3, "bbb" => 1, "ccc" => 2})
30
+ classifier.train("negative", {"aaa" => 1, "bbb" => 4, "ccc" => 2})
31
+ classifier.train("neutral", {"aaa" => 2, "bbb" => 3, "ccc" => 5})
32
+ result = classifier.classify({"aaa" => 4, "bbb" => 3, "ccc" => 3})
33
+
34
+ p result #=> {"neutral"=>9.985931139006835, "negative"=>10.112101263742268, "positive"=>10.836883752313222}
35
+
@@ -1,3 +1,8 @@
1
+ === 0.1.0 / 2014-01-23
2
+
3
+ * Implement Complement Naive Bayes.
4
+
5
+
1
6
  === 0.0.3 / 2013-12-13
2
7
 
3
8
  * Accessible instance variable.
@@ -3,7 +3,7 @@
3
3
 
4
4
  module NaiveBayes
5
5
  class Classifier
6
- attr_accessor :frequency_table, :word_table, :instance_count_of, :total_count, :model
6
+ attr_accessor :frequency_table, :word_table, :instance_count_of, :total_count, :model, :smoothing_parameter
7
7
 
8
8
  def initialize(params = {})
9
9
  @frequency_table = Hash.new
@@ -11,17 +11,18 @@ module NaiveBayes
11
11
  @instance_count_of = Hash.new(0)
12
12
  @total_count = 0
13
13
  @model = params[:model]
14
+ @smoothing_parameter = params[:smoothing_parameter] || 1
14
15
  end
15
16
 
16
- def train(label, attributes)
17
+ def train(label, feature)
17
18
  unless @frequency_table.has_key?(label)
18
19
  @frequency_table[label] = Hash.new(0)
19
20
  end
20
- attributes.each {|word, frequency|
21
- if @model == "multinomial"
22
- @frequency_table[label][word] += frequency
23
- else
21
+ feature.each {|word, frequency|
22
+ if @model == "berounoulli"
24
23
  @frequency_table[label][word] += 1
24
+ else
25
+ @frequency_table[label][word] += frequency
25
26
  end
26
27
  @word_table[word] = 1
27
28
  }
@@ -29,7 +30,13 @@ module NaiveBayes
29
30
  @total_count += 1
30
31
  end
31
32
 
32
- def classify(attributes)
33
+ def classify(feature)
34
+ @model == "complement" ? cnb(feature) : mnb(feature)
35
+ end
36
+
37
+ private
38
+
39
+ def mnb(feature)
33
40
  class_prior_of = Hash.new(1)
34
41
  likelihood_of = Hash.new(1)
35
42
  class_posterior_of = Hash.new(1)
@@ -42,7 +49,7 @@ module NaiveBayes
42
49
  @word_table.each_key {|word|
43
50
  laplace_word_likelihood = (@frequency_table[label][word] + 1).to_f /
44
51
  (@instance_count_of[label] + @word_table.size()).to_f
45
- if attributes.has_key?(word)
52
+ if feature.has_key?(word)
46
53
  likelihood_of[label] *= laplace_word_likelihood
47
54
  else
48
55
  likelihood_of[label] *= (1 - laplace_word_likelihood)
@@ -56,5 +63,37 @@ module NaiveBayes
56
63
  }
57
64
  return class_posterior_of
58
65
  end
66
+
67
+ def cnb(feature)
68
+ all_class = @frequency_table.keys
69
+ all_train_data = @instance_count_of.values.inject(0) {|s, v| s + v}
70
+ class_posterior_of = all_class.map {|c|
71
+ n_c = total_number_of_word_in_other_class(c)
72
+ alpha = @smoothing_parameter*feature.length
73
+ term2nd = feature.to_a.map {|e|
74
+ k = e[0]
75
+ v = e[1]
76
+ v*Math.log((number_of_word_in_other_class(c, k) + @smoothing_parameter).to_f/(n_c + alpha))
77
+ }.inject(0) {|s, v| s + v}
78
+ theta_c = @instance_count_of[c].to_f/all_train_data
79
+ [c, Math.log(theta_c) - term2nd]
80
+ }.sort {|x, y| x[1] <=> y[1]}.flatten
81
+ Hash[*class_posterior_of]
82
+ end
83
+
84
+ def total_number_of_word_in_other_class(c)
85
+ all_words = @frequency_table.values.map {|h| h.keys}.flatten.sort.uniq
86
+ other_classes = @frequency_table.keys - [c]
87
+ other_classes.map {|c|
88
+ all_words.map {|w|
89
+ @frequency_table[c][w]
90
+ }
91
+ }.flatten.inject(0) {|s, v| s + v}
92
+ end
93
+
94
+ def number_of_word_in_other_class(c, i)
95
+ other_classes = @frequency_table.keys - [c]
96
+ other_classes.map {|c| @frequency_table[c][i]}.inject(0) {|s, v| s + v}
97
+ end
59
98
  end
60
99
  end
@@ -1,10 +1,10 @@
1
1
  # Name:: NaiveBayes::Version
2
2
  # Author:: 774 <http://id774.net>
3
3
  # Created:: Nov 24, 2013
4
- # Updated:: Dec 13, 2013
5
- # Copyright:: 774 Copyright (c) 2013
4
+ # Updated:: Jan 22, 2014
5
+ # Copyright:: 774 Copyright (c) 2013-2014
6
6
  # License:: Licensed under the GNU GENERAL PUBLIC LICENSE, Version 3.0.
7
7
 
8
8
  module NaiveBayes
9
- VERSION = "0.0.3"
9
+ VERSION = "0.1.0"
10
10
  end
@@ -2,15 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: naivebayes 0.0.3 ruby lib
5
+ # stub: naivebayes 0.1.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "naivebayes"
9
- s.version = "0.0.3"
9
+ s.version = "0.1.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
+ s.require_paths = ["lib"]
12
13
  s.authors = ["id774"]
13
- s.date = "2013-12-13"
14
+ s.date = "2014-01-23"
14
15
  s.description = "Naive Bayes classifier"
15
16
  s.email = "idnanashi@gmail.com"
16
17
  s.extra_rdoc_files = [
@@ -40,8 +41,7 @@ Gem::Specification.new do |s|
40
41
  ]
41
42
  s.homepage = "http://github.com/id774/naivebayes"
42
43
  s.licenses = ["GPL"]
43
- s.require_paths = ["lib"]
44
- s.rubygems_version = "2.1.11"
44
+ s.rubygems_version = "2.2.0"
45
45
  s.summary = "naivebayes"
46
46
 
47
47
  if s.respond_to? :specification_version then
@@ -180,6 +180,28 @@ describe NaiveBayes::Classifier do
180
180
  expect(subject).to eq expected
181
181
  end
182
182
  end
183
+
184
+ context '@model with complement' do
185
+ subject { classifier.model }
186
+
187
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement") }
188
+
189
+ it 'should return model name' do
190
+ expected = "complement"
191
+ expect(subject).to eq expected
192
+ end
193
+ end
194
+
195
+ context '@smoothing_parameter with complement' do
196
+ subject { classifier.smoothing_parameter }
197
+
198
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement", :smoothing_parameter => 3) }
199
+
200
+ it 'should return smoothing parameter' do
201
+ expected = 3
202
+ expect(subject).to eq expected
203
+ end
204
+ end
183
205
  end
184
206
  end
185
207
 
@@ -267,6 +289,106 @@ describe NaiveBayes::Classifier do
267
289
  end
268
290
  end
269
291
 
292
+ describe NaiveBayes::Classifier do
293
+ describe 'Complement Naive Bayes' do
294
+ context 'with train data of two expecting positive' do
295
+
296
+ subject { classifier.classify({"aaa" => 1, "bbb" => 1}) }
297
+
298
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement") }
299
+
300
+ it 'should return positive' do
301
+ classifier.train("positive", {"aaa" => 0, "bbb" => 1})
302
+ classifier.train("negative", {"ccc" => 2, "ddd" => 3})
303
+
304
+ expected = {
305
+ "negative"=>0.8109302162163289,
306
+ "positive"=>3.1986731175506815
307
+ }
308
+
309
+ expect(subject).to eq expected
310
+ end
311
+ end
312
+
313
+ context 'with train data of two expecting negative' do
314
+
315
+ subject { classifier.classify({"ccc" => 3, "ddd" => 3}) }
316
+
317
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement") }
318
+
319
+ it 'should return negative' do
320
+ classifier.train("positive", {"aaa" => 0, "bbb" => 1})
321
+ classifier.train("negative", {"ccc" => 2, "ddd" => 3})
322
+
323
+ expected = {
324
+ "positive"=>3.527593764407934,
325
+ "negative"=>5.898526551448713
326
+ }
327
+
328
+ expect(subject).to eq expected
329
+ end
330
+ end
331
+
332
+ context 'with train data of two expecting negative and smoothing parameter 1' do
333
+
334
+ subject { classifier.classify({"ccc" => 3, "ddd" => 3}) }
335
+
336
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement", :smoothing_parameter => 1) }
337
+
338
+ it 'should return negative' do
339
+ classifier.train("positive", {"aaa" => 0, "bbb" => 1})
340
+ classifier.train("negative", {"ccc" => 2, "ddd" => 3})
341
+
342
+ expected = {
343
+ "positive"=>3.527593764407934,
344
+ "negative"=>5.898526551448713
345
+ }
346
+
347
+ expect(subject).to eq expected
348
+ end
349
+ end
350
+
351
+ context 'with train data of two expecting positive and smoothing parameter 3' do
352
+
353
+ subject { classifier.classify({"aaa" => 1, "bbb" => 1}) }
354
+
355
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement", :smoothing_parameter => 3) }
356
+
357
+ it 'should return positive' do
358
+ classifier.train("positive", {"aaa" => 0, "bbb" => 1})
359
+ classifier.train("negative", {"ccc" => 2, "ddd" => 3})
360
+
361
+ expected = {
362
+ "negative"=>0.7137664677626813,
363
+ "positive"=>1.9054187877005764
364
+ }
365
+
366
+ expect(subject).to eq expected
367
+ end
368
+ end
369
+
370
+ context 'with train data of two expecting positive and smoothing parameter 5' do
371
+
372
+ subject { classifier.classify({"aaa" => 1, "bbb" => 1}) }
373
+
374
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement", :smoothing_parameter => 5) }
375
+
376
+ it 'should return positive' do
377
+ classifier.train("positive", {"aaa" => 0, "bbb" => 1})
378
+ classifier.train("negative", {"ccc" => 2, "ddd" => 3})
379
+
380
+ expected = {
381
+ "negative"=>0.7014459833746406,
382
+ "positive"=>1.5040773967762742
383
+ }
384
+
385
+ expect(subject).to eq expected
386
+ end
387
+ end
388
+
389
+ end
390
+ end
391
+
270
392
  describe NaiveBayes::Classifier do
271
393
  describe 'The berounoulli model' do
272
394
  context 'with train data of three expecting positive' do
@@ -400,3 +522,70 @@ describe NaiveBayes::Classifier do
400
522
  end
401
523
  end
402
524
  end
525
+
526
+ describe NaiveBayes::Classifier do
527
+ describe 'Complement Naive Bayes' do
528
+ context 'with train data of three expecting positive' do
529
+
530
+ subject { classifier.classify({"aaa" => 4, "bbb" => 3, "ccc" => 3}) }
531
+
532
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement") }
533
+
534
+ it 'should return positive' do
535
+ classifier.train("positive", {"aaa" => 3, "bbb" => 1, "ccc" => 2})
536
+ classifier.train("negative", {"aaa" => 1, "bbb" => 4, "ccc" => 2})
537
+ classifier.train("neutral", {"aaa" => 2, "bbb" => 3, "ccc" => 5})
538
+
539
+ expected = {
540
+ "neutral" => 9.985931139006835,
541
+ "negative" => 10.112101263742268,
542
+ "positive" => 10.836883752313222
543
+ }
544
+
545
+ expect(subject).to eq expected
546
+ end
547
+ end
548
+
549
+ context 'with train data of three expecting negative' do
550
+
551
+ subject { classifier.classify({"aaa" => 3, "bbb" => 4, "ccc" => 3}) }
552
+
553
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement") }
554
+
555
+ it 'should return negative' do
556
+ classifier.train("positive", {"aaa" => 3, "bbb" => 1, "ccc" => 2})
557
+ classifier.train("negative", {"aaa" => 1, "bbb" => 4, "ccc" => 2})
558
+ classifier.train("neutral", {"aaa" => 2, "bbb" => 3, "ccc" => 5})
559
+
560
+ expected = {
561
+ "neutral" => 9.80360958221288,
562
+ "positive" => 10.143736571753276,
563
+ "negative" => 10.294422820536223
564
+ }
565
+
566
+ expect(subject).to eq expected
567
+ end
568
+ end
569
+
570
+ context 'with train data of three expecting neutral' do
571
+
572
+ subject { classifier.classify({"aaa" => 3, "bbb" => 3, "ccc" => 5}) }
573
+
574
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement") }
575
+
576
+ it 'should return neutral' do
577
+ classifier.train("positive", {"aaa" => 3, "bbb" => 1, "ccc" => 2})
578
+ classifier.train("negative", {"aaa" => 1, "bbb" => 4, "ccc" => 2})
579
+ classifier.train("neutral", {"aaa" => 2, "bbb" => 3, "ccc" => 5})
580
+
581
+ expected = {
582
+ "negative" => 10.68941662877709,
583
+ "positive" => 11.06002730362743,
584
+ "neutral" => 11.149081948812517
585
+ }
586
+
587
+ expect(subject).to eq expected
588
+ end
589
+ end
590
+ end
591
+ end
@@ -6,6 +6,6 @@ describe NaiveBayes do
6
6
  context "VERSION" do
7
7
  subject { NaiveBayes::VERSION }
8
8
 
9
- it { expect(subject).to eq "0.0.3" }
9
+ it { expect(subject).to eq "0.1.0" }
10
10
  end
11
11
  end
metadata CHANGED
@@ -1,55 +1,55 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: naivebayes
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - id774
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-13 00:00:00.000000000 Z
11
+ date: 2014-01-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cucumber
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: jeweler
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '>='
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '>='
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  description: Naive Bayes classifier
@@ -89,17 +89,17 @@ require_paths:
89
89
  - lib
90
90
  required_ruby_version: !ruby/object:Gem::Requirement
91
91
  requirements:
92
- - - '>='
92
+ - - ">="
93
93
  - !ruby/object:Gem::Version
94
94
  version: '0'
95
95
  required_rubygems_version: !ruby/object:Gem::Requirement
96
96
  requirements:
97
- - - '>='
97
+ - - ">="
98
98
  - !ruby/object:Gem::Version
99
99
  version: '0'
100
100
  requirements: []
101
101
  rubyforge_project:
102
- rubygems_version: 2.1.11
102
+ rubygems_version: 2.2.0
103
103
  signing_key:
104
104
  specification_version: 4
105
105
  summary: naivebayes