naivebayes 0.0.3 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6bbec8539369ff2f707fd59440b444cbd2a71e9f
4
- data.tar.gz: 8b0fa15a8dec7d4e314c60cff086074f48f37115
3
+ metadata.gz: ba428b87232e40d36c9c97f615a0e865bef57dfc
4
+ data.tar.gz: 8e195ed2e429a661743dba99236efe6f1529c706
5
5
  SHA512:
6
- metadata.gz: a3210aff3d52771134877dd65b4a9cf524dfd6a754f523eb708b85da4e651e00435b97be6fcca66e63fb2aa37313706012eab99405e940f2d20cd316e8ec43cc
7
- data.tar.gz: d25b10280650406e9e33566641d9c5c9846a59a2fd81f8da9d38d147774174d348e2a43624946154b962c6b6aa98bade8aaebd2c35fcafb3cc54f4f6c075139f
6
+ metadata.gz: 9ca9adf60295e6765bbec1968b486592c5cdc855bc868e0b8a4de982a28c91537985b3521bf28992377f8e03714218db48af7857093ed2dde3e5eeda490cae27
7
+ data.tar.gz: 64e04a038114c4bef08073f55320b7291ee5edb1327e6f2caed868de9edc4559154e87159ebaf125c5c9078d8fd19a6a3736fff624cfa5e46039a0f5486789b9
data/README.md CHANGED
@@ -40,6 +40,20 @@ result = classifier.classify({"aaa" => 1, "bbb" => 1})
40
40
  p result # => {"positive" => 0.9411764705882353,"negative" => 0.05882352941176469}
41
41
  ```
42
42
 
43
+ Complement Naive Bayes.
44
+
45
+ + http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.13.8572
46
+
47
+ ``` ruby
48
+ require 'naivebayes'
49
+ classifier = NaiveBayes::Classifier.new(:model => "complement", :smoothing_parameter => 1)
50
+ classifier.train("positive", {"aaa" => 3, "bbb" => 1, "ccc" => 2})
51
+ classifier.train("negative", {"aaa" => 1, "bbb" => 4, "ccc" => 2})
52
+ classifier.train("neutral", {"aaa" => 2, "bbb" => 3, "ccc" => 5})
53
+ result = classifier.classify({"aaa" => 4, "bbb" => 3, "ccc" => 3})
54
+ p result #=> {"neutral"=>9.985931139006835, "negative"=>10.112101263742268, "positive"=>10.836883752313222}
55
+ ```
56
+
43
57
 
44
58
  ChangeLog
45
59
  ---------
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.3
1
+ 0.1.0
data/demo.rb CHANGED
@@ -23,3 +23,13 @@ classifier.train("negative", {"ccc" => 2, "ddd" => 3})
23
23
  result = classifier.classify({"aaa" => 1, "bbb" => 1})
24
24
 
25
25
  p result # => {"positive" => 0.9411764705882353,"negative" => 0.05882352941176469}
26
+
27
+ classifier = NaiveBayes::Classifier.new(:model => "complement", :smoothing_parameter => 1)
28
+
29
+ classifier.train("positive", {"aaa" => 3, "bbb" => 1, "ccc" => 2})
30
+ classifier.train("negative", {"aaa" => 1, "bbb" => 4, "ccc" => 2})
31
+ classifier.train("neutral", {"aaa" => 2, "bbb" => 3, "ccc" => 5})
32
+ result = classifier.classify({"aaa" => 4, "bbb" => 3, "ccc" => 3})
33
+
34
+ p result #=> {"neutral"=>9.985931139006835, "negative"=>10.112101263742268, "positive"=>10.836883752313222}
35
+
@@ -1,3 +1,8 @@
1
+ === 0.1.0 / 2014-01-23
2
+
3
+ * Implement Complement Naive Bayes.
4
+
5
+
1
6
  === 0.0.3 / 2013-12-13
2
7
 
3
8
  * Accessible instance variable.
@@ -3,7 +3,7 @@
3
3
 
4
4
  module NaiveBayes
5
5
  class Classifier
6
- attr_accessor :frequency_table, :word_table, :instance_count_of, :total_count, :model
6
+ attr_accessor :frequency_table, :word_table, :instance_count_of, :total_count, :model, :smoothing_parameter
7
7
 
8
8
  def initialize(params = {})
9
9
  @frequency_table = Hash.new
@@ -11,17 +11,18 @@ module NaiveBayes
11
11
  @instance_count_of = Hash.new(0)
12
12
  @total_count = 0
13
13
  @model = params[:model]
14
+ @smoothing_parameter = params[:smoothing_parameter] || 1
14
15
  end
15
16
 
16
- def train(label, attributes)
17
+ def train(label, feature)
17
18
  unless @frequency_table.has_key?(label)
18
19
  @frequency_table[label] = Hash.new(0)
19
20
  end
20
- attributes.each {|word, frequency|
21
- if @model == "multinomial"
22
- @frequency_table[label][word] += frequency
23
- else
21
+ feature.each {|word, frequency|
22
+ if @model == "berounoulli"
24
23
  @frequency_table[label][word] += 1
24
+ else
25
+ @frequency_table[label][word] += frequency
25
26
  end
26
27
  @word_table[word] = 1
27
28
  }
@@ -29,7 +30,13 @@ module NaiveBayes
29
30
  @total_count += 1
30
31
  end
31
32
 
32
- def classify(attributes)
33
+ def classify(feature)
34
+ @model == "complement" ? cnb(feature) : mnb(feature)
35
+ end
36
+
37
+ private
38
+
39
+ def mnb(feature)
33
40
  class_prior_of = Hash.new(1)
34
41
  likelihood_of = Hash.new(1)
35
42
  class_posterior_of = Hash.new(1)
@@ -42,7 +49,7 @@ module NaiveBayes
42
49
  @word_table.each_key {|word|
43
50
  laplace_word_likelihood = (@frequency_table[label][word] + 1).to_f /
44
51
  (@instance_count_of[label] + @word_table.size()).to_f
45
- if attributes.has_key?(word)
52
+ if feature.has_key?(word)
46
53
  likelihood_of[label] *= laplace_word_likelihood
47
54
  else
48
55
  likelihood_of[label] *= (1 - laplace_word_likelihood)
@@ -56,5 +63,37 @@ module NaiveBayes
56
63
  }
57
64
  return class_posterior_of
58
65
  end
66
+
67
+ def cnb(feature)
68
+ all_class = @frequency_table.keys
69
+ all_train_data = @instance_count_of.values.inject(0) {|s, v| s + v}
70
+ class_posterior_of = all_class.map {|c|
71
+ n_c = total_number_of_word_in_other_class(c)
72
+ alpha = @smoothing_parameter*feature.length
73
+ term2nd = feature.to_a.map {|e|
74
+ k = e[0]
75
+ v = e[1]
76
+ v*Math.log((number_of_word_in_other_class(c, k) + @smoothing_parameter).to_f/(n_c + alpha))
77
+ }.inject(0) {|s, v| s + v}
78
+ theta_c = @instance_count_of[c].to_f/all_train_data
79
+ [c, Math.log(theta_c) - term2nd]
80
+ }.sort {|x, y| x[1] <=> y[1]}.flatten
81
+ Hash[*class_posterior_of]
82
+ end
83
+
84
+ def total_number_of_word_in_other_class(c)
85
+ all_words = @frequency_table.values.map {|h| h.keys}.flatten.sort.uniq
86
+ other_classes = @frequency_table.keys - [c]
87
+ other_classes.map {|c|
88
+ all_words.map {|w|
89
+ @frequency_table[c][w]
90
+ }
91
+ }.flatten.inject(0) {|s, v| s + v}
92
+ end
93
+
94
+ def number_of_word_in_other_class(c, i)
95
+ other_classes = @frequency_table.keys - [c]
96
+ other_classes.map {|c| @frequency_table[c][i]}.inject(0) {|s, v| s + v}
97
+ end
59
98
  end
60
99
  end
@@ -1,10 +1,10 @@
1
1
  # Name:: NaiveBayes::Version
2
2
  # Author:: 774 <http://id774.net>
3
3
  # Created:: Nov 24, 2013
4
- # Updated:: Dec 13, 2013
5
- # Copyright:: 774 Copyright (c) 2013
4
+ # Updated:: Jan 22, 2014
5
+ # Copyright:: 774 Copyright (c) 2013-2014
6
6
  # License:: Licensed under the GNU GENERAL PUBLIC LICENSE, Version 3.0.
7
7
 
8
8
  module NaiveBayes
9
- VERSION = "0.0.3"
9
+ VERSION = "0.1.0"
10
10
  end
@@ -2,15 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: naivebayes 0.0.3 ruby lib
5
+ # stub: naivebayes 0.1.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "naivebayes"
9
- s.version = "0.0.3"
9
+ s.version = "0.1.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
+ s.require_paths = ["lib"]
12
13
  s.authors = ["id774"]
13
- s.date = "2013-12-13"
14
+ s.date = "2014-01-23"
14
15
  s.description = "Naive Bayes classifier"
15
16
  s.email = "idnanashi@gmail.com"
16
17
  s.extra_rdoc_files = [
@@ -40,8 +41,7 @@ Gem::Specification.new do |s|
40
41
  ]
41
42
  s.homepage = "http://github.com/id774/naivebayes"
42
43
  s.licenses = ["GPL"]
43
- s.require_paths = ["lib"]
44
- s.rubygems_version = "2.1.11"
44
+ s.rubygems_version = "2.2.0"
45
45
  s.summary = "naivebayes"
46
46
 
47
47
  if s.respond_to? :specification_version then
@@ -180,6 +180,28 @@ describe NaiveBayes::Classifier do
180
180
  expect(subject).to eq expected
181
181
  end
182
182
  end
183
+
184
+ context '@model with complement' do
185
+ subject { classifier.model }
186
+
187
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement") }
188
+
189
+ it 'should return model name' do
190
+ expected = "complement"
191
+ expect(subject).to eq expected
192
+ end
193
+ end
194
+
195
+ context '@smoothing_parameter with complement' do
196
+ subject { classifier.smoothing_parameter }
197
+
198
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement", :smoothing_parameter => 3) }
199
+
200
+ it 'should return smoothing parameter' do
201
+ expected = 3
202
+ expect(subject).to eq expected
203
+ end
204
+ end
183
205
  end
184
206
  end
185
207
 
@@ -267,6 +289,106 @@ describe NaiveBayes::Classifier do
267
289
  end
268
290
  end
269
291
 
292
+ describe NaiveBayes::Classifier do
293
+ describe 'Complement Naive Bayes' do
294
+ context 'with train data of two expecting positive' do
295
+
296
+ subject { classifier.classify({"aaa" => 1, "bbb" => 1}) }
297
+
298
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement") }
299
+
300
+ it 'should return positive' do
301
+ classifier.train("positive", {"aaa" => 0, "bbb" => 1})
302
+ classifier.train("negative", {"ccc" => 2, "ddd" => 3})
303
+
304
+ expected = {
305
+ "negative"=>0.8109302162163289,
306
+ "positive"=>3.1986731175506815
307
+ }
308
+
309
+ expect(subject).to eq expected
310
+ end
311
+ end
312
+
313
+ context 'with train data of two expecting negative' do
314
+
315
+ subject { classifier.classify({"ccc" => 3, "ddd" => 3}) }
316
+
317
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement") }
318
+
319
+ it 'should return negative' do
320
+ classifier.train("positive", {"aaa" => 0, "bbb" => 1})
321
+ classifier.train("negative", {"ccc" => 2, "ddd" => 3})
322
+
323
+ expected = {
324
+ "positive"=>3.527593764407934,
325
+ "negative"=>5.898526551448713
326
+ }
327
+
328
+ expect(subject).to eq expected
329
+ end
330
+ end
331
+
332
+ context 'with train data of two expecting negative and smoothing parameter 1' do
333
+
334
+ subject { classifier.classify({"ccc" => 3, "ddd" => 3}) }
335
+
336
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement", :smoothing_parameter => 1) }
337
+
338
+ it 'should return negative' do
339
+ classifier.train("positive", {"aaa" => 0, "bbb" => 1})
340
+ classifier.train("negative", {"ccc" => 2, "ddd" => 3})
341
+
342
+ expected = {
343
+ "positive"=>3.527593764407934,
344
+ "negative"=>5.898526551448713
345
+ }
346
+
347
+ expect(subject).to eq expected
348
+ end
349
+ end
350
+
351
+ context 'with train data of two expecting positive and smoothing parameter 3' do
352
+
353
+ subject { classifier.classify({"aaa" => 1, "bbb" => 1}) }
354
+
355
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement", :smoothing_parameter => 3) }
356
+
357
+ it 'should return positive' do
358
+ classifier.train("positive", {"aaa" => 0, "bbb" => 1})
359
+ classifier.train("negative", {"ccc" => 2, "ddd" => 3})
360
+
361
+ expected = {
362
+ "negative"=>0.7137664677626813,
363
+ "positive"=>1.9054187877005764
364
+ }
365
+
366
+ expect(subject).to eq expected
367
+ end
368
+ end
369
+
370
+ context 'with train data of two expecting positive and smoothing parameter 5' do
371
+
372
+ subject { classifier.classify({"aaa" => 1, "bbb" => 1}) }
373
+
374
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement", :smoothing_parameter => 5) }
375
+
376
+ it 'should return positive' do
377
+ classifier.train("positive", {"aaa" => 0, "bbb" => 1})
378
+ classifier.train("negative", {"ccc" => 2, "ddd" => 3})
379
+
380
+ expected = {
381
+ "negative"=>0.7014459833746406,
382
+ "positive"=>1.5040773967762742
383
+ }
384
+
385
+ expect(subject).to eq expected
386
+ end
387
+ end
388
+
389
+ end
390
+ end
391
+
270
392
  describe NaiveBayes::Classifier do
271
393
  describe 'The berounoulli model' do
272
394
  context 'with train data of three expecting positive' do
@@ -400,3 +522,70 @@ describe NaiveBayes::Classifier do
400
522
  end
401
523
  end
402
524
  end
525
+
526
+ describe NaiveBayes::Classifier do
527
+ describe 'Complement Naive Bayes' do
528
+ context 'with train data of three expecting positive' do
529
+
530
+ subject { classifier.classify({"aaa" => 4, "bbb" => 3, "ccc" => 3}) }
531
+
532
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement") }
533
+
534
+ it 'should return positive' do
535
+ classifier.train("positive", {"aaa" => 3, "bbb" => 1, "ccc" => 2})
536
+ classifier.train("negative", {"aaa" => 1, "bbb" => 4, "ccc" => 2})
537
+ classifier.train("neutral", {"aaa" => 2, "bbb" => 3, "ccc" => 5})
538
+
539
+ expected = {
540
+ "neutral" => 9.985931139006835,
541
+ "negative" => 10.112101263742268,
542
+ "positive" => 10.836883752313222
543
+ }
544
+
545
+ expect(subject).to eq expected
546
+ end
547
+ end
548
+
549
+ context 'with train data of three expecting negative' do
550
+
551
+ subject { classifier.classify({"aaa" => 3, "bbb" => 4, "ccc" => 3}) }
552
+
553
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement") }
554
+
555
+ it 'should return negative' do
556
+ classifier.train("positive", {"aaa" => 3, "bbb" => 1, "ccc" => 2})
557
+ classifier.train("negative", {"aaa" => 1, "bbb" => 4, "ccc" => 2})
558
+ classifier.train("neutral", {"aaa" => 2, "bbb" => 3, "ccc" => 5})
559
+
560
+ expected = {
561
+ "neutral" => 9.80360958221288,
562
+ "positive" => 10.143736571753276,
563
+ "negative" => 10.294422820536223
564
+ }
565
+
566
+ expect(subject).to eq expected
567
+ end
568
+ end
569
+
570
+ context 'with train data of three expecting neutral' do
571
+
572
+ subject { classifier.classify({"aaa" => 3, "bbb" => 3, "ccc" => 5}) }
573
+
574
+ let(:classifier) { NaiveBayes::Classifier.new(:model => "complement") }
575
+
576
+ it 'should return neutral' do
577
+ classifier.train("positive", {"aaa" => 3, "bbb" => 1, "ccc" => 2})
578
+ classifier.train("negative", {"aaa" => 1, "bbb" => 4, "ccc" => 2})
579
+ classifier.train("neutral", {"aaa" => 2, "bbb" => 3, "ccc" => 5})
580
+
581
+ expected = {
582
+ "negative" => 10.68941662877709,
583
+ "positive" => 11.06002730362743,
584
+ "neutral" => 11.149081948812517
585
+ }
586
+
587
+ expect(subject).to eq expected
588
+ end
589
+ end
590
+ end
591
+ end
@@ -6,6 +6,6 @@ describe NaiveBayes do
6
6
  context "VERSION" do
7
7
  subject { NaiveBayes::VERSION }
8
8
 
9
- it { expect(subject).to eq "0.0.3" }
9
+ it { expect(subject).to eq "0.1.0" }
10
10
  end
11
11
  end
metadata CHANGED
@@ -1,55 +1,55 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: naivebayes
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - id774
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-13 00:00:00.000000000 Z
11
+ date: 2014-01-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cucumber
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: jeweler
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - '>='
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - '>='
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  description: Naive Bayes classifier
@@ -89,17 +89,17 @@ require_paths:
89
89
  - lib
90
90
  required_ruby_version: !ruby/object:Gem::Requirement
91
91
  requirements:
92
- - - '>='
92
+ - - ">="
93
93
  - !ruby/object:Gem::Version
94
94
  version: '0'
95
95
  required_rubygems_version: !ruby/object:Gem::Requirement
96
96
  requirements:
97
- - - '>='
97
+ - - ">="
98
98
  - !ruby/object:Gem::Version
99
99
  version: '0'
100
100
  requirements: []
101
101
  rubyforge_project:
102
- rubygems_version: 2.1.11
102
+ rubygems_version: 2.2.0
103
103
  signing_key:
104
104
  specification_version: 4
105
105
  summary: naivebayes