nb 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8c47b77ea4e1ea80b323d2d398a0fcaa8344b1d1
4
+ data.tar.gz: eecba79751ad4b93e8aeec5bd1b769a4aec40a29
5
+ SHA512:
6
+ metadata.gz: 14fd940f7e3e7f99d426fc31a3de91f668ac9622df3aca7739c4d4f3c1c618aa9e63f6e5ab1fc994fbe7b6c71e226c3e50cb8725f0ee953e1675cf4047aa64d1
7
+ data.tar.gz: 34644c2f74430a5753f9f196906162430ff20faecada43db0c2f832108459aaa71337c6b5e267aa47a1d664a5290205c75841f3acf828365c2e22d600974ddc5
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,10 @@
1
# Fetch gems from the official index over HTTPS; a plain-HTTP source gives
# gem downloads no transport security.
source 'https://rubygems.org'

# Pull in the dependencies declared in nb.gemspec.
gemspec

# Tooling needed only to run and watch the test suite.
group :development, :test do
  gem 'rspec', '~> 2.12.0'
  gem 'guard'
  gem 'guard-rspec'
  gem 'simplecov', require: false
end
data/Guardfile ADDED
@@ -0,0 +1,8 @@
1
# NOTE(review): presumably silences deprecation notices from the guard gems — confirm.
ENV['GUARD_GEM_SILENCE_DEPRECATIONS'] = '1'

guard 'rspec' do
  # A spec file that changes is re-run directly.
  watch(%r{^spec/.+_spec\.rb$})

  # A library file maps to the spec file with the same relative path.
  watch(%r{^lib/(.+)\.rb$}) do |match|
    "spec/#{match[1]}_spec.rb"
  end

  # Touching the shared helper re-runs the whole spec directory.
  watch('spec/spec_helper.rb') { 'spec' }
end
8
+
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Forrest Ye
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # Nb
2
+
3
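+ Yet another Naive Bayes library: a small token-based classifier that can be trained on labelled items and saved to / loaded from YAML.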
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'nb'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install nb
20
+
21
+ ## Usage
22
+
23
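+ Create a classifier with `bayes = NaiveBayes.new(:love, :hate)`, train it with `bayes.train(:love, 'I', 'love', 'you')`, then call `bayes.classify('I', 'love', 'you')` to get the best `[category, score]` pair (or `bayes.classifications(...)` for all of them). Use `bayes.save(path)` and `NaiveBayes.load_yaml(path)` to persist a trained classifier.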
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it ( https://github.com/forresty/nb/fork )
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,90 @@
1
+ require "yaml"
2
+
3
+ class NaiveBayes
4
+ attr_accessor :categories, :tokens_count, :categories_count
5
+
6
+ def initialize(*categories)
7
+ @categories = categories
8
+ @tokens_count = {}
9
+ @categories_count = {}
10
+
11
+ categories.each do |category|
12
+ @tokens_count[category] = Hash.new(0)
13
+ @categories_count[category] = 0
14
+ end
15
+ end
16
+
17
+ def train(category, *tokens)
18
+ tokens.uniq.each do |token|
19
+ @tokens_count[category][token] += 1
20
+ end
21
+ @categories_count[category] += 1
22
+ end
23
+
24
+ def classify(*tokens)
25
+ classifications(*tokens).first
26
+ end
27
+
28
+ def classifications(*tokens)
29
+ scores = {}
30
+ @categories.each do |category|
31
+ scores[category] = probability_of_tokens_given_a_category(tokens, category) * probability_of_a_category(category)
32
+ end
33
+ scores.sort_by { |k, v| -v }
34
+ end
35
+
36
+ def probability_of_a_token_given_a_category(token, category)
37
+ return assumed_probability if @tokens_count[category][token] == 0
38
+
39
+ @tokens_count[category][token].to_f / @categories_count[category]
40
+ end
41
+
42
+ def probability_of_tokens_given_a_category(tokens, category)
43
+ tokens.inject(1.0) do |product, token|
44
+ product * probability_of_a_token_given_a_category(token, category)
45
+ end
46
+ end
47
+
48
+ def probability_of_a_category(category)
49
+ @categories_count[category].to_f / total_number_of_items
50
+ end
51
+
52
+ # def total_number_of_tokens
53
+ # @tokens_count.values.inject(0) { |sum, hash| sum + hash.values.inject(&:+) }
54
+ # end
55
+
56
+ def total_number_of_items
57
+ @categories_count.values.inject(&:+)
58
+ end
59
+
60
+ # If we have only trained a little bit a class may not have had a feature yet
61
+ # give it a probability of 0 may not be true so we produce a assumed probability
62
+ # which gets smaller more we train
63
+ def assumed_probability
64
+ 0.5 / (total_number_of_items.to_f / 2)
65
+ end
66
+
67
+ def data
68
+ {
69
+ :categories => @categories,
70
+ :tokens_count => @tokens_count,
71
+ :categories_count => @categories_count
72
+ }
73
+ end
74
+
75
+ def save(yaml_file)
76
+ File.write(yaml_file, data.to_yaml)
77
+ end
78
+
79
+ class << self
80
+ def load_yaml(yaml_file)
81
+ data = YAML.load_file(yaml_file)
82
+
83
+ new.tap do |bayes|
84
+ bayes.categories = data[:categories]
85
+ bayes.tokens_count = data[:tokens_count]
86
+ bayes.categories_count = data[:categories_count]
87
+ end
88
+ end
89
+ end
90
+ end
data/lib/nb/version.rb ADDED
@@ -0,0 +1,3 @@
1
class NaiveBayes
  # Release version of the gem; frozen so the constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
data/lib/nb.rb ADDED
@@ -0,0 +1 @@
1
+ require "nb/naive_bayes"
data/nb.gemspec ADDED
@@ -0,0 +1,22 @@
1
+ # coding: utf-8
2
# Make lib/ loadable so the version constant can be read below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'nb/version'

Gem::Specification.new do |gem|
  gem.name     = 'nb'
  gem.version  = NaiveBayes::VERSION
  gem.authors  = ['Forrest Ye']
  gem.email    = ['afu@forresty.com']
  gem.summary  = ' yet another Naive Bayes library '
  gem.homepage = 'https://github.com/forresty/nb'
  gem.license  = 'MIT'

  # Package exactly the files tracked by git (NUL-separated listing).
  tracked_files = `git ls-files -z`.split("\x0")

  gem.files         = tracked_files
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  gem.add_development_dependency 'bundler', '~> 1.6'
  gem.add_development_dependency 'rake', '~> 10.0'
end
@@ -0,0 +1,91 @@
1
+ require "spec_helper"
2
+
3
describe NaiveBayes do
  # Declared first so every example below, including the one-liners,
  # visibly runs against a two-category classifier.
  let(:bayes) { NaiveBayes.new(:love, :hate) }
  subject { bayes }

  it { should respond_to :train }
  it { should respond_to :save }
  it { should respond_to :classify }
  it { should respond_to :classifications }
  it { should respond_to :probability_of_a_token_given_a_category }
  it { should respond_to :probability_of_tokens_given_a_category }
  it { should respond_to :probability_of_a_category }
  # it { should respond_to :total_number_of_tokens }
  it { should respond_to :total_number_of_items }

  # describe '#total_number_of_tokens' do
  #   it 'calculates correctly' do
  #     bayes.train :love, 'I', 'love', 'you'
  #     bayes.train :hate, 'I', 'hate', 'you'
  #
  #     bayes.total_number_of_tokens.should == 6
  #
  #     bayes.train :love, 'I', 'love', 'you', 'more'
  #
  #     bayes.total_number_of_tokens.should == 10
  #   end
  # end

  describe '#total_number_of_items' do
    it 'calculates correctly' do
      bayes.train :love, 'I', 'love', 'you'
      bayes.train :hate, 'I', 'hate', 'you'

      bayes.total_number_of_items.should == 2

      bayes.train :love, 'I', 'love', 'you', 'more'

      bayes.total_number_of_items.should == 3
    end
  end

  describe '#probability_of_a_category' do
    it 'calculates correctly' do
      bayes.train :love, 'I', 'love', 'you'
      bayes.train :hate, 'I', 'hate', 'you'

      bayes.probability_of_a_category(:love).should == 0.5
    end
  end

  # Label matches the real method name (it previously omitted the "a_").
  describe '#probability_of_a_token_given_a_category' do
    it 'calculates correctly' do
      bayes.train :love, 'I', 'love', 'you'
      bayes.train :hate, 'I', 'hate', 'you'

      bayes.probability_of_a_token_given_a_category('love', :love).should == 1
      bayes.probability_of_a_token_given_a_category('you', :hate).should == 1

      bayes.train :love, 'I', 'love', 'you', 'more'

      bayes.probability_of_a_token_given_a_category('more', :love).should == 0.5
      # bayes.probability_of_a_token_given_a_category('more', :hate).should == 0
    end
  end

  describe '#classifications' do
    it 'calculates correctly' do
      bayes.train :love, 'I', 'love', 'you'
      bayes.train :hate, 'I', 'hate', 'you'

      bayes.classifications(*%w{ I love you }).should == [[:love, 0.5], [:hate, 0.25]]
      bayes.classify(*%w{ I love you }).should == [:love, 0.5]
      bayes.classify(*%w{ love }).should == [:love, 0.5]

      bayes.train :love, 'I', 'love', 'you'
      bayes.train :love, 'I', 'love', 'you'
      bayes.train :love, 'I', 'love', 'you'

      bayes.classify(*%w{ I love you }).should == [:love, 0.8]
      bayes.classify(*%w{ love }).should == [:love, 0.8]
      bayes.classify(*%w{ only love }).first.should == :love #[:love, 0.16], (0.2 * 1) * 0.8
    end
  end

  describe 'class methods' do
    subject { NaiveBayes }

    it { should respond_to :load_yaml }
  end
end
@@ -0,0 +1,6 @@
1
+ require "simplecov"
2
# Start SimpleCov with paths matching '/spec/' filtered out.
SimpleCov.start { add_filter '/spec/' }
5
+
6
+ require "nb"
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: nb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Forrest Ye
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ description:
42
+ email:
43
+ - afu@forresty.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .gitignore
49
+ - Gemfile
50
+ - Guardfile
51
+ - LICENSE.txt
52
+ - README.md
53
+ - Rakefile
54
+ - lib/nb.rb
55
+ - lib/nb/naive_bayes.rb
56
+ - lib/nb/version.rb
57
+ - nb.gemspec
58
+ - spec/nb/naive_bayes_spec.rb
59
+ - spec/spec_helper.rb
60
+ homepage: https://github.com/forresty/nb
61
+ licenses:
62
+ - MIT
63
+ metadata: {}
64
+ post_install_message:
65
+ rdoc_options: []
66
+ require_paths:
67
+ - lib
68
+ required_ruby_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - '>='
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ requirements: []
79
+ rubyforge_project:
80
+ rubygems_version: 2.0.2
81
+ signing_key:
82
+ specification_version: 4
83
+ summary: yet another Naive Bayes library
84
+ test_files:
85
+ - spec/nb/naive_bayes_spec.rb
86
+ - spec/spec_helper.rb