naive_bayes 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 reddavis
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,27 @@
1
+ = Naive Bayes Classifier
2
+
3
+ This is an extremely simple, straightforward Naive Bayes implementation.
4
+
5
+ == Install
6
+
7
+ gem sources -a http://gemcutter.org
8
+ sudo gem install naive_bayes
9
+
10
+ == How To Use
11
+
12
+ require 'rubygems'
13
+ require 'naive_bayes'
14
+
15
+ a = NaiveBayes.new(:spam, :ham)
16
+
17
+ a.train(:spam, 'bad', 'word')
18
+ a.train(:ham, 'good', 'word')
19
+
20
+ b = "this is a bad sentence".split(' ')
21
+
22
+ a.classify(*b)
23
+ #=> [:spam, 0.03125]
24
+
25
+ == Copyright
26
+
27
+ Copyright (c) 2009 Red Davis. See LICENSE for details.
@@ -0,0 +1,45 @@
1
require 'rubygems'
require 'rake'

# Packaging tasks. Jeweler is optional: when it cannot be loaded we print a
# hint and carry on so that the remaining tasks are still usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "naive_bayes"
    gem.summary = %Q{Simple straight forward Naive Bayes classifier implementation}
    gem.description = %Q{Simple straight forward Naive Bayes classifier implementation}
    gem.email = "reddavis@gmail.com"
    gem.homepage = "http://github.com/reddavis/Naive-Bayes"
    gem.authors = ["reddavis"]
    gem.add_development_dependency "rspec", ">= 1.2.9"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new

  # check_dependencies is not defined anywhere in this Rakefile (presumably it
  # is provided by Jeweler::Tasks), so only make :spec depend on it when
  # jeweler actually loaded. Declaring it unconditionally leaves :spec (and
  # therefore :default) with a missing prerequisite when jeweler is absent.
  task :spec => :check_dependencies
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# Runs every *_spec.rb file under spec/.
require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

# Same specs, with rcov enabled.
Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec

# API documentation, written to ./rdoc.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not end up in
  # the generated title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "naive_bayes #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.0
@@ -0,0 +1,80 @@
1
+ # Bayes Theorem
2
+ # P(A|B) = P(B|A) * P(A) / P(B)
3
+
4
+ # Terminology
5
+ # An ITEM is made up of FEATURES
6
+ # An ITEM belongs to a CLASS
7
+
8
+ # Bayes With Our Terminology
9
+ # P(Class | Item) = P(Item | Class) * P(Class) / P(Item)
10
+
11
+ # However, when classifying, P(Item) is the same across all calculations
12
+ # So we don't bother to calculate it
13
+
14
# A small Naive Bayes classifier.
#
# An item is a list of features; every item belongs to one of the classes
# given to the constructor. Scores are P(Item | Class) * P(Class); P(Item) is
# the same for every class and is therefore left out.
#
#   classifier = NaiveBayes.new(:spam, :ham)
#   classifier.train(:spam, 'bad', 'word')
#   classifier.train(:ham, 'good', 'word')
#   classifier.classify('this', 'is', 'a', 'bad', 'sentence')
#   #=> [:spam, 0.03125]
class NaiveBayes

  # features_count: { klass => { feature => number of trained items containing it } }
  # klass_count:    { klass => number of items trained for that class }
  attr_reader :features_count, :klass_count

  # klasses - the class labels this classifier can be trained on and can
  #           return from #classify.
  def initialize(*klasses)
    @features_count = {}
    @klass_count = {}
    @klasses = klasses

    klasses.each do |klass|
      # Float counters keep every later division in floating point.
      @features_count[klass] = Hash.new(0.0)
      @klass_count[klass] = 0.0
    end
  end

  # Records one training item of class +klass+ made up of +features+.
  # A feature repeated within the same item is only counted once.
  #
  # Returns the updated number of items trained for +klass+.
  # Raises ArgumentError if +klass+ was not passed to the constructor.
  def train(klass, *features)
    unless @klass_count.key?(klass)
      raise ArgumentError, "unknown class #{klass.inspect}; expected one of #{@klasses.inspect}"
    end

    counts = @features_count[klass]
    features.uniq.each { |feature| counts[feature] += 1 }
    @klass_count[klass] += 1
  end

  # P(Class | Item) = P(Item | Class) * P(Class)
  #
  # Returns [klass, score] for the best scoring class, or nil when the
  # classifier was created without classes. When several classes share the
  # best score the one declared first wins.
  def classify(*features)
    scores = @klasses.map do |klass|
      [klass, prob_of_item_given_a_class(features, klass) * prob_of_class(klass)]
    end
    scores.max_by { |_klass, score| score }
  end

  private

  # P(Item | Class): the product of the per-feature probabilities. A feature
  # never seen for the class uses assumed_probability instead of 0 so that a
  # single unknown feature does not zero the whole score.
  def prob_of_item_given_a_class(features, klass)
    features.inject(1.0) do |product, feature|
      prob = prob_of_feature_given_a_class(feature, klass)
      prob = assumed_probability if prob == 0
      product * prob
    end
  end

  # P(Feature | Class). A class without any training data yields 0.5.
  def prob_of_feature_given_a_class(feature, klass)
    return 0.5 if @klass_count[klass] == 0
    @features_count[klass][feature] / @klass_count[klass]
  end

  # P(Class). Before anything has been trained there is no evidence for any
  # class, so a uniform prior is used; dividing by a total of zero would give
  # NaN, which cannot be ordered when the scores are compared.
  def prob_of_class(klass)
    total = total_items
    return 1.0 / @klass_count.size if total == 0
    @klass_count[klass] / total
  end

  # Number of items trained across all classes.
  def total_items
    @klass_count.values.inject(0) { |sum, count| sum + count }
  end

  # If we have only trained a little, a class may not have seen a feature yet.
  # Giving it a probability of 0 may not be true, so we produce an assumed
  # probability which gets smaller the more we train (it equals
  # 1 / total_items).
  def assumed_probability
    0.5 / (total_items / 2)
  end

end
@@ -0,0 +1,55 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{naive_bayes}
8
+ s.version = "0.0.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["reddavis"]
12
+ s.date = %q{2009-11-14}
13
+ s.description = %q{Simple straight forward Naive Bayes classifier implementation}
14
+ s.email = %q{reddavis@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "lib/naive_bayes.rb",
27
+ "naive_bayes.gemspec",
28
+ "spec/naive_bayes_spec.rb",
29
+ "spec/spec.opts",
30
+ "spec/spec_helper.rb"
31
+ ]
32
+ s.homepage = %q{http://github.com/reddavis/Naive-Bayes}
33
+ s.rdoc_options = ["--charset=UTF-8"]
34
+ s.require_paths = ["lib"]
35
+ s.rubygems_version = %q{1.3.5}
36
+ s.summary = %q{Simple straight forward Naive Bayes classifier implementation}
37
+ s.test_files = [
38
+ "spec/naive_bayes_spec.rb",
39
+ "spec/spec_helper.rb"
40
+ ]
41
+
42
+ if s.respond_to? :specification_version then
43
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
44
+ s.specification_version = 3
45
+
46
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
47
+ s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
48
+ else
49
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
50
+ end
51
+ else
52
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
53
+ end
54
+ end
55
+
@@ -0,0 +1,44 @@
1
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')

# Specs for NaiveBayes: construction, training counters and classification.
describe "NaiveBayes" do

  describe "Initialization" do
    before(:all) do
      @classifier = NaiveBayes.new(:spam, :ham)
    end

    it "should create a features count for each class" do
      @classifier.features_count.size.should == 2
    end
  end

  describe "Training" do
    before(:all) do
      @classifier = NaiveBayes.new(:spam, :ham)
      @classifier.train(:spam, 'bad', 'word')
    end

    it "should train" do
      @classifier.features_count[:spam].size.should == 2
    end

    it "should bump klass_count for spam up to 1" do
      @classifier.klass_count[:spam].should == 1
    end
  end

  describe "Classification" do
    before do
      @classifier = NaiveBayes.new(:spam, :ham)
      @classifier.train(:spam, 'bad', 'word')
      @classifier.train(:ham, 'we', 'bad')
    end

    # spam: P('bad'|spam) * P('word'|spam) * P(spam) = 1.0 * 1.0 * 0.5 = 0.5
    # ham:  'word' was never seen for ham, so it scores lower than spam.
    it "should classify the item as spam with a score of 0.5" do
      a = @classifier.classify('bad', 'word')
      a[0].should == :spam
      a[1].should == 0.5
    end
  end

end
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,9 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'naive_bayes'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: naive_bayes
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - reddavis
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-11-14 00:00:00 +00:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.9
24
+ version:
25
+ description: Simple straight forward Naive Bayes classifier implementation
26
+ email: reddavis@gmail.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - LICENSE
33
+ - README.rdoc
34
+ files:
35
+ - .document
36
+ - .gitignore
37
+ - LICENSE
38
+ - README.rdoc
39
+ - Rakefile
40
+ - VERSION
41
+ - lib/naive_bayes.rb
42
+ - naive_bayes.gemspec
43
+ - spec/naive_bayes_spec.rb
44
+ - spec/spec.opts
45
+ - spec/spec_helper.rb
46
+ has_rdoc: true
47
+ homepage: http://github.com/reddavis/Naive-Bayes
48
+ licenses: []
49
+
50
+ post_install_message:
51
+ rdoc_options:
52
+ - --charset=UTF-8
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: "0"
60
+ version:
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: "0"
66
+ version:
67
+ requirements: []
68
+
69
+ rubyforge_project:
70
+ rubygems_version: 1.3.5
71
+ signing_key:
72
+ specification_version: 3
73
+ summary: Simple straight forward Naive Bayes classifier implementation
74
+ test_files:
75
+ - spec/naive_bayes_spec.rb
76
+ - spec/spec_helper.rb