naive_bayes 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 reddavis
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,27 @@
1
+ = Naive Bayes Classifier
2
+
3
+ This is an extremely simple, straightforward Naive Bayes implementation.
4
+
5
+ == Install
6
+
7
+ gem sources -a http://gemcutter.org
8
+ sudo gem install naive_bayes
9
+
10
+ == How To Use
11
+
12
+ require 'rubygems'
13
+ require 'naive_bayes'
14
+
15
+ a = NaiveBayes.new(:spam, :ham)
16
+
17
+ a.train(:spam, 'bad', 'word')
18
+ a.train(:ham, 'good', 'word')
19
+
20
+ b = "this is a bad sentence".split(' ')
21
+
22
+ a.classify(*b)
23
+ #=> [:spam, 0.03125]
24
+
25
+ == Copyright
26
+
27
+ Copyright (c) 2009 Red Davis. See LICENSE for details.
@@ -0,0 +1,45 @@
require 'rubygems'
require 'rake'

# Gem packaging tasks. Jeweler is optional: when it cannot be loaded we print
# a hint and carry on, so the remaining tasks below are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "naive_bayes"
    gem.summary = %Q{Simple straight forward Naive Bayes classifier implementation}
    gem.description = %Q{Simple straight forward Naive Bayes classifier implementation}
    gem.email = "reddavis@gmail.com"
    gem.homepage = "http://github.com/reddavis/Naive-Bayes"
    gem.authors = ["reddavis"]
    gem.add_development_dependency "rspec", ">= 1.2.9"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# Spec tasks: a plain run (:spec) and a run with rcov coverage (:rcov).
require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

# :check_dependencies is not defined anywhere in this Rakefile (presumably it
# comes from the Jeweler tasks above). Only wire it in as a prerequisite when
# it exists; otherwise `rake spec` would abort on an unknown task whenever the
# Jeweler require was rescued.
if Rake::Task.task_defined?('check_dependencies')
  task :spec => :check_dependencies
end

task :default => :spec

# RDoc generation for the README and everything under lib/.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not end up in
  # the generated title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "naive_bayes #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.0
@@ -0,0 +1,80 @@
# Bayes Theorem
#   P(A|B) = P(B|A) * P(A) / P(B)
#
# Terminology
#   An ITEM is made up of FEATURES
#   An ITEM belongs to a CLASS
#
# Bayes With Our Terminology
#   P(Class | Item) = P(Item | Class) * P(Class) / P(Item)
#
# However, when classifying, P(Item) is the same across all calculations,
# so we don't bother to calculate it.

# A small Naive Bayes classifier over a fixed set of classes.
#
#   a = NaiveBayes.new(:spam, :ham)
#   a.train(:spam, 'bad', 'word')
#   a.train(:ham, 'good', 'word')
#   a.classify('this', 'is', 'a', 'bad', 'sentence') #=> [:spam, 0.03125]
class NaiveBayes

  # features_count: { klass => { feature => number of trained items of that
  #                   class containing the feature } }
  # klass_count:    { klass => number of items trained for that class }
  # Both store their counts as Floats so later divisions are float divisions.
  attr_reader :features_count, :klass_count

  # klasses - the class labels this classifier can be trained on and can
  #           return from #classify.
  def initialize(*klasses)
    @features_count = {}
    @klass_count = {}
    @klasses = klasses

    klasses.each do |klass|
      @features_count[klass] = Hash.new(0.0)
      @klass_count[klass] = 0.0
    end
  end

  # Records one training item belonging to +klass+.
  #
  # klass    - one of the labels given to #initialize.
  # features - the features of the item; duplicates are counted once.
  #
  # Returns the updated item count for +klass+.
  # Raises ArgumentError if +klass+ was not declared in #initialize.
  def train(klass, *features)
    unless @klass_count.key?(klass)
      raise ArgumentError, "unknown class: #{klass.inspect}"
    end

    counts = @features_count[klass]
    features.uniq.each { |feature| counts[feature] += 1 }
    @klass_count[klass] += 1
  end

  # Scores every class as P(Item | Class) * P(Class) and picks the best one.
  #
  # features - the features of the item to classify. Duplicates are not
  #            collapsed here (unlike in #train), so a repeated feature
  #            contributes once per occurrence.
  #
  # Returns a [klass, score] pair for the highest scoring class, or nil when
  # the classifier was created without any classes.
  def classify(*features)
    scores = @klasses.map do |klass|
      [klass, prob_of_item_given_a_class(features, klass) * prob_of_class(klass)]
    end
    scores.max_by { |pair| pair[1] }
  end

  private

  # P(Item | Class): the product of the per-feature probabilities. A feature
  # the class has never been trained with would zero the whole product, so it
  # is replaced by the assumed probability.
  def prob_of_item_given_a_class(features, klass)
    features.inject(1.0) do |product, feature|
      prob = prob_of_feature_given_a_class(feature, klass)
      prob = assumed_probability if prob == 0
      product * prob
    end
  end

  # P(Feature | Class); 0.5 while the class has no training items at all.
  def prob_of_feature_given_a_class(feature, klass)
    return 0.5 if @klass_count[klass] == 0
    @features_count[klass][feature] / @klass_count[klass]
  end

  # P(Class). Before anything has been trained there is nothing to estimate
  # the prior from (0.0 / 0.0 would be NaN and make the scores incomparable),
  # so every class gets the same prior.
  def prob_of_class(klass)
    total = total_items
    return 1.0 / @klass_count.size if total == 0
    @klass_count[klass] / total
  end

  # Number of items trained so far, across all classes.
  def total_items
    @klass_count.values.inject(0) { |sum, count| sum + count }
  end

  # If we have only trained a little, a class may not have seen a feature yet.
  # Giving it a probability of 0 may not be true, so we use an assumed
  # probability (1 / total_items) which gets smaller the more we train.
  # Only reached once at least one item has been trained.
  def assumed_probability
    0.5 / (total_items / 2)
  end

end
@@ -0,0 +1,55 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{naive_bayes}
8
+ s.version = "0.0.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["reddavis"]
12
+ s.date = %q{2009-11-14}
13
+ s.description = %q{Simple straight forward Naive Bayes classifier implementation}
14
+ s.email = %q{reddavis@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "lib/naive_bayes.rb",
27
+ "naive_bayes.gemspec",
28
+ "spec/naive_bayes_spec.rb",
29
+ "spec/spec.opts",
30
+ "spec/spec_helper.rb"
31
+ ]
32
+ s.homepage = %q{http://github.com/reddavis/Naive-Bayes}
33
+ s.rdoc_options = ["--charset=UTF-8"]
34
+ s.require_paths = ["lib"]
35
+ s.rubygems_version = %q{1.3.5}
36
+ s.summary = %q{Simple straight forward Naive Bayes classifier implementation}
37
+ s.test_files = [
38
+ "spec/naive_bayes_spec.rb",
39
+ "spec/spec_helper.rb"
40
+ ]
41
+
42
+ if s.respond_to? :specification_version then
43
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
44
+ s.specification_version = 3
45
+
46
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
47
+ s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
48
+ else
49
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
50
+ end
51
+ else
52
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
53
+ end
54
+ end
55
+
@@ -0,0 +1,44 @@
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')

describe "NaiveBayes" do

  describe "Initialization" do
    before(:all) do
      @classifier = NaiveBayes.new(:spam, :ham)
    end

    it "should create a features count for each class" do
      @classifier.features_count.size.should == 2
    end
  end

  describe "Training" do
    before(:all) do
      @classifier = NaiveBayes.new(:spam, :ham)
      @classifier.train(:spam, 'bad', 'word')
    end

    it "should record each trained feature for the class" do
      @classifier.features_count[:spam].size.should == 2
    end

    it "should bump klass_count for spam up to 1" do
      @classifier.klass_count[:spam].should == 1
    end
  end

  describe "Classification" do
    before do
      @classifier = NaiveBayes.new(:spam, :ham)
      @classifier.train(:spam, 'bad', 'word')
      @classifier.train(:ham, 'we', 'bad')
    end

    it "should return the highest scoring class together with its score" do
      a = @classifier.classify('bad', 'word')
      # spam: P(bad|spam) * P(word|spam) * P(spam) = 1.0 * 1.0 * 0.5 = 0.5
      # ham:  'word' is unseen for ham, so the assumed probability (0.5) is
      #       used: 1.0 * 0.5 * 0.5 = 0.25
      a[0].should == :spam
      a[1].should == 0.5
    end
  end

end
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,9 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'naive_bayes'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: naive_bayes
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - reddavis
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-11-14 00:00:00 +00:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.9
24
+ version:
25
+ description: Simple straight forward Naive Bayes classifier implementation
26
+ email: reddavis@gmail.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - LICENSE
33
+ - README.rdoc
34
+ files:
35
+ - .document
36
+ - .gitignore
37
+ - LICENSE
38
+ - README.rdoc
39
+ - Rakefile
40
+ - VERSION
41
+ - lib/naive_bayes.rb
42
+ - naive_bayes.gemspec
43
+ - spec/naive_bayes_spec.rb
44
+ - spec/spec.opts
45
+ - spec/spec_helper.rb
46
+ has_rdoc: true
47
+ homepage: http://github.com/reddavis/Naive-Bayes
48
+ licenses: []
49
+
50
+ post_install_message:
51
+ rdoc_options:
52
+ - --charset=UTF-8
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: "0"
60
+ version:
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: "0"
66
+ version:
67
+ requirements: []
68
+
69
+ rubyforge_project:
70
+ rubygems_version: 1.3.5
71
+ signing_key:
72
+ specification_version: 3
73
+ summary: Simple straight forward Naive Bayes classifier implementation
74
+ test_files:
75
+ - spec/naive_bayes_spec.rb
76
+ - spec/spec_helper.rb