solid_naive_bayes 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: e76c76d77d5b450025bae114082124feaeb0adc06da2ff80135f0d20ec7911ca
4
+ data.tar.gz: 7b7b16848ed4cea466afcfaae2eb644c2417c0b052958c21858d2b71b18d49ea
5
+ SHA512:
6
+ metadata.gz: a26f0e3250f6724091c117c6c43f33a681a8c611a52971d131b86fed3cdf4a55245604de5d4850ea18c70858b3ba60be5cdb28ed18c2cabe5f2de06604c6a9a7
7
+ data.tar.gz: 2e590688e26cd3481ddb5a48f3d08bc36d6fbd1fef2536c67b774386d287c6820f4eca14781e9c12147fd56b807b8909962ddc65868ce839dc3d40c2257fcf5b
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 reddavis
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,53 @@
1
+ = Naive Bayes Classifier
2
+
3
+ This is an extremely simple, straightforward Naive Bayes implementation.
4
+
5
+ == Install
6
+
7
+ gem sources -a http://gemcutter.org
8
+ sudo gem install naive_bayes
9
+
10
+ == How To Use
11
+
12
+ require 'rubygems'
13
+ require 'naive_bayes'
14
+
15
+ a = NaiveBayes.new(:spam, :ham)
16
+
17
+ a.train(:spam, 'bad', 'word')
18
+ a.train(:ham, 'good', 'word')
19
+
20
+ b = "this is a bad sentence".split(' ')
21
+
22
+ a.classify(*b)
23
+ #=> [:spam, 0.03125]
24
+
25
+ You can also tell your classifier to save itself, so it's easy for you to pick up where you left off:
26
+
27
+ require 'rubygems'
28
+ require 'naive_bayes'
29
+
30
+ a = NaiveBayes.new(:spam, :ham)
31
+ a.db_filepath = 'path/to/anywhere.nb'
32
+
33
+ a.train(:spam, 'bad', 'word')
34
+ a.train(:ham, 'good', 'word')
35
+
36
+ a.save
37
+
38
+ Some time goes past and we want to classify a new document we just received...
39
+
40
+ require 'rubygems'
41
+ require 'naive_bayes'
42
+
43
+ a = NaiveBayes.load('path/to/file')
44
+
45
+ b = "this is a bad sentence".split(' ')
46
+
47
+ # It's as if we were never apart
48
+ a.classify(*b)
49
+ #=> [:spam, 0.03125]
50
+
51
+ == Copyright
52
+
53
+ Copyright (c) 2009 Red Davis. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,45 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
# Gem packaging tasks. Jeweler is optional: when it (or one of its
# dependencies) cannot be loaded, a hint is printed and the rest of the
# Rakefile is still evaluated.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |gemspec|
    gemspec.name        = 'naive_bayes'
    gemspec.summary     = 'Simple straight forward Naive Bayes classifier implementation'
    gemspec.description = 'Simple straight forward Naive Bayes classifier implementation'
    gemspec.email       = 'reddavis@gmail.com'
    gemspec.homepage    = 'http://github.com/reddavis/Naive-Bayes'
    gemspec.authors     = ['reddavis']
    gemspec.add_development_dependency('rspec', '>= 1.2.9')
    # gemspec is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end

  Jeweler::GemcutterTasks.new
rescue LoadError
  puts 'Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler'
end

# RSpec 1.x rake integration.
require 'spec/rake/spectask'

# `rake spec`: run every *_spec.rb under spec/ with lib/ and spec/ on the load path.
Spec::Rake::SpecTask.new(:spec) do |t|
  t.libs.concat(%w[lib spec])
  t.spec_files = FileList['spec/**/*_spec.rb']
end

# `rake rcov`: same spec files, with the task's rcov flag switched on.
Spec::Rake::SpecTask.new(:rcov) do |t|
  t.libs.concat(%w[lib spec])
  t.pattern = 'spec/**/*_spec.rb'
  t.rcov = true
end

# :spec additionally depends on :check_dependencies, which is not defined in
# this file (presumably provided by Jeweler -- confirm).
task :spec => :check_dependencies

task :default => :spec

require 'rake/rdoctask'

# `rake rdoc`: generate API docs into rdoc/, titled with the VERSION file's
# content when that file exists.
Rake::RDocTask.new do |doc|
  version = ''
  version = File.read('VERSION') if File.exist?('VERSION')

  doc.rdoc_dir = 'rdoc'
  doc.title = "naive_bayes #{version}"
  doc.rdoc_files.include('README*', 'lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.3
@@ -0,0 +1,98 @@
1
+ # Bayes Theorem
2
+ # P(A|B) = P(B|A) * P(A) / P(B)
3
+
4
+ # Terminology
5
+ # An ITEM is made up of FEATURES
6
+ # An ITEM belongs to a CLASS
7
+
8
+ # Bayes With Our Terminology
9
+ # P(Class | Item) = P(Item | Class) * P(Class) / P(Item)
10
+
11
+ # However, when classifying, P(Item) is the same across all calculations
12
+ # So we don't bother to calculate it
13
# Minimal Naive Bayes classifier over arbitrary feature tokens.
#
# Every class is scored as P(Item | Class) * P(Class); P(Item) is left out
# because it is the same for every class and cannot change the ranking.
#
# Features that were never seen for a class are skipped (they contribute a
# factor of 1) instead of forcing that class's score to zero.
#
# All counters are Floats so the divisions below never truncate.
class NaiveBayes
  class << self
    # Restores a classifier previously written by #save.
    #
    # NOTE(review): Marshal.load can instantiate arbitrary objects. Only load
    # files this application wrote itself, never data from an untrusted source.
    #
    # @param db_path [String] path of the file to read
    # @return [Object] the object that was marshalled into the file
    def load(db_path)
      # Marshal output is binary, so read it in binary mode to avoid newline
      # or encoding translation.
      File.open(db_path, 'rb') { |file| Marshal.load(file) }
    end
  end

  # Path used by #save; must be set before calling it.
  attr_accessor :db_filepath

  # @param klasses [Array] the class labels this classifier can assign
  def initialize(*klasses)
    @features_count = {}
    @klass_count = {}
    @klasses = klasses

    klasses.each do |klass|
      # A plain default value (not a default block) keeps the hash marshallable.
      @features_count[klass] = Hash.new(0.0)
      @klass_count[klass] = 0.0
    end
  end

  # Records one item of class +klass+ made up of +features+.
  # Duplicate features within a single item are counted once.
  #
  # @param klass one of the labels given to #initialize
  # @param features [Array] the item's features
  # @return [Float] the number of items now recorded for +klass+
  # @raise [ArgumentError] if +klass+ is not a known label
  def train(klass, *features)
    counts = counts_for(klass)
    features.uniq.each { |feature| counts[feature] += 1 }
    @klass_count[klass] += 1
  end

  # Reverses a previous #train call with the same arguments.
  #
  # @param klass one of the labels given to #initialize
  # @param features [Array] the item's features
  # @return [Float] the number of items now recorded for +klass+
  # @raise [ArgumentError] if +klass+ is not a known label
  def untrain(klass, *features)
    counts = counts_for(klass)
    features.uniq.each { |feature| counts[feature] -= 1 }
    @klass_count[klass] -= 1
  end

  # Scores +features+ against every class: P(Class | Item) ~ P(Item | Class) * P(Class).
  #
  # @param features [Array] the features of the item to classify
  # @return [Array] +[klass, score]+ for the best class (the first one wins a
  #   tie), or +[]+ when every score is zero (e.g. nothing has been trained)
  def classify(*features)
    scores = {}
    @klasses.each do |klass|
      scores[klass] = prob_of_item_given_a_class(features, klass) * prob_of_class(klass)
    end
    return [] if scores.values.all? { |score| score.zero? }

    # max_by keeps the first maximum, so ties resolve in class declaration order.
    scores.max_by { |_klass, score| score }
  end

  # Writes the classifier to #db_filepath using Marshal.
  #
  # @return [Integer] number of bytes written
  # @raise [RuntimeError] if no db_filepath has been set
  def save
    raise "You haven't set a db_filepath, I don't know where to save" if @db_filepath.nil?

    # Binary mode: Marshal output is not text.
    File.open(@db_filepath, 'wb') do |file|
      file.write(Marshal.dump(self))
    end
  end

  private

  # P(Item | Class): the product of the per-feature probabilities, ignoring
  # features for which the class has no evidence.
  def prob_of_item_given_a_class(features, klass)
    features.inject(1.0) do |product, feature|
      prob = prob_of_feature_given_a_class(feature, klass)
      prob.nil? ? product : product * prob
    end
  end

  # P(Feature | Class), or nil when the class holds no evidence for the
  # feature (feature unseen, or no items recorded for the class).
  def prob_of_feature_given_a_class(feature, klass)
    items = @klass_count[klass]
    seen = feature_count(feature, klass)
    return nil unless seen > 0 && items > 0

    seen / items
  end

  # How often +feature+ was seen in +klass+. The feature is looked up exactly
  # as given first; its Symbol form is tried as a fallback so models trained
  # with symbols can still be queried with strings.
  def feature_count(feature, klass)
    counts = @features_count[klass]
    seen = counts.fetch(feature, 0.0)
    return seen if seen > 0 || !feature.respond_to?(:to_sym)

    counts.fetch(feature.to_sym, 0.0)
  end

  # P(Class); 0.0 while nothing has been trained, instead of 0/0 = NaN.
  def prob_of_class(klass)
    total = total_items
    return 0.0 if total.zero?

    @klass_count[klass] / total
  end

  # Number of items recorded across all classes.
  def total_items
    @klass_count.values.inject(0) { |sum, count| sum + count }
  end

  # Per-feature counters for +klass+; rejects labels the classifier was not
  # created with instead of failing later on nil.
  def counts_for(klass)
    @features_count.fetch(klass) do
      raise ArgumentError, "unknown class #{klass.inspect}; known classes: #{@klasses.inspect}"
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
# Gem specification for solid_naive_bayes 0.0.3.
#
# NOTE(review): this file's header says it is generated by jeweler; changes
# made here should be mirrored in the Rakefile's Jeweler::Tasks block or they
# will be lost when the gemspec is regenerated.
Gem::Specification.new do |s|
  s.name = %q{solid_naive_bayes}
  s.version = "0.0.3"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["reddavis"]
  s.date = %q{2010-03-11}
  s.description = %q{Simple straight forward Naive Bayes classifier implementation}
  s.email = %q{reddavis@gmail.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.rdoc"
  ]
  s.files = [
    ".document",
    ".gitignore",
    "LICENSE",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "lib/naive_bayes.rb",
    "naive_bayes.gemspec",
    "spec/db/naive.nb",
    "spec/naive_bayes_spec.rb",
    "spec/spec.opts",
    "spec/spec_helper.rb"
  ]
  s.homepage = %q{http://github.com/reddavis/Naive-Bayes}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.6}
  s.summary = %q{Simple straight forward Naive Bayes classifier implementation}
  s.test_files = [
    "spec/naive_bayes_spec.rb",
    "spec/spec_helper.rb"
  ]

  if s.respond_to? :specification_version
    s.specification_version = 3

    # Gem::RubyGemsVersion no longer exists in current RubyGems; prefer
    # Gem::VERSION and only fall back to the legacy constant when it is absent.
    running_rubygems = Gem::Version.new(defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion)

    if running_rubygems >= Gem::Version.new('1.2.0')
      s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
    else
      # RubyGems older than 1.2.0: declare rspec as a regular dependency instead.
      s.add_dependency(%q<rspec>, [">= 1.2.9"])
    end
  else
    s.add_dependency(%q<rspec>, [">= 1.2.9"])
  end
end
56
+
data/spec/db/naive.nb ADDED
@@ -0,0 +1 @@
1
+ o:NaiveBayes :@features_count{: spam}I"bad:EFf1I" word;Ff1f0:ham}I"we;Ff1I"bad;Ff1@ :@klass_count{;f1; f1:
@@ -0,0 +1,75 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
# Behaviour spec for NaiveBayes (RSpec 1.x `should` syntax).
describe "NaiveBayes" do

  describe "Classification" do
    before do
      @classifier = create_and_train_classifier
    end

    it "should classify as spam with a score of 0.5" do
      a = @classifier.classify('bad', 'word')
      a[0].should == :spam
      a[1].should == 0.5
    end

    # Removing the only spam item leaves ham as the sole trained class.
    it "should change information" do
      @classifier.untrain(:spam, 'bad', 'word')
      a = @classifier.classify('bad', 'word')
      a[0].should == :ham
      a[1].should == 1.0
    end
  end

  describe "Saving the NB" do
    describe "DB filepath has been set" do
      before do
        @classifier = NaiveBayes.new(:spam, :ham)
        @classifier.db_filepath = db_filepath
      end

      it "should save to the filepath provided" do
        # Start from a clean slate without relying on FileUtils, which this
        # spec never requires; File.exist? replaces the removed File.exists?.
        File.delete(db_filepath) if File.exist?(db_filepath)
        @classifier.save
        File.exist?(db_filepath).should be_true
      end
    end

    describe "DB filepath has no been set" do
      it "should raise an error" do
        lambda do
          NaiveBayes.new(:spam, :ham).save
        end.should raise_error
      end
    end
  end

  describe "Load" do
    # Writes a trained classifier to disk so the example can load it back.
    before do
      classifier = NaiveBayes.new(:spam, :ham)
      classifier.db_filepath = db_filepath
      classifier.train(:spam, 'bad', 'word')
      classifier.train(:ham, 'we', 'bad')
      classifier.save
    end

    it "should return 0.5" do
      classifier = NaiveBayes.load(db_filepath)
      classifier.classify('bad', 'word')[1].should == 0.5
    end
  end

  private

  # A classifier with one spam item ('bad', 'word') and one ham item ('we', 'bad').
  def create_and_train_classifier
    a = NaiveBayes.new(:spam, :ham)
    a.train(:spam, 'bad', 'word')
    a.train(:ham, 'we', 'bad')
    a
  end

  # Absolute path of the on-disk fixture the save/load examples read and write.
  def db_filepath
    File.expand_path(File.dirname(__FILE__) + '/db/naive.nb')
  end

end
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,9 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'naive_bayes'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: solid_naive_bayes
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ platform: ruby
6
+ authors:
7
+ - reddavis
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2010-03-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rspec
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 1.2.9
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 1.2.9
27
+ description: Simple straight forward Naive Bayes classifier implementation
28
+ email: reddavis@gmail.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files:
32
+ - LICENSE
33
+ - README.rdoc
34
+ files:
35
+ - ".document"
36
+ - ".gitignore"
37
+ - LICENSE
38
+ - README.rdoc
39
+ - Rakefile
40
+ - VERSION
41
+ - lib/naive_bayes.rb
42
+ - naive_bayes.gemspec
43
+ - spec/db/naive.nb
44
+ - spec/naive_bayes_spec.rb
45
+ - spec/spec.opts
46
+ - spec/spec_helper.rb
47
+ homepage: http://github.com/reddavis/Naive-Bayes
48
+ licenses: []
49
+ metadata: {}
50
+ post_install_message:
51
+ rdoc_options:
52
+ - "--charset=UTF-8"
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ requirements: []
66
+ rubyforge_project:
67
+ rubygems_version: 3.0.0.beta3
68
+ signing_key:
69
+ specification_version: 3
70
+ summary: Simple straight forward Naive Bayes classifier implementation
71
+ test_files:
72
+ - spec/naive_bayes_spec.rb
73
+ - spec/spec_helper.rb