solid_naive_bayes 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: e76c76d77d5b450025bae114082124feaeb0adc06da2ff80135f0d20ec7911ca
4
+ data.tar.gz: 7b7b16848ed4cea466afcfaae2eb644c2417c0b052958c21858d2b71b18d49ea
5
+ SHA512:
6
+ metadata.gz: a26f0e3250f6724091c117c6c43f33a681a8c611a52971d131b86fed3cdf4a55245604de5d4850ea18c70858b3ba60be5cdb28ed18c2cabe5f2de06604c6a9a7
7
+ data.tar.gz: 2e590688e26cd3481ddb5a48f3d08bc36d6fbd1fef2536c67b774386d287c6820f4eca14781e9c12147fd56b807b8909962ddc65868ce839dc3d40c2257fcf5b
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 reddavis
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,53 @@
1
+ = Naive Bayes Classifier
2
+
3
+ This is an extremely simple, straightforward Naive Bayes implementation.
4
+
5
+ == Install
6
+
7
+ gem sources -a http://gemcutter.org
8
+ sudo gem install naive_bayes
9
+
10
+ == How To Use
11
+
12
+ require 'rubygems'
13
+ require 'naive_bayes'
14
+
15
+ a = NaiveBayes.new(:spam, :ham)
16
+
17
+ a.train(:spam, 'bad', 'word')
18
+ a.train(:ham, 'good', 'word')
19
+
20
+ b = "this is a bad sentence".split(' ')
21
+
22
+ a.classify(*b)
23
+ #=> [:spam, 0.03125]
24
+
25
+ You can also tell your classifier to save itself, so it's easy for you to pick up where you left off:
26
+
27
+ require 'rubygems'
28
+ require 'naive_bayes'
29
+
30
+ a = NaiveBayes.new(:spam, :ham)
31
+ a.db_filepath = 'path/to/anywhere.nb'
32
+
33
+ a.train(:spam, 'bad', 'word')
34
+ a.train(:ham, 'good', 'word')
35
+
36
+ a.save
37
+
38
+ Some time goes past and we want to classify a new document we just received...
39
+
40
+ require 'rubygems'
41
+ require 'naive_bayes'
42
+
43
+ a = NaiveBayes.load('path/to/file')
44
+
45
+ b = "this is a bad sentence".split(' ')
46
+
47
+ # It's as if we were never apart
48
+ a.classify(*b)
49
+ #=> [:spam, 0.03125]
50
+
51
+ == Copyright
52
+
53
+ Copyright (c) 2009 Red Davis. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,45 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "naive_bayes"
8
+ gem.summary = %Q{Simple straight forward Naive Bayes classifier implementation}
9
+ gem.description = %Q{Simple straight forward Naive Bayes classifier implementation}
10
+ gem.email = "reddavis@gmail.com"
11
+ gem.homepage = "http://github.com/reddavis/Naive-Bayes"
12
+ gem.authors = ["reddavis"]
13
+ gem.add_development_dependency "rspec", ">= 1.2.9"
14
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
15
+ end
16
+ Jeweler::GemcutterTasks.new
17
+ rescue LoadError
18
+ puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
19
+ end
20
+
21
+ require 'spec/rake/spectask'
22
+ Spec::Rake::SpecTask.new(:spec) do |spec|
23
+ spec.libs << 'lib' << 'spec'
24
+ spec.spec_files = FileList['spec/**/*_spec.rb']
25
+ end
26
+
27
+ Spec::Rake::SpecTask.new(:rcov) do |spec|
28
+ spec.libs << 'lib' << 'spec'
29
+ spec.pattern = 'spec/**/*_spec.rb'
30
+ spec.rcov = true
31
+ end
32
+
33
+ task :spec => :check_dependencies
34
+
35
+ task :default => :spec
36
+
37
+ require 'rake/rdoctask'
38
+ Rake::RDocTask.new do |rdoc|
39
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
40
+
41
+ rdoc.rdoc_dir = 'rdoc'
42
+ rdoc.title = "naive_bayes #{version}"
43
+ rdoc.rdoc_files.include('README*')
44
+ rdoc.rdoc_files.include('lib/**/*.rb')
45
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.3
@@ -0,0 +1,98 @@
1
+ # Bayes Theorem
2
+ # P(A|B) = P(B|A) * P(A) / P(B)
3
+
4
+ # Terminology
5
+ # An ITEM is made up of FEATURES
6
+ # An ITEM belongs to a CLASS
7
+
8
+ # Bayes With Our Terminology
9
+ # P(Class | Item) = P(Item | Class) * P(Class) / P(Item)
10
+
11
+ # However, when classifying, P(Item) is the same across all calculations
12
+ # So we don't bother to calculate it
13
# A small Naive Bayes classifier.
#
# Items are lists of features; every item belongs to exactly one of the
# classes given to the constructor. Scores returned by #classify are
# P(Item | Class) * P(Class), i.e. Bayes' theorem without the constant
# P(Item) denominator.
class NaiveBayes

  class << self
    # Restores a classifier previously written with #save.
    #
    # db_path - path of the file produced by #save.
    #
    # Returns the deserialized object.
    #
    # SECURITY: Marshal.load can instantiate arbitrary objects. Only load
    # files you wrote yourself; never feed it data from an untrusted source.
    def load(db_path)
      # Marshal data is binary: read it in binary mode instead of collecting
      # text-mode lines, which can corrupt the stream on some platforms.
      File.open(db_path, "rb") { |f| Marshal.load(f) }
    end
  end

  # Path used by #save.
  attr_accessor :db_filepath

  # klasses - the class labels this classifier can choose between.
  def initialize(*klasses)
    @features_count = {}
    @klass_count = {}
    @klasses = klasses

    klasses.each do |klass|
      # Float counters so the divisions below never truncate.
      @features_count[klass] = Hash.new(0.0)
      @klass_count[klass] = 0.0
    end
  end

  # Records one item of class +klass+ made up of +features+.
  # Repeated features within one item are counted once.
  #
  # Returns the new number of items trained for +klass+.
  def train(klass, *features)
    counts = @features_count[klass]
    features.uniq.each { |feature| counts[feature] += 1 }
    @klass_count[klass] += 1
  end

  # Reverts a previous #train call for the same arguments.
  # Counters never drop below zero, so untraining something that was not
  # trained cannot produce negative probabilities.
  #
  # Returns the new number of items trained for +klass+.
  def untrain(klass, *features)
    counts = @features_count[klass]
    features.uniq.each do |feature|
      counts[feature] = [counts[feature] - 1, 0.0].max
    end
    @klass_count[klass] = [@klass_count[klass] - 1, 0.0].max
  end

  # P(Class | Item) = P(Item | Class) * P(Class)
  #
  # Returns [klass, score] for the best scoring class, or [] when no class
  # has a positive score (nothing trained, or no class has seen every
  # feature).
  def classify(*features)
    scores = @klasses.map do |klass|
      [klass, prob_of_item_given_a_class(features, klass) * prob_of_class(klass)]
    end
    best = scores.max_by { |_klass, score| score }
    return [] if best.nil? || best[1] <= 0.0
    best
  end

  # Serializes the classifier to #db_filepath (see NaiveBayes.load).
  #
  # Raises RuntimeError when no db_filepath has been set.
  def save
    raise "You haven't set a db_filepath, I dont know where to save" if @db_filepath.nil?
    # "wb": Marshal output is binary data.
    File.open(@db_filepath, "wb") do |f|
      f.write(Marshal.dump(self))
    end
  end

  private

  # P(Item | Class): product of the probabilities of every feature.
  def prob_of_item_given_a_class(features, klass)
    features.inject(1.0) do |product, feature|
      product * prob_of_feature_given_a_class(feature, klass)
    end
  end

  # P(Feature | Class)
  # A class without any trained item yields 0.0 instead of 0.0 / 0.0 (NaN).
  def prob_of_feature_given_a_class(feature, klass)
    items_in_class = @klass_count[klass]
    return 0.0 unless items_in_class > 0
    feature_count(feature, klass) / items_in_class
  end

  # Number of items of +klass+ that contained +feature+.
  # The feature is looked up exactly as it was passed to #train; the symbol
  # form is tried as a fallback so models trained with symbols still match
  # string input, as they did before.
  def feature_count(feature, klass)
    counts = @features_count[klass]
    return counts[feature] if counts.key?(feature)
    return counts[feature.to_sym] if feature.respond_to?(:to_sym)
    0.0
  end

  # P(Class); 0.0 while nothing has been trained at all.
  def prob_of_class(klass)
    total = total_items
    return 0.0 unless total > 0
    @klass_count[klass] / total
  end

  # Total number of trained items across all classes.
  def total_items
    @klass_count.values.inject(0.0) { |sum, count| sum + count }
  end

end
@@ -0,0 +1,56 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{solid_naive_bayes}
8
+ s.version = "0.0.3"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["reddavis"]
12
+ s.date = %q{2010-03-11}
13
+ s.description = %q{Simple straight forward Naive Bayes classifier implementation}
14
+ s.email = %q{reddavis@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "lib/naive_bayes.rb",
27
+ "naive_bayes.gemspec",
28
+ "spec/db/naive.nb",
29
+ "spec/naive_bayes_spec.rb",
30
+ "spec/spec.opts",
31
+ "spec/spec_helper.rb"
32
+ ]
33
+ s.homepage = %q{http://github.com/reddavis/Naive-Bayes}
34
+ s.rdoc_options = ["--charset=UTF-8"]
35
+ s.require_paths = ["lib"]
36
+ s.rubygems_version = %q{1.3.6}
37
+ s.summary = %q{Simple straight forward Naive Bayes classifier implementation}
38
+ s.test_files = [
39
+ "spec/naive_bayes_spec.rb",
40
+ "spec/spec_helper.rb"
41
+ ]
42
+
43
+ if s.respond_to? :specification_version then
44
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
45
+ s.specification_version = 3
46
+
47
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
48
+ s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
49
+ else
50
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
51
+ end
52
+ else
53
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
54
+ end
55
+ end
56
+
data/spec/db/naive.nb ADDED
@@ -0,0 +1 @@
1
+ o:NaiveBayes :@features_count{: spam}I"bad:EFf1I" word;Ff1f0:ham}I"we;Ff1I"bad;Ff1@ :@klass_count{;f1; f1:
@@ -0,0 +1,75 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "NaiveBayes" do
4
+
5
+ describe "Classification" do
6
+ before do
7
+ @classifier = create_and_train_classifier
8
+ end
9
+
10
+ it "should classify as spam with a score of 0.5" do
11
+ a = @classifier.classify('bad', 'word')
12
+ a[0].should == :spam
13
+ a[1].should == 0.5
14
+ end
15
+
16
+ it "should change information" do
17
+ @classifier.untrain(:spam, 'bad', 'word')
18
+ a = @classifier.classify('bad', 'word')
19
+ a[0].should == :ham
20
+ a[1].should == 1.0
21
+ end
22
+ end
23
+
24
+ describe "Saving the NB" do
25
+ describe "DB filepath has been set" do
26
+ before do
27
+ @classifier = NaiveBayes.new(:spam, :ham)
28
+ @classifier.db_filepath = db_filepath
29
+ end
30
+
31
+ it "should save to the filepath provided" do
32
+ FileUtils.rm(db_filepath, :force => true)
33
+ @classifier.save
34
+ File.exists?(db_filepath).should be_true
35
+ end
36
+ end
37
+
38
+ describe "DB filepath has no been set" do
39
+ it "should raise an error" do
40
+ lambda do
41
+ NaiveBayes.new(:spam, :ham).save
42
+ end.should raise_error
43
+ end
44
+ end
45
+ end
46
+
47
+ describe "Load" do
48
+ before do
49
+ classifier = NaiveBayes.new(:spam, :ham)
50
+ classifier.db_filepath = db_filepath
51
+ classifier.train(:spam, 'bad', 'word')
52
+ classifier.train(:ham, 'we', 'bad')
53
+ classifier.save
54
+ end
55
+
56
+ it "should return 0.5" do
57
+ classifier = NaiveBayes.load(db_filepath)
58
+ classifier.classify('bad', 'word')[1].should == 0.5
59
+ end
60
+ end
61
+
62
+ private
63
+
64
# Builds a spam/ham classifier trained with one item per class:
# spam saw 'bad' and 'word', ham saw 'we' and 'bad'.
def create_and_train_classifier
  NaiveBayes.new(:spam, :ham).tap do |classifier|
    classifier.train(:spam, 'bad', 'word')
    classifier.train(:ham, 'we', 'bad')
  end
end
70
+
71
# Absolute path of the classifier fixture, db/naive.nb next to this file.
def db_filepath
  spec_dir = File.dirname(__FILE__)
  File.expand_path(File.join(spec_dir, 'db', 'naive.nb'))
end
74
+
75
+ end
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,9 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'naive_bayes'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: solid_naive_bayes
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ platform: ruby
6
+ authors:
7
+ - reddavis
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2010-03-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rspec
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 1.2.9
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 1.2.9
27
+ description: Simple straight forward Naive Bayes classifier implementation
28
+ email: reddavis@gmail.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files:
32
+ - LICENSE
33
+ - README.rdoc
34
+ files:
35
+ - ".document"
36
+ - ".gitignore"
37
+ - LICENSE
38
+ - README.rdoc
39
+ - Rakefile
40
+ - VERSION
41
+ - lib/naive_bayes.rb
42
+ - naive_bayes.gemspec
43
+ - spec/db/naive.nb
44
+ - spec/naive_bayes_spec.rb
45
+ - spec/spec.opts
46
+ - spec/spec_helper.rb
47
+ homepage: http://github.com/reddavis/Naive-Bayes
48
+ licenses: []
49
+ metadata: {}
50
+ post_install_message:
51
+ rdoc_options:
52
+ - "--charset=UTF-8"
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ requirements: []
66
+ rubyforge_project:
67
+ rubygems_version: 3.0.0.beta3
68
+ signing_key:
69
+ specification_version: 3
70
+ summary: Simple straight forward Naive Bayes classifier implementation
71
+ test_files:
72
+ - spec/naive_bayes_spec.rb
73
+ - spec/spec_helper.rb