naive_bayes 0.0.0 → 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -1
- data/README.rdoc +25 -0
- data/VERSION +1 -1
- data/lib/naive_bayes.rb +22 -1
- data/naive_bayes.gemspec +3 -2
- data/spec/db/naive.nb +1 -0
- data/spec/naive_bayes_spec.rb +36 -23
- metadata +3 -2
data/.gitignore
CHANGED
data/README.rdoc
CHANGED
@@ -21,6 +21,31 @@ This is an extremely simple, straight forward Naive Bayes implementation.
|
|
21
21
|
|
22
22
|
a.classify(*b)
|
23
23
|
#=> [:spam, 0.03125]
|
24
|
+
|
25
|
+
You can also tell your classifier to save itself, so it's easy for you to pick up where you left off:
|
26
|
+
|
27
|
+
require 'rubygems'
|
28
|
+
require 'naive_bayes'
|
29
|
+
|
30
|
+
a = NaiveBayes.new(:spam, :ham)
|
31
|
+
a.db_filepath = 'path/to/anywhere.nb'
|
32
|
+
|
33
|
+
# Save is called after every train function
|
34
|
+
a.train(:spam, 'bad', 'word')
|
35
|
+
a.train(:ham, 'good', 'word')
|
36
|
+
|
37
|
+
Some time passes and we want to classify a new document we just received...
|
38
|
+
|
39
|
+
require 'rubygems'
|
40
|
+
require 'naive_bayes'
|
41
|
+
|
42
|
+
a = NaiveBayes.load('path/to/file')
|
43
|
+
|
44
|
+
b = "this is a bad sentence".split(' ')
|
45
|
+
|
46
|
+
# It's as if we were never apart
|
47
|
+
a.classify(*b)
|
48
|
+
#=> [:spam, 0.03125]
|
24
49
|
|
25
50
|
== Copyright
|
26
51
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.1
|
data/lib/naive_bayes.rb
CHANGED
@@ -13,7 +13,19 @@
|
|
13
13
|
|
14
14
|
class NaiveBayes
|
15
15
|
|
16
|
-
|
16
|
+
class << self
|
17
|
+
def load(db_path)
|
18
|
+
data = ""
|
19
|
+
File.open(db_path) do |f|
|
20
|
+
while line = f.gets
|
21
|
+
data << line
|
22
|
+
end
|
23
|
+
end
|
24
|
+
Marshal.load(data)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
attr_accessor :db_filepath
|
17
29
|
|
18
30
|
def initialize(*klasses)
|
19
31
|
@features_count = {}
|
@@ -31,6 +43,7 @@ class NaiveBayes
|
|
31
43
|
@features_count[klass][feature] += 1
|
32
44
|
end
|
33
45
|
@klass_count[klass] += 1
|
46
|
+
save
|
34
47
|
end
|
35
48
|
|
36
49
|
#P(Class | Item) = P(Item | Class) * P(Class)
|
@@ -44,6 +57,14 @@ class NaiveBayes
|
|
44
57
|
|
45
58
|
private
|
46
59
|
|
60
|
+
def save
|
61
|
+
if @db_filepath
|
62
|
+
File.open(@db_filepath, "w+") do |f|
|
63
|
+
f.write(Marshal.dump(self))
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
47
68
|
# P(Item | Class)
|
48
69
|
def prob_of_item_given_a_class(features, klass)
|
49
70
|
a = features.inject(1.0) do |sum, feature|
|
data/naive_bayes.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{naive_bayes}
|
8
|
-
s.version = "0.0.
|
8
|
+
s.version = "0.0.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["reddavis"]
|
12
|
-
s.date = %q{
|
12
|
+
s.date = %q{2010-01-05}
|
13
13
|
s.description = %q{Simple straight forward Naive Bayes classifier implementation}
|
14
14
|
s.email = %q{reddavis@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -25,6 +25,7 @@ Gem::Specification.new do |s|
|
|
25
25
|
"VERSION",
|
26
26
|
"lib/naive_bayes.rb",
|
27
27
|
"naive_bayes.gemspec",
|
28
|
+
"spec/db/naive.nb",
|
28
29
|
"spec/naive_bayes_spec.rb",
|
29
30
|
"spec/spec.opts",
|
30
31
|
"spec/spec_helper.rb"
|
data/spec/db/naive.nb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
o:NaiveBayes :@klass_count{: spamf1:hamf1:@db_filepath"D/Users/reddavis/Documents/projects/naive_bayes/spec/db/naive.nb:@features_count{;}" wordf1"badf1f0;}"wef1"badf1@:
|
data/spec/naive_bayes_spec.rb
CHANGED
@@ -1,44 +1,57 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
2
|
|
3
3
|
describe "NaiveBayes" do
|
4
|
-
|
5
|
-
describe "
|
6
|
-
before
|
7
|
-
@classifier =
|
4
|
+
|
5
|
+
describe "Classification" do
|
6
|
+
before do
|
7
|
+
@classifier = create_and_train_classifier
|
8
8
|
end
|
9
9
|
|
10
|
-
it "should
|
11
|
-
@classifier.
|
10
|
+
it "should classify as spam with a score of 0.5" do
|
11
|
+
a = @classifier.classify('bad', 'word')
|
12
|
+
a[0].should == :spam
|
13
|
+
a[1].should == 0.5
|
12
14
|
end
|
13
15
|
end
|
14
16
|
|
15
|
-
describe "
|
16
|
-
before
|
17
|
+
describe "Save" do
|
18
|
+
before do
|
17
19
|
@classifier = NaiveBayes.new(:spam, :ham)
|
18
|
-
@classifier.
|
19
|
-
end
|
20
|
-
|
21
|
-
it "should train" do
|
22
|
-
@classifier.features_count[:spam].size.should == 2
|
20
|
+
@classifier.db_filepath = db_filepath
|
23
21
|
end
|
24
22
|
|
25
|
-
it "should
|
26
|
-
|
23
|
+
it "should save to the filepath provided" do
|
24
|
+
FileUtils.rm(db_filepath, :force => true)
|
25
|
+
@classifier.train(:spam, 'bad')
|
26
|
+
File.exists?(db_filepath).should be_true
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
30
|
-
describe "
|
30
|
+
describe "Load" do
|
31
31
|
before do
|
32
|
-
|
33
|
-
|
34
|
-
|
32
|
+
classifier = NaiveBayes.new(:spam, :ham)
|
33
|
+
classifier.db_filepath = db_filepath
|
34
|
+
classifier.train(:spam, 'bad', 'word')
|
35
|
+
classifier.train(:ham, 'we', 'bad')
|
35
36
|
end
|
36
37
|
|
37
|
-
it "should" do
|
38
|
-
|
39
|
-
|
40
|
-
a[1].should == 0.5
|
38
|
+
it "should return 0.5" do
|
39
|
+
classifier = NaiveBayes.load(db_filepath)
|
40
|
+
classifier.classify('bad', 'word')[1].should == 0.5
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
44
|
+
private
|
45
|
+
|
46
|
+
def create_and_train_classifier
|
47
|
+
a = NaiveBayes.new(:spam, :ham)
|
48
|
+
a.train(:spam, 'bad', 'word')
|
49
|
+
a.train(:ham, 'we', 'bad')
|
50
|
+
a
|
51
|
+
end
|
52
|
+
|
53
|
+
def db_filepath
|
54
|
+
File.expand_path(File.dirname(__FILE__) + '/db/naive.nb')
|
55
|
+
end
|
56
|
+
|
44
57
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: naive_bayes
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- reddavis
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2010-01-05 00:00:00 +00:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -40,6 +40,7 @@ files:
|
|
40
40
|
- VERSION
|
41
41
|
- lib/naive_bayes.rb
|
42
42
|
- naive_bayes.gemspec
|
43
|
+
- spec/db/naive.nb
|
43
44
|
- spec/naive_bayes_spec.rb
|
44
45
|
- spec/spec.opts
|
45
46
|
- spec/spec_helper.rb
|