naive_bayes 0.0.0 → 0.0.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -18,4 +18,4 @@ coverage
18
18
  rdoc
19
19
  pkg
20
20
 
21
- ## PROJECT::SPECIFIC
21
+ ## PROJECT::SPECIFIC
data/README.rdoc CHANGED
@@ -21,6 +21,31 @@ This is an extremely simple, straight forward Naive Bayes implementation.
21
21
 
22
22
  a.classify(*b)
23
23
  #=> [:spam, 0.03125]
24
+
25
+ You can also tell your classifier to save itself, so it's easy for you to pick up where you left off:
26
+
27
+ require 'rubygems'
28
+ require 'naive_bayes'
29
+
30
+ a = NaiveBayes.new(:spam, :ham)
31
+ a.db_filepath = 'path/to/anywhere.nb'
32
+
33
+ # The classifier is saved automatically after every call to train
34
+ a.train(:spam, 'bad', 'word')
35
+ a.train(:ham, 'good', 'word')
36
+
37
+ Some time passes and we want to classify a new document we have just received...
38
+
39
+ require 'rubygems'
40
+ require 'naive_bayes'
41
+
42
+ a = NaiveBayes.load('path/to/anywhere.nb')
43
+
44
+ b = "this is a bad sentence".split(' ')
45
+
46
+ # It's as if we were never apart
47
+ a.classify(*b)
48
+ #=> [:spam, 0.03125]
24
49
 
25
50
  == Copyright
26
51
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.0
1
+ 0.0.1
data/lib/naive_bayes.rb CHANGED
@@ -13,7 +13,19 @@
13
13
 
14
14
  class NaiveBayes
15
15
 
16
- attr_reader :features_count, :klass_count
16
+ class << self
17
+ def load(db_path)
18
+ data = ""
19
+ File.open(db_path) do |f|
20
+ while line = f.gets
21
+ data << line
22
+ end
23
+ end
24
+ Marshal.load(data)
25
+ end
26
+ end
27
+
28
+ attr_accessor :db_filepath
17
29
 
18
30
  def initialize(*klasses)
19
31
  @features_count = {}
@@ -31,6 +43,7 @@ class NaiveBayes
31
43
  @features_count[klass][feature] += 1
32
44
  end
33
45
  @klass_count[klass] += 1
46
+ save
34
47
  end
35
48
 
36
49
  #P(Class | Item) = P(Item | Class) * P(Class)
@@ -44,6 +57,14 @@ class NaiveBayes
44
57
 
45
58
  private
46
59
 
60
+ def save
61
+ if @db_filepath
62
+ File.open(@db_filepath, "w+") do |f|
63
+ f.write(Marshal.dump(self))
64
+ end
65
+ end
66
+ end
67
+
47
68
  # P(Item | Class)
48
69
  def prob_of_item_given_a_class(features, klass)
49
70
  a = features.inject(1.0) do |sum, feature|
data/naive_bayes.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{naive_bayes}
8
- s.version = "0.0.0"
8
+ s.version = "0.0.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["reddavis"]
12
- s.date = %q{2009-11-14}
12
+ s.date = %q{2010-01-05}
13
13
  s.description = %q{Simple straight forward Naive Bayes classifier implementation}
14
14
  s.email = %q{reddavis@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -25,6 +25,7 @@ Gem::Specification.new do |s|
25
25
  "VERSION",
26
26
  "lib/naive_bayes.rb",
27
27
  "naive_bayes.gemspec",
28
+ "spec/db/naive.nb",
28
29
  "spec/naive_bayes_spec.rb",
29
30
  "spec/spec.opts",
30
31
  "spec/spec_helper.rb"
data/spec/db/naive.nb ADDED
@@ -0,0 +1 @@
1
+ o:NaiveBayes :@klass_count{: spamf1:hamf1:@db_filepath"D/Users/reddavis/Documents/projects/naive_bayes/spec/db/naive.nb:@features_count{;}" wordf1"badf1f0;}"wef1"badf1@:
@@ -1,44 +1,57 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
 
3
3
  describe "NaiveBayes" do
4
-
5
- describe "Initialization" do
6
- before(:all) do
7
- @classifier = NaiveBayes.new(:spam, :ham)
4
+
5
+ describe "Classification" do
6
+ before do
7
+ @classifier = create_and_train_classifier
8
8
  end
9
9
 
10
- it "should create a features count for each class" do
11
- @classifier.features_count.size.should == 2
10
+ it "should classify as spam with a score of 0.5" do
11
+ a = @classifier.classify('bad', 'word')
12
+ a[0].should == :spam
13
+ a[1].should == 0.5
12
14
  end
13
15
  end
14
16
 
15
- describe "Training" do
16
- before(:all) do
17
+ describe "Save" do
18
+ before do
17
19
  @classifier = NaiveBayes.new(:spam, :ham)
18
- @classifier.train(:spam, 'bad', 'word')
19
- end
20
-
21
- it "should train" do
22
- @classifier.features_count[:spam].size.should == 2
20
+ @classifier.db_filepath = db_filepath
23
21
  end
24
22
 
25
- it "should bump klass_count for spam up to 1" do
26
- @classifier.klass_count[:spam].should == 1
23
+ it "should save to the filepath provided" do
24
+ FileUtils.rm(db_filepath, :force => true)
25
+ @classifier.train(:spam, 'bad')
26
+ File.exists?(db_filepath).should be_true
27
27
  end
28
28
  end
29
29
 
30
- describe "Classification" do
30
+ describe "Load" do
31
31
  before do
32
- @classifier = NaiveBayes.new(:spam, :ham)
33
- @classifier.train(:spam, 'bad', 'word')
34
- @classifier.train(:ham, 'we', 'bad')
32
+ classifier = NaiveBayes.new(:spam, :ham)
33
+ classifier.db_filepath = db_filepath
34
+ classifier.train(:spam, 'bad', 'word')
35
+ classifier.train(:ham, 'we', 'bad')
35
36
  end
36
37
 
37
- it "should" do
38
- a = @classifier.classify('bad', 'word')
39
- a[0].should == :spam
40
- a[1].should == 0.5
38
+ it "should return 0.5" do
39
+ classifier = NaiveBayes.load(db_filepath)
40
+ classifier.classify('bad', 'word')[1].should == 0.5
41
41
  end
42
42
  end
43
43
 
44
+ private
45
+
46
+ def create_and_train_classifier
47
+ a = NaiveBayes.new(:spam, :ham)
48
+ a.train(:spam, 'bad', 'word')
49
+ a.train(:ham, 'we', 'bad')
50
+ a
51
+ end
52
+
53
+ def db_filepath
54
+ File.expand_path(File.dirname(__FILE__) + '/db/naive.nb')
55
+ end
56
+
44
57
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: naive_bayes
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - reddavis
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-11-14 00:00:00 +00:00
12
+ date: 2010-01-05 00:00:00 +00:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -40,6 +40,7 @@ files:
40
40
  - VERSION
41
41
  - lib/naive_bayes.rb
42
42
  - naive_bayes.gemspec
43
+ - spec/db/naive.nb
43
44
  - spec/naive_bayes_spec.rb
44
45
  - spec/spec.opts
45
46
  - spec/spec_helper.rb