naive_bayes 0.0.0 → 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -18,4 +18,4 @@ coverage
18
18
  rdoc
19
19
  pkg
20
20
 
21
- ## PROJECT::SPECIFIC
21
+ ## PROJECT::SPECIFIC
data/README.rdoc CHANGED
@@ -21,6 +21,31 @@ This is an extremely simple, straight forward Naive Bayes implementation.
21
21
 
22
22
  a.classify(*b)
23
23
  #=> [:spam, 0.03125]
24
+
25
+ You can also tell your classifier to save itself, so it's easy for you to pick up where you left off:
26
+
27
+ require 'rubygems'
28
+ require 'naive_bayes'
29
+
30
+ a = NaiveBayes.new(:spam, :ham)
31
+ a.db_filepath = 'path/to/anywhere.nb'
32
+
33
+ # The classifier is saved automatically after every call to train
34
+ a.train(:spam, 'bad', 'word')
35
+ a.train(:ham, 'good', 'word')
36
+
37
+ Some time passes, and we want to classify a new document we have just received...
38
+
39
+ require 'rubygems'
40
+ require 'naive_bayes'
41
+
42
+ a = NaiveBayes.load('path/to/anywhere.nb')
43
+
44
+ b = "this is a bad sentence".split(' ')
45
+
46
+ # It's as if we were never apart
47
+ a.classify(*b)
48
+ #=> [:spam, 0.03125]
24
49
 
25
50
  == Copyright
26
51
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.0
1
+ 0.0.1
data/lib/naive_bayes.rb CHANGED
@@ -13,7 +13,19 @@
13
13
 
14
14
  class NaiveBayes
15
15
 
16
- attr_reader :features_count, :klass_count
16
+ class << self
17
+ def load(db_path)
18
+ data = ""
19
+ File.open(db_path) do |f|
20
+ while line = f.gets
21
+ data << line
22
+ end
23
+ end
24
+ Marshal.load(data)
25
+ end
26
+ end
27
+
28
+ attr_accessor :db_filepath
17
29
 
18
30
  def initialize(*klasses)
19
31
  @features_count = {}
@@ -31,6 +43,7 @@ class NaiveBayes
31
43
  @features_count[klass][feature] += 1
32
44
  end
33
45
  @klass_count[klass] += 1
46
+ save
34
47
  end
35
48
 
36
49
  #P(Class | Item) = P(Item | Class) * P(Class)
@@ -44,6 +57,14 @@ class NaiveBayes
44
57
 
45
58
  private
46
59
 
60
+ def save
61
+ if @db_filepath
62
+ File.open(@db_filepath, "w+") do |f|
63
+ f.write(Marshal.dump(self))
64
+ end
65
+ end
66
+ end
67
+
47
68
  # P(Item | Class)
48
69
  def prob_of_item_given_a_class(features, klass)
49
70
  a = features.inject(1.0) do |sum, feature|
data/naive_bayes.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{naive_bayes}
8
- s.version = "0.0.0"
8
+ s.version = "0.0.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["reddavis"]
12
- s.date = %q{2009-11-14}
12
+ s.date = %q{2010-01-05}
13
13
  s.description = %q{Simple straight forward Naive Bayes classifier implementation}
14
14
  s.email = %q{reddavis@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -25,6 +25,7 @@ Gem::Specification.new do |s|
25
25
  "VERSION",
26
26
  "lib/naive_bayes.rb",
27
27
  "naive_bayes.gemspec",
28
+ "spec/db/naive.nb",
28
29
  "spec/naive_bayes_spec.rb",
29
30
  "spec/spec.opts",
30
31
  "spec/spec_helper.rb"
data/spec/db/naive.nb ADDED
@@ -0,0 +1 @@
1
+ o:NaiveBayes :@klass_count{: spamf1:hamf1:@db_filepath"D/Users/reddavis/Documents/projects/naive_bayes/spec/db/naive.nb:@features_count{;}" wordf1"badf1f0;}"wef1"badf1@:
@@ -1,44 +1,57 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
 
3
3
  describe "NaiveBayes" do
4
-
5
- describe "Initialization" do
6
- before(:all) do
7
- @classifier = NaiveBayes.new(:spam, :ham)
4
+
5
+ describe "Classification" do
6
+ before do
7
+ @classifier = create_and_train_classifier
8
8
  end
9
9
 
10
- it "should create a features count for each class" do
11
- @classifier.features_count.size.should == 2
10
+ it "should classify as spam with a score of 0.5" do
11
+ a = @classifier.classify('bad', 'word')
12
+ a[0].should == :spam
13
+ a[1].should == 0.5
12
14
  end
13
15
  end
14
16
 
15
- describe "Training" do
16
- before(:all) do
17
+ describe "Save" do
18
+ before do
17
19
  @classifier = NaiveBayes.new(:spam, :ham)
18
- @classifier.train(:spam, 'bad', 'word')
19
- end
20
-
21
- it "should train" do
22
- @classifier.features_count[:spam].size.should == 2
20
+ @classifier.db_filepath = db_filepath
23
21
  end
24
22
 
25
- it "should bump klass_count for spam up to 1" do
26
- @classifier.klass_count[:spam].should == 1
23
+ it "should save to the filepath provided" do
24
+ FileUtils.rm(db_filepath, :force => true)
25
+ @classifier.train(:spam, 'bad')
26
+ File.exists?(db_filepath).should be_true
27
27
  end
28
28
  end
29
29
 
30
- describe "Classification" do
30
+ describe "Load" do
31
31
  before do
32
- @classifier = NaiveBayes.new(:spam, :ham)
33
- @classifier.train(:spam, 'bad', 'word')
34
- @classifier.train(:ham, 'we', 'bad')
32
+ classifier = NaiveBayes.new(:spam, :ham)
33
+ classifier.db_filepath = db_filepath
34
+ classifier.train(:spam, 'bad', 'word')
35
+ classifier.train(:ham, 'we', 'bad')
35
36
  end
36
37
 
37
- it "should" do
38
- a = @classifier.classify('bad', 'word')
39
- a[0].should == :spam
40
- a[1].should == 0.5
38
+ it "should return 0.5" do
39
+ classifier = NaiveBayes.load(db_filepath)
40
+ classifier.classify('bad', 'word')[1].should == 0.5
41
41
  end
42
42
  end
43
43
 
44
+ private
45
+
46
+ def create_and_train_classifier
47
+ a = NaiveBayes.new(:spam, :ham)
48
+ a.train(:spam, 'bad', 'word')
49
+ a.train(:ham, 'we', 'bad')
50
+ a
51
+ end
52
+
53
+ def db_filepath
54
+ File.expand_path(File.dirname(__FILE__) + '/db/naive.nb')
55
+ end
56
+
44
57
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: naive_bayes
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - reddavis
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-11-14 00:00:00 +00:00
12
+ date: 2010-01-05 00:00:00 +00:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -40,6 +40,7 @@ files:
40
40
  - VERSION
41
41
  - lib/naive_bayes.rb
42
42
  - naive_bayes.gemspec
43
+ - spec/db/naive.nb
43
44
  - spec/naive_bayes_spec.rb
44
45
  - spec/spec.opts
45
46
  - spec/spec_helper.rb