RubyGems - nb - Versions diffs - 0.0.4 → 0.1.1 - Mend

nb 0.0.4 → 0.1.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (16)

checksums.yaml +4 -4
data/Gemfile +3 -0
data/Gemfile.ci +3 -0
data/README.md +52 -9
data/lib/nb.rb +3 -1
data/lib/nb/backend/memory.rb +39 -0
data/lib/nb/backend/redis.rb +102 -0
data/lib/nb/classifier.rb +127 -0
data/lib/nb/version.rb +2 -2
data/spec/nb/backend/memory_spec.rb +13 -0
data/spec/nb/backend/redis_spec.rb +12 -0
data/spec/nb/classifier_spec.rb +152 -0
data/spec/spec_helper.rb +1 -0
metadata +12 -6
data/lib/nb/naive_bayes.rb +0 -112
data/spec/nb/naive_bayes_spec.rb +0 -113

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 0d55c868cf9d3df0866130593f9e7d78935d9fde
-  data.tar.gz: 5e9cd9507bdea2a306e3a8fa9423153d0d4cfbb5
+  metadata.gz: 703ab07acadbf5f04d8d979d888790029cc0c6de
+  data.tar.gz: c468c1d63b8f628be6160e7041f6053c8431297a
 SHA512:
-  metadata.gz: 299259d6e2322cfe07adca36f84899796632bbc7f36c407de3a6e28d763355ba7518cabc82c7f3e8339c8783fa9937cb14edfc062d50946832c0d112c72d4c44
-  data.tar.gz: 243a9a559861ea695f66f21fe6b34bd5ecc89809b819306c1a9833f08bf51cad1131846180fa6dc55aaad404dc97a13b708ce281b9f95c499691da09e65a6e75
+  metadata.gz: 95c245d113ac2dd0a15c7c0d23599d8393b738ddf18ac8be176757409e9b46ca0d19ede1b47f4fdc60a7184690f347e836697231b945f10e1d90f8a8111fa461
+  data.tar.gz: 3f969b83d80f16baa624d874228f03d1c36935dfd469d8e1bd9428529bb4e5138e62c3160673b4836f381e762aabef1f2f0848a7d08e9869ec02406a1d9cd371

data/Gemfile CHANGED

@@ -7,4 +7,7 @@ group :development, :test do
   gem 'guard'
   gem 'guard-rspec'
   gem 'simplecov', require: false
+  gem 'fakeredis'
 end
+gem 'redis'

data/Gemfile.ci CHANGED

@@ -8,4 +8,7 @@ group :development, :test do
   gem 'guard-rspec'
   gem 'simplecov', require: false
   gem 'coveralls', require: false
+  gem 'fakeredis'
 end
+gem 'redis'

data/README.md CHANGED

@@ -4,7 +4,7 @@
 [![Build Status](https://travis-ci.org/forresty/nb.svg?branch=master)](https://travis-ci.org/forresty/nb)
 [![Gem Version](https://badge.fury.io/rb/nb.svg)](http://badge.fury.io/rb/nb)
-yet another Naive Bayes library
+yet another Naive Bayes library with support of memory and Redis backend
 ## Installation
@@ -25,19 +25,19 @@ Or install it yourself as:
 ## Usage
 ```ruby
-bayes = NaiveBayes.new :love, :hate
+classifier = NaiveBayes::Classifier.new :love, :hate
-bayes.train :love, 'I', 'love', 'you'
-bayes.train :hate, 'I', 'hate', 'you'
+classifier.train :love, 'I', 'love', 'you'
+classifier.train :hate, 'I', 'hate', 'you'
-bayes.classifications(*%w{ I love you }).should == [[:love, 0.5], [:hate, 0.25]]
-bayes.classify(*%w{ I love you }).should == [:love, 0.5]
-bayes.classify(*%w{ love }).should == [:love, 0.5]
+classifier.classifications(*%w{ I love you }).should == [[:love, 0.5], [:hate, 0.25]]
+classifier.classify(*%w{ I love you }).should == [:love, 0.5]
+classifier.classify(*%w{ love }).should == [:love, 0.5]
 ```
-### ability to view top tokens
+### Ability to view top tokens
-`bayes.top_tokens_of_category(:spam)`
+`classifier.top_tokens_of_category(:spam)`
 ```
 +------------+------+--------------------+
@@ -61,6 +61,39 @@ bayes.classify(*%w{ love }).should == [:love, 0.5]
 +------------+------+--------------------+
 ```
+### Use Redis backend
+```ruby
+classifier = Classifier.new(:spam, :ham, backend: :redis, host: 'localhost', port: 30000)
+```
+it generates 2 + N keys in redis:
+```
+127.0.0.1:30000> keys *
+1) "nb:hash:tokens_count:ham"
+2) "nb:hash:tokens_count:spam"
+3) "nb:set:categories"
+4) "nb:hash:categories_count"
+```
+### Support default category
+in case the probability of each category is too low:
+```ruby
+@classifier = NaiveBayes::Classifer.new :spam, :ham
+@classifier.default_category = :ham
+```
+```
+bayes filter mark as spam: false
+bayes classifications: [[:ham, 5.044818725004143e-80], [:spam, 1.938475275819746e-119]]
+bayes filter mark as spam: false
+bayes classifications: [[:spam, 0.0], [:ham, 0.0]]
+```
 ## Credits
 - [classifier gem](https://github.com/cardmagic/classifier)
@@ -74,3 +107,13 @@ bayes.classify(*%w{ love }).should == [:love, 0.5]
 3. Commit your changes (`git commit -am 'Add some feature'`)
 4. Push to the branch (`git push origin my-new-feature`)
 5. Create a new Pull Request
+## Changelog
+### 0.1.1 / 2014-12-15
+- fix redis backend
+### 0.1.0 / 2014-12-15
+- init implementation of redis backend

data/lib/nb.rb CHANGED

@@ -1 +1,3 @@
-require "nb/naive_bayes"
+require_relative "nb/classifier"
+require_relative "nb/backend/memory"
+require_relative "nb/backend/redis"

data/lib/nb/backend/memory.rb ADDED

@@ -0,0 +1,39 @@
+module NaiveBayes
+  module Backend
+    class Memory
+      attr_accessor :categories, :tokens_count, :categories_count
+      def initialize(categories)
+        @categories = categories
+        clear!
+      end
+      def clear!
+        @tokens_count = {}
+        @categories_count = {}
+        @categories.each do |category|
+          @tokens_count[category] = Hash.new(0)
+          @categories_count[category] = 0
+        end
+      end
+      def train(category, *tokens)
+        tokens.uniq.each do |token|
+          @tokens_count[category][token] += 1
+        end
+        @categories_count[category] += 1
+      end
+      def untrain(category, *tokens)
+        tokens.uniq.each do |token|
+          @tokens_count[category][token] -= 1
+        end
+        @categories_count[category] -= 1
+      end
+    end
+  end
+end

data/lib/nb/backend/redis.rb ADDED

@@ -0,0 +1,102 @@
+require "redis"
+module NaiveBayes
+  module Backend
+    class Redis
+      class RedisHash
+        def initialize(redis, hash_name)
+          @redis = redis
+          @hash_name = hash_name
+        end
+        def [](key)
+          value = @redis.hget @hash_name, key
+          value.to_f
+        end
+        def []=(key, value)
+          @redis.hset @hash_name, key, value
+        end
+        def incr(key)
+          @redis.hincrby @hash_name, key, 1
+        end
+        def decr(key)
+          @redis.hdecrby @hash_name, key, 1
+        end
+        def values
+          @redis.hvals(@hash_name).map(&:to_f)
+        end
+        def map
+          out = []
+          if block_given?
+            @redis.hkeys(@hash_name).each { |k| out << yield(k, self.[](k)) }
+          else
+            out = to_enum :map
+          end
+          out
+        end
+      end
+      def initialize(categories, options={})
+        @redis = ::Redis.new(options)
+        @_categories = categories
+        setup
+      end
+      def categories
+        @redis.smembers("nb:set:categories").map(&:to_sym)
+      end
+      def categories_count
+        @categories_count ||= RedisHash.new(@redis, "nb:hash:categories_count")
+      end
+      def tokens_count
+        @tokens_count ||= Hash.new
+      end
+      def clear!
+        @redis.flushall
+        setup
+        categories.each do |category|
+          self.categories_count[category] = 0
+        end
+      end
+      def setup
+        @redis.sadd "nb:set:categories", @_categories
+        categories.each do |category|
+          # @tokens_count[category] = Hash.new(0)
+          self.tokens_count[category] = RedisHash.new(@redis, "nb:hash:tokens_count:#{category}")
+        end
+      end
+      def train(category, *tokens)
+        tokens.uniq.each do |token|
+          self.tokens_count[category].incr(token)
+        end
+        self.categories_count.incr(category)
+      end
+      def untrain(category, *tokens)
+        tokens.uniq.each do |token|
+          self.tokens_count[category][token].decr(token)
+        end
+        self.categories_count.decr(category)
+      end
+    end
+  end
+end

data/lib/nb/classifier.rb ADDED

@@ -0,0 +1,127 @@
+require "yaml"
+module NaiveBayes
+  class Classifier
+    attr_accessor :default_category
+    attr_accessor :backend
+    def initialize(*categories)
+      if categories.last.is_a?(Hash)
+        options = categories.pop
+      else
+        options = {}
+      end
+      options[:backend] ||= :memory
+      case options[:backend]
+      when :memory
+        @backend = Backend::Memory.new(categories)
+      when :redis
+        options[:host] ||= 'localhost'
+        options[:port] ||= 6379
+        @backend = Backend::Redis.new(categories, host: options[:host], port: options[:port])
+      else
+        raise "unsupported backend: #{options[:backend]}"
+      end
+      @default_category = categories.first
+    end
+    def train(category, *tokens)
+      backend.train(category, *tokens)
+    end
+    def untrain(category, *tokens)
+      backend.untrain(category, *tokens)
+    end
+    def clear!
+      backend.clear!
+    end
+    def classify(*tokens)
+      result = classifications(*tokens).first
+      if result.last == 0.0
+        [@default_category, 0.0]
+      else
+        result
+      end
+    end
+    def classifications(*tokens)
+      scores = {}
+      backend.categories.each do |category|
+        scores[category] = probability_of_tokens_given_a_category(tokens, category) * probability_of_a_category(category)
+      end
+      scores.sort_by { |k, v| -v }
+    end
+    def top_tokens_of_category(category, count=20)
+      backend.tokens_count[category].map { |k, v| [k, v, probability_of_a_token_in_category(k, category)] }.sort_by { |i| -i.last }.first(count)
+    end
+    def probability_of_a_token_in_category(token, category)
+      probability_of_a_token_given_a_category(token, category) / backend.categories.inject(0.0) { |r, c| r + probability_of_a_token_given_a_category(token, c) }
+    end
+    def probability_of_a_token_given_a_category(token, category)
+      return assumed_probability if backend.tokens_count[category][token] == 0
+      backend.tokens_count[category][token].to_f / backend.categories_count[category]
+    end
+    def probability_of_tokens_given_a_category(tokens, category)
+      tokens.inject(1.0) do |product, token|
+        product * probability_of_a_token_given_a_category(token, category)
+      end
+    end
+    def probability_of_a_category(category)
+      backend.categories_count[category].to_f / total_number_of_items
+    end
+    # def total_number_of_tokens
+    #   @tokens_count.values.inject(0) { |sum, hash| sum + hash.values.inject(&:+) }
+    # end
+    def total_number_of_items
+      backend.categories_count.values.inject(&:+)
+    end
+    # If we have only trained a little bit a class may not have had a feature yet
+    # give it a probability of 0 may not be true so we produce a assumed probability
+    # which gets smaller more we train
+    def assumed_probability
+      0.5 / (total_number_of_items.to_f / 2)
+    end
+    def data
+      {
+        :categories => backend.categories,
+        :tokens_count => backend.tokens_count,
+        :categories_count => backend.categories_count
+      }
+    end
+    def save(yaml_file)
+      raise 'only memory backend can save' unless backend == :memory
+      File.write(yaml_file, data.to_yaml)
+    end
+    class << self
+      # will load into a memory-backed classifier
+      def load_yaml(yaml_file)
+        data = YAML.load_file(yaml_file)
+        new(data[:categories], backend: :memory).tap do |classifier|
+          classifier.tokens_count = data[:tokens_count]
+          classifier.categories_count = data[:categories_count]
+        end
+      end
+    end
+  end
+end

data/lib/nb/version.rb CHANGED

@@ -1,3 +1,3 @@
-class NaiveBayes
-  VERSION = "0.0.4"
+module NaiveBayes
+  VERSION = "0.1.1"
 end

data/spec/nb/backend/memory_spec.rb ADDED

@@ -0,0 +1,13 @@
+require "spec_helper"
+module NaiveBayes
+  module Backend
+    describe Memory do
+      subject { Memory.new [:ham, :spam] }
+      it { should respond_to :categories= }
+      it { should respond_to :train }
+      it { should respond_to :untrain }
+    end
+  end
+end

data/spec/nb/backend/redis_spec.rb ADDED

@@ -0,0 +1,12 @@
+require "spec_helper"
+module NaiveBayes
+  module Backend
+    describe Redis do
+      subject { Redis.new [:ham, :spam] }
+      it { should respond_to :train }
+      it { should respond_to :untrain }
+    end
+  end
+end

data/spec/nb/classifier_spec.rb ADDED

@@ -0,0 +1,152 @@
+require "spec_helper"
+module NaiveBayes
+  describe Classifier do
+    let(:classifier) { Classifier.new(:love, :hate) }
+    subject { classifier }
+    it { should respond_to :train }
+    it { should respond_to :untrain }
+    it { should respond_to :save }
+    it { should respond_to :classify }
+    it { should respond_to :classifications }
+    it { should respond_to :probability_of_a_token_given_a_category }
+    it { should respond_to :probability_of_tokens_given_a_category }
+    it { should respond_to :probability_of_a_category }
+    it { should respond_to :probability_of_a_token_in_category }
+    # it { should respond_to :total_number_of_tokens }
+    it { should respond_to :total_number_of_items }
+    it { should respond_to :top_tokens_of_category }
+    it { should respond_to :default_category= }
+    it { should respond_to :clear! }
+    [:memory, :redis].each do |backend|
+      describe "with backend #{backend}" do
+        let(:classifier) { Classifier.new(:love, :hate, backend: backend) }
+        subject { classifier }
+        before(:each) do
+          subject.clear!
+        end
+        # describe '#total_number_of_tokens' do
+        #   it 'calculates correctly' do
+        #     bayes.train :love, 'I', 'love', 'you'
+        #     bayes.train :hate, 'I', 'hate', 'you'
+        #
+        #     bayes.total_number_of_tokens.should == 6
+        #
+        #     bayes.train :love, 'I', 'love', 'you', 'more'
+        #
+        #     bayes.total_number_of_tokens.should == 10
+        #   end
+        # end
+        describe '#categories_count and #tokens_count' do
+          it 'must get it right' do
+            subject.backend.categories_count[:love].should == 0
+            subject.train :love, 'I', 'love', 'you'
+            subject.train :hate, 'I', 'hate', 'you'
+            subject.backend.categories_count[:love].should == 1
+            subject.backend.tokens_count[:hate]['you'].should == 1
+            subject.backend.tokens_count[:hate]['love'].should == 0
+          end
+        end
+        describe '#probability_of_a_token_in_category' do
+          it 'calculates correctly' do
+            subject.train :love, 'I', 'love', 'you'
+            subject.train :hate, 'I', 'hate', 'you'
+            subject.probability_of_a_token_in_category('love', :love).should == 2.0/3  # 1 / ( 1 + 0.5 )
+            subject.probability_of_a_token_in_category('hate', :love).should == 1.0/3  # 0.5 / ( 1 + 0.5 )
+            subject.probability_of_a_token_in_category('I', :love).should == 0.5
+            subject.train :love, 'hate', 'is', 'love'
+            subject.train :love, 'hate', 'is', 'love'
+            subject.train :love, 'hate', 'is', 'love'
+            subject.probability_of_a_token_in_category('love', :love).should == 5.0/6  # 1 / ( 1 + 0.2 )
+            subject.probability_of_a_token_in_category('hate', :love).should == 3.0/7  # 0.75 / ( 0.75 + 1 )
+          end
+        end
+        describe '#total_number_of_items' do
+          it 'calculates correctly' do
+            subject.train :love, 'I', 'love', 'you'
+            subject.train :hate, 'I', 'hate', 'you'
+            subject.total_number_of_items.should == 2
+            subject.train :love, 'I', 'love', 'you', 'more'
+            subject.total_number_of_items.should == 3
+          end
+        end
+        describe '#probability_of_a_category' do
+          it 'calculates correctly' do
+            subject.train :love, 'I', 'love', 'you'
+            subject.train :hate, 'I', 'hate', 'you'
+            subject.probability_of_a_category(:love).should == 0.5
+          end
+        end
+        describe '#probability_of_token_given_a_category' do
+          it 'calculates correctly' do
+            subject.train :love, 'I', 'love', 'you'
+            subject.train :hate, 'I', 'hate', 'you'
+            subject.probability_of_a_token_given_a_category('love', :love).should == 1
+            subject.probability_of_a_token_given_a_category('you', :hate).should == 1
+            subject.train :love, 'I', 'love', 'you', 'more'
+            subject.probability_of_a_token_given_a_category('more', :love).should == 0.5
+            # bayes.probability_of_token_given_a_category('more', :hate).should == 0
+          end
+        end
+        describe '#classifications' do
+          it 'calculates correctly' do
+            subject.train :love, 'I', 'love', 'you'
+            subject.train :hate, 'I', 'hate', 'you'
+            subject.classifications(*%w{ I love you }).should == [[:love, 0.5], [:hate, 0.25]]
+            subject.classify(*%w{ I love you }).should == [:love, 0.5]
+            subject.classify(*%w{ love }).should == [:love, 0.5]
+            subject.train :love, 'I', 'love', 'you'
+            subject.train :love, 'I', 'love', 'you'
+            subject.train :love, 'I', 'love', 'you'
+            subject.classify(*%w{ I love you }).should == [:love, 0.8]
+            subject.classify(*%w{ love }).should == [:love, 0.8]
+            subject.classify(*%w{ only love }).first.should == :love #[:love, 0.16], (0.2 * 1) * 0.8
+          end
+        end
+        describe '#top_tokens_of_category' do
+          it 'finds to tokens' do
+            subject.train :love, 'I', 'love', 'you'
+            subject.train :hate, 'I', 'hate', 'you'
+            subject.top_tokens_of_category(:love).count.should == 3
+          end
+        end
+      end
+    end
+    describe 'class methods' do
+      subject { Classifier }
+      it { should respond_to :load_yaml }
+    end
+  end
+end

data/spec/spec_helper.rb CHANGED

@@ -10,3 +10,4 @@ rescue LoadError
 end
 require "nb"
+require "fakeredis"

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: nb
 version: !ruby/object:Gem::Version
-  version: 0.0.4
+  version: 0.1.1
 platform: ruby
 authors:
 - Forrest Ye
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-11-19 00:00:00.000000000 Z
+date: 2014-12-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -54,10 +54,14 @@ files:
 - README.md
 - Rakefile
 - lib/nb.rb
-- lib/nb/naive_bayes.rb
+- lib/nb/backend/memory.rb
+- lib/nb/backend/redis.rb
+- lib/nb/classifier.rb
 - lib/nb/version.rb
 - nb.gemspec
-- spec/nb/naive_bayes_spec.rb
+- spec/nb/backend/memory_spec.rb
+- spec/nb/backend/redis_spec.rb
+- spec/nb/classifier_spec.rb
 - spec/spec_helper.rb
 homepage: https://github.com/forresty/nb
 licenses:
@@ -79,10 +83,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.0.2
+rubygems_version: 2.4.4
 signing_key:
 specification_version: 4
 summary: yet another Naive Bayes library
 test_files:
-- spec/nb/naive_bayes_spec.rb
+- spec/nb/backend/memory_spec.rb
+- spec/nb/backend/redis_spec.rb
+- spec/nb/classifier_spec.rb
 - spec/spec_helper.rb

data/lib/nb/naive_bayes.rb DELETED

@@ -1,112 +0,0 @@
-require "yaml"
-class NaiveBayes
-  attr_accessor :categories, :tokens_count, :categories_count, :default_category
-  def initialize(*categories)
-    @categories = categories
-    @tokens_count = {}
-    @categories_count = {}
-    @default_category = @categories.first
-    categories.each do |category|
-      @tokens_count[category] = Hash.new(0)
-      @categories_count[category] = 0
-    end
-  end
-  def train(category, *tokens)
-    tokens.uniq.each do |token|
-      @tokens_count[category][token] += 1
-    end
-    @categories_count[category] += 1
-  end
-  def untrain(category, *tokens)
-    tokens.uniq.each do |token|
-      @tokens_count[category][token] -= 1
-    end
-    @categories_count[category] -= 1
-  end
-  def classify(*tokens)
-    result = classifications(*tokens).first
-    if result.last == 0.0
-      [@default_category, 0.0]
-    else
-      result
-    end
-  end
-  def classifications(*tokens)
-    scores = {}
-    @categories.each do |category|
-      scores[category] = probability_of_tokens_given_a_category(tokens, category) * probability_of_a_category(category)
-    end
-    scores.sort_by { |k, v| -v }
-  end
-  def top_tokens_of_category(category, count=20)
-    tokens_count[category].map { |k, v| [k, v, probability_of_a_token_in_category(k, category)] }.sort_by { |i| -i.last }.first(count)
-  end
-  def probability_of_a_token_in_category(token, category)
-    probability_of_a_token_given_a_category(token, category) / @categories.inject(0.0) { |r, c| r + probability_of_a_token_given_a_category(token, c) }
-  end
-  def probability_of_a_token_given_a_category(token, category)
-    return assumed_probability if @tokens_count[category][token] == 0
-    @tokens_count[category][token].to_f / @categories_count[category]
-  end
-  def probability_of_tokens_given_a_category(tokens, category)
-    tokens.inject(1.0) do |product, token|
-      product * probability_of_a_token_given_a_category(token, category)
-    end
-  end
-  def probability_of_a_category(category)
-    @categories_count[category].to_f / total_number_of_items
-  end
-  # def total_number_of_tokens
-  #   @tokens_count.values.inject(0) { |sum, hash| sum + hash.values.inject(&:+) }
-  # end
-  def total_number_of_items
-    @categories_count.values.inject(&:+)
-  end
-  # If we have only trained a little bit a class may not have had a feature yet
-  # give it a probability of 0 may not be true so we produce a assumed probability
-  # which gets smaller more we train
-  def assumed_probability
-    0.5 / (total_number_of_items.to_f / 2)
-  end
-  def data
-    {
-      :categories => @categories,
-      :tokens_count => @tokens_count,
-      :categories_count => @categories_count
-    }
-  end
-  def save(yaml_file)
-    File.write(yaml_file, data.to_yaml)
-  end
-  class << self
-    def load_yaml(yaml_file)
-      data = YAML.load_file(yaml_file)
-      new.tap do |bayes|
-        bayes.categories = data[:categories]
-        bayes.tokens_count = data[:tokens_count]
-        bayes.categories_count = data[:categories_count]
-      end
-    end
-  end
-end

data/spec/nb/naive_bayes_spec.rb DELETED

@@ -1,113 +0,0 @@
-require "spec_helper"
-describe NaiveBayes do
-  it { should respond_to :train }
-  it { should respond_to :untrain }
-  it { should respond_to :save }
-  it { should respond_to :classify }
-  it { should respond_to :classifications }
-  it { should respond_to :probability_of_a_token_given_a_category }
-  it { should respond_to :probability_of_tokens_given_a_category }
-  it { should respond_to :probability_of_a_category }
-  it { should respond_to :probability_of_a_token_in_category }
-  # it { should respond_to :total_number_of_tokens }
-  it { should respond_to :total_number_of_items }
-  it { should respond_to :top_tokens_of_category }
-  it { should respond_to :default_category= }
-  let(:bayes) { NaiveBayes.new(:love, :hate) }
-  subject { bayes }
-  # describe '#total_number_of_tokens' do
-  #   it 'calculates correctly' do
-  #     bayes.train :love, 'I', 'love', 'you'
-  #     bayes.train :hate, 'I', 'hate', 'you'
-  #
-  #     bayes.total_number_of_tokens.should == 6
-  #
-  #     bayes.train :love, 'I', 'love', 'you', 'more'
-  #
-  #     bayes.total_number_of_tokens.should == 10
-  #   end
-  # end
-  describe '#probability_of_a_token_in_category' do
-    it 'calculates correctly' do
-      bayes.train :love, 'I', 'love', 'you'
-      bayes.train :hate, 'I', 'hate', 'you'
-      bayes.probability_of_a_token_in_category('love', :love).should == 2.0/3  # 1 / ( 1 + 0.5 )
-      bayes.probability_of_a_token_in_category('hate', :love).should == 1.0/3  # 0.5 / ( 1 + 0.5 )
-      bayes.probability_of_a_token_in_category('I', :love).should == 0.5
-      bayes.train :love, 'hate', 'is', 'love'
-      bayes.train :love, 'hate', 'is', 'love'
-      bayes.train :love, 'hate', 'is', 'love'
-      bayes.probability_of_a_token_in_category('love', :love).should == 5.0/6  # 1 / ( 1 + 0.2 )
-      bayes.probability_of_a_token_in_category('hate', :love).should == 3.0/7  # 0.75 / ( 0.75 + 1 )
-    end
-  end
-  describe '#total_number_of_items' do
-    it 'calculates correctly' do
-      bayes.train :love, 'I', 'love', 'you'
-      bayes.train :hate, 'I', 'hate', 'you'
-      bayes.total_number_of_items.should == 2
-      bayes.train :love, 'I', 'love', 'you', 'more'
-      bayes.total_number_of_items.should == 3
-    end
-  end
-  describe '#probability_of_a_category' do
-    it 'calculates correctly' do
-      bayes.train :love, 'I', 'love', 'you'
-      bayes.train :hate, 'I', 'hate', 'you'
-      bayes.probability_of_a_category(:love).should == 0.5
-    end
-  end
-  describe '#probability_of_token_given_a_category' do
-    it 'calculates correctly' do
-      bayes.train :love, 'I', 'love', 'you'
-      bayes.train :hate, 'I', 'hate', 'you'
-      bayes.probability_of_a_token_given_a_category('love', :love).should == 1
-      bayes.probability_of_a_token_given_a_category('you', :hate).should == 1
-      bayes.train :love, 'I', 'love', 'you', 'more'
-      bayes.probability_of_a_token_given_a_category('more', :love).should == 0.5
-      # bayes.probability_of_token_given_a_category('more', :hate).should == 0
-    end
-  end
-  describe '#classifications' do
-    it 'calculates correctly' do
-      bayes.train :love, 'I', 'love', 'you'
-      bayes.train :hate, 'I', 'hate', 'you'
-      bayes.classifications(*%w{ I love you }).should == [[:love, 0.5], [:hate, 0.25]]
-      bayes.classify(*%w{ I love you }).should == [:love, 0.5]
-      bayes.classify(*%w{ love }).should == [:love, 0.5]
-      bayes.train :love, 'I', 'love', 'you'
-      bayes.train :love, 'I', 'love', 'you'
-      bayes.train :love, 'I', 'love', 'you'
-      bayes.classify(*%w{ I love you }).should == [:love, 0.8]
-      bayes.classify(*%w{ love }).should == [:love, 0.8]
-      bayes.classify(*%w{ only love }).first.should == :love #[:love, 0.16], (0.2 * 1) * 0.8
-    end
-  end
-  describe 'class methods' do
-    subject { NaiveBayes }
-    it { should respond_to :load_yaml }
-  end
-end