nb 0.0.4 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/Gemfile.ci +3 -0
- data/README.md +52 -9
- data/lib/nb.rb +3 -1
- data/lib/nb/backend/memory.rb +39 -0
- data/lib/nb/backend/redis.rb +102 -0
- data/lib/nb/classifier.rb +127 -0
- data/lib/nb/version.rb +2 -2
- data/spec/nb/backend/memory_spec.rb +13 -0
- data/spec/nb/backend/redis_spec.rb +12 -0
- data/spec/nb/classifier_spec.rb +152 -0
- data/spec/spec_helper.rb +1 -0
- metadata +12 -6
- data/lib/nb/naive_bayes.rb +0 -112
- data/spec/nb/naive_bayes_spec.rb +0 -113
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 703ab07acadbf5f04d8d979d888790029cc0c6de
         | 
| 4 | 
            +
              data.tar.gz: c468c1d63b8f628be6160e7041f6053c8431297a
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 95c245d113ac2dd0a15c7c0d23599d8393b738ddf18ac8be176757409e9b46ca0d19ede1b47f4fdc60a7184690f347e836697231b945f10e1d90f8a8111fa461
         | 
| 7 | 
            +
              data.tar.gz: 3f969b83d80f16baa624d874228f03d1c36935dfd469d8e1bd9428529bb4e5138e62c3160673b4836f381e762aabef1f2f0848a7d08e9869ec02406a1d9cd371
         | 
    
        data/Gemfile
    CHANGED
    
    
    
        data/Gemfile.ci
    CHANGED
    
    
    
        data/README.md
    CHANGED
    
    | @@ -4,7 +4,7 @@ | |
| 4 4 | 
             
            [](https://travis-ci.org/forresty/nb)
         | 
| 5 5 | 
             
            [](http://badge.fury.io/rb/nb)
         | 
| 6 6 |  | 
| 7 | 
            -
            yet another Naive Bayes library
         | 
| 7 | 
            +
            Yet another Naive Bayes library, with support for in-memory and Redis backends
         | 
| 8 8 |  | 
| 9 9 | 
             
            ## Installation
         | 
| 10 10 |  | 
| @@ -25,19 +25,19 @@ Or install it yourself as: | |
| 25 25 | 
             
            ## Usage
         | 
| 26 26 |  | 
| 27 27 | 
             
            ```ruby
         | 
| 28 | 
            -
             | 
| 28 | 
            +
            classifier = NaiveBayes::Classifier.new :love, :hate
         | 
| 29 29 |  | 
| 30 | 
            -
             | 
| 31 | 
            -
             | 
| 30 | 
            +
            classifier.train :love, 'I', 'love', 'you'
         | 
| 31 | 
            +
            classifier.train :hate, 'I', 'hate', 'you'
         | 
| 32 32 |  | 
| 33 | 
            -
             | 
| 34 | 
            -
             | 
| 35 | 
            -
             | 
| 33 | 
            +
            classifier.classifications(*%w{ I love you }).should == [[:love, 0.5], [:hate, 0.25]]
         | 
| 34 | 
            +
            classifier.classify(*%w{ I love you }).should == [:love, 0.5]
         | 
| 35 | 
            +
            classifier.classify(*%w{ love }).should == [:love, 0.5]
         | 
| 36 36 | 
             
            ```
         | 
| 37 37 |  | 
| 38 | 
            -
            ###  | 
| 38 | 
            +
            ### Ability to view top tokens
         | 
| 39 39 |  | 
| 40 | 
            -
            ` | 
| 40 | 
            +
            `classifier.top_tokens_of_category(:spam)`
         | 
| 41 41 |  | 
| 42 42 | 
             
            ```
         | 
| 43 43 | 
             
            +------------+------+--------------------+
         | 
| @@ -61,6 +61,39 @@ bayes.classify(*%w{ love }).should == [:love, 0.5] | |
| 61 61 | 
             
            +------------+------+--------------------+
         | 
| 62 62 | 
             
            ```
         | 
| 63 63 |  | 
| 64 | 
            +
            ### Use Redis backend
         | 
| 65 | 
            +
             | 
| 66 | 
            +
            ```ruby
         | 
| 67 | 
            +
            classifier = Classifier.new(:spam, :ham, backend: :redis, host: 'localhost', port: 30000)
         | 
| 68 | 
            +
            ```
         | 
| 69 | 
            +
             | 
| 70 | 
            +
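            It creates 2 + N keys in Redis, where N is the number of categories (one token-count hash per category, plus the category set and the category-count hash):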
         | 
| 71 | 
            +
             | 
| 72 | 
            +
            ```
         | 
| 73 | 
            +
            127.0.0.1:30000> keys *
         | 
| 74 | 
            +
            1) "nb:hash:tokens_count:ham"
         | 
| 75 | 
            +
            2) "nb:hash:tokens_count:spam"
         | 
| 76 | 
            +
            3) "nb:set:categories"
         | 
| 77 | 
            +
            4) "nb:hash:categories_count"
         | 
| 78 | 
            +
            ```
         | 
| 79 | 
            +
             | 
| 80 | 
            +
            ### Support default category
         | 
| 81 | 
            +
             | 
| 82 | 
            +
            The default category is returned when the best score of every category is so low that it evaluates to 0.0:
         | 
| 83 | 
            +
             | 
| 84 | 
            +
            ```ruby
         | 
| 85 | 
            +
            @classifier = NaiveBayes::Classifier.new :spam, :ham
         | 
| 86 | 
            +
            @classifier.default_category = :ham
         | 
| 87 | 
            +
            ```
         | 
| 88 | 
            +
             | 
| 89 | 
            +
            ```
         | 
| 90 | 
            +
            bayes filter mark as spam: false
         | 
| 91 | 
            +
            bayes classifications: [[:ham, 5.044818725004143e-80], [:spam, 1.938475275819746e-119]]
         | 
| 92 | 
            +
             | 
| 93 | 
            +
            bayes filter mark as spam: false
         | 
| 94 | 
            +
            bayes classifications: [[:spam, 0.0], [:ham, 0.0]]
         | 
| 95 | 
            +
            ```
         | 
| 96 | 
            +
             | 
| 64 97 | 
             
            ## Credits
         | 
| 65 98 |  | 
| 66 99 | 
             
            - [classifier gem](https://github.com/cardmagic/classifier)
         | 
| @@ -74,3 +107,13 @@ bayes.classify(*%w{ love }).should == [:love, 0.5] | |
| 74 107 | 
             
            3. Commit your changes (`git commit -am 'Add some feature'`)
         | 
| 75 108 | 
             
            4. Push to the branch (`git push origin my-new-feature`)
         | 
| 76 109 | 
             
            5. Create a new Pull Request
         | 
| 110 | 
            +
             | 
| 111 | 
            +
            ## Changelog
         | 
| 112 | 
            +
             | 
| 113 | 
            +
            ### 0.1.1 / 2014-12-15
         | 
| 114 | 
            +
             | 
| 115 | 
            +
            - fix redis backend
         | 
| 116 | 
            +
             | 
| 117 | 
            +
            ### 0.1.0 / 2014-12-15
         | 
| 118 | 
            +
             | 
| 119 | 
            +
            - initial implementation of the Redis backend
         | 
    
        data/lib/nb.rb
    CHANGED
    
    
| @@ -0,0 +1,39 @@ | |
module NaiveBayes
  module Backend
    # In-process storage for classifier statistics, held in plain Ruby hashes.
    #
    # * +categories+       - the category list handed to the constructor
    # * +tokens_count+     - category => { token => number of trained items containing it }
    # * +categories_count+ - category => number of trained items
    #
    # All three are exposed through accessors so the state can be dumped and
    # restored from outside.
    class Memory
      attr_accessor :categories, :tokens_count, :categories_count

      # @param categories [Array] category identifiers to keep statistics for
      def initialize(categories)
        @categories = categories

        clear!
      end

      # Resets every counter to zero for the categories given at construction.
      def clear!
        @tokens_count = {}
        @categories_count = {}

        @categories.each do |category|
          @tokens_count[category] = Hash.new(0)
          @categories_count[category] = 0
        end
      end

      # Records one training item for +category+.
      #
      # Duplicate tokens inside a single item are counted once.
      #
      # @param category [Object] one of the configured categories
      # @param tokens [Array] tokens of the item
      # @return [Integer] the updated item count of the category
      # @raise [ArgumentError] if +category+ is not configured
      def train(category, *tokens)
        counts = token_counts_for(category)

        # fetch(…, 0) keeps working when the inner hash was assigned from
        # outside without a zero default.
        tokens.uniq.each do |token|
          counts[token] = counts.fetch(token, 0) + 1
        end

        @categories_count[category] += 1
      end

      # Reverts one training item for +category+.
      #
      # Counters never drop below zero: untraining something that was never
      # trained would otherwise produce negative counts and, from them,
      # negative probabilities.
      #
      # @param category [Object] one of the configured categories
      # @param tokens [Array] tokens of the item
      # @return [Integer] the updated item count of the category
      # @raise [ArgumentError] if +category+ is not configured
      def untrain(category, *tokens)
        counts = token_counts_for(category)

        tokens.uniq.each do |token|
          current = counts.fetch(token, 0)
          counts[token] = current - 1 if current > 0
        end

        @categories_count[category] -= 1 if @categories_count[category] > 0
        @categories_count[category]
      end

      private

      # Returns the token counter hash of +category+, or raises a descriptive
      # error instead of failing later with NoMethodError on nil.
      def token_counts_for(category)
        unless @tokens_count.key?(category) && @categories_count.key?(category)
          raise ArgumentError, "unknown category: #{category.inspect}"
        end

        @tokens_count[category]
      end
    end
  end
end
| @@ -0,0 +1,102 @@ | |
| 1 | 
            +
            require "redis"
         | 
| 2 | 
            +
             | 
module NaiveBayes
  module Backend
    # Redis-backed storage for classifier statistics.
    #
    # Keys used on the server:
    #
    # * "nb:set:categories"            - set of category names
    # * "nb:hash:categories_count"     - hash: category => trained item count
    # * "nb:hash:tokens_count:<cat>"   - one hash per category: token => count
    class Redis
      CATEGORIES_KEY       = "nb:set:categories"
      CATEGORIES_COUNT_KEY = "nb:hash:categories_count"
      TOKENS_COUNT_PREFIX  = "nb:hash:tokens_count:"

      # Thin Hash-like view over a single Redis hash whose values are counters.
      class RedisHash
        # @param redis [#hget, #hset, #hincrby, #hvals, #hkeys] Redis client
        # @param hash_name [String] key of the Redis hash
        def initialize(redis, hash_name)
          @redis = redis
          @hash_name = hash_name
        end

        # @return [Float] stored value; 0.0 when the field is missing (nil.to_f)
        def [](key)
          @redis.hget(@hash_name, key).to_f
        end

        def []=(key, value)
          @redis.hset @hash_name, key, value
        end

        # Increments the counter stored under +key+ by one.
        def incr(key)
          @redis.hincrby @hash_name, key, 1
        end

        # Decrements the counter stored under +key+ by one.
        #
        # Redis has no HDECRBY command; decrementing is HINCRBY with a
        # negative amount.
        def decr(key)
          @redis.hincrby @hash_name, key, -1
        end

        # @return [Array<Float>] every value of the hash
        def values
          @redis.hvals(@hash_name).map(&:to_f)
        end

        # Yields each (field, value) pair and collects the block results.
        # Without a block an Enumerator is returned.
        def map
          return to_enum(:map) unless block_given?

          @redis.hkeys(@hash_name).map { |field| yield(field, self[field]) }
        end
      end

      # @param categories [Array] category identifiers
      # @param options [Hash] passed straight to ::Redis.new
      def initialize(categories, options={})
        @redis = ::Redis.new(options)

        @_categories = categories

        setup
      end

      # @return [Array<Symbol>] categories currently registered on the server
      def categories
        @redis.smembers(CATEGORIES_KEY).map(&:to_sym)
      end

      # @return [RedisHash] category => trained item count
      def categories_count
        @categories_count ||= RedisHash.new(@redis, CATEGORIES_COUNT_KEY)
      end

      # @return [Hash{Symbol => RedisHash}] category => token counters
      def tokens_count
        @tokens_count ||= {}
      end

      # Removes the classifier's own keys and re-creates the empty structure.
      #
      # Only the "nb:*" keys listed above are deleted; a FLUSHALL here would
      # destroy every unrelated key in every database of the server.
      def clear!
        known = (categories + @_categories.map { |c| c.to_s.to_sym }).uniq
        keys = [CATEGORIES_KEY, CATEGORIES_COUNT_KEY] + known.map { |c| tokens_key(c) }
        @redis.del(*keys)

        setup

        categories.each do |category|
          categories_count[category] = 0
        end
      end

      # Registers the configured categories on the server and builds the
      # per-category token counter views.
      def setup
        # SADD with no members is an error, so skip it for an empty list.
        @redis.sadd(CATEGORIES_KEY, @_categories.map(&:to_s)) unless @_categories.empty?

        categories.each do |category|
          tokens_count[category] = RedisHash.new(@redis, tokens_key(category))
        end
      end

      # Records one training item for +category+; duplicate tokens inside the
      # item are counted once.
      #
      # @raise [ArgumentError] if +category+ is not registered
      def train(category, *tokens)
        counters = token_counters_for(category)
        tokens.uniq.each { |token| counters.incr(token) }

        categories_count.incr(category)
      end

      # Reverts one training item for +category+.
      #
      # @raise [ArgumentError] if +category+ is not registered
      def untrain(category, *tokens)
        counters = token_counters_for(category)
        # decr is a RedisHash operation; calling it on counters[token] would
        # hit the Float returned by RedisHash#[].
        tokens.uniq.each { |token| counters.decr(token) }

        categories_count.decr(category)
      end

      private

      def tokens_key(category)
        "#{TOKENS_COUNT_PREFIX}#{category}"
      end

      def token_counters_for(category)
        tokens_count.fetch(category) do
          raise ArgumentError, "unknown category: #{category.inspect}"
        end
      end
    end
  end
end
| @@ -0,0 +1,127 @@ | |
| 1 | 
            +
            require "yaml"
         | 
| 2 | 
            +
             | 
module NaiveBayes
  # Naive Bayes classifier over token lists.
  #
  # Statistics are kept in a backend object (+backend+) that must respond to
  # +categories+, +tokens_count+, +categories_count+, +train+, +untrain+ and
  # +clear!+.
  #
  #   classifier = NaiveBayes::Classifier.new :love, :hate
  #   classifier.train :love, 'I', 'love', 'you'
  #   classifier.classify 'love'   # => [:love, score]
  class Classifier
    # Category returned by #classify when the best score is 0.0.
    attr_accessor :default_category
    attr_accessor :backend

    # @param categories [Array] category identifiers, optionally followed by
    #   an options Hash: +:backend+ (:memory, the default, or :redis) and, for
    #   the Redis backend, +:host+ (default 'localhost') and +:port+ (6379)
    # @raise [RuntimeError] for an unsupported backend name
    def initialize(*categories)
      options = categories.last.is_a?(Hash) ? categories.pop : {}

      @backend = build_backend(categories, options)
      @default_category = categories.first
    end

    def train(category, *tokens)
      backend.train(category, *tokens)
    end

    def untrain(category, *tokens)
      backend.untrain(category, *tokens)
    end

    def clear!
      backend.clear!
    end

    # @return [Array(Object, Float)] the best [category, score] pair, or
    #   [default_category, 0.0] when the best score is 0.0 or there are no
    #   categories at all
    def classify(*tokens)
      best = classifications(*tokens).first

      return [@default_category, 0.0] if best.nil? || best.last == 0.0

      best
    end

    # @return [Array<Array(Object, Float)>] [category, score] pairs, highest
    #   score first
    def classifications(*tokens)
      scores = {}
      backend.categories.each do |category|
        scores[category] = probability_of_tokens_given_a_category(tokens, category) * probability_of_a_category(category)
      end
      scores.sort_by { |_category, score| -score }
    end

    # @return [Array<Array>] up to +count+ triples [token, count, probability],
    #   ordered by descending probability of the token belonging to +category+
    def top_tokens_of_category(category, count=20)
      backend.tokens_count[category]
        .map { |token, occurrences| [token, occurrences, probability_of_a_token_in_category(token, category)] }
        .sort_by { |triple| -triple.last }
        .first(count)
    end

    # Share of +category+ in the per-category probabilities of +token+.
    def probability_of_a_token_in_category(token, category)
      total = backend.categories.inject(0.0) { |sum, c| sum + probability_of_a_token_given_a_category(token, c) }

      probability_of_a_token_given_a_category(token, category) / total
    end

    # Fraction of the items trained for +category+ that contained +token+;
    # falls back to #assumed_probability when the token was never seen there.
    def probability_of_a_token_given_a_category(token, category)
      # to_f also covers a missing entry (nil) in a counter hash that has no
      # zero default, e.g. one restored from YAML.
      occurrences = backend.tokens_count[category][token].to_f
      return assumed_probability if occurrences.zero?

      occurrences / backend.categories_count[category]
    end

    def probability_of_tokens_given_a_category(tokens, category)
      tokens.inject(1.0) do |product, token|
        product * probability_of_a_token_given_a_category(token, category)
      end
    end

    # Fraction of all trained items that belong to +category+; 0.0 before any
    # training (avoids 0/0 = NaN, which cannot be sorted).
    def probability_of_a_category(category)
      total = total_number_of_items
      return 0.0 if total.zero?

      backend.categories_count[category].to_f / total
    end

    # @return [Numeric] number of trained items over all categories (0 when
    #   there are none)
    def total_number_of_items
      backend.categories_count.values.inject(0) { |sum, count| sum + count }
    end

    # If we have only trained a little, a category may not have seen a token
    # yet. Treating that as probability 0 would be too harsh, so an assumed
    # probability is used instead; it shrinks as more items are trained
    # (0.5 / (items / 2)). Before any training it is 1.0 rather than Infinity.
    def assumed_probability
      total = total_number_of_items
      return 1.0 if total.zero?

      0.5 / (total.to_f / 2)
    end

    # @return [Hash] the backend state under :categories, :tokens_count and
    #   :categories_count
    def data
      {
        :categories => backend.categories,
        :tokens_count => backend.tokens_count,
        :categories_count => backend.categories_count
      }
    end

    # Writes the classifier state to +yaml_file+.
    #
    # @raise [RuntimeError] unless the backend is the memory backend
    def save(yaml_file)
      # Compare the backend's class; the backend object never equals :memory.
      raise 'only memory backend can save' unless backend.is_a?(Backend::Memory)

      File.write(yaml_file, data.to_yaml)
    end

    class << self
      # Loads a file written by #save into a memory-backed classifier.
      #
      # NOTE: the file is deserialized without restrictions (it contains
      # Symbols); only load files you trust.
      def load_yaml(yaml_file)
        data = read_yaml(yaml_file)

        new(*data[:categories], backend: :memory).tap do |classifier|
          classifier.backend.tokens_count = with_zero_default(data[:tokens_count])
          classifier.backend.categories_count = data[:categories_count]
        end
      end

      private

      # Newer Psych makes load_file a safe load that rejects Symbols;
      # unsafe_load_file keeps the original behaviour where it exists.
      def read_yaml(yaml_file)
        if YAML.respond_to?(:unsafe_load_file)
          YAML.unsafe_load_file(yaml_file)
        else
          YAML.load_file(yaml_file)
        end
      end

      # YAML does not preserve Hash defaults; restore the zero default of
      # every per-category token counter.
      def with_zero_default(tokens_count)
        tokens_count.each_with_object({}) do |(category, counts), restored|
          restored[category] = Hash.new(0).update(counts)
        end
      end
    end

    private

    # Instantiates the backend selected by options[:backend].
    def build_backend(categories, options)
      backend_name = options[:backend] || :memory

      case backend_name
      when :memory
        Backend::Memory.new(categories)
      when :redis
        Backend::Redis.new(categories, host: options[:host] || 'localhost', port: options[:port] || 6379)
      else
        raise "unsupported backend: #{backend_name}"
      end
    end
  end
end
    
        data/lib/nb/version.rb
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
            -
             | 
| 2 | 
            -
              VERSION = "0. | 
| 1 | 
            +
            module NaiveBayes
         | 
| 2 | 
            +
              VERSION = "0.1.1"
         | 
| 3 3 | 
             
            end
         | 
| @@ -0,0 +1,152 @@ | |
| 1 | 
            +
            require "spec_helper"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module NaiveBayes
         | 
| 4 | 
            +
              describe Classifier do
         | 
| 5 | 
            +
                let(:classifier) { Classifier.new(:love, :hate) }
         | 
| 6 | 
            +
                subject { classifier }
         | 
| 7 | 
            +
             | 
| 8 | 
            +
                it { should respond_to :train }
         | 
| 9 | 
            +
                it { should respond_to :untrain }
         | 
| 10 | 
            +
                it { should respond_to :save }
         | 
| 11 | 
            +
                it { should respond_to :classify }
         | 
| 12 | 
            +
                it { should respond_to :classifications }
         | 
| 13 | 
            +
                it { should respond_to :probability_of_a_token_given_a_category }
         | 
| 14 | 
            +
                it { should respond_to :probability_of_tokens_given_a_category }
         | 
| 15 | 
            +
                it { should respond_to :probability_of_a_category }
         | 
| 16 | 
            +
                it { should respond_to :probability_of_a_token_in_category }
         | 
| 17 | 
            +
                # it { should respond_to :total_number_of_tokens }
         | 
| 18 | 
            +
                it { should respond_to :total_number_of_items }
         | 
| 19 | 
            +
                it { should respond_to :top_tokens_of_category }
         | 
| 20 | 
            +
                it { should respond_to :default_category= }
         | 
| 21 | 
            +
             | 
| 22 | 
            +
                it { should respond_to :clear! }
         | 
| 23 | 
            +
             | 
| 24 | 
            +
                [:memory, :redis].each do |backend|
         | 
| 25 | 
            +
                  describe "with backend #{backend}" do
         | 
| 26 | 
            +
             | 
| 27 | 
            +
                    let(:classifier) { Classifier.new(:love, :hate, backend: backend) }
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                    subject { classifier }
         | 
| 30 | 
            +
             | 
| 31 | 
            +
                    before(:each) do
         | 
| 32 | 
            +
                      subject.clear!
         | 
| 33 | 
            +
                    end
         | 
| 34 | 
            +
             | 
| 35 | 
            +
                    # describe '#total_number_of_tokens' do
         | 
| 36 | 
            +
                    #   it 'calculates correctly' do
         | 
| 37 | 
            +
                    #     bayes.train :love, 'I', 'love', 'you'
         | 
| 38 | 
            +
                    #     bayes.train :hate, 'I', 'hate', 'you'
         | 
| 39 | 
            +
                    #
         | 
| 40 | 
            +
                    #     bayes.total_number_of_tokens.should == 6
         | 
| 41 | 
            +
                    #
         | 
| 42 | 
            +
                    #     bayes.train :love, 'I', 'love', 'you', 'more'
         | 
| 43 | 
            +
                    #
         | 
| 44 | 
            +
                    #     bayes.total_number_of_tokens.should == 10
         | 
| 45 | 
            +
                    #   end
         | 
| 46 | 
            +
                    # end
         | 
| 47 | 
            +
             | 
| 48 | 
            +
                    describe '#categories_count and #tokens_count' do
         | 
| 49 | 
            +
                      it 'must get it right' do
         | 
| 50 | 
            +
                        subject.backend.categories_count[:love].should == 0
         | 
| 51 | 
            +
             | 
| 52 | 
            +
                        subject.train :love, 'I', 'love', 'you'
         | 
| 53 | 
            +
                        subject.train :hate, 'I', 'hate', 'you'
         | 
| 54 | 
            +
             | 
| 55 | 
            +
                        subject.backend.categories_count[:love].should == 1
         | 
| 56 | 
            +
                        subject.backend.tokens_count[:hate]['you'].should == 1
         | 
| 57 | 
            +
                        subject.backend.tokens_count[:hate]['love'].should == 0
         | 
| 58 | 
            +
                      end
         | 
| 59 | 
            +
                    end
         | 
| 60 | 
            +
             | 
| 61 | 
            +
                    describe '#probability_of_a_token_in_category' do
         | 
| 62 | 
            +
                      it 'calculates correctly' do
         | 
| 63 | 
            +
                        subject.train :love, 'I', 'love', 'you'
         | 
| 64 | 
            +
                        subject.train :hate, 'I', 'hate', 'you'
         | 
| 65 | 
            +
             | 
| 66 | 
            +
                        subject.probability_of_a_token_in_category('love', :love).should == 2.0/3  # 1 / ( 1 + 0.5 )
         | 
| 67 | 
            +
                        subject.probability_of_a_token_in_category('hate', :love).should == 1.0/3  # 0.5 / ( 1 + 0.5 )
         | 
| 68 | 
            +
                        subject.probability_of_a_token_in_category('I', :love).should == 0.5
         | 
| 69 | 
            +
             | 
| 70 | 
            +
                        subject.train :love, 'hate', 'is', 'love'
         | 
| 71 | 
            +
                        subject.train :love, 'hate', 'is', 'love'
         | 
| 72 | 
            +
                        subject.train :love, 'hate', 'is', 'love'
         | 
| 73 | 
            +
             | 
| 74 | 
            +
                        subject.probability_of_a_token_in_category('love', :love).should == 5.0/6  # 1 / ( 1 + 0.2 )
         | 
| 75 | 
            +
                        subject.probability_of_a_token_in_category('hate', :love).should == 3.0/7  # 0.75 / ( 0.75 + 1 )
         | 
| 76 | 
            +
                      end
         | 
| 77 | 
            +
                    end
         | 
| 78 | 
            +
             | 
| 79 | 
            +
                    describe '#total_number_of_items' do
         | 
| 80 | 
            +
                      it 'calculates correctly' do
         | 
| 81 | 
            +
                        subject.train :love, 'I', 'love', 'you'
         | 
| 82 | 
            +
                        subject.train :hate, 'I', 'hate', 'you'
         | 
| 83 | 
            +
             | 
| 84 | 
            +
                        subject.total_number_of_items.should == 2
         | 
| 85 | 
            +
             | 
| 86 | 
            +
                        subject.train :love, 'I', 'love', 'you', 'more'
         | 
| 87 | 
            +
             | 
| 88 | 
            +
                        subject.total_number_of_items.should == 3
         | 
| 89 | 
            +
                      end
         | 
| 90 | 
            +
                    end
         | 
| 91 | 
            +
             | 
| 92 | 
            +
                    describe '#probability_of_a_category' do
         | 
| 93 | 
            +
                      it 'calculates correctly' do
         | 
| 94 | 
            +
                        subject.train :love, 'I', 'love', 'you'
         | 
| 95 | 
            +
                        subject.train :hate, 'I', 'hate', 'you'
         | 
| 96 | 
            +
             | 
| 97 | 
            +
                        subject.probability_of_a_category(:love).should == 0.5
         | 
| 98 | 
            +
                      end
         | 
| 99 | 
            +
                    end
         | 
| 100 | 
            +
             | 
| 101 | 
            +
                    describe '#probability_of_token_given_a_category' do
         | 
| 102 | 
            +
                      it 'calculates correctly' do
         | 
| 103 | 
            +
                        subject.train :love, 'I', 'love', 'you'
         | 
| 104 | 
            +
                        subject.train :hate, 'I', 'hate', 'you'
         | 
| 105 | 
            +
             | 
| 106 | 
            +
                        subject.probability_of_a_token_given_a_category('love', :love).should == 1
         | 
| 107 | 
            +
                        subject.probability_of_a_token_given_a_category('you', :hate).should == 1
         | 
| 108 | 
            +
             | 
| 109 | 
            +
                        subject.train :love, 'I', 'love', 'you', 'more'
         | 
| 110 | 
            +
             | 
| 111 | 
            +
                        subject.probability_of_a_token_given_a_category('more', :love).should == 0.5
         | 
| 112 | 
            +
                        # bayes.probability_of_token_given_a_category('more', :hate).should == 0
         | 
| 113 | 
            +
                      end
         | 
| 114 | 
            +
                    end
         | 
| 115 | 
            +
             | 
| 116 | 
            +
                    describe '#classifications' do
         | 
| 117 | 
            +
                      it 'calculates correctly' do
         | 
| 118 | 
            +
                        subject.train :love, 'I', 'love', 'you'
         | 
| 119 | 
            +
                        subject.train :hate, 'I', 'hate', 'you'
         | 
| 120 | 
            +
             | 
| 121 | 
            +
                        subject.classifications(*%w{ I love you }).should == [[:love, 0.5], [:hate, 0.25]]
         | 
| 122 | 
            +
                        subject.classify(*%w{ I love you }).should == [:love, 0.5]
         | 
| 123 | 
            +
                        subject.classify(*%w{ love }).should == [:love, 0.5]
         | 
| 124 | 
            +
             | 
| 125 | 
            +
                        subject.train :love, 'I', 'love', 'you'
         | 
| 126 | 
            +
                        subject.train :love, 'I', 'love', 'you'
         | 
| 127 | 
            +
                        subject.train :love, 'I', 'love', 'you'
         | 
| 128 | 
            +
             | 
| 129 | 
            +
                        subject.classify(*%w{ I love you }).should == [:love, 0.8]
         | 
| 130 | 
            +
                        subject.classify(*%w{ love }).should == [:love, 0.8]
         | 
| 131 | 
            +
                        subject.classify(*%w{ only love }).first.should == :love #[:love, 0.16], (0.2 * 1) * 0.8
         | 
| 132 | 
            +
                      end
         | 
| 133 | 
            +
                    end
         | 
| 134 | 
            +
             | 
| 135 | 
            +
                    describe '#top_tokens_of_category' do
         | 
| 136 | 
            +
                      it 'finds to tokens' do
         | 
| 137 | 
            +
                        subject.train :love, 'I', 'love', 'you'
         | 
| 138 | 
            +
                        subject.train :hate, 'I', 'hate', 'you'
         | 
| 139 | 
            +
             | 
| 140 | 
            +
                        subject.top_tokens_of_category(:love).count.should == 3
         | 
| 141 | 
            +
                      end
         | 
| 142 | 
            +
                    end
         | 
| 143 | 
            +
                  end
         | 
| 144 | 
            +
                end
         | 
| 145 | 
            +
             | 
| 146 | 
            +
                describe 'class methods' do
         | 
| 147 | 
            +
                  subject { Classifier }
         | 
| 148 | 
            +
             | 
| 149 | 
            +
                  it { should respond_to :load_yaml }
         | 
| 150 | 
            +
                end
         | 
| 151 | 
            +
              end
         | 
| 152 | 
            +
            end
         | 
    
        data/spec/spec_helper.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: nb
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.0.4
         | 
| 4 | 
            +
              version: 0.1.1
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Forrest Ye
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2014- | 
| 11 | 
            +
            date: 2014-12-16 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: bundler
         | 
| @@ -54,10 +54,14 @@ files: | |
| 54 54 | 
             
            - README.md
         | 
| 55 55 | 
             
            - Rakefile
         | 
| 56 56 | 
             
            - lib/nb.rb
         | 
| 57 | 
            -
            - lib/nb/naive_bayes.rb
         | 
| 57 | 
            +
            - lib/nb/backend/memory.rb
         | 
| 58 | 
            +
            - lib/nb/backend/redis.rb
         | 
| 59 | 
            +
            - lib/nb/classifier.rb
         | 
| 58 60 | 
             
            - lib/nb/version.rb
         | 
| 59 61 | 
             
            - nb.gemspec
         | 
| 60 | 
            -
            - spec/nb/naive_bayes_spec.rb
         | 
| 62 | 
            +
            - spec/nb/backend/memory_spec.rb
         | 
| 63 | 
            +
            - spec/nb/backend/redis_spec.rb
         | 
| 64 | 
            +
            - spec/nb/classifier_spec.rb
         | 
| 61 65 | 
             
            - spec/spec_helper.rb
         | 
| 62 66 | 
             
            homepage: https://github.com/forresty/nb
         | 
| 63 67 | 
             
            licenses:
         | 
| @@ -79,10 +83,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 79 83 | 
             
                  version: '0'
         | 
| 80 84 | 
             
            requirements: []
         | 
| 81 85 | 
             
            rubyforge_project: 
         | 
| 82 | 
            -
            rubygems_version: 2. | 
| 86 | 
            +
            rubygems_version: 2.4.4
         | 
| 83 87 | 
             
            signing_key: 
         | 
| 84 88 | 
             
            specification_version: 4
         | 
| 85 89 | 
             
            summary: yet another Naive Bayes library
         | 
| 86 90 | 
             
            test_files:
         | 
| 87 | 
            -
            - spec/nb/naive_bayes_spec.rb
         | 
| 91 | 
            +
            - spec/nb/backend/memory_spec.rb
         | 
| 92 | 
            +
            - spec/nb/backend/redis_spec.rb
         | 
| 93 | 
            +
            - spec/nb/classifier_spec.rb
         | 
| 88 94 | 
             
            - spec/spec_helper.rb
         | 
    
        data/lib/nb/naive_bayes.rb
    DELETED
    
    | @@ -1,112 +0,0 @@ | |
| 1 | 
            -
            require "yaml"
         | 
| 2 | 
            -
             | 
| 3 | 
            -
            class NaiveBayes
         | 
| 4 | 
            -
              attr_accessor :categories, :tokens_count, :categories_count, :default_category
         | 
| 5 | 
            -
             | 
| 6 | 
            -
              def initialize(*categories)
         | 
| 7 | 
            -
                @categories = categories
         | 
| 8 | 
            -
                @tokens_count = {}
         | 
| 9 | 
            -
                @categories_count = {}
         | 
| 10 | 
            -
                @default_category = @categories.first
         | 
| 11 | 
            -
             | 
| 12 | 
            -
                categories.each do |category|
         | 
| 13 | 
            -
                  @tokens_count[category] = Hash.new(0)
         | 
| 14 | 
            -
                  @categories_count[category] = 0
         | 
| 15 | 
            -
                end
         | 
| 16 | 
            -
              end
         | 
| 17 | 
            -
             | 
| 18 | 
            -
              def train(category, *tokens)
         | 
| 19 | 
            -
                tokens.uniq.each do |token|
         | 
| 20 | 
            -
                  @tokens_count[category][token] += 1
         | 
| 21 | 
            -
                end
         | 
| 22 | 
            -
                @categories_count[category] += 1
         | 
| 23 | 
            -
              end
         | 
| 24 | 
            -
             | 
| 25 | 
            -
              def untrain(category, *tokens)
         | 
| 26 | 
            -
                tokens.uniq.each do |token|
         | 
| 27 | 
            -
                  @tokens_count[category][token] -= 1
         | 
| 28 | 
            -
                end
         | 
| 29 | 
            -
                @categories_count[category] -= 1
         | 
| 30 | 
            -
              end
         | 
| 31 | 
            -
             | 
| 32 | 
            -
              def classify(*tokens)
         | 
| 33 | 
            -
                result = classifications(*tokens).first
         | 
| 34 | 
            -
             | 
| 35 | 
            -
                if result.last == 0.0
         | 
| 36 | 
            -
                  [@default_category, 0.0]
         | 
| 37 | 
            -
                else
         | 
| 38 | 
            -
                  result
         | 
| 39 | 
            -
                end
         | 
| 40 | 
            -
              end
         | 
| 41 | 
            -
             | 
| 42 | 
            -
              def classifications(*tokens)
         | 
| 43 | 
            -
                scores = {}
         | 
| 44 | 
            -
                @categories.each do |category|
         | 
| 45 | 
            -
                  scores[category] = probability_of_tokens_given_a_category(tokens, category) * probability_of_a_category(category)
         | 
| 46 | 
            -
                end
         | 
| 47 | 
            -
                scores.sort_by { |k, v| -v }
         | 
| 48 | 
            -
              end
         | 
| 49 | 
            -
             | 
| 50 | 
            -
              def top_tokens_of_category(category, count=20)
         | 
| 51 | 
            -
                tokens_count[category].map { |k, v| [k, v, probability_of_a_token_in_category(k, category)] }.sort_by { |i| -i.last }.first(count)
         | 
| 52 | 
            -
              end
         | 
| 53 | 
            -
             | 
| 54 | 
            -
              def probability_of_a_token_in_category(token, category)
         | 
| 55 | 
            -
                probability_of_a_token_given_a_category(token, category) / @categories.inject(0.0) { |r, c| r + probability_of_a_token_given_a_category(token, c) }
         | 
| 56 | 
            -
              end
         | 
| 57 | 
            -
             | 
| 58 | 
            -
              def probability_of_a_token_given_a_category(token, category)
         | 
| 59 | 
            -
                return assumed_probability if @tokens_count[category][token] == 0
         | 
| 60 | 
            -
             | 
| 61 | 
            -
                @tokens_count[category][token].to_f / @categories_count[category]
         | 
| 62 | 
            -
              end
         | 
| 63 | 
            -
             | 
| 64 | 
            -
              def probability_of_tokens_given_a_category(tokens, category)
         | 
| 65 | 
            -
                tokens.inject(1.0) do |product, token|
         | 
| 66 | 
            -
                  product * probability_of_a_token_given_a_category(token, category)
         | 
| 67 | 
            -
                end
         | 
| 68 | 
            -
              end
         | 
| 69 | 
            -
             | 
| 70 | 
            -
              def probability_of_a_category(category)
         | 
| 71 | 
            -
                @categories_count[category].to_f / total_number_of_items
         | 
| 72 | 
            -
              end
         | 
| 73 | 
            -
             | 
| 74 | 
            -
              # def total_number_of_tokens
         | 
| 75 | 
            -
              #   @tokens_count.values.inject(0) { |sum, hash| sum + hash.values.inject(&:+) }
         | 
| 76 | 
            -
              # end
         | 
| 77 | 
            -
             | 
| 78 | 
            -
              def total_number_of_items
         | 
| 79 | 
            -
                @categories_count.values.inject(&:+)
         | 
| 80 | 
            -
              end
         | 
| 81 | 
            -
             | 
| 82 | 
            -
              # If we have only trained a little bit a class may not have had a feature yet
         | 
| 83 | 
            -
              # give it a probability of 0 may not be true so we produce a assumed probability
         | 
| 84 | 
            -
              # which gets smaller more we train
         | 
| 85 | 
            -
              def assumed_probability
         | 
| 86 | 
            -
                0.5 / (total_number_of_items.to_f / 2)
         | 
| 87 | 
            -
              end
         | 
| 88 | 
            -
             | 
| 89 | 
            -
              def data
         | 
| 90 | 
            -
                {
         | 
| 91 | 
            -
                  :categories => @categories,
         | 
| 92 | 
            -
                  :tokens_count => @tokens_count,
         | 
| 93 | 
            -
                  :categories_count => @categories_count
         | 
| 94 | 
            -
                }
         | 
| 95 | 
            -
              end
         | 
| 96 | 
            -
             | 
| 97 | 
            -
              def save(yaml_file)
         | 
| 98 | 
            -
                File.write(yaml_file, data.to_yaml)
         | 
| 99 | 
            -
              end
         | 
| 100 | 
            -
             | 
| 101 | 
            -
              class << self
         | 
| 102 | 
            -
                def load_yaml(yaml_file)
         | 
| 103 | 
            -
                  data = YAML.load_file(yaml_file)
         | 
| 104 | 
            -
             | 
| 105 | 
            -
                  new.tap do |bayes|
         | 
| 106 | 
            -
                    bayes.categories = data[:categories]
         | 
| 107 | 
            -
                    bayes.tokens_count = data[:tokens_count]
         | 
| 108 | 
            -
                    bayes.categories_count = data[:categories_count]
         | 
| 109 | 
            -
                  end
         | 
| 110 | 
            -
                end
         | 
| 111 | 
            -
              end
         | 
| 112 | 
            -
            end
         | 
    
        data/spec/nb/naive_bayes_spec.rb
    DELETED
    
    | @@ -1,113 +0,0 @@ | |
| 1 | 
            -
            require "spec_helper"
         | 
| 2 | 
            -
             | 
| 3 | 
            -
            describe NaiveBayes do
         | 
| 4 | 
            -
              it { should respond_to :train }
         | 
| 5 | 
            -
              it { should respond_to :untrain }
         | 
| 6 | 
            -
              it { should respond_to :save }
         | 
| 7 | 
            -
              it { should respond_to :classify }
         | 
| 8 | 
            -
              it { should respond_to :classifications }
         | 
| 9 | 
            -
              it { should respond_to :probability_of_a_token_given_a_category }
         | 
| 10 | 
            -
              it { should respond_to :probability_of_tokens_given_a_category }
         | 
| 11 | 
            -
              it { should respond_to :probability_of_a_category }
         | 
| 12 | 
            -
              it { should respond_to :probability_of_a_token_in_category }
         | 
| 13 | 
            -
              # it { should respond_to :total_number_of_tokens }
         | 
| 14 | 
            -
              it { should respond_to :total_number_of_items }
         | 
| 15 | 
            -
              it { should respond_to :top_tokens_of_category }
         | 
| 16 | 
            -
              it { should respond_to :default_category= }
         | 
| 17 | 
            -
             | 
| 18 | 
            -
              let(:bayes) { NaiveBayes.new(:love, :hate) }
         | 
| 19 | 
            -
              subject { bayes }
         | 
| 20 | 
            -
             | 
| 21 | 
            -
              # describe '#total_number_of_tokens' do
         | 
| 22 | 
            -
              #   it 'calculates correctly' do
         | 
| 23 | 
            -
              #     bayes.train :love, 'I', 'love', 'you'
         | 
| 24 | 
            -
              #     bayes.train :hate, 'I', 'hate', 'you'
         | 
| 25 | 
            -
              #
         | 
| 26 | 
            -
              #     bayes.total_number_of_tokens.should == 6
         | 
| 27 | 
            -
              #
         | 
| 28 | 
            -
              #     bayes.train :love, 'I', 'love', 'you', 'more'
         | 
| 29 | 
            -
              #
         | 
| 30 | 
            -
              #     bayes.total_number_of_tokens.should == 10
         | 
| 31 | 
            -
              #   end
         | 
| 32 | 
            -
              # end
         | 
| 33 | 
            -
             | 
| 34 | 
            -
              describe '#probability_of_a_token_in_category' do
         | 
| 35 | 
            -
                it 'calculates correctly' do
         | 
| 36 | 
            -
                  bayes.train :love, 'I', 'love', 'you'
         | 
| 37 | 
            -
                  bayes.train :hate, 'I', 'hate', 'you'
         | 
| 38 | 
            -
             | 
| 39 | 
            -
                  bayes.probability_of_a_token_in_category('love', :love).should == 2.0/3  # 1 / ( 1 + 0.5 )
         | 
| 40 | 
            -
                  bayes.probability_of_a_token_in_category('hate', :love).should == 1.0/3  # 0.5 / ( 1 + 0.5 )
         | 
| 41 | 
            -
                  bayes.probability_of_a_token_in_category('I', :love).should == 0.5
         | 
| 42 | 
            -
             | 
| 43 | 
            -
                  bayes.train :love, 'hate', 'is', 'love'
         | 
| 44 | 
            -
                  bayes.train :love, 'hate', 'is', 'love'
         | 
| 45 | 
            -
                  bayes.train :love, 'hate', 'is', 'love'
         | 
| 46 | 
            -
             | 
| 47 | 
            -
                  bayes.probability_of_a_token_in_category('love', :love).should == 5.0/6  # 1 / ( 1 + 0.2 )
         | 
| 48 | 
            -
                  bayes.probability_of_a_token_in_category('hate', :love).should == 3.0/7  # 0.75 / ( 0.75 + 1 )
         | 
| 49 | 
            -
                end
         | 
| 50 | 
            -
              end
         | 
| 51 | 
            -
             | 
| 52 | 
            -
              describe '#total_number_of_items' do
         | 
| 53 | 
            -
                it 'calculates correctly' do
         | 
| 54 | 
            -
                  bayes.train :love, 'I', 'love', 'you'
         | 
| 55 | 
            -
                  bayes.train :hate, 'I', 'hate', 'you'
         | 
| 56 | 
            -
             | 
| 57 | 
            -
                  bayes.total_number_of_items.should == 2
         | 
| 58 | 
            -
             | 
| 59 | 
            -
                  bayes.train :love, 'I', 'love', 'you', 'more'
         | 
| 60 | 
            -
             | 
| 61 | 
            -
                  bayes.total_number_of_items.should == 3
         | 
| 62 | 
            -
                end
         | 
| 63 | 
            -
              end
         | 
| 64 | 
            -
             | 
| 65 | 
            -
              describe '#probability_of_a_category' do
         | 
| 66 | 
            -
                it 'calculates correctly' do
         | 
| 67 | 
            -
                  bayes.train :love, 'I', 'love', 'you'
         | 
| 68 | 
            -
                  bayes.train :hate, 'I', 'hate', 'you'
         | 
| 69 | 
            -
             | 
| 70 | 
            -
                  bayes.probability_of_a_category(:love).should == 0.5
         | 
| 71 | 
            -
                end
         | 
| 72 | 
            -
              end
         | 
| 73 | 
            -
             | 
| 74 | 
            -
              describe '#probability_of_token_given_a_category' do
         | 
| 75 | 
            -
                it 'calculates correctly' do
         | 
| 76 | 
            -
                  bayes.train :love, 'I', 'love', 'you'
         | 
| 77 | 
            -
                  bayes.train :hate, 'I', 'hate', 'you'
         | 
| 78 | 
            -
             | 
| 79 | 
            -
                  bayes.probability_of_a_token_given_a_category('love', :love).should == 1
         | 
| 80 | 
            -
                  bayes.probability_of_a_token_given_a_category('you', :hate).should == 1
         | 
| 81 | 
            -
             | 
| 82 | 
            -
                  bayes.train :love, 'I', 'love', 'you', 'more'
         | 
| 83 | 
            -
             | 
| 84 | 
            -
                  bayes.probability_of_a_token_given_a_category('more', :love).should == 0.5
         | 
| 85 | 
            -
                  # bayes.probability_of_token_given_a_category('more', :hate).should == 0
         | 
| 86 | 
            -
                end
         | 
| 87 | 
            -
              end
         | 
| 88 | 
            -
             | 
| 89 | 
            -
              describe '#classifications' do
         | 
| 90 | 
            -
                it 'calculates correctly' do
         | 
| 91 | 
            -
                  bayes.train :love, 'I', 'love', 'you'
         | 
| 92 | 
            -
                  bayes.train :hate, 'I', 'hate', 'you'
         | 
| 93 | 
            -
             | 
| 94 | 
            -
                  bayes.classifications(*%w{ I love you }).should == [[:love, 0.5], [:hate, 0.25]]
         | 
| 95 | 
            -
                  bayes.classify(*%w{ I love you }).should == [:love, 0.5]
         | 
| 96 | 
            -
                  bayes.classify(*%w{ love }).should == [:love, 0.5]
         | 
| 97 | 
            -
             | 
| 98 | 
            -
                  bayes.train :love, 'I', 'love', 'you'
         | 
| 99 | 
            -
                  bayes.train :love, 'I', 'love', 'you'
         | 
| 100 | 
            -
                  bayes.train :love, 'I', 'love', 'you'
         | 
| 101 | 
            -
             | 
| 102 | 
            -
                  bayes.classify(*%w{ I love you }).should == [:love, 0.8]
         | 
| 103 | 
            -
                  bayes.classify(*%w{ love }).should == [:love, 0.8]
         | 
| 104 | 
            -
                  bayes.classify(*%w{ only love }).first.should == :love #[:love, 0.16], (0.2 * 1) * 0.8
         | 
| 105 | 
            -
                end
         | 
| 106 | 
            -
              end
         | 
| 107 | 
            -
             | 
| 108 | 
            -
              describe 'class methods' do
         | 
| 109 | 
            -
                subject { NaiveBayes }
         | 
| 110 | 
            -
             | 
| 111 | 
            -
                it { should respond_to :load_yaml }
         | 
| 112 | 
            -
              end
         | 
| 113 | 
            -
            end
         |