bae 0.0.7-java → 0.0.8-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +37 -1
- data/lib/bae/classifier.rb +165 -8
- data/lib/bae/native_classifier.rb +26 -0
- data/lib/bae/version.rb +1 -1
- data/lib/bae.rb +2 -4
- data/spec/lib/bae/classifier_spec.rb +51 -2
- data/spec/lib/bae/native_classifier_spec.rb +33 -0
- data/spec/spec_helper.rb +1 -0
- metadata +5 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: cb626ff0b92f80f096cebc7248a64a8f47f02fda
         | 
| 4 | 
            +
              data.tar.gz: 87c41e0571e1a31c303f9ab346eef119cec83e6f
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 478e21b1c13f82037a5773cbb5b960ff98387b69cd024ece14c17eee7e9bf5784b658ea555e223e0affcc689ff6aa3fed9f59460ce586f283e06eeecc0d2291f
         | 
| 7 | 
            +
              data.tar.gz: a3ceddd6c99d9ca8826f2142b2f383b9426ca0f9b28f88fbf966d954595b368c286551c94a6202d1add93709f196b4704bbe6ef5657b371dc1f41a2ac80317ed
         | 
    
        data/README.md
    CHANGED
    
    | @@ -3,6 +3,15 @@ Bae | |
| 3 3 |  | 
| 4 4 | 
             
            Bae is a multinomial naive bayes classifier based on another gem ["naivebayes"](https://github.com/id774/naivebayes), only this one uses java to do the heavy lifting.
         | 
| 5 5 |  | 
| 6 | 
            +
            By default this will use the vanilla ruby implementation, but you can use the native classifier written in java.
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            ```ruby
         | 
| 9 | 
            +
            require 'bae/native_classifier'
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            classifier = ::Bae::NativeClassifier.new
         | 
| 12 | 
            +
            ```
         | 
| 13 | 
            +
             | 
| 14 | 
            +
             | 
| 6 15 | 
             
            ## Installation
         | 
| 7 16 |  | 
| 8 17 | 
             
            Add this line to your application's Gemfile:
         | 
| @@ -50,10 +59,37 @@ classifier.classify("aaa bbb") | |
| 50 59 | 
             
            #=> {"positive"=>0.8962655601659751, "negative"=>0.0663900414937759, "neutral"=>0.037344398340248955}
         | 
| 51 60 | 
             
            ```
         | 
| 52 61 |  | 
| 62 | 
            +
            ### Saving State
         | 
| 63 | 
            +
             | 
| 64 | 
            +
            You can actually save a snapshot of the trained classifier to disk and load it into memory.
         | 
| 65 | 
            +
             | 
| 66 | 
            +
            ```ruby
         | 
| 67 | 
            +
            # From the example above...
         | 
| 68 | 
            +
            classifier = ::Bae::Classifier.new
         | 
| 69 | 
            +
            classifier.train("positive", {"aaa" => 0, "bbb" => 1})
         | 
| 70 | 
            +
            classifier.train("negative", {"ccc" => 2, "ddd" => 3})
         | 
| 71 | 
            +
             | 
| 72 | 
            +
            classifier.finish_training!
         | 
| 73 | 
            +
             | 
| 74 | 
            +
            classifier.classify({"aaa" => 1, "bbb" => 1})
         | 
| 75 | 
            +
            #=> {"positive" => 0.8767123287671234, "negative" => 0.12328767123287669}
         | 
| 76 | 
            +
             | 
| 77 | 
            +
            # Now let's save it to disk
         | 
| 78 | 
            +
            classifier.save_state("/tmp/some_state.json")
         | 
| 79 | 
            +
             | 
| 80 | 
            +
            # Let's create a new classifier and load from the state we just saved
         | 
| 81 | 
            +
            classifier = ::Bae::Classifier.new
         | 
| 82 | 
            +
            classifier.load_state("/tmp/some_state.json")
         | 
| 83 | 
            +
             | 
| 84 | 
            +
            # Now we can classify without retraining
         | 
| 85 | 
            +
            classifier.classify({"aaa" => 1, "bbb" => 1})
         | 
| 86 | 
            +
            #=> {"positive" => 0.8767123287671234, "negative" => 0.12328767123287669}
         | 
| 87 | 
            +
            ```
         | 
| 88 | 
            +
             | 
| 53 89 |  | 
| 54 90 | 
             
            ## Contributing
         | 
| 55 91 |  | 
| 56 | 
            -
            1. Fork it ( https://github.com/ | 
| 92 | 
            +
            1. Fork it ( https://github.com/film42/bae/fork )
         | 
| 57 93 | 
             
            2. Create your feature branch (`git checkout -b my-new-feature`)
         | 
| 58 94 | 
             
            3. Commit your changes (`git commit -am 'Add some feature'`)
         | 
| 59 95 | 
             
            4. Push to the branch (`git push origin my-new-feature`)
         | 
    
        data/lib/bae/classifier.rb
    CHANGED
    
    | @@ -1,23 +1,180 @@ | |
| 1 1 | 
             
            module Bae
         | 
| 2 2 | 
             
              class Classifier
         | 
| 3 3 |  | 
| 4 | 
            -
                 | 
| 4 | 
            +
                attr_accessor :frequency_table, :label_index, :label_index_sequence,
         | 
| 5 | 
            +
                  :label_instance_count, :total_terms
         | 
| 5 6 |  | 
| 6 7 | 
             
                def initialize
         | 
| 7 | 
            -
                  @ | 
| 8 | 
            +
                  @frequency_table = ::Hash.new { |hash, feature| hash[feature] = [] }
         | 
| 9 | 
            +
                  @label_instance_count = ::Hash.new { |hash, label| hash[label] = 0 }
         | 
| 10 | 
            +
                  @label_index = ::Hash.new { |hash, label| hash[label] = 0 }
         | 
| 11 | 
            +
                  @label_index_sequence = -1 # start at -1 so 0 is first value
         | 
| 12 | 
            +
                  @total_terms = 0.0
         | 
| 8 13 | 
             
                end
         | 
| 9 14 |  | 
| 10 | 
            -
                def  | 
| 11 | 
            -
                   | 
| 15 | 
            +
                def finish_training!
         | 
| 16 | 
            +
                  calculate_likelihoods!
         | 
| 17 | 
            +
                  calculate_priors!
         | 
| 12 18 | 
             
                end
         | 
| 13 19 |  | 
| 14 | 
            -
                def  | 
| 15 | 
            -
                   | 
| 20 | 
            +
                def train(label, training_data)
         | 
| 21 | 
            +
                  if training_data.is_a?(::String)
         | 
| 22 | 
            +
                    train_from_string(label, training_data)
         | 
| 23 | 
            +
                  elsif training_data.is_a?(::Hash)
         | 
| 24 | 
            +
                    train_from_hash(label, training_data)
         | 
| 25 | 
            +
                  else
         | 
| 26 | 
            +
                    fail 'Training data must either be a string or hash'
         | 
| 27 | 
            +
                  end
         | 
| 16 28 | 
             
                end
         | 
| 17 29 |  | 
| 18 | 
            -
                def  | 
| 19 | 
            -
                   | 
| 30 | 
            +
                def train_from_string(label, document)
         | 
| 31 | 
            +
                  words = document.split
         | 
| 32 | 
            +
             | 
| 33 | 
            +
                  words.each do |word|
         | 
| 34 | 
            +
                    update_label_index(label)
         | 
| 35 | 
            +
                    update_frequency_table(label, word, 1)
         | 
| 36 | 
            +
                  end
         | 
| 37 | 
            +
                  @label_instance_count[label] += 1
         | 
| 38 | 
            +
                  @total_terms += 1
         | 
| 39 | 
            +
                end
         | 
| 40 | 
            +
             | 
| 41 | 
            +
                def train_from_hash(label, frequency_hash)
         | 
| 42 | 
            +
                  frequency_hash.each do |word, frequency|
         | 
| 43 | 
            +
                    update_label_index(label)
         | 
| 44 | 
            +
                    update_frequency_table(label, word, frequency)
         | 
| 45 | 
            +
                  end
         | 
| 46 | 
            +
                  @label_instance_count[label] += 1
         | 
| 47 | 
            +
                  @total_terms += 1
         | 
| 48 | 
            +
                end
         | 
| 49 | 
            +
             | 
| 50 | 
            +
                def classify(data)
         | 
| 51 | 
            +
                  if data.is_a?(::String)
         | 
| 52 | 
            +
                    classify_from_string(data)
         | 
| 53 | 
            +
                  elsif data.is_a?(::Hash)
         | 
| 54 | 
            +
                    classify_from_hash(data)
         | 
| 55 | 
            +
                  else
         | 
| 56 | 
            +
                    fail 'Training data must either be a string or hash'
         | 
| 57 | 
            +
                  end
         | 
| 58 | 
            +
                end
         | 
| 59 | 
            +
             | 
| 60 | 
            +
                def classify_from_hash(frequency_hash)
         | 
| 61 | 
            +
                  document = frequency_hash.map{ |word, frequency| (word + ' ') * frequency }.join
         | 
| 62 | 
            +
             | 
| 63 | 
            +
                  classify_from_string(document)
         | 
| 64 | 
            +
                end
         | 
| 65 | 
            +
             | 
| 66 | 
            +
                def classify_from_string(document)
         | 
| 67 | 
            +
                  words = document.split.uniq
         | 
| 68 | 
            +
                  likelihoods = @likelihoods.dup
         | 
| 69 | 
            +
                  posterior = {}
         | 
| 70 | 
            +
             | 
| 71 | 
            +
                  vocab_size = frequency_table.keys.size
         | 
| 72 | 
            +
             | 
| 73 | 
            +
                  label_index.each do |label, index|
         | 
| 74 | 
            +
                    words.map do |word|
         | 
| 75 | 
            +
                      row = frequency_table[word]
         | 
| 76 | 
            +
             | 
| 77 | 
            +
                      unless row.empty?
         | 
| 78 | 
            +
                        laplace_word_likelihood = (row[index] + 1.0).to_f / (label_instance_count[label] + vocab_size).to_f
         | 
| 79 | 
            +
                        likelihoods[label] *= laplace_word_likelihood / (1.0 - laplace_word_likelihood)
         | 
| 80 | 
            +
                      end
         | 
| 81 | 
            +
                    end
         | 
| 82 | 
            +
             | 
| 83 | 
            +
                    posterior[label] = @priors[label] * likelihoods[label]
         | 
| 84 | 
            +
                  end
         | 
| 85 | 
            +
             | 
| 86 | 
            +
                  normalize(posterior)
         | 
| 87 | 
            +
                end
         | 
| 88 | 
            +
             | 
| 89 | 
            +
                def save_state(path)
         | 
| 90 | 
            +
                  state = {}
         | 
| 91 | 
            +
                  state['frequency_table'] = frequency_table
         | 
| 92 | 
            +
                  state['label_instance_count'] = label_instance_count
         | 
| 93 | 
            +
                  state['label_index'] = label_index
         | 
| 94 | 
            +
                  state['label_index_sequence'] = label_index_sequence
         | 
| 95 | 
            +
                  state['total_terms'] = total_terms
         | 
| 96 | 
            +
             | 
| 97 | 
            +
                  ::File.open(::File.expand_path(path), 'w') do |handle|
         | 
| 98 | 
            +
                    handle.write(state.to_json)
         | 
| 99 | 
            +
                  end
         | 
| 100 | 
            +
                end
         | 
| 101 | 
            +
             | 
| 102 | 
            +
                def load_state(path)
         | 
| 103 | 
            +
                  state = ::JSON.parse(::File.read(::File.expand_path(path)))
         | 
| 104 | 
            +
             | 
| 105 | 
            +
                  fail 'Missing frequency_table' unless state['frequency_table']
         | 
| 106 | 
            +
                  fail 'Missing label_instance_count' unless state['label_instance_count']
         | 
| 107 | 
            +
                  fail 'Missing label_index' unless state['label_index']
         | 
| 108 | 
            +
                  fail 'Missing label_index_sequence' unless state['label_index_sequence']
         | 
| 109 | 
            +
                  fail 'Missing total_terms' unless state['total_terms']
         | 
| 110 | 
            +
             | 
| 111 | 
            +
                  @frequency_table = state['frequency_table']
         | 
| 112 | 
            +
                  @label_instance_count = state['label_instance_count']
         | 
| 113 | 
            +
                  @label_index = state['label_index']
         | 
| 114 | 
            +
                  @label_index_sequence = state['label_index_sequence']
         | 
| 115 | 
            +
                  @total_terms = state['total_terms']
         | 
| 116 | 
            +
             | 
| 117 | 
            +
                  finish_training!
         | 
| 118 | 
            +
                end
         | 
| 119 | 
            +
             | 
| 120 | 
            +
              private
         | 
| 121 | 
            +
             | 
| 122 | 
            +
                def calculate_likelihoods!
         | 
| 123 | 
            +
                  @likelihoods = label_index.inject({}) do |accumulator, (label, index)|
         | 
| 124 | 
            +
                    initial_likelihood = 1.0
         | 
| 125 | 
            +
                    vocab_size = frequency_table.keys.size
         | 
| 126 | 
            +
             | 
| 127 | 
            +
                    frequency_table.each do |feature, row|
         | 
| 128 | 
            +
                      laplace_word_likelihood = (row[index] + 1.0).to_f / (label_instance_count[label] + vocab_size).to_f
         | 
| 129 | 
            +
                      initial_likelihood *= (1.0 - laplace_word_likelihood)
         | 
| 130 | 
            +
                    end
         | 
| 131 | 
            +
             | 
| 132 | 
            +
                    accumulator[label] = initial_likelihood
         | 
| 133 | 
            +
                    accumulator
         | 
| 134 | 
            +
                  end
         | 
| 135 | 
            +
                end
         | 
| 136 | 
            +
             | 
| 137 | 
            +
                def calculate_priors!
         | 
| 138 | 
            +
                  @priors = label_instance_count.inject({}) do |hash, (label, count)|
         | 
| 139 | 
            +
                    hash[label] = count / total_terms
         | 
| 140 | 
            +
                    hash
         | 
| 141 | 
            +
                  end
         | 
| 20 142 | 
             
                end
         | 
| 21 143 |  | 
| 144 | 
            +
                def get_next_sequence_value
         | 
| 145 | 
            +
                  @label_index_sequence += 1
         | 
| 146 | 
            +
                end
         | 
| 147 | 
            +
             | 
| 148 | 
            +
                def normalize(posterior)
         | 
| 149 | 
            +
                  sum = posterior.inject(0.0) { |accumulator, (key, value)| accumulator + value }
         | 
| 150 | 
            +
             | 
| 151 | 
            +
                  posterior.inject({}) do |accumulator, (key, value)|
         | 
| 152 | 
            +
                    accumulator[key] = value / sum
         | 
| 153 | 
            +
                    accumulator
         | 
| 154 | 
            +
                  end
         | 
| 155 | 
            +
                end
         | 
| 156 | 
            +
             | 
| 157 | 
            +
                def update_label_index(label)
         | 
| 158 | 
            +
                  unless label_index.keys.include?(label)
         | 
| 159 | 
            +
                    index = get_next_sequence_value
         | 
| 160 | 
            +
                    label_index[label] = index
         | 
| 161 | 
            +
             | 
| 162 | 
            +
                    frequency_table.each do |feature, value|
         | 
| 163 | 
            +
                      value[index] = 0
         | 
| 164 | 
            +
                    end
         | 
| 165 | 
            +
                  end
         | 
| 166 | 
            +
                end
         | 
| 167 | 
            +
             | 
| 168 | 
            +
                def update_frequency_table(label, word, frequency)
         | 
| 169 | 
            +
                  row = frequency_table[word]
         | 
| 170 | 
            +
                  index = label_index[label]
         | 
| 171 | 
            +
             | 
| 172 | 
            +
                  if row[index]
         | 
| 173 | 
            +
                    row[index] += frequency
         | 
| 174 | 
            +
                  else
         | 
| 175 | 
            +
                    row[0..1] = label_index.keys.map { |label| 0 }
         | 
| 176 | 
            +
                    row[index] = frequency
         | 
| 177 | 
            +
                  end
         | 
| 178 | 
            +
                end
         | 
| 22 179 | 
             
              end
         | 
| 23 180 | 
             
            end
         | 
| @@ -0,0 +1,26 @@ | |
| 1 | 
            +
            require 'java'
         | 
| 2 | 
            +
            require ::File.join(::File.dirname(__FILE__), "..", "..", "target" , "bae.jar")
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            module Bae
         | 
| 5 | 
            +
              class NativeClassifier
         | 
| 6 | 
            +
             | 
| 7 | 
            +
                attr_reader :internal_classifier
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                def initialize
         | 
| 10 | 
            +
                  @internal_classifier = ::Java::Bae::NaiveBayesClassifier.new
         | 
| 11 | 
            +
                end
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                def train(label, feature)
         | 
| 14 | 
            +
                  internal_classifier.train(label, ::Java::Bae::Document.new(feature))
         | 
| 15 | 
            +
                end
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                def classify(feature)
         | 
| 18 | 
            +
                  internal_classifier.classify(::Java::Bae::Document.new(feature))
         | 
| 19 | 
            +
                end
         | 
| 20 | 
            +
             | 
| 21 | 
            +
                def finish_training!
         | 
| 22 | 
            +
                  internal_classifier.calculateInitialLikelihoods()
         | 
| 23 | 
            +
                end
         | 
| 24 | 
            +
             | 
| 25 | 
            +
              end
         | 
| 26 | 
            +
            end
         | 
    
        data/lib/bae/version.rb
    CHANGED
    
    
    
        data/lib/bae.rb
    CHANGED
    
    
| @@ -1,10 +1,17 @@ | |
| 1 1 | 
             
            require 'spec_helper'
         | 
| 2 2 |  | 
| 3 | 
            +
            require 'bae/native_classifier'
         | 
| 4 | 
            +
             | 
| 3 5 | 
             
            describe ::Bae::Classifier do
         | 
| 4 6 |  | 
| 5 7 | 
             
              subject { described_class.new }
         | 
| 6 8 |  | 
| 7 | 
            -
               | 
| 9 | 
            +
              let(:state_json) {
         | 
| 10 | 
            +
                '{"frequency_table":{"aaa":[0,0],"bbb":[1,0],"ccc":[0,2],"ddd":[0,3]},"label_instance_count":{"positive":1,"negative":1},"label_index":{"positive":0,"negative":1},"label_index_sequence":1,"total_terms":2.0}'
         | 
| 11 | 
            +
              }
         | 
| 12 | 
            +
              let(:state) { ::JSON.parse(state_json) }
         | 
| 13 | 
            +
             | 
| 14 | 
            +
              it "can classify a hash document" do
         | 
| 8 15 | 
             
                subject.train("positive", {"aaa" => 0, "bbb" => 1})
         | 
| 9 16 | 
             
                subject.train("negative", {"ccc" => 2, "ddd" => 3})
         | 
| 10 17 |  | 
| @@ -16,7 +23,7 @@ describe ::Bae::Classifier do | |
| 16 23 | 
             
                expect(results["negative"]).to be_within(0.001).of(0.05882)
         | 
| 17 24 | 
             
              end
         | 
| 18 25 |  | 
| 19 | 
            -
              it "can classify from  | 
| 26 | 
            +
              it "can classify from a string based document" do
         | 
| 20 27 | 
             
                subject.train("positive", "aaa aaa bbb");
         | 
| 21 28 | 
             
                subject.train("negative", "ccc ccc ddd ddd");
         | 
| 22 29 | 
             
                subject.train("neutral", "eee eee eee fff fff fff");
         | 
| @@ -30,4 +37,46 @@ describe ::Bae::Classifier do | |
| 30 37 | 
             
                expect(results["neutral"]).to be_within(0.001).of(0.03734)
         | 
| 31 38 | 
             
              end
         | 
| 32 39 |  | 
| 40 | 
            +
              it "fails when you attempt to train or test anything other than a hash or string" do
         | 
| 41 | 
            +
                subject.train("positive", "aaa aaa bbb");
         | 
| 42 | 
            +
                expect{ subject.train("a", 1337) }.to raise_error 'Training data must either be a string or hash'
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                subject.finish_training!
         | 
| 45 | 
            +
             | 
| 46 | 
            +
                subject.classify("aaa bbb")
         | 
| 47 | 
            +
                expect{ subject.classify(1337) }.to raise_error 'Training data must either be a string or hash'
         | 
| 48 | 
            +
              end
         | 
| 49 | 
            +
             | 
| 50 | 
            +
              it "can save the classifier state" do
         | 
| 51 | 
            +
                subject.train("positive", {"aaa" => 0, "bbb" => 1})
         | 
| 52 | 
            +
                subject.train("negative", {"ccc" => 2, "ddd" => 3})
         | 
| 53 | 
            +
             | 
| 54 | 
            +
                subject.finish_training!
         | 
| 55 | 
            +
             | 
| 56 | 
            +
                temp_file = ::Tempfile.new('some_state')
         | 
| 57 | 
            +
                subject.save_state(temp_file.path)
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                temp_file.rewind
         | 
| 60 | 
            +
                expect(temp_file.read).to eq(state_json)
         | 
| 61 | 
            +
             | 
| 62 | 
            +
                temp_file.close
         | 
| 63 | 
            +
                temp_file.unlink
         | 
| 64 | 
            +
              end
         | 
| 65 | 
            +
             | 
| 66 | 
            +
              it "can correctly load a classifier state and correctly classify" do
         | 
| 67 | 
            +
                temp_file = ::Tempfile.new('some_state')
         | 
| 68 | 
            +
                temp_file.write(state_json)
         | 
| 69 | 
            +
                temp_file.rewind
         | 
| 70 | 
            +
             | 
| 71 | 
            +
                subject.load_state(temp_file.path)
         | 
| 72 | 
            +
             | 
| 73 | 
            +
                results = subject.classify({"aaa" => 1, "bbb" => 1})
         | 
| 74 | 
            +
             | 
| 75 | 
            +
                expect(results["positive"]).to be_within(0.001).of(0.94117)
         | 
| 76 | 
            +
                expect(results["negative"]).to be_within(0.001).of(0.05882)
         | 
| 77 | 
            +
             | 
| 78 | 
            +
                temp_file.close
         | 
| 79 | 
            +
                temp_file.unlink
         | 
| 80 | 
            +
              end
         | 
| 81 | 
            +
             | 
| 33 82 | 
             
            end
         | 
| @@ -0,0 +1,33 @@ | |
| 1 | 
            +
            require 'spec_helper'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            describe ::Bae::NativeClassifier do
         | 
| 4 | 
            +
             | 
| 5 | 
            +
              subject { described_class.new }
         | 
| 6 | 
            +
             | 
| 7 | 
            +
              it "can classify a hash document" do
         | 
| 8 | 
            +
                subject.train("positive", {"aaa" => 0, "bbb" => 1})
         | 
| 9 | 
            +
                subject.train("negative", {"ccc" => 2, "ddd" => 3})
         | 
| 10 | 
            +
             | 
| 11 | 
            +
                subject.finish_training!
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                results = subject.classify({"aaa" => 1, "bbb" => 1})
         | 
| 14 | 
            +
             | 
| 15 | 
            +
                expect(results["positive"]).to be_within(0.001).of(0.94117)
         | 
| 16 | 
            +
                expect(results["negative"]).to be_within(0.001).of(0.05882)
         | 
| 17 | 
            +
              end
         | 
| 18 | 
            +
             | 
| 19 | 
            +
              it "can classify from a string based document" do
         | 
| 20 | 
            +
                subject.train("positive", "aaa aaa bbb");
         | 
| 21 | 
            +
                subject.train("negative", "ccc ccc ddd ddd");
         | 
| 22 | 
            +
                subject.train("neutral", "eee eee eee fff fff fff");
         | 
| 23 | 
            +
             | 
| 24 | 
            +
                subject.finish_training!
         | 
| 25 | 
            +
             | 
| 26 | 
            +
                results = subject.classify("aaa bbb")
         | 
| 27 | 
            +
             | 
| 28 | 
            +
                expect(results["positive"]).to be_within(0.001).of(0.89626)
         | 
| 29 | 
            +
                expect(results["negative"]).to be_within(0.001).of(0.06639)
         | 
| 30 | 
            +
                expect(results["neutral"]).to be_within(0.001).of(0.03734)
         | 
| 31 | 
            +
              end
         | 
| 32 | 
            +
             | 
| 33 | 
            +
            end
         | 
    
        data/spec/spec_helper.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: bae
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.0. | 
| 4 | 
            +
              version: 0.0.8
         | 
| 5 5 | 
             
            platform: java
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Garrett Thornburg
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2015-02- | 
| 11 | 
            +
            date: 2015-02-25 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| @@ -68,8 +68,10 @@ files: | |
| 68 68 | 
             
            - build.xml
         | 
| 69 69 | 
             
            - lib/bae.rb
         | 
| 70 70 | 
             
            - lib/bae/classifier.rb
         | 
| 71 | 
            +
            - lib/bae/native_classifier.rb
         | 
| 71 72 | 
             
            - lib/bae/version.rb
         | 
| 72 73 | 
             
            - spec/lib/bae/classifier_spec.rb
         | 
| 74 | 
            +
            - spec/lib/bae/native_classifier_spec.rb
         | 
| 73 75 | 
             
            - spec/spec_helper.rb
         | 
| 74 76 | 
             
            - src/main/java/bae/Document.java
         | 
| 75 77 | 
             
            - src/main/java/bae/FrequencyTable.java
         | 
| @@ -104,4 +106,5 @@ specification_version: 4 | |
| 104 106 | 
             
            summary: Multinomial naive bayes classifier with a kick of java
         | 
| 105 107 | 
             
            test_files:
         | 
| 106 108 | 
             
            - spec/lib/bae/classifier_spec.rb
         | 
| 109 | 
            +
            - spec/lib/bae/native_classifier_spec.rb
         | 
| 107 110 | 
             
            - spec/spec_helper.rb
         |