RubyGems - omnicat-bayes - Versions diffs - 0.2.2 → 0.3.0 - Mend

omnicat-bayes 0.2.2 → 0.3.0

Files changed (7) hide show

checksums.yaml +7 -0
data/README.md +20 -10
data/lib/omnicat/bayes/version.rb +1 -1
data/lib/omnicat/classifiers/bayes.rb +84 -62
data/omnicat-bayes.gemspec +1 -2
data/test/unit/classifiers/bayes_test.rb +23 -10
metadata +16 -40

checksums.yaml ADDED

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 9226b652056d418b7f374a46c15e55c146f9f421
+  data.tar.gz: e370533de84b92342b0cc202ecf50c978749c9a4
+SHA512:
+  metadata.gz: ecae56eab0769f6d582114d13326ec5e483431ac8f33ff2479578dff56a759e482e61e120e03619123c65ca78419f303cfb0bd685f8d712142a6e502fc3f6c28
+  data.tar.gz: 3cbf521720c7f804a70aacde837cac67599de3fd8e190540e4272909c085ebcedc3c1f5e322fb7f2160263432f22b17cc19f8d7f769a8b6b8be4cb75e1346ece

data/README.md CHANGED

@@ -27,11 +27,15 @@ See rdoc for detailed usage.
 Optional configuration sample:
     OmniCat.configure do |config|
+      # you can enable auto-train mode with :unique or :continues
+      # unique: only unique docs will be added to the training docs on prediction
+      # continues: always add docs to the training docs on prediction
+      config.auto_train = :off
       config.exclude_tokens = ['something', 'anything'] # exclude token list
       config.token_patterns = {
-        # exclude token Regex patterns
+        # exclude tokens with Regex patterns
         minus: [/[\s\t\n\r]+/, /(@[\w\d]+)/],
-        # include token Regex patterns
+        # include tokens with Regex patterns
         plus: [/[\p{L}\-0-9]{2,}/, /[\!\?]/, /[\:\)\(\;\-\|]{2,3}/]
       }
     end
@@ -96,9 +100,14 @@ Untrain category with multiple documents.
 Classify a document.
     result = bayes.classify('I feel so good and happy')
-    => #<OmniCat::Result:0x007fd20296aad8 @category={:name=>"positive", :percentage=>73}, @scores={"positive"=>5.4253472222222225e-09, "negative"=>1.9600796074572086e-09}, @total_score=7.385426829679431e-09>
+    => #<OmniCat::Result:0x007febb152af68 @top_score_key="positive", @scores={"positive"=>#<OmniCat::Score:0x007febb152add8 @key="positive", @value=6.813226744186048e-09, @percentage=58>, "negative"=>#<OmniCat::Score:0x007febb152ac70 @key="negative", @value=4.875003449064939e-09, @percentage=42>}, @total_score=1.1688230193250986e-08>
     result.to_hash
-    => {:category=>{:name=>"positive", :percentage=>73}, :scores=>{"positive"=>5.4253472222222225e-09, "negative"=>1.9600796074572086e-09}, :total_score=>7.385426829679431e-09}
+    => {:top_score_key=>"positive", :scores=>{"positive"=>{:key=>"positive", :value=>6.813226744186048e-09, :percentage=>58}, "negative"=>{:key=>"negative", :value=>4.875003449064939e-09, :percentage=>42}}, :total_score=>1.1688230193250986e-08}
+    result.top_score
+    => #<OmniCat::Score:0x007febb152add8 @key="positive", @value=6.813226744186048e-09, @percentage=58>
+    result.top_score.to_hash
+    => {:key=>"positive", :value=>6.813226744186048e-09, :percentage=>58}
 ### Classify batch
 Classify multiple documents at a time.
@@ -109,25 +118,26 @@ Classify multiple documents at a time.
         'a good piece of work'
       ]
     )
-    => [#<OmniCat::Result:0x007fd2029341b8 @category={:name=>"negative", :percentage=>78}, @scores={"positive"=>2.5521869888765736e-14, "negative"=>9.074442627116706e-14}, @total_score=1.162662961599328e-13>, #<OmniCat::Result:0x007fd20292e7e0 @category={:name=>"positive", :percentage=>80}, @scores={"positive"=>2.411265432098765e-07, "negative"=>5.880238822371627e-08}, @total_score=2.999289314335928e-07>]
+    => [#<OmniCat::Result:0x007febb14f3680 @top_score_key="negative", @scores={"positive"=>#<OmniCat::Score:0x007febb14f34a0 @key="positive", @value=7.971480930520432e-14, @percentage=22>, "negative"=>#<OmniCat::Score:0x007febb14f32c0 @key="negative", @value=2.834304330851709e-13, @percentage=78>}, @total_score=3.6314524239037524e-13>, #<OmniCat::Result:0x007febb14f2aa0 @top_score_key="positive", @scores={"positive"=>#<OmniCat::Score:0x007febb14f2960 @key="positive", @value=3.802731206057328e-07, @percentage=72>, "negative"=>#<OmniCat::Score:0x007febb14f2820 @key="negative", @value=1.4625010347194818e-07, @percentage=28>}, @total_score=5.26523224077681e-07>]
 ### Convert to hash
 Convert full Bayes object to hash.
     # For storing, restoring model data
     bayes_hash = bayes.to_hash
-    => {:categories=>{"positive"=>{:doc_count=>4, :tokens=>{"great"=>1, "if"=>1, "you"=>2, "are"=>2, "in"=>2, "slap"=>1, "happy"=>1, "mood"=>1, "feel-good"=>2, "picture"=>1, "the"=>2, "best"=>1, "sense"=>1, "of"=>2, "term"=>1, "it"=>1, "is"=>1, "movie"=>1, "about"=>1, "which"=>1, "can"=>1, "actually"=>1, "feel"=>1, "good"=>2, "love"=>1, "and"=>1, "money"=>1, "both"=>1, "them"=>1, "choises"=>1}, :token_count=>37}, "negative"=>{:doc_count=>4, :tokens=>{"bad"=>1, "tracking"=>1, "issue"=>1, "simplistic"=>1, "silly"=>1, "and"=>1, "tedious"=>1, "interesting"=>1, "but"=>2, "not"=>2, "compelling"=>2, "seems"=>1, "clever"=>1, "especially"=>1}, :token_count=>17}}, :category_count=>2, :doc_count=>8, :k_value=>1.0, :token_count=>54, :uniq_token_count=>43}
+    => {:categories=>{"positive"=>{:doc_count=>4, :docs=>{"28fd29bbf840c86db65e510ff3cd07a9"=>{:content=>"great if you are in a slap happy mood .", :content_md5=>"28fd29bbf840c86db65e510ff3cd07a9", :count=>1, :tokens=>{"great"=>1, "if"=>1, "you"=>1, "are"=>1, "in"=>1, "slap"=>1, "happy"=>1, "mood"=>1}}, "82b4cd9513f448dea0024f2d0e2ccd44"=>{:content=>"a feel-good picture in the best sense of the term...", :content_md5=>"82b4cd9513f448dea0024f2d0e2ccd44", :count=>1, :tokens=>{"feel-good"=>1, "picture"=>1, "in"=>1, "the"=>2, "best"=>1, "sense"=>1, "of"=>1, "term"=>1}}, "f917bf1cf1256c78c5436d850dab3104"=>{:content=>"it is a feel-good movie about which you can actually feel good.", :content_md5=>"f917bf1cf1256c78c5436d850dab3104", :count=>1, :tokens=>{"it"=>1, "is"=>1, "feel-good"=>1, "movie"=>1, "about"=>1, "which"=>1, "you"=>1, "can"=>1, "actually"=>1, "feel"=>1, "good"=>1}}, "4343bbe84c035733708c3f58136f321e"=>{:content=>"love and money both of them are good choises", :content_md5=>"4343bbe84c035733708c3f58136f321e", :count=>1, :tokens=>{"love"=>1, "and"=>1, "money"=>1, "both"=>1, "of"=>1, "them"=>1, "are"=>1, "good"=>1, "choises"=>1}}}, :name=>"positive", :tokens=>{"great"=>1, "if"=>1, "you"=>2, "are"=>2, "in"=>2, "slap"=>1, "happy"=>1, "mood"=>1, "feel-good"=>2, "picture"=>1, "the"=>2, "best"=>1, "sense"=>1, "of"=>2, "term"=>1, "it"=>1, "is"=>1, "movie"=>1, "about"=>1, "which"=>1, "can"=>1, "actually"=>1, "feel"=>1, "good"=>2, "love"=>1, "and"=>1, "money"=>1, "both"=>1, "them"=>1, "choises"=>1}, :token_count=>37, :prior=>0.5}, "negative"=>{:doc_count=>4, :docs=>{"89b36e774579662591ea21b3283d9b35"=>{:content=>"bad tracking issue", :content_md5=>"89b36e774579662591ea21b3283d9b35", :count=>1, :tokens=>{"bad"=>1, "tracking"=>1, "issue"=>1}}, "b0ec48bc87527e285b26d6cce8e278e7"=>{:content=>"simplistic , silly and tedious .", :content_md5=>"b0ec48bc87527e285b26d6cce8e278e7", :count=>1, :tokens=>{"simplistic"=>1, "silly"=>1, "and"=>1, "tedious"=>1}}, 
"ae9d4fbaf40906614ca712a888648c5f"=>{:content=>"interesting , but not compelling . ", :content_md5=>"ae9d4fbaf40906614ca712a888648c5f", :count=>1, :tokens=>{"interesting"=>1, "but"=>1, "not"=>1, "compelling"=>1}}, "0e495f5d88d8049746a1b6961bf3cc90"=>{:content=>"seems clever but not especially compelling", :content_md5=>"0e495f5d88d8049746a1b6961bf3cc90", :count=>1, :tokens=>{"seems"=>1, "clever"=>1, "but"=>1, "not"=>1, "especially"=>1, "compelling"=>1}}}, :name=>"negative", :tokens=>{"bad"=>1, "tracking"=>1, "issue"=>1, "simplistic"=>1, "silly"=>1, "and"=>1, "tedious"=>1, "interesting"=>1, "but"=>2, "not"=>2, "compelling"=>2, "seems"=>1, "clever"=>1, "especially"=>1}, :token_count=>17, :prior=>0.5}}, :category_count=>2, :category_size_limit=>0, :doc_count=>8, :token_count=>54, :unique_token_count=>43, :k_value=>1.0}
 ### Load from hash
 Load full Bayes object from hash.
     another_bayes_obj = OmniCat::Classifiers::Bayes.new(bayes_hash)
-    => #<OmniCat::Classifiers::Bayes:0x007fd20308cff0 @categories={"positive"=>#<OmniCat::Classifiers::BayesInternals::Category:0x007fd20308cf78 @doc_count=4, @tokens={"great"=>1, "if"=>1, "you"=>2, "are"=>2, "in"=>2, "slap"=>1, "happy"=>1, "mood"=>1, "feel-good"=>2, "picture"=>1, "the"=>2, "best"=>1, "sense"=>1, "of"=>2, "term"=>1, "it"=>1, "is"=>1, "movie"=>1, "about"=>1, "which"=>1, "can"=>1, "actually"=>1, "feel"=>1, "good"=>2, "love"=>1, "and"=>1, "money"=>1, "both"=>1, "them"=>1, "choises"=>1}, @token_count=37>, "negative"=>#<OmniCat::Classifiers::BayesInternals::Category:0x007fd20308cf00 @doc_count=4, @tokens={"bad"=>1, "tracking"=>1, "issue"=>1, "simplistic"=>1, "silly"=>1, "and"=>1, "tedious"=>1, "interesting"=>1, "but"=>2, "not"=>2, "compelling"=>2, "seems"=>1, "clever"=>1, "especially"=>1}, @token_count=17>}, @category_count=2, @doc_count=8, @k_value=1.0, @token_count=54, @uniq_token_count=43>
+    => #<OmniCat::Classifiers::Bayes:0x007febb14d15a8 @categories={"positive"=>#<OmniCat::Classifiers::BayesInternals::Category:0x007febb14d1530 @doc_count=4, @docs={"28fd29bbf840c86db65e510ff3cd07a9"=>{:content=>"great if you are in a slap happy mood .", :content_md5=>"28fd29bbf840c86db65e510ff3cd07a9", :count=>1, :tokens=>{"great"=>1, "if"=>1, "you"=>1, "are"=>1, "in"=>1, "slap"=>1, "happy"=>1, "mood"=>1}}, "82b4cd9513f448dea0024f2d0e2ccd44"=>{:content=>"a feel-good picture in the best sense of the term...", :content_md5=>"82b4cd9513f448dea0024f2d0e2ccd44", :count=>1, :tokens=>{"feel-good"=>1, "picture"=>1, "in"=>1, "the"=>2, "best"=>1, "sense"=>1, "of"=>1, "term"=>1}}, "f917bf1cf1256c78c5436d850dab3104"=>{:content=>"it is a feel-good movie about which you can actually feel good.", :content_md5=>"f917bf1cf1256c78c5436d850dab3104", :count=>1, :tokens=>{"it"=>1, "is"=>1, "feel-good"=>1, "movie"=>1, "about"=>1, "which"=>1, "you"=>1, "can"=>1, "actually"=>1, "feel"=>1, "good"=>1}}, "4343bbe84c035733708c3f58136f321e"=>{:content=>"love and money both of them are good choises", :content_md5=>"4343bbe84c035733708c3f58136f321e", :count=>1, :tokens=>{"love"=>1, "and"=>1, "money"=>1, "both"=>1, "of"=>1, "them"=>1, "are"=>1, "good"=>1, "choises"=>1}}}, @name="positive", @tokens={"great"=>1, "if"=>1, "you"=>2, "are"=>2, "in"=>2, "slap"=>1, "happy"=>1, "mood"=>1, "feel-good"=>2, "picture"=>1, "the"=>2, "best"=>1, "sense"=>1, "of"=>2, "term"=>1, "it"=>1, "is"=>1, "movie"=>1, "about"=>1, "which"=>1, "can"=>1, "actually"=>1, "feel"=>1, "good"=>2, "love"=>1, "and"=>1, "money"=>1, "both"=>1, "them"=>1, "choises"=>1}, @token_count=37, @prior=0.5>, "negative"=>#<OmniCat::Classifiers::BayesInternals::Category:0x007febb14d14e0 @doc_count=4, @docs={"89b36e774579662591ea21b3283d9b35"=>{:content=>"bad tracking issue", :content_md5=>"89b36e774579662591ea21b3283d9b35", :count=>1, :tokens=>{"bad"=>1, "tracking"=>1, "issue"=>1}}, "b0ec48bc87527e285b26d6cce8e278e7"=>{:content=>"simplistic , 
silly and tedious .", :content_md5=>"b0ec48bc87527e285b26d6cce8e278e7", :count=>1, :tokens=>{"simplistic"=>1, "silly"=>1, "and"=>1, "tedious"=>1}}, "ae9d4fbaf40906614ca712a888648c5f"=>{:content=>"interesting , but not compelling . ", :content_md5=>"ae9d4fbaf40906614ca712a888648c5f", :count=>1, :tokens=>{"interesting"=>1, "but"=>1, "not"=>1, "compelling"=>1}}, "0e495f5d88d8049746a1b6961bf3cc90"=>{:content=>"seems clever but not especially compelling", :content_md5=>"0e495f5d88d8049746a1b6961bf3cc90", :count=>1, :tokens=>{"seems"=>1, "clever"=>1, "but"=>1, "not"=>1, "especially"=>1, "compelling"=>1}}}, @name="negative", @tokens={"bad"=>1, "tracking"=>1, "issue"=>1, "simplistic"=>1, "silly"=>1, "and"=>1, "tedious"=>1, "interesting"=>1, "but"=>2, "not"=>2, "compelling"=>2, "seems"=>1, "clever"=>1, "especially"=>1}, @token_count=17, @prior=0.5>}, @category_count=2, @category_size_limit=0, @doc_count=8, @token_count=54, @unique_token_count=43, @k_value=1.0>
     another_bayes_obj.classify('best senses')
-    => #<OmniCat::Result:0x007fd203075008 @category={:name=>"positive", :percentage=>57}, @scores={"positive"=>0.0002314814814814815, "negative"=>0.00017146776406035664}, @total_score=0.00040294924554183816>
+    => #<OmniCat::Result:0x007febb14c0fc8 @top_score_key="positive", @scores={"positive"=>#<OmniCat::Score:0x007febb14c0ed8 @key="positive", @value=0.00029069767441860465, @percentage=52>, "negative"=>#<OmniCat::Score:0x007febb14c0de8 @key="negative", @value=0.0002704164413196322, @percentage=48>}, @total_score=0.0005611141157382368>
-## Todo
-* Implement all OmniCat(http://github.com/mustafaturan/omnicat) classifier strategy abstract methods
+### Best practices
+For Bayes classification, always try to train the same number of documents for each category. For that reason, do not activate auto-training mode: it upsets the balance of trained docs across categories and makes the algorithm go crazy :).
+To get the best results in text classification, apply cleaning steps such as spellchecking, stemming, and stop-word removal before training and prediction.
 ## Contributing

data/lib/omnicat/bayes/version.rb CHANGED

@@ -1,5 +1,5 @@
 module Omnicat
   module Bayes
-    VERSION = '0.2.2'
+    VERSION = '0.3.0'
   end
 end

data/lib/omnicat/classifiers/bayes.rb CHANGED

@@ -33,7 +33,7 @@ module OmniCat
                 "Category with name '#{category_name}' is already exists!"
         else
           increment_category_count
-          @categories[category_name] = ::OmniCat::Classifiers::BayesInternals::Category.new
+          @categories[category_name] = ::OmniCat::Classifiers::BayesInternals::Category.new(name: category_name)
         end
       end
@@ -52,24 +52,14 @@ module OmniCat
       #   bayes.train("negative", "bad dog")
       #   bayes.train("neutral", "how is the management gui")
       def train(category_name, doc_content)
-        if category_exists?(category_name)
-          increment_doc_counts(category_name)
-          update_priors
-          doc_key = Digest::MD5.hexdigest(doc_content)
-          if doc = @categories[category_name].docs[doc_key]
-            doc.increment_count
-          else
-            doc = OmniCat::Doc.new(content: doc_content)
-          end
-          @categories[category_name].docs[doc_key] = doc
-          doc.tokens.each do |token, count|
-            increment_token_counts(category_name, token, count)
-            @categories[category_name].tokens[token] = @categories[category_name].tokens[token].to_i + count
-          end
-        else
-          raise StandardError,
-                "Category with name '#{category_name}' does not exist!"
+        category_must_exist(category_name)
+        doc = add_doc(category_name, doc_content)
+        doc.tokens.each do |token, count|
+          increment_token_counts(category_name, token, count)
+          @categories[category_name].tokens[token] = @categories[category_name].tokens[token].to_i + count
         end
+        increment_doc_counts(category_name)
+        update_priors
       end
       # Untrain the desired category with a document
@@ -87,26 +77,15 @@ module OmniCat
       #   bayes.untrain("negative", "bad dog")
       #   bayes.untrain("neutral", "how is the management gui")
       def untrain(category_name, doc_content)
-        if category_exists?(category_name)
-          doc_key = Digest::MD5.hexdigest(doc_content)
-          if doc = @categories[category_name].docs[doc_key]
-            @categories[category_name].docs[doc_key].decrement_count
-          else
-            raise StandardError,
-                  "Document is not found in #{category_name} documents!"
-          end
-          doc.tokens.each do |token, count|
-            @categories[category_name].tokens[token] = @categories[category_name].tokens[token].to_i - count
-            @categories[category_name].tokens.delete(token) if @categories[category_name].tokens[token] == 0
-            decrement_token_counts(category_name, token, count)
-          end
-          @categories[category_name].docs.delete(doc_key) if @categories[category_name].docs[doc_key].count == 0
-          decrement_doc_counts(category_name)
-          update_priors
-        else
-          raise StandardError,
-                "Category with name '#{category_name}' does not exist!"
+        category_must_exist(category_name)
+        doc = remove_doc(category_name, doc_content)
+        doc.tokens.each do |token, count|
+          @categories[category_name].tokens[token] = @categories[category_name].tokens[token].to_i - count
+          @categories[category_name].tokens.delete(token) if @categories[category_name].tokens[token] == 0
+          decrement_token_counts(category_name, token, count)
         end
+        decrement_doc_counts(category_name)
+        update_priors
       end
       # Classify the given document
@@ -126,25 +105,32 @@ module OmniCat
       #   =>
       def classify(doc_content)
         return unless classifiable?
-        score = -1000000
+        doc = ::OmniCat::Doc.new(content: doc_content)
         result = ::OmniCat::Result.new
         @categories.each do |category_name, category|
-          result.scores[category_name] = doc_probability(category, doc_content)
-          if result.scores[category_name] > score
-            result.category[:name] = category_name
-            score = result.scores[category_name]
-          end
-          result.total_score += result.scores[category_name]
+          result.add_score(
+            Score.new(
+              key: category.name,
+              value: doc_probability(category, doc)
+            )
+          )
         end
-        result.total_score = 1 if result.total_score == 0
-        result.category[:percentage] = (
-          result.scores[result.category[:name]] * 100.0 /
-          result.total_score
-        ).floor
+        auto_train(@categories[result.top_score.key], doc)
+        result.calculate_percentages
         result
       end
       private
+        # nodoc
+        def auto_train(category, doc)
+          case ::OmniCat.config.auto_train
+          when :continues
+            train(category.name, doc.content)
+          when :unique
+            train(category.name, doc.content) unless category.docs.has_key?(doc.content_md5)
+          end
+        end
         # nodoc
         def update_priors
           @categories.each do |_, category|
@@ -164,36 +150,35 @@ module OmniCat
         # nodoc
         def modify_token_counts(category_name, token, count)
-          modify_uniq_token_count(token, count < 0 ? -1 : 1)
+          modify_unique_token_count(token, count < 0 ? -1 : 1)
           @token_count += count
           @categories[category_name].token_count += count
         end
         # nodoc
-        def increment_uniq_token_count(token)
-          modify_uniq_token_count(token, 1)
+        def increment_unique_token_count(token)
+          modify_unique_token_count(token, 1)
         end
         # nodoc
-        def decrement_uniq_token_count(token)
-          modify_uniq_token_count(token, -1)
+        def decrement_unique_token_count(token)
+          modify_unique_token_count(token, -1)
         end
         # nodoc
-        def modify_uniq_token_count(token, uniq_token_addition)
+        def modify_unique_token_count(token, uniq_token_addition)
           @categories.each do |_, category|
              if category.tokens.has_key?(token)
                uniq_token_addition = 0
                break
              end
           end
-          @uniq_token_count += uniq_token_addition
+          @unique_token_count += uniq_token_addition
         end
         # nodoc
-        def doc_probability(category, doc_content)
-          score = k_value
-          doc = OmniCat::Doc.new(content: doc_content)
+        def doc_probability(category, doc)
+          score = @k_value
           doc.tokens.each do |token, count|
             score *= token_probability(category, token, count)
           end
@@ -203,14 +188,51 @@ module OmniCat
         # nodoc
         def token_probability(category, token, count)
           if category.tokens[token].to_i == 0
-            k_value / token_count
+            @k_value / (@unique_token_count * count)
           else
             count * (
-              (category.tokens[token].to_i + k_value) /
-              (category.token_count + uniq_token_count)
+              (category.tokens[token].to_i + @k_value) /
+              (category.token_count + @unique_token_count)
             )
           end
         end
+        # nodoc
+        def add_doc(category_name, doc_content)
+          doc_key = generate_doc_key(doc_content)
+          if doc = @categories[category_name].docs[doc_key]
+            doc.increment_count
+          else
+            @categories[category_name].docs[doc_key] = ::OmniCat::Doc.new(content: doc_content)
+          end
+          @categories[category_name].docs[doc_key]
+        end
+        # nodoc
+        def remove_doc(category_name, doc_content)
+          doc_key = generate_doc_key(doc_content)
+          doc = @categories[category_name].docs[doc_key]
+          unless doc
+            raise StandardError,
+                  "Document is not found in #{category_name} documents!"
+          end
+          doc.decrement_count
+          @categories[category_name].docs.delete(doc_key) if doc.count == 0
+          doc
+        end
+        # nodoc
+        def generate_doc_key(doc_content)
+          Digest::MD5.hexdigest(doc_content)
+        end
+        # nodoc
+        def category_must_exist(category_name)
+          unless category_exists?(category_name)
+            raise StandardError,
+                  "Category with name '#{category_name}' does not exist!"
+          end
+        end
     end
   end
 end

data/omnicat-bayes.gemspec CHANGED

@@ -18,8 +18,7 @@ Gem::Specification.new do |spec|
   spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
   spec.require_paths = ['lib']
-  spec.add_dependency 'hashable', '~> 0.1.1'
-  spec.add_dependency 'omnicat', '~> 0.2.2'
+  spec.add_dependency 'omnicat', '~> 0.3.0'
   spec.add_development_dependency 'bundler', '~> 1.3'
   spec.add_development_dependency 'rake'
 end

data/test/unit/classifiers/bayes_test.rb CHANGED

@@ -4,6 +4,7 @@ require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'test_hel
 class TestBayes < Test::Unit::TestCase
   def setup
     OmniCat.configure do |config|
+      config.auto_train = :off
       config.exclude_tokens = ['are', 'at', 'by']
       config.token_patterns = {
         minus: [/[\s\t\n\r]+/, /(@[\w\d]+)/],
@@ -82,7 +83,7 @@ class TestBayes < Test::Unit::TestCase
       @bayes.categories['neutral'].token_count
     )
   end
   def test_untrain_with_doc_count_2
     @bayes.add_category 'neutral'
     @bayes.train 'neutral', 'how are you?? : :| :) ;-) :('
@@ -112,7 +113,8 @@ class TestBayes < Test::Unit::TestCase
   def test_untrain_with_missing_doc
     @bayes.add_category 'neutral'
     assert_raise(StandardError) {
-      @bayes.untrain 'neutral', 'how are you?? : :| :) ;-) :(' }
+      @bayes.untrain 'neutral', 'how are you?? : :| :) ;-) :('
+    }
   end
   def test_train_batch
@@ -128,15 +130,15 @@ class TestBayes < Test::Unit::TestCase
   def test_train_missing_category
     assert_raise(StandardError) { @bayes.train 'neutral', 'how are you?' }
   end
   def test_unique_token_count
     @bayes.add_category 'positive'
     @bayes.train_batch 'positive', ['good job ever', 'valid syntax',
       'best moments of my good life']
-    assert_equal(10,@bayes.uniq_token_count)
+    assert_equal(10,@bayes.unique_token_count)
     @bayes.untrain_batch 'positive', ['good job ever', 'valid syntax',
       'best moments of my good life']
-    assert_equal(0,@bayes.uniq_token_count)
+    assert_equal(0,@bayes.unique_token_count)
   end
   def test_classifiability_error
@@ -154,11 +156,12 @@ class TestBayes < Test::Unit::TestCase
     @bayes.train('negative', 'bad work')
     assert_equal(
       'positive',
-      @bayes.classify('very good position for this sentence').category[:name]
+      @bayes.classify('very good position for this sentence').top_score.key
     )
+    @bayes.train('negative', 'work')
     assert_equal(
       'negative',
-      @bayes.classify('bad words').category[:name]
+      @bayes.classify('bad words').top_score.key
     )
   end
@@ -176,11 +179,11 @@ class TestBayes < Test::Unit::TestCase
     assert_equal(
       'positive',
-      results[0].category[:name]
+      results[0].top_score.key
     )
     assert_equal(
       'negative',
-      results[1].category[:name]
+      results[1].top_score.key
     )
   end
@@ -191,11 +194,21 @@ class TestBayes < Test::Unit::TestCase
     bayes1.train('positive', 'good job')
     bayes1.train('negative', 'bad work')
     h1 = bayes1.to_hash
     bayes2 = ::OmniCat::Classifiers::Bayes.new(h1)
     assert_equal(h1, bayes2.to_hash)
   end
+  def test_change_strategy
+    c1 = ::OmniCat::Classifier.new(::OmniCat::Classifiers::Bayes.new)
+    c1.add_category 'positive'
+    c1.add_category 'negative'
+    c1.train('positive', 'good job')
+    c1.train('negative', 'bad work')
+    h1 = c1.to_hash
+    c1.strategy = ::OmniCat::Classifiers::Bayes.new
+    assert_equal(h1, c1.to_hash)
+  end
   def test_classify_with_insufficient_categories
     assert_raise(StandardError) { @bayes.classify 'blank' }
   end

metadata CHANGED

@@ -1,78 +1,55 @@
 --- !ruby/object:Gem::Specification
 name: omnicat-bayes
 version: !ruby/object:Gem::Version
-  version: 0.2.2
-  prerelease:
+  version: 0.3.0
 platform: ruby
 authors:
 - Mustafa Turan
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-07-11 00:00:00.000000000 Z
+date: 2014-02-19 00:00:00.000000000 Z
 dependencies:
-- !ruby/object:Gem::Dependency
-  name: hashable
-  requirement: !ruby/object:Gem::Requirement
-    none: false
-    requirements:
-    - - ~>
-      - !ruby/object:Gem::Version
-        version: 0.1.1
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    none: false
-    requirements:
-    - - ~>
-      - !ruby/object:Gem::Version
-        version: 0.1.1
 - !ruby/object:Gem::Dependency
   name: omnicat
   requirement: !ruby/object:Gem::Requirement
-    none: false
     requirements:
-    - - ~>
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.2.2
+        version: 0.3.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
-    none: false
     requirements:
-    - - ~>
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.2.2
+        version: 0.3.0
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
-    none: false
     requirements:
-    - - ~>
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '1.3'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
-    none: false
     requirements:
-    - - ~>
+    - - "~>"
       - !ruby/object:Gem::Version
         version: '1.3'
 - !ruby/object:Gem::Dependency
   name: rake
   requirement: !ruby/object:Gem::Requirement
-    none: false
     requirements:
-    - - ! '>='
+    - - ">="
       - !ruby/object:Gem::Version
         version: '0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
-    none: false
     requirements:
-    - - ! '>='
+    - - ">="
       - !ruby/object:Gem::Version
         version: '0'
 description: Naive Bayes classifier strategy for OmniCat
@@ -82,7 +59,7 @@ executables: []
 extensions: []
 extra_rdoc_files: []
 files:
-- .gitignore
+- ".gitignore"
 - Gemfile
 - LICENSE.txt
 - README.md
@@ -98,27 +75,26 @@ files:
 homepage: https://github.com/mustafaturan/omnicat-bayes
 licenses:
 - MIT
+metadata: {}
 post_install_message:
 rdoc_options: []
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
-  none: false
   requirements:
-  - - ! '>='
+  - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
-  none: false
   requirements:
-  - - ! '>='
+  - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 1.8.23
+rubygems_version: 2.2.0
 signing_key:
-specification_version: 3
+specification_version: 4
 summary: Naive Bayes text classification implementation as an OmniCat classifier strategy.
 test_files:
 - test/test_helper.rb