RubyGems - reclassifier - Versions diffs - 0.0.3 → 0.0.4 - Mend

reclassifier 0.0.3 → 0.0.4

Files changed (4) hide show

data/Rakefile +6 -1
data/lib/reclassifier/bayes.rb +108 -110
data/lib/reclassifier/version.rb +1 -1
metadata +2 -2

data/Rakefile CHANGED Viewed

@@ -1,4 +1,9 @@
-require "bundler/gem_tasks"
+require 'bundler/gem_tasks'
+require 'rdoc/task'
 require 'rspec/core/rake_task'
+Rake::RDocTask.new do |rdoc|
+  rdoc.rdoc_files.include('lib/**/*.rb')
+end
 RSpec::Core::RakeTask.new(:spec)

data/lib/reclassifier/bayes.rb CHANGED Viewed

@@ -2,136 +2,134 @@
 # Bayesian classifier for arbitrary text.
 #
 # Implementation is translated from
-# Introduction to Information Retrieval by Christopher D. Manning, Prabhakar Raghavan and Hinrich Schütze,
+# <em>Introduction to Information Retrieval</em> by Christopher D. Manning, Prabhakar Raghavan and Hinrich Schütze,
 # Cambridge University Press. 2008, ISBN 0521865719.
 #
-module Reclassifier
-  class Bayes
-    # Can be created with zero or more classifications, each of which will be
-    # initialized and given a training method.  The classifications are specified as
-    # symbols.  E.g.,
-    #      b = Reclassifier::Bayes.new :interesting, :uninteresting, :spam
-    def initialize(*classifications)
-      @classifications = {}
-      classifications.each {|classification| @classifications[classification] = {}}
-      @docs_in_classification_count = {}
-    end
+class Reclassifier::Bayes
+  # Can be created with zero or more classifications, each of which will be
+  # initialized with an empty word-count table.  The classifications are specified as
+  # symbols.  E.g.,
+  #      b = Reclassifier::Bayes.new :interesting, :uninteresting, :spam
+  def initialize(*classifications)
+    @classifications = {}
+    classifications.each {|classification| @classifications[classification] = {}}
+    @docs_in_classification_count = {}
+  end
-    #
-    # Provides a general training method for all classifications specified in Bayes#new
-    # For example:
-    #     b = Reclassifier::Bayes.new :this, :that
-    #     b.train :this, "This text"
-    #     b.train :that, "That text"
-    def train(classification, text)
-      ensure_classification_exists(classification)
+  #
+  # Provides a general training method for all classifications specified in Bayes#new
+  # For example:
+  #     b = Reclassifier::Bayes.new :this, :that
+  #     b.train :this, "This text"
+  #     b.train :that, "That text"
+  def train(classification, text)
+    ensure_classification_exists(classification)
-      @docs_in_classification_count[classification] ||= 0
-      @docs_in_classification_count[classification] += 1
+    @docs_in_classification_count[classification] ||= 0
+    @docs_in_classification_count[classification] += 1
-      text.word_hash.each do |word, count|
-        @classifications[classification][word] ||= 0
+    text.word_hash.each do |word, count|
+      @classifications[classification][word] ||= 0
-        @classifications[classification][word] += count
-      end
+      @classifications[classification][word] += count
+    end
+  end
+  #
+  # Untrain a (classification, text) pair.
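+  # Be very careful with this method: counts are decremented with no lower bound, so untraining text that was never trained skews the model.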
+  #
+  # For example:
+  #     b = Reclassifier::Bayes.new :this, :that, :the_other
+  #     b.train :this, "This text"
+  #     b.untrain :this, "This text"
+  def untrain(classification, text)
+    ensure_classification_exists(classification)
+    @docs_in_classification_count[classification] -= 1
+    text.word_hash.each do |word, count|
+      @classifications[classification][word] -= count if @classifications[classification].include?(word)
     end
+  end
-    #
-    # Untrain a (classification, text) pair.
-    # Be very careful with this method.
-    #
-    # For example:
-    #     b = Reclassifier::Bayes.new :this, :that, :the_other
-    #     b.train :this, "This text"
-    #     b.untrain :this, "This text"
-    def untrain(classification, text)
-      ensure_classification_exists(classification)
+  #
+  # Returns the scores of the specified text for each classification. E.g.,
+  #    b.calculate_scores "I hate bad words and you"
+  #    =>  {:uninteresting=>-12.6997928013932, :interesting=>-18.4206807439524}
+  # The largest of these scores (the one closest to 0) is the one picked out by #classify
+  def calculate_scores(text)
+    scores = {}
-      @docs_in_classification_count[classification] -= 1
+    @classifications.each do |classification, classification_word_counts|
+      # prior
+      scores[classification] = Math.log(@docs_in_classification_count[classification])
+      scores[classification] -= Math.log(@docs_in_classification_count.values.reduce(:+))
+      # likelihood
       text.word_hash.each do |word, count|
-        @classifications[classification][word] -= count if @classifications[classification].include?(word)
-      end
-    end
+        if @classifications.values.reduce(Set.new) {|set, word_counts| set.merge(word_counts.keys)}.include?(word)
+          scores[classification] += count * Math.log((classification_word_counts[word] || 0) + 1)
-    #
-    # Returns the scores of the specified text for each classification. E.g.,
-    #    b.classifications "I hate bad words and you"
-    #    =>  {"Uninteresting"=>-12.6997928013932, "Interesting"=>-18.4206807439524}
-    # The largest of these scores (the one closest to 0) is the one picked out by #classify
-    def calculate_scores(text)
-      scores = {}
-      @classifications.each do |classification, classification_word_counts|
-        # prior
-        scores[classification] = Math.log(@docs_in_classification_count[classification])
-        scores[classification] -= Math.log(@docs_in_classification_count.values.reduce(:+))
-        # likelihood
-        text.word_hash.each do |word, count|
-          if @classifications.values.reduce(Set.new) {|set, word_counts| set.merge(word_counts.keys)}.include?(word)
-            scores[classification] += count * Math.log((classification_word_counts[word] || 0) + 1)
-            scores[classification] -= count * Math.log(classification_word_counts.values.reduce(:+) + @classifications.values.reduce(Set.new) {|set, word_counts| set.merge(word_counts.keys)}.count)
-          end
+          scores[classification] -= count * Math.log(classification_word_counts.values.reduce(:+) + @classifications.values.reduce(Set.new) {|set, word_counts| set.merge(word_counts.keys)}.count)
         end
       end
-      scores
-    end
-    #
-    # Returns the classification of the specified text, which is one of the
-    # classifications given in the initializer. E.g.,
-    #    b.classify "I hate bad words and you"
-    #    =>  :uninteresting
-    def classify(text)
-      calculate_scores(text).max_by {|classification| classification[1]}[0]
     end
-    #
-    # Provides a list of classification names
-    # For example:
-    #     b.classifications
-    #     =>   [:this, :that, :the_other]
-    def classifications
-      @classifications.keys
-    end
+    scores
+  end
-    #
-    # Adds the classification to the classifier.
-    # Has no effect if the classification already existed.
-    # Returns the classification.
-    # For example:
-    #     b.add_classification(:not_spam)
-    def add_classification(classification)
-      @classifications[classification] ||= {}
+  #
+  # Returns the classification of the specified text, which is one of the
+  # classifications given in the initializer. E.g.,
+  #    b.classify "I hate bad words and you"
+  #    =>  :uninteresting
+  def classify(text)
+    calculate_scores(text).max_by {|classification| classification[1]}[0]
+  end
-      classification
-    end
+  #
+  # Provides a list of classification names
+  # For example:
+  #     b.classifications
+  #     =>   [:this, :that, :the_other]
+  def classifications
+    @classifications.keys
+  end
-    #
-    # Removes the classification from the classifier.
-    # Returns the classifier if the classification existed, else nil.
-    # For example:
-    #     b.remove_classification(:not_spam)
-    def remove_classification(classification)
-      return_value = if @classifications.include?(classification)
-                       classification
-                     else
-                       nil
-                     end
-      @classifications.delete(classification)
-      return_value
-    end
+  #
+  # Adds the classification to the classifier.
+  # Has no effect if the classification already existed.
+  # Returns the classification.
+  # For example:
+  #     b.add_classification(:not_spam)
+  def add_classification(classification)
+    @classifications[classification] ||= {}
-    private
+    classification
+  end
-      def ensure_classification_exists(classification)
-        raise Reclassifier::UnknownClassificationError unless @classifications.include?(classification)
-      end
+  #
+  # Removes the classification from the classifier.
+  # Returns the removed classification if it existed, else nil.
+  # For example:
+  #     b.remove_classification(:not_spam)
+  def remove_classification(classification)
+    return_value = if @classifications.include?(classification)
+                     classification
+                   else
+                     nil
+                   end
+    @classifications.delete(classification)
+    return_value
   end
+  private
+    def ensure_classification_exists(classification)
+      raise Reclassifier::UnknownClassificationError unless @classifications.include?(classification)
+    end
 end

data/lib/reclassifier/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Reclassifier
-  VERSION = "0.0.3"
+  VERSION = "0.0.4"
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: reclassifier
 version: !ruby/object:Gem::Version
-  version: 0.0.3
+  version: 0.0.4
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-04-22 00:00:00.000000000 Z
+date: 2013-04-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler