RubyGems - NaiveText - Versions diffs - 0.6.0 → 1.0.0 - Mend

NaiveText 0.6.0 → 1.0.0

Files changed (19)

checksums.yaml +4 -4
data/CHANGELOG.md +9 -0
data/Gemfile +3 -4
data/Guardfile +3 -4
data/NaiveText.gemspec +12 -12
data/Rakefile +1 -2
data/bin/console +3 -3
data/lib/NaiveText.rb +12 -15
data/lib/NaiveText/Categories.rb +5 -6
data/lib/NaiveText/CategoriesFactory.rb +10 -29
data/lib/NaiveText/Category.rb +2 -3
data/lib/NaiveText/Example.rb +3 -2
data/lib/NaiveText/ExamplesFactory.rb +8 -8
data/lib/NaiveText/ExamplesGroup.rb +15 -17
data/lib/NaiveText/ProbabilityCalculator.rb +22 -22
data/lib/NaiveText/ProbabilityCollection.rb +16 -16
data/lib/NaiveText/TextClassifier.rb +2 -6
data/lib/NaiveText/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 3b04e3a990ab60596a6e4067f3e6e6b7b762e9e7
-  data.tar.gz: 95cefeef5c2030e33c7290eecb848ec85e3a4d86
+  metadata.gz: 50e030f17d9a465122b843bd773747c02eee7488
+  data.tar.gz: a6b6ac823fb3ac1e190a2fad37871e258f76bf5e
 SHA512:
-  metadata.gz: d4b7734d40ca51cb0af57485ca7312007ba2ef0982f471cd3d95c000e488ea1d526bc6a03a84d52cfc1eeb41a3dc0793c986e7d9be49424ead2811042f0b8ce5
-  data.tar.gz: aed39b603081561255c043fbd61d9de06e0e91a14a628e1b324589e8eb0f6d4d3428248b9e18c6f35bf79a21852f7a121256a8fa16530f0774960526eeab3deb
+  metadata.gz: 795b9f38baa41fb7899070394832d1d520f63711eae619e0995550984293bb631bb83059826d92444841a17df7297f566371c21d611a5433fff4ee5b3802e224
+  data.tar.gz: 7cd4d3aa96d4b237e98b062250e3cd61536eb705977c088bc4fb004275d4de6f58d3e9b598c7f4b4877fc29e726f3730d7c1614ba4934670faa786812529b17e

data/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,15 @@
 All notable changes to this project will be documented in this file.
 This project adheres to [Semantic Versioning](http://semver.org/).
+## [1.0.0]- 2016-1-5
+### Changed
+- Split up the integration specs. Removed some duplication in the specs.
+- Refactored the specs to be more concise.
+- Cleaned the source code to be more readable.
+### Deleted
+- Removed old and deprecated array option for CategoriesFactory
+- Removed old misspelled call for propabilities on TextClassifier
 ## [0.6.0]- 2015-11-30
 ### Added
 - Added optional language_model, that make it possible to compare words based on the word stem. (Like 'testing', 'tests', 'tested' all matched with the stem 'test')

data/Gemfile CHANGED Viewed

@@ -3,7 +3,6 @@ source 'https://rubygems.org'
 # Specify your gem's dependencies in NaiveText.gemspec
 gemspec
-spec.add_development_dependency "guard"
-spec.add_development_dependency "guard-rspec"
-spec.add_development_dependency "guard-rubocop"
+spec.add_development_dependency 'guard'
+spec.add_development_dependency 'guard-rspec'
+spec.add_development_dependency 'guard-rubocop'

data/Guardfile CHANGED Viewed

@@ -24,8 +24,8 @@
 #  * zeus: 'zeus rspec' (requires the server to be started separately)
 #  * 'just' rspec: 'rspec'
-guard :rspec, cmd: "bundle exec rspec" do
-  require "guard/rspec/dsl"
+guard :rspec, cmd: 'bundle exec rspec' do
+  require 'guard/rspec/dsl'
   dsl = Guard::RSpec::Dsl.new(self)
   # Feel free to open issues for suggestions and improvements
@@ -39,10 +39,9 @@ guard :rspec, cmd: "bundle exec rspec" do
   # Ruby files
   ruby = dsl.ruby
   dsl.watch_spec_files_for(ruby.lib_files)
 end
 guard :rubocop, keep_failed: false do
-  watch(%r{(.+\.rb)$})
+  watch(/(.+\.rb)$/)
   watch(%r{(?:.+/)?\.rubocop\.yml$}) { |m| File.dirname(m[0]) }
 end

data/NaiveText.gemspec CHANGED Viewed

@@ -4,27 +4,27 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 require 'NaiveText/version'
 Gem::Specification.new do |spec|
-  spec.name          = "NaiveText"
+  spec.name          = 'NaiveText'
   spec.version       = NaiveText::VERSION
-  spec.authors       = ["RicciFlowing"]
-  spec.email         = ["benjamin@mathe-sellin.de"]
+  spec.authors       = ['RicciFlowing']
+  spec.email         = ['benjamin@mathe-sellin.de']
-  spec.summary       = "A text classifier written in ruby"
-  spec.description   = "NaiveText is a text classifier gem written in ruby and made to be easily integratable in your Rails app."
-  spec.homepage      = "https://github.com/RicciFlowing/NaiveText"
-  spec.licenses    = ['MIT']
+  spec.summary       = 'A text classifier written in ruby'
+  spec.description   = 'NaiveText is a text classifier gem written in ruby and made to be easily integratable in your Rails app.'
+  spec.homepage      = 'https://github.com/RicciFlowing/NaiveText'
+  spec.licenses = ['MIT']
   spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
-  spec.bindir        = "exe"
+  spec.bindir        = 'exe'
   spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
-  spec.require_paths = ["lib"]
+  spec.require_paths = ['lib']
   spec.required_ruby_version = '>= 2.0.0'
   if spec.respond_to?(:metadata)
-    spec.metadata['allowed_push_host'] = "https://rubygems.org"
+    spec.metadata['allowed_push_host'] = 'https://rubygems.org'
   end
-  spec.add_development_dependency "bundler", "~> 1.8"
-  spec.add_development_dependency "rake", "~> 10.0"
+  spec.add_development_dependency 'bundler', '~> 1.8'
+  spec.add_development_dependency 'rake', '~> 10.0'
 end

data/Rakefile CHANGED Viewed

@@ -1,2 +1 @@
-require "bundler/gem_tasks"
+require 'bundler/gem_tasks'

data/bin/console CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env ruby
-require "bundler/setup"
-require "NaiveText"
+require 'bundler/setup'
+require 'NaiveText'
 # You can add fixtures and/or initialization code here to make experimenting
 # with your gem easier. You can also use a different console, if you like.
@@ -10,5 +10,5 @@ require "NaiveText"
 # require "pry"
 # Pry.start
-require "irb"
+require 'irb'
 IRB.start

data/lib/NaiveText.rb CHANGED Viewed

@@ -1,20 +1,17 @@
-require "NaiveText/version"
-require "NaiveText/Example"
-require "NaiveText/ExamplesFactory"
-require "NaiveText/ExamplesGroup"
-require "NaiveText/ProbabilityCollection"
-require "NaiveText/ProbabilityCalculator"
-require "NaiveText/TextClassifier"
-require "NaiveText/Category"
-require "NaiveText/Categories"
-require "NaiveText/CategoriesFactory"
+require 'NaiveText/version'
+require 'NaiveText/Example'
+require 'NaiveText/ExamplesFactory'
+require 'NaiveText/ExamplesGroup'
+require 'NaiveText/ProbabilityCollection'
+require 'NaiveText/ProbabilityCalculator'
+require 'NaiveText/TextClassifier'
+require 'NaiveText/Category'
+require 'NaiveText/Categories'
+require 'NaiveText/CategoriesFactory'
 module NaiveText
   def self.build(config)
-      @categories = CategoriesFactory.build(config)
-      @test_classifier = TextClassifier.new(categories: @categories)
+    @categories = CategoriesFactory.build(config)
+    @test_classifier = TextClassifier.new(categories: @categories)
   end
 end

data/lib/NaiveText/Categories.rb CHANGED Viewed

@@ -17,14 +17,13 @@ class Categories
   end
   def total_word_count
-    @categories.inject(0) { |count, category | count + category.word_count  }
+    @categories.inject(0) { |count, category| count + category.word_count }
   end
   private
-   def calculate_apriori_propability_for(category)
-     sum_of_words = @categories.inject(0) {|sum, category| sum + category.word_count }
-     category.word_count.to_f / sum_of_words
-   end
+  def calculate_apriori_propability_for(category)
+    sum_of_words = @categories.inject(0) { |sum, category| sum + category.word_count }
+    category.word_count.to_f / sum_of_words
+  end
 end

data/lib/NaiveText/CategoriesFactory.rb CHANGED Viewed

@@ -2,36 +2,17 @@ class CategoriesFactory
   def self.build(config)
     categories = []
     default = nil
-    if config.is_a?(Array)
-      puts "The format [{name: name_of_category, path: path_to_trainings_data}] is deprecated and will be removed in version 1.0.0 (due in Jan. 2016). Use the following arguments instead: categories: [name: 'the name', examples:'An example']"
-      config.each do |category_config|
-        begin
-          examples = ExamplesFactory.from_files(category_config[:path])
-          group = ExamplesGroup.new(examples: examples)
-          categories << Category.new(name: category_config[:name], examples: group )
-        rescue
-          puts "You haven't provided trainingsdata for the category" + category_config[:name]
-          puts "This category was not created."
-        end
+    config[:categories].each do |category_config|
+      begin
+        group = ExamplesGroup.new(examples: category_config[:examples], language_model: config[:language_model])
+        category = Category.new(name: category_config[:name], examples: group, weight: category_config[:weight])
+        categories << category
+        default = category if category_config[:name] == config[:default]
+      rescue
+        puts "You haven't provided trainingsdata for the category" + category_config[:name]
+        puts 'This category was not created.'
       end
-      Categories.new(categories: categories)
-    else
-      config[:categories].each do |category_config|
-        begin
-          group = ExamplesGroup.new(examples: category_config[:examples], language_model: config[:language_model] )
-          category = Category.new(name: category_config[:name], examples: group, weight: category_config[:weight])
-          categories << category
-          if category_config[:name] == config[:default]
-            default = category
-          end
-        rescue
-          puts "You haven't provided trainingsdata for the category" + category_config[:name]
-          puts "This category was not created."
-        end
-      end
-      Categories.new(categories: categories, default: default )
     end
+    Categories.new(categories: categories, default: default)
   end
 end

data/lib/NaiveText/Category.rb CHANGED Viewed

@@ -3,7 +3,6 @@ class Category
   attr_reader :name, :id, :weight
   def initialize(args)
     @name     = args[:name]
     @examples = args[:examples]
@@ -14,7 +13,7 @@ class Category
   end
   def p(word)
-    if(@examples.word_count>0)
+    if @examples.word_count > 0
       @examples.count(word).to_f / @examples.word_count
     else
       0
@@ -35,7 +34,7 @@ class NullCategory
   attr_reader :id
   def initialize
-    @name = "No category"
+    @name = 'No category'
     @id = 0
   end
 end

data/lib/NaiveText/Example.rb CHANGED Viewed

@@ -6,13 +6,14 @@ class Example
   end
   private
-    def load_text(args)
-    end
+  def load_text(_args)
+  end
 end
 class FileExample < Example
   private
     def load_text(args)
       @text = File.read(args[:path])
     end

data/lib/NaiveText/ExamplesFactory.rb CHANGED Viewed

@@ -1,14 +1,14 @@
 class ExamplesFactory
   def self.from_files(dir_path)
-      begin
-        examples = []
-        Dir.foreach(dir_path) do |file_path|
-          next if file_path == '.' or file_path == '..'
-          examples.push FileExample.new(path: dir_path+'/'+file_path)
-        end
-      rescue
-        puts "Failed loading" + dir_path
+    begin
+      examples = []
+      Dir.foreach(dir_path) do |file_path|
+        next if file_path == '.' || file_path == '..'
+        examples.push FileExample.new(path: dir_path + '/' + file_path)
       end
+    rescue
+      puts 'Failed loading' + dir_path
+    end
     examples
   end
 end

data/lib/NaiveText/ExamplesGroup.rb CHANGED Viewed

@@ -1,13 +1,11 @@
 class ExamplesGroup
   def initialize(args)
     @examples       = args[:examples].to_a || []
-    @language_model = args[:language_model] || lambda {|str| str}
+    @language_model = args[:language_model] || ->(str) { str }
     load_text
     split_text_into_words
     format_words
-    if @words.length == 0
-      raise 'Empty_Trainingsdata'
-    end
+    fail 'Empty_Trainingsdata' if @words.length == 0
   end
   def count(word)
@@ -20,20 +18,20 @@ class ExamplesGroup
   private
-    def load_text
-      @text = ''
-      @examples.each do |example|
-        @text += ' ' + example.text
-      end
+  def load_text
+    @text = ''
+    @examples.each do |example|
+      @text += ' ' + example.text
     end
+  end
-    def split_text_into_words
-      @words = @text.split(/\W+/)
-    end
+  def split_text_into_words
+    @words = @text.split(/\W+/)
+  end
-    def format_words
-      @words.map! {|word| word.downcase}
-      @words.map! {|word| @language_model.call(word)}
-      @words
-    end
+  def format_words
+    @words.map!(&:downcase)
+    @words.map! { |word| @language_model.call(word) }
+    @words
+  end
 end

data/lib/NaiveText/ProbabilityCalculator.rb CHANGED Viewed

@@ -9,35 +9,35 @@ class ProbabilityCalculator
     @probabilities.normalize
   end
   private
-    def protect_factor(factor)
-      [factor, minimum].max
-    end
-    def minimum
-      1.to_f/(10*@categories.total_word_count)
-    end
+  def protect_factor(factor)
+    [factor, minimum].max
+  end
-    def calculateProbabilities(text)
-      set_apriori_probabilities
-      list_of_words = text.split(/\W+/)
-      list_of_words.each do |word|
-        @categories.each do |category|
-          @probabilities.multiply(category: category, factor: protect_factor(category.p(word)) )
-        end
-      end
-      remove_minimum(text)
-    end
+  def minimum
+    1.to_f / (10 * @categories.total_word_count)
+  end
-    def set_apriori_probabilities
+  def calculateProbabilities(text)
+    set_apriori_probabilities
+    list_of_words = text.split(/\W+/)
+    list_of_words.each do |word|
       @categories.each do |category|
-        @probabilities.set(category: category, value: @categories.p_apriori(category))
+        @probabilities.multiply(category: category, factor: protect_factor(category.p(word)))
       end
     end
+    remove_minimum(text)
+  end
-    def remove_minimum(text)
-      times = text.split(/\W+/).length
-      @probabilities.greater_then(minimum**times)
+  def set_apriori_probabilities
+    @categories.each do |category|
+      @probabilities.set(category: category, value: @categories.p_apriori(category))
     end
+  end
+  def remove_minimum(text)
+    times = text.split(/\W+/).length
+    @probabilities.greater_then(minimum**times)
+  end
 end

data/lib/NaiveText/ProbabilityCollection.rb CHANGED Viewed

@@ -1,18 +1,17 @@
 class ProbabilityCollection
   def initialize(args)
-    @categories  =  args[:categories] || []
+    @categories = args[:categories] || []
     initialize_ids
     @probabilities = []
     initalize_probabilities(@ids)
   end
   def find(category)
-    return @probabilities[category.id]
+    @probabilities[category.id]
   end
   def set(args)
-    category  = args[:category]
+    category = args[:category]
     value = args[:value]
     @probabilities[category.id] = value
   end
@@ -23,14 +22,14 @@ class ProbabilityCollection
     if category
       @probabilities[category.id] *= factor
     else
-      @probabilities.map! {|el| el*factor}
+      @probabilities.map! { |el| el * factor }
     end
   end
   def normalize
-    if self.sum > 0
-      normalization_factor = 1.to_f / self.sum
-      self.multiply(factor: normalization_factor)
+    if sum > 0
+      normalization_factor = 1.to_f / sum
+      multiply(factor: normalization_factor)
     end
     self
   end
@@ -38,7 +37,7 @@ class ProbabilityCollection
   def category_with_max
     if @probabilities.max > 0
       id = @probabilities.find_index(@probabilities.max)
-      @categories.find {|category| category.id == id}
+      @categories.find { |category| category.id == id }
     else
       @categories.default
     end
@@ -50,11 +49,11 @@ class ProbabilityCollection
   def greater_then(value)
     @probabilities.map! do |p|
-       if p > value
-         p
-       else
-         0
-       end
+      if p > value
+        p
+      else
+        0
+      end
     end
   end
@@ -67,15 +66,16 @@ class ProbabilityCollection
     @categories.each do |category|
       result << category.to_s
       result << ':'
-      result << self.find(category).to_s
+      result << find(category).to_s
       result << "\n"
     end
     result
   end
   private
   def initialize_ids
-    @ids =  @categories.map { |category| category.id }
+    @ids = @categories.map(&:id)
   end
   def initalize_probabilities(ids)

data/lib/NaiveText/TextClassifier.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 class TextClassifier
   attr_reader :categories
-  def initialize( args )
+  def initialize(args)
     @categories = args[:categories]
     @calculator = args[:calculator] || ProbabilityCalculator.new(categories: @categories)
   end
@@ -13,12 +13,8 @@ class TextClassifier
     @calculator.get_probabilities_for(text)
   end
-  def propabilities(text)
-    puts "This notation is deprecated in will be removed in later versions. Please use probabilities (4th character b instead of p)"
-    probabilities(text)
-  end
+  private
-private
   def get_category_for(text)
     probabilities = @calculator.get_probabilities_for(text)
     @categories.each do |category|

data/lib/NaiveText/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module NaiveText
-  VERSION = "0.6.0"
+  VERSION = '1.0.0'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: NaiveText
 version: !ruby/object:Gem::Version
-  version: 0.6.0
+  version: 1.0.0
 platform: ruby
 authors:
 - RicciFlowing
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2015-12-01 00:00:00.000000000 Z
+date: 2016-01-09 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler