RubyGems - NaiveText - Versions diffs - 0.6.0 → 1.0.0 - Mend

NaiveText 0.6.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

checksums.yaml +4 -4
data/CHANGELOG.md +9 -0
data/Gemfile +3 -4
data/Guardfile +3 -4
data/NaiveText.gemspec +12 -12
data/Rakefile +1 -2
data/bin/console +3 -3
data/lib/NaiveText.rb +12 -15
data/lib/NaiveText/Categories.rb +5 -6
data/lib/NaiveText/CategoriesFactory.rb +10 -29
data/lib/NaiveText/Category.rb +2 -3
data/lib/NaiveText/Example.rb +3 -2
data/lib/NaiveText/ExamplesFactory.rb +8 -8
data/lib/NaiveText/ExamplesGroup.rb +15 -17
data/lib/NaiveText/ProbabilityCalculator.rb +22 -22
data/lib/NaiveText/ProbabilityCollection.rb +16 -16
data/lib/NaiveText/TextClassifier.rb +2 -6
data/lib/NaiveText/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 3b04e3a990ab60596a6e4067f3e6e6b7b762e9e7
-  data.tar.gz: 95cefeef5c2030e33c7290eecb848ec85e3a4d86
+  metadata.gz: 50e030f17d9a465122b843bd773747c02eee7488
+  data.tar.gz: a6b6ac823fb3ac1e190a2fad37871e258f76bf5e
 SHA512:
-  metadata.gz: d4b7734d40ca51cb0af57485ca7312007ba2ef0982f471cd3d95c000e488ea1d526bc6a03a84d52cfc1eeb41a3dc0793c986e7d9be49424ead2811042f0b8ce5
-  data.tar.gz: aed39b603081561255c043fbd61d9de06e0e91a14a628e1b324589e8eb0f6d4d3428248b9e18c6f35bf79a21852f7a121256a8fa16530f0774960526eeab3deb
+  metadata.gz: 795b9f38baa41fb7899070394832d1d520f63711eae619e0995550984293bb631bb83059826d92444841a17df7297f566371c21d611a5433fff4ee5b3802e224
+  data.tar.gz: 7cd4d3aa96d4b237e98b062250e3cd61536eb705977c088bc4fb004275d4de6f58d3e9b598c7f4b4877fc29e726f3730d7c1614ba4934670faa786812529b17e

data/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,15 @@
 All notable changes to this project will be documented in this file.
 This project adheres to [Semantic Versioning](http://semver.org/).
+## [1.0.0]- 2016-1-5
+### Changed
+- Split up the integration specs. Removed some duplication in the specs.
+- Refactored the specs to be more concise.
+- Cleaned the source code to be more readable.
+### Deleted
+- Removed old and deprecated array option for CategoriesFactory
+- Removed old misspelled call for propabilities on TextClassifier
 ## [0.6.0]- 2015-11-30
 ### Added
 - Added optional language_model, that make it possible to compare words based on the word stem. (Like 'testing', 'tests', 'tested' all matched with the stem 'test')

data/Gemfile CHANGED Viewed

@@ -3,7 +3,6 @@ source 'https://rubygems.org'
 # Specify your gem's dependencies in NaiveText.gemspec
 gemspec
-spec.add_development_dependency "guard"
-spec.add_development_dependency "guard-rspec"
-spec.add_development_dependency "guard-rubocop"
+spec.add_development_dependency 'guard'
+spec.add_development_dependency 'guard-rspec'
+spec.add_development_dependency 'guard-rubocop'

data/Guardfile CHANGED Viewed

@@ -24,8 +24,8 @@
 #  * zeus: 'zeus rspec' (requires the server to be started separately)
 #  * 'just' rspec: 'rspec'
-guard :rspec, cmd: "bundle exec rspec" do
-  require "guard/rspec/dsl"
+guard :rspec, cmd: 'bundle exec rspec' do
+  require 'guard/rspec/dsl'
   dsl = Guard::RSpec::Dsl.new(self)
   # Feel free to open issues for suggestions and improvements
@@ -39,10 +39,9 @@ guard :rspec, cmd: "bundle exec rspec" do
   # Ruby files
   ruby = dsl.ruby
   dsl.watch_spec_files_for(ruby.lib_files)
 end
 guard :rubocop, keep_failed: false do
-  watch(%r{(.+\.rb)$})
+  watch(/(.+\.rb)$/)
   watch(%r{(?:.+/)?\.rubocop\.yml$}) { |m| File.dirname(m[0]) }
 end

data/NaiveText.gemspec CHANGED Viewed

@@ -4,27 +4,27 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 require 'NaiveText/version'
 Gem::Specification.new do |spec|
-  spec.name          = "NaiveText"
+  spec.name          = 'NaiveText'
   spec.version       = NaiveText::VERSION
-  spec.authors       = ["RicciFlowing"]
-  spec.email         = ["benjamin@mathe-sellin.de"]
+  spec.authors       = ['RicciFlowing']
+  spec.email         = ['benjamin@mathe-sellin.de']
-  spec.summary       = "A text classifier written in ruby"
-  spec.description   = "NaiveText is a text classifier gem written in ruby and made to be easily integratable in your Rails app."
-  spec.homepage      = "https://github.com/RicciFlowing/NaiveText"
-  spec.licenses    = ['MIT']
+  spec.summary       = 'A text classifier written in ruby'
+  spec.description   = 'NaiveText is a text classifier gem written in ruby and made to be easily integratable in your Rails app.'
+  spec.homepage      = 'https://github.com/RicciFlowing/NaiveText'
+  spec.licenses = ['MIT']
   spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
-  spec.bindir        = "exe"
+  spec.bindir        = 'exe'
   spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
-  spec.require_paths = ["lib"]
+  spec.require_paths = ['lib']
   spec.required_ruby_version = '>= 2.0.0'
   if spec.respond_to?(:metadata)
-    spec.metadata['allowed_push_host'] = "https://rubygems.org"
+    spec.metadata['allowed_push_host'] = 'https://rubygems.org'
   end
-  spec.add_development_dependency "bundler", "~> 1.8"
-  spec.add_development_dependency "rake", "~> 10.0"
+  spec.add_development_dependency 'bundler', '~> 1.8'
+  spec.add_development_dependency 'rake', '~> 10.0'
 end

data/Rakefile CHANGED Viewed

@@ -1,2 +1 @@
-require "bundler/gem_tasks"
+require 'bundler/gem_tasks'

data/bin/console CHANGED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env ruby
-require "bundler/setup"
-require "NaiveText"
+require 'bundler/setup'
+require 'NaiveText'
 # You can add fixtures and/or initialization code here to make experimenting
 # with your gem easier. You can also use a different console, if you like.
@@ -10,5 +10,5 @@ require "NaiveText"
 # require "pry"
 # Pry.start
-require "irb"
+require 'irb'
 IRB.start

data/lib/NaiveText.rb CHANGED Viewed

@@ -1,20 +1,17 @@
-require "NaiveText/version"
-require "NaiveText/Example"
-require "NaiveText/ExamplesFactory"
-require "NaiveText/ExamplesGroup"
-require "NaiveText/ProbabilityCollection"
-require "NaiveText/ProbabilityCalculator"
-require "NaiveText/TextClassifier"
-require "NaiveText/Category"
-require "NaiveText/Categories"
-require "NaiveText/CategoriesFactory"
+require 'NaiveText/version'
+require 'NaiveText/Example'
+require 'NaiveText/ExamplesFactory'
+require 'NaiveText/ExamplesGroup'
+require 'NaiveText/ProbabilityCollection'
+require 'NaiveText/ProbabilityCalculator'
+require 'NaiveText/TextClassifier'
+require 'NaiveText/Category'
+require 'NaiveText/Categories'
+require 'NaiveText/CategoriesFactory'
 module NaiveText
   def self.build(config)
-      @categories = CategoriesFactory.build(config)
-      @test_classifier = TextClassifier.new(categories: @categories)
+    @categories = CategoriesFactory.build(config)
+    @test_classifier = TextClassifier.new(categories: @categories)
   end
 end

data/lib/NaiveText/Categories.rb CHANGED Viewed

@@ -17,14 +17,13 @@ class Categories
   end
   def total_word_count
-    @categories.inject(0) { |count, category | count + category.word_count  }
+    @categories.inject(0) { |count, category| count + category.word_count }
   end
   private
-   def calculate_apriori_propability_for(category)
-     sum_of_words = @categories.inject(0) {|sum, category| sum + category.word_count }
-     category.word_count.to_f / sum_of_words
-   end
+  def calculate_apriori_propability_for(category)
+    sum_of_words = @categories.inject(0) { |sum, category| sum + category.word_count }
+    category.word_count.to_f / sum_of_words
+  end
 end

data/lib/NaiveText/CategoriesFactory.rb CHANGED Viewed

@@ -2,36 +2,17 @@ class CategoriesFactory
   def self.build(config)
     categories = []
     default = nil
-    if config.is_a?(Array)
-      puts "The format [{name: name_of_category, path: path_to_trainings_data}] is deprecated and will be removed in version 1.0.0 (due in Jan. 2016). Use the following arguments instead: categories: [name: 'the name', examples:'An example']"
-      config.each do |category_config|
-        begin
-          examples = ExamplesFactory.from_files(category_config[:path])
-          group = ExamplesGroup.new(examples: examples)
-          categories << Category.new(name: category_config[:name], examples: group )
-        rescue
-          puts "You haven't provided trainingsdata for the category" + category_config[:name]
-          puts "This category was not created."
-        end
+    config[:categories].each do |category_config|
+      begin
+        group = ExamplesGroup.new(examples: category_config[:examples], language_model: config[:language_model])
+        category = Category.new(name: category_config[:name], examples: group, weight: category_config[:weight])
+        categories << category
+        default = category if category_config[:name] == config[:default]
+      rescue
+        puts "You haven't provided trainingsdata for the category" + category_config[:name]
+        puts 'This category was not created.'
       end
-      Categories.new(categories: categories)
-    else
-      config[:categories].each do |category_config|
-        begin
-          group = ExamplesGroup.new(examples: category_config[:examples], language_model: config[:language_model] )
-          category = Category.new(name: category_config[:name], examples: group, weight: category_config[:weight])
-          categories << category
-          if category_config[:name] == config[:default]
-            default = category
-          end
-        rescue
-          puts "You haven't provided trainingsdata for the category" + category_config[:name]
-          puts "This category was not created."
-        end
-      end
-      Categories.new(categories: categories, default: default )
     end
+    Categories.new(categories: categories, default: default)
   end
 end

data/lib/NaiveText/Category.rb CHANGED Viewed

@@ -3,7 +3,6 @@ class Category
   attr_reader :name, :id, :weight
   def initialize(args)
     @name     = args[:name]
     @examples = args[:examples]
@@ -14,7 +13,7 @@ class Category
   end
   def p(word)
-    if(@examples.word_count>0)
+    if @examples.word_count > 0
       @examples.count(word).to_f / @examples.word_count
     else
       0
@@ -35,7 +34,7 @@ class NullCategory
   attr_reader :id
   def initialize
-    @name = "No category"
+    @name = 'No category'
     @id = 0
   end
 end

data/lib/NaiveText/Example.rb CHANGED Viewed

@@ -6,13 +6,14 @@ class Example
   end
   private
-    def load_text(args)
-    end
+  def load_text(_args)
+  end
 end
 class FileExample < Example
   private
     def load_text(args)
       @text = File.read(args[:path])
     end

data/lib/NaiveText/ExamplesFactory.rb CHANGED Viewed

@@ -1,14 +1,14 @@
 class ExamplesFactory
   def self.from_files(dir_path)
-      begin
-        examples = []
-        Dir.foreach(dir_path) do |file_path|
-          next if file_path == '.' or file_path == '..'
-          examples.push FileExample.new(path: dir_path+'/'+file_path)
-        end
-      rescue
-        puts "Failed loading" + dir_path
+    begin
+      examples = []
+      Dir.foreach(dir_path) do |file_path|
+        next if file_path == '.' || file_path == '..'
+        examples.push FileExample.new(path: dir_path + '/' + file_path)
       end
+    rescue
+      puts 'Failed loading' + dir_path
+    end
     examples
   end
 end

data/lib/NaiveText/ExamplesGroup.rb CHANGED Viewed

@@ -1,13 +1,11 @@
 class ExamplesGroup
   def initialize(args)
     @examples       = args[:examples].to_a || []
-    @language_model = args[:language_model] || lambda {|str| str}
+    @language_model = args[:language_model] || ->(str) { str }
     load_text
     split_text_into_words
     format_words
-    if @words.length == 0
-      raise 'Empty_Trainingsdata'
-    end
+    fail 'Empty_Trainingsdata' if @words.length == 0
   end
   def count(word)
@@ -20,20 +18,20 @@ class ExamplesGroup
   private
-    def load_text
-      @text = ''
-      @examples.each do |example|
-        @text += ' ' + example.text
-      end
+  def load_text
+    @text = ''
+    @examples.each do |example|
+      @text += ' ' + example.text
     end
+  end
-    def split_text_into_words
-      @words = @text.split(/\W+/)
-    end
+  def split_text_into_words
+    @words = @text.split(/\W+/)
+  end
-    def format_words
-      @words.map! {|word| word.downcase}
-      @words.map! {|word| @language_model.call(word)}
-      @words
-    end
+  def format_words
+    @words.map!(&:downcase)
+    @words.map! { |word| @language_model.call(word) }
+    @words
+  end
 end

data/lib/NaiveText/ProbabilityCalculator.rb CHANGED Viewed

@@ -9,35 +9,35 @@ class ProbabilityCalculator
     @probabilities.normalize
   end
   private
-    def protect_factor(factor)
-      [factor, minimum].max
-    end
-    def minimum
-      1.to_f/(10*@categories.total_word_count)
-    end
+  def protect_factor(factor)
+    [factor, minimum].max
+  end
-    def calculateProbabilities(text)
-      set_apriori_probabilities
-      list_of_words = text.split(/\W+/)
-      list_of_words.each do |word|
-        @categories.each do |category|
-          @probabilities.multiply(category: category, factor: protect_factor(category.p(word)) )
-        end
-      end
-      remove_minimum(text)
-    end
+  def minimum
+    1.to_f / (10 * @categories.total_word_count)
+  end
-    def set_apriori_probabilities
+  def calculateProbabilities(text)
+    set_apriori_probabilities
+    list_of_words = text.split(/\W+/)
+    list_of_words.each do |word|
       @categories.each do |category|
-        @probabilities.set(category: category, value: @categories.p_apriori(category))
+        @probabilities.multiply(category: category, factor: protect_factor(category.p(word)))
       end
     end
+    remove_minimum(text)
+  end
-    def remove_minimum(text)
-      times = text.split(/\W+/).length
-      @probabilities.greater_then(minimum**times)
+  def set_apriori_probabilities
+    @categories.each do |category|
+      @probabilities.set(category: category, value: @categories.p_apriori(category))
     end
+  end
+  def remove_minimum(text)
+    times = text.split(/\W+/).length
+    @probabilities.greater_then(minimum**times)
+  end
 end

data/lib/NaiveText/ProbabilityCollection.rb CHANGED Viewed

@@ -1,18 +1,17 @@
 class ProbabilityCollection
   def initialize(args)
-    @categories  =  args[:categories] || []
+    @categories = args[:categories] || []
     initialize_ids
     @probabilities = []
     initalize_probabilities(@ids)
   end
   def find(category)
-    return @probabilities[category.id]
+    @probabilities[category.id]
   end
   def set(args)
-    category  = args[:category]
+    category = args[:category]
     value = args[:value]
     @probabilities[category.id] = value
   end
@@ -23,14 +22,14 @@ class ProbabilityCollection
     if category
       @probabilities[category.id] *= factor
     else
-      @probabilities.map! {|el| el*factor}
+      @probabilities.map! { |el| el * factor }
     end
   end
   def normalize
-    if self.sum > 0
-      normalization_factor = 1.to_f / self.sum
-      self.multiply(factor: normalization_factor)
+    if sum > 0
+      normalization_factor = 1.to_f / sum
+      multiply(factor: normalization_factor)
     end
     self
   end
@@ -38,7 +37,7 @@ class ProbabilityCollection
   def category_with_max
     if @probabilities.max > 0
       id = @probabilities.find_index(@probabilities.max)
-      @categories.find {|category| category.id == id}
+      @categories.find { |category| category.id == id }
     else
       @categories.default
     end
@@ -50,11 +49,11 @@ class ProbabilityCollection
   def greater_then(value)
     @probabilities.map! do |p|
-       if p > value
-         p
-       else
-         0
-       end
+      if p > value
+        p
+      else
+        0
+      end
     end
   end
@@ -67,15 +66,16 @@ class ProbabilityCollection
     @categories.each do |category|
       result << category.to_s
       result << ':'
-      result << self.find(category).to_s
+      result << find(category).to_s
       result << "\n"
     end
     result
   end
   private
   def initialize_ids
-    @ids =  @categories.map { |category| category.id }
+    @ids = @categories.map(&:id)
   end
   def initalize_probabilities(ids)

data/lib/NaiveText/TextClassifier.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 class TextClassifier
   attr_reader :categories
-  def initialize( args )
+  def initialize(args)
     @categories = args[:categories]
     @calculator = args[:calculator] || ProbabilityCalculator.new(categories: @categories)
   end
@@ -13,12 +13,8 @@ class TextClassifier
     @calculator.get_probabilities_for(text)
   end
-  def propabilities(text)
-    puts "This notation is deprecated in will be removed in later versions. Please use probabilities (4th character b instead of p)"
-    probabilities(text)
-  end
+  private
-private
   def get_category_for(text)
     probabilities = @calculator.get_probabilities_for(text)
     @categories.each do |category|

data/lib/NaiveText/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module NaiveText
-  VERSION = "0.6.0"
+  VERSION = '1.0.0'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: NaiveText
 version: !ruby/object:Gem::Version
-  version: 0.6.0
+  version: 1.0.0
 platform: ruby
 authors:
 - RicciFlowing
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2015-12-01 00:00:00.000000000 Z
+date: 2016-01-09 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler