NaiveText 0.6.0 → 1.0.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3b04e3a990ab60596a6e4067f3e6e6b7b762e9e7
4
- data.tar.gz: 95cefeef5c2030e33c7290eecb848ec85e3a4d86
3
+ metadata.gz: 50e030f17d9a465122b843bd773747c02eee7488
4
+ data.tar.gz: a6b6ac823fb3ac1e190a2fad37871e258f76bf5e
5
5
  SHA512:
6
- metadata.gz: d4b7734d40ca51cb0af57485ca7312007ba2ef0982f471cd3d95c000e488ea1d526bc6a03a84d52cfc1eeb41a3dc0793c986e7d9be49424ead2811042f0b8ce5
7
- data.tar.gz: aed39b603081561255c043fbd61d9de06e0e91a14a628e1b324589e8eb0f6d4d3428248b9e18c6f35bf79a21852f7a121256a8fa16530f0774960526eeab3deb
6
+ metadata.gz: 795b9f38baa41fb7899070394832d1d520f63711eae619e0995550984293bb631bb83059826d92444841a17df7297f566371c21d611a5433fff4ee5b3802e224
7
+ data.tar.gz: 7cd4d3aa96d4b237e98b062250e3cd61536eb705977c088bc4fb004275d4de6f58d3e9b598c7f4b4877fc29e726f3730d7c1614ba4934670faa786812529b17e
data/CHANGELOG.md CHANGED
@@ -2,6 +2,15 @@
2
2
  All notable changes to this project will be documented in this file.
3
3
  This project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
+ ## [1.0.0]- 2016-1-5
6
+ ### Changed
7
+ - Split up the integration specs. Removed some duplication in the specs.
8
+ - Refactored the specs to be more concise.
9
+ - Cleaned the source code to be more readable.
10
+ ### Deleted
11
+ - Removed the old, deprecated array-style configuration option for CategoriesFactory
12
+ - Removed the old, misspelled `propabilities` method from TextClassifier (use `probabilities` instead)
13
+
5
14
  ## [0.6.0]- 2015-11-30
6
15
  ### Added
7
16
  - Added an optional language_model that makes it possible to compare words based on their word stem (for example, 'testing', 'tests', and 'tested' all match the stem 'test').
data/Gemfile CHANGED
@@ -3,7 +3,6 @@ source 'https://rubygems.org'
3
3
  # Specify your gem's dependencies in NaiveText.gemspec
4
4
  gemspec
5
5
 
6
-
7
- spec.add_development_dependency "guard"
8
- spec.add_development_dependency "guard-rspec"
9
- spec.add_development_dependency "guard-rubocop"
6
+ spec.add_development_dependency 'guard'
7
+ spec.add_development_dependency 'guard-rspec'
8
+ spec.add_development_dependency 'guard-rubocop'
data/Guardfile CHANGED
@@ -24,8 +24,8 @@
24
24
  # * zeus: 'zeus rspec' (requires the server to be started separately)
25
25
  # * 'just' rspec: 'rspec'
26
26
 
27
- guard :rspec, cmd: "bundle exec rspec" do
28
- require "guard/rspec/dsl"
27
+ guard :rspec, cmd: 'bundle exec rspec' do
28
+ require 'guard/rspec/dsl'
29
29
  dsl = Guard::RSpec::Dsl.new(self)
30
30
 
31
31
  # Feel free to open issues for suggestions and improvements
@@ -39,10 +39,9 @@ guard :rspec, cmd: "bundle exec rspec" do
39
39
  # Ruby files
40
40
  ruby = dsl.ruby
41
41
  dsl.watch_spec_files_for(ruby.lib_files)
42
-
43
42
  end
44
43
 
45
44
  guard :rubocop, keep_failed: false do
46
- watch(%r{(.+\.rb)$})
45
+ watch(/(.+\.rb)$/)
47
46
  watch(%r{(?:.+/)?\.rubocop\.yml$}) { |m| File.dirname(m[0]) }
48
47
  end
data/NaiveText.gemspec CHANGED
@@ -4,27 +4,27 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'NaiveText/version'
5
5
 
6
6
  Gem::Specification.new do |spec|
7
- spec.name = "NaiveText"
7
+ spec.name = 'NaiveText'
8
8
  spec.version = NaiveText::VERSION
9
- spec.authors = ["RicciFlowing"]
10
- spec.email = ["benjamin@mathe-sellin.de"]
9
+ spec.authors = ['RicciFlowing']
10
+ spec.email = ['benjamin@mathe-sellin.de']
11
11
 
12
- spec.summary = "A text classifier written in ruby"
13
- spec.description = "NaiveText is a text classifier gem written in ruby and made to be easily integratable in your Rails app."
14
- spec.homepage = "https://github.com/RicciFlowing/NaiveText"
15
- spec.licenses = ['MIT']
12
+ spec.summary = 'A text classifier written in ruby'
13
+ spec.description = 'NaiveText is a text classifier gem written in ruby and made to be easily integratable in your Rails app.'
14
+ spec.homepage = 'https://github.com/RicciFlowing/NaiveText'
15
+ spec.licenses = ['MIT']
16
16
 
17
17
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
18
- spec.bindir = "exe"
18
+ spec.bindir = 'exe'
19
19
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
20
- spec.require_paths = ["lib"]
20
+ spec.require_paths = ['lib']
21
21
 
22
22
  spec.required_ruby_version = '>= 2.0.0'
23
23
 
24
24
  if spec.respond_to?(:metadata)
25
- spec.metadata['allowed_push_host'] = "https://rubygems.org"
25
+ spec.metadata['allowed_push_host'] = 'https://rubygems.org'
26
26
  end
27
27
 
28
- spec.add_development_dependency "bundler", "~> 1.8"
29
- spec.add_development_dependency "rake", "~> 10.0"
28
+ spec.add_development_dependency 'bundler', '~> 1.8'
29
+ spec.add_development_dependency 'rake', '~> 10.0'
30
30
  end
data/Rakefile CHANGED
@@ -1,2 +1 @@
1
- require "bundler/gem_tasks"
2
-
1
+ require 'bundler/gem_tasks'
data/bin/console CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "bundler/setup"
4
- require "NaiveText"
3
+ require 'bundler/setup'
4
+ require 'NaiveText'
5
5
 
6
6
  # You can add fixtures and/or initialization code here to make experimenting
7
7
  # with your gem easier. You can also use a different console, if you like.
@@ -10,5 +10,5 @@ require "NaiveText"
10
10
  # require "pry"
11
11
  # Pry.start
12
12
 
13
- require "irb"
13
+ require 'irb'
14
14
  IRB.start
data/lib/NaiveText.rb CHANGED
@@ -1,20 +1,17 @@
1
- require "NaiveText/version"
2
- require "NaiveText/Example"
3
- require "NaiveText/ExamplesFactory"
4
- require "NaiveText/ExamplesGroup"
5
- require "NaiveText/ProbabilityCollection"
6
- require "NaiveText/ProbabilityCalculator"
7
- require "NaiveText/TextClassifier"
8
- require "NaiveText/Category"
9
- require "NaiveText/Categories"
10
- require "NaiveText/CategoriesFactory"
11
-
12
-
1
+ require 'NaiveText/version'
2
+ require 'NaiveText/Example'
3
+ require 'NaiveText/ExamplesFactory'
4
+ require 'NaiveText/ExamplesGroup'
5
+ require 'NaiveText/ProbabilityCollection'
6
+ require 'NaiveText/ProbabilityCalculator'
7
+ require 'NaiveText/TextClassifier'
8
+ require 'NaiveText/Category'
9
+ require 'NaiveText/Categories'
10
+ require 'NaiveText/CategoriesFactory'
13
11
 
14
12
  module NaiveText
15
-
16
13
  def self.build(config)
17
- @categories = CategoriesFactory.build(config)
18
- @test_classifier = TextClassifier.new(categories: @categories)
14
+ @categories = CategoriesFactory.build(config)
15
+ @test_classifier = TextClassifier.new(categories: @categories)
19
16
  end
20
17
  end
@@ -17,14 +17,13 @@ class Categories
17
17
  end
18
18
 
19
19
  def total_word_count
20
- @categories.inject(0) { |count, category | count + category.word_count }
20
+ @categories.inject(0) { |count, category| count + category.word_count }
21
21
  end
22
22
 
23
23
  private
24
24
 
25
- def calculate_apriori_propability_for(category)
26
- sum_of_words = @categories.inject(0) {|sum, category| sum + category.word_count }
27
- category.word_count.to_f / sum_of_words
28
- end
29
-
25
+ def calculate_apriori_propability_for(category)
26
+ sum_of_words = @categories.inject(0) { |sum, category| sum + category.word_count }
27
+ category.word_count.to_f / sum_of_words
28
+ end
30
29
  end
@@ -2,36 +2,17 @@ class CategoriesFactory
2
2
  def self.build(config)
3
3
  categories = []
4
4
  default = nil
5
- if config.is_a?(Array)
6
- puts "The format [{name: name_of_category, path: path_to_trainings_data}] is deprecated and will be removed in version 1.0.0 (due in Jan. 2016). Use the following arguments instead: categories: [name: 'the name', examples:'An example']"
7
- config.each do |category_config|
8
- begin
9
- examples = ExamplesFactory.from_files(category_config[:path])
10
- group = ExamplesGroup.new(examples: examples)
11
- categories << Category.new(name: category_config[:name], examples: group )
12
- rescue
13
- puts "You haven't provided trainingsdata for the category" + category_config[:name]
14
- puts "This category was not created."
15
- end
5
+ config[:categories].each do |category_config|
6
+ begin
7
+ group = ExamplesGroup.new(examples: category_config[:examples], language_model: config[:language_model])
8
+ category = Category.new(name: category_config[:name], examples: group, weight: category_config[:weight])
9
+ categories << category
10
+ default = category if category_config[:name] == config[:default]
11
+ rescue
12
+ puts "You haven't provided trainingsdata for the category" + category_config[:name]
13
+ puts 'This category was not created.'
16
14
  end
17
- Categories.new(categories: categories)
18
-
19
-
20
- else
21
- config[:categories].each do |category_config|
22
- begin
23
- group = ExamplesGroup.new(examples: category_config[:examples], language_model: config[:language_model] )
24
- category = Category.new(name: category_config[:name], examples: group, weight: category_config[:weight])
25
- categories << category
26
- if category_config[:name] == config[:default]
27
- default = category
28
- end
29
- rescue
30
- puts "You haven't provided trainingsdata for the category" + category_config[:name]
31
- puts "This category was not created."
32
- end
33
- end
34
- Categories.new(categories: categories, default: default )
35
15
  end
16
+ Categories.new(categories: categories, default: default)
36
17
  end
37
18
  end
@@ -3,7 +3,6 @@ class Category
3
3
 
4
4
  attr_reader :name, :id, :weight
5
5
 
6
-
7
6
  def initialize(args)
8
7
  @name = args[:name]
9
8
  @examples = args[:examples]
@@ -14,7 +13,7 @@ class Category
14
13
  end
15
14
 
16
15
  def p(word)
17
- if(@examples.word_count>0)
16
+ if @examples.word_count > 0
18
17
  @examples.count(word).to_f / @examples.word_count
19
18
  else
20
19
  0
@@ -35,7 +34,7 @@ class NullCategory
35
34
  attr_reader :id
36
35
 
37
36
  def initialize
38
- @name = "No category"
37
+ @name = 'No category'
39
38
  @id = 0
40
39
  end
41
40
  end
@@ -6,13 +6,14 @@ class Example
6
6
  end
7
7
 
8
8
  private
9
- def load_text(args)
10
9
 
11
- end
10
+ def load_text(_args)
11
+ end
12
12
  end
13
13
 
14
14
  class FileExample < Example
15
15
  private
16
+
16
17
  def load_text(args)
17
18
  @text = File.read(args[:path])
18
19
  end
@@ -1,14 +1,14 @@
1
1
  class ExamplesFactory
2
2
  def self.from_files(dir_path)
3
- begin
4
- examples = []
5
- Dir.foreach(dir_path) do |file_path|
6
- next if file_path == '.' or file_path == '..'
7
- examples.push FileExample.new(path: dir_path+'/'+file_path)
8
- end
9
- rescue
10
- puts "Failed loading" + dir_path
3
+ begin
4
+ examples = []
5
+ Dir.foreach(dir_path) do |file_path|
6
+ next if file_path == '.' || file_path == '..'
7
+ examples.push FileExample.new(path: dir_path + '/' + file_path)
11
8
  end
9
+ rescue
10
+ puts 'Failed loading' + dir_path
11
+ end
12
12
  examples
13
13
  end
14
14
  end
@@ -1,13 +1,11 @@
1
1
  class ExamplesGroup
2
2
  def initialize(args)
3
3
  @examples = args[:examples].to_a || []
4
- @language_model = args[:language_model] || lambda {|str| str}
4
+ @language_model = args[:language_model] || ->(str) { str }
5
5
  load_text
6
6
  split_text_into_words
7
7
  format_words
8
- if @words.length == 0
9
- raise 'Empty_Trainingsdata'
10
- end
8
+ fail 'Empty_Trainingsdata' if @words.length == 0
11
9
  end
12
10
 
13
11
  def count(word)
@@ -20,20 +18,20 @@ class ExamplesGroup
20
18
 
21
19
  private
22
20
 
23
- def load_text
24
- @text = ''
25
- @examples.each do |example|
26
- @text += ' ' + example.text
27
- end
21
+ def load_text
22
+ @text = ''
23
+ @examples.each do |example|
24
+ @text += ' ' + example.text
28
25
  end
26
+ end
29
27
 
30
- def split_text_into_words
31
- @words = @text.split(/\W+/)
32
- end
28
+ def split_text_into_words
29
+ @words = @text.split(/\W+/)
30
+ end
33
31
 
34
- def format_words
35
- @words.map! {|word| word.downcase}
36
- @words.map! {|word| @language_model.call(word)}
37
- @words
38
- end
32
+ def format_words
33
+ @words.map!(&:downcase)
34
+ @words.map! { |word| @language_model.call(word) }
35
+ @words
36
+ end
39
37
  end
@@ -9,35 +9,35 @@ class ProbabilityCalculator
9
9
  @probabilities.normalize
10
10
  end
11
11
 
12
-
13
12
  private
14
- def protect_factor(factor)
15
- [factor, minimum].max
16
- end
17
13
 
18
- def minimum
19
- 1.to_f/(10*@categories.total_word_count)
20
- end
14
+ def protect_factor(factor)
15
+ [factor, minimum].max
16
+ end
21
17
 
22
- def calculateProbabilities(text)
23
- set_apriori_probabilities
24
- list_of_words = text.split(/\W+/)
25
- list_of_words.each do |word|
26
- @categories.each do |category|
27
- @probabilities.multiply(category: category, factor: protect_factor(category.p(word)) )
28
- end
29
- end
30
- remove_minimum(text)
31
- end
18
+ def minimum
19
+ 1.to_f / (10 * @categories.total_word_count)
20
+ end
32
21
 
33
- def set_apriori_probabilities
22
+ def calculateProbabilities(text)
23
+ set_apriori_probabilities
24
+ list_of_words = text.split(/\W+/)
25
+ list_of_words.each do |word|
34
26
  @categories.each do |category|
35
- @probabilities.set(category: category, value: @categories.p_apriori(category))
27
+ @probabilities.multiply(category: category, factor: protect_factor(category.p(word)))
36
28
  end
37
29
  end
30
+ remove_minimum(text)
31
+ end
38
32
 
39
- def remove_minimum(text)
40
- times = text.split(/\W+/).length
41
- @probabilities.greater_then(minimum**times)
33
+ def set_apriori_probabilities
34
+ @categories.each do |category|
35
+ @probabilities.set(category: category, value: @categories.p_apriori(category))
42
36
  end
37
+ end
38
+
39
+ def remove_minimum(text)
40
+ times = text.split(/\W+/).length
41
+ @probabilities.greater_then(minimum**times)
42
+ end
43
43
  end
@@ -1,18 +1,17 @@
1
1
  class ProbabilityCollection
2
2
  def initialize(args)
3
- @categories = args[:categories] || []
3
+ @categories = args[:categories] || []
4
4
  initialize_ids
5
5
  @probabilities = []
6
6
  initalize_probabilities(@ids)
7
7
  end
8
8
 
9
9
  def find(category)
10
- return @probabilities[category.id]
10
+ @probabilities[category.id]
11
11
  end
12
12
 
13
-
14
13
  def set(args)
15
- category = args[:category]
14
+ category = args[:category]
16
15
  value = args[:value]
17
16
  @probabilities[category.id] = value
18
17
  end
@@ -23,14 +22,14 @@ class ProbabilityCollection
23
22
  if category
24
23
  @probabilities[category.id] *= factor
25
24
  else
26
- @probabilities.map! {|el| el*factor}
25
+ @probabilities.map! { |el| el * factor }
27
26
  end
28
27
  end
29
28
 
30
29
  def normalize
31
- if self.sum > 0
32
- normalization_factor = 1.to_f / self.sum
33
- self.multiply(factor: normalization_factor)
30
+ if sum > 0
31
+ normalization_factor = 1.to_f / sum
32
+ multiply(factor: normalization_factor)
34
33
  end
35
34
  self
36
35
  end
@@ -38,7 +37,7 @@ class ProbabilityCollection
38
37
  def category_with_max
39
38
  if @probabilities.max > 0
40
39
  id = @probabilities.find_index(@probabilities.max)
41
- @categories.find {|category| category.id == id}
40
+ @categories.find { |category| category.id == id }
42
41
  else
43
42
  @categories.default
44
43
  end
@@ -50,11 +49,11 @@ class ProbabilityCollection
50
49
 
51
50
  def greater_then(value)
52
51
  @probabilities.map! do |p|
53
- if p > value
54
- p
55
- else
56
- 0
57
- end
52
+ if p > value
53
+ p
54
+ else
55
+ 0
56
+ end
58
57
  end
59
58
  end
60
59
 
@@ -67,15 +66,16 @@ class ProbabilityCollection
67
66
  @categories.each do |category|
68
67
  result << category.to_s
69
68
  result << ':'
70
- result << self.find(category).to_s
69
+ result << find(category).to_s
71
70
  result << "\n"
72
71
  end
73
72
  result
74
73
  end
75
74
 
76
75
  private
76
+
77
77
  def initialize_ids
78
- @ids = @categories.map { |category| category.id }
78
+ @ids = @categories.map(&:id)
79
79
  end
80
80
 
81
81
  def initalize_probabilities(ids)
@@ -1,6 +1,6 @@
1
1
  class TextClassifier
2
2
  attr_reader :categories
3
- def initialize( args )
3
+ def initialize(args)
4
4
  @categories = args[:categories]
5
5
  @calculator = args[:calculator] || ProbabilityCalculator.new(categories: @categories)
6
6
  end
@@ -13,12 +13,8 @@ class TextClassifier
13
13
  @calculator.get_probabilities_for(text)
14
14
  end
15
15
 
16
- def propabilities(text)
17
- puts "This notation is deprecated in will be removed in later versions. Please use probabilities (4th character b instead of p)"
18
- probabilities(text)
19
- end
16
+ private
20
17
 
21
- private
22
18
  def get_category_for(text)
23
19
  probabilities = @calculator.get_probabilities_for(text)
24
20
  @categories.each do |category|
@@ -1,3 +1,3 @@
1
1
  module NaiveText
2
- VERSION = "0.6.0"
2
+ VERSION = '1.0.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: NaiveText
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - RicciFlowing
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-12-01 00:00:00.000000000 Z
11
+ date: 2016-01-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler