NaiveText 0.6.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3b04e3a990ab60596a6e4067f3e6e6b7b762e9e7
4
- data.tar.gz: 95cefeef5c2030e33c7290eecb848ec85e3a4d86
3
+ metadata.gz: 50e030f17d9a465122b843bd773747c02eee7488
4
+ data.tar.gz: a6b6ac823fb3ac1e190a2fad37871e258f76bf5e
5
5
  SHA512:
6
- metadata.gz: d4b7734d40ca51cb0af57485ca7312007ba2ef0982f471cd3d95c000e488ea1d526bc6a03a84d52cfc1eeb41a3dc0793c986e7d9be49424ead2811042f0b8ce5
7
- data.tar.gz: aed39b603081561255c043fbd61d9de06e0e91a14a628e1b324589e8eb0f6d4d3428248b9e18c6f35bf79a21852f7a121256a8fa16530f0774960526eeab3deb
6
+ metadata.gz: 795b9f38baa41fb7899070394832d1d520f63711eae619e0995550984293bb631bb83059826d92444841a17df7297f566371c21d611a5433fff4ee5b3802e224
7
+ data.tar.gz: 7cd4d3aa96d4b237e98b062250e3cd61536eb705977c088bc4fb004275d4de6f58d3e9b598c7f4b4877fc29e726f3730d7c1614ba4934670faa786812529b17e
data/CHANGELOG.md CHANGED
@@ -2,6 +2,15 @@
2
2
  All notable changes to this project will be documented in this file.
3
3
  This project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
+ ## [1.0.0]- 2016-01-05
6
+ ### Changed
7
+ - Split up the integration specs. Removed some duplication in the specs.
8
+ - Refactored the specs to be more concise.
9
+ - Cleaned the source code to be more readable.
10
+ ### Deleted
11
+ - Removed old and deprecated array option for CategoriesFactory
12
+ - Removed the old, misspelled `propabilities` method on TextClassifier (use `probabilities` instead)
13
+
5
14
  ## [0.6.0]- 2015-11-30
6
15
  ### Added
7
16
  - Added an optional language_model that makes it possible to compare words based on their word stem (e.g. 'testing', 'tests', and 'tested' are all matched with the stem 'test').
data/Gemfile CHANGED
@@ -3,7 +3,6 @@ source 'https://rubygems.org'
3
3
  # Specify your gem's dependencies in NaiveText.gemspec
4
4
  gemspec
5
5
 
6
-
7
- spec.add_development_dependency "guard"
8
- spec.add_development_dependency "guard-rspec"
9
- spec.add_development_dependency "guard-rubocop"
6
+ spec.add_development_dependency 'guard'
7
+ spec.add_development_dependency 'guard-rspec'
8
+ spec.add_development_dependency 'guard-rubocop'
data/Guardfile CHANGED
@@ -24,8 +24,8 @@
24
24
  # * zeus: 'zeus rspec' (requires the server to be started separately)
25
25
  # * 'just' rspec: 'rspec'
26
26
 
27
- guard :rspec, cmd: "bundle exec rspec" do
28
- require "guard/rspec/dsl"
27
+ guard :rspec, cmd: 'bundle exec rspec' do
28
+ require 'guard/rspec/dsl'
29
29
  dsl = Guard::RSpec::Dsl.new(self)
30
30
 
31
31
  # Feel free to open issues for suggestions and improvements
@@ -39,10 +39,9 @@ guard :rspec, cmd: "bundle exec rspec" do
39
39
  # Ruby files
40
40
  ruby = dsl.ruby
41
41
  dsl.watch_spec_files_for(ruby.lib_files)
42
-
43
42
  end
44
43
 
45
44
  guard :rubocop, keep_failed: false do
46
- watch(%r{(.+\.rb)$})
45
+ watch(/(.+\.rb)$/)
47
46
  watch(%r{(?:.+/)?\.rubocop\.yml$}) { |m| File.dirname(m[0]) }
48
47
  end
data/NaiveText.gemspec CHANGED
@@ -4,27 +4,27 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'NaiveText/version'
5
5
 
6
6
  Gem::Specification.new do |spec|
7
- spec.name = "NaiveText"
7
+ spec.name = 'NaiveText'
8
8
  spec.version = NaiveText::VERSION
9
- spec.authors = ["RicciFlowing"]
10
- spec.email = ["benjamin@mathe-sellin.de"]
9
+ spec.authors = ['RicciFlowing']
10
+ spec.email = ['benjamin@mathe-sellin.de']
11
11
 
12
- spec.summary = "A text classifier written in ruby"
13
- spec.description = "NaiveText is a text classifier gem written in ruby and made to be easily integratable in your Rails app."
14
- spec.homepage = "https://github.com/RicciFlowing/NaiveText"
15
- spec.licenses = ['MIT']
12
+ spec.summary = 'A text classifier written in ruby'
13
+ spec.description = 'NaiveText is a text classifier gem written in ruby and made to be easily integratable in your Rails app.'
14
+ spec.homepage = 'https://github.com/RicciFlowing/NaiveText'
15
+ spec.licenses = ['MIT']
16
16
 
17
17
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
18
- spec.bindir = "exe"
18
+ spec.bindir = 'exe'
19
19
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
20
- spec.require_paths = ["lib"]
20
+ spec.require_paths = ['lib']
21
21
 
22
22
  spec.required_ruby_version = '>= 2.0.0'
23
23
 
24
24
  if spec.respond_to?(:metadata)
25
- spec.metadata['allowed_push_host'] = "https://rubygems.org"
25
+ spec.metadata['allowed_push_host'] = 'https://rubygems.org'
26
26
  end
27
27
 
28
- spec.add_development_dependency "bundler", "~> 1.8"
29
- spec.add_development_dependency "rake", "~> 10.0"
28
+ spec.add_development_dependency 'bundler', '~> 1.8'
29
+ spec.add_development_dependency 'rake', '~> 10.0'
30
30
  end
data/Rakefile CHANGED
@@ -1,2 +1 @@
1
- require "bundler/gem_tasks"
2
-
1
+ require 'bundler/gem_tasks'
data/bin/console CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "bundler/setup"
4
- require "NaiveText"
3
+ require 'bundler/setup'
4
+ require 'NaiveText'
5
5
 
6
6
  # You can add fixtures and/or initialization code here to make experimenting
7
7
  # with your gem easier. You can also use a different console, if you like.
@@ -10,5 +10,5 @@ require "NaiveText"
10
10
  # require "pry"
11
11
  # Pry.start
12
12
 
13
- require "irb"
13
+ require 'irb'
14
14
  IRB.start
data/lib/NaiveText.rb CHANGED
@@ -1,20 +1,17 @@
1
- require "NaiveText/version"
2
- require "NaiveText/Example"
3
- require "NaiveText/ExamplesFactory"
4
- require "NaiveText/ExamplesGroup"
5
- require "NaiveText/ProbabilityCollection"
6
- require "NaiveText/ProbabilityCalculator"
7
- require "NaiveText/TextClassifier"
8
- require "NaiveText/Category"
9
- require "NaiveText/Categories"
10
- require "NaiveText/CategoriesFactory"
11
-
12
-
1
+ require 'NaiveText/version'
2
+ require 'NaiveText/Example'
3
+ require 'NaiveText/ExamplesFactory'
4
+ require 'NaiveText/ExamplesGroup'
5
+ require 'NaiveText/ProbabilityCollection'
6
+ require 'NaiveText/ProbabilityCalculator'
7
+ require 'NaiveText/TextClassifier'
8
+ require 'NaiveText/Category'
9
+ require 'NaiveText/Categories'
10
+ require 'NaiveText/CategoriesFactory'
13
11
 
14
12
  module NaiveText
15
-
16
13
  def self.build(config)
17
- @categories = CategoriesFactory.build(config)
18
- @test_classifier = TextClassifier.new(categories: @categories)
14
+ @categories = CategoriesFactory.build(config)
15
+ @test_classifier = TextClassifier.new(categories: @categories)
19
16
  end
20
17
  end
@@ -17,14 +17,13 @@ class Categories
17
17
  end
18
18
 
19
19
  def total_word_count
20
- @categories.inject(0) { |count, category | count + category.word_count }
20
+ @categories.inject(0) { |count, category| count + category.word_count }
21
21
  end
22
22
 
23
23
  private
24
24
 
25
- def calculate_apriori_propability_for(category)
26
- sum_of_words = @categories.inject(0) {|sum, category| sum + category.word_count }
27
- category.word_count.to_f / sum_of_words
28
- end
29
-
25
+ def calculate_apriori_propability_for(category)
26
+ sum_of_words = @categories.inject(0) { |sum, category| sum + category.word_count }
27
+ category.word_count.to_f / sum_of_words
28
+ end
30
29
  end
@@ -2,36 +2,17 @@ class CategoriesFactory
2
2
  def self.build(config)
3
3
  categories = []
4
4
  default = nil
5
- if config.is_a?(Array)
6
- puts "The format [{name: name_of_category, path: path_to_trainings_data}] is deprecated and will be removed in version 1.0.0 (due in Jan. 2016). Use the following arguments instead: categories: [name: 'the name', examples:'An example']"
7
- config.each do |category_config|
8
- begin
9
- examples = ExamplesFactory.from_files(category_config[:path])
10
- group = ExamplesGroup.new(examples: examples)
11
- categories << Category.new(name: category_config[:name], examples: group )
12
- rescue
13
- puts "You haven't provided trainingsdata for the category" + category_config[:name]
14
- puts "This category was not created."
15
- end
5
+ config[:categories].each do |category_config|
6
+ begin
7
+ group = ExamplesGroup.new(examples: category_config[:examples], language_model: config[:language_model])
8
+ category = Category.new(name: category_config[:name], examples: group, weight: category_config[:weight])
9
+ categories << category
10
+ default = category if category_config[:name] == config[:default]
11
+ rescue
12
+ puts "You haven't provided trainingsdata for the category" + category_config[:name]
13
+ puts 'This category was not created.'
16
14
  end
17
- Categories.new(categories: categories)
18
-
19
-
20
- else
21
- config[:categories].each do |category_config|
22
- begin
23
- group = ExamplesGroup.new(examples: category_config[:examples], language_model: config[:language_model] )
24
- category = Category.new(name: category_config[:name], examples: group, weight: category_config[:weight])
25
- categories << category
26
- if category_config[:name] == config[:default]
27
- default = category
28
- end
29
- rescue
30
- puts "You haven't provided trainingsdata for the category" + category_config[:name]
31
- puts "This category was not created."
32
- end
33
- end
34
- Categories.new(categories: categories, default: default )
35
15
  end
16
+ Categories.new(categories: categories, default: default)
36
17
  end
37
18
  end
@@ -3,7 +3,6 @@ class Category
3
3
 
4
4
  attr_reader :name, :id, :weight
5
5
 
6
-
7
6
  def initialize(args)
8
7
  @name = args[:name]
9
8
  @examples = args[:examples]
@@ -14,7 +13,7 @@ class Category
14
13
  end
15
14
 
16
15
  def p(word)
17
- if(@examples.word_count>0)
16
+ if @examples.word_count > 0
18
17
  @examples.count(word).to_f / @examples.word_count
19
18
  else
20
19
  0
@@ -35,7 +34,7 @@ class NullCategory
35
34
  attr_reader :id
36
35
 
37
36
  def initialize
38
- @name = "No category"
37
+ @name = 'No category'
39
38
  @id = 0
40
39
  end
41
40
  end
@@ -6,13 +6,14 @@ class Example
6
6
  end
7
7
 
8
8
  private
9
- def load_text(args)
10
9
 
11
- end
10
+ def load_text(_args)
11
+ end
12
12
  end
13
13
 
14
14
  class FileExample < Example
15
15
  private
16
+
16
17
  def load_text(args)
17
18
  @text = File.read(args[:path])
18
19
  end
@@ -1,14 +1,14 @@
1
1
  class ExamplesFactory
2
2
  def self.from_files(dir_path)
3
- begin
4
- examples = []
5
- Dir.foreach(dir_path) do |file_path|
6
- next if file_path == '.' or file_path == '..'
7
- examples.push FileExample.new(path: dir_path+'/'+file_path)
8
- end
9
- rescue
10
- puts "Failed loading" + dir_path
3
+ begin
4
+ examples = []
5
+ Dir.foreach(dir_path) do |file_path|
6
+ next if file_path == '.' || file_path == '..'
7
+ examples.push FileExample.new(path: dir_path + '/' + file_path)
11
8
  end
9
+ rescue
10
+ puts 'Failed loading' + dir_path
11
+ end
12
12
  examples
13
13
  end
14
14
  end
@@ -1,13 +1,11 @@
1
1
  class ExamplesGroup
2
2
  def initialize(args)
3
3
  @examples = args[:examples].to_a || []
4
- @language_model = args[:language_model] || lambda {|str| str}
4
+ @language_model = args[:language_model] || ->(str) { str }
5
5
  load_text
6
6
  split_text_into_words
7
7
  format_words
8
- if @words.length == 0
9
- raise 'Empty_Trainingsdata'
10
- end
8
+ fail 'Empty_Trainingsdata' if @words.length == 0
11
9
  end
12
10
 
13
11
  def count(word)
@@ -20,20 +18,20 @@ class ExamplesGroup
20
18
 
21
19
  private
22
20
 
23
- def load_text
24
- @text = ''
25
- @examples.each do |example|
26
- @text += ' ' + example.text
27
- end
21
+ def load_text
22
+ @text = ''
23
+ @examples.each do |example|
24
+ @text += ' ' + example.text
28
25
  end
26
+ end
29
27
 
30
- def split_text_into_words
31
- @words = @text.split(/\W+/)
32
- end
28
+ def split_text_into_words
29
+ @words = @text.split(/\W+/)
30
+ end
33
31
 
34
- def format_words
35
- @words.map! {|word| word.downcase}
36
- @words.map! {|word| @language_model.call(word)}
37
- @words
38
- end
32
+ def format_words
33
+ @words.map!(&:downcase)
34
+ @words.map! { |word| @language_model.call(word) }
35
+ @words
36
+ end
39
37
  end
@@ -9,35 +9,35 @@ class ProbabilityCalculator
9
9
  @probabilities.normalize
10
10
  end
11
11
 
12
-
13
12
  private
14
- def protect_factor(factor)
15
- [factor, minimum].max
16
- end
17
13
 
18
- def minimum
19
- 1.to_f/(10*@categories.total_word_count)
20
- end
14
+ def protect_factor(factor)
15
+ [factor, minimum].max
16
+ end
21
17
 
22
- def calculateProbabilities(text)
23
- set_apriori_probabilities
24
- list_of_words = text.split(/\W+/)
25
- list_of_words.each do |word|
26
- @categories.each do |category|
27
- @probabilities.multiply(category: category, factor: protect_factor(category.p(word)) )
28
- end
29
- end
30
- remove_minimum(text)
31
- end
18
+ def minimum
19
+ 1.to_f / (10 * @categories.total_word_count)
20
+ end
32
21
 
33
- def set_apriori_probabilities
22
+ def calculateProbabilities(text)
23
+ set_apriori_probabilities
24
+ list_of_words = text.split(/\W+/)
25
+ list_of_words.each do |word|
34
26
  @categories.each do |category|
35
- @probabilities.set(category: category, value: @categories.p_apriori(category))
27
+ @probabilities.multiply(category: category, factor: protect_factor(category.p(word)))
36
28
  end
37
29
  end
30
+ remove_minimum(text)
31
+ end
38
32
 
39
- def remove_minimum(text)
40
- times = text.split(/\W+/).length
41
- @probabilities.greater_then(minimum**times)
33
+ def set_apriori_probabilities
34
+ @categories.each do |category|
35
+ @probabilities.set(category: category, value: @categories.p_apriori(category))
42
36
  end
37
+ end
38
+
39
+ def remove_minimum(text)
40
+ times = text.split(/\W+/).length
41
+ @probabilities.greater_then(minimum**times)
42
+ end
43
43
  end
@@ -1,18 +1,17 @@
1
1
  class ProbabilityCollection
2
2
  def initialize(args)
3
- @categories = args[:categories] || []
3
+ @categories = args[:categories] || []
4
4
  initialize_ids
5
5
  @probabilities = []
6
6
  initalize_probabilities(@ids)
7
7
  end
8
8
 
9
9
  def find(category)
10
- return @probabilities[category.id]
10
+ @probabilities[category.id]
11
11
  end
12
12
 
13
-
14
13
  def set(args)
15
- category = args[:category]
14
+ category = args[:category]
16
15
  value = args[:value]
17
16
  @probabilities[category.id] = value
18
17
  end
@@ -23,14 +22,14 @@ class ProbabilityCollection
23
22
  if category
24
23
  @probabilities[category.id] *= factor
25
24
  else
26
- @probabilities.map! {|el| el*factor}
25
+ @probabilities.map! { |el| el * factor }
27
26
  end
28
27
  end
29
28
 
30
29
  def normalize
31
- if self.sum > 0
32
- normalization_factor = 1.to_f / self.sum
33
- self.multiply(factor: normalization_factor)
30
+ if sum > 0
31
+ normalization_factor = 1.to_f / sum
32
+ multiply(factor: normalization_factor)
34
33
  end
35
34
  self
36
35
  end
@@ -38,7 +37,7 @@ class ProbabilityCollection
38
37
  def category_with_max
39
38
  if @probabilities.max > 0
40
39
  id = @probabilities.find_index(@probabilities.max)
41
- @categories.find {|category| category.id == id}
40
+ @categories.find { |category| category.id == id }
42
41
  else
43
42
  @categories.default
44
43
  end
@@ -50,11 +49,11 @@ class ProbabilityCollection
50
49
 
51
50
  def greater_then(value)
52
51
  @probabilities.map! do |p|
53
- if p > value
54
- p
55
- else
56
- 0
57
- end
52
+ if p > value
53
+ p
54
+ else
55
+ 0
56
+ end
58
57
  end
59
58
  end
60
59
 
@@ -67,15 +66,16 @@ class ProbabilityCollection
67
66
  @categories.each do |category|
68
67
  result << category.to_s
69
68
  result << ':'
70
- result << self.find(category).to_s
69
+ result << find(category).to_s
71
70
  result << "\n"
72
71
  end
73
72
  result
74
73
  end
75
74
 
76
75
  private
76
+
77
77
  def initialize_ids
78
- @ids = @categories.map { |category| category.id }
78
+ @ids = @categories.map(&:id)
79
79
  end
80
80
 
81
81
  def initalize_probabilities(ids)
@@ -1,6 +1,6 @@
1
1
  class TextClassifier
2
2
  attr_reader :categories
3
- def initialize( args )
3
+ def initialize(args)
4
4
  @categories = args[:categories]
5
5
  @calculator = args[:calculator] || ProbabilityCalculator.new(categories: @categories)
6
6
  end
@@ -13,12 +13,8 @@ class TextClassifier
13
13
  @calculator.get_probabilities_for(text)
14
14
  end
15
15
 
16
- def propabilities(text)
17
- puts "This notation is deprecated in will be removed in later versions. Please use probabilities (4th character b instead of p)"
18
- probabilities(text)
19
- end
16
+ private
20
17
 
21
- private
22
18
  def get_category_for(text)
23
19
  probabilities = @calculator.get_probabilities_for(text)
24
20
  @categories.each do |category|
@@ -1,3 +1,3 @@
1
1
  module NaiveText
2
- VERSION = "0.6.0"
2
+ VERSION = '1.0.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: NaiveText
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - RicciFlowing
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-12-01 00:00:00.000000000 Z
11
+ date: 2016-01-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler