NaiveText 0.2.0 → 0.3.0

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 08919bb72416392eb059f571a589335e5aae0dc2
4
- data.tar.gz: cabef8b8d33a2ac2dd811494d02aa0a527d22a87
3
+ metadata.gz: ac7ce8cec1a92d0c067f4953bf28318afed99583
4
+ data.tar.gz: 47337502e94c528349eba4bb34e34bb81a9aa810
5
5
  SHA512:
6
- metadata.gz: 6872002c6eae15ad91a59ea8ff952dda08a9d4152f99d245fbaa4b0080a847d54d28081120d055ddf30b3a4e956636bc7f81bfc8444aa7559166389fa5dc75a7
7
- data.tar.gz: 9f693eff197c98c0a5008207eb2990a7b4209d660cf8e41f947fbb7972a3e1d95875f34903fee8f608bb685910846634994b08664cb6406bf7eedb129a0b5da4
6
+ metadata.gz: f7ab2fbec59e6dadb29ae98b10192e7ac5dd0ddd4e9dbdb0fe43ff289f4a354014170ec1eb55840b57a95dadfc12428880475ad517238cb0813d439c34c4cb2b
7
+ data.tar.gz: 72f0d51bf15c8dee374f876356f4b0af47f1af9b74d1056568adf0c8a8fbc0e45119b0f585c643e3a1913f757a4303cf3af16a01edf0096c0729769039d78fe4
@@ -13,6 +13,10 @@ class Categories
13
13
  @categories.each(&block)
14
14
  end
15
15
 
16
+ def total_word_count
17
+ @categories.inject(0) { |count, category | count + category.word_count }
18
+ end
19
+
16
20
  private
17
21
 
18
22
  def calculate_apriori_propability_for(category)
@@ -3,8 +3,13 @@ class CategoriesFactory
3
3
  categories = []
4
4
 
5
5
  config.each do |category_config|
6
- examples = ExamplesGroup.new(category_config[:path])
7
- categories << Category.new(name: category_config[:name], examples: examples)
6
+ begin
7
+ examples = ExamplesGroup.new(category_config[:path])
8
+ categories << Category.new(name: category_config[:name], examples: examples)
9
+ rescue
10
+ puts "You haven't provided trainingsdata for the category" + category_config[:name]
11
+ puts "This category was not created."
12
+ end
8
13
  end
9
14
 
10
15
  Categories.new(categories: categories)
@@ -12,12 +12,20 @@ class Category
12
12
  end
13
13
 
14
14
  def p(word)
15
- @examples.count(word).to_f / @examples.word_count
15
+ if(@examples.word_count>0)
16
+ @examples.count(word).to_f / @examples.word_count
17
+ else
18
+ 0
19
+ end
16
20
  end
17
21
 
18
22
  def word_count
19
23
  @examples.word_count
20
24
  end
25
+
26
+ def to_s
27
+ @name
28
+ end
21
29
  end
22
30
 
23
31
  class NullCategory
@@ -2,6 +2,9 @@ class ExamplesGroup
2
2
  def initialize(path)
3
3
  @text = load_text(path)
4
4
  @words = @text.split(/\W+/)
5
+ if @words.length == 0
6
+ raise 'Empty Trainingsdata'
7
+ end
5
8
  end
6
9
 
7
10
  def count(word)
@@ -4,24 +4,34 @@ class PropabilityCalculator
4
4
  @propabilities = PropabilityCollection.new(categories: @categories)
5
5
  end
6
6
 
7
- def get_propabilities_for(words)
8
- calculateProbabilities(words)
9
- if(@propabilities.sum > 0)
10
- normalize
11
- end
7
+ def get_propabilities_for(text)
8
+ calculateProbabilities(text)
9
+ normalize unless @propabilities.sum < 0
12
10
  @propabilities
13
11
  end
14
12
 
13
+
15
14
  private
15
+ def minimum
16
+ minimum = 1.to_f/(10*@categories.total_word_count)
17
+ end
16
18
 
17
- def calculateProbabilities(list_of_words)
19
+ def min_factor(factor)
20
+ if factor.to_f < minimum
21
+ factor = minimum
22
+ end
23
+ factor
24
+ end
25
+
26
+ def calculateProbabilities(text)
27
+ list_of_words = text.split(/\W+/)
18
28
  @categories.each do |category|
19
29
  @propabilities.set(category: category, value: p_apriori(category))
20
30
  end
21
31
 
22
32
  list_of_words.each do |word|
23
33
  @categories.each do |category|
24
- @propabilities.multiply(category: category, factor: category.p(word) )
34
+ @propabilities.multiply(category: category, factor: min_factor(category.p(word)) )
25
35
  end
26
36
  end
27
37
  end
@@ -36,5 +46,4 @@ class PropabilityCalculator
36
46
  end
37
47
 
38
48
 
39
-
40
49
  end
@@ -27,15 +27,30 @@ class PropabilityCollection
27
27
  end
28
28
  end
29
29
 
30
- def max
30
+ def category_with_max
31
31
  id = @propabilities.find_index(@propabilities.max)
32
32
  @categories.find {|category| category.id == id}
33
33
  end
34
34
 
35
+ def max
36
+ @propabilities.max
37
+ end
38
+
35
39
  def sum
36
40
  @propabilities.reduce(:+)
37
41
  end
38
42
 
43
+ def to_s
44
+ result = ''
45
+ @categories.each do |category|
46
+ result << category.to_s
47
+ result << ':'
48
+ result << self.find(category).to_s
49
+ result << '\n'
50
+ end
51
+ result
52
+ end
53
+
39
54
  private
40
55
  def initialize_ids
41
56
  @ids = @categories.map { |category| category.id }
@@ -1,21 +1,21 @@
1
1
  class TextClassifier
2
+ attr_reader :categories
2
3
  def initialize( args )
3
4
  @categories = args[:categories]
4
5
  @calculator = args[:calculator] || PropabilityCalculator.new(categories: @categories)
5
6
  end
6
7
 
7
8
  def classify(text)
8
- words = text.split(/\W+/)
9
- get_category_for(words)
9
+ get_category_for(text)
10
10
  end
11
11
 
12
- def get_category_for(list_of_words)
13
- propabilities = @calculator.get_propabilities_for(list_of_words)
14
- if(propabilities.sum == 0)
15
- NullCategory.new
16
- else
17
- propabilities.max
18
- end
12
+ def propabilities(text)
13
+ @calculator.get_propabilities(text)
19
14
  end
20
15
 
16
+ private
17
+ def get_category_for(text)
18
+ propabilities = @calculator.get_propabilities_for(text)
19
+ propabilities.category_with_max
20
+ end
21
21
  end
@@ -1,3 +1,3 @@
1
1
  module NaiveText
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.0"
3
3
  end
data/lib/NaiveText.rb CHANGED
@@ -7,8 +7,10 @@ require "NaiveText/Category"
7
7
  require "NaiveText/Categories"
8
8
  require "NaiveText/CategoriesFactory"
9
9
 
10
+
11
+
10
12
  module NaiveText
11
-
13
+
12
14
  def self.build(config)
13
15
  begin
14
16
  @categories = CategoriesFactory.build(config)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: NaiveText
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - RicciFlowing
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-10-13 00:00:00.000000000 Z
11
+ date: 2015-10-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler