NaiveText 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 08919bb72416392eb059f571a589335e5aae0dc2
4
- data.tar.gz: cabef8b8d33a2ac2dd811494d02aa0a527d22a87
3
+ metadata.gz: ac7ce8cec1a92d0c067f4953bf28318afed99583
4
+ data.tar.gz: 47337502e94c528349eba4bb34e34bb81a9aa810
5
5
  SHA512:
6
- metadata.gz: 6872002c6eae15ad91a59ea8ff952dda08a9d4152f99d245fbaa4b0080a847d54d28081120d055ddf30b3a4e956636bc7f81bfc8444aa7559166389fa5dc75a7
7
- data.tar.gz: 9f693eff197c98c0a5008207eb2990a7b4209d660cf8e41f947fbb7972a3e1d95875f34903fee8f608bb685910846634994b08664cb6406bf7eedb129a0b5da4
6
+ metadata.gz: f7ab2fbec59e6dadb29ae98b10192e7ac5dd0ddd4e9dbdb0fe43ff289f4a354014170ec1eb55840b57a95dadfc12428880475ad517238cb0813d439c34c4cb2b
7
+ data.tar.gz: 72f0d51bf15c8dee374f876356f4b0af47f1af9b74d1056568adf0c8a8fbc0e45119b0f585c643e3a1913f757a4303cf3af16a01edf0096c0729769039d78fe4
@@ -13,6 +13,10 @@ class Categories
13
13
  @categories.each(&block)
14
14
  end
15
15
 
16
+ def total_word_count
17
+ @categories.inject(0) { |count, category | count + category.word_count }
18
+ end
19
+
16
20
  private
17
21
 
18
22
  def calculate_apriori_propability_for(category)
@@ -3,8 +3,13 @@ class CategoriesFactory
3
3
  categories = []
4
4
 
5
5
  config.each do |category_config|
6
- examples = ExamplesGroup.new(category_config[:path])
7
- categories << Category.new(name: category_config[:name], examples: examples)
6
+ begin
7
+ examples = ExamplesGroup.new(category_config[:path])
8
+ categories << Category.new(name: category_config[:name], examples: examples)
9
+ rescue
10
+ puts "You haven't provided trainingsdata for the category" + category_config[:name]
11
+ puts "This category was not created."
12
+ end
8
13
  end
9
14
 
10
15
  Categories.new(categories: categories)
@@ -12,12 +12,20 @@ class Category
12
12
  end
13
13
 
14
14
  def p(word)
15
- @examples.count(word).to_f / @examples.word_count
15
+ if(@examples.word_count>0)
16
+ @examples.count(word).to_f / @examples.word_count
17
+ else
18
+ 0
19
+ end
16
20
  end
17
21
 
18
22
  def word_count
19
23
  @examples.word_count
20
24
  end
25
+
26
+ def to_s
27
+ @name
28
+ end
21
29
  end
22
30
 
23
31
  class NullCategory
@@ -2,6 +2,9 @@ class ExamplesGroup
2
2
  def initialize(path)
3
3
  @text = load_text(path)
4
4
  @words = @text.split(/\W+/)
5
+ if @words.length == 0
6
+ raise 'Empty Trainingsdata'
7
+ end
5
8
  end
6
9
 
7
10
  def count(word)
@@ -4,24 +4,34 @@ class PropabilityCalculator
4
4
  @propabilities = PropabilityCollection.new(categories: @categories)
5
5
  end
6
6
 
7
- def get_propabilities_for(words)
8
- calculateProbabilities(words)
9
- if(@propabilities.sum > 0)
10
- normalize
11
- end
7
+ def get_propabilities_for(text)
8
+ calculateProbabilities(text)
9
+ normalize unless @propabilities.sum < 0
12
10
  @propabilities
13
11
  end
14
12
 
13
+
15
14
  private
15
+ def minimum
16
+ minimum = 1.to_f/(10*@categories.total_word_count)
17
+ end
16
18
 
17
- def calculateProbabilities(list_of_words)
19
+ def min_factor(factor)
20
+ if factor.to_f < minimum
21
+ factor = minimum
22
+ end
23
+ factor
24
+ end
25
+
26
+ def calculateProbabilities(text)
27
+ list_of_words = text.split(/\W+/)
18
28
  @categories.each do |category|
19
29
  @propabilities.set(category: category, value: p_apriori(category))
20
30
  end
21
31
 
22
32
  list_of_words.each do |word|
23
33
  @categories.each do |category|
24
- @propabilities.multiply(category: category, factor: category.p(word) )
34
+ @propabilities.multiply(category: category, factor: min_factor(category.p(word)) )
25
35
  end
26
36
  end
27
37
  end
@@ -36,5 +46,4 @@ class PropabilityCalculator
36
46
  end
37
47
 
38
48
 
39
-
40
49
  end
@@ -27,15 +27,30 @@ class PropabilityCollection
27
27
  end
28
28
  end
29
29
 
30
- def max
30
+ def category_with_max
31
31
  id = @propabilities.find_index(@propabilities.max)
32
32
  @categories.find {|category| category.id == id}
33
33
  end
34
34
 
35
+ def max
36
+ @propabilities.max
37
+ end
38
+
35
39
  def sum
36
40
  @propabilities.reduce(:+)
37
41
  end
38
42
 
43
+ def to_s
44
+ result = ''
45
+ @categories.each do |category|
46
+ result << category.to_s
47
+ result << ':'
48
+ result << self.find(category).to_s
49
+ result << '\n'
50
+ end
51
+ result
52
+ end
53
+
39
54
  private
40
55
  def initialize_ids
41
56
  @ids = @categories.map { |category| category.id }
@@ -1,21 +1,21 @@
1
1
  class TextClassifier
2
+ attr_reader :categories
2
3
  def initialize( args )
3
4
  @categories = args[:categories]
4
5
  @calculator = args[:calculator] || PropabilityCalculator.new(categories: @categories)
5
6
  end
6
7
 
7
8
  def classify(text)
8
- words = text.split(/\W+/)
9
- get_category_for(words)
9
+ get_category_for(text)
10
10
  end
11
11
 
12
- def get_category_for(list_of_words)
13
- propabilities = @calculator.get_propabilities_for(list_of_words)
14
- if(propabilities.sum == 0)
15
- NullCategory.new
16
- else
17
- propabilities.max
18
- end
12
+ def propabilities(text)
13
+ @calculator.get_propabilities(text)
19
14
  end
20
15
 
16
+ private
17
+ def get_category_for(text)
18
+ propabilities = @calculator.get_propabilities_for(text)
19
+ propabilities.category_with_max
20
+ end
21
21
  end
@@ -1,3 +1,3 @@
1
1
  module NaiveText
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.0"
3
3
  end
data/lib/NaiveText.rb CHANGED
@@ -7,8 +7,10 @@ require "NaiveText/Category"
7
7
  require "NaiveText/Categories"
8
8
  require "NaiveText/CategoriesFactory"
9
9
 
10
+
11
+
10
12
  module NaiveText
11
-
13
+
12
14
  def self.build(config)
13
15
  begin
14
16
  @categories = CategoriesFactory.build(config)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: NaiveText
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - RicciFlowing
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-10-13 00:00:00.000000000 Z
11
+ date: 2015-10-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler