NaiveText 0.4.2 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 99c7ed0d2ea0ab00ce13e284e537474e1bd48b5a
4
- data.tar.gz: 1feb4b6118ccfde5c54daca91c1386fbdcfe8e1b
3
+ metadata.gz: 6bffe658cb690c71d28fd693588064745e9640c8
4
+ data.tar.gz: 61ab533db552448d60e2db369b30894d80ea4db1
5
5
  SHA512:
6
- metadata.gz: 810c0f40cdd3852010a8bdbc831c6b8591ea47ddb0ea7154e64899c682cbbf875a7b17b4d0676a809a080f14f9d69810cbe4798c2540b57c2ae0851224458365
7
- data.tar.gz: a28bd4c31239537888d85cca8020ceca2506886a0e4d6bf6f57e78ed383e89bf5adebfebf56bdeda39649c4b5dfd01e8f43d6833101659c6cbc06921a0cd6480
6
+ metadata.gz: 93e4e8662da0b13d0f6eb1b92c3138d46d460b3011fc1fbf051680a8e49affe4560aef19b89ffca8ad37d2b24be505e31819763b24912d479b8aa430b82af965
7
+ data.tar.gz: 61aa7b9f67a81165e4a1910e4b212c00e2fe1bb1c27c7f86d2673500c73793d42f4f06d4ca28946b770417d81c2bb9265ef327de5549c98af7e6625939847f76
data/CHANGELOG.md CHANGED
@@ -2,8 +2,13 @@
2
2
  All notable changes to this project will be documented in this file.
3
3
  This project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
- ## [Unreleased]
6
- -Fixed a typo in the interface of TextClassifier propabilities --> probabilities. Deprecated the old version.
5
+ ## [0.5.0] - 2015-11-06
6
+ ### Added
7
+ - Added optional weighting to categories
8
+
9
+ ## [0.4.2] - 2015-11-02
10
+ ### Changed
11
+ - Fixed a typo in the interface of TextClassifier (propabilities --> probabilities). Deprecated the old version.
7
12
 
8
13
  ## [0.4.1] - 2015-10-29
9
14
  ### Added
@@ -7,18 +7,20 @@ class CategoriesFactory
7
7
  begin
8
8
  examples = ExamplesFactory.from_files(category_config[:path])
9
9
  group = ExamplesGroup.new(examples: examples)
10
- categories << Category.new(name: category_config[:name], examples: group)
10
+ categories << Category.new(name: category_config[:name], examples: group )
11
11
  rescue
12
12
  puts "You haven't provided trainingsdata for the category" + category_config[:name]
13
13
  puts "This category was not created."
14
14
  end
15
15
  end
16
16
  Categories.new(categories: categories)
17
+
18
+
17
19
  else
18
20
  config[:categories].each do |category_config|
19
21
  begin
20
22
  group = ExamplesGroup.new(examples: category_config[:examples])
21
- categories << Category.new(name: category_config[:name], examples: group)
23
+ categories << Category.new(name: category_config[:name], examples: group, weight: category_config[:weight])
22
24
  rescue
23
25
  puts "You haven't provided trainingsdata for the category" + category_config[:name]
24
26
  puts "This category was not created."
@@ -1,13 +1,15 @@
1
1
  class Category
2
2
  @@id_counter = 1
3
3
 
4
- attr_reader :name
5
- attr_reader :id
4
+ attr_reader :name, :id, :weight
5
+
6
6
 
7
7
  def initialize(args)
8
- @name = args[:name]
8
+ @name = args[:name]
9
9
  @examples = args[:examples]
10
- @id = @@id_counter
10
+ @weight = args[:weight] || 1
11
+ @id = @@id_counter
12
+
11
13
  @@id_counter += 1
12
14
  end
13
15
 
@@ -6,8 +6,7 @@ class ProbabilityCalculator
6
6
 
7
7
  def get_probabilities_for(text)
8
8
  calculateProbabilities(text)
9
- normalize unless @probabilities.sum <= 0
10
- @probabilities
9
+ @probabilities.normalize
11
10
  end
12
11
 
13
12
 
@@ -33,7 +32,7 @@ class ProbabilityCalculator
33
32
 
34
33
  def set_apriori_probabilities
35
34
  @categories.each do |category|
36
- @probabilities.set(category: category, value: p_apriori(category))
35
+ @probabilities.set(category: category, value: @categories.p_apriori(category))
37
36
  end
38
37
  end
39
38
 
@@ -41,13 +40,4 @@ class ProbabilityCalculator
41
40
  times = text.split(/\W+/).length
42
41
  @probabilities.greater_then(minimum**times)
43
42
  end
44
-
45
- def normalize
46
- normalization_factor = 1.to_f / @probabilities.sum
47
- @probabilities.multiply(factor: normalization_factor)
48
- end
49
-
50
- def p_apriori(category)
51
- @categories.p_apriori(category)
52
- end
53
43
  end
@@ -14,7 +14,7 @@ class ProbabilityCollection
14
14
  def set(args)
15
15
  category = args[:category]
16
16
  value = args[:value]
17
- @probabilities[category.id] = value
17
+ @probabilities[category.id] = value
18
18
  end
19
19
 
20
20
  def multiply(args)
@@ -27,6 +27,14 @@ class ProbabilityCollection
27
27
  end
28
28
  end
29
29
 
30
+ def normalize
31
+ if self.sum > 0
32
+ normalization_factor = 1.to_f / self.sum
33
+ self.multiply(factor: normalization_factor)
34
+ end
35
+ self
36
+ end
37
+
30
38
  def category_with_max
31
39
  if @probabilities.max > 0
32
40
  id = @probabilities.find_index(@probabilities.max)
@@ -21,6 +21,9 @@ class TextClassifier
21
21
  private
22
22
  def get_category_for(text)
23
23
  probabilities = @calculator.get_probabilities_for(text)
24
+ @categories.each do |category|
25
+ probabilities.multiply(category: category, factor: category.weight)
26
+ end
24
27
  probabilities.category_with_max
25
28
  end
26
29
  end
@@ -1,3 +1,3 @@
1
1
  module NaiveText
2
- VERSION = "0.4.2"
2
+ VERSION = "0.5.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: NaiveText
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - RicciFlowing
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-11-02 00:00:00.000000000 Z
11
+ date: 2015-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler