NaiveText 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 99c7ed0d2ea0ab00ce13e284e537474e1bd48b5a
4
- data.tar.gz: 1feb4b6118ccfde5c54daca91c1386fbdcfe8e1b
3
+ metadata.gz: 6bffe658cb690c71d28fd693588064745e9640c8
4
+ data.tar.gz: 61ab533db552448d60e2db369b30894d80ea4db1
5
5
  SHA512:
6
- metadata.gz: 810c0f40cdd3852010a8bdbc831c6b8591ea47ddb0ea7154e64899c682cbbf875a7b17b4d0676a809a080f14f9d69810cbe4798c2540b57c2ae0851224458365
7
- data.tar.gz: a28bd4c31239537888d85cca8020ceca2506886a0e4d6bf6f57e78ed383e89bf5adebfebf56bdeda39649c4b5dfd01e8f43d6833101659c6cbc06921a0cd6480
6
+ metadata.gz: 93e4e8662da0b13d0f6eb1b92c3138d46d460b3011fc1fbf051680a8e49affe4560aef19b89ffca8ad37d2b24be505e31819763b24912d479b8aa430b82af965
7
+ data.tar.gz: 61aa7b9f67a81165e4a1910e4b212c00e2fe1bb1c27c7f86d2673500c73793d42f4f06d4ca28946b770417d81c2bb9265ef327de5549c98af7e6625939847f76
data/CHANGELOG.md CHANGED
@@ -2,8 +2,13 @@
2
2
  All notable changes to this project will be documented in this file.
3
3
  This project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
- ## [Unreleased]
6
- -Fixed a typo in the interface of TextClassifier propabilities --> probabilities. Deprecated the old version.
5
+ ## [0.5.0] - 2015-11-06
6
+ ### Added
7
+ - Added optional weighting to categories
8
+
9
+ ## [0.4.2] - 2015-11-02
10
+ ### Changed
11
+ - Fixed a typo in the TextClassifier interface: propabilities --> probabilities. The old spelling is deprecated.
7
12
 
8
13
  ## [0.4.1] - 2015-10-29
9
14
  ### Added
@@ -7,18 +7,20 @@ class CategoriesFactory
7
7
  begin
8
8
  examples = ExamplesFactory.from_files(category_config[:path])
9
9
  group = ExamplesGroup.new(examples: examples)
10
- categories << Category.new(name: category_config[:name], examples: group)
10
+ categories << Category.new(name: category_config[:name], examples: group )
11
11
  rescue
12
12
  puts "You haven't provided trainingsdata for the category" + category_config[:name]
13
13
  puts "This category was not created."
14
14
  end
15
15
  end
16
16
  Categories.new(categories: categories)
17
+
18
+
17
19
  else
18
20
  config[:categories].each do |category_config|
19
21
  begin
20
22
  group = ExamplesGroup.new(examples: category_config[:examples])
21
- categories << Category.new(name: category_config[:name], examples: group)
23
+ categories << Category.new(name: category_config[:name], examples: group, weight: category_config[:weight])
22
24
  rescue
23
25
  puts "You haven't provided trainingsdata for the category" + category_config[:name]
24
26
  puts "This category was not created."
@@ -1,13 +1,15 @@
1
1
  class Category
2
2
  @@id_counter = 1
3
3
 
4
- attr_reader :name
5
- attr_reader :id
4
+ attr_reader :name, :id, :weight
5
+
6
6
 
7
7
  def initialize(args)
8
- @name = args[:name]
8
+ @name = args[:name]
9
9
  @examples = args[:examples]
10
- @id = @@id_counter
10
+ @weight = args[:weight] || 1
11
+ @id = @@id_counter
12
+
11
13
  @@id_counter += 1
12
14
  end
13
15
 
@@ -6,8 +6,7 @@ class ProbabilityCalculator
6
6
 
7
7
  def get_probabilities_for(text)
8
8
  calculateProbabilities(text)
9
- normalize unless @probabilities.sum <= 0
10
- @probabilities
9
+ @probabilities.normalize
11
10
  end
12
11
 
13
12
 
@@ -33,7 +32,7 @@ class ProbabilityCalculator
33
32
 
34
33
  def set_apriori_probabilities
35
34
  @categories.each do |category|
36
- @probabilities.set(category: category, value: p_apriori(category))
35
+ @probabilities.set(category: category, value: @categories.p_apriori(category))
37
36
  end
38
37
  end
39
38
 
@@ -41,13 +40,4 @@ class ProbabilityCalculator
41
40
  times = text.split(/\W+/).length
42
41
  @probabilities.greater_then(minimum**times)
43
42
  end
44
-
45
- def normalize
46
- normalization_factor = 1.to_f / @probabilities.sum
47
- @probabilities.multiply(factor: normalization_factor)
48
- end
49
-
50
- def p_apriori(category)
51
- @categories.p_apriori(category)
52
- end
53
43
  end
@@ -14,7 +14,7 @@ class ProbabilityCollection
14
14
  def set(args)
15
15
  category = args[:category]
16
16
  value = args[:value]
17
- @probabilities[category.id] = value
17
+ @probabilities[category.id] = value
18
18
  end
19
19
 
20
20
  def multiply(args)
@@ -27,6 +27,14 @@ class ProbabilityCollection
27
27
  end
28
28
  end
29
29
 
30
+ def normalize
31
+ if self.sum > 0
32
+ normalization_factor = 1.to_f / self.sum
33
+ self.multiply(factor: normalization_factor)
34
+ end
35
+ self
36
+ end
37
+
30
38
  def category_with_max
31
39
  if @probabilities.max > 0
32
40
  id = @probabilities.find_index(@probabilities.max)
@@ -21,6 +21,9 @@ class TextClassifier
21
21
  private
22
22
  def get_category_for(text)
23
23
  probabilities = @calculator.get_probabilities_for(text)
24
+ @categories.each do |category|
25
+ probabilities.multiply(category: category, factor: category.weight)
26
+ end
24
27
  probabilities.category_with_max
25
28
  end
26
29
  end
@@ -1,3 +1,3 @@
1
1
  module NaiveText
2
- VERSION = "0.4.2"
2
+ VERSION = "0.5.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: NaiveText
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - RicciFlowing
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-11-02 00:00:00.000000000 Z
11
+ date: 2015-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler