NaiveText 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -2
- data/lib/NaiveText/CategoriesFactory.rb +4 -2
- data/lib/NaiveText/Category.rb +6 -4
- data/lib/NaiveText/ProbabilityCalculator.rb +2 -12
- data/lib/NaiveText/ProbabilityCollection.rb +9 -1
- data/lib/NaiveText/TextClassifier.rb +3 -0
- data/lib/NaiveText/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6bffe658cb690c71d28fd693588064745e9640c8
|
4
|
+
data.tar.gz: 61ab533db552448d60e2db369b30894d80ea4db1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 93e4e8662da0b13d0f6eb1b92c3138d46d460b3011fc1fbf051680a8e49affe4560aef19b89ffca8ad37d2b24be505e31819763b24912d479b8aa430b82af965
|
7
|
+
data.tar.gz: 61aa7b9f67a81165e4a1910e4b212c00e2fe1bb1c27c7f86d2673500c73793d42f4f06d4ca28946b770417d81c2bb9265ef327de5549c98af7e6625939847f76
|
data/CHANGELOG.md
CHANGED
@@ -2,8 +2,13 @@
|
|
2
2
|
All notable changes to this project will be documented in this file.
|
3
3
|
This project adheres to [Semantic Versioning](http://semver.org/).
|
4
4
|
|
5
|
-
## [
|
6
|
-
|
5
|
+
## [0.5.0] - 2015-11-06
|
6
|
+
### Added
|
7
|
+
- Added optional weighting to categories
|
8
|
+
|
9
|
+
## [0.4.2] - 2015-11-02
|
10
|
+
### Changed
|
11
|
+
- Fixed a typo in the interface of TextClassifier: propabilities --> probabilities. Deprecated the old version.
|
7
12
|
|
8
13
|
## [0.4.1] - 2015-10-29
|
9
14
|
### Added
|
@@ -7,18 +7,20 @@ class CategoriesFactory
|
|
7
7
|
begin
|
8
8
|
examples = ExamplesFactory.from_files(category_config[:path])
|
9
9
|
group = ExamplesGroup.new(examples: examples)
|
10
|
-
categories << Category.new(name: category_config[:name], examples: group)
|
10
|
+
categories << Category.new(name: category_config[:name], examples: group )
|
11
11
|
rescue
|
12
12
|
puts "You haven't provided trainingsdata for the category" + category_config[:name]
|
13
13
|
puts "This category was not created."
|
14
14
|
end
|
15
15
|
end
|
16
16
|
Categories.new(categories: categories)
|
17
|
+
|
18
|
+
|
17
19
|
else
|
18
20
|
config[:categories].each do |category_config|
|
19
21
|
begin
|
20
22
|
group = ExamplesGroup.new(examples: category_config[:examples])
|
21
|
-
categories << Category.new(name: category_config[:name], examples: group)
|
23
|
+
categories << Category.new(name: category_config[:name], examples: group, weight: category_config[:weight])
|
22
24
|
rescue
|
23
25
|
puts "You haven't provided trainingsdata for the category" + category_config[:name]
|
24
26
|
puts "This category was not created."
|
data/lib/NaiveText/Category.rb
CHANGED
@@ -1,13 +1,15 @@
|
|
1
1
|
class Category
|
2
2
|
@@id_counter = 1
|
3
3
|
|
4
|
-
attr_reader :name
|
5
|
-
|
4
|
+
attr_reader :name, :id, :weight
|
5
|
+
|
6
6
|
|
7
7
|
def initialize(args)
|
8
|
-
@name
|
8
|
+
@name = args[:name]
|
9
9
|
@examples = args[:examples]
|
10
|
-
@
|
10
|
+
@weight = args[:weight] || 1
|
11
|
+
@id = @@id_counter
|
12
|
+
|
11
13
|
@@id_counter += 1
|
12
14
|
end
|
13
15
|
|
@@ -6,8 +6,7 @@ class ProbabilityCalculator
|
|
6
6
|
|
7
7
|
def get_probabilities_for(text)
|
8
8
|
calculateProbabilities(text)
|
9
|
-
|
10
|
-
@probabilities
|
9
|
+
@probabilities.normalize
|
11
10
|
end
|
12
11
|
|
13
12
|
|
@@ -33,7 +32,7 @@ class ProbabilityCalculator
|
|
33
32
|
|
34
33
|
def set_apriori_probabilities
|
35
34
|
@categories.each do |category|
|
36
|
-
@probabilities.set(category: category, value: p_apriori(category))
|
35
|
+
@probabilities.set(category: category, value: @categories.p_apriori(category))
|
37
36
|
end
|
38
37
|
end
|
39
38
|
|
@@ -41,13 +40,4 @@ class ProbabilityCalculator
|
|
41
40
|
times = text.split(/\W+/).length
|
42
41
|
@probabilities.greater_then(minimum**times)
|
43
42
|
end
|
44
|
-
|
45
|
-
def normalize
|
46
|
-
normalization_factor = 1.to_f / @probabilities.sum
|
47
|
-
@probabilities.multiply(factor: normalization_factor)
|
48
|
-
end
|
49
|
-
|
50
|
-
def p_apriori(category)
|
51
|
-
@categories.p_apriori(category)
|
52
|
-
end
|
53
43
|
end
|
@@ -14,7 +14,7 @@ class ProbabilityCollection
|
|
14
14
|
def set(args)
|
15
15
|
category = args[:category]
|
16
16
|
value = args[:value]
|
17
|
-
|
17
|
+
@probabilities[category.id] = value
|
18
18
|
end
|
19
19
|
|
20
20
|
def multiply(args)
|
@@ -27,6 +27,14 @@ class ProbabilityCollection
|
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
30
|
+
def normalize
|
31
|
+
if self.sum > 0
|
32
|
+
normalization_factor = 1.to_f / self.sum
|
33
|
+
self.multiply(factor: normalization_factor)
|
34
|
+
end
|
35
|
+
self
|
36
|
+
end
|
37
|
+
|
30
38
|
def category_with_max
|
31
39
|
if @probabilities.max > 0
|
32
40
|
id = @probabilities.find_index(@probabilities.max)
|
@@ -21,6 +21,9 @@ class TextClassifier
|
|
21
21
|
private
|
22
22
|
def get_category_for(text)
|
23
23
|
probabilities = @calculator.get_probabilities_for(text)
|
24
|
+
@categories.each do |category|
|
25
|
+
probabilities.multiply(category: category, factor: category.weight)
|
26
|
+
end
|
24
27
|
probabilities.category_with_max
|
25
28
|
end
|
26
29
|
end
|
data/lib/NaiveText/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: NaiveText
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- RicciFlowing
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|