NaiveText 0.4.2 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -2
- data/lib/NaiveText/CategoriesFactory.rb +4 -2
- data/lib/NaiveText/Category.rb +6 -4
- data/lib/NaiveText/ProbabilityCalculator.rb +2 -12
- data/lib/NaiveText/ProbabilityCollection.rb +9 -1
- data/lib/NaiveText/TextClassifier.rb +3 -0
- data/lib/NaiveText/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6bffe658cb690c71d28fd693588064745e9640c8
|
4
|
+
data.tar.gz: 61ab533db552448d60e2db369b30894d80ea4db1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 93e4e8662da0b13d0f6eb1b92c3138d46d460b3011fc1fbf051680a8e49affe4560aef19b89ffca8ad37d2b24be505e31819763b24912d479b8aa430b82af965
|
7
|
+
data.tar.gz: 61aa7b9f67a81165e4a1910e4b212c00e2fe1bb1c27c7f86d2673500c73793d42f4f06d4ca28946b770417d81c2bb9265ef327de5549c98af7e6625939847f76
|
data/CHANGELOG.md
CHANGED
@@ -2,8 +2,13 @@
|
|
2
2
|
All notable changes to this project will be documented in this file.
|
3
3
|
This project adheres to [Semantic Versioning](http://semver.org/).
|
4
4
|
|
5
|
-
## [
|
6
|
-
|
5
|
+
## [0.5.0] - 2015-11-06
|
6
|
+
### Added
|
7
|
+
- Added optional weighting to categories
|
8
|
+
|
9
|
+
## [0.4.2] - 2015-11-02
|
10
|
+
### Changed
|
11
|
+
- Fixed a typo in the TextClassifier interface: `propabilities` --> `probabilities`. The old spelling is deprecated.
|
7
12
|
|
8
13
|
## [0.4.1] - 2015-10-29
|
9
14
|
### Added
|
@@ -7,18 +7,20 @@ class CategoriesFactory
|
|
7
7
|
begin
|
8
8
|
examples = ExamplesFactory.from_files(category_config[:path])
|
9
9
|
group = ExamplesGroup.new(examples: examples)
|
10
|
-
categories << Category.new(name: category_config[:name], examples: group)
|
10
|
+
categories << Category.new(name: category_config[:name], examples: group )
|
11
11
|
rescue
|
12
12
|
puts "You haven't provided trainingsdata for the category" + category_config[:name]
|
13
13
|
puts "This category was not created."
|
14
14
|
end
|
15
15
|
end
|
16
16
|
Categories.new(categories: categories)
|
17
|
+
|
18
|
+
|
17
19
|
else
|
18
20
|
config[:categories].each do |category_config|
|
19
21
|
begin
|
20
22
|
group = ExamplesGroup.new(examples: category_config[:examples])
|
21
|
-
categories << Category.new(name: category_config[:name], examples: group)
|
23
|
+
categories << Category.new(name: category_config[:name], examples: group, weight: category_config[:weight])
|
22
24
|
rescue
|
23
25
|
puts "You haven't provided trainingsdata for the category" + category_config[:name]
|
24
26
|
puts "This category was not created."
|
data/lib/NaiveText/Category.rb
CHANGED
@@ -1,13 +1,15 @@
|
|
1
1
|
class Category
|
2
2
|
@@id_counter = 1
|
3
3
|
|
4
|
-
attr_reader :name
|
5
|
-
|
4
|
+
attr_reader :name, :id, :weight
|
5
|
+
|
6
6
|
|
7
7
|
def initialize(args)
|
8
|
-
@name
|
8
|
+
@name = args[:name]
|
9
9
|
@examples = args[:examples]
|
10
|
-
@
|
10
|
+
@weight = args[:weight] || 1
|
11
|
+
@id = @@id_counter
|
12
|
+
|
11
13
|
@@id_counter += 1
|
12
14
|
end
|
13
15
|
|
@@ -6,8 +6,7 @@ class ProbabilityCalculator
|
|
6
6
|
|
7
7
|
def get_probabilities_for(text)
|
8
8
|
calculateProbabilities(text)
|
9
|
-
|
10
|
-
@probabilities
|
9
|
+
@probabilities.normalize
|
11
10
|
end
|
12
11
|
|
13
12
|
|
@@ -33,7 +32,7 @@ class ProbabilityCalculator
|
|
33
32
|
|
34
33
|
def set_apriori_probabilities
|
35
34
|
@categories.each do |category|
|
36
|
-
@probabilities.set(category: category, value: p_apriori(category))
|
35
|
+
@probabilities.set(category: category, value: @categories.p_apriori(category))
|
37
36
|
end
|
38
37
|
end
|
39
38
|
|
@@ -41,13 +40,4 @@ class ProbabilityCalculator
|
|
41
40
|
times = text.split(/\W+/).length
|
42
41
|
@probabilities.greater_then(minimum**times)
|
43
42
|
end
|
44
|
-
|
45
|
-
def normalize
|
46
|
-
normalization_factor = 1.to_f / @probabilities.sum
|
47
|
-
@probabilities.multiply(factor: normalization_factor)
|
48
|
-
end
|
49
|
-
|
50
|
-
def p_apriori(category)
|
51
|
-
@categories.p_apriori(category)
|
52
|
-
end
|
53
43
|
end
|
@@ -14,7 +14,7 @@ class ProbabilityCollection
|
|
14
14
|
def set(args)
|
15
15
|
category = args[:category]
|
16
16
|
value = args[:value]
|
17
|
-
|
17
|
+
@probabilities[category.id] = value
|
18
18
|
end
|
19
19
|
|
20
20
|
def multiply(args)
|
@@ -27,6 +27,14 @@ class ProbabilityCollection
|
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
30
|
+
def normalize
|
31
|
+
if self.sum > 0
|
32
|
+
normalization_factor = 1.to_f / self.sum
|
33
|
+
self.multiply(factor: normalization_factor)
|
34
|
+
end
|
35
|
+
self
|
36
|
+
end
|
37
|
+
|
30
38
|
def category_with_max
|
31
39
|
if @probabilities.max > 0
|
32
40
|
id = @probabilities.find_index(@probabilities.max)
|
@@ -21,6 +21,9 @@ class TextClassifier
|
|
21
21
|
private
|
22
22
|
def get_category_for(text)
|
23
23
|
probabilities = @calculator.get_probabilities_for(text)
|
24
|
+
@categories.each do |category|
|
25
|
+
probabilities.multiply(category: category, factor: category.weight)
|
26
|
+
end
|
24
27
|
probabilities.category_with_max
|
25
28
|
end
|
26
29
|
end
|
data/lib/NaiveText/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: NaiveText
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.2
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- RicciFlowing
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|