NaiveText 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/NaiveText/Categories.rb +4 -0
- data/lib/NaiveText/CategoriesFactory.rb +7 -2
- data/lib/NaiveText/Category.rb +9 -1
- data/lib/NaiveText/ExamplesGroup.rb +3 -0
- data/lib/NaiveText/PropabilityCalculator.rb +17 -8
- data/lib/NaiveText/PropabilityCollection.rb +16 -1
- data/lib/NaiveText/TextClassifier.rb +9 -9
- data/lib/NaiveText/version.rb +1 -1
- data/lib/NaiveText.rb +3 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ac7ce8cec1a92d0c067f4953bf28318afed99583
|
4
|
+
data.tar.gz: 47337502e94c528349eba4bb34e34bb81a9aa810
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f7ab2fbec59e6dadb29ae98b10192e7ac5dd0ddd4e9dbdb0fe43ff289f4a354014170ec1eb55840b57a95dadfc12428880475ad517238cb0813d439c34c4cb2b
|
7
|
+
data.tar.gz: 72f0d51bf15c8dee374f876356f4b0af47f1af9b74d1056568adf0c8a8fbc0e45119b0f585c643e3a1913f757a4303cf3af16a01edf0096c0729769039d78fe4
|
data/lib/NaiveText/Categories.rb
CHANGED
data/lib/NaiveText/CategoriesFactory.rb
CHANGED
@@ -3,8 +3,13 @@ class CategoriesFactory
|
|
3
3
|
categories = []
|
4
4
|
|
5
5
|
config.each do |category_config|
|
6
|
-
|
7
|
-
|
6
|
+
begin
|
7
|
+
examples = ExamplesGroup.new(category_config[:path])
|
8
|
+
categories << Category.new(name: category_config[:name], examples: examples)
|
9
|
+
rescue
|
10
|
+
puts "You haven't provided trainingsdata for the category" + category_config[:name]
|
11
|
+
puts "This category was not created."
|
12
|
+
end
|
8
13
|
end
|
9
14
|
|
10
15
|
Categories.new(categories: categories)
|
data/lib/NaiveText/Category.rb
CHANGED
@@ -12,12 +12,20 @@ class Category
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def p(word)
|
15
|
-
|
15
|
+
if(@examples.word_count>0)
|
16
|
+
@examples.count(word).to_f / @examples.word_count
|
17
|
+
else
|
18
|
+
0
|
19
|
+
end
|
16
20
|
end
|
17
21
|
|
18
22
|
def word_count
|
19
23
|
@examples.word_count
|
20
24
|
end
|
25
|
+
|
26
|
+
def to_s
|
27
|
+
@name
|
28
|
+
end
|
21
29
|
end
|
22
30
|
|
23
31
|
class NullCategory
|
data/lib/NaiveText/PropabilityCalculator.rb
CHANGED
@@ -4,24 +4,34 @@ class PropabilityCalculator
|
|
4
4
|
@propabilities = PropabilityCollection.new(categories: @categories)
|
5
5
|
end
|
6
6
|
|
7
|
-
def get_propabilities_for(
|
8
|
-
calculateProbabilities(
|
9
|
-
|
10
|
-
normalize
|
11
|
-
end
|
7
|
+
def get_propabilities_for(text)
|
8
|
+
calculateProbabilities(text)
|
9
|
+
normalize unless @propabilities.sum < 0
|
12
10
|
@propabilities
|
13
11
|
end
|
14
12
|
|
13
|
+
|
15
14
|
private
|
15
|
+
def minimum
|
16
|
+
minimum = 1.to_f/(10*@categories.total_word_count)
|
17
|
+
end
|
16
18
|
|
17
|
-
def
|
19
|
+
def min_factor(factor)
|
20
|
+
if factor.to_f < minimum
|
21
|
+
factor = minimum
|
22
|
+
end
|
23
|
+
factor
|
24
|
+
end
|
25
|
+
|
26
|
+
def calculateProbabilities(text)
|
27
|
+
list_of_words = text.split(/\W+/)
|
18
28
|
@categories.each do |category|
|
19
29
|
@propabilities.set(category: category, value: p_apriori(category))
|
20
30
|
end
|
21
31
|
|
22
32
|
list_of_words.each do |word|
|
23
33
|
@categories.each do |category|
|
24
|
-
@propabilities.multiply(category: category, factor: category.p(word) )
|
34
|
+
@propabilities.multiply(category: category, factor: min_factor(category.p(word)) )
|
25
35
|
end
|
26
36
|
end
|
27
37
|
end
|
@@ -36,5 +46,4 @@ class PropabilityCalculator
|
|
36
46
|
end
|
37
47
|
|
38
48
|
|
39
|
-
|
40
49
|
end
|
data/lib/NaiveText/PropabilityCollection.rb
CHANGED
@@ -27,15 +27,30 @@ class PropabilityCollection
|
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
30
|
-
def
|
30
|
+
def category_with_max
|
31
31
|
id = @propabilities.find_index(@propabilities.max)
|
32
32
|
@categories.find {|category| category.id == id}
|
33
33
|
end
|
34
34
|
|
35
|
+
def max
|
36
|
+
@propabilities.max
|
37
|
+
end
|
38
|
+
|
35
39
|
def sum
|
36
40
|
@propabilities.reduce(:+)
|
37
41
|
end
|
38
42
|
|
43
|
+
def to_s
|
44
|
+
result = ''
|
45
|
+
@categories.each do |category|
|
46
|
+
result << category.to_s
|
47
|
+
result << ':'
|
48
|
+
result << self.find(category).to_s
|
49
|
+
result << '\n'
|
50
|
+
end
|
51
|
+
result
|
52
|
+
end
|
53
|
+
|
39
54
|
private
|
40
55
|
def initialize_ids
|
41
56
|
@ids = @categories.map { |category| category.id }
|
data/lib/NaiveText/TextClassifier.rb
CHANGED
@@ -1,21 +1,21 @@
|
|
1
1
|
class TextClassifier
|
2
|
+
attr_reader :categories
|
2
3
|
def initialize( args )
|
3
4
|
@categories = args[:categories]
|
4
5
|
@calculator = args[:calculator] || PropabilityCalculator.new(categories: @categories)
|
5
6
|
end
|
6
7
|
|
7
8
|
def classify(text)
|
8
|
-
|
9
|
-
get_category_for(words)
|
9
|
+
get_category_for(text)
|
10
10
|
end
|
11
11
|
|
12
|
-
def
|
13
|
-
|
14
|
-
if(propabilities.sum == 0)
|
15
|
-
NullCategory.new
|
16
|
-
else
|
17
|
-
propabilities.max
|
18
|
-
end
|
12
|
+
def propabilities(text)
|
13
|
+
@calculator.get_propabilities(text)
|
19
14
|
end
|
20
15
|
|
16
|
+
private
|
17
|
+
def get_category_for(text)
|
18
|
+
propabilities = @calculator.get_propabilities_for(text)
|
19
|
+
propabilities.category_with_max
|
20
|
+
end
|
21
21
|
end
|
data/lib/NaiveText/version.rb
CHANGED
data/lib/NaiveText.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: NaiveText
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- RicciFlowing
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|