NaiveText 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/NaiveText/Categories.rb +4 -0
- data/lib/NaiveText/CategoriesFactory.rb +7 -2
- data/lib/NaiveText/Category.rb +9 -1
- data/lib/NaiveText/ExamplesGroup.rb +3 -0
- data/lib/NaiveText/PropabilityCalculator.rb +17 -8
- data/lib/NaiveText/PropabilityCollection.rb +16 -1
- data/lib/NaiveText/TextClassifier.rb +9 -9
- data/lib/NaiveText/version.rb +1 -1
- data/lib/NaiveText.rb +3 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ac7ce8cec1a92d0c067f4953bf28318afed99583
|
4
|
+
data.tar.gz: 47337502e94c528349eba4bb34e34bb81a9aa810
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f7ab2fbec59e6dadb29ae98b10192e7ac5dd0ddd4e9dbdb0fe43ff289f4a354014170ec1eb55840b57a95dadfc12428880475ad517238cb0813d439c34c4cb2b
|
7
|
+
data.tar.gz: 72f0d51bf15c8dee374f876356f4b0af47f1af9b74d1056568adf0c8a8fbc0e45119b0f585c643e3a1913f757a4303cf3af16a01edf0096c0729769039d78fe4
|
data/lib/NaiveText/Categories.rb
CHANGED
@@ -3,8 +3,13 @@ class CategoriesFactory
|
|
3
3
|
categories = []
|
4
4
|
|
5
5
|
config.each do |category_config|
|
6
|
-
|
7
|
-
|
6
|
+
begin
|
7
|
+
examples = ExamplesGroup.new(category_config[:path])
|
8
|
+
categories << Category.new(name: category_config[:name], examples: examples)
|
9
|
+
rescue
|
10
|
+
puts "You haven't provided trainingsdata for the category" + category_config[:name]
|
11
|
+
puts "This category was not created."
|
12
|
+
end
|
8
13
|
end
|
9
14
|
|
10
15
|
Categories.new(categories: categories)
|
data/lib/NaiveText/Category.rb
CHANGED
@@ -12,12 +12,20 @@ class Category
|
|
12
12
|
end
|
13
13
|
|
14
14
|
# Relative frequency of +word+ among this category's training examples.
#
# Divides @examples.count(word) by @examples.word_count.
# NOTE(review): inside a class this name shadows Kernel#p.
#
# @param word the token to look up (passed straight to @examples.count)
# @return [Float] count / word_count, or 0.0 when the examples contain no
#   words at all (guards the division by zero)
def p(word)
  total = @examples.word_count
  # Always hand back a Float so both branches have the same type.
  return 0.0 unless total > 0

  @examples.count(word).to_f / total
end
|
17
21
|
|
18
22
|
# Total number of words in this category's training examples.
#
# Pure delegation: returns whatever @examples.word_count reports.
def word_count
  @examples.word_count
end
|
25
|
+
|
26
|
+
# String form of the category: its name.
#
# Always returns a String (the +to_s+ contract), even when @name was given
# as a Symbol or is nil, so callers can append it to a String safely.
#
# @return [String]
def to_s
  @name.to_s
end
|
21
29
|
end
|
22
30
|
|
23
31
|
class NullCategory
|
@@ -4,24 +4,34 @@ class PropabilityCalculator
|
|
4
4
|
@propabilities = PropabilityCollection.new(categories: @categories)
|
5
5
|
end
|
6
6
|
|
7
|
-
def get_propabilities_for(
|
8
|
-
calculateProbabilities(
|
9
|
-
|
10
|
-
normalize
|
11
|
-
end
|
7
|
+
# Computes the per-category probabilities for +text+.
#
# Runs calculateProbabilities(text) to fill @propabilities, normalizes the
# values when their sum is positive, and returns the collection.
#
# @param text the text to score (handed to calculateProbabilities)
# @return the @propabilities collection
def get_propabilities_for(text)
  calculateProbabilities(text)
  # The previous guard (`unless sum < 0`) never fired for non-negative
  # values, so a zero sum was still normalized.
  # NOTE(review): assumes normalize rescales by the sum -- confirm.
  normalize if @propabilities.sum > 0
  @propabilities
end
|
14
12
|
|
13
|
+
|
15
14
|
private
|
15
|
+
# Smallest factor a single word may contribute to a category's probability:
# one tenth of 1 / (total number of training words across @categories).
#
# @return [Float] 1.0 / (10 * total_word_count), or 0.0 when there are no
#   training words (the unguarded division would yield Infinity)
def minimum
  total = @categories.total_word_count
  return 0.0 unless total > 0

  1.0 / (10 * total)
end
|
16
18
|
|
17
|
-
def
|
19
|
+
# Clamps +factor+ from below at the value returned by +minimum+.
#
# @param factor a numeric factor (compared via +to_f+)
# @return +minimum+ when factor.to_f is smaller than it, otherwise +factor+
#   itself, unchanged
def min_factor(factor)
  floor = minimum
  factor.to_f < floor ? floor : factor
end
|
25
|
+
|
26
|
+
# Fills @propabilities with the unnormalized score of every category.
#
# Each category starts at p_apriori(category). Then, for every word of
# +text+, its entry is multiplied by min_factor(category.p(word)).
#
# @param text [String] the text to score
def calculateProbabilities(text)
  # scan collects the runs of word characters directly. split(/\W+/)
  # produced an empty leading token whenever the text started with
  # punctuation or whitespace, which multiplied in a spurious factor.
  words = text.scan(/\w+/)

  @categories.each do |category|
    @propabilities.set(category: category, value: p_apriori(category))
  end

  words.each do |word|
    @categories.each do |category|
      @propabilities.multiply(category: category, factor: min_factor(category.p(word)))
    end
  end
end
|
@@ -36,5 +46,4 @@ class PropabilityCalculator
|
|
36
46
|
end
|
37
47
|
|
38
48
|
|
39
|
-
|
40
49
|
end
|
@@ -27,15 +27,30 @@ class PropabilityCollection
|
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
30
|
-
def
|
30
|
+
# Returns the category whose probability is the largest.
#
# Locates the position of the maximum inside @propabilities and returns the
# first category in @categories whose +id+ equals that position.
# NOTE(review): this compares a positional index with category.id, so it
# presumably relies on ids matching positions -- confirm.
#
# @return the matching category, or nil when none has that id
def category_with_max
  best_index = @propabilities.find_index(@propabilities.max)
  @categories.find { |candidate| candidate.id == best_index }
end
|
34
34
|
|
35
|
+
# Largest value currently stored in @propabilities.
#
# Pure delegation to @propabilities.max.
def max
  @propabilities.max
end
|
38
|
+
|
35
39
|
# Sum of all values stored in @propabilities.
#
# Seeds the fold with 0 so an empty collection sums to 0 instead of nil,
# which callers could not compare numerically.
#
# @return [Numeric]
def sum
  @propabilities.reduce(0, :+)
end
|
38
42
|
|
43
|
+
# Human-readable listing of the collection, one "category:value" entry per
# line in the order of @categories.
#
# Each value is looked up with +find(category)+.
#
# @return [String] the entries, each terminated by a newline
def to_s
  # Double quotes are required here: '\n' in single quotes is a literal
  # backslash followed by "n", not a line break.
  @categories.map { |category| "#{category}:#{find(category)}\n" }.join
end
|
53
|
+
|
39
54
|
private
|
40
55
|
def initialize_ids
|
41
56
|
@ids = @categories.map { |category| category.id }
|
@@ -1,21 +1,21 @@
|
|
1
1
|
# Assigns a text to one of a set of categories by asking a probability
# calculator for per-category probabilities and picking the largest.
class TextClassifier
  attr_reader :categories

  # @param args [Hash] options:
  #   :categories - the categories to classify into
  #   :calculator - optional calculator responding to get_propabilities_for;
  #                 defaults to a PropabilityCalculator built from the
  #                 categories
  def initialize(args)
    @categories = args[:categories]
    @calculator = args[:calculator] || PropabilityCalculator.new(categories: @categories)
  end

  # Returns the category with the highest probability for +text+.
  def classify(text)
    get_category_for(text)
  end

  # Returns the calculator's probability collection for +text+.
  #
  # Uses get_propabilities_for, the calculator method that get_category_for
  # also calls; the previous call to `get_propabilities` named a method that
  # does not exist on the calculator.
  def propabilities(text)
    @calculator.get_propabilities_for(text)
  end

  private

  # Asks the calculator for the probabilities of +text+ and returns the
  # collection's category_with_max.
  def get_category_for(text)
    @calculator.get_propabilities_for(text).category_with_max
  end
end
|
data/lib/NaiveText/version.rb
CHANGED
data/lib/NaiveText.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: NaiveText
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- RicciFlowing
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|