NaiveText 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -4
- data/lib/NaiveText.rb +2 -13
- data/lib/NaiveText/Category.rb +10 -0
- data/lib/NaiveText/PropabilityCalculator.rb +6 -2
- data/lib/NaiveText/TextClassifier.rb +10 -1
- data/lib/NaiveText/version.rb +1 -1
- metadata +2 -3
- data/lib/NaiveText/Text.rb +0 -32
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 08919bb72416392eb059f571a589335e5aae0dc2
|
4
|
+
data.tar.gz: cabef8b8d33a2ac2dd811494d02aa0a527d22a87
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6872002c6eae15ad91a59ea8ff952dda08a9d4152f99d245fbaa4b0080a847d54d28081120d055ddf30b3a4e956636bc7f81bfc8444aa7559166389fa5dc75a7
|
7
|
+
data.tar.gz: 9f693eff197c98c0a5008207eb2990a7b4209d660cf8e41f947fbb7972a3e1d95875f34903fee8f608bb685910846634994b08664cb6406bf7eedb129a0b5da4
|
data/README.md
CHANGED
@@ -41,18 +41,18 @@ Now build the systems with your categories and training texts:
|
|
41
41
|
```ruby
|
42
42
|
categories_config = [{name: 'interesting', path: 'spec/training/positive'},
|
43
43
|
{name: 'boring', path: 'spec/training/negative'}]
|
44
|
-
NaiveText.build(categories_config)
|
44
|
+
classifier = NaiveText.build(categories_config)
|
45
45
|
```
|
46
46
|
Now you can start classifying texts:
|
47
47
|
|
48
48
|
```ruby
|
49
|
-
|
50
|
-
|
49
|
+
classifier.classify('Seems to be interesting')
|
50
|
+
classifier.classify('Seems to be boring')
|
51
51
|
```
|
52
52
|
Classify will return a category-object on which you can call name to get the name of the category as a string.
|
53
53
|
|
54
54
|
```ruby
|
55
|
-
category =
|
55
|
+
category = classifier.classify('Something interesting')
|
56
56
|
category.name
|
57
57
|
=> 'interesting'
|
58
58
|
```
|
data/lib/NaiveText.rb
CHANGED
@@ -3,15 +3,13 @@ require "NaiveText/ExamplesGroup"
|
|
3
3
|
require "NaiveText/PropabilityCollection"
|
4
4
|
require "NaiveText/PropabilityCalculator"
|
5
5
|
require "NaiveText/TextClassifier"
|
6
|
-
require "NaiveText/Text"
|
7
6
|
require "NaiveText/Category"
|
8
7
|
require "NaiveText/Categories"
|
9
8
|
require "NaiveText/CategoriesFactory"
|
10
9
|
|
11
10
|
module NaiveText
|
12
|
-
|
13
|
-
|
14
|
-
def build(config)
|
11
|
+
|
12
|
+
def self.build(config)
|
15
13
|
begin
|
16
14
|
@categories = CategoriesFactory.build(config)
|
17
15
|
@test_classifier = TextClassifier.new(categories: @categories)
|
@@ -20,13 +18,4 @@ module NaiveText
|
|
20
18
|
The expectedt format is [{name: name_of_category, path: path_to_trainings_data}]"
|
21
19
|
end
|
22
20
|
end
|
23
|
-
|
24
|
-
def classify(text)
|
25
|
-
begin
|
26
|
-
@text = Text.new(text: text, classifier: @test_classifier)
|
27
|
-
@text.classify
|
28
|
-
rescue
|
29
|
-
puts "An Error occured. Did you call NaiveText.build before using classify"
|
30
|
-
end
|
31
|
-
end
|
32
21
|
end
|
data/lib/NaiveText/Category.rb
CHANGED
data/lib/NaiveText/PropabilityCalculator.rb
CHANGED
@@ -6,6 +6,10 @@ class PropabilityCalculator
|
|
6
6
|
|
7
7
|
def get_propabilities_for(words)
|
8
8
|
calculateProbabilities(words)
|
9
|
+
if(@propabilities.sum > 0)
|
10
|
+
normalize
|
11
|
+
end
|
12
|
+
@propabilities
|
9
13
|
end
|
10
14
|
|
11
15
|
private
|
@@ -20,11 +24,11 @@ class PropabilityCalculator
|
|
20
24
|
@propabilities.multiply(category: category, factor: category.p(word) )
|
21
25
|
end
|
22
26
|
end
|
27
|
+
end
|
23
28
|
|
29
|
+
def normalize
|
24
30
|
normalization_factor = 1.to_f / @propabilities.sum
|
25
31
|
@propabilities.multiply(factor: normalization_factor)
|
26
|
-
|
27
|
-
@propabilities
|
28
32
|
end
|
29
33
|
|
30
34
|
def p_apriori(category)
|
data/lib/NaiveText/TextClassifier.rb
CHANGED
@@ -4,9 +4,18 @@ class TextClassifier
|
|
4
4
|
@calculator = args[:calculator] || PropabilityCalculator.new(categories: @categories)
|
5
5
|
end
|
6
6
|
|
7
|
+
def classify(text)
|
8
|
+
words = text.split(/\W+/)
|
9
|
+
get_category_for(words)
|
10
|
+
end
|
11
|
+
|
7
12
|
def get_category_for(list_of_words)
|
8
13
|
propabilities = @calculator.get_propabilities_for(list_of_words)
|
9
|
-
propabilities.
|
14
|
+
if(propabilities.sum == 0)
|
15
|
+
NullCategory.new
|
16
|
+
else
|
17
|
+
propabilities.max
|
18
|
+
end
|
10
19
|
end
|
11
20
|
|
12
21
|
end
|
data/lib/NaiveText/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: NaiveText
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- RicciFlowing
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -104,7 +104,6 @@ files:
|
|
104
104
|
- lib/NaiveText/ExamplesGroup.rb
|
105
105
|
- lib/NaiveText/PropabilityCalculator.rb
|
106
106
|
- lib/NaiveText/PropabilityCollection.rb
|
107
|
-
- lib/NaiveText/Text.rb
|
108
107
|
- lib/NaiveText/TextClassifier.rb
|
109
108
|
- lib/NaiveText/version.rb
|
110
109
|
homepage: https://github.com/RicciFlowing/NaiveText
|
data/lib/NaiveText/Text.rb
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
class Text
|
2
|
-
attr_reader :words
|
3
|
-
def initialize( args )
|
4
|
-
@text = args[:text] || ""
|
5
|
-
path = args[:path]
|
6
|
-
@classifier = args[:classifier] || TextClassifier.new
|
7
|
-
# If both path and text is given both will be concatenated
|
8
|
-
@text += loadText(path) if path
|
9
|
-
|
10
|
-
@words = @text.split(/\W+/)
|
11
|
-
end
|
12
|
-
|
13
|
-
def sample
|
14
|
-
@text.slice(0,50)
|
15
|
-
end
|
16
|
-
|
17
|
-
def classify
|
18
|
-
@classifier.get_category_for(words)
|
19
|
-
end
|
20
|
-
|
21
|
-
private
|
22
|
-
|
23
|
-
def loadText(path)
|
24
|
-
begin
|
25
|
-
loaded_text = File.read(path)
|
26
|
-
rescue
|
27
|
-
puts "You tried to load the file #{path} for classification. This file was not found.
|
28
|
-
Please make sure, that the path is correctly spelled and that you have reading-access to the path given"
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
end
|