NaiveText 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -4
- data/lib/NaiveText.rb +2 -13
- data/lib/NaiveText/Category.rb +10 -0
- data/lib/NaiveText/PropabilityCalculator.rb +6 -2
- data/lib/NaiveText/TextClassifier.rb +10 -1
- data/lib/NaiveText/version.rb +1 -1
- metadata +2 -3
- data/lib/NaiveText/Text.rb +0 -32
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 08919bb72416392eb059f571a589335e5aae0dc2
|
4
|
+
data.tar.gz: cabef8b8d33a2ac2dd811494d02aa0a527d22a87
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6872002c6eae15ad91a59ea8ff952dda08a9d4152f99d245fbaa4b0080a847d54d28081120d055ddf30b3a4e956636bc7f81bfc8444aa7559166389fa5dc75a7
|
7
|
+
data.tar.gz: 9f693eff197c98c0a5008207eb2990a7b4209d660cf8e41f947fbb7972a3e1d95875f34903fee8f608bb685910846634994b08664cb6406bf7eedb129a0b5da4
|
data/README.md
CHANGED
@@ -41,18 +41,18 @@ Now build the systems with your categories and training texts:
|
|
41
41
|
```ruby
|
42
42
|
categories_config = [{name: 'interesting', path: 'spec/training/positive'},
|
43
43
|
{name: 'boring', path: 'spec/training/negative'}]
|
44
|
-
NaiveText.build(categories_config)
|
44
|
+
classifier = NaiveText.build(categories_config)
|
45
45
|
```
|
46
46
|
Now you can start classifying texts:
|
47
47
|
|
48
48
|
```ruby
|
49
|
-
|
50
|
-
|
49
|
+
classifier.classify('Seems to be interesting')
|
50
|
+
classifier.classify('Seems to be boring')
|
51
51
|
```
|
52
52
|
Classify will return a category-object on which you can call name to get the name of the category as a string.
|
53
53
|
|
54
54
|
```ruby
|
55
|
-
category =
|
55
|
+
category = classifier.classify('Something interesting')
|
56
56
|
category.name
|
57
57
|
=> 'interesting'
|
58
58
|
```
|
data/lib/NaiveText.rb
CHANGED
@@ -3,15 +3,13 @@ require "NaiveText/ExamplesGroup"
|
|
3
3
|
require "NaiveText/PropabilityCollection"
|
4
4
|
require "NaiveText/PropabilityCalculator"
|
5
5
|
require "NaiveText/TextClassifier"
|
6
|
-
require "NaiveText/Text"
|
7
6
|
require "NaiveText/Category"
|
8
7
|
require "NaiveText/Categories"
|
9
8
|
require "NaiveText/CategoriesFactory"
|
10
9
|
|
11
10
|
module NaiveText
|
12
|
-
|
13
|
-
|
14
|
-
def build(config)
|
11
|
+
|
12
|
+
def self.build(config)
|
15
13
|
begin
|
16
14
|
@categories = CategoriesFactory.build(config)
|
17
15
|
@test_classifier = TextClassifier.new(categories: @categories)
|
@@ -20,13 +18,4 @@ module NaiveText
|
|
20
18
|
The expectedt format is [{name: name_of_category, path: path_to_trainings_data}]"
|
21
19
|
end
|
22
20
|
end
|
23
|
-
|
24
|
-
def classify(text)
|
25
|
-
begin
|
26
|
-
@text = Text.new(text: text, classifier: @test_classifier)
|
27
|
-
@text.classify
|
28
|
-
rescue
|
29
|
-
puts "An Error occured. Did you call NaiveText.build before using classify"
|
30
|
-
end
|
31
|
-
end
|
32
21
|
end
|
data/lib/NaiveText/Category.rb
CHANGED
data/lib/NaiveText/PropabilityCalculator.rb
CHANGED
@@ -6,6 +6,10 @@ class PropabilityCalculator
|
|
6
6
|
|
7
7
|
def get_propabilities_for(words)
|
8
8
|
calculateProbabilities(words)
|
9
|
+
if(@propabilities.sum > 0)
|
10
|
+
normalize
|
11
|
+
end
|
12
|
+
@propabilities
|
9
13
|
end
|
10
14
|
|
11
15
|
private
|
@@ -20,11 +24,11 @@ class PropabilityCalculator
|
|
20
24
|
@propabilities.multiply(category: category, factor: category.p(word) )
|
21
25
|
end
|
22
26
|
end
|
27
|
+
end
|
23
28
|
|
29
|
+
def normalize
|
24
30
|
normalization_factor = 1.to_f / @propabilities.sum
|
25
31
|
@propabilities.multiply(factor: normalization_factor)
|
26
|
-
|
27
|
-
@propabilities
|
28
32
|
end
|
29
33
|
|
30
34
|
def p_apriori(category)
|
data/lib/NaiveText/TextClassifier.rb
CHANGED
@@ -4,9 +4,18 @@ class TextClassifier
|
|
4
4
|
@calculator = args[:calculator] || PropabilityCalculator.new(categories: @categories)
|
5
5
|
end
|
6
6
|
|
7
|
+
def classify(text)
|
8
|
+
words = text.split(/\W+/)
|
9
|
+
get_category_for(words)
|
10
|
+
end
|
11
|
+
|
7
12
|
def get_category_for(list_of_words)
|
8
13
|
propabilities = @calculator.get_propabilities_for(list_of_words)
|
9
|
-
propabilities.
|
14
|
+
if(propabilities.sum == 0)
|
15
|
+
NullCategory.new
|
16
|
+
else
|
17
|
+
propabilities.max
|
18
|
+
end
|
10
19
|
end
|
11
20
|
|
12
21
|
end
|
data/lib/NaiveText/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: NaiveText
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- RicciFlowing
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -104,7 +104,6 @@ files:
|
|
104
104
|
- lib/NaiveText/ExamplesGroup.rb
|
105
105
|
- lib/NaiveText/PropabilityCalculator.rb
|
106
106
|
- lib/NaiveText/PropabilityCollection.rb
|
107
|
-
- lib/NaiveText/Text.rb
|
108
107
|
- lib/NaiveText/TextClassifier.rb
|
109
108
|
- lib/NaiveText/version.rb
|
110
109
|
homepage: https://github.com/RicciFlowing/NaiveText
|
data/lib/NaiveText/Text.rb
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
class Text
|
2
|
-
attr_reader :words
|
3
|
-
def initialize( args )
|
4
|
-
@text = args[:text] || ""
|
5
|
-
path = args[:path]
|
6
|
-
@classifier = args[:classifier] || TextClassifier.new
|
7
|
-
# If both path and text is given both will be concatenated
|
8
|
-
@text += loadText(path) if path
|
9
|
-
|
10
|
-
@words = @text.split(/\W+/)
|
11
|
-
end
|
12
|
-
|
13
|
-
def sample
|
14
|
-
@text.slice(0,50)
|
15
|
-
end
|
16
|
-
|
17
|
-
def classify
|
18
|
-
@classifier.get_category_for(words)
|
19
|
-
end
|
20
|
-
|
21
|
-
private
|
22
|
-
|
23
|
-
def loadText(path)
|
24
|
-
begin
|
25
|
-
loaded_text = File.read(path)
|
26
|
-
rescue
|
27
|
-
puts "You tried to load the file #{path} for classification. This file was not found.
|
28
|
-
Please make sure, that the path is correctly spelled and that you have reading-access to the path given"
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
end
|