NaiveText 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9a4ff5607deb99718f721a0e6b62636feb91f007
4
- data.tar.gz: 6a0dd76a73cc56784bd333b6766ae2375ff6154d
3
+ metadata.gz: 08919bb72416392eb059f571a589335e5aae0dc2
4
+ data.tar.gz: cabef8b8d33a2ac2dd811494d02aa0a527d22a87
5
5
  SHA512:
6
- metadata.gz: ad7d2e7dc253ebef99f9ee9fd2dcf87b63f41cd0fc7800102cc071b780a22ace3f554a939fcc90ee65acdeeeea4def99bfe86d3813ff9e59aa2d12f2067a2510
7
- data.tar.gz: 4e54cb6f0bd1a0d091bd2b19e0f5afd10cdc85ea37c7cef74d2629d9613fe3a71e540da9fe41f7ed1329b7133933be168e913526c59cafa2563bf569c60b84da
6
+ metadata.gz: 6872002c6eae15ad91a59ea8ff952dda08a9d4152f99d245fbaa4b0080a847d54d28081120d055ddf30b3a4e956636bc7f81bfc8444aa7559166389fa5dc75a7
7
+ data.tar.gz: 9f693eff197c98c0a5008207eb2990a7b4209d660cf8e41f947fbb7972a3e1d95875f34903fee8f608bb685910846634994b08664cb6406bf7eedb129a0b5da4
data/README.md CHANGED
@@ -41,18 +41,18 @@ Now build the systems with your categories and training texts:
41
41
  ```ruby
42
42
  categories_config = [{name: 'interesting', path: 'spec/training/positive'},
43
43
  {name: 'boring', path: 'spec/training/negative'}]
44
- NaiveText.build(categories_config)
44
+ classifier = NaiveText.build(categories_config)
45
45
  ```
46
46
  Now you can start classifying texts:
47
47
 
48
48
  ```ruby
49
- NaiveText.classify('Seems to be interesting')
50
- NaiveText.classify('Seems to be boring')
49
+ classifier.classify('Seems to be interesting')
50
+ classifier.classify('Seems to be boring')
51
51
  ```
52
52
  `classify` returns a category object; call `name` on it to get the name of the category as a string.
53
53
 
54
54
  ```ruby
55
- category = NaiveText.classify('Something interesting')
55
+ category = classifier.classify('Something interesting')
56
56
  category.name
57
57
  => 'interesting'
58
58
  ```
data/lib/NaiveText.rb CHANGED
@@ -3,15 +3,13 @@ require "NaiveText/ExamplesGroup"
3
3
  require "NaiveText/PropabilityCollection"
4
4
  require "NaiveText/PropabilityCalculator"
5
5
  require "NaiveText/TextClassifier"
6
- require "NaiveText/Text"
7
6
  require "NaiveText/Category"
8
7
  require "NaiveText/Categories"
9
8
  require "NaiveText/CategoriesFactory"
10
9
 
11
10
  module NaiveText
12
- extend self
13
-
14
- def build(config)
11
+
12
+ def self.build(config)
15
13
  begin
16
14
  @categories = CategoriesFactory.build(config)
17
15
  @test_classifier = TextClassifier.new(categories: @categories)
@@ -20,13 +18,4 @@ module NaiveText
20
18
  The expectedt format is [{name: name_of_category, path: path_to_trainings_data}]"
21
19
  end
22
20
  end
23
-
24
- def classify(text)
25
- begin
26
- @text = Text.new(text: text, classifier: @test_classifier)
27
- @text.classify
28
- rescue
29
- puts "An Error occured. Did you call NaiveText.build before using classify"
30
- end
31
- end
32
21
  end
@@ -19,3 +19,13 @@ class Category
19
19
  @examples.word_count
20
20
  end
21
21
  end
22
+
23
+ class NullCategory
24
+ attr_reader :name
25
+ attr_reader :id
26
+
27
+ def initialize
28
+ @name = "No Category"
29
+ @id = 0
30
+ end
31
+ end
@@ -6,6 +6,10 @@ class PropabilityCalculator
6
6
 
7
7
  def get_propabilities_for(words)
8
8
  calculateProbabilities(words)
9
+ if(@propabilities.sum > 0)
10
+ normalize
11
+ end
12
+ @propabilities
9
13
  end
10
14
 
11
15
  private
@@ -20,11 +24,11 @@ class PropabilityCalculator
20
24
  @propabilities.multiply(category: category, factor: category.p(word) )
21
25
  end
22
26
  end
27
+ end
23
28
 
29
+ def normalize
24
30
  normalization_factor = 1.to_f / @propabilities.sum
25
31
  @propabilities.multiply(factor: normalization_factor)
26
-
27
- @propabilities
28
32
  end
29
33
 
30
34
  def p_apriori(category)
@@ -4,9 +4,18 @@ class TextClassifier
4
4
  @calculator = args[:calculator] || PropabilityCalculator.new(categories: @categories)
5
5
  end
6
6
 
7
+ def classify(text)
8
+ words = text.split(/\W+/)
9
+ get_category_for(words)
10
+ end
11
+
7
12
  def get_category_for(list_of_words)
8
13
  propabilities = @calculator.get_propabilities_for(list_of_words)
9
- propabilities.max
14
+ if(propabilities.sum == 0)
15
+ NullCategory.new
16
+ else
17
+ propabilities.max
18
+ end
10
19
  end
11
20
 
12
21
  end
@@ -1,3 +1,3 @@
1
1
  module NaiveText
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: NaiveText
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - RicciFlowing
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-10-11 00:00:00.000000000 Z
11
+ date: 2015-10-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -104,7 +104,6 @@ files:
104
104
  - lib/NaiveText/ExamplesGroup.rb
105
105
  - lib/NaiveText/PropabilityCalculator.rb
106
106
  - lib/NaiveText/PropabilityCollection.rb
107
- - lib/NaiveText/Text.rb
108
107
  - lib/NaiveText/TextClassifier.rb
109
108
  - lib/NaiveText/version.rb
110
109
  homepage: https://github.com/RicciFlowing/NaiveText
@@ -1,32 +0,0 @@
1
- class Text
2
- attr_reader :words
3
- def initialize( args )
4
- @text = args[:text] || ""
5
- path = args[:path]
6
- @classifier = args[:classifier] || TextClassifier.new
7
- # If both path and text is given both will be concatenated
8
- @text += loadText(path) if path
9
-
10
- @words = @text.split(/\W+/)
11
- end
12
-
13
- def sample
14
- @text.slice(0,50)
15
- end
16
-
17
- def classify
18
- @classifier.get_category_for(words)
19
- end
20
-
21
- private
22
-
23
- def loadText(path)
24
- begin
25
- loaded_text = File.read(path)
26
- rescue
27
- puts "You tried to load the file #{path} for classification. This file was not found.
28
- Please make sure, that the path is correctly spelled and that you have reading-access to the path given"
29
- end
30
- end
31
-
32
- end