NaiveText 0.1.0 → 0.2.0

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9a4ff5607deb99718f721a0e6b62636feb91f007
4
- data.tar.gz: 6a0dd76a73cc56784bd333b6766ae2375ff6154d
3
+ metadata.gz: 08919bb72416392eb059f571a589335e5aae0dc2
4
+ data.tar.gz: cabef8b8d33a2ac2dd811494d02aa0a527d22a87
5
5
  SHA512:
6
- metadata.gz: ad7d2e7dc253ebef99f9ee9fd2dcf87b63f41cd0fc7800102cc071b780a22ace3f554a939fcc90ee65acdeeeea4def99bfe86d3813ff9e59aa2d12f2067a2510
7
- data.tar.gz: 4e54cb6f0bd1a0d091bd2b19e0f5afd10cdc85ea37c7cef74d2629d9613fe3a71e540da9fe41f7ed1329b7133933be168e913526c59cafa2563bf569c60b84da
6
+ metadata.gz: 6872002c6eae15ad91a59ea8ff952dda08a9d4152f99d245fbaa4b0080a847d54d28081120d055ddf30b3a4e956636bc7f81bfc8444aa7559166389fa5dc75a7
7
+ data.tar.gz: 9f693eff197c98c0a5008207eb2990a7b4209d660cf8e41f947fbb7972a3e1d95875f34903fee8f608bb685910846634994b08664cb6406bf7eedb129a0b5da4
data/README.md CHANGED
@@ -41,18 +41,18 @@ Now build the systems with your categories and training texts:
41
41
  ```ruby
42
42
  categories_config = [{name: 'interesting', path: 'spec/training/positive'},
43
43
  {name: 'boring', path: 'spec/training/negative'}]
44
- NaiveText.build(categories_config)
44
+ classifier = NaiveText.build(categories_config)
45
45
  ```
46
46
  Now you can start classifying texts:
47
47
 
48
48
  ```ruby
49
- NaiveText.classify('Seems to be interesting')
50
- NaiveText.classify('Seems to be boring')
49
+ classifier.classify('Seems to be interesting')
50
+ classifier.classify('Seems to be boring')
51
51
  ```
52
52
  Classify will return a category-object on which you can call name to get the name of the category as a string.
53
53
 
54
54
  ```ruby
55
- category = NaiveText.classify('Something interesting')
55
+ category = classifier.classify('Something interesting')
56
56
  category.name
57
57
  => 'interesting'
58
58
  ```
data/lib/NaiveText.rb CHANGED
@@ -3,15 +3,13 @@ require "NaiveText/ExamplesGroup"
3
3
  require "NaiveText/PropabilityCollection"
4
4
  require "NaiveText/PropabilityCalculator"
5
5
  require "NaiveText/TextClassifier"
6
- require "NaiveText/Text"
7
6
  require "NaiveText/Category"
8
7
  require "NaiveText/Categories"
9
8
  require "NaiveText/CategoriesFactory"
10
9
 
11
10
  module NaiveText
12
- extend self
13
-
14
- def build(config)
11
+
12
+ def self.build(config)
15
13
  begin
16
14
  @categories = CategoriesFactory.build(config)
17
15
  @test_classifier = TextClassifier.new(categories: @categories)
@@ -20,13 +18,4 @@ module NaiveText
20
18
  The expectedt format is [{name: name_of_category, path: path_to_trainings_data}]"
21
19
  end
22
20
  end
23
-
24
- def classify(text)
25
- begin
26
- @text = Text.new(text: text, classifier: @test_classifier)
27
- @text.classify
28
- rescue
29
- puts "An Error occured. Did you call NaiveText.build before using classify"
30
- end
31
- end
32
21
  end
@@ -19,3 +19,13 @@ class Category
19
19
  @examples.word_count
20
20
  end
21
21
  end
22
+
23
+ class NullCategory
24
+ attr_reader :name
25
+ attr_reader :id
26
+
27
+ def initialize
28
+ @name = "No Category"
29
+ @id = 0
30
+ end
31
+ end
@@ -6,6 +6,10 @@ class PropabilityCalculator
6
6
 
7
7
  def get_propabilities_for(words)
8
8
  calculateProbabilities(words)
9
+ if(@propabilities.sum > 0)
10
+ normalize
11
+ end
12
+ @propabilities
9
13
  end
10
14
 
11
15
  private
@@ -20,11 +24,11 @@ class PropabilityCalculator
20
24
  @propabilities.multiply(category: category, factor: category.p(word) )
21
25
  end
22
26
  end
27
+ end
23
28
 
29
+ def normalize
24
30
  normalization_factor = 1.to_f / @propabilities.sum
25
31
  @propabilities.multiply(factor: normalization_factor)
26
-
27
- @propabilities
28
32
  end
29
33
 
30
34
  def p_apriori(category)
@@ -4,9 +4,18 @@ class TextClassifier
4
4
  @calculator = args[:calculator] || PropabilityCalculator.new(categories: @categories)
5
5
  end
6
6
 
7
+ def classify(text)
8
+ words = text.split(/\W+/)
9
+ get_category_for(words)
10
+ end
11
+
7
12
  def get_category_for(list_of_words)
8
13
  propabilities = @calculator.get_propabilities_for(list_of_words)
9
- propabilities.max
14
+ if(propabilities.sum == 0)
15
+ NullCategory.new
16
+ else
17
+ propabilities.max
18
+ end
10
19
  end
11
20
 
12
21
  end
@@ -1,3 +1,3 @@
1
1
  module NaiveText
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: NaiveText
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - RicciFlowing
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-10-11 00:00:00.000000000 Z
11
+ date: 2015-10-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -104,7 +104,6 @@ files:
104
104
  - lib/NaiveText/ExamplesGroup.rb
105
105
  - lib/NaiveText/PropabilityCalculator.rb
106
106
  - lib/NaiveText/PropabilityCollection.rb
107
- - lib/NaiveText/Text.rb
108
107
  - lib/NaiveText/TextClassifier.rb
109
108
  - lib/NaiveText/version.rb
110
109
  homepage: https://github.com/RicciFlowing/NaiveText
@@ -1,32 +0,0 @@
1
- class Text
2
- attr_reader :words
3
- def initialize( args )
4
- @text = args[:text] || ""
5
- path = args[:path]
6
- @classifier = args[:classifier] || TextClassifier.new
7
- # If both path and text is given both will be concatenated
8
- @text += loadText(path) if path
9
-
10
- @words = @text.split(/\W+/)
11
- end
12
-
13
- def sample
14
- @text.slice(0,50)
15
- end
16
-
17
- def classify
18
- @classifier.get_category_for(words)
19
- end
20
-
21
- private
22
-
23
- def loadText(path)
24
- begin
25
- loaded_text = File.read(path)
26
- rescue
27
- puts "You tried to load the file #{path} for classification. This file was not found.
28
- Please make sure, that the path is correctly spelled and that you have reading-access to the path given"
29
- end
30
- end
31
-
32
- end