classifier-reborn 2.0.1 → 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 95e6a82c745772e5180c3765d2ae24995f02e2db
4
- data.tar.gz: e1c6fb9a5f8a4651dac1d3dc5fb327cb8e586e44
3
+ metadata.gz: 1219c6a6bbe3f8c2a0820d953da2ef4c85bb5e52
4
+ data.tar.gz: 66575a30d03763da10f35ee669942bc111838e6e
5
5
  SHA512:
6
- metadata.gz: 8d276020491d285ef656d85de17013c3fbcbb3161116ed139798602107dcd2229b85da121d0e0e3d00d6a5be095ae110a24e864d99f8efc2ab62fb4aa26b6b07
7
- data.tar.gz: 735ebae19d3d920efad2e58dadb8580ae04d512528f319a63d1a746f539d5166405d2a8de5efc487f6c4832700652fbc222d71d672b31826ddaeaefd2687d365
6
+ metadata.gz: 7d8012f760b929a521a03aa43914def24bd2ddcf688c6816c3699d549d584cccf91e5b8499f1c988b8eceb8d4cf767ccad2585cb2cacabcd9092ce040d64b02e
7
+ data.tar.gz: 6e6d7e9a50f9ab05d8548b7032c208729c852fa32452a2c411f7ebd50b9ccf53750f0155907a4cfefd697ec12a0dd24fc25e8fd6d9131e7318fbf04350f6b93e
@@ -29,7 +29,7 @@ This should install automatically with RubyGems.
29
29
  If you would like to speed up LSI classification by at least 10x, please install the following libraries:
30
30
 
31
31
  * [GNU GSL](http://www.gnu.org/software/gsl)
32
- * [rb-gsl](http://rb-gsl.rubyforge.org)
32
+ * [rb-gsl](https://rubygems.org/gems/rb-gsl)
33
33
 
34
34
  Notice that LSI will work without these libraries, but as soon as they are installed, Classifier will make use of them. No configuration changes are needed, we like to keep things ridiculously easy for you.
35
35
 
@@ -40,7 +40,7 @@ A Bayesian classifier by Lucas Carlson. Bayesian Classifiers are accurate, fast,
40
40
  ### Usage
41
41
 
42
42
  ```ruby
43
- require 'classifier'
43
+ require 'classifier-reborn'
44
44
  b = ClassifierReborn::Bayes.new 'Interesting', 'Uninteresting'
45
45
  b.train_interesting "here are some good words. I hope you love them"
46
46
  b.train_uninteresting "here are some bad words, I hate you"
@@ -74,7 +74,7 @@ theoretically simulates human learning.
74
74
  ### Usage
75
75
 
76
76
  ```ruby
77
- require 'classifier'
77
+ require 'classifier-reborn'
78
78
  lsi = ClassifierReborn::LSI.new
79
79
  strings = [ ["This text deals with dogs. Dogs.", :dog],
80
80
  ["This text involves dogs too. Dogs! ", :dog],
@@ -25,6 +25,6 @@
25
25
  # License:: LGPL
26
26
 
27
27
  require 'rubygems'
28
- require_relative 'classifier-reborn/extensions/string'
28
+ require_relative 'classifier-reborn/category_namer'
29
29
  require_relative 'classifier-reborn/bayes'
30
30
  require_relative 'classifier-reborn/lsi'
@@ -2,7 +2,7 @@
2
2
  # Copyright:: Copyright (c) 2005 Lucas Carlson
3
3
  # License:: LGPL
4
4
 
5
- require_relative 'extensions/string'
5
+ require_relative 'category_namer'
6
6
 
7
7
  module ClassifierReborn
8
8
  class Bayes
@@ -11,9 +11,10 @@ module ClassifierReborn
11
11
  # b = ClassifierReborn::Bayes.new 'Interesting', 'Uninteresting', 'Spam'
12
12
  def initialize(*categories)
13
13
  @categories = Hash.new
14
- categories.each { |category| @categories[category.prepare_category_name] = Hash.new }
14
+ categories.each { |category| @categories[CategoryNamer.prepare_name(category)] = Hash.new }
15
15
  @total_words = 0
16
16
  @category_counts = Hash.new(0)
17
+ @category_word_count = Hash.new
17
18
  end
18
19
 
19
20
  # Provides a general training method for all categories specified in Bayes#new
@@ -23,11 +24,13 @@ module ClassifierReborn
23
24
  # b.train "that", "That text"
24
25
  # b.train "The other", "The other text"
25
26
  def train(category, text)
26
- category = category.prepare_category_name
27
- @category_counts[category] += 1
27
+ category = CategoryNamer.prepare_name(category)
28
+ @category_word_count[category] ||= 0
29
+ @category_counts[category] += 1
28
30
  Hasher.word_hash(text).each do |word, count|
29
31
  @categories[category][word] ||= 0
30
32
  @categories[category][word] += count
33
+ @category_word_count[category] += count
31
34
  @total_words += count
32
35
  end
33
36
  end
@@ -40,17 +43,22 @@ module ClassifierReborn
40
43
  # b.train :this, "This text"
41
44
  # b.untrain :this, "This text"
42
45
  def untrain(category, text)
43
- category = category.prepare_category_name
46
+ category = CategoryNamer.prepare_name(category)
47
+ @category_word_count[category] ||= 0
44
48
  @category_counts[category] -= 1
45
49
  Hasher.word_hash(text).each do |word, count|
46
50
  if @total_words >= 0
47
51
  orig = @categories[category][word] || 0
48
- @categories[category][word] ||= 0
49
- @categories[category][word] -= count
52
+ @categories[category][word] ||= 0
53
+ @categories[category][word] -= count
50
54
  if @categories[category][word] <= 0
51
55
  @categories[category].delete(word)
52
56
  count = orig
53
57
  end
58
+
59
+ if @category_word_count[category] >= count
60
+ @category_word_count[category] -= count
61
+ end
54
62
  @total_words -= count
55
63
  end
56
64
  end
@@ -62,13 +70,14 @@ module ClassifierReborn
62
70
  # The largest of these scores (the one closest to 0) is the one picked out by #classify
63
71
  def classifications(text)
64
72
  score = Hash.new
65
- training_count = @category_counts.values.inject { |x,y| x+y }.to_f
73
+ word_hash = Hasher.word_hash(text)
74
+ training_count = @category_counts.values.reduce(:+).to_f
66
75
  @categories.each do |category, category_words|
67
76
  score[category.to_s] = 0
68
- total = category_words.values.inject(0) {|sum, element| sum+element}
69
- Hasher.word_hash(text).each do |word, count|
77
+ total = (@category_word_count[category] || 1).to_f
78
+ word_hash.each do |word, count|
70
79
  s = category_words.has_key?(word) ? category_words[word] : 0.1
71
- score[category.to_s] += Math.log(s/total.to_f)
80
+ score[category.to_s] += Math.log(s/total)
72
81
  end
73
82
  # now add prior probability for the category
74
83
  s = @category_counts.has_key?(category) ? @category_counts[category] : 0.1
@@ -93,7 +102,8 @@ module ClassifierReborn
93
102
  # b.untrain_that "That text"
94
103
  # b.train_the_other "The other text"
95
104
  def method_missing(name, *args)
96
- category = name.to_s.gsub(/(un)?train_([\w]+)/, '\2').prepare_category_name
105
+ cleaned_name = name.to_s.gsub(/(un)?train_([\w]+)/, '\2')
106
+ category = CategoryNamer.prepare_name(cleaned_name)
97
107
  if @categories.has_key? category
98
108
  args.each { |text| eval("#{$1}train(category, text)") }
99
109
  elsif name.to_s =~ /(un)?train_([\w]+)/
@@ -120,7 +130,7 @@ module ClassifierReborn
120
130
  # more criteria than the trained selective categories. In short,
121
131
  # try to initialize your categories at initialization.
122
132
  def add_category(category)
123
- @categories[category.prepare_category_name] = Hash.new
133
+ @categories[CategoryNamer.prepare_name(category)] = Hash.new
124
134
  end
125
135
 
126
136
  alias append_category add_category
@@ -5,6 +5,11 @@
5
5
  require 'fast_stemmer'
6
6
  require 'classifier-reborn/extensions/hasher'
7
7
 
8
- class Object
9
- def prepare_category_name; to_s.gsub("_"," ").capitalize.intern end
8
+ module ClassifierReborn
9
+ module CategoryNamer
10
+ extend self
11
+ def prepare_name(name)
12
+ name.to_s.gsub("_"," ").capitalize.intern
13
+ end
14
+ end
10
15
  end
@@ -5,18 +5,6 @@
5
5
 
6
6
  require 'matrix'
7
7
 
8
- class Array
9
- def sum(identity = 0, &block)
10
- return identity unless size > 0
11
-
12
- if block_given?
13
- map(&block).sum
14
- else
15
- reduce(:+) || 0
16
- end
17
- end
18
- end
19
-
20
8
  class Vector
21
9
  def magnitude
22
10
  sumsqs = 0.0
@@ -44,7 +44,7 @@ module ClassifierReborn
44
44
  end
45
45
 
46
46
  # Perform the scaling transform and force floating point arithmetic
47
- total_words = vec.sum.to_f
47
+ total_words = vec.reduce(0, :+).to_f
48
48
  total_unique_words = 0
49
49
 
50
50
  if $GSL
@@ -1,3 +1,3 @@
1
1
  module ClassifierReborn
2
- VERSION = '2.0.1'
2
+ VERSION = '2.0.2'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: classifier-reborn
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 2.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lucas Carlson
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-08-14 00:00:00.000000000 Z
12
+ date: 2014-11-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: fast-stemmer
@@ -71,8 +71,8 @@ files:
71
71
  - bin/summarize.rb
72
72
  - lib/classifier-reborn.rb
73
73
  - lib/classifier-reborn/bayes.rb
74
+ - lib/classifier-reborn/category_namer.rb
74
75
  - lib/classifier-reborn/extensions/hasher.rb
75
- - lib/classifier-reborn/extensions/string.rb
76
76
  - lib/classifier-reborn/extensions/vector.rb
77
77
  - lib/classifier-reborn/extensions/vector_serialize.rb
78
78
  - lib/classifier-reborn/lsi.rb