classifier 1.0 → 1.1

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -5,7 +5,7 @@ require 'rake/rdoctask'
5
5
  require 'rake/gempackagetask'
6
6
  require 'rake/contrib/rubyforgepublisher'
7
7
 
8
- PKG_VERSION = "1.0.0"
8
+ PKG_VERSION = "1.1"
9
9
 
10
10
  PKG_FILES = FileList[
11
11
  "lib/**/*", "bin/*", "test/**/*", "[A-Z]*", "Rakefile"
@@ -38,7 +38,7 @@ spec = Gem::Specification.new do |s|
38
38
  #### Basic information.
39
39
 
40
40
  s.name = 'classifier'
41
- s.version = "1.0"
41
+ s.version = PKG_VERSION
42
42
  s.summary = <<-EOF
43
43
  A general classifier module to allow Bayesian and other types of classifications.
44
44
  EOF
@@ -74,4 +74,4 @@ end
74
74
  desc "Publish to RubyForge"
75
75
  task :rubyforge do
76
76
  Rake::RubyForgePublisher.new('classifier', 'cardmagic').upload
77
- end
77
+ end
@@ -12,7 +12,7 @@ class Bayes
12
12
  end
13
13
 
14
14
  def classify(text)
15
- (classifications(text).sort { |a, b| b[1] <=> a[1] })[0][0]
15
+ (classifications(text).sort_by { |a| -a[1] })[0][0]
16
16
  end
17
17
 
18
18
  def classifications(text)
@@ -29,9 +29,9 @@ class Bayes
29
29
  end
30
30
 
31
31
  def method_missing(name, *args)
32
- category = name.to_s.gsub(/train_([\w]+)/, '\1').capitalize.intern
32
+ category = name.to_s.gsub(/train_([\w]+)/, '\1').gsub("_"," ").capitalize.intern
33
33
  if @categories.has_key? category
34
- args.each {|text| add_words category, text}
34
+ args.each {|text| train category, text}
35
35
  elsif name.to_s =~ /train_([\w]+)/
36
36
  raise StandardError, "No such category: #{category}"
37
37
  else
@@ -39,9 +39,8 @@ class Bayes
39
39
  end
40
40
  end
41
41
 
42
- private
43
-
44
- def add_words(category, text)
42
+ def train(category, text)
43
+ category = category.to_s.gsub("_"," ").capitalize.intern
45
44
  text.word_hash.each do |word, count|
46
45
  @categories[category][word] ||= 0
47
46
  @categories[category][word] += count
@@ -50,4 +49,4 @@ class Bayes
50
49
  end
51
50
  end
52
51
 
53
- end
52
+ end
@@ -1,3 +1,11 @@
1
+ # = Author
2
+ #
3
+ # Greg Fast, gdf@speakeasy.net
4
+ #
5
+ # = Copyright
6
+ #
7
+ # Copyright 2005 Greg Fast <gdf@speakeasy.net>
8
+
1
9
  module Classifier
2
10
 
3
11
  module Stemmable
@@ -185,4 +193,4 @@ module Stemmable
185
193
 
186
194
  end
187
195
 
188
- end
196
+ end
@@ -12,13 +12,15 @@ module WordHash
12
12
  def word_hash
13
13
  d = Hash.new
14
14
  corpus = without_punctuation
15
- (corpus.split + gsub(/[\w]/,"").split).each do |word|
15
+ (corpus.split + gsub(/[\w+]/,"").split).each do |word|
16
16
  key = word.downcase.stem.intern
17
- d[key] ||= 0
18
- d[key] += 1
17
+ if !(word =~ /[\w+]/) || word.length > 2
18
+ d[key] ||= 0
19
+ d[key] += 1
20
+ end
19
21
  end
20
22
  return d
21
23
  end
22
24
  end
23
25
 
24
- end
26
+ end
@@ -1,8 +1,8 @@
1
1
  require File.dirname(__FILE__) + '/../test_helper'
2
2
  class StringExtensionsTest < Test::Unit::TestCase
3
3
  def test_word_hash
4
- hash = {:some=>1, :good=>1, :hope=>1, :word=>1, :you=>1, :here=>1, :love=>1, :i=>1, :ar=>1, :them=>1, :"."=>1, :"!"=>1}
4
+ hash = {:some=>1, :good=>1, :hope=>1, :word=>1, :you=>1, :here=>1, :love=>1, :ar=>1, :them=>1, :"."=>1, :"!"=>1}
5
5
 
6
6
  assert_equal hash, "here are some good words. I hope you love them!".word_hash
7
7
  end
8
- end
8
+ end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.6
3
3
  specification_version: 1
4
4
  name: classifier
5
5
  version: !ruby/object:Gem::Version
6
- version: "1.0"
7
- date: 2005-04-10
6
+ version: "1.1"
7
+ date: 2005-04-11
8
8
  summary: A general classifier module to allow Bayesian and other types of classifications.
9
9
  require_paths:
10
10
  - lib
@@ -36,9 +36,10 @@ files:
36
36
  - lib/classifier/string_extensions/word_hash.rb
37
37
  - bin/bayes.rb
38
38
  - test/bayes
39
+ - test/string_extensions
39
40
  - test/test_helper.rb
40
41
  - test/bayes/bayesian_test.rb
41
- - test/bayes/string_extensions_test.rb
42
+ - test/string_extensions/word_hash_test.rb
42
43
  - Rakefile
43
44
  - README
44
45
  test_files: []