simple_classifier 1.0.0 → 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile ADDED
@@ -0,0 +1,52 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/testtask'
4
+ require 'rake/rdoctask'
5
+
6
+ PKG_VERSION = "1.3.1"
7
+
8
+ PKG_FILES = FileList[
9
+ "lib/**/*", "bin/*", "test/**/*", "[A-Z]*", "Rakefile", "html/**/*"
10
+ ]
11
+
12
+ desc "Default Task"
13
+ task :default => [ :test ]
14
+
15
+ # Run the unit tests
16
+ desc "Run all unit tests"
17
+ Rake::TestTask.new("test") { |t|
18
+ t.libs << "lib"
19
+ t.pattern = 'test/*/*_test.rb'
20
+ t.verbose = true
21
+ }
22
+
23
+ # Make a console, useful when working on tests
24
+ desc "Generate a test console"
25
+ task :console do
26
+ verbose( false ) { sh "irb -I lib/ -r 'classifier'" }
27
+ end
28
+
29
+ # Generate the RDoc documentation
30
+ desc "Create documentation"
31
+ Rake::RDocTask.new("doc") { |rdoc|
32
+ rdoc.title = "simple_classifier - A simple Bayesian classifier"
33
+ rdoc.rdoc_dir = 'html'
34
+ rdoc.rdoc_files.include('README')
35
+ rdoc.rdoc_files.include('lib/**/*.rb')
36
+ }
37
+
38
+ desc "Generate gemspec with jeweler"
39
+ begin
40
+ require 'jeweler'
41
+ Jeweler::Tasks.new do |gemspec|
42
+ gemspec.name = "simple_classifier"
43
+ gemspec.summary = "A simple bayesian classifier"
44
+ gemspec.email = "ben.orenstein@gmail.com"
45
+ gemspec.homepage = "http://github.com/r00k/simple_classifier"
46
+ gemspec.authors = ["Ben Orenstein", "Lucas Carlson", "David Fayram II"]
47
+ gemspec.version = '1.3.4'
48
+ end
49
+ Jeweler::GemcutterTasks.new
50
+ rescue LoadError
51
+ puts "Jeweler not available. Install it with: gem install jeweler"
52
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 2.0.0
data/bin/bayes.rb CHANGED
@@ -2,9 +2,9 @@
2
2
 
3
3
  begin
4
4
  require 'rubygems'
5
- require 'classifier'
5
+ require 'simple_classifier'
6
6
  rescue
7
- require 'classifier'
7
+ require 'simple_classifier'
8
8
  end
9
9
 
10
10
  require 'madeleine'
data/bin/summarize.rb CHANGED
@@ -1,10 +1,10 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  begin
4
- require 'rubygems'
5
- require 'classifier'
4
+ require 'rubygems'
5
+ require 'simple_classifier'
6
6
  rescue
7
- require 'classifier'
7
+ require 'simple_classifier'
8
8
  end
9
9
 
10
10
  require 'open-uri'
data/install.rb ADDED
@@ -0,0 +1,49 @@
1
+ require 'rbconfig'
2
+ require 'find'
3
+ require 'ftools'
4
+
5
+ include Config
6
+
7
+ # this was adapted from rdoc's install.rb by way of Log4r
8
+
9
+ $sitedir = CONFIG["sitelibdir"]
10
+ unless $sitedir
11
+ version = CONFIG["MAJOR"] + "." + CONFIG["MINOR"]
12
+ $libdir = File.join(CONFIG["libdir"], "ruby", version)
13
+ $sitedir = $:.find {|x| x =~ /site_ruby/ }
14
+ if !$sitedir
15
+ $sitedir = File.join($libdir, "site_ruby")
16
+ elsif $sitedir !~ Regexp.quote(version)
17
+ $sitedir = File.join($sitedir, version)
18
+ end
19
+ end
20
+
21
+ makedirs = %w{ simple_classifier }
22
+ makedirs = %w{ simple_classifier/extensions }
23
+ makedirs.each {|f| File::makedirs(File.join($sitedir, *f.split(/\//)))}
24
+
25
+ Dir.chdir("lib")
26
+ begin
27
+ require 'rubygems'
28
+ require 'rake'
29
+ rescue LoadError
30
+ puts
31
+ puts "Please install Gem and Rake from http://rubyforge.org/projects/rubygems and http://rubyforge.org/projects/rake"
32
+ puts
33
+ exit(-1)
34
+ end
35
+
36
+ files = FileList["**/*"]
37
+
38
+ # File::safe_unlink *deprecated.collect{|f| File.join($sitedir, f.split(/\//))}
39
+ files.each {|f|
40
+ File::install(f, File.join($sitedir, *f.split(/\//)), 0644, true)
41
+ }
42
+
43
+ begin
44
+ require 'stemmer'
45
+ rescue LoadError
46
+ puts
47
+ puts "Please install Stemmer from http://rubyforge.org/projects/stemmer or via 'gem install stemmer'"
48
+ puts
49
+ end
@@ -25,5 +25,5 @@
25
25
  # License:: LGPL
26
26
 
27
27
  require 'rubygems'
28
- require 'classifier/extensions/string'
29
- require 'classifier/bayes'
28
+ require 'simple_classifier/extensions/string'
29
+ require 'simple_classifier/bayes'
@@ -2,7 +2,7 @@
2
2
  # Copyright:: Copyright (c) 2005 Lucas Carlson
3
3
  # License:: LGPL
4
4
 
5
- module Classifier
5
+ module DfhccClassifier
6
6
 
7
7
  class Bayes
8
8
  # The class can be created with one or more categories, each of which will be
@@ -0,0 +1,16 @@
1
+ # Author:: Lucas Carlson (mailto:lucas@rufy.com)
2
+ # Copyright:: Copyright (c) 2005 Lucas Carlson
3
+ # License:: LGPL
4
+
5
+ begin
6
+ require 'stemmer'
7
+ rescue LoadError
8
+ puts "Please install stemmer from http://rubyforge.org/projects/stemmer or 'gem install stemmer'"
9
+ exit(-1)
10
+ end
11
+
12
+ require 'simple_classifier/extensions/word_hash'
13
+
14
+ class Object
15
+ def prepare_category_name; to_s.gsub("_"," ").capitalize.intern end
16
+ end
@@ -0,0 +1,125 @@
1
+ # Author:: Lucas Carlson (mailto:lucas@rufy.com)
2
+ # Copyright:: Copyright (c) 2005 Lucas Carlson
3
+ # License:: LGPL
4
+
5
+ # These are extensions to the String class to provide convenience
6
+ # methods for the Classifier package.
7
+ class String
8
+
9
+ # Removes common punctuation symbols, returning a new string.
10
+ # E.g.,
11
+ # "Hello (greeting's), with {braces} < >...?".without_punctuation
12
+ # => "Hello greetings with braces "
13
+ def without_punctuation
14
+ tr( ',?.!;:"@#$%^&*()_=+[]{}\|<>/`~', " " ) .tr( "'\-", "")
15
+ end
16
+
17
+ # Return a Hash of strings => ints. Each word in the string is stemmed,
18
+ # interned, and mapped to its frequency in the document.
19
+ def word_hash
20
+ word_hash_for_words(gsub(/[^\w\s]/,"").split + gsub(/[\w]/," ").split)
21
+ end
22
+
23
+ # Return a word hash without extra punctuation or short symbols, just stemmed words
24
+ def clean_word_hash
25
+ word_hash_for_words gsub(/[^\w\s]/,"").split
26
+ end
27
+
28
+ private
29
+
30
+ def word_hash_for_words(words)
31
+ d = Hash.new
32
+ words.each do |word|
33
+ word.downcase! if word =~ /[\w]+/
34
+ key = word.stem.intern
35
+ if word =~ /[^\w]/ || ! CORPUS_SKIP_WORDS.include?(word) && word.length > 2
36
+ d[key] ||= 0
37
+ d[key] += 1
38
+ end
39
+ end
40
+ return d
41
+ end
42
+
43
+ CORPUS_SKIP_WORDS = [
44
+ "a",
45
+ "again",
46
+ "all",
47
+ "along",
48
+ "are",
49
+ "also",
50
+ "an",
51
+ "and",
52
+ "as",
53
+ "at",
54
+ "but",
55
+ "by",
56
+ "came",
57
+ "can",
58
+ "cant",
59
+ "couldnt",
60
+ "did",
61
+ "didn",
62
+ "didnt",
63
+ "do",
64
+ "doesnt",
65
+ "dont",
66
+ "ever",
67
+ "first",
68
+ "from",
69
+ "have",
70
+ "her",
71
+ "here",
72
+ "him",
73
+ "how",
74
+ "i",
75
+ "if",
76
+ "in",
77
+ "into",
78
+ "is",
79
+ "isnt",
80
+ "it",
81
+ "itll",
82
+ "just",
83
+ "last",
84
+ "least",
85
+ "like",
86
+ "most",
87
+ "my",
88
+ "new",
89
+ "no",
90
+ "not",
91
+ "now",
92
+ "of",
93
+ "on",
94
+ "or",
95
+ "should",
96
+ "sinc",
97
+ "so",
98
+ "some",
99
+ "th",
100
+ "than",
101
+ "this",
102
+ "that",
103
+ "the",
104
+ "their",
105
+ "then",
106
+ "those",
107
+ "to",
108
+ "told",
109
+ "too",
110
+ "true",
111
+ "try",
112
+ "until",
113
+ "url",
114
+ "us",
115
+ "were",
116
+ "when",
117
+ "whether",
118
+ "while",
119
+ "with",
120
+ "within",
121
+ "yes",
122
+ "you",
123
+ "youll",
124
+ ]
125
+ end
@@ -1,7 +1,7 @@
1
1
  require File.dirname(__FILE__) + '/../test_helper'
2
2
  class BayesianTest < Test::Unit::TestCase
3
3
  def setup
4
- @classifier = Classifier::Bayes.new 'Interesting', 'Uninteresting'
4
+ @classifier = DfhccClassifier::Bayes.new 'Interesting', 'Uninteresting'
5
5
  end
6
6
 
7
7
  def test_good_training
@@ -12,7 +12,7 @@ class BayesianTest < Test::Unit::TestCase
12
12
  assert_raise(StandardError) { @classifier.train_no_category "words" }
13
13
  end
14
14
 
15
- def test_bad_method
15
+ def test_method_missing_calls_super
16
16
  assert_raise(NoMethodError) { @classifier.forget_everything_you_know "" }
17
17
  end
18
18
 
data/test/test_helper.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  $:.unshift(File.dirname(__FILE__) + '/../lib')
2
2
 
3
3
  require 'test/unit'
4
- require 'classifier'
4
+ require 'simple_classifier'
5
5
  require 'ruby-debug'
6
6
 
7
7
  # Colorize test output
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_classifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ prerelease: false
5
+ segments:
6
+ - 1
7
+ - 3
8
+ - 4
9
+ version: 1.3.4
5
10
  platform: ruby
6
11
  authors:
7
12
  - Ben Orenstein
@@ -11,20 +16,11 @@ autorequire:
11
16
  bindir: bin
12
17
  cert_chain: []
13
18
 
14
- date: 2009-10-31 00:00:00 -04:00
19
+ date: 2010-02-26 00:00:00 -05:00
15
20
  default_executable:
16
- dependencies:
17
- - !ruby/object:Gem::Dependency
18
- name: stemmer
19
- type: :runtime
20
- version_requirement:
21
- version_requirements: !ruby/object:Gem::Requirement
22
- requirements:
23
- - - ">="
24
- - !ruby/object:Gem::Version
25
- version: "0"
26
- version:
27
- description: Simple bayesian filtering, without a lot of fuss.
21
+ dependencies: []
22
+
23
+ description:
28
24
  email: ben.orenstein@gmail.com
29
25
  executables:
30
26
  - bayes.rb
@@ -35,12 +31,22 @@ extra_rdoc_files:
35
31
  - LICENSE
36
32
  - README
37
33
  files:
38
- - lib/classifier.rb
39
- - lib/classifier/bayes.rb
40
34
  - LICENSE
41
35
  - README
36
+ - Rakefile
37
+ - VERSION
38
+ - bin/bayes.rb
39
+ - bin/summarize.rb
40
+ - install.rb
41
+ - lib/simple_classifier.rb
42
+ - lib/simple_classifier/bayes.rb
43
+ - lib/simple_classifier/extensions/string.rb
44
+ - lib/simple_classifier/extensions/word_hash.rb
45
+ - test/bayes/bayesian_test.rb
46
+ - test/extensions/word_hash_test.rb
47
+ - test/test_helper.rb
42
48
  has_rdoc: true
43
- homepage:
49
+ homepage: http://github.com/r00k/simple_classifier
44
50
  licenses: []
45
51
 
46
52
  post_install_message:
@@ -52,21 +58,23 @@ required_ruby_version: !ruby/object:Gem::Requirement
52
58
  requirements:
53
59
  - - ">="
54
60
  - !ruby/object:Gem::Version
61
+ segments:
62
+ - 0
55
63
  version: "0"
56
- version:
57
64
  required_rubygems_version: !ruby/object:Gem::Requirement
58
65
  requirements:
59
66
  - - ">="
60
67
  - !ruby/object:Gem::Version
61
- version: 1.3.5
62
- version:
68
+ segments:
69
+ - 0
70
+ version: "0"
63
71
  requirements: []
64
72
 
65
73
  rubyforge_project:
66
- rubygems_version: 1.3.5
74
+ rubygems_version: 1.3.6
67
75
  signing_key:
68
76
  specification_version: 3
69
- summary: A simple bayesian classifier for rubyists
77
+ summary: A simple bayesian classifier
70
78
  test_files:
71
79
  - test/test_helper.rb
72
80
  - test/bayes/bayesian_test.rb