simple_classifier 1.0.0 → 1.3.4

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile ADDED
@@ -0,0 +1,52 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/testtask'
4
+ require 'rake/rdoctask'
5
+
6
+ PKG_VERSION = "1.3.1"
7
+
8
+ PKG_FILES = FileList[
9
+ "lib/**/*", "bin/*", "test/**/*", "[A-Z]*", "Rakefile", "html/**/*"
10
+ ]
11
+
12
+ desc "Default Task"
13
+ task :default => [ :test ]
14
+
15
+ # Run the unit tests
16
+ desc "Run all unit tests"
17
+ Rake::TestTask.new("test") { |t|
18
+ t.libs << "lib"
19
+ t.pattern = 'test/*/*_test.rb'
20
+ t.verbose = true
21
+ }
22
+
23
+ # Make a console, useful when working on tests
24
+ desc "Generate a test console"
25
+ task :console do
26
+ verbose( false ) { sh "irb -I lib/ -r 'classifier'" }
27
+ end
28
+
29
+ # Generate the RDoc documentation
30
+ desc "Create documentation"
31
+ Rake::RDocTask.new("doc") { |rdoc|
32
+ rdoc.title = "simple_classifier - A simple Bayesian classifier"
33
+ rdoc.rdoc_dir = 'html'
34
+ rdoc.rdoc_files.include('README')
35
+ rdoc.rdoc_files.include('lib/**/*.rb')
36
+ }
37
+
38
+ desc "Generate gemspec with jeweler"
39
+ begin
40
+ require 'jeweler'
41
+ Jeweler::Tasks.new do |gemspec|
42
+ gemspec.name = "simple_classifier"
43
+ gemspec.summary = "A simple bayesian classifier"
44
+ gemspec.email = "ben.orenstein@gmail.com"
45
+ gemspec.homepage = "http://github.com/r00k/simple_classifier"
46
+ gemspec.authors = ["Ben Orenstein", "Lucas Carlson", "David Fayram II"]
47
+ gemspec.version = '1.3.4'
48
+ end
49
+ Jeweler::GemcutterTasks.new
50
+ rescue LoadError
51
+ puts "Jeweler not available. Install it with: gem install jeweler"
52
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 2.0.0
data/bin/bayes.rb CHANGED
@@ -2,9 +2,9 @@
2
2
 
3
3
  begin
4
4
  require 'rubygems'
5
- require 'classifier'
5
+ require 'simple_classifier'
6
6
  rescue
7
- require 'classifier'
7
+ require 'simple_classifier'
8
8
  end
9
9
 
10
10
  require 'madeleine'
data/bin/summarize.rb CHANGED
@@ -1,10 +1,10 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  begin
4
- require 'rubygems'
5
- require 'classifier'
4
+ require 'rubygems'
5
+ require 'simple_classifier'
6
6
  rescue
7
- require 'classifier'
7
+ require 'simple_classifier'
8
8
  end
9
9
 
10
10
  require 'open-uri'
data/install.rb ADDED
@@ -0,0 +1,49 @@
1
+ require 'rbconfig'
2
+ require 'find'
3
+ require 'ftools'
4
+
5
+ include Config
6
+
7
+ # this was adapted from rdoc's install.rb by way of Log4r
8
+
9
+ $sitedir = CONFIG["sitelibdir"]
10
+ unless $sitedir
11
+ version = CONFIG["MAJOR"] + "." + CONFIG["MINOR"]
12
+ $libdir = File.join(CONFIG["libdir"], "ruby", version)
13
+ $sitedir = $:.find {|x| x =~ /site_ruby/ }
14
+ if !$sitedir
15
+ $sitedir = File.join($libdir, "site_ruby")
16
+ elsif $sitedir !~ Regexp.quote(version)
17
+ $sitedir = File.join($sitedir, version)
18
+ end
19
+ end
20
+
21
+ makedirs = %w{ simple_classifier }
22
+ makedirs = %w{ simple_classifier/extensions }
23
+ makedirs.each {|f| File::makedirs(File.join($sitedir, *f.split(/\//)))}
24
+
25
+ Dir.chdir("lib")
26
+ begin
27
+ require 'rubygems'
28
+ require 'rake'
29
+ rescue LoadError
30
+ puts
31
+ puts "Please install Gem and Rake from http://rubyforge.org/projects/rubygems and http://rubyforge.org/projects/rake"
32
+ puts
33
+ exit(-1)
34
+ end
35
+
36
+ files = FileList["**/*"]
37
+
38
+ # File::safe_unlink *deprecated.collect{|f| File.join($sitedir, f.split(/\//))}
39
+ files.each {|f|
40
+ File::install(f, File.join($sitedir, *f.split(/\//)), 0644, true)
41
+ }
42
+
43
+ begin
44
+ require 'stemmer'
45
+ rescue LoadError
46
+ puts
47
+ puts "Please install Stemmer from http://rubyforge.org/projects/stemmer or via 'gem install stemmer'"
48
+ puts
49
+ end
@@ -25,5 +25,5 @@
25
25
  # License:: LGPL
26
26
 
27
27
  require 'rubygems'
28
- require 'classifier/extensions/string'
29
- require 'classifier/bayes'
28
+ require 'simple_classifier/extensions/string'
29
+ require 'simple_classifier/bayes'
@@ -2,7 +2,7 @@
2
2
  # Copyright:: Copyright (c) 2005 Lucas Carlson
3
3
  # License:: LGPL
4
4
 
5
- module Classifier
5
+ module DfhccClassifier
6
6
 
7
7
  class Bayes
8
8
  # The class can be created with one or more categories, each of which will be
@@ -0,0 +1,16 @@
1
+ # Author:: Lucas Carlson (mailto:lucas@rufy.com)
2
+ # Copyright:: Copyright (c) 2005 Lucas Carlson
3
+ # License:: LGPL
4
+
5
+ begin
6
+ require 'stemmer'
7
+ rescue LoadError
8
+ puts "Please install stemmer from http://rubyforge.org/projects/stemmer or 'gem install stemmer'"
9
+ exit(-1)
10
+ end
11
+
12
+ require 'simple_classifier/extensions/word_hash'
13
+
14
+ class Object
15
+ def prepare_category_name; to_s.gsub("_"," ").capitalize.intern end
16
+ end
@@ -0,0 +1,125 @@
1
+ # Author:: Lucas Carlson (mailto:lucas@rufy.com)
2
+ # Copyright:: Copyright (c) 2005 Lucas Carlson
3
+ # License:: LGPL
4
+
5
+ # These are extensions to the String class to provide convenience
6
+ # methods for the Classifier package.
7
+ class String
8
+
9
+ # Removes common punctuation symbols, returning a new string.
10
+ # E.g.,
11
+ # "Hello (greeting's), with {braces} < >...?".without_punctuation
12
+ # => "Hello greetings with braces "
13
+ def without_punctuation
14
+ tr( ',?.!;:"@#$%^&*()_=+[]{}\|<>/`~', " " ) .tr( "'\-", "")
15
+ end
16
+
17
+ # Return a Hash of strings => ints. Each word in the string is stemmed,
18
+ # interned, and mapped to its frequency in the document.
19
+ def word_hash
20
+ word_hash_for_words(gsub(/[^\w\s]/,"").split + gsub(/[\w]/," ").split)
21
+ end
22
+
23
+ # Return a word hash without extra punctuation or short symbols, just stemmed words
24
+ def clean_word_hash
25
+ word_hash_for_words gsub(/[^\w\s]/,"").split
26
+ end
27
+
28
+ private
29
+
30
+ def word_hash_for_words(words)
31
+ d = Hash.new
32
+ words.each do |word|
33
+ word.downcase! if word =~ /[\w]+/
34
+ key = word.stem.intern
35
+ if word =~ /[^\w]/ || ! CORPUS_SKIP_WORDS.include?(word) && word.length > 2
36
+ d[key] ||= 0
37
+ d[key] += 1
38
+ end
39
+ end
40
+ return d
41
+ end
42
+
43
+ CORPUS_SKIP_WORDS = [
44
+ "a",
45
+ "again",
46
+ "all",
47
+ "along",
48
+ "are",
49
+ "also",
50
+ "an",
51
+ "and",
52
+ "as",
53
+ "at",
54
+ "but",
55
+ "by",
56
+ "came",
57
+ "can",
58
+ "cant",
59
+ "couldnt",
60
+ "did",
61
+ "didn",
62
+ "didnt",
63
+ "do",
64
+ "doesnt",
65
+ "dont",
66
+ "ever",
67
+ "first",
68
+ "from",
69
+ "have",
70
+ "her",
71
+ "here",
72
+ "him",
73
+ "how",
74
+ "i",
75
+ "if",
76
+ "in",
77
+ "into",
78
+ "is",
79
+ "isnt",
80
+ "it",
81
+ "itll",
82
+ "just",
83
+ "last",
84
+ "least",
85
+ "like",
86
+ "most",
87
+ "my",
88
+ "new",
89
+ "no",
90
+ "not",
91
+ "now",
92
+ "of",
93
+ "on",
94
+ "or",
95
+ "should",
96
+ "sinc",
97
+ "so",
98
+ "some",
99
+ "th",
100
+ "than",
101
+ "this",
102
+ "that",
103
+ "the",
104
+ "their",
105
+ "then",
106
+ "those",
107
+ "to",
108
+ "told",
109
+ "too",
110
+ "true",
111
+ "try",
112
+ "until",
113
+ "url",
114
+ "us",
115
+ "were",
116
+ "when",
117
+ "whether",
118
+ "while",
119
+ "with",
120
+ "within",
121
+ "yes",
122
+ "you",
123
+ "youll",
124
+ ]
125
+ end
@@ -1,7 +1,7 @@
1
1
  require File.dirname(__FILE__) + '/../test_helper'
2
2
  class BayesianTest < Test::Unit::TestCase
3
3
  def setup
4
- @classifier = Classifier::Bayes.new 'Interesting', 'Uninteresting'
4
+ @classifier = DfhccClassifier::Bayes.new 'Interesting', 'Uninteresting'
5
5
  end
6
6
 
7
7
  def test_good_training
@@ -12,7 +12,7 @@ class BayesianTest < Test::Unit::TestCase
12
12
  assert_raise(StandardError) { @classifier.train_no_category "words" }
13
13
  end
14
14
 
15
- def test_bad_method
15
+ def test_method_missing_calls_super
16
16
  assert_raise(NoMethodError) { @classifier.forget_everything_you_know "" }
17
17
  end
18
18
 
data/test/test_helper.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  $:.unshift(File.dirname(__FILE__) + '/../lib')
2
2
 
3
3
  require 'test/unit'
4
- require 'classifier'
4
+ require 'simple_classifier'
5
5
  require 'ruby-debug'
6
6
 
7
7
  # Colorize test output
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_classifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ prerelease: false
5
+ segments:
6
+ - 1
7
+ - 3
8
+ - 4
9
+ version: 1.3.4
5
10
  platform: ruby
6
11
  authors:
7
12
  - Ben Orenstein
@@ -11,20 +16,11 @@ autorequire:
11
16
  bindir: bin
12
17
  cert_chain: []
13
18
 
14
- date: 2009-10-31 00:00:00 -04:00
19
+ date: 2010-02-26 00:00:00 -05:00
15
20
  default_executable:
16
- dependencies:
17
- - !ruby/object:Gem::Dependency
18
- name: stemmer
19
- type: :runtime
20
- version_requirement:
21
- version_requirements: !ruby/object:Gem::Requirement
22
- requirements:
23
- - - ">="
24
- - !ruby/object:Gem::Version
25
- version: "0"
26
- version:
27
- description: Simple bayesian filtering, without a lot of fuss.
21
+ dependencies: []
22
+
23
+ description:
28
24
  email: ben.orenstein@gmail.com
29
25
  executables:
30
26
  - bayes.rb
@@ -35,12 +31,22 @@ extra_rdoc_files:
35
31
  - LICENSE
36
32
  - README
37
33
  files:
38
- - lib/classifier.rb
39
- - lib/classifier/bayes.rb
40
34
  - LICENSE
41
35
  - README
36
+ - Rakefile
37
+ - VERSION
38
+ - bin/bayes.rb
39
+ - bin/summarize.rb
40
+ - install.rb
41
+ - lib/simple_classifier.rb
42
+ - lib/simple_classifier/bayes.rb
43
+ - lib/simple_classifier/extensions/string.rb
44
+ - lib/simple_classifier/extensions/word_hash.rb
45
+ - test/bayes/bayesian_test.rb
46
+ - test/extensions/word_hash_test.rb
47
+ - test/test_helper.rb
42
48
  has_rdoc: true
43
- homepage:
49
+ homepage: http://github.com/r00k/simple_classifier
44
50
  licenses: []
45
51
 
46
52
  post_install_message:
@@ -52,21 +58,23 @@ required_ruby_version: !ruby/object:Gem::Requirement
52
58
  requirements:
53
59
  - - ">="
54
60
  - !ruby/object:Gem::Version
61
+ segments:
62
+ - 0
55
63
  version: "0"
56
- version:
57
64
  required_rubygems_version: !ruby/object:Gem::Requirement
58
65
  requirements:
59
66
  - - ">="
60
67
  - !ruby/object:Gem::Version
61
- version: 1.3.5
62
- version:
68
+ segments:
69
+ - 0
70
+ version: "0"
63
71
  requirements: []
64
72
 
65
73
  rubyforge_project:
66
- rubygems_version: 1.3.5
74
+ rubygems_version: 1.3.6
67
75
  signing_key:
68
76
  specification_version: 3
69
- summary: A simple bayesian classifier for rubyists
77
+ summary: A simple bayesian classifier
70
78
  test_files:
71
79
  - test/test_helper.rb
72
80
  - test/bayes/bayesian_test.rb