simple_classifier 2.0.1 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/{README → README.rdoc} +5 -3
- data/VERSION +1 -1
- data/simple_classifier.gemspec +3 -7
- metadata +6 -10
- data/bin/bayes.rb +0 -36
- data/bin/summarize.rb +0 -16
- data/install.rb +0 -49
data/{README → README.rdoc}
RENAMED
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
|
|
1
|
+
= simple_classifier
|
|
2
|
+
|
|
3
|
+
Bayesian Classification without a lot of fuss
|
|
2
4
|
|
|
3
5
|
== Installation
|
|
4
6
|
|
|
@@ -20,7 +22,7 @@ simple_classifier depends on Martin Porter's stemmer gem, which should be instal
|
|
|
20
22
|
* http://www.paulgraham.com/spam.html
|
|
21
23
|
|
|
22
24
|
== Authors
|
|
23
|
-
* Ben Orenstein
|
|
24
|
-
* Lucas Carlson
|
|
25
|
+
* Ben Orenstein ben.orenstein@gmail.com
|
|
26
|
+
* Lucas Carlson lucas@rufy.com
|
|
25
27
|
|
|
26
28
|
This library is released under the terms of the GNU LGPL. See LICENSE for more details.
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
2.0.1
|
|
1
|
+
2.0.2
|
data/simple_classifier.gemspec
CHANGED
|
@@ -5,26 +5,22 @@
|
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
7
|
s.name = %q{simple_classifier}
|
|
8
|
-
s.version = "2.0.1"
|
|
8
|
+
s.version = "2.0.2"
|
|
9
9
|
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
11
|
s.authors = ["Ben Orenstein", "Lucas Carlson", "David Fayram II"]
|
|
12
12
|
s.date = %q{2010-02-26}
|
|
13
13
|
s.email = %q{ben.orenstein@gmail.com}
|
|
14
|
-
s.executables = ["bayes.rb", "summarize.rb"]
|
|
15
14
|
s.extra_rdoc_files = [
|
|
16
15
|
"LICENSE",
|
|
17
|
-
"README"
|
|
16
|
+
"README.rdoc"
|
|
18
17
|
]
|
|
19
18
|
s.files = [
|
|
20
19
|
".gitignore",
|
|
21
20
|
"LICENSE",
|
|
22
|
-
"README",
|
|
21
|
+
"README.rdoc",
|
|
23
22
|
"Rakefile",
|
|
24
23
|
"VERSION",
|
|
25
|
-
"bin/bayes.rb",
|
|
26
|
-
"bin/summarize.rb",
|
|
27
|
-
"install.rb",
|
|
28
24
|
"lib/simple_classifier.rb",
|
|
29
25
|
"lib/simple_classifier/bayes.rb",
|
|
30
26
|
"lib/simple_classifier/extensions/string.rb",
|
metadata
CHANGED
|
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
|
5
5
|
segments:
|
|
6
6
|
- 2
|
|
7
7
|
- 0
|
|
8
|
-
- 1
|
|
9
|
-
version: 2.0.1
|
|
8
|
+
- 2
|
|
9
|
+
version: 2.0.2
|
|
10
10
|
platform: ruby
|
|
11
11
|
authors:
|
|
12
12
|
- Ben Orenstein
|
|
@@ -33,23 +33,19 @@ dependencies:
|
|
|
33
33
|
version_requirements: *id001
|
|
34
34
|
description:
|
|
35
35
|
email: ben.orenstein@gmail.com
|
|
36
|
-
executables:
|
|
37
|
-
- bayes.rb
|
|
38
|
-
- summarize.rb
|
|
36
|
+
executables: []
|
|
37
|
+
|
|
39
38
|
extensions: []
|
|
40
39
|
|
|
41
40
|
extra_rdoc_files:
|
|
42
41
|
- LICENSE
|
|
43
|
-
- README
|
|
42
|
+
- README.rdoc
|
|
44
43
|
files:
|
|
45
44
|
- .gitignore
|
|
46
45
|
- LICENSE
|
|
47
|
-
- README
|
|
46
|
+
- README.rdoc
|
|
48
47
|
- Rakefile
|
|
49
48
|
- VERSION
|
|
50
|
-
- bin/bayes.rb
|
|
51
|
-
- bin/summarize.rb
|
|
52
|
-
- install.rb
|
|
53
49
|
- lib/simple_classifier.rb
|
|
54
50
|
- lib/simple_classifier/bayes.rb
|
|
55
51
|
- lib/simple_classifier/extensions/string.rb
|
data/bin/bayes.rb
DELETED
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
|
|
3
|
-
begin
|
|
4
|
-
require 'rubygems'
|
|
5
|
-
require 'simple_classifier'
|
|
6
|
-
rescue
|
|
7
|
-
require 'simple_classifier'
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
require 'madeleine'
|
|
11
|
-
|
|
12
|
-
m = SnapshotMadeleine.new(File.expand_path("~/.bayes_data")) {
|
|
13
|
-
Classifier::Bayes.new 'Interesting', 'Uninteresting'
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
case ARGV[0]
|
|
17
|
-
when "add"
|
|
18
|
-
case ARGV[1].downcase
|
|
19
|
-
when "interesting"
|
|
20
|
-
m.system.train_interesting File.open(ARGV[2]).read
|
|
21
|
-
puts "#{ARGV[2]} has been classified as interesting"
|
|
22
|
-
when "uninteresting"
|
|
23
|
-
m.system.train_uninteresting File.open(ARGV[2]).read
|
|
24
|
-
puts "#{ARGV[2]} has been classified as uninteresting"
|
|
25
|
-
else
|
|
26
|
-
puts "Invalid category: choose between interesting and uninteresting"
|
|
27
|
-
exit(1)
|
|
28
|
-
end
|
|
29
|
-
when "classify"
|
|
30
|
-
puts m.system.classify(File.open(ARGV[1]).read)
|
|
31
|
-
else
|
|
32
|
-
puts "Invalid option: choose add [category] [file] or clasify [file]"
|
|
33
|
-
exit(-1)
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
m.take_snapshot
|
data/bin/summarize.rb
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
|
|
3
|
-
begin
|
|
4
|
-
require 'rubygems'
|
|
5
|
-
require 'simple_classifier'
|
|
6
|
-
rescue
|
|
7
|
-
require 'simple_classifier'
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
require 'open-uri'
|
|
11
|
-
|
|
12
|
-
num = ARGV[1].to_i
|
|
13
|
-
num = num < 1 ? 10 : num
|
|
14
|
-
|
|
15
|
-
text = open(ARGV.first).read
|
|
16
|
-
puts text.gsub(/<[^>]+>/,"").gsub(/[\s]+/," ").summary(num)
|
data/install.rb
DELETED
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
require 'rbconfig'
|
|
2
|
-
require 'find'
|
|
3
|
-
require 'ftools'
|
|
4
|
-
|
|
5
|
-
include Config
|
|
6
|
-
|
|
7
|
-
# this was adapted from rdoc's install.rb by ways of Log4r
|
|
8
|
-
|
|
9
|
-
$sitedir = CONFIG["sitelibdir"]
|
|
10
|
-
unless $sitedir
|
|
11
|
-
version = CONFIG["MAJOR"] + "." + CONFIG["MINOR"]
|
|
12
|
-
$libdir = File.join(CONFIG["libdir"], "ruby", version)
|
|
13
|
-
$sitedir = $:.find {|x| x =~ /site_ruby/ }
|
|
14
|
-
if !$sitedir
|
|
15
|
-
$sitedir = File.join($libdir, "site_ruby")
|
|
16
|
-
elsif $sitedir !~ Regexp.quote(version)
|
|
17
|
-
$sitedir = File.join($sitedir, version)
|
|
18
|
-
end
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
makedirs = %w{ simple_classifier }
|
|
22
|
-
makedirs = %w{ simple_classifier/extensions }
|
|
23
|
-
makedirs.each {|f| File::makedirs(File.join($sitedir, *f.split(/\//)))}
|
|
24
|
-
|
|
25
|
-
Dir.chdir("lib")
|
|
26
|
-
begin
|
|
27
|
-
require 'rubygems'
|
|
28
|
-
require 'rake'
|
|
29
|
-
rescue LoadError
|
|
30
|
-
puts
|
|
31
|
-
puts "Please install Gem and Rake from http://rubyforge.org/projects/rubygems and http://rubyforge.org/projects/rake"
|
|
32
|
-
puts
|
|
33
|
-
exit(-1)
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
files = FileList["**/*"]
|
|
37
|
-
|
|
38
|
-
# File::safe_unlink *deprecated.collect{|f| File.join($sitedir, f.split(/\//))}
|
|
39
|
-
files.each {|f|
|
|
40
|
-
File::install(f, File.join($sitedir, *f.split(/\//)), 0644, true)
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
begin
|
|
44
|
-
require 'stemmer'
|
|
45
|
-
rescue LoadError
|
|
46
|
-
puts
|
|
47
|
-
puts "Please install Stemmer from http://rubyforge.org/projects/stemmer or via 'gem install stemmer'"
|
|
48
|
-
puts
|
|
49
|
-
end
|