crm114 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +1 -1
- data/README +51 -20
- data/Rakefile +14 -37
- data/VERSION +1 -0
- data/lib/crm114.rb +2 -2
- metadata +57 -49
- data/CHANGELOG +0 -3
- data/Manifest.txt +0 -8
data/LICENSE
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
Copyright (c) 2005-
|
1
|
+
Copyright (c) 2005-2009 Arto Bendiken <http://ar.to/>
|
2
2
|
|
3
3
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
4
|
of this software and associated documentation files (the "Software"), to
|
data/README
CHANGED
@@ -1,12 +1,14 @@
|
|
1
|
-
|
1
|
+
= CRM114 Controllable Regex Mutilator for Ruby
|
2
2
|
|
3
3
|
This is a Ruby interface to the CRM114 Controllable Regex Mutilator, an
|
4
4
|
advanced and fast text classifier that uses sparse binary polynomial
|
5
5
|
matching with a Bayesian Chain Rule evaluator and a hidden Markov model to
|
6
6
|
categorize data with up to a 99.87% accuracy.
|
7
7
|
|
8
|
-
|
9
|
-
* http://
|
8
|
+
* http://crm114.rubyforge.org
|
9
|
+
* http://github.com/bendiken/crm114
|
10
|
+
* http://ar.to/2006/07/spam-filters-alien-technology-and-ruby-on-rails
|
11
|
+
|
10
12
|
|
11
13
|
=== About CRM114
|
12
14
|
|
@@ -15,45 +17,74 @@ The Ruby wrapper grew out of this:
|
|
15
17
|
* http://en.wikipedia.org/wiki/Dr_Strangelove
|
16
18
|
* http://www.paulgraham.com/wsy.html
|
17
19
|
|
18
|
-
== Download
|
19
|
-
|
20
|
-
* http://rubyforge.org/projects/crm114
|
21
|
-
* gem install crm114
|
22
|
-
* svn checkout svn://rubyforge.org/var/svn/crm114
|
23
|
-
|
24
|
-
== Dependencies
|
25
|
-
|
26
|
-
Requires the CRM114 binaries to be installed. Specifically, the '+crm+'
|
27
|
-
binary should be accessible in the current user's PATH environment variable.
|
28
20
|
|
29
21
|
== Usage
|
30
22
|
|
31
23
|
The CRM114 library interface is very similar to that of the
|
32
|
-
Classifier[http://rubyforge.org/projects/classifier
|
24
|
+
Classifier[http://rubyforge.org/projects/classifier] project.
|
33
25
|
|
34
26
|
Here follows a brief example:
|
35
27
|
|
36
28
|
require 'crm114'
|
29
|
+
|
37
30
|
crm = Classifier::CRM114.new([:interesting, :boring])
|
31
|
+
|
38
32
|
crm.train! :interesting, 'Some data set with a decent signal to noise ratio.'
|
39
33
|
crm.train! :boring, 'Pig latin, as in lorem ipsum dolor sit amet.'
|
34
|
+
|
40
35
|
crm.classify 'Lorem ipsum' => [:boring, 0.99]
|
41
36
|
crm.interesting? 'Lorem ipsum' => false
|
42
37
|
crm.boring? 'Lorem ipsum' => true
|
43
38
|
|
44
39
|
Have a look at the included unit tests for more comprehensive examples.
|
45
40
|
|
46
|
-
== Related Projects
|
47
41
|
|
42
|
+
== Dependencies
|
43
|
+
|
44
|
+
Requires the CRM114 binaries to be installed. Specifically, the '+crm+'
|
45
|
+
binary should be accessible in the current user's +PATH+ environment
|
46
|
+
variable.
|
47
|
+
|
48
|
+
|
49
|
+
== Download
|
50
|
+
|
51
|
+
To get a local working copy of the development repository, do:
|
52
|
+
|
53
|
+
% git clone git://github.com/bendiken/crm114.git
|
54
|
+
|
55
|
+
Alternatively, you can download the latest development version as a tarball
|
56
|
+
as follows:
|
57
|
+
|
58
|
+
% wget http://github.com/bendiken/crm114/tarball/master
|
59
|
+
|
60
|
+
|
61
|
+
== Installation
|
62
|
+
|
63
|
+
The recommended installation method is via RubyGems. To install the latest
|
64
|
+
official release from RubyForge, do:
|
65
|
+
|
66
|
+
% [sudo] gem install crm114
|
67
|
+
|
68
|
+
To use the very latest bleeding-edge development version, install the gem
|
69
|
+
directly from GitHub as follows:
|
70
|
+
|
71
|
+
% [sudo] gem install bendiken-crm114 -s http://gems.github.com
|
72
|
+
|
73
|
+
|
74
|
+
== Resources
|
75
|
+
|
76
|
+
* http://rubyforge.org/projects/crm114
|
48
77
|
* http://www.elegantchaos.com/node/129 (crm.py)
|
49
|
-
* http://rubyforge.org/projects/classifier
|
50
|
-
* http://rubyforge.org/projects/bishop
|
78
|
+
* http://rubyforge.org/projects/classifier
|
79
|
+
* http://rubyforge.org/projects/bishop
|
80
|
+
|
51
81
|
|
52
82
|
== Author
|
53
83
|
|
54
|
-
Arto Bendiken (mailto:arto.bendiken@gmail.com) - http://
|
84
|
+
* Arto Bendiken (mailto:arto.bendiken@gmail.com) - http://ar.to
|
85
|
+
|
55
86
|
|
56
87
|
== License
|
57
88
|
|
58
|
-
|
59
|
-
|
89
|
+
All source code is available under the terms of the MIT license. For more
|
90
|
+
information, see the accompanying LICENSE file.
|
data/Rakefile
CHANGED
@@ -1,41 +1,18 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$:.unshift(File.expand_path(File.join(File.dirname(__FILE__), 'lib')))
|
3
3
|
require 'rubygems'
|
4
|
+
require 'rakefile' # http://github.com/bendiken/rakefile
|
4
5
|
require 'crm114'
|
5
6
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
Hoe.new(PKG_NAME, PKG_VERSION) do |p|
|
19
|
-
p.author = PKG_AUTHOR
|
20
|
-
p.email = PKG_EMAIL
|
21
|
-
p.url = PKG_URL
|
22
|
-
p.summary = PKG_DESC
|
23
|
-
p.description = p.paragraphs_of('README', 1).first
|
24
|
-
p.changes = p.paragraphs_of('CHANGELOG', 0..1).join("\n\n")
|
25
|
-
p.spec_extras = { :rdoc_options => ['--main', 'README'] }
|
26
|
-
end
|
27
|
-
|
28
|
-
##############################################################################
|
29
|
-
|
30
|
-
def egrep(pattern, files)
|
31
|
-
Dir[files].each do |file|
|
32
|
-
File.open(file).readlines.each_with_index do |line, lineno|
|
33
|
-
puts "#{file}:#{lineno + 1}:#{line}" if line =~ pattern
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
desc 'Look for TODO and FIXME tags in the code base.'
|
39
|
-
task :todo do
|
40
|
-
egrep /#.*(FIXME|TODO)/, '**/*.rb'
|
7
|
+
desc "Generate YARD documentation (with title)"
|
8
|
+
task :yardocs => :yardoc do
|
9
|
+
# FIXME: fork YARD and patch it to allow the title to be configured
|
10
|
+
sh "sed -i 's/YARD Documentation/CRM114.rb Documentation/' doc/yard/index.html"
|
11
|
+
|
12
|
+
# TODO: investigate why YARD doesn't auto-link URLs like RDoc does
|
13
|
+
html = File.read(file = 'doc/yard/readme.html')
|
14
|
+
html.gsub!(/>(http:\/\/)([\w\d\.\/\-]+)/, '><a href="\1\2" target="_blank">\2</a>')
|
15
|
+
html.gsub!(/(http:\/\/ar\.to)([^\/]+)/, '<a href="\1" target="_top">ar.to</a>\2')
|
16
|
+
html.gsub!(/(mailto:[^\)]+)/, '<a href="\1">\1</a>')
|
17
|
+
File.open(file, 'wb') { |f| f.puts html }
|
41
18
|
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.1
|
data/lib/crm114.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
# Author:: Arto Bendiken (mailto:arto.bendiken@gmail.com)
|
2
|
-
# Copyright:: Copyright (c) 2006 Arto Bendiken.
|
2
|
+
# Copyright:: Copyright (c) 2006-2009 Arto Bendiken.
|
3
3
|
# License:: MIT
|
4
4
|
|
5
5
|
module Classifier
|
6
6
|
|
7
7
|
class CRM114
|
8
8
|
|
9
|
-
VERSION = '1.0.0'
|
9
|
+
VERSION = '1.0.1'
|
10
10
|
|
11
11
|
CLASSIFICATION_TYPE = '<osb unique microgroom>'
|
12
12
|
FILE_EXTENSION = '.css'
|
metadata
CHANGED
@@ -1,63 +1,71 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.0
|
3
|
-
specification_version: 1
|
4
2
|
name: crm114
|
5
3
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.0.0
|
7
|
-
date: 2006-11-06 00:00:00 +01:00
|
8
|
-
summary: Ruby interface to the CRM114 Controllable Regex Mutilator text classification engine.
|
9
|
-
require_paths:
|
10
|
-
- lib
|
11
|
-
- test
|
12
|
-
email: arto.bendiken@gmail.com
|
13
|
-
homepage: http://crm114.rubyforge.org/
|
14
|
-
rubyforge_project: crm114
|
15
|
-
description: This is a Ruby interface to the CRM114 Controllable Regex Mutilator, an advanced and fast text classifier that uses sparse binary polynomial matching with a Bayesian Chain Rule evaluator and a hidden Markov model to categorize data with up to a 99.87% accuracy.
|
16
|
-
autorequire:
|
17
|
-
default_executable:
|
18
|
-
bindir: bin
|
19
|
-
has_rdoc: true
|
20
|
-
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
21
|
-
requirements:
|
22
|
-
- - ">"
|
23
|
-
- !ruby/object:Gem::Version
|
24
|
-
version: 0.0.0
|
25
|
-
version:
|
4
|
+
version: 1.0.1
|
26
5
|
platform: ruby
|
27
|
-
signing_key:
|
28
|
-
cert_chain:
|
29
|
-
post_install_message:
|
30
6
|
authors:
|
31
7
|
- Arto Bendiken
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
- Manifest.txt
|
36
|
-
- README
|
37
|
-
- Rakefile
|
38
|
-
- lib/crm114.rb
|
39
|
-
- test/test_code_or_text.rb
|
40
|
-
- test/test_crm114.rb
|
41
|
-
test_files: []
|
42
|
-
|
43
|
-
rdoc_options:
|
44
|
-
- --main
|
45
|
-
- README
|
46
|
-
extra_rdoc_files: []
|
47
|
-
|
48
|
-
executables: []
|
49
|
-
|
50
|
-
extensions: []
|
51
|
-
|
52
|
-
requirements: []
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
53
11
|
|
12
|
+
date: 2009-04-20 00:00:00 +02:00
|
13
|
+
default_executable:
|
54
14
|
dependencies:
|
55
15
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
16
|
+
name: rakefile
|
17
|
+
type: :development
|
57
18
|
version_requirement:
|
58
|
-
version_requirements: !ruby/object:Gem::Version::Requirement
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
59
20
|
requirements:
|
60
21
|
- - ">="
|
61
22
|
- !ruby/object:Gem::Version
|
62
|
-
version:
|
23
|
+
version: "0"
|
63
24
|
version:
|
25
|
+
description: " CRM114.rb is a Ruby interface to the CRM114 Controllable Regex\n Mutilator, an advanced and fast text classifier that uses sparse binary\n polynomial matching with a Bayesian Chain Rule evaluator and a hidden\n Markov model to categorize data with up to a 99.87% accuracy.\n"
|
26
|
+
email: arto.bendiken@gmail.com
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files: []
|
32
|
+
|
33
|
+
files:
|
34
|
+
- LICENSE
|
35
|
+
- README
|
36
|
+
- Rakefile
|
37
|
+
- VERSION
|
38
|
+
- lib/crm114.rb
|
39
|
+
- test/test_code_or_text.rb
|
40
|
+
- test/test_crm114.rb
|
41
|
+
has_rdoc: false
|
42
|
+
homepage: http://crm114.rubyforge.org/
|
43
|
+
licenses:
|
44
|
+
- MIT
|
45
|
+
post_install_message:
|
46
|
+
rdoc_options: []
|
47
|
+
|
48
|
+
require_paths:
|
49
|
+
- lib
|
50
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.8.2
|
55
|
+
version:
|
56
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: "0"
|
61
|
+
version:
|
62
|
+
requirements:
|
63
|
+
- CRM114
|
64
|
+
rubyforge_project: crm114
|
65
|
+
rubygems_version: 1.3.2
|
66
|
+
signing_key:
|
67
|
+
specification_version: 3
|
68
|
+
summary: Ruby interface to the CRM114 Controllable Regex Mutilator text classification engine.
|
69
|
+
test_files:
|
70
|
+
- test/test_code_or_text.rb
|
71
|
+
- test/test_crm114.rb
|
data/CHANGELOG
DELETED