crm114 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +1 -1
- data/README +51 -20
- data/Rakefile +14 -37
- data/VERSION +1 -0
- data/lib/crm114.rb +2 -2
- metadata +57 -49
- data/CHANGELOG +0 -3
- data/Manifest.txt +0 -8
data/LICENSE
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
Copyright (c) 2005-
|
1
|
+
Copyright (c) 2005-2009 Arto Bendiken <http://ar.to/>
|
2
2
|
|
3
3
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
4
|
of this software and associated documentation files (the "Software"), to
|
data/README
CHANGED
@@ -1,12 +1,14 @@
|
|
1
|
-
|
1
|
+
= CRM114 Controllable Regex Mutilator for Ruby
|
2
2
|
|
3
3
|
This is a Ruby interface to the CRM114 Controllable Regex Mutilator, an
|
4
4
|
advanced and fast text classifier that uses sparse binary polynomial
|
5
5
|
matching with a Bayesian Chain Rule evaluator and a hidden Markov model to
|
6
6
|
categorize data with up to a 99.87% accuracy.
|
7
7
|
|
8
|
-
|
9
|
-
* http://
|
8
|
+
* http://crm114.rubyforge.org
|
9
|
+
* http://github.com/bendiken/crm114
|
10
|
+
* http://ar.to/2006/07/spam-filters-alien-technology-and-ruby-on-rails
|
11
|
+
|
10
12
|
|
11
13
|
=== About CRM114
|
12
14
|
|
@@ -15,45 +17,74 @@ The Ruby wrapper grew out of this:
|
|
15
17
|
* http://en.wikipedia.org/wiki/Dr_Strangelove
|
16
18
|
* http://www.paulgraham.com/wsy.html
|
17
19
|
|
18
|
-
== Download
|
19
|
-
|
20
|
-
* http://rubyforge.org/projects/crm114
|
21
|
-
* gem install crm114
|
22
|
-
* svn checkout svn://rubyforge.org/var/svn/crm114
|
23
|
-
|
24
|
-
== Dependencies
|
25
|
-
|
26
|
-
Requires the CRM114 binaries to be installed. Specifically, the '+crm+'
|
27
|
-
binary should be accessible in the current user's PATH environment variable.
|
28
20
|
|
29
21
|
== Usage
|
30
22
|
|
31
23
|
The CRM114 library interface is very similar to that of the
|
32
|
-
Classifier[http://rubyforge.org/projects/classifier
|
24
|
+
Classifier[http://rubyforge.org/projects/classifier] project.
|
33
25
|
|
34
26
|
Here follows a brief example:
|
35
27
|
|
36
28
|
require 'crm114'
|
29
|
+
|
37
30
|
crm = Classifier::CRM114.new([:interesting, :boring])
|
31
|
+
|
38
32
|
crm.train! :interesting, 'Some data set with a decent signal to noise ratio.'
|
39
33
|
crm.train! :boring, 'Pig latin, as in lorem ipsum dolor sit amet.'
|
34
|
+
|
40
35
|
crm.classify 'Lorem ipsum' => [:boring, 0.99]
|
41
36
|
crm.interesting? 'Lorem ipsum' => false
|
42
37
|
crm.boring? 'Lorem ipsum' => true
|
43
38
|
|
44
39
|
Have a look at the included unit tests for more comprehensive examples.
|
45
40
|
|
46
|
-
== Related Projects
|
47
41
|
|
42
|
+
== Dependencies
|
43
|
+
|
44
|
+
Requires the CRM114 binaries to be installed. Specifically, the '+crm+'
|
45
|
+
binary should be accessible in the current user's +PATH+ environment
|
46
|
+
variable.
|
47
|
+
|
48
|
+
|
49
|
+
== Download
|
50
|
+
|
51
|
+
To get a local working copy of the development repository, do:
|
52
|
+
|
53
|
+
% git clone git://github.com/bendiken/crm114.git
|
54
|
+
|
55
|
+
Alternatively, you can download the latest development version as a tarball
|
56
|
+
as follows:
|
57
|
+
|
58
|
+
% wget http://github.com/bendiken/crm114/tarball/master
|
59
|
+
|
60
|
+
|
61
|
+
== Installation
|
62
|
+
|
63
|
+
The recommended installation method is via RubyGems. To install the latest
|
64
|
+
official release from RubyForge, do:
|
65
|
+
|
66
|
+
% [sudo] gem install crm114
|
67
|
+
|
68
|
+
To use the very latest bleeding-edge development version, install the gem
|
69
|
+
directly from GitHub as follows:
|
70
|
+
|
71
|
+
% [sudo] gem install bendiken-crm114 -s http://gems.github.com
|
72
|
+
|
73
|
+
|
74
|
+
== Resources
|
75
|
+
|
76
|
+
* http://rubyforge.org/projects/crm114
|
48
77
|
* http://www.elegantchaos.com/node/129 (crm.py)
|
49
|
-
* http://rubyforge.org/projects/classifier
|
50
|
-
* http://rubyforge.org/projects/bishop
|
78
|
+
* http://rubyforge.org/projects/classifier
|
79
|
+
* http://rubyforge.org/projects/bishop
|
80
|
+
|
51
81
|
|
52
82
|
== Author
|
53
83
|
|
54
|
-
Arto Bendiken (mailto:arto.bendiken@gmail.com) - http://
|
84
|
+
* Arto Bendiken (mailto:arto.bendiken@gmail.com) - http://ar.to
|
85
|
+
|
55
86
|
|
56
87
|
== License
|
57
88
|
|
58
|
-
|
59
|
-
|
89
|
+
All source code is available under the terms of the MIT license. For more
|
90
|
+
information, see the accompanying LICENSE file.
|
data/Rakefile
CHANGED
@@ -1,41 +1,18 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$:.unshift(File.expand_path(File.join(File.dirname(__FILE__), 'lib')))
|
3
3
|
require 'rubygems'
|
4
|
+
require 'rakefile' # http://github.com/bendiken/rakefile
|
4
5
|
require 'crm114'
|
5
6
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
Hoe.new(PKG_NAME, PKG_VERSION) do |p|
|
19
|
-
p.author = PKG_AUTHOR
|
20
|
-
p.email = PKG_EMAIL
|
21
|
-
p.url = PKG_URL
|
22
|
-
p.summary = PKG_DESC
|
23
|
-
p.description = p.paragraphs_of('README', 1).first
|
24
|
-
p.changes = p.paragraphs_of('CHANGELOG', 0..1).join("\n\n")
|
25
|
-
p.spec_extras = { :rdoc_options => ['--main', 'README'] }
|
26
|
-
end
|
27
|
-
|
28
|
-
##############################################################################
|
29
|
-
|
30
|
-
def egrep(pattern, files)
|
31
|
-
Dir[files].each do |file|
|
32
|
-
File.open(file).readlines.each_with_index do |line, lineno|
|
33
|
-
puts "#{file}:#{lineno + 1}:#{line}" if line =~ pattern
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
desc 'Look for TODO and FIXME tags in the code base.'
|
39
|
-
task :todo do
|
40
|
-
egrep /#.*(FIXME|TODO)/, '**/*.rb'
|
7
|
+
desc "Generate YARD documentation (with title)"
|
8
|
+
task :yardocs => :yardoc do
|
9
|
+
# FIXME: fork YARD and patch it to allow the title to be configured
|
10
|
+
sh "sed -i 's/YARD Documentation/CRM114.rb Documentation/' doc/yard/index.html"
|
11
|
+
|
12
|
+
# TODO: investigate why YARD doesn't auto-link URLs like RDoc does
|
13
|
+
html = File.read(file = 'doc/yard/readme.html')
|
14
|
+
html.gsub!(/>(http:\/\/)([\w\d\.\/\-]+)/, '><a href="\1\2" target="_blank">\2</a>')
|
15
|
+
html.gsub!(/(http:\/\/ar\.to)([^\/]+)/, '<a href="\1" target="_top">ar.to</a>\2')
|
16
|
+
html.gsub!(/(mailto:[^\)]+)/, '<a href="\1">\1</a>')
|
17
|
+
File.open(file, 'wb') { |f| f.puts html }
|
41
18
|
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.1
|
data/lib/crm114.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
# Author:: Arto Bendiken (mailto:arto.bendiken@gmail.com)
|
2
|
-
# Copyright:: Copyright (c) 2006 Arto Bendiken.
|
2
|
+
# Copyright:: Copyright (c) 2006-2009 Arto Bendiken.
|
3
3
|
# License:: MIT
|
4
4
|
|
5
5
|
module Classifier
|
6
6
|
|
7
7
|
class CRM114
|
8
8
|
|
9
|
-
VERSION = '1.0.0'
|
9
|
+
VERSION = '1.0.1'
|
10
10
|
|
11
11
|
CLASSIFICATION_TYPE = '<osb unique microgroom>'
|
12
12
|
FILE_EXTENSION = '.css'
|
metadata
CHANGED
@@ -1,63 +1,71 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.0
|
3
|
-
specification_version: 1
|
4
2
|
name: crm114
|
5
3
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.0.0
|
7
|
-
date: 2006-11-06 00:00:00 +01:00
|
8
|
-
summary: Ruby interface to the CRM114 Controllable Regex Mutilator text classification engine.
|
9
|
-
require_paths:
|
10
|
-
- lib
|
11
|
-
- test
|
12
|
-
email: arto.bendiken@gmail.com
|
13
|
-
homepage: http://crm114.rubyforge.org/
|
14
|
-
rubyforge_project: crm114
|
15
|
-
description: This is a Ruby interface to the CRM114 Controllable Regex Mutilator, an advanced and fast text classifier that uses sparse binary polynomial matching with a Bayesian Chain Rule evaluator and a hidden Markov model to categorize data with up to a 99.87% accuracy.
|
16
|
-
autorequire:
|
17
|
-
default_executable:
|
18
|
-
bindir: bin
|
19
|
-
has_rdoc: true
|
20
|
-
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
21
|
-
requirements:
|
22
|
-
- - ">"
|
23
|
-
- !ruby/object:Gem::Version
|
24
|
-
version: 0.0.0
|
25
|
-
version:
|
4
|
+
version: 1.0.1
|
26
5
|
platform: ruby
|
27
|
-
signing_key:
|
28
|
-
cert_chain:
|
29
|
-
post_install_message:
|
30
6
|
authors:
|
31
7
|
- Arto Bendiken
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
- Manifest.txt
|
36
|
-
- README
|
37
|
-
- Rakefile
|
38
|
-
- lib/crm114.rb
|
39
|
-
- test/test_code_or_text.rb
|
40
|
-
- test/test_crm114.rb
|
41
|
-
test_files: []
|
42
|
-
|
43
|
-
rdoc_options:
|
44
|
-
- --main
|
45
|
-
- README
|
46
|
-
extra_rdoc_files: []
|
47
|
-
|
48
|
-
executables: []
|
49
|
-
|
50
|
-
extensions: []
|
51
|
-
|
52
|
-
requirements: []
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
53
11
|
|
12
|
+
date: 2009-04-20 00:00:00 +02:00
|
13
|
+
default_executable:
|
54
14
|
dependencies:
|
55
15
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
16
|
+
name: rakefile
|
17
|
+
type: :development
|
57
18
|
version_requirement:
|
58
|
-
version_requirements: !ruby/object:Gem::Version::Requirement
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
59
20
|
requirements:
|
60
21
|
- - ">="
|
61
22
|
- !ruby/object:Gem::Version
|
62
|
-
version:
|
23
|
+
version: "0"
|
63
24
|
version:
|
25
|
+
description: " CRM114.rb is a Ruby interface to the CRM114 Controllable Regex\n Mutilator, an advanced and fast text classifier that uses sparse binary\n polynomial matching with a Bayesian Chain Rule evaluator and a hidden\n Markov model to categorize data with up to a 99.87% accuracy.\n"
|
26
|
+
email: arto.bendiken@gmail.com
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files: []
|
32
|
+
|
33
|
+
files:
|
34
|
+
- LICENSE
|
35
|
+
- README
|
36
|
+
- Rakefile
|
37
|
+
- VERSION
|
38
|
+
- lib/crm114.rb
|
39
|
+
- test/test_code_or_text.rb
|
40
|
+
- test/test_crm114.rb
|
41
|
+
has_rdoc: false
|
42
|
+
homepage: http://crm114.rubyforge.org/
|
43
|
+
licenses:
|
44
|
+
- MIT
|
45
|
+
post_install_message:
|
46
|
+
rdoc_options: []
|
47
|
+
|
48
|
+
require_paths:
|
49
|
+
- lib
|
50
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.8.2
|
55
|
+
version:
|
56
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: "0"
|
61
|
+
version:
|
62
|
+
requirements:
|
63
|
+
- CRM114
|
64
|
+
rubyforge_project: crm114
|
65
|
+
rubygems_version: 1.3.2
|
66
|
+
signing_key:
|
67
|
+
specification_version: 3
|
68
|
+
summary: Ruby interface to the CRM114 Controllable Regex Mutilator text classification engine.
|
69
|
+
test_files:
|
70
|
+
- test/test_code_or_text.rb
|
71
|
+
- test/test_crm114.rb
|
data/CHANGELOG
DELETED