crm114 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/crm114.rb +27 -8
- data/lib/crm114/version.rb +19 -0
- metadata +6 -15
- data/Rakefile +0 -5
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.2
|
1
|
+
1.0.3
|
data/lib/crm114.rb
CHANGED
@@ -1,33 +1,40 @@
|
|
1
|
-
|
2
|
-
# License:: Public domain
|
1
|
+
require 'crm114/version'
|
3
2
|
|
4
3
|
module Classifier
|
5
|
-
|
6
4
|
class CRM114
|
7
|
-
|
8
|
-
VERSION = '1.0.2'
|
9
|
-
|
10
5
|
CLASSIFICATION_TYPE = '<osb unique microgroom>'
|
11
6
|
FILE_EXTENSION = '.css'
|
12
7
|
CMD_CRM = '/usr/bin/env crm'
|
13
8
|
OPT_LEARN = '-{ learn %s ( %s ) }'
|
14
9
|
OPT_CLASSIFY = '-{ isolate (:stats:); classify %s ( %s ) (:stats:); match [:stats:] (:: :best: :prob:) /Best match to file .. \\(%s\\/([[:graph:]]+)\\%s\\) prob: ([0-9.]+)/; output /:*:best:\\t:*:prob:/ }'
|
15
10
|
|
11
|
+
##
|
16
12
|
# Returns a string containing the installed CRM114 engine version in a
|
17
13
|
# format such as "20060118-BlameTheReavers".
|
14
|
+
#
|
15
|
+
# @return [String, nil]
|
18
16
|
def self.version
|
19
17
|
$1 if IO.popen(CMD_CRM + ' -v', 'r') { |pipe| pipe.readline } =~ /CRM114, version ([\d\w\-\.]+)/
|
20
18
|
end
|
21
19
|
|
20
|
+
##
|
22
21
|
# Returns a new CRM114 classifier defined by the given _categories_.
|
22
|
+
#
|
23
|
+
# @param [Array<#to_s>] categories
|
24
|
+
# @option options [String] :path ('.')
|
23
25
|
def initialize(categories, options = {})
|
24
26
|
@categories = categories.to_a.collect { |category| category.to_s.to_sym }
|
25
27
|
@path = File.expand_path(options[:path] || '.')
|
26
28
|
@debug = options[:debug] || false
|
27
29
|
end
|
28
30
|
|
31
|
+
##
|
29
32
|
# Trains the classifier to consider the given _text_ to be a sample from
|
30
33
|
# the set named by _category_.
|
34
|
+
#
|
35
|
+
# @param [#to_s] category
|
36
|
+
# @param [String] text
|
37
|
+
# @return [void]
|
31
38
|
def learn!(category, text, &block)
|
32
39
|
cmd = CMD_CRM + " '" + (OPT_LEARN % [CLASSIFICATION_TYPE, css_file_path(category)]) + "'"
|
33
40
|
puts cmd if @debug
|
@@ -36,15 +43,22 @@ module Classifier
|
|
36
43
|
|
37
44
|
alias_method :train!, :learn!
|
38
45
|
|
46
|
+
##
|
47
|
+
# @raise NotImplementedError
|
48
|
+
# @return [void]
|
39
49
|
def unlearn!(category, text, &block) # :nodoc:
|
40
|
-
raise 'unlearning not supported at present'
|
50
|
+
raise NotImplementedError.new('unlearning not supported at present')
|
41
51
|
end
|
42
52
|
|
43
53
|
alias_method :untrain!, :unlearn! #:nodoc:
|
44
54
|
|
55
|
+
##
|
45
56
|
# Returns the classification of the provided _text_ as a tuple
|
46
57
|
# containing the highest-probability category and a confidence indicator
|
47
58
|
# in the range of 0.5..1.0.
|
59
|
+
#
|
60
|
+
# @param [String] text
|
61
|
+
# @return [Array(Symbol, Float)]
|
48
62
|
def classify(text = nil, &block)
|
49
63
|
files = @categories.collect { |category| css_file_path(category) }
|
50
64
|
cmd = CMD_CRM + " '" + (OPT_CLASSIFY % [CLASSIFICATION_TYPE, files.join(' '), @path.gsub(/\//, '\/'), FILE_EXTENSION]) + "'"
|
@@ -73,15 +87,20 @@ module Classifier
|
|
73
87
|
|
74
88
|
protected
|
75
89
|
|
90
|
+
##
|
91
|
+
# @param [String] file
|
92
|
+
# @return [void]
|
76
93
|
def self.create_css_file(file)
|
77
94
|
cmd = CMD_CRM + " '" + (OPT_LEARN % [CLASSIFICATION_TYPE, file]) + "'"
|
78
95
|
IO.popen(cmd, 'w') { |pipe| pipe.close }
|
79
96
|
end
|
80
97
|
|
98
|
+
##
|
99
|
+
# @param [#to_s] category
|
100
|
+
# @return [String]
|
81
101
|
def css_file_path(category)
|
82
102
|
File.join(@path, category.to_s + FILE_EXTENSION)
|
83
103
|
end
|
84
104
|
|
85
105
|
end
|
86
|
-
|
87
106
|
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Classifier class CRM114
|
2
|
+
module VERSION
|
3
|
+
MAJOR = 1
|
4
|
+
MINOR = 0
|
5
|
+
TINY = 3
|
6
|
+
EXTRA = nil
|
7
|
+
|
8
|
+
STRING = [MAJOR, MINOR, TINY].join('.')
|
9
|
+
STRING << "-#{EXTRA}" if EXTRA
|
10
|
+
|
11
|
+
##
|
12
|
+
# @return [String]
|
13
|
+
def self.to_s() STRING end
|
14
|
+
|
15
|
+
##
|
16
|
+
# @return [String]
|
17
|
+
def self.to_str() STRING end
|
18
|
+
end
|
19
|
+
end end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crm114
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.2
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arto Bendiken
|
@@ -9,19 +9,10 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-12-
|
12
|
+
date: 2009-12-26 00:00:00 +01:00
|
13
13
|
default_executable:
|
14
|
-
dependencies:
|
15
|
-
|
16
|
-
name: bendiken-rakefile
|
17
|
-
type: :development
|
18
|
-
version_requirement:
|
19
|
-
version_requirements: !ruby/object:Gem::Requirement
|
20
|
-
requirements:
|
21
|
-
- - ">="
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: "0"
|
24
|
-
version:
|
14
|
+
dependencies: []
|
15
|
+
|
25
16
|
description: " CRM114.rb is a Ruby interface to the CRM114 Controllable Regex\n Mutilator, an advanced and fast text classifier that uses sparse binary\n polynomial matching with a Bayesian Chain Rule evaluator and a hidden\n Markov model to categorize data with up to a 99.87% accuracy.\n"
|
26
17
|
email: arto.bendiken@gmail.com
|
27
18
|
executables: []
|
@@ -31,12 +22,12 @@ extensions: []
|
|
31
22
|
extra_rdoc_files: []
|
32
23
|
|
33
24
|
files:
|
34
|
-
- UNLICENSE
|
35
25
|
- AUTHORS
|
36
26
|
- README
|
37
|
-
- Rakefile
|
27
|
+
- UNLICENSE
|
38
28
|
- VERSION
|
39
29
|
- lib/crm114.rb
|
30
|
+
- lib/crm114/version.rb
|
40
31
|
- test/test_code_or_text.rb
|
41
32
|
- test/test_crm114.rb
|
42
33
|
has_rdoc: false
|