crm114 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/crm114.rb +27 -8
- data/lib/crm114/version.rb +19 -0
- metadata +6 -15
- data/Rakefile +0 -5
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.2
|
1
|
+
1.0.3
|
data/lib/crm114.rb
CHANGED
@@ -1,33 +1,40 @@
|
|
1
|
-
|
2
|
-
# License:: Public domain
|
1
|
+
require 'crm114/version'
|
3
2
|
|
4
3
|
module Classifier
|
5
|
-
|
6
4
|
class CRM114
|
7
|
-
|
8
|
-
VERSION = '1.0.2'
|
9
|
-
|
10
5
|
CLASSIFICATION_TYPE = '<osb unique microgroom>'
|
11
6
|
FILE_EXTENSION = '.css'
|
12
7
|
CMD_CRM = '/usr/bin/env crm'
|
13
8
|
OPT_LEARN = '-{ learn %s ( %s ) }'
|
14
9
|
OPT_CLASSIFY = '-{ isolate (:stats:); classify %s ( %s ) (:stats:); match [:stats:] (:: :best: :prob:) /Best match to file .. \\(%s\\/([[:graph:]]+)\\%s\\) prob: ([0-9.]+)/; output /:*:best:\\t:*:prob:/ }'
|
15
10
|
|
11
|
+
##
|
16
12
|
# Returns a string containing the installed CRM114 engine version in a
|
17
13
|
# format such as "20060118-BlameTheReavers".
|
14
|
+
#
|
15
|
+
# @return [String, nil]
|
18
16
|
def self.version
|
19
17
|
$1 if IO.popen(CMD_CRM + ' -v', 'r') { |pipe| pipe.readline } =~ /CRM114, version ([\d\w\-\.]+)/
|
20
18
|
end
|
21
19
|
|
20
|
+
##
|
22
21
|
# Returns a new CRM114 classifier defined by the given _categories_.
|
22
|
+
#
|
23
|
+
# @param [Array<#to_s>] categories
|
24
|
+
# @option options [String] :path ('.')
|
23
25
|
def initialize(categories, options = {})
|
24
26
|
@categories = categories.to_a.collect { |category| category.to_s.to_sym }
|
25
27
|
@path = File.expand_path(options[:path] || '.')
|
26
28
|
@debug = options[:debug] || false
|
27
29
|
end
|
28
30
|
|
31
|
+
##
|
29
32
|
# Trains the classifier to consider the given _text_ to be a sample from
|
30
33
|
# the set named by _category_.
|
34
|
+
#
|
35
|
+
# @param [#to_s] category
|
36
|
+
# @param [String] text
|
37
|
+
# @return [void]
|
31
38
|
def learn!(category, text, &block)
|
32
39
|
cmd = CMD_CRM + " '" + (OPT_LEARN % [CLASSIFICATION_TYPE, css_file_path(category)]) + "'"
|
33
40
|
puts cmd if @debug
|
@@ -36,15 +43,22 @@ module Classifier
|
|
36
43
|
|
37
44
|
alias_method :train!, :learn!
|
38
45
|
|
46
|
+
##
|
47
|
+
# @raise NotImplementedError
|
48
|
+
# @return [void]
|
39
49
|
def unlearn!(category, text, &block) # :nodoc:
|
40
|
-
raise 'unlearning not supported at present'
|
50
|
+
raise NotImplementedError.new('unlearning not supported at present')
|
41
51
|
end
|
42
52
|
|
43
53
|
alias_method :untrain!, :unlearn! #:nodoc:
|
44
54
|
|
55
|
+
##
|
45
56
|
# Returns the classification of the provided _text_ as a tuple
|
46
57
|
# containing the highest-probability category and a confidence indicator
|
47
58
|
# in the range of 0.5..1.0.
|
59
|
+
#
|
60
|
+
# @param [String] text
|
61
|
+
# @return [Array(Symbol, Float)]
|
48
62
|
def classify(text = nil, &block)
|
49
63
|
files = @categories.collect { |category| css_file_path(category) }
|
50
64
|
cmd = CMD_CRM + " '" + (OPT_CLASSIFY % [CLASSIFICATION_TYPE, files.join(' '), @path.gsub(/\//, '\/'), FILE_EXTENSION]) + "'"
|
@@ -73,15 +87,20 @@ module Classifier
|
|
73
87
|
|
74
88
|
protected
|
75
89
|
|
90
|
+
##
|
91
|
+
# @param [String] file
|
92
|
+
# @return [void]
|
76
93
|
def self.create_css_file(file)
|
77
94
|
cmd = CMD_CRM + " '" + (OPT_LEARN % [CLASSIFICATION_TYPE, file]) + "'"
|
78
95
|
IO.popen(cmd, 'w') { |pipe| pipe.close }
|
79
96
|
end
|
80
97
|
|
98
|
+
##
|
99
|
+
# @param [#to_s] category
|
100
|
+
# @return [String]
|
81
101
|
def css_file_path(category)
|
82
102
|
File.join(@path, category.to_s + FILE_EXTENSION)
|
83
103
|
end
|
84
104
|
|
85
105
|
end
|
86
|
-
|
87
106
|
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Classifier class CRM114
|
2
|
+
module VERSION
|
3
|
+
MAJOR = 1
|
4
|
+
MINOR = 0
|
5
|
+
TINY = 3
|
6
|
+
EXTRA = nil
|
7
|
+
|
8
|
+
STRING = [MAJOR, MINOR, TINY].join('.')
|
9
|
+
STRING << "-#{EXTRA}" if EXTRA
|
10
|
+
|
11
|
+
##
|
12
|
+
# @return [String]
|
13
|
+
def self.to_s() STRING end
|
14
|
+
|
15
|
+
##
|
16
|
+
# @return [String]
|
17
|
+
def self.to_str() STRING end
|
18
|
+
end
|
19
|
+
end end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crm114
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.2
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arto Bendiken
|
@@ -9,19 +9,10 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-12-
|
12
|
+
date: 2009-12-26 00:00:00 +01:00
|
13
13
|
default_executable:
|
14
|
-
dependencies:
|
15
|
-
|
16
|
-
name: bendiken-rakefile
|
17
|
-
type: :development
|
18
|
-
version_requirement:
|
19
|
-
version_requirements: !ruby/object:Gem::Requirement
|
20
|
-
requirements:
|
21
|
-
- - ">="
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: "0"
|
24
|
-
version:
|
14
|
+
dependencies: []
|
15
|
+
|
25
16
|
description: " CRM114.rb is a Ruby interface to the CRM114 Controllable Regex\n Mutilator, an advanced and fast text classifier that uses sparse binary\n polynomial matching with a Bayesian Chain Rule evaluator and a hidden\n Markov model to categorize data with up to a 99.87% accuracy.\n"
|
26
17
|
email: arto.bendiken@gmail.com
|
27
18
|
executables: []
|
@@ -31,12 +22,12 @@ extensions: []
|
|
31
22
|
extra_rdoc_files: []
|
32
23
|
|
33
24
|
files:
|
34
|
-
- UNLICENSE
|
35
25
|
- AUTHORS
|
36
26
|
- README
|
37
|
-
- Rakefile
|
27
|
+
- UNLICENSE
|
38
28
|
- VERSION
|
39
29
|
- lib/crm114.rb
|
30
|
+
- lib/crm114/version.rb
|
40
31
|
- test/test_code_or_text.rb
|
41
32
|
- test/test_crm114.rb
|
42
33
|
has_rdoc: false
|