crm114 1.0.2 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. data/VERSION +1 -1
  2. data/lib/crm114.rb +27 -8
  3. data/lib/crm114/version.rb +19 -0
  4. metadata +6 -15
  5. data/Rakefile +0 -5
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.2
1
+ 1.0.3
@@ -1,33 +1,40 @@
1
- # Author:: Arto Bendiken (mailto:arto.bendiken@gmail.com)
2
- # License:: Public domain
1
+ require 'crm114/version'
3
2
 
4
3
  module Classifier
5
-
6
4
  class CRM114
7
-
8
- VERSION = '1.0.2'
9
-
10
5
  CLASSIFICATION_TYPE = '<osb unique microgroom>'
11
6
  FILE_EXTENSION = '.css'
12
7
  CMD_CRM = '/usr/bin/env crm'
13
8
  OPT_LEARN = '-{ learn %s ( %s ) }'
14
9
  OPT_CLASSIFY = '-{ isolate (:stats:); classify %s ( %s ) (:stats:); match [:stats:] (:: :best: :prob:) /Best match to file .. \\(%s\\/([[:graph:]]+)\\%s\\) prob: ([0-9.]+)/; output /:*:best:\\t:*:prob:/ }'
15
10
 
11
+ ##
16
12
  # Returns a string containing the installed CRM114 engine version in a
17
13
  # format such as "20060118-BlameTheReavers".
14
+ #
15
+ # @return [String, nil]
18
16
  def self.version
19
17
  $1 if IO.popen(CMD_CRM + ' -v', 'r') { |pipe| pipe.readline } =~ /CRM114, version ([\d\w\-\.]+)/
20
18
  end
21
19
 
20
+ ##
22
21
  # Returns a new CRM114 classifier defined by the given _categories_.
22
+ #
23
+ # @param [Array<#to_s>] categories
24
+ # @option options [String] :path ('.')
23
25
  def initialize(categories, options = {})
24
26
  @categories = categories.to_a.collect { |category| category.to_s.to_sym }
25
27
  @path = File.expand_path(options[:path] || '.')
26
28
  @debug = options[:debug] || false
27
29
  end
28
30
 
31
+ ##
29
32
  # Trains the classifier to consider the given _text_ to be a sample from
30
33
  # the set named by _category_.
34
+ #
35
+ # @param [#to_s] category
36
+ # @param [String] text
37
+ # @return [void]
31
38
  def learn!(category, text, &block)
32
39
  cmd = CMD_CRM + " '" + (OPT_LEARN % [CLASSIFICATION_TYPE, css_file_path(category)]) + "'"
33
40
  puts cmd if @debug
@@ -36,15 +43,22 @@ module Classifier
36
43
 
37
44
  alias_method :train!, :learn!
38
45
 
46
+ ##
47
+ # @raise [NotImplementedError]
48
+ # @return [void]
39
49
  def unlearn!(category, text, &block) # :nodoc:
40
- raise 'unlearning not supported at present'
50
+ raise NotImplementedError.new('unlearning not supported at present')
41
51
  end
42
52
 
43
53
  alias_method :untrain!, :unlearn! #:nodoc:
44
54
 
55
+ ##
45
56
  # Returns the classification of the provided _text_ as a tuple
46
57
  # containing the highest-probability category and a confidence indicator
47
58
  # in the range of 0.5..1.0.
59
+ #
60
+ # @param [String] text
61
+ # @return [Array(Symbol, Float)]
48
62
  def classify(text = nil, &block)
49
63
  files = @categories.collect { |category| css_file_path(category) }
50
64
  cmd = CMD_CRM + " '" + (OPT_CLASSIFY % [CLASSIFICATION_TYPE, files.join(' '), @path.gsub(/\//, '\/'), FILE_EXTENSION]) + "'"
@@ -73,15 +87,20 @@ module Classifier
73
87
 
74
88
  protected
75
89
 
90
+ ##
91
+ # @param [String] file
92
+ # @return [void]
76
93
  def self.create_css_file(file)
77
94
  cmd = CMD_CRM + " '" + (OPT_LEARN % [CLASSIFICATION_TYPE, file]) + "'"
78
95
  IO.popen(cmd, 'w') { |pipe| pipe.close }
79
96
  end
80
97
 
98
+ ##
99
+ # @param [#to_s] category
100
+ # @return [String]
81
101
  def css_file_path(category)
82
102
  File.join(@path, category.to_s + FILE_EXTENSION)
83
103
  end
84
104
 
85
105
  end
86
-
87
106
  end
@@ -0,0 +1,19 @@
1
+ module Classifier class CRM114
2
+ module VERSION
3
+ MAJOR = 1
4
+ MINOR = 0
5
+ TINY = 3
6
+ EXTRA = nil
7
+
8
+ STRING = [MAJOR, MINOR, TINY].join('.')
9
+ STRING << "-#{EXTRA}" if EXTRA
10
+
11
+ ##
12
+ # @return [String]
13
+ def self.to_s() STRING end
14
+
15
+ ##
16
+ # @return [String]
17
+ def self.to_str() STRING end
18
+ end
19
+ end end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crm114
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arto Bendiken
@@ -9,19 +9,10 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-12-20 00:00:00 +01:00
12
+ date: 2009-12-26 00:00:00 +01:00
13
13
  default_executable:
14
- dependencies:
15
- - !ruby/object:Gem::Dependency
16
- name: bendiken-rakefile
17
- type: :development
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
20
- requirements:
21
- - - ">="
22
- - !ruby/object:Gem::Version
23
- version: "0"
24
- version:
14
+ dependencies: []
15
+
25
16
  description: " CRM114.rb is a Ruby interface to the CRM114 Controllable Regex\n Mutilator, an advanced and fast text classifier that uses sparse binary\n polynomial matching with a Bayesian Chain Rule evaluator and a hidden\n Markov model to categorize data with up to a 99.87% accuracy.\n"
26
17
  email: arto.bendiken@gmail.com
27
18
  executables: []
@@ -31,12 +22,12 @@ extensions: []
31
22
  extra_rdoc_files: []
32
23
 
33
24
  files:
34
- - UNLICENSE
35
25
  - AUTHORS
36
26
  - README
37
- - Rakefile
27
+ - UNLICENSE
38
28
  - VERSION
39
29
  - lib/crm114.rb
30
+ - lib/crm114/version.rb
40
31
  - test/test_code_or_text.rb
41
32
  - test/test_crm114.rb
42
33
  has_rdoc: false
data/Rakefile DELETED
@@ -1,5 +0,0 @@
1
- #!/usr/bin/env ruby
2
- $:.unshift(File.expand_path(File.join(File.dirname(__FILE__), 'lib')))
3
- require 'rubygems'
4
- require 'rakefile' # http://github.com/bendiken/rakefile
5
- require 'crm114'