crm114 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5):
  1. data/VERSION +1 -1
  2. data/lib/crm114.rb +27 -8
  3. data/lib/crm114/version.rb +19 -0
  4. metadata +6 -15
  5. data/Rakefile +0 -5
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.2
1
+ 1.0.3
@@ -1,33 +1,40 @@
1
- # Author:: Arto Bendiken (mailto:arto.bendiken@gmail.com)
2
- # License:: Public domain
1
+ require 'crm114/version'
3
2
 
4
3
  module Classifier
5
-
6
4
  class CRM114
7
-
8
- VERSION = '1.0.2'
9
-
10
5
  CLASSIFICATION_TYPE = '<osb unique microgroom>'
11
6
  FILE_EXTENSION = '.css'
12
7
  CMD_CRM = '/usr/bin/env crm'
13
8
  OPT_LEARN = '-{ learn %s ( %s ) }'
14
9
  OPT_CLASSIFY = '-{ isolate (:stats:); classify %s ( %s ) (:stats:); match [:stats:] (:: :best: :prob:) /Best match to file .. \\(%s\\/([[:graph:]]+)\\%s\\) prob: ([0-9.]+)/; output /:*:best:\\t:*:prob:/ }'
15
10
 
11
+ ##
16
12
  # Returns a string containing the installed CRM114 engine version in a
17
13
  # format such as "20060118-BlameTheReavers".
14
+ #
15
+ # @return [String, nil]
18
16
  def self.version
19
17
  $1 if IO.popen(CMD_CRM + ' -v', 'r') { |pipe| pipe.readline } =~ /CRM114, version ([\d\w\-\.]+)/
20
18
  end
21
19
 
20
+ ##
22
21
  # Returns a new CRM114 classifier defined by the given _categories_.
22
+ #
23
+ # @param [Array<#to_s>] categories
24
+ # @option options [String] :path ('.')
23
25
  def initialize(categories, options = {})
24
26
  @categories = categories.to_a.collect { |category| category.to_s.to_sym }
25
27
  @path = File.expand_path(options[:path] || '.')
26
28
  @debug = options[:debug] || false
27
29
  end
28
30
 
31
+ ##
29
32
  # Trains the classifier to consider the given _text_ to be a sample from
30
33
  # the set named by _category_.
34
+ #
35
+ # @param [#to_s] category
36
+ # @param [String] text
37
+ # @return [void]
31
38
  def learn!(category, text, &block)
32
39
  cmd = CMD_CRM + " '" + (OPT_LEARN % [CLASSIFICATION_TYPE, css_file_path(category)]) + "'"
33
40
  puts cmd if @debug
@@ -36,15 +43,22 @@ module Classifier
36
43
 
37
44
  alias_method :train!, :learn!
38
45
 
46
+ ##
47
+ # @raise NotImplementedError
48
+ # @return [void]
39
49
  def unlearn!(category, text, &block) # :nodoc:
40
- raise 'unlearning not supported at present'
50
+ raise NotImplementedError.new('unlearning not supported at present')
41
51
  end
42
52
 
43
53
  alias_method :untrain!, :unlearn! #:nodoc:
44
54
 
55
+ ##
45
56
  # Returns the classification of the provided _text_ as a tuple
46
57
  # containing the highest-probability category and a confidence indicator
47
58
  # in the range of 0.5..1.0.
59
+ #
60
+ # @param [String] text
61
+ # @return [Array(Symbol, Float)]
48
62
  def classify(text = nil, &block)
49
63
  files = @categories.collect { |category| css_file_path(category) }
50
64
  cmd = CMD_CRM + " '" + (OPT_CLASSIFY % [CLASSIFICATION_TYPE, files.join(' '), @path.gsub(/\//, '\/'), FILE_EXTENSION]) + "'"
@@ -73,15 +87,20 @@ module Classifier
73
87
 
74
88
  protected
75
89
 
90
+ ##
91
+ # @param [String] file
92
+ # @return [void]
76
93
  def self.create_css_file(file)
77
94
  cmd = CMD_CRM + " '" + (OPT_LEARN % [CLASSIFICATION_TYPE, file]) + "'"
78
95
  IO.popen(cmd, 'w') { |pipe| pipe.close }
79
96
  end
80
97
 
98
+ ##
99
+ # @param [#to_s] category
100
+ # @return [String]
81
101
  def css_file_path(category)
82
102
  File.join(@path, category.to_s + FILE_EXTENSION)
83
103
  end
84
104
 
85
105
  end
86
-
87
106
  end
@@ -0,0 +1,19 @@
1
+ module Classifier class CRM114
2
+ module VERSION
3
+ MAJOR = 1
4
+ MINOR = 0
5
+ TINY = 3
6
+ EXTRA = nil
7
+
8
+ STRING = [MAJOR, MINOR, TINY].join('.')
9
+ STRING << "-#{EXTRA}" if EXTRA
10
+
11
+ ##
12
+ # @return [String]
13
+ def self.to_s() STRING end
14
+
15
+ ##
16
+ # @return [String]
17
+ def self.to_str() STRING end
18
+ end
19
+ end end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crm114
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arto Bendiken
@@ -9,19 +9,10 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-12-20 00:00:00 +01:00
12
+ date: 2009-12-26 00:00:00 +01:00
13
13
  default_executable:
14
- dependencies:
15
- - !ruby/object:Gem::Dependency
16
- name: bendiken-rakefile
17
- type: :development
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
20
- requirements:
21
- - - ">="
22
- - !ruby/object:Gem::Version
23
- version: "0"
24
- version:
14
+ dependencies: []
15
+
25
16
  description: " CRM114.rb is a Ruby interface to the CRM114 Controllable Regex\n Mutilator, an advanced and fast text classifier that uses sparse binary\n polynomial matching with a Bayesian Chain Rule evaluator and a hidden\n Markov model to categorize data with up to a 99.87% accuracy.\n"
26
17
  email: arto.bendiken@gmail.com
27
18
  executables: []
@@ -31,12 +22,12 @@ extensions: []
31
22
  extra_rdoc_files: []
32
23
 
33
24
  files:
34
- - UNLICENSE
35
25
  - AUTHORS
36
26
  - README
37
- - Rakefile
27
+ - UNLICENSE
38
28
  - VERSION
39
29
  - lib/crm114.rb
30
+ - lib/crm114/version.rb
40
31
  - test/test_code_or_text.rb
41
32
  - test/test_crm114.rb
42
33
  has_rdoc: false
data/Rakefile DELETED
@@ -1,5 +0,0 @@
1
- #!/usr/bin/env ruby
2
- $:.unshift(File.expand_path(File.join(File.dirname(__FILE__), 'lib')))
3
- require 'rubygems'
4
- require 'rakefile' # http://github.com/bendiken/rakefile
5
- require 'crm114'