crfpp 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,11 +24,15 @@ static VALUE learn(VALUE klass, VALUE arguments) {
24
24
  }
25
25
 
26
26
  // start training
27
- result = crfpp_learn(argc, argv) >= 0 ? Qtrue : Qfalse;
27
+ result = crfpp_learn(argc, argv);
28
28
 
29
29
  // free allocated memory
30
30
  free(tmp);
31
31
 
32
+ if (result < 0) {
33
+ rb_raise(rb_eStandardError, "crfpp_learn failed");
34
+ }
35
+
32
36
  return result;
33
37
  }
34
38
 
@@ -5,9 +5,11 @@ require 'tempfile'
5
5
  require 'crfpp/version'
6
6
  require 'crfpp/errors'
7
7
  require 'crfpp/filelike'
8
+ require 'crfpp/token'
9
+ require 'crfpp/data'
8
10
  require 'crfpp/macro'
9
- require 'crfpp/template'
10
11
  require 'crfpp/feature'
12
+ require 'crfpp/template'
11
13
  require 'crfpp/model'
12
14
  require 'crfpp/native'
13
15
  require 'crfpp/utilities'
@@ -0,0 +1,63 @@
1
+ module CRFPP
2
+
3
+ #
4
+ # A Data object represents test or training data.
5
+ #
6
+ class Data
7
+
8
+ extend Forwardable
9
+
10
+ include Enumerable
11
+ include Filelike
12
+
13
+ attr_reader :sentences
14
+
15
+ def_delegators :@sentences, :length, :[], :each
16
+
17
+ def initialize(path = nil)
18
+ @path = path
19
+ open
20
+ end
21
+
22
+ def open
23
+ clear
24
+
25
+ read.lines.each do |line|
26
+ line.chomp!
27
+ if line.strip.empty?
28
+ new_sentence
29
+ else
30
+ push Token.parse(line)
31
+ end
32
+ end
33
+
34
+ self
35
+ end
36
+
37
+ def clear
38
+ @sentences = [[]]
39
+ self
40
+ end
41
+
42
+ def to_s
43
+ empty? ? '' : zip([]).flatten.join("\n")
44
+ end
45
+
46
+ def push(feature)
47
+ @sentences.last << feature
48
+ self
49
+ end
50
+
51
+ alias << push
52
+
53
+ def empty?
54
+ [@sentences].flatten(2).compact.empty?
55
+ end
56
+
57
+ def new_sentence
58
+ @sentences << []
59
+ self
60
+ end
61
+
62
+ end
63
+ end
@@ -8,13 +8,16 @@ module CRFPP
8
8
  @path ||= Tempfile.new('filelike').path
9
9
  end
10
10
 
11
- def write
12
- File.open(path, 'w:UTF-8') do |f|
13
- f.write(to_s)
11
+ def write(file = path, content = to_s)
12
+ File.open(file, 'w:UTF-8') do |f|
13
+ f.write(content)
14
14
  f.close
15
15
  end
16
+ self
16
17
  end
17
18
 
19
+ alias save write
20
+
18
21
  def read
19
22
  f = File.open(path, 'r:UTF-8')
20
23
  f.read
@@ -15,9 +15,16 @@ module CRFPP
15
15
  end
16
16
 
17
17
  def save
18
- write(@data)
19
- self
18
+ save_to(path)
19
+ end
20
+
21
+ def save_to(file)
22
+ write(file, data? ? data : read)
20
23
  end
21
-
24
+
25
+ def data?
26
+ data && !data.empty?
27
+ end
28
+
22
29
  end
23
30
  end
@@ -32,6 +32,7 @@ module CRFPP
32
32
 
33
33
  def clear
34
34
  @sentences = [[]]
35
+ self
35
36
  end
36
37
 
37
38
  def to_s
@@ -43,6 +44,7 @@ module CRFPP
43
44
 
44
45
  def push(feature)
45
46
  @sentences.last << feature
47
+ self
46
48
  end
47
49
 
48
50
  alias << push
@@ -53,6 +55,7 @@ module CRFPP
53
55
 
54
56
  def new_sentence
55
57
  @sentences << []
58
+ self
56
59
  end
57
60
 
58
61
  end
@@ -0,0 +1,31 @@
1
+ module CRFPP
2
+ class Token
3
+
4
+ attr_accessor :word, :tags
5
+
6
+ def self.parse(string)
7
+ tokens = string.split(/\s+/).compact
8
+ tokens.empty? ? nil : new(tokens)
9
+ end
10
+
11
+ # Creates a new Token instance.
12
+ #
13
+ # call-seq:
14
+ # Token.new(word)
15
+ # Token.new(word, tags)
16
+ # Token.new(word, tag, tag, tag ...)
17
+ #
18
+ def initialize(*arguments)
19
+ @word, *@tags = *arguments.flatten
20
+ end
21
+
22
+ def answer
23
+ @tags[-1]
24
+ end
25
+
26
+ def to_s
27
+ [@word.to_s, @tags].flatten.join(' ')
28
+ end
29
+
30
+ end
31
+ end
@@ -3,9 +3,6 @@ module CRFPP
3
3
 
4
4
  # Creates a new Model based on a template and training data.
5
5
  #
6
- # The data parameter can either be an array of strings or a filename. The
7
- # possible options are:
8
- #
9
6
  # :threads: False or the number of threads to use (default is 2).
10
7
  #
11
8
  # :algorithm: L1 or L2 (default)
@@ -26,14 +23,7 @@ module CRFPP
26
23
  def learn(template, data, options = {})
27
24
  options = { :threads => 2, :algorithm => :L2, :cost => 1.0, :frequency => 1}.merge(options)
28
25
 
29
- unless File.exists?(data)
30
- data = save_data_to_tempfile([data].flatten)
31
- temporary = true
32
- end
33
-
34
- template = Template.new(template) unless template.is_a?(Template)
35
- model = Model.new
36
-
26
+ model = Model.new
37
27
  arguments = []
38
28
 
39
29
  # TODO check algorithm names
@@ -43,16 +33,15 @@ module CRFPP
43
33
  arguments << "--thread=#{options[:threads]}"
44
34
  arguments << "--freq=#{options[:frequency]}"
45
35
 
46
- arguments << template.path
47
- arguments << data
36
+ arguments << (template.respond_to?(:path) ? template.path : template)
37
+ arguments << (data.respond_to?(:path) ? data.path : data)
48
38
  arguments << model.path
49
-
50
- success = Native.learn(arguments.join(' '))
51
- raise NativeError, 'crfpp learn failed' unless success
39
+
40
+ Native.learn(arguments.join(' '))
52
41
 
53
42
  model
54
- ensure
55
- data.unlink if temporary
43
+ rescue => error
44
+ raise NativeError, error.message
56
45
  end
57
46
 
58
47
  alias train learn
@@ -1,3 +1,3 @@
1
1
  module CRFPP
2
- VERSION = '0.0.3'.freeze
2
+ VERSION = '0.0.4'.freeze
3
3
  end
@@ -0,0 +1,23 @@
1
+ require 'helper'
2
+
3
+ module CRFPP
4
+
5
+ class TestData < Test::Unit::TestCase
6
+
7
+ def test_load_data_from_empty_file
8
+ file = Tempfile.new('template')
9
+ assert Data.new(file.path).to_s.empty?
10
+ ensure
11
+ file.close
12
+ file.unlink
13
+ end
14
+
15
+ def test_load_data_from_file
16
+ path = "#{FixturesRoot}/test.data"
17
+ assert_equal 'Rockwell NNP B-NP', Data.new(path)[0][0].to_s
18
+ end
19
+
20
+
21
+ end
22
+
23
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crfpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2011-08-18 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &2156620200 !ruby/object:Gem::Requirement
16
+ requirement: &2157284480 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.9'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2156620200
24
+ version_requirements: *2157284480
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rake-compiler
27
- requirement: &2156619700 !ruby/object:Gem::Requirement
27
+ requirement: &2157283860 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0.7'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *2156619700
35
+ version_requirements: *2157283860
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: ZenTest
38
- requirement: &2156619200 !ruby/object:Gem::Requirement
38
+ requirement: &2157283280 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '4.6'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2156619200
46
+ version_requirements: *2157283280
47
47
  description: A Ruby extension to interface with CRF++, the Conditional Random Fields
48
48
  library written in C++. You need to install libcrfpp to use this gem.
49
49
  email:
@@ -67,14 +67,17 @@ files:
67
67
  - ext/crfpp/tagger.cpp
68
68
  - ext/crfpp/tagger.hpp
69
69
  - lib/crfpp.rb
70
+ - lib/crfpp/data.rb
70
71
  - lib/crfpp/errors.rb
71
72
  - lib/crfpp/feature.rb
72
73
  - lib/crfpp/filelike.rb
73
74
  - lib/crfpp/macro.rb
74
75
  - lib/crfpp/model.rb
75
76
  - lib/crfpp/template.rb
77
+ - lib/crfpp/token.rb
76
78
  - lib/crfpp/utilities.rb
77
79
  - lib/crfpp/version.rb
80
+ - test/crfpp/test_data.rb
78
81
  - test/crfpp/test_feature.rb
79
82
  - test/crfpp/test_filelike.rb
80
83
  - test/crfpp/test_macro.rb
@@ -119,6 +122,7 @@ signing_key:
119
122
  specification_version: 3
120
123
  summary: Conditional Random Fields for Ruby.
121
124
  test_files:
125
+ - test/crfpp/test_data.rb
122
126
  - test/crfpp/test_feature.rb
123
127
  - test/crfpp/test_filelike.rb
124
128
  - test/crfpp/test_macro.rb