crfpp 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -24,11 +24,15 @@ static VALUE learn(VALUE klass, VALUE arguments) {
24
24
  }
25
25
 
26
26
  // start training
27
- result = crfpp_learn(argc, argv) >= 0 ? Qtrue : Qfalse;
27
+ result = crfpp_learn(argc, argv);
28
28
 
29
29
  // free allocated memory
30
30
  free(tmp);
31
31
 
32
+ if (result < 0) {
33
+ rb_raise(rb_eStandardError, "crfpp_learn failed");
34
+ }
35
+
32
36
  return result;
33
37
  }
34
38
 
@@ -5,9 +5,11 @@ require 'tempfile'
5
5
  require 'crfpp/version'
6
6
  require 'crfpp/errors'
7
7
  require 'crfpp/filelike'
8
+ require 'crfpp/token'
9
+ require 'crfpp/data'
8
10
  require 'crfpp/macro'
9
- require 'crfpp/template'
10
11
  require 'crfpp/feature'
12
+ require 'crfpp/template'
11
13
  require 'crfpp/model'
12
14
  require 'crfpp/native'
13
15
  require 'crfpp/utilities'
@@ -0,0 +1,63 @@
1
+ module CRFPP
2
+
3
+ #
4
+ # A Data object represents test or training data.
5
+ #
6
+ class Data
7
+
8
+ extend Forwardable
9
+
10
+ include Enumerable
11
+ include Filelike
12
+
13
+ attr_reader :sentences
14
+
15
+ def_delegators :@sentences, :length, :[], :each
16
+
17
+ def initialize(path = nil)
18
+ @path = path
19
+ open
20
+ end
21
+
22
+ def open
23
+ clear
24
+
25
+ read.lines.each do |line|
26
+ line.chomp!
27
+ if line.strip.empty?
28
+ new_sentence
29
+ else
30
+ push Token.parse(line)
31
+ end
32
+ end
33
+
34
+ self
35
+ end
36
+
37
+ def clear
38
+ @sentences = [[]]
39
+ self
40
+ end
41
+
42
+ def to_s
43
+ empty? ? '' : zip([]).flatten.join("\n")
44
+ end
45
+
46
+ def push(feature)
47
+ @sentences.last << feature
48
+ self
49
+ end
50
+
51
+ alias << push
52
+
53
+ def empty?
54
+ [@sentences].flatten(2).compact.empty?
55
+ end
56
+
57
+ def new_sentence
58
+ @sentences << []
59
+ self
60
+ end
61
+
62
+ end
63
+ end
@@ -8,13 +8,16 @@ module CRFPP
8
8
  @path ||= Tempfile.new('filelike').path
9
9
  end
10
10
 
11
- def write
12
- File.open(path, 'w:UTF-8') do |f|
13
- f.write(to_s)
11
+ def write(file = path, content = to_s)
12
+ File.open(file, 'w:UTF-8') do |f|
13
+ f.write(content)
14
14
  f.close
15
15
  end
16
+ self
16
17
  end
17
18
 
19
+ alias save write
20
+
18
21
  def read
19
22
  f = File.open(path, 'r:UTF-8')
20
23
  f.read
@@ -15,9 +15,16 @@ module CRFPP
15
15
  end
16
16
 
17
17
  def save
18
- write(@data)
19
- self
18
+ save_to(path)
19
+ end
20
+
21
+ def save_to(file)
22
+ write(file, data? ? data : read)
20
23
  end
21
-
24
+
25
+ def data?
26
+ data && !data.empty?
27
+ end
28
+
22
29
  end
23
30
  end
@@ -32,6 +32,7 @@ module CRFPP
32
32
 
33
33
  def clear
34
34
  @sentences = [[]]
35
+ self
35
36
  end
36
37
 
37
38
  def to_s
@@ -43,6 +44,7 @@ module CRFPP
43
44
 
44
45
  def push(feature)
45
46
  @sentences.last << feature
47
+ self
46
48
  end
47
49
 
48
50
  alias << push
@@ -53,6 +55,7 @@ module CRFPP
53
55
 
54
56
  def new_sentence
55
57
  @sentences << []
58
+ self
56
59
  end
57
60
 
58
61
  end
@@ -0,0 +1,31 @@
1
+ module CRFPP
2
+ class Token
3
+
4
+ attr_accessor :word, :tags
5
+
6
+ def self.parse(string)
7
+ tokens = string.split(/\s+/).compact
8
+ tokens.empty? ? nil : new(tokens)
9
+ end
10
+
11
+ # Creates a new Token instance.
12
+ #
13
+ # call-seq:
14
+ # Token.new(word)
15
+ # Token.new(word, tags)
16
+ # Token.new(word, tag, tag, tag ...)
17
+ #
18
+ def initialize(*arguments)
19
+ @word, *@tags = *arguments.flatten
20
+ end
21
+
22
+ def answer
23
+ @tags[-1]
24
+ end
25
+
26
+ def to_s
27
+ [@word.to_s, @tags].flatten.join(' ')
28
+ end
29
+
30
+ end
31
+ end
@@ -3,9 +3,6 @@ module CRFPP
3
3
 
4
4
  # Creates a new Model based on a template and training data.
5
5
  #
6
- # The data parameter can either be an array of strings or a filename. The
7
- # possible options are:
8
- #
9
6
  # :threads: False or the number of threads to use (default is 2).
10
7
  #
11
8
  # :algorithm: L1 or L2 (default)
@@ -26,14 +23,7 @@ module CRFPP
26
23
  def learn(template, data, options = {})
27
24
  options = { :threads => 2, :algorithm => :L2, :cost => 1.0, :frequency => 1}.merge(options)
28
25
 
29
- unless File.exists?(data)
30
- data = save_data_to_tempfile([data].flatten)
31
- temporary = true
32
- end
33
-
34
- template = Template.new(template) unless template.is_a?(Template)
35
- model = Model.new
36
-
26
+ model = Model.new
37
27
  arguments = []
38
28
 
39
29
  # TODO check algorithm names
@@ -43,16 +33,15 @@ module CRFPP
43
33
  arguments << "--thread=#{options[:threads]}"
44
34
  arguments << "--freq=#{options[:frequency]}"
45
35
 
46
- arguments << template.path
47
- arguments << data
36
+ arguments << (template.respond_to?(:path) ? template.path : template)
37
+ arguments << (data.respond_to?(:path) ? data.path : data)
48
38
  arguments << model.path
49
-
50
- success = Native.learn(arguments.join(' '))
51
- raise NativeError, 'crfpp learn failed' unless success
39
+
40
+ Native.learn(arguments.join(' '))
52
41
 
53
42
  model
54
- ensure
55
- data.unlink if temporary
43
+ rescue => error
44
+ raise NativeError, error.message
56
45
  end
57
46
 
58
47
  alias train learn
@@ -1,3 +1,3 @@
1
1
  module CRFPP
2
- VERSION = '0.0.3'.freeze
2
+ VERSION = '0.0.4'.freeze
3
3
  end
@@ -0,0 +1,23 @@
1
+ require 'helper'
2
+
3
+ module CRFPP
4
+
5
+ class TestData < Test::Unit::TestCase
6
+
7
+ def test_load_data_from_empty_file
8
+ file = Tempfile.new('template')
9
+ assert Data.new(file.path).to_s.empty?
10
+ ensure
11
+ file.close
12
+ file.unlink
13
+ end
14
+
15
+ def test_load_data_from_file
16
+ path = "#{FixturesRoot}/test.data"
17
+ assert_equal 'Rockwell NNP B-NP', Data.new(path)[0][0].to_s
18
+ end
19
+
20
+
21
+ end
22
+
23
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crfpp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2011-08-18 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &2156620200 !ruby/object:Gem::Requirement
16
+ requirement: &2157284480 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.9'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2156620200
24
+ version_requirements: *2157284480
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rake-compiler
27
- requirement: &2156619700 !ruby/object:Gem::Requirement
27
+ requirement: &2157283860 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0.7'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *2156619700
35
+ version_requirements: *2157283860
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: ZenTest
38
- requirement: &2156619200 !ruby/object:Gem::Requirement
38
+ requirement: &2157283280 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '4.6'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2156619200
46
+ version_requirements: *2157283280
47
47
  description: A Ruby extension to interface with CRF++, the Conditional Random Fields
48
48
  library written in C++. You need to install libcrfpp to use this gem.
49
49
  email:
@@ -67,14 +67,17 @@ files:
67
67
  - ext/crfpp/tagger.cpp
68
68
  - ext/crfpp/tagger.hpp
69
69
  - lib/crfpp.rb
70
+ - lib/crfpp/data.rb
70
71
  - lib/crfpp/errors.rb
71
72
  - lib/crfpp/feature.rb
72
73
  - lib/crfpp/filelike.rb
73
74
  - lib/crfpp/macro.rb
74
75
  - lib/crfpp/model.rb
75
76
  - lib/crfpp/template.rb
77
+ - lib/crfpp/token.rb
76
78
  - lib/crfpp/utilities.rb
77
79
  - lib/crfpp/version.rb
80
+ - test/crfpp/test_data.rb
78
81
  - test/crfpp/test_feature.rb
79
82
  - test/crfpp/test_filelike.rb
80
83
  - test/crfpp/test_macro.rb
@@ -119,6 +122,7 @@ signing_key:
119
122
  specification_version: 3
120
123
  summary: Conditional Random Fields for Ruby.
121
124
  test_files:
125
+ - test/crfpp/test_data.rb
122
126
  - test/crfpp/test_feature.rb
123
127
  - test/crfpp/test_filelike.rb
124
128
  - test/crfpp/test_macro.rb