crfpp 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/crfpp/native.cpp +5 -1
- data/lib/crfpp.rb +3 -1
- data/lib/crfpp/data.rb +63 -0
- data/lib/crfpp/filelike.rb +6 -3
- data/lib/crfpp/model.rb +10 -3
- data/lib/crfpp/template.rb +3 -0
- data/lib/crfpp/token.rb +31 -0
- data/lib/crfpp/utilities.rb +7 -18
- data/lib/crfpp/version.rb +1 -1
- data/test/crfpp/test_data.rb +23 -0
- metadata +11 -7
data/ext/crfpp/native.cpp
CHANGED
@@ -24,11 +24,15 @@ static VALUE learn(VALUE klass, VALUE arguments) {
|
|
24
24
|
}
|
25
25
|
|
26
26
|
// start training
|
27
|
-
result = crfpp_learn(argc, argv)
|
27
|
+
result = crfpp_learn(argc, argv);
|
28
28
|
|
29
29
|
// free allocated memory
|
30
30
|
free(tmp);
|
31
31
|
|
32
|
+
if (result < 0) {
|
33
|
+
rb_raise(rb_eStandardError, "crfpp_learn failed");
|
34
|
+
}
|
35
|
+
|
32
36
|
return result;
|
33
37
|
}
|
34
38
|
|
data/lib/crfpp.rb
CHANGED
@@ -5,9 +5,11 @@ require 'tempfile'
|
|
5
5
|
require 'crfpp/version'
|
6
6
|
require 'crfpp/errors'
|
7
7
|
require 'crfpp/filelike'
|
8
|
+
require 'crfpp/token'
|
9
|
+
require 'crfpp/data'
|
8
10
|
require 'crfpp/macro'
|
9
|
-
require 'crfpp/template'
|
10
11
|
require 'crfpp/feature'
|
12
|
+
require 'crfpp/template'
|
11
13
|
require 'crfpp/model'
|
12
14
|
require 'crfpp/native'
|
13
15
|
require 'crfpp/utilities'
|
data/lib/crfpp/data.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
module CRFPP
|
2
|
+
|
3
|
+
#
|
4
|
+
# A Data object represents test or training data.
|
5
|
+
#
|
6
|
+
class Data
|
7
|
+
|
8
|
+
extend Forwardable
|
9
|
+
|
10
|
+
include Enumerable
|
11
|
+
include Filelike
|
12
|
+
|
13
|
+
attr_reader :sentences
|
14
|
+
|
15
|
+
def_delegators :@sentences, :length, :[], :each
|
16
|
+
|
17
|
+
def initialize(path = nil)
|
18
|
+
@path = path
|
19
|
+
open
|
20
|
+
end
|
21
|
+
|
22
|
+
def open
|
23
|
+
clear
|
24
|
+
|
25
|
+
read.lines.each do |line|
|
26
|
+
line.chomp!
|
27
|
+
if line.strip.empty?
|
28
|
+
new_sentence
|
29
|
+
else
|
30
|
+
push Token.parse(line)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
self
|
35
|
+
end
|
36
|
+
|
37
|
+
def clear
|
38
|
+
@sentences = [[]]
|
39
|
+
self
|
40
|
+
end
|
41
|
+
|
42
|
+
def to_s
|
43
|
+
empty? ? '' : zip([]).flatten.join("\n")
|
44
|
+
end
|
45
|
+
|
46
|
+
def push(feature)
|
47
|
+
@sentences.last << feature
|
48
|
+
self
|
49
|
+
end
|
50
|
+
|
51
|
+
alias << push
|
52
|
+
|
53
|
+
def empty?
|
54
|
+
[@sentences].flatten(2).compact.empty?
|
55
|
+
end
|
56
|
+
|
57
|
+
def new_sentence
|
58
|
+
@sentences << []
|
59
|
+
self
|
60
|
+
end
|
61
|
+
|
62
|
+
end
|
63
|
+
end
|
data/lib/crfpp/filelike.rb
CHANGED
@@ -8,13 +8,16 @@ module CRFPP
|
|
8
8
|
@path ||= Tempfile.new('filelike').path
|
9
9
|
end
|
10
10
|
|
11
|
-
def write
|
12
|
-
File.open(
|
13
|
-
f.write(
|
11
|
+
def write(file = path, content = to_s)
|
12
|
+
File.open(file, 'w:UTF-8') do |f|
|
13
|
+
f.write(content)
|
14
14
|
f.close
|
15
15
|
end
|
16
|
+
self
|
16
17
|
end
|
17
18
|
|
19
|
+
alias save write
|
20
|
+
|
18
21
|
def read
|
19
22
|
f = File.open(path, 'r:UTF-8')
|
20
23
|
f.read
|
data/lib/crfpp/model.rb
CHANGED
data/lib/crfpp/template.rb
CHANGED
@@ -32,6 +32,7 @@ module CRFPP
|
|
32
32
|
|
33
33
|
def clear
|
34
34
|
@sentences = [[]]
|
35
|
+
self
|
35
36
|
end
|
36
37
|
|
37
38
|
def to_s
|
@@ -43,6 +44,7 @@ module CRFPP
|
|
43
44
|
|
44
45
|
def push(feature)
|
45
46
|
@sentences.last << feature
|
47
|
+
self
|
46
48
|
end
|
47
49
|
|
48
50
|
alias << push
|
@@ -53,6 +55,7 @@ module CRFPP
|
|
53
55
|
|
54
56
|
def new_sentence
|
55
57
|
@sentences << []
|
58
|
+
self
|
56
59
|
end
|
57
60
|
|
58
61
|
end
|
data/lib/crfpp/token.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
module CRFPP
|
2
|
+
class Token
|
3
|
+
|
4
|
+
attr_accessor :word, :tags
|
5
|
+
|
6
|
+
def self.parse(string)
|
7
|
+
tokens = string.split(/\s+/).compact
|
8
|
+
tokens.empty? ? nil : new(tokens)
|
9
|
+
end
|
10
|
+
|
11
|
+
# Creates a new Token instance.
|
12
|
+
#
|
13
|
+
# call-seq:
|
14
|
+
# Token.new(word)
|
15
|
+
# Token.new(word, tags)
|
16
|
+
# Token.new(word, tag, tag, tag ...)
|
17
|
+
#
|
18
|
+
def initialize(*arguments)
|
19
|
+
@word, *@tags = *arguments.flatten
|
20
|
+
end
|
21
|
+
|
22
|
+
def answer
|
23
|
+
@tags[-1]
|
24
|
+
end
|
25
|
+
|
26
|
+
def to_s
|
27
|
+
[@word.to_s, @tags].flatten.join(' ')
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
31
|
+
end
|
data/lib/crfpp/utilities.rb
CHANGED
@@ -3,9 +3,6 @@ module CRFPP
|
|
3
3
|
|
4
4
|
# Creates a new Model based on a template and training data.
|
5
5
|
#
|
6
|
-
# The data parameter can either be an array of strings or a filename. The
|
7
|
-
# possible options are:
|
8
|
-
#
|
9
6
|
# :threads: False or the number of threads to use (default is 2).
|
10
7
|
#
|
11
8
|
# :algorithm: L1 or L2 (default)
|
@@ -26,14 +23,7 @@ module CRFPP
|
|
26
23
|
def learn(template, data, options = {})
|
27
24
|
options = { :threads => 2, :algorithm => :L2, :cost => 1.0, :frequency => 1}.merge(options)
|
28
25
|
|
29
|
-
|
30
|
-
data = save_data_to_tempfile([data].flatten)
|
31
|
-
temporary = true
|
32
|
-
end
|
33
|
-
|
34
|
-
template = Template.new(template) unless template.is_a?(Template)
|
35
|
-
model = Model.new
|
36
|
-
|
26
|
+
model = Model.new
|
37
27
|
arguments = []
|
38
28
|
|
39
29
|
# TODO check algorithm names
|
@@ -43,16 +33,15 @@ module CRFPP
|
|
43
33
|
arguments << "--thread=#{options[:threads]}"
|
44
34
|
arguments << "--freq=#{options[:frequency]}"
|
45
35
|
|
46
|
-
arguments << template.path
|
47
|
-
arguments << data
|
36
|
+
arguments << (template.respond_to?(:path) ? template.path : template)
|
37
|
+
arguments << (data.respond_to?(:path) ? data.path : data)
|
48
38
|
arguments << model.path
|
49
|
-
|
50
|
-
|
51
|
-
raise NativeError, 'crfpp learn failed' unless success
|
39
|
+
|
40
|
+
Native.learn(arguments.join(' '))
|
52
41
|
|
53
42
|
model
|
54
|
-
|
55
|
-
|
43
|
+
rescue => error
|
44
|
+
raise NativeError, error.message
|
56
45
|
end
|
57
46
|
|
58
47
|
alias train learn
|
data/lib/crfpp/version.rb
CHANGED
data/test/crfpp/test_data.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
module CRFPP
|
4
|
+
|
5
|
+
class TestData < Test::Unit::TestCase
|
6
|
+
|
7
|
+
def test_load_data_from_empty_file
|
8
|
+
file = Tempfile.new('template')
|
9
|
+
assert Data.new(file.path).to_s.empty?
|
10
|
+
ensure
|
11
|
+
file.close
|
12
|
+
file.unlink
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_load_data_from_file
|
16
|
+
path = "#{FixturesRoot}/test.data"
|
17
|
+
assert_equal 'Rockwell NNP B-NP', Data.new(path)[0][0].to_s
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crfpp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2011-08-18 00:00:00.000000000Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
16
|
-
requirement: &
|
16
|
+
requirement: &2157284480 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.9'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2157284480
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rake-compiler
|
27
|
-
requirement: &
|
27
|
+
requirement: &2157283860 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0.7'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2157283860
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: ZenTest
|
38
|
-
requirement: &
|
38
|
+
requirement: &2157283280 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: '4.6'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2157283280
|
47
47
|
description: A Ruby extension to interface with CRF++, the Conditional Random Fields
|
48
48
|
library written in C++. You need to install libcrfpp to use this gem.
|
49
49
|
email:
|
@@ -67,14 +67,17 @@ files:
|
|
67
67
|
- ext/crfpp/tagger.cpp
|
68
68
|
- ext/crfpp/tagger.hpp
|
69
69
|
- lib/crfpp.rb
|
70
|
+
- lib/crfpp/data.rb
|
70
71
|
- lib/crfpp/errors.rb
|
71
72
|
- lib/crfpp/feature.rb
|
72
73
|
- lib/crfpp/filelike.rb
|
73
74
|
- lib/crfpp/macro.rb
|
74
75
|
- lib/crfpp/model.rb
|
75
76
|
- lib/crfpp/template.rb
|
77
|
+
- lib/crfpp/token.rb
|
76
78
|
- lib/crfpp/utilities.rb
|
77
79
|
- lib/crfpp/version.rb
|
80
|
+
- test/crfpp/test_data.rb
|
78
81
|
- test/crfpp/test_feature.rb
|
79
82
|
- test/crfpp/test_filelike.rb
|
80
83
|
- test/crfpp/test_macro.rb
|
@@ -119,6 +122,7 @@ signing_key:
|
|
119
122
|
specification_version: 3
|
120
123
|
summary: Conditional Random Fields for Ruby.
|
121
124
|
test_files:
|
125
|
+
- test/crfpp/test_data.rb
|
122
126
|
- test/crfpp/test_feature.rb
|
123
127
|
- test/crfpp/test_filelike.rb
|
124
128
|
- test/crfpp/test_macro.rb
|