wapiti 0.0.5 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.simplecov +3 -0
- data/Gemfile +25 -2
- data/HISTORY.md +5 -1
- data/LICENSE +14 -13
- data/README.md +9 -16
- data/Rakefile +38 -8
- data/ext/wapiti/bcd.c +126 -124
- data/ext/wapiti/decoder.c +203 -124
- data/ext/wapiti/decoder.h +6 -4
- data/ext/wapiti/extconf.rb +2 -2
- data/ext/wapiti/gradient.c +491 -320
- data/ext/wapiti/gradient.h +52 -34
- data/ext/wapiti/lbfgs.c +74 -33
- data/ext/wapiti/model.c +47 -37
- data/ext/wapiti/model.h +22 -20
- data/ext/wapiti/native.c +850 -839
- data/ext/wapiti/native.h +1 -1
- data/ext/wapiti/options.c +52 -20
- data/ext/wapiti/options.h +37 -30
- data/ext/wapiti/pattern.c +35 -33
- data/ext/wapiti/pattern.h +12 -11
- data/ext/wapiti/progress.c +14 -13
- data/ext/wapiti/progress.h +3 -2
- data/ext/wapiti/quark.c +14 -16
- data/ext/wapiti/quark.h +6 -5
- data/ext/wapiti/reader.c +83 -69
- data/ext/wapiti/reader.h +11 -9
- data/ext/wapiti/rprop.c +84 -43
- data/ext/wapiti/sequence.h +18 -16
- data/ext/wapiti/sgdl1.c +45 -43
- data/ext/wapiti/thread.c +19 -17
- data/ext/wapiti/thread.h +5 -4
- data/ext/wapiti/tools.c +7 -7
- data/ext/wapiti/tools.h +3 -4
- data/ext/wapiti/trainers.h +1 -1
- data/ext/wapiti/vmath.c +40 -38
- data/ext/wapiti/vmath.h +12 -11
- data/ext/wapiti/wapiti.c +159 -37
- data/ext/wapiti/wapiti.h +18 -4
- data/lib/wapiti.rb +15 -15
- data/lib/wapiti/errors.rb +15 -15
- data/lib/wapiti/model.rb +92 -84
- data/lib/wapiti/options.rb +123 -124
- data/lib/wapiti/utility.rb +14 -14
- data/lib/wapiti/version.rb +2 -2
- data/spec/spec_helper.rb +29 -9
- data/spec/wapiti/model_spec.rb +230 -194
- data/spec/wapiti/native_spec.rb +7 -8
- data/spec/wapiti/options_spec.rb +184 -174
- data/wapiti.gemspec +22 -8
- metadata +38 -42
- data/.gitignore +0 -5
data/lib/wapiti/options.rb
CHANGED
@@ -1,125 +1,124 @@
|
|
1
1
|
module Wapiti
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
end
|
2
|
+
class Options
|
3
|
+
|
4
|
+
include Comparable
|
5
|
+
|
6
|
+
class << self
|
7
|
+
|
8
|
+
# Returns a sorted list of available option attributes.
|
9
|
+
def attribute_names
|
10
|
+
@attribute_names ||= %w{ stop_window convergence_window posterior
|
11
|
+
max_iterations jobsize threads rho1 rho2 stop_epsilon score check
|
12
|
+
algorithm pattern development_data maxent compact sparse skip_tokens
|
13
|
+
compress }.sort.map(&:to_sym).freeze
|
14
|
+
end
|
15
|
+
|
16
|
+
# Returns the default options.
|
17
|
+
def defaults
|
18
|
+
@defaults ||= new.attributes
|
19
|
+
end
|
20
|
+
|
21
|
+
# Returns the list of supported algorithm options.
|
22
|
+
def algorithms
|
23
|
+
@algorithms ||= %w{ l-bfgs sgd-l1 bcd rprop rprop+ rprop- auto }.freeze
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
attr_accessor :compress
|
29
|
+
|
30
|
+
alias compress? compress
|
31
|
+
|
32
|
+
# Returns the value of the attribute identified by +name+ or nil
|
33
|
+
# if there is no such attribute.
|
34
|
+
def [](name)
|
35
|
+
has_attribute?(name) ? send(name) : nil
|
36
|
+
end
|
37
|
+
|
38
|
+
# Updates the value of the attribute identified by +name+ with the
|
39
|
+
# passed-in +value+.
|
40
|
+
def []=(name, value)
|
41
|
+
raise ArgumentError, "bad attribute name: #{name}" unless has_attribute?(name)
|
42
|
+
send("#{name}=", value)
|
43
|
+
end
|
44
|
+
|
45
|
+
# Updates all the attributes from the passed-in hash.
|
46
|
+
def update(attributes = {})
|
47
|
+
attributes.each_pair do |k,v|
|
48
|
+
mid = "#{k}="
|
49
|
+
send(mid, v) if respond_to?(mid)
|
50
|
+
end
|
51
|
+
self
|
52
|
+
end
|
53
|
+
|
54
|
+
alias update_attributes update
|
55
|
+
|
56
|
+
def lbfgs
|
57
|
+
{ :clip => clip, :histsz => histsz, :maxls => maxls }
|
58
|
+
end
|
59
|
+
|
60
|
+
def sgdl1
|
61
|
+
{ :eta0 => eta0, :alpha => alpha }
|
62
|
+
end
|
63
|
+
|
64
|
+
def bcd
|
65
|
+
{ :kappa => kappa }
|
66
|
+
end
|
67
|
+
|
68
|
+
def rprop
|
69
|
+
{
|
70
|
+
:stpmin => stpmin, :stpmax => stpmax, :stpinc => stpinc,
|
71
|
+
:stpdec => stpdec, :cutoff => cutoff
|
72
|
+
}
|
73
|
+
end
|
74
|
+
|
75
|
+
# Returns a hash of all the attributes with their names and values.
|
76
|
+
def attributes
|
77
|
+
Hash[*Options.attribute_names.map { |a| [a, send(a)] }.flatten]
|
78
|
+
end
|
79
|
+
|
80
|
+
alias to_hash attributes
|
81
|
+
|
82
|
+
def has_attribute?(attribute)
|
83
|
+
Options.attribute_names.include?(attribute)
|
84
|
+
end
|
85
|
+
|
86
|
+
def valid_algorithm?
|
87
|
+
self.class.algorithms.include?(algorithm)
|
88
|
+
end
|
89
|
+
|
90
|
+
def valid?
|
91
|
+
validate.empty?
|
92
|
+
end
|
93
|
+
|
94
|
+
def validate
|
95
|
+
e = []
|
96
|
+
|
97
|
+
%w{ threads jobsize alpha histsz maxls eta0 alpha nbest }.each do |name|
|
98
|
+
e << "invalid value for #{name}: #{send(name)}" unless send(name) > 0
|
99
|
+
end
|
100
|
+
|
101
|
+
%w{ rho1 rho2 }.each do |name|
|
102
|
+
e << "invalid value for #{name}: #{send(name)}" unless send(name) >= 0.0
|
103
|
+
end
|
104
|
+
|
105
|
+
e << "unknown algorithm: #{algorithm}" unless valid_algorithm?
|
106
|
+
e << "BCD not supported for training maxent models" if maxent && algorithm == 'bcd'
|
107
|
+
e
|
108
|
+
end
|
109
|
+
|
110
|
+
%w{ maxent compact sparse label check score posterior compress }.each do |m|
|
111
|
+
writer = "#{m}=".to_sym
|
112
|
+
define_method("#{m}!") do
|
113
|
+
send(writer, true)
|
114
|
+
self
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
def <=>(other)
|
119
|
+
other.respond_to?(:attributes) ? attributes <=> other.attributes : nil
|
120
|
+
end
|
121
|
+
|
122
|
+
end
|
123
|
+
|
124
|
+
end
|
data/lib/wapiti/utility.rb
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
module Wapiti
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
end
|
2
|
+
|
3
|
+
# Creates a Model based on the pattern in the passed-in options and training
|
4
|
+
# +data+ supplied as input.
|
5
|
+
def train(data, options, &block)
|
6
|
+
Model.train(data, options, &block)
|
7
|
+
end
|
8
|
+
|
9
|
+
def load(model)
|
10
|
+
Model.load(model)
|
11
|
+
end
|
12
|
+
|
13
|
+
module_function :train, :load
|
14
|
+
|
15
|
+
end
|
data/lib/wapiti/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module Wapiti
|
2
|
-
VERSION = '0.0
|
3
|
-
end
|
2
|
+
VERSION = '0.1.0'.freeze
|
3
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,21 +1,41 @@
|
|
1
|
+
begin
|
2
|
+
require 'simplecov'
|
3
|
+
require 'coveralls' if ENV['CI']
|
4
|
+
rescue LoadError
|
5
|
+
# ignore
|
6
|
+
end
|
7
|
+
|
8
|
+
begin
|
9
|
+
if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'rbx'
|
10
|
+
require 'rubinius/debugger'
|
11
|
+
else
|
12
|
+
require 'debugger'
|
13
|
+
end
|
14
|
+
rescue LoadError
|
15
|
+
# ignore
|
16
|
+
end
|
17
|
+
|
18
|
+
$:.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
19
|
+
$:.unshift(File.dirname(__FILE__))
|
20
|
+
|
1
21
|
require 'wapiti'
|
2
22
|
|
3
23
|
require 'fileutils'
|
4
24
|
require 'tempfile'
|
5
25
|
|
6
26
|
RSpec::Matchers.define :be_valid_model do
|
7
|
-
|
8
|
-
|
9
|
-
|
27
|
+
match do |model|
|
28
|
+
model.is_a?(Wapiti::Model) && model.nlbl > 0
|
29
|
+
end
|
10
30
|
end
|
11
31
|
|
12
32
|
RSpec::Matchers.define :be_valid_model_file do
|
13
|
-
|
14
|
-
|
15
|
-
|
33
|
+
match do |path|
|
34
|
+
File.exists?(path) && !File.open(path).read.empty?
|
35
|
+
end
|
16
36
|
end
|
17
37
|
|
18
38
|
|
19
|
-
RSpec.
|
20
|
-
|
21
|
-
end
|
39
|
+
RSpec.configure do |c|
|
40
|
+
c.include(FileUtils)
|
41
|
+
end
|
data/spec/wapiti/model_spec.rb
CHANGED
@@ -1,197 +1,233 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
|
3
3
|
module Wapiti
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
end
|
4
|
+
describe 'Model' do
|
5
|
+
|
6
|
+
describe '.train' do
|
7
|
+
context 'given sufficient options' do
|
8
|
+
let(:pattern) { File.expand_path('../../fixtures/pattern.txt', __FILE__) }
|
9
|
+
let(:input) { File.expand_path('../../fixtures/train.txt', __FILE__) }
|
10
|
+
|
11
|
+
it 'returns a valid model instance' do
|
12
|
+
Model.train(input, :pattern => pattern).labels.should == (1..6).map(&:to_s)
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'is also exposed as Wapiti.train' do
|
16
|
+
Wapiti.train(input, :pattern => pattern).labels.should == (1..6).map(&:to_s)
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
describe 'initialization' do
|
23
|
+
|
24
|
+
context 'when passed no arguments' do
|
25
|
+
it 'creates a new model with default options' do
|
26
|
+
m = Model.new
|
27
|
+
m.should_not be nil
|
28
|
+
m.options.should be_instance_of(Options)
|
29
|
+
m.nlbl.should == 0
|
30
|
+
m.nobs.should == 0
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
context 'when passed more than one argument' do
|
35
|
+
it 'should raise an error' do
|
36
|
+
expect { Model.new(1,2) }.to raise_error
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
context 'when passed a hash' do
|
41
|
+
let(:options) { { :threads => 42 } }
|
42
|
+
|
43
|
+
it 'should create the options from the hash' do
|
44
|
+
Model.new(options).options[:threads].should == 42
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
context 'when passed an options instance' do
|
49
|
+
let(:options) { Options.new(:threads => 42) }
|
50
|
+
|
51
|
+
it 'should create the options from the hash' do
|
52
|
+
Model.new(options).options[:threads].should == 42
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
context 'when passed something other than a hash or an options instance' do
|
57
|
+
it 'should raise an error' do
|
58
|
+
expect { Model.new(1) }.to raise_error
|
59
|
+
expect { Model.new(nil) }.to raise_error
|
60
|
+
expect { Model.new(true) }.to raise_error
|
61
|
+
expect { Model.new('foo') }.to raise_error
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
context 'when called with a block' do
|
66
|
+
it 'should pass the options instance to the block' do
|
67
|
+
Model.new(:threads => 42) { |o| o.threads = 23 }.options.threads.should == 23
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
describe '#options' do
|
73
|
+
it 'returns the options for training' do
|
74
|
+
Model.new.options.should be_instance_of(Options)
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
describe '#labels' do
|
79
|
+
it 'returns the number of labels (0 by default)' do
|
80
|
+
Model.new.nlbl.should == 0
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
describe '#observations' do
|
85
|
+
it 'returns the number of observations (0 by default)' do
|
86
|
+
Model.new.observations.should == 0
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
describe '#features' do
|
91
|
+
it 'returns the number of features (0 by default)' do
|
92
|
+
Model.new.features.should == 0
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
describe '#total' do
|
97
|
+
it 'returns the total training time (0.0 by default)' do
|
98
|
+
Model.new.total.should == 0.0
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
describe '#train' do
|
103
|
+
let(:model) { Model.new(:pattern => File.expand_path('../../fixtures/pattern.txt', __FILE__)) }
|
104
|
+
let(:data) { File.expand_path('../../fixtures/train.txt', __FILE__) }
|
105
|
+
|
106
|
+
it 'accepts a filename as input' do
|
107
|
+
model.train(data).nlbl.should == 6
|
108
|
+
end
|
109
|
+
|
110
|
+
it 'accepts a data array' do
|
111
|
+
# sequence = []
|
112
|
+
# File.open(data).each_line do |line|
|
113
|
+
#
|
114
|
+
# end
|
115
|
+
#
|
116
|
+
# model.train([]).nlbl.should == 6
|
117
|
+
end
|
118
|
+
|
119
|
+
context 'when called without a pattern' do
|
120
|
+
it 'fails because of wapiti' do
|
121
|
+
expect { Model.new.train(data).nlbl.should == 6 }.to raise_error(NativeError)
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
end
|
126
|
+
|
127
|
+
describe '#statistics' do
|
128
|
+
context 'given an empty model' do
|
129
|
+
it 'returns zeroes' do
|
130
|
+
s = Model.new.statistics
|
131
|
+
|
132
|
+
s[:sequences][:total].should == 0
|
133
|
+
s[:sequences][:errors].should == 0
|
134
|
+
s[:sequences][:rate].should == 0
|
135
|
+
s[:tokens][:total].should == 0
|
136
|
+
s[:tokens][:errors].should == 0
|
137
|
+
s[:tokens][:rate].should == 0
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
context 'given a trained model' do
|
142
|
+
let(:model) { Wapiti.load(File.expand_path('../../fixtures/ch.mod', __FILE__)) }
|
143
|
+
let(:input) { [['Héllo NN B-VP', ', , O', 'world NN B-NP', '! ! O']] }
|
144
|
+
|
145
|
+
it 'returns token and sequcence counts and errors' do
|
146
|
+
model.statistics[:tokens][:total].should == 0
|
147
|
+
model.label input
|
148
|
+
model.statistics[:tokens][:total].should == 0
|
149
|
+
|
150
|
+
model.options.check = true
|
151
|
+
model.label input
|
152
|
+
|
153
|
+
model.statistics[:tokens][:total].should == input.map(&:length).reduce(&:+)
|
154
|
+
model.statistics[:sequences][:total].should == input.length
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
describe '#label' do
|
160
|
+
|
161
|
+
context 'given an empty model' do
|
162
|
+
|
163
|
+
end
|
164
|
+
|
165
|
+
context 'given a trained model' do
|
166
|
+
let(:model) { Wapiti.load(File.expand_path('../../fixtures/ch.mod', __FILE__)) }
|
167
|
+
|
168
|
+
context 'when passed an array of arrays' do
|
169
|
+
let(:input) { [['Héllo NN B-VP', ', , O', 'world NN B-NP', '! ! O']] }
|
170
|
+
|
171
|
+
it 'returns an array of token-label pairs' do
|
172
|
+
labels = model.label(input)
|
173
|
+
labels[0].map(&:first).should == input[0]
|
174
|
+
labels[0].map(&:last).should == %w{ B-NP O B-NP O }
|
175
|
+
end
|
176
|
+
|
177
|
+
it 'yields each token/label pair to the supplied block' do
|
178
|
+
labels = model.label(input) do |token, label|
|
179
|
+
[token.downcase, label.downcase]
|
180
|
+
end
|
181
|
+
labels[0].map(&:last).should == %w{ b-np o b-np o }
|
182
|
+
end
|
183
|
+
|
184
|
+
context 'with the :score option set' do
|
185
|
+
before(:each) { model.options.score! }
|
186
|
+
|
187
|
+
it 'returns an array of token-label-score tuples' do
|
188
|
+
model.label(input)[0].map { |t,l,s| s.class }.uniq == [Float]
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
context 'with the :nbest option set to 2' do
|
193
|
+
before(:each) { model.options.nbest = 2 }
|
194
|
+
|
195
|
+
it 'returns an array of token-label-label tuples' do
|
196
|
+
model.label(input)[0][-1][1,2] == %w{ O O }
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
end
|
201
|
+
|
202
|
+
|
203
|
+
context 'when passed a filename' do
|
204
|
+
let(:input) { File.expand_path('../../fixtures/chtest.txt', __FILE__) }
|
205
|
+
|
206
|
+
it 'returns an array of token-label pairs' do
|
207
|
+
labels = model.label(input)
|
208
|
+
labels.should have(77).elements
|
209
|
+
labels[0].take(5).map(&:last).should == %w{ B-NP B-PP B-NP I-NP B-VP }
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
end
|
214
|
+
|
215
|
+
end
|
216
|
+
|
217
|
+
describe '#labels' do
|
218
|
+
it 'returns an empty list by default' do
|
219
|
+
Model.new.labels.should be_empty
|
220
|
+
end
|
221
|
+
|
222
|
+
context 'given a trained model' do
|
223
|
+
let(:model) { Model.load(File.expand_path('../../fixtures/ch.mod', __FILE__)) }
|
224
|
+
|
225
|
+
it 'returns a list of all known labels' do
|
226
|
+
model.labels.should have(model.nlbl).elements
|
227
|
+
end
|
228
|
+
end
|
229
|
+
end
|
230
|
+
|
231
|
+
|
232
|
+
end
|
233
|
+
end
|