wapiti 0.0.5 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.simplecov +3 -0
- data/Gemfile +25 -2
- data/HISTORY.md +5 -1
- data/LICENSE +14 -13
- data/README.md +9 -16
- data/Rakefile +38 -8
- data/ext/wapiti/bcd.c +126 -124
- data/ext/wapiti/decoder.c +203 -124
- data/ext/wapiti/decoder.h +6 -4
- data/ext/wapiti/extconf.rb +2 -2
- data/ext/wapiti/gradient.c +491 -320
- data/ext/wapiti/gradient.h +52 -34
- data/ext/wapiti/lbfgs.c +74 -33
- data/ext/wapiti/model.c +47 -37
- data/ext/wapiti/model.h +22 -20
- data/ext/wapiti/native.c +850 -839
- data/ext/wapiti/native.h +1 -1
- data/ext/wapiti/options.c +52 -20
- data/ext/wapiti/options.h +37 -30
- data/ext/wapiti/pattern.c +35 -33
- data/ext/wapiti/pattern.h +12 -11
- data/ext/wapiti/progress.c +14 -13
- data/ext/wapiti/progress.h +3 -2
- data/ext/wapiti/quark.c +14 -16
- data/ext/wapiti/quark.h +6 -5
- data/ext/wapiti/reader.c +83 -69
- data/ext/wapiti/reader.h +11 -9
- data/ext/wapiti/rprop.c +84 -43
- data/ext/wapiti/sequence.h +18 -16
- data/ext/wapiti/sgdl1.c +45 -43
- data/ext/wapiti/thread.c +19 -17
- data/ext/wapiti/thread.h +5 -4
- data/ext/wapiti/tools.c +7 -7
- data/ext/wapiti/tools.h +3 -4
- data/ext/wapiti/trainers.h +1 -1
- data/ext/wapiti/vmath.c +40 -38
- data/ext/wapiti/vmath.h +12 -11
- data/ext/wapiti/wapiti.c +159 -37
- data/ext/wapiti/wapiti.h +18 -4
- data/lib/wapiti.rb +15 -15
- data/lib/wapiti/errors.rb +15 -15
- data/lib/wapiti/model.rb +92 -84
- data/lib/wapiti/options.rb +123 -124
- data/lib/wapiti/utility.rb +14 -14
- data/lib/wapiti/version.rb +2 -2
- data/spec/spec_helper.rb +29 -9
- data/spec/wapiti/model_spec.rb +230 -194
- data/spec/wapiti/native_spec.rb +7 -8
- data/spec/wapiti/options_spec.rb +184 -174
- data/wapiti.gemspec +22 -8
- metadata +38 -42
- data/.gitignore +0 -5
data/lib/wapiti/options.rb
CHANGED
@@ -1,125 +1,124 @@
|
|
1
1
|
module Wapiti
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
end
|
2
|
+
class Options
|
3
|
+
|
4
|
+
include Comparable
|
5
|
+
|
6
|
+
class << self
|
7
|
+
|
8
|
+
# Returns a sorted list of available option attributes.
|
9
|
+
def attribute_names
|
10
|
+
@attribute_names ||= %w{ stop_window convergence_window posterior
|
11
|
+
max_iterations jobsize threads rho1 rho2 stop_epsilon score check
|
12
|
+
algorithm pattern development_data maxent compact sparse skip_tokens
|
13
|
+
compress }.sort.map(&:to_sym).freeze
|
14
|
+
end
|
15
|
+
|
16
|
+
# Returns the default options.
|
17
|
+
def defaults
|
18
|
+
@defaults ||= new.attributes
|
19
|
+
end
|
20
|
+
|
21
|
+
# Returns the list of supported algorithm options.
|
22
|
+
def algorithms
|
23
|
+
@algorithms ||= %w{ l-bfgs sgd-l1 bcd rprop rprop+ rprop- auto }.freeze
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
attr_accessor :compress
|
29
|
+
|
30
|
+
alias compress? compress
|
31
|
+
|
32
|
+
# Returns the value of the attribute identified by +name+ or nil
|
33
|
+
# if there is no such attribute.
|
34
|
+
def [](name)
|
35
|
+
has_attribute?(name) ? send(name) : nil
|
36
|
+
end
|
37
|
+
|
38
|
+
# Updates the value of the attribute identified by +name+ with the
|
39
|
+
# passed-in +value+.
|
40
|
+
def []=(name, value)
|
41
|
+
raise ArgumentError, "bad attribute name: #{name}" unless has_attribute?(name)
|
42
|
+
send("#{name}=", value)
|
43
|
+
end
|
44
|
+
|
45
|
+
# Updates all the attributes from the passed-in hash.
|
46
|
+
def update(attributes = {})
|
47
|
+
attributes.each_pair do |k,v|
|
48
|
+
mid = "#{k}="
|
49
|
+
send(mid, v) if respond_to?(mid)
|
50
|
+
end
|
51
|
+
self
|
52
|
+
end
|
53
|
+
|
54
|
+
alias update_attributes update
|
55
|
+
|
56
|
+
def lbfgs
|
57
|
+
{ :clip => clip, :histsz => histsz, :maxls => maxls }
|
58
|
+
end
|
59
|
+
|
60
|
+
def sgdl1
|
61
|
+
{ :eta0 => eta0, :alpha => alpha }
|
62
|
+
end
|
63
|
+
|
64
|
+
def bcd
|
65
|
+
{ :kappa => kappa }
|
66
|
+
end
|
67
|
+
|
68
|
+
def rprop
|
69
|
+
{
|
70
|
+
:stpmin => stpmin, :stpmax => stpmax, :stpinc => stpinc,
|
71
|
+
:stpdec => stpdec, :cutoff => cutoff
|
72
|
+
}
|
73
|
+
end
|
74
|
+
|
75
|
+
# Returns a hash of all the attributes with their names and values.
|
76
|
+
def attributes
|
77
|
+
Hash[*Options.attribute_names.map { |a| [a, send(a)] }.flatten]
|
78
|
+
end
|
79
|
+
|
80
|
+
alias to_hash attributes
|
81
|
+
|
82
|
+
def has_attribute?(attribute)
|
83
|
+
Options.attribute_names.include?(attribute)
|
84
|
+
end
|
85
|
+
|
86
|
+
def valid_algorithm?
|
87
|
+
self.class.algorithms.include?(algorithm)
|
88
|
+
end
|
89
|
+
|
90
|
+
def valid?
|
91
|
+
validate.empty?
|
92
|
+
end
|
93
|
+
|
94
|
+
def validate
|
95
|
+
e = []
|
96
|
+
|
97
|
+
%w{ threads jobsize alpha histsz maxls eta0 alpha nbest }.each do |name|
|
98
|
+
e << "invalid value for #{name}: #{send(name)}" unless send(name) > 0
|
99
|
+
end
|
100
|
+
|
101
|
+
%w{ rho1 rho2 }.each do |name|
|
102
|
+
e << "invalid value for #{name}: #{send(name)}" unless send(name) >= 0.0
|
103
|
+
end
|
104
|
+
|
105
|
+
e << "unknown algorithm: #{algorithm}" unless valid_algorithm?
|
106
|
+
e << "BCD not supported for training maxent models" if maxent && algorithm == 'bcd'
|
107
|
+
e
|
108
|
+
end
|
109
|
+
|
110
|
+
%w{ maxent compact sparse label check score posterior compress }.each do |m|
|
111
|
+
writer = "#{m}=".to_sym
|
112
|
+
define_method("#{m}!") do
|
113
|
+
send(writer, true)
|
114
|
+
self
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
def <=>(other)
|
119
|
+
other.respond_to?(:attributes) ? attributes <=> other.attributes : nil
|
120
|
+
end
|
121
|
+
|
122
|
+
end
|
123
|
+
|
124
|
+
end
|
data/lib/wapiti/utility.rb
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
module Wapiti
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
end
|
2
|
+
|
3
|
+
# Creates a Model based on the pattern in the passed-in options and training
|
4
|
+
# +data+ supplied as input.
|
5
|
+
def train(data, options, &block)
|
6
|
+
Model.train(data, options, &block)
|
7
|
+
end
|
8
|
+
|
9
|
+
def load(model)
|
10
|
+
Model.load(model)
|
11
|
+
end
|
12
|
+
|
13
|
+
module_function :train, :load
|
14
|
+
|
15
|
+
end
|
data/lib/wapiti/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module Wapiti
|
2
|
-
VERSION = '0.0
|
3
|
-
end
|
2
|
+
VERSION = '0.1.0'.freeze
|
3
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,21 +1,41 @@
|
|
1
|
+
begin
|
2
|
+
require 'simplecov'
|
3
|
+
require 'coveralls' if ENV['CI']
|
4
|
+
rescue LoadError
|
5
|
+
# ignore
|
6
|
+
end
|
7
|
+
|
8
|
+
begin
|
9
|
+
if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'rbx'
|
10
|
+
require 'rubinius/debugger'
|
11
|
+
else
|
12
|
+
require 'debugger'
|
13
|
+
end
|
14
|
+
rescue LoadError
|
15
|
+
# ignore
|
16
|
+
end
|
17
|
+
|
18
|
+
$:.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
19
|
+
$:.unshift(File.dirname(__FILE__))
|
20
|
+
|
1
21
|
require 'wapiti'
|
2
22
|
|
3
23
|
require 'fileutils'
|
4
24
|
require 'tempfile'
|
5
25
|
|
6
26
|
RSpec::Matchers.define :be_valid_model do
|
7
|
-
|
8
|
-
|
9
|
-
|
27
|
+
match do |model|
|
28
|
+
model.is_a?(Wapiti::Model) && model.nlbl > 0
|
29
|
+
end
|
10
30
|
end
|
11
31
|
|
12
32
|
RSpec::Matchers.define :be_valid_model_file do
|
13
|
-
|
14
|
-
|
15
|
-
|
33
|
+
match do |path|
|
34
|
+
File.exists?(path) && !File.open(path).read.empty?
|
35
|
+
end
|
16
36
|
end
|
17
37
|
|
18
38
|
|
19
|
-
RSpec.
|
20
|
-
|
21
|
-
end
|
39
|
+
RSpec.configure do |c|
|
40
|
+
c.include(FileUtils)
|
41
|
+
end
|
data/spec/wapiti/model_spec.rb
CHANGED
@@ -1,197 +1,233 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
|
3
3
|
module Wapiti
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
end
|
4
|
+
describe 'Model' do
|
5
|
+
|
6
|
+
describe '.train' do
|
7
|
+
context 'given sufficient options' do
|
8
|
+
let(:pattern) { File.expand_path('../../fixtures/pattern.txt', __FILE__) }
|
9
|
+
let(:input) { File.expand_path('../../fixtures/train.txt', __FILE__) }
|
10
|
+
|
11
|
+
it 'returns a valid model instance' do
|
12
|
+
Model.train(input, :pattern => pattern).labels.should == (1..6).map(&:to_s)
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'is also exposed as Wapiti.train' do
|
16
|
+
Wapiti.train(input, :pattern => pattern).labels.should == (1..6).map(&:to_s)
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
describe 'initialization' do
|
23
|
+
|
24
|
+
context 'when passed no arguments' do
|
25
|
+
it 'creates a new model with default options' do
|
26
|
+
m = Model.new
|
27
|
+
m.should_not be nil
|
28
|
+
m.options.should be_instance_of(Options)
|
29
|
+
m.nlbl.should == 0
|
30
|
+
m.nobs.should == 0
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
context 'when passed more than one argument' do
|
35
|
+
it 'should raise an error' do
|
36
|
+
expect { Model.new(1,2) }.to raise_error
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
context 'when passed a hash' do
|
41
|
+
let(:options) { { :threads => 42 } }
|
42
|
+
|
43
|
+
it 'should create the options from the hash' do
|
44
|
+
Model.new(options).options[:threads].should == 42
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
context 'when passed an options instance' do
|
49
|
+
let(:options) { Options.new(:threads => 42) }
|
50
|
+
|
51
|
+
it 'should create the options from the hash' do
|
52
|
+
Model.new(options).options[:threads].should == 42
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
context 'when passed something other than a hash or an options instance' do
|
57
|
+
it 'should raise an error' do
|
58
|
+
expect { Model.new(1) }.to raise_error
|
59
|
+
expect { Model.new(nil) }.to raise_error
|
60
|
+
expect { Model.new(true) }.to raise_error
|
61
|
+
expect { Model.new('foo') }.to raise_error
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
context 'when called with a block' do
|
66
|
+
it 'should pass the options instance to the block' do
|
67
|
+
Model.new(:threads => 42) { |o| o.threads = 23 }.options.threads.should == 23
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
describe '#options' do
|
73
|
+
it 'returns the options for training' do
|
74
|
+
Model.new.options.should be_instance_of(Options)
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
describe '#labels' do
|
79
|
+
it 'returns the number of labels (0 by default)' do
|
80
|
+
Model.new.nlbl.should == 0
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
describe '#observations' do
|
85
|
+
it 'returns the number of observations (0 by default)' do
|
86
|
+
Model.new.observations.should == 0
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
describe '#features' do
|
91
|
+
it 'returns the number of features (0 by default)' do
|
92
|
+
Model.new.features.should == 0
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
describe '#total' do
|
97
|
+
it 'returns the total training time (0.0 by default)' do
|
98
|
+
Model.new.total.should == 0.0
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
describe '#train' do
|
103
|
+
let(:model) { Model.new(:pattern => File.expand_path('../../fixtures/pattern.txt', __FILE__)) }
|
104
|
+
let(:data) { File.expand_path('../../fixtures/train.txt', __FILE__) }
|
105
|
+
|
106
|
+
it 'accepts a filename as input' do
|
107
|
+
model.train(data).nlbl.should == 6
|
108
|
+
end
|
109
|
+
|
110
|
+
it 'accepts a data array' do
|
111
|
+
# sequence = []
|
112
|
+
# File.open(data).each_line do |line|
|
113
|
+
#
|
114
|
+
# end
|
115
|
+
#
|
116
|
+
# model.train([]).nlbl.should == 6
|
117
|
+
end
|
118
|
+
|
119
|
+
context 'when called without a pattern' do
|
120
|
+
it 'fails because of wapiti' do
|
121
|
+
expect { Model.new.train(data).nlbl.should == 6 }.to raise_error(NativeError)
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
end
|
126
|
+
|
127
|
+
describe '#statistics' do
|
128
|
+
context 'given an empty model' do
|
129
|
+
it 'returns zeroes' do
|
130
|
+
s = Model.new.statistics
|
131
|
+
|
132
|
+
s[:sequences][:total].should == 0
|
133
|
+
s[:sequences][:errors].should == 0
|
134
|
+
s[:sequences][:rate].should == 0
|
135
|
+
s[:tokens][:total].should == 0
|
136
|
+
s[:tokens][:errors].should == 0
|
137
|
+
s[:tokens][:rate].should == 0
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
context 'given a trained model' do
|
142
|
+
let(:model) { Wapiti.load(File.expand_path('../../fixtures/ch.mod', __FILE__)) }
|
143
|
+
let(:input) { [['Héllo NN B-VP', ', , O', 'world NN B-NP', '! ! O']] }
|
144
|
+
|
145
|
+
it 'returns token and sequcence counts and errors' do
|
146
|
+
model.statistics[:tokens][:total].should == 0
|
147
|
+
model.label input
|
148
|
+
model.statistics[:tokens][:total].should == 0
|
149
|
+
|
150
|
+
model.options.check = true
|
151
|
+
model.label input
|
152
|
+
|
153
|
+
model.statistics[:tokens][:total].should == input.map(&:length).reduce(&:+)
|
154
|
+
model.statistics[:sequences][:total].should == input.length
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
describe '#label' do
|
160
|
+
|
161
|
+
context 'given an empty model' do
|
162
|
+
|
163
|
+
end
|
164
|
+
|
165
|
+
context 'given a trained model' do
|
166
|
+
let(:model) { Wapiti.load(File.expand_path('../../fixtures/ch.mod', __FILE__)) }
|
167
|
+
|
168
|
+
context 'when passed an array of arrays' do
|
169
|
+
let(:input) { [['Héllo NN B-VP', ', , O', 'world NN B-NP', '! ! O']] }
|
170
|
+
|
171
|
+
it 'returns an array of token-label pairs' do
|
172
|
+
labels = model.label(input)
|
173
|
+
labels[0].map(&:first).should == input[0]
|
174
|
+
labels[0].map(&:last).should == %w{ B-NP O B-NP O }
|
175
|
+
end
|
176
|
+
|
177
|
+
it 'yields each token/label pair to the supplied block' do
|
178
|
+
labels = model.label(input) do |token, label|
|
179
|
+
[token.downcase, label.downcase]
|
180
|
+
end
|
181
|
+
labels[0].map(&:last).should == %w{ b-np o b-np o }
|
182
|
+
end
|
183
|
+
|
184
|
+
context 'with the :score option set' do
|
185
|
+
before(:each) { model.options.score! }
|
186
|
+
|
187
|
+
it 'returns an array of token-label-score tuples' do
|
188
|
+
model.label(input)[0].map { |t,l,s| s.class }.uniq == [Float]
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
context 'with the :nbest option set to 2' do
|
193
|
+
before(:each) { model.options.nbest = 2 }
|
194
|
+
|
195
|
+
it 'returns an array of token-label-label tuples' do
|
196
|
+
model.label(input)[0][-1][1,2] == %w{ O O }
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
end
|
201
|
+
|
202
|
+
|
203
|
+
context 'when passed a filename' do
|
204
|
+
let(:input) { File.expand_path('../../fixtures/chtest.txt', __FILE__) }
|
205
|
+
|
206
|
+
it 'returns an array of token-label pairs' do
|
207
|
+
labels = model.label(input)
|
208
|
+
labels.should have(77).elements
|
209
|
+
labels[0].take(5).map(&:last).should == %w{ B-NP B-PP B-NP I-NP B-VP }
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
end
|
214
|
+
|
215
|
+
end
|
216
|
+
|
217
|
+
describe '#labels' do
|
218
|
+
it 'returns an empty list by default' do
|
219
|
+
Model.new.labels.should be_empty
|
220
|
+
end
|
221
|
+
|
222
|
+
context 'given a trained model' do
|
223
|
+
let(:model) { Model.load(File.expand_path('../../fixtures/ch.mod', __FILE__)) }
|
224
|
+
|
225
|
+
it 'returns a list of all known labels' do
|
226
|
+
model.labels.should have(model.nlbl).elements
|
227
|
+
end
|
228
|
+
end
|
229
|
+
end
|
230
|
+
|
231
|
+
|
232
|
+
end
|
233
|
+
end
|