wapiti 0.0.5 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53)
  1. checksums.yaml +7 -0
  2. data/.simplecov +3 -0
  3. data/Gemfile +25 -2
  4. data/HISTORY.md +5 -1
  5. data/LICENSE +14 -13
  6. data/README.md +9 -16
  7. data/Rakefile +38 -8
  8. data/ext/wapiti/bcd.c +126 -124
  9. data/ext/wapiti/decoder.c +203 -124
  10. data/ext/wapiti/decoder.h +6 -4
  11. data/ext/wapiti/extconf.rb +2 -2
  12. data/ext/wapiti/gradient.c +491 -320
  13. data/ext/wapiti/gradient.h +52 -34
  14. data/ext/wapiti/lbfgs.c +74 -33
  15. data/ext/wapiti/model.c +47 -37
  16. data/ext/wapiti/model.h +22 -20
  17. data/ext/wapiti/native.c +850 -839
  18. data/ext/wapiti/native.h +1 -1
  19. data/ext/wapiti/options.c +52 -20
  20. data/ext/wapiti/options.h +37 -30
  21. data/ext/wapiti/pattern.c +35 -33
  22. data/ext/wapiti/pattern.h +12 -11
  23. data/ext/wapiti/progress.c +14 -13
  24. data/ext/wapiti/progress.h +3 -2
  25. data/ext/wapiti/quark.c +14 -16
  26. data/ext/wapiti/quark.h +6 -5
  27. data/ext/wapiti/reader.c +83 -69
  28. data/ext/wapiti/reader.h +11 -9
  29. data/ext/wapiti/rprop.c +84 -43
  30. data/ext/wapiti/sequence.h +18 -16
  31. data/ext/wapiti/sgdl1.c +45 -43
  32. data/ext/wapiti/thread.c +19 -17
  33. data/ext/wapiti/thread.h +5 -4
  34. data/ext/wapiti/tools.c +7 -7
  35. data/ext/wapiti/tools.h +3 -4
  36. data/ext/wapiti/trainers.h +1 -1
  37. data/ext/wapiti/vmath.c +40 -38
  38. data/ext/wapiti/vmath.h +12 -11
  39. data/ext/wapiti/wapiti.c +159 -37
  40. data/ext/wapiti/wapiti.h +18 -4
  41. data/lib/wapiti.rb +15 -15
  42. data/lib/wapiti/errors.rb +15 -15
  43. data/lib/wapiti/model.rb +92 -84
  44. data/lib/wapiti/options.rb +123 -124
  45. data/lib/wapiti/utility.rb +14 -14
  46. data/lib/wapiti/version.rb +2 -2
  47. data/spec/spec_helper.rb +29 -9
  48. data/spec/wapiti/model_spec.rb +230 -194
  49. data/spec/wapiti/native_spec.rb +7 -8
  50. data/spec/wapiti/options_spec.rb +184 -174
  51. data/wapiti.gemspec +22 -8
  52. metadata +38 -42
  53. data/.gitignore +0 -5
data/lib/wapiti/options.rb
@@ -1,125 +1,124 @@
1
1
  module Wapiti
2
-
3
- class Options
4
-
5
- include Comparable
6
-
7
- class << self
8
-
9
- # Returns a sorted list of available option attributes.
10
- def attribute_names
11
- @attribute_names ||= %w{ stop_window convergence_window posterior
12
- max_iterations jobsize threads rho1 rho2 stop_epsilon score check
13
- algorithm pattern development_data maxent compact sparse skip_tokens
14
- compress }.sort.map(&:to_sym).freeze
15
- end
16
-
17
- # Returns the default options.
18
- def defaults
19
- @defaults ||= new.attributes
20
- end
21
-
22
- # Returns the list of supported algorithm options.
23
- def algorithms
24
- @algorithms ||= %w{ l-bfgs sgd-l1 bcd rprop rprop+ rprop- auto }.freeze
25
- end
26
-
27
- end
28
-
29
- attr_accessor :compress
30
-
31
- alias compress? compress
32
-
33
- # Returns the value of the attribute identified by +name+ or nil
34
- # if there is no such attribute.
35
- def [](name)
36
- has_attribute?(name) ? send(name) : nil
37
- end
38
-
39
- # Updates the value of the attribute identified by +name+ with the
40
- # passed-in +value+.
41
- def []=(name, value)
42
- raise ArgumentError, "bad attribute name: #{name}" unless has_attribute?(name)
43
- send("#{name}=", value)
44
- end
45
-
46
- # Updates all the attributes from the passed-in hash.
47
- def update(attributes = {})
48
- attributes.each_pair do |k,v|
49
- mid = "#{k}="
50
- send(mid, v) if respond_to?(mid)
51
- end
52
- self
53
- end
54
-
55
- alias update_attributes update
56
-
57
- def lbfgs
58
- { :clip => clip, :histsz => histsz, :maxls => maxls }
59
- end
60
-
61
- def sgdl1
62
- { :eta0 => eta0, :alpha => alpha }
63
- end
64
-
65
- def bcd
66
- { :kappa => kappa }
67
- end
68
-
69
- def rprop
70
- {
71
- :stpmin => stpmin, :stpmax => stpmax, :stpinc => stpinc,
72
- :stpdec => stpdec, :cutoff => cutoff
73
- }
74
- end
75
-
76
- # Returns a hash of all the attributes with their names and values.
77
- def attributes
78
- Hash[*Options.attribute_names.map { |a| [a, send(a)] }.flatten]
79
- end
80
-
81
- alias to_hash attributes
82
-
83
- def has_attribute?(attribute)
84
- Options.attribute_names.include?(attribute)
85
- end
86
-
87
- def valid_algorithm?
88
- self.class.algorithms.include?(algorithm)
89
- end
90
-
91
- def valid?
92
- validate.empty?
93
- end
94
-
95
- def validate
96
- e = []
97
-
98
- %w{ threads jobsize alpha histsz maxls eta0 alpha nbest }.each do |name|
99
- e << "invalid value for #{name}: #{send(name)}" unless send(name) > 0
100
- end
101
-
102
- %w{ rho1 rho2 }.each do |name|
103
- e << "invalid value for #{name}: #{send(name)}" unless send(name) >= 0.0
104
- end
105
-
106
- e << "unknown algorithm: #{algorithm}" unless valid_algorithm?
107
- e << "BCD not supported for training maxent models" if maxent && algorithm == 'bcd'
108
- e
109
- end
110
-
111
- %w{ maxent compact sparse label check score posterior compress }.each do |m|
112
- writer = "#{m}=".to_sym
113
- define_method("#{m}!") do
114
- send(writer, true)
115
- self
116
- end
117
- end
118
-
119
- def <=>(other)
120
- other.respond_to?(:attributes) ? attributes <=> other.attributes : nil
121
- end
122
-
123
- end
124
-
125
- end
2
+ class Options
3
+
4
+ include Comparable
5
+
6
+ class << self
7
+
8
+ # Returns a sorted list of available option attributes.
9
+ def attribute_names
10
+ @attribute_names ||= %w{ stop_window convergence_window posterior
11
+ max_iterations jobsize threads rho1 rho2 stop_epsilon score check
12
+ algorithm pattern development_data maxent compact sparse skip_tokens
13
+ compress }.sort.map(&:to_sym).freeze
14
+ end
15
+
16
+ # Returns the default options.
17
+ def defaults
18
+ @defaults ||= new.attributes
19
+ end
20
+
21
+ # Returns the list of supported algorithm options.
22
+ def algorithms
23
+ @algorithms ||= %w{ l-bfgs sgd-l1 bcd rprop rprop+ rprop- auto }.freeze
24
+ end
25
+
26
+ end
27
+
28
+ attr_accessor :compress
29
+
30
+ alias compress? compress
31
+
32
+ # Returns the value of the attribute identified by +name+ or nil
33
+ # if there is no such attribute.
34
+ def [](name)
35
+ has_attribute?(name) ? send(name) : nil
36
+ end
37
+
38
+ # Updates the value of the attribute identified by +name+ with the
39
+ # passed-in +value+.
40
+ def []=(name, value)
41
+ raise ArgumentError, "bad attribute name: #{name}" unless has_attribute?(name)
42
+ send("#{name}=", value)
43
+ end
44
+
45
+ # Updates all the attributes from the passed-in hash.
46
+ def update(attributes = {})
47
+ attributes.each_pair do |k,v|
48
+ mid = "#{k}="
49
+ send(mid, v) if respond_to?(mid)
50
+ end
51
+ self
52
+ end
53
+
54
+ alias update_attributes update
55
+
56
+ def lbfgs
57
+ { :clip => clip, :histsz => histsz, :maxls => maxls }
58
+ end
59
+
60
+ def sgdl1
61
+ { :eta0 => eta0, :alpha => alpha }
62
+ end
63
+
64
+ def bcd
65
+ { :kappa => kappa }
66
+ end
67
+
68
+ def rprop
69
+ {
70
+ :stpmin => stpmin, :stpmax => stpmax, :stpinc => stpinc,
71
+ :stpdec => stpdec, :cutoff => cutoff
72
+ }
73
+ end
74
+
75
+ # Returns a hash of all the attributes with their names and values.
76
+ def attributes
77
+ Hash[*Options.attribute_names.map { |a| [a, send(a)] }.flatten]
78
+ end
79
+
80
+ alias to_hash attributes
81
+
82
+ def has_attribute?(attribute)
83
+ Options.attribute_names.include?(attribute)
84
+ end
85
+
86
+ def valid_algorithm?
87
+ self.class.algorithms.include?(algorithm)
88
+ end
89
+
90
+ def valid?
91
+ validate.empty?
92
+ end
93
+
94
+ def validate
95
+ e = []
96
+
97
+ %w{ threads jobsize alpha histsz maxls eta0 alpha nbest }.each do |name|
98
+ e << "invalid value for #{name}: #{send(name)}" unless send(name) > 0
99
+ end
100
+
101
+ %w{ rho1 rho2 }.each do |name|
102
+ e << "invalid value for #{name}: #{send(name)}" unless send(name) >= 0.0
103
+ end
104
+
105
+ e << "unknown algorithm: #{algorithm}" unless valid_algorithm?
106
+ e << "BCD not supported for training maxent models" if maxent && algorithm == 'bcd'
107
+ e
108
+ end
109
+
110
+ %w{ maxent compact sparse label check score posterior compress }.each do |m|
111
+ writer = "#{m}=".to_sym
112
+ define_method("#{m}!") do
113
+ send(writer, true)
114
+ self
115
+ end
116
+ end
117
+
118
+ def <=>(other)
119
+ other.respond_to?(:attributes) ? attributes <=> other.attributes : nil
120
+ end
121
+
122
+ end
123
+
124
+ end
data/lib/wapiti/utility.rb
@@ -1,15 +1,15 @@
1
1
  module Wapiti
2
-
3
- # Creates a Model based on the pattern in the passed-in options and training
4
- # +data+ supplied as input.
5
- def train(data, options, &block)
6
- Model.train(data, options, &block)
7
- end
8
-
9
- def load(model)
10
- Model.load(model)
11
- end
12
-
13
- module_function :train, :load
14
-
15
- end
2
+
3
+ # Creates a Model based on the pattern in the passed-in options and training
4
+ # +data+ supplied as input.
5
+ def train(data, options, &block)
6
+ Model.train(data, options, &block)
7
+ end
8
+
9
+ def load(model)
10
+ Model.load(model)
11
+ end
12
+
13
+ module_function :train, :load
14
+
15
+ end
data/lib/wapiti/version.rb
@@ -1,3 +1,3 @@
1
1
  module Wapiti
2
- VERSION = '0.0.5'.freeze
3
- end
2
+ VERSION = '0.1.0'.freeze
3
+ end
data/spec/spec_helper.rb
@@ -1,21 +1,41 @@
1
+ begin
2
+ require 'simplecov'
3
+ require 'coveralls' if ENV['CI']
4
+ rescue LoadError
5
+ # ignore
6
+ end
7
+
8
+ begin
9
+ if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'rbx'
10
+ require 'rubinius/debugger'
11
+ else
12
+ require 'debugger'
13
+ end
14
+ rescue LoadError
15
+ # ignore
16
+ end
17
+
18
+ $:.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
19
+ $:.unshift(File.dirname(__FILE__))
20
+
1
21
  require 'wapiti'
2
22
 
3
23
  require 'fileutils'
4
24
  require 'tempfile'
5
25
 
6
26
  RSpec::Matchers.define :be_valid_model do
7
- match do |model|
8
- model.is_a?(Wapiti::Model) && model.nlbl > 0
9
- end
27
+ match do |model|
28
+ model.is_a?(Wapiti::Model) && model.nlbl > 0
29
+ end
10
30
  end
11
31
 
12
32
  RSpec::Matchers.define :be_valid_model_file do
13
- match do |path|
14
- File.exists?(path) && !File.open(path).read.empty?
15
- end
33
+ match do |path|
34
+ File.exists?(path) && !File.open(path).read.empty?
35
+ end
16
36
  end
17
37
 
18
38
 
19
- RSpec.configuration do |c|
20
- c.include(FileUtils)
21
- end
39
+ RSpec.configure do |c|
40
+ c.include(FileUtils)
41
+ end
data/spec/wapiti/model_spec.rb
@@ -1,197 +1,233 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module Wapiti
4
- describe 'Model' do
5
-
6
- describe '.train' do
7
- context 'given sufficient options' do
8
- let(:pattern) { File.expand_path('../../fixtures/pattern.txt', __FILE__) }
9
- let(:input) { File.expand_path('../../fixtures/train.txt', __FILE__) }
10
-
11
- it 'returns a valid model instance' do
12
- Model.train(input, :pattern => pattern).labels.should == (1..6).map(&:to_s)
13
- end
14
-
15
- end
16
- end
17
-
18
- describe 'initialization' do
19
-
20
- context 'when passed no arguments' do
21
- it 'creates a new model with default options' do
22
- m = Model.new
23
- m.should_not be nil
24
- m.options.should be_instance_of(Options)
25
- m.nlbl.should == 0
26
- m.nobs.should == 0
27
- end
28
- end
29
-
30
- context 'when passed more than one argument' do
31
- it 'should raise an error' do
32
- expect { Model.new(1,2) }.to raise_error
33
- end
34
- end
35
-
36
- context 'when passed a hash' do
37
- let(:options) { { :threads => 42 } }
38
-
39
- it 'should create the options from the hash' do
40
- Model.new(options).options[:threads].should == 42
41
- end
42
- end
43
-
44
- context 'when passed an options instance' do
45
- let(:options) { Options.new(:threads => 42) }
46
-
47
- it 'should create the options from the hash' do
48
- Model.new(options).options[:threads].should == 42
49
- end
50
- end
51
-
52
- context 'when passed something other than a hash or an options instance' do
53
- it 'should raise an error' do
54
- expect { Model.new(1) }.to raise_error
55
- expect { Model.new(nil) }.to raise_error
56
- expect { Model.new(true) }.to raise_error
57
- expect { Model.new('foo') }.to raise_error
58
- end
59
- end
60
-
61
- context 'when called with a block' do
62
- it 'should pass the options instance to the block' do
63
- Model.new(:threads => 42) { |o| o.threads = 23 }.options.threads.should == 23
64
- end
65
- end
66
- end
67
-
68
- describe '#options' do
69
- it 'returns the options for training' do
70
- Model.new.options.should be_instance_of(Options)
71
- end
72
- end
73
-
74
- describe '#labels' do
75
- it 'returns the number of labels (0 by default)' do
76
- Model.new.nlbl.should == 0
77
- end
78
- end
79
-
80
- describe '#observations' do
81
- it 'returns the number of observations (0 by default)' do
82
- Model.new.observations.should == 0
83
- end
84
- end
85
-
86
- describe '#features' do
87
- it 'returns the number of features (0 by default)' do
88
- Model.new.features.should == 0
89
- end
90
- end
91
-
92
- describe '#total' do
93
- it 'returns the total training time (0.0 by default)' do
94
- Model.new.total.should == 0.0
95
- end
96
- end
97
-
98
- describe '#train' do
99
- let(:model) { Model.new(:pattern => File.expand_path('../../fixtures/pattern.txt', __FILE__)) }
100
- let(:data) { File.expand_path('../../fixtures/train.txt', __FILE__) }
101
-
102
- it 'accepts a filename as input' do
103
- model.train(data).nlbl.should == 6
104
- end
105
-
106
- it 'accepts a data array' do
107
- # sequence = []
108
- # File.open(data).each_line do |line|
109
- #
110
- # end
111
- #
112
- # model.train([]).nlbl.should == 6
113
- end
114
-
115
- context 'when called without a pattern' do
116
- it 'fails because of wapiti' do
117
- expect { Model.new.train(data).nlbl.should == 6 }.to raise_error(NativeError)
118
- end
119
- end
120
-
121
- end
122
-
123
- describe '#label' do
124
-
125
- context 'given an empty model' do
126
-
127
- end
128
-
129
- context 'given a trained model' do
130
- let(:model) { Model.load(File.expand_path('../../fixtures/ch.mod', __FILE__)) }
131
-
132
- context 'when passed an array of arrays' do
133
- let(:input) { [['Héllo NN B-VP', ', , O', 'world NN B-NP', '! ! O']] }
134
-
135
- it 'returns an array of token-label pairs' do
136
- labels = model.label(input)
137
- labels[0].map(&:first).should == input[0]
138
- labels[0].map(&:last).should == %w{ B-NP O B-NP O }
139
- end
140
-
141
- it 'yields each token/label pair to the supplied block' do
142
- labels = model.label(input) do |token, label|
143
- [token.downcase, label.downcase]
144
- end
145
- labels[0].map(&:last).should == %w{ b-np o b-np o }
146
- end
147
-
148
- context 'with the :score option set' do
149
- before(:each) { model.options.score! }
150
-
151
- it 'returns an array of token-label-score tuples' do
152
- model.label(input)[0].map { |t,l,s| s.class }.uniq == [Float]
153
- end
154
- end
155
-
156
- context 'with the :nbest option set to 2' do
157
- before(:each) { model.options.nbest = 2 }
158
-
159
- it 'returns an array of token-label-label tuples' do
160
- model.label(input)[0][-1][1,2] == %w{ O O }
161
- end
162
- end
163
-
164
- end
165
-
166
-
167
- context 'when passed a filename' do
168
- let(:input) { File.expand_path('../../fixtures/chtest.txt', __FILE__) }
169
-
170
- it 'returns an array of token-label pairs' do
171
- labels = model.label(input)
172
- labels.should have(77).elements
173
- labels[0].take(5).map(&:last).should == %w{ B-NP B-PP B-NP I-NP B-VP }
174
- end
175
- end
176
-
177
- end
178
-
179
- end
180
-
181
- describe '#labels' do
182
- it 'returns an empty list by default' do
183
- Model.new.labels.should be_empty
184
- end
185
-
186
- context 'given a trained model' do
187
- let(:model) { Model.load(File.expand_path('../../fixtures/ch.mod', __FILE__)) }
188
-
189
- it 'returns a list of all known labels' do
190
- model.labels.should have(model.nlbl).elements
191
- end
192
- end
193
- end
194
-
195
-
196
- end
197
- end
4
+ describe 'Model' do
5
+
6
+ describe '.train' do
7
+ context 'given sufficient options' do
8
+ let(:pattern) { File.expand_path('../../fixtures/pattern.txt', __FILE__) }
9
+ let(:input) { File.expand_path('../../fixtures/train.txt', __FILE__) }
10
+
11
+ it 'returns a valid model instance' do
12
+ Model.train(input, :pattern => pattern).labels.should == (1..6).map(&:to_s)
13
+ end
14
+
15
+ it 'is also exposed as Wapiti.train' do
16
+ Wapiti.train(input, :pattern => pattern).labels.should == (1..6).map(&:to_s)
17
+ end
18
+
19
+ end
20
+ end
21
+
22
+ describe 'initialization' do
23
+
24
+ context 'when passed no arguments' do
25
+ it 'creates a new model with default options' do
26
+ m = Model.new
27
+ m.should_not be nil
28
+ m.options.should be_instance_of(Options)
29
+ m.nlbl.should == 0
30
+ m.nobs.should == 0
31
+ end
32
+ end
33
+
34
+ context 'when passed more than one argument' do
35
+ it 'should raise an error' do
36
+ expect { Model.new(1,2) }.to raise_error
37
+ end
38
+ end
39
+
40
+ context 'when passed a hash' do
41
+ let(:options) { { :threads => 42 } }
42
+
43
+ it 'should create the options from the hash' do
44
+ Model.new(options).options[:threads].should == 42
45
+ end
46
+ end
47
+
48
+ context 'when passed an options instance' do
49
+ let(:options) { Options.new(:threads => 42) }
50
+
51
+ it 'should create the options from the hash' do
52
+ Model.new(options).options[:threads].should == 42
53
+ end
54
+ end
55
+
56
+ context 'when passed something other than a hash or an options instance' do
57
+ it 'should raise an error' do
58
+ expect { Model.new(1) }.to raise_error
59
+ expect { Model.new(nil) }.to raise_error
60
+ expect { Model.new(true) }.to raise_error
61
+ expect { Model.new('foo') }.to raise_error
62
+ end
63
+ end
64
+
65
+ context 'when called with a block' do
66
+ it 'should pass the options instance to the block' do
67
+ Model.new(:threads => 42) { |o| o.threads = 23 }.options.threads.should == 23
68
+ end
69
+ end
70
+ end
71
+
72
+ describe '#options' do
73
+ it 'returns the options for training' do
74
+ Model.new.options.should be_instance_of(Options)
75
+ end
76
+ end
77
+
78
+ describe '#labels' do
79
+ it 'returns the number of labels (0 by default)' do
80
+ Model.new.nlbl.should == 0
81
+ end
82
+ end
83
+
84
+ describe '#observations' do
85
+ it 'returns the number of observations (0 by default)' do
86
+ Model.new.observations.should == 0
87
+ end
88
+ end
89
+
90
+ describe '#features' do
91
+ it 'returns the number of features (0 by default)' do
92
+ Model.new.features.should == 0
93
+ end
94
+ end
95
+
96
+ describe '#total' do
97
+ it 'returns the total training time (0.0 by default)' do
98
+ Model.new.total.should == 0.0
99
+ end
100
+ end
101
+
102
+ describe '#train' do
103
+ let(:model) { Model.new(:pattern => File.expand_path('../../fixtures/pattern.txt', __FILE__)) }
104
+ let(:data) { File.expand_path('../../fixtures/train.txt', __FILE__) }
105
+
106
+ it 'accepts a filename as input' do
107
+ model.train(data).nlbl.should == 6
108
+ end
109
+
110
+ it 'accepts a data array' do
111
+ # sequence = []
112
+ # File.open(data).each_line do |line|
113
+ #
114
+ # end
115
+ #
116
+ # model.train([]).nlbl.should == 6
117
+ end
118
+
119
+ context 'when called without a pattern' do
120
+ it 'fails because of wapiti' do
121
+ expect { Model.new.train(data).nlbl.should == 6 }.to raise_error(NativeError)
122
+ end
123
+ end
124
+
125
+ end
126
+
127
+ describe '#statistics' do
128
+ context 'given an empty model' do
129
+ it 'returns zeroes' do
130
+ s = Model.new.statistics
131
+
132
+ s[:sequences][:total].should == 0
133
+ s[:sequences][:errors].should == 0
134
+ s[:sequences][:rate].should == 0
135
+ s[:tokens][:total].should == 0
136
+ s[:tokens][:errors].should == 0
137
+ s[:tokens][:rate].should == 0
138
+ end
139
+ end
140
+
141
+ context 'given a trained model' do
142
+ let(:model) { Wapiti.load(File.expand_path('../../fixtures/ch.mod', __FILE__)) }
143
+ let(:input) { [['Héllo NN B-VP', ', , O', 'world NN B-NP', '! ! O']] }
144
+
145
+ it 'returns token and sequcence counts and errors' do
146
+ model.statistics[:tokens][:total].should == 0
147
+ model.label input
148
+ model.statistics[:tokens][:total].should == 0
149
+
150
+ model.options.check = true
151
+ model.label input
152
+
153
+ model.statistics[:tokens][:total].should == input.map(&:length).reduce(&:+)
154
+ model.statistics[:sequences][:total].should == input.length
155
+ end
156
+ end
157
+ end
158
+
159
+ describe '#label' do
160
+
161
+ context 'given an empty model' do
162
+
163
+ end
164
+
165
+ context 'given a trained model' do
166
+ let(:model) { Wapiti.load(File.expand_path('../../fixtures/ch.mod', __FILE__)) }
167
+
168
+ context 'when passed an array of arrays' do
169
+ let(:input) { [['Héllo NN B-VP', ', , O', 'world NN B-NP', '! ! O']] }
170
+
171
+ it 'returns an array of token-label pairs' do
172
+ labels = model.label(input)
173
+ labels[0].map(&:first).should == input[0]
174
+ labels[0].map(&:last).should == %w{ B-NP O B-NP O }
175
+ end
176
+
177
+ it 'yields each token/label pair to the supplied block' do
178
+ labels = model.label(input) do |token, label|
179
+ [token.downcase, label.downcase]
180
+ end
181
+ labels[0].map(&:last).should == %w{ b-np o b-np o }
182
+ end
183
+
184
+ context 'with the :score option set' do
185
+ before(:each) { model.options.score! }
186
+
187
+ it 'returns an array of token-label-score tuples' do
188
+ model.label(input)[0].map { |t,l,s| s.class }.uniq == [Float]
189
+ end
190
+ end
191
+
192
+ context 'with the :nbest option set to 2' do
193
+ before(:each) { model.options.nbest = 2 }
194
+
195
+ it 'returns an array of token-label-label tuples' do
196
+ model.label(input)[0][-1][1,2] == %w{ O O }
197
+ end
198
+ end
199
+
200
+ end
201
+
202
+
203
+ context 'when passed a filename' do
204
+ let(:input) { File.expand_path('../../fixtures/chtest.txt', __FILE__) }
205
+
206
+ it 'returns an array of token-label pairs' do
207
+ labels = model.label(input)
208
+ labels.should have(77).elements
209
+ labels[0].take(5).map(&:last).should == %w{ B-NP B-PP B-NP I-NP B-VP }
210
+ end
211
+ end
212
+
213
+ end
214
+
215
+ end
216
+
217
+ describe '#labels' do
218
+ it 'returns an empty list by default' do
219
+ Model.new.labels.should be_empty
220
+ end
221
+
222
+ context 'given a trained model' do
223
+ let(:model) { Model.load(File.expand_path('../../fixtures/ch.mod', __FILE__)) }
224
+
225
+ it 'returns a list of all known labels' do
226
+ model.labels.should have(model.nlbl).elements
227
+ end
228
+ end
229
+ end
230
+
231
+
232
+ end
233
+ end