wapiti 0.0.5 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +7 -0
  2. data/.simplecov +3 -0
  3. data/Gemfile +25 -2
  4. data/HISTORY.md +5 -1
  5. data/LICENSE +14 -13
  6. data/README.md +9 -16
  7. data/Rakefile +38 -8
  8. data/ext/wapiti/bcd.c +126 -124
  9. data/ext/wapiti/decoder.c +203 -124
  10. data/ext/wapiti/decoder.h +6 -4
  11. data/ext/wapiti/extconf.rb +2 -2
  12. data/ext/wapiti/gradient.c +491 -320
  13. data/ext/wapiti/gradient.h +52 -34
  14. data/ext/wapiti/lbfgs.c +74 -33
  15. data/ext/wapiti/model.c +47 -37
  16. data/ext/wapiti/model.h +22 -20
  17. data/ext/wapiti/native.c +850 -839
  18. data/ext/wapiti/native.h +1 -1
  19. data/ext/wapiti/options.c +52 -20
  20. data/ext/wapiti/options.h +37 -30
  21. data/ext/wapiti/pattern.c +35 -33
  22. data/ext/wapiti/pattern.h +12 -11
  23. data/ext/wapiti/progress.c +14 -13
  24. data/ext/wapiti/progress.h +3 -2
  25. data/ext/wapiti/quark.c +14 -16
  26. data/ext/wapiti/quark.h +6 -5
  27. data/ext/wapiti/reader.c +83 -69
  28. data/ext/wapiti/reader.h +11 -9
  29. data/ext/wapiti/rprop.c +84 -43
  30. data/ext/wapiti/sequence.h +18 -16
  31. data/ext/wapiti/sgdl1.c +45 -43
  32. data/ext/wapiti/thread.c +19 -17
  33. data/ext/wapiti/thread.h +5 -4
  34. data/ext/wapiti/tools.c +7 -7
  35. data/ext/wapiti/tools.h +3 -4
  36. data/ext/wapiti/trainers.h +1 -1
  37. data/ext/wapiti/vmath.c +40 -38
  38. data/ext/wapiti/vmath.h +12 -11
  39. data/ext/wapiti/wapiti.c +159 -37
  40. data/ext/wapiti/wapiti.h +18 -4
  41. data/lib/wapiti.rb +15 -15
  42. data/lib/wapiti/errors.rb +15 -15
  43. data/lib/wapiti/model.rb +92 -84
  44. data/lib/wapiti/options.rb +123 -124
  45. data/lib/wapiti/utility.rb +14 -14
  46. data/lib/wapiti/version.rb +2 -2
  47. data/spec/spec_helper.rb +29 -9
  48. data/spec/wapiti/model_spec.rb +230 -194
  49. data/spec/wapiti/native_spec.rb +7 -8
  50. data/spec/wapiti/options_spec.rb +184 -174
  51. data/wapiti.gemspec +22 -8
  52. metadata +38 -42
  53. data/.gitignore +0 -5
@@ -1,125 +1,124 @@
1
1
  module Wapiti
2
-
3
- class Options
4
-
5
- include Comparable
6
-
7
- class << self
8
-
9
- # Returns a sorted list of available option attributes.
10
- def attribute_names
11
- @attribute_names ||= %w{ stop_window convergence_window posterior
12
- max_iterations jobsize threads rho1 rho2 stop_epsilon score check
13
- algorithm pattern development_data maxent compact sparse skip_tokens
14
- compress }.sort.map(&:to_sym).freeze
15
- end
16
-
17
- # Returns the default options.
18
- def defaults
19
- @defaults ||= new.attributes
20
- end
21
-
22
- # Returns the list of supported algorithm options.
23
- def algorithms
24
- @algorithms ||= %w{ l-bfgs sgd-l1 bcd rprop rprop+ rprop- auto }.freeze
25
- end
26
-
27
- end
28
-
29
- attr_accessor :compress
30
-
31
- alias compress? compress
32
-
33
- # Returns the value of the attribute identified by +name+ or nil
34
- # if there is no such attribute.
35
- def [](name)
36
- has_attribute?(name) ? send(name) : nil
37
- end
38
-
39
- # Updates the value of the attribute identified by +name+ with the
40
- # passed-in +value+.
41
- def []=(name, value)
42
- raise ArgumentError, "bad attribute name: #{name}" unless has_attribute?(name)
43
- send("#{name}=", value)
44
- end
45
-
46
- # Updates all the attributes from the passed-in hash.
47
- def update(attributes = {})
48
- attributes.each_pair do |k,v|
49
- mid = "#{k}="
50
- send(mid, v) if respond_to?(mid)
51
- end
52
- self
53
- end
54
-
55
- alias update_attributes update
56
-
57
- def lbfgs
58
- { :clip => clip, :histsz => histsz, :maxls => maxls }
59
- end
60
-
61
- def sgdl1
62
- { :eta0 => eta0, :alpha => alpha }
63
- end
64
-
65
- def bcd
66
- { :kappa => kappa }
67
- end
68
-
69
- def rprop
70
- {
71
- :stpmin => stpmin, :stpmax => stpmax, :stpinc => stpinc,
72
- :stpdec => stpdec, :cutoff => cutoff
73
- }
74
- end
75
-
76
- # Returns a hash of all the attributes with their names and values.
77
- def attributes
78
- Hash[*Options.attribute_names.map { |a| [a, send(a)] }.flatten]
79
- end
80
-
81
- alias to_hash attributes
82
-
83
- def has_attribute?(attribute)
84
- Options.attribute_names.include?(attribute)
85
- end
86
-
87
- def valid_algorithm?
88
- self.class.algorithms.include?(algorithm)
89
- end
90
-
91
- def valid?
92
- validate.empty?
93
- end
94
-
95
- def validate
96
- e = []
97
-
98
- %w{ threads jobsize alpha histsz maxls eta0 alpha nbest }.each do |name|
99
- e << "invalid value for #{name}: #{send(name)}" unless send(name) > 0
100
- end
101
-
102
- %w{ rho1 rho2 }.each do |name|
103
- e << "invalid value for #{name}: #{send(name)}" unless send(name) >= 0.0
104
- end
105
-
106
- e << "unknown algorithm: #{algorithm}" unless valid_algorithm?
107
- e << "BCD not supported for training maxent models" if maxent && algorithm == 'bcd'
108
- e
109
- end
110
-
111
- %w{ maxent compact sparse label check score posterior compress }.each do |m|
112
- writer = "#{m}=".to_sym
113
- define_method("#{m}!") do
114
- send(writer, true)
115
- self
116
- end
117
- end
118
-
119
- def <=>(other)
120
- other.respond_to?(:attributes) ? attributes <=> other.attributes : nil
121
- end
122
-
123
- end
124
-
125
- end
2
+ class Options
3
+
4
+ include Comparable
5
+
6
+ class << self
7
+
8
+ # Returns a sorted list of available option attributes.
9
+ def attribute_names
10
+ @attribute_names ||= %w{ stop_window convergence_window posterior
11
+ max_iterations jobsize threads rho1 rho2 stop_epsilon score check
12
+ algorithm pattern development_data maxent compact sparse skip_tokens
13
+ compress }.sort.map(&:to_sym).freeze
14
+ end
15
+
16
+ # Returns the default options.
17
+ def defaults
18
+ @defaults ||= new.attributes
19
+ end
20
+
21
+ # Returns the list of supported algorithm options.
22
+ def algorithms
23
+ @algorithms ||= %w{ l-bfgs sgd-l1 bcd rprop rprop+ rprop- auto }.freeze
24
+ end
25
+
26
+ end
27
+
28
+ attr_accessor :compress
29
+
30
+ alias compress? compress
31
+
32
+ # Returns the value of the attribute identified by +name+ or nil
33
+ # if there is no such attribute.
34
+ def [](name)
35
+ has_attribute?(name) ? send(name) : nil
36
+ end
37
+
38
+ # Updates the value of the attribute identified by +name+ with the
39
+ # passed-in +value+.
40
+ def []=(name, value)
41
+ raise ArgumentError, "bad attribute name: #{name}" unless has_attribute?(name)
42
+ send("#{name}=", value)
43
+ end
44
+
45
+ # Updates all the attributes from the passed-in hash.
46
+ def update(attributes = {})
47
+ attributes.each_pair do |k,v|
48
+ mid = "#{k}="
49
+ send(mid, v) if respond_to?(mid)
50
+ end
51
+ self
52
+ end
53
+
54
+ alias update_attributes update
55
+
56
+ def lbfgs
57
+ { :clip => clip, :histsz => histsz, :maxls => maxls }
58
+ end
59
+
60
+ def sgdl1
61
+ { :eta0 => eta0, :alpha => alpha }
62
+ end
63
+
64
+ def bcd
65
+ { :kappa => kappa }
66
+ end
67
+
68
+ def rprop
69
+ {
70
+ :stpmin => stpmin, :stpmax => stpmax, :stpinc => stpinc,
71
+ :stpdec => stpdec, :cutoff => cutoff
72
+ }
73
+ end
74
+
75
+ # Returns a hash of all the attributes with their names and values.
76
+ def attributes
77
+ Hash[*Options.attribute_names.map { |a| [a, send(a)] }.flatten]
78
+ end
79
+
80
+ alias to_hash attributes
81
+
82
+ def has_attribute?(attribute)
83
+ Options.attribute_names.include?(attribute)
84
+ end
85
+
86
+ def valid_algorithm?
87
+ self.class.algorithms.include?(algorithm)
88
+ end
89
+
90
+ def valid?
91
+ validate.empty?
92
+ end
93
+
94
+ def validate
95
+ e = []
96
+
97
+ %w{ threads jobsize alpha histsz maxls eta0 alpha nbest }.each do |name|
98
+ e << "invalid value for #{name}: #{send(name)}" unless send(name) > 0
99
+ end
100
+
101
+ %w{ rho1 rho2 }.each do |name|
102
+ e << "invalid value for #{name}: #{send(name)}" unless send(name) >= 0.0
103
+ end
104
+
105
+ e << "unknown algorithm: #{algorithm}" unless valid_algorithm?
106
+ e << "BCD not supported for training maxent models" if maxent && algorithm == 'bcd'
107
+ e
108
+ end
109
+
110
+ %w{ maxent compact sparse label check score posterior compress }.each do |m|
111
+ writer = "#{m}=".to_sym
112
+ define_method("#{m}!") do
113
+ send(writer, true)
114
+ self
115
+ end
116
+ end
117
+
118
+ def <=>(other)
119
+ other.respond_to?(:attributes) ? attributes <=> other.attributes : nil
120
+ end
121
+
122
+ end
123
+
124
+ end
@@ -1,15 +1,15 @@
1
1
  module Wapiti
2
-
3
- # Creates a Model based on the pattern in the passed-in options and training
4
- # +data+ supplied as input.
5
- def train(data, options, &block)
6
- Model.train(data, options, &block)
7
- end
8
-
9
- def load(model)
10
- Model.load(model)
11
- end
12
-
13
- module_function :train, :load
14
-
15
- end
2
+
3
+ # Creates a Model based on the pattern in the passed-in options and training
4
+ # +data+ supplied as input.
5
+ def train(data, options, &block)
6
+ Model.train(data, options, &block)
7
+ end
8
+
9
+ def load(model)
10
+ Model.load(model)
11
+ end
12
+
13
+ module_function :train, :load
14
+
15
+ end
@@ -1,3 +1,3 @@
1
1
  module Wapiti
2
- VERSION = '0.0.5'.freeze
3
- end
2
+ VERSION = '0.1.0'.freeze
3
+ end
@@ -1,21 +1,41 @@
1
+ begin
2
+ require 'simplecov'
3
+ require 'coveralls' if ENV['CI']
4
+ rescue LoadError
5
+ # ignore
6
+ end
7
+
8
+ begin
9
+ if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'rbx'
10
+ require 'rubinius/debugger'
11
+ else
12
+ require 'debugger'
13
+ end
14
+ rescue LoadError
15
+ # ignore
16
+ end
17
+
18
+ $:.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
19
+ $:.unshift(File.dirname(__FILE__))
20
+
1
21
  require 'wapiti'
2
22
 
3
23
  require 'fileutils'
4
24
  require 'tempfile'
5
25
 
6
26
  RSpec::Matchers.define :be_valid_model do
7
- match do |model|
8
- model.is_a?(Wapiti::Model) && model.nlbl > 0
9
- end
27
+ match do |model|
28
+ model.is_a?(Wapiti::Model) && model.nlbl > 0
29
+ end
10
30
  end
11
31
 
12
32
  RSpec::Matchers.define :be_valid_model_file do
13
- match do |path|
14
- File.exists?(path) && !File.open(path).read.empty?
15
- end
33
+ match do |path|
34
+ File.exists?(path) && !File.open(path).read.empty?
35
+ end
16
36
  end
17
37
 
18
38
 
19
- RSpec.configuration do |c|
20
- c.include(FileUtils)
21
- end
39
+ RSpec.configure do |c|
40
+ c.include(FileUtils)
41
+ end
@@ -1,197 +1,233 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module Wapiti
4
- describe 'Model' do
5
-
6
- describe '.train' do
7
- context 'given sufficient options' do
8
- let(:pattern) { File.expand_path('../../fixtures/pattern.txt', __FILE__) }
9
- let(:input) { File.expand_path('../../fixtures/train.txt', __FILE__) }
10
-
11
- it 'returns a valid model instance' do
12
- Model.train(input, :pattern => pattern).labels.should == (1..6).map(&:to_s)
13
- end
14
-
15
- end
16
- end
17
-
18
- describe 'initialization' do
19
-
20
- context 'when passed no arguments' do
21
- it 'creates a new model with default options' do
22
- m = Model.new
23
- m.should_not be nil
24
- m.options.should be_instance_of(Options)
25
- m.nlbl.should == 0
26
- m.nobs.should == 0
27
- end
28
- end
29
-
30
- context 'when passed more than one argument' do
31
- it 'should raise an error' do
32
- expect { Model.new(1,2) }.to raise_error
33
- end
34
- end
35
-
36
- context 'when passed a hash' do
37
- let(:options) { { :threads => 42 } }
38
-
39
- it 'should create the options from the hash' do
40
- Model.new(options).options[:threads].should == 42
41
- end
42
- end
43
-
44
- context 'when passed an options instance' do
45
- let(:options) { Options.new(:threads => 42) }
46
-
47
- it 'should create the options from the hash' do
48
- Model.new(options).options[:threads].should == 42
49
- end
50
- end
51
-
52
- context 'when passed something other than a hash or an options instance' do
53
- it 'should raise an error' do
54
- expect { Model.new(1) }.to raise_error
55
- expect { Model.new(nil) }.to raise_error
56
- expect { Model.new(true) }.to raise_error
57
- expect { Model.new('foo') }.to raise_error
58
- end
59
- end
60
-
61
- context 'when called with a block' do
62
- it 'should pass the options instance to the block' do
63
- Model.new(:threads => 42) { |o| o.threads = 23 }.options.threads.should == 23
64
- end
65
- end
66
- end
67
-
68
- describe '#options' do
69
- it 'returns the options for training' do
70
- Model.new.options.should be_instance_of(Options)
71
- end
72
- end
73
-
74
- describe '#labels' do
75
- it 'returns the number of labels (0 by default)' do
76
- Model.new.nlbl.should == 0
77
- end
78
- end
79
-
80
- describe '#observations' do
81
- it 'returns the number of observations (0 by default)' do
82
- Model.new.observations.should == 0
83
- end
84
- end
85
-
86
- describe '#features' do
87
- it 'returns the number of features (0 by default)' do
88
- Model.new.features.should == 0
89
- end
90
- end
91
-
92
- describe '#total' do
93
- it 'returns the total training time (0.0 by default)' do
94
- Model.new.total.should == 0.0
95
- end
96
- end
97
-
98
- describe '#train' do
99
- let(:model) { Model.new(:pattern => File.expand_path('../../fixtures/pattern.txt', __FILE__)) }
100
- let(:data) { File.expand_path('../../fixtures/train.txt', __FILE__) }
101
-
102
- it 'accepts a filename as input' do
103
- model.train(data).nlbl.should == 6
104
- end
105
-
106
- it 'accepts a data array' do
107
- # sequence = []
108
- # File.open(data).each_line do |line|
109
- #
110
- # end
111
- #
112
- # model.train([]).nlbl.should == 6
113
- end
114
-
115
- context 'when called without a pattern' do
116
- it 'fails because of wapiti' do
117
- expect { Model.new.train(data).nlbl.should == 6 }.to raise_error(NativeError)
118
- end
119
- end
120
-
121
- end
122
-
123
- describe '#label' do
124
-
125
- context 'given an empty model' do
126
-
127
- end
128
-
129
- context 'given a trained model' do
130
- let(:model) { Model.load(File.expand_path('../../fixtures/ch.mod', __FILE__)) }
131
-
132
- context 'when passed an array of arrays' do
133
- let(:input) { [['Héllo NN B-VP', ', , O', 'world NN B-NP', '! ! O']] }
134
-
135
- it 'returns an array of token-label pairs' do
136
- labels = model.label(input)
137
- labels[0].map(&:first).should == input[0]
138
- labels[0].map(&:last).should == %w{ B-NP O B-NP O }
139
- end
140
-
141
- it 'yields each token/label pair to the supplied block' do
142
- labels = model.label(input) do |token, label|
143
- [token.downcase, label.downcase]
144
- end
145
- labels[0].map(&:last).should == %w{ b-np o b-np o }
146
- end
147
-
148
- context 'with the :score option set' do
149
- before(:each) { model.options.score! }
150
-
151
- it 'returns an array of token-label-score tuples' do
152
- model.label(input)[0].map { |t,l,s| s.class }.uniq == [Float]
153
- end
154
- end
155
-
156
- context 'with the :nbest option set to 2' do
157
- before(:each) { model.options.nbest = 2 }
158
-
159
- it 'returns an array of token-label-label tuples' do
160
- model.label(input)[0][-1][1,2] == %w{ O O }
161
- end
162
- end
163
-
164
- end
165
-
166
-
167
- context 'when passed a filename' do
168
- let(:input) { File.expand_path('../../fixtures/chtest.txt', __FILE__) }
169
-
170
- it 'returns an array of token-label pairs' do
171
- labels = model.label(input)
172
- labels.should have(77).elements
173
- labels[0].take(5).map(&:last).should == %w{ B-NP B-PP B-NP I-NP B-VP }
174
- end
175
- end
176
-
177
- end
178
-
179
- end
180
-
181
- describe '#labels' do
182
- it 'returns an empty list by default' do
183
- Model.new.labels.should be_empty
184
- end
185
-
186
- context 'given a trained model' do
187
- let(:model) { Model.load(File.expand_path('../../fixtures/ch.mod', __FILE__)) }
188
-
189
- it 'returns a list of all known labels' do
190
- model.labels.should have(model.nlbl).elements
191
- end
192
- end
193
- end
194
-
195
-
196
- end
197
- end
4
+ describe 'Model' do
5
+
6
+ describe '.train' do
7
+ context 'given sufficient options' do
8
+ let(:pattern) { File.expand_path('../../fixtures/pattern.txt', __FILE__) }
9
+ let(:input) { File.expand_path('../../fixtures/train.txt', __FILE__) }
10
+
11
+ it 'returns a valid model instance' do
12
+ Model.train(input, :pattern => pattern).labels.should == (1..6).map(&:to_s)
13
+ end
14
+
15
+ it 'is also exposed as Wapiti.train' do
16
+ Wapiti.train(input, :pattern => pattern).labels.should == (1..6).map(&:to_s)
17
+ end
18
+
19
+ end
20
+ end
21
+
22
+ describe 'initialization' do
23
+
24
+ context 'when passed no arguments' do
25
+ it 'creates a new model with default options' do
26
+ m = Model.new
27
+ m.should_not be nil
28
+ m.options.should be_instance_of(Options)
29
+ m.nlbl.should == 0
30
+ m.nobs.should == 0
31
+ end
32
+ end
33
+
34
+ context 'when passed more than one argument' do
35
+ it 'should raise an error' do
36
+ expect { Model.new(1,2) }.to raise_error
37
+ end
38
+ end
39
+
40
+ context 'when passed a hash' do
41
+ let(:options) { { :threads => 42 } }
42
+
43
+ it 'should create the options from the hash' do
44
+ Model.new(options).options[:threads].should == 42
45
+ end
46
+ end
47
+
48
+ context 'when passed an options instance' do
49
+ let(:options) { Options.new(:threads => 42) }
50
+
51
+ it 'should create the options from the hash' do
52
+ Model.new(options).options[:threads].should == 42
53
+ end
54
+ end
55
+
56
+ context 'when passed something other than a hash or an options instance' do
57
+ it 'should raise an error' do
58
+ expect { Model.new(1) }.to raise_error
59
+ expect { Model.new(nil) }.to raise_error
60
+ expect { Model.new(true) }.to raise_error
61
+ expect { Model.new('foo') }.to raise_error
62
+ end
63
+ end
64
+
65
+ context 'when called with a block' do
66
+ it 'should pass the options instance to the block' do
67
+ Model.new(:threads => 42) { |o| o.threads = 23 }.options.threads.should == 23
68
+ end
69
+ end
70
+ end
71
+
72
+ describe '#options' do
73
+ it 'returns the options for training' do
74
+ Model.new.options.should be_instance_of(Options)
75
+ end
76
+ end
77
+
78
+ describe '#labels' do
79
+ it 'returns the number of labels (0 by default)' do
80
+ Model.new.nlbl.should == 0
81
+ end
82
+ end
83
+
84
+ describe '#observations' do
85
+ it 'returns the number of observations (0 by default)' do
86
+ Model.new.observations.should == 0
87
+ end
88
+ end
89
+
90
+ describe '#features' do
91
+ it 'returns the number of features (0 by default)' do
92
+ Model.new.features.should == 0
93
+ end
94
+ end
95
+
96
+ describe '#total' do
97
+ it 'returns the total training time (0.0 by default)' do
98
+ Model.new.total.should == 0.0
99
+ end
100
+ end
101
+
102
+ describe '#train' do
103
+ let(:model) { Model.new(:pattern => File.expand_path('../../fixtures/pattern.txt', __FILE__)) }
104
+ let(:data) { File.expand_path('../../fixtures/train.txt', __FILE__) }
105
+
106
+ it 'accepts a filename as input' do
107
+ model.train(data).nlbl.should == 6
108
+ end
109
+
110
+ it 'accepts a data array' do
111
+ # sequence = []
112
+ # File.open(data).each_line do |line|
113
+ #
114
+ # end
115
+ #
116
+ # model.train([]).nlbl.should == 6
117
+ end
118
+
119
+ context 'when called without a pattern' do
120
+ it 'fails because of wapiti' do
121
+ expect { Model.new.train(data).nlbl.should == 6 }.to raise_error(NativeError)
122
+ end
123
+ end
124
+
125
+ end
126
+
127
+ describe '#statistics' do
128
+ context 'given an empty model' do
129
+ it 'returns zeroes' do
130
+ s = Model.new.statistics
131
+
132
+ s[:sequences][:total].should == 0
133
+ s[:sequences][:errors].should == 0
134
+ s[:sequences][:rate].should == 0
135
+ s[:tokens][:total].should == 0
136
+ s[:tokens][:errors].should == 0
137
+ s[:tokens][:rate].should == 0
138
+ end
139
+ end
140
+
141
+ context 'given a trained model' do
142
+ let(:model) { Wapiti.load(File.expand_path('../../fixtures/ch.mod', __FILE__)) }
143
+ let(:input) { [['Héllo NN B-VP', ', , O', 'world NN B-NP', '! ! O']] }
144
+
145
+ it 'returns token and sequcence counts and errors' do
146
+ model.statistics[:tokens][:total].should == 0
147
+ model.label input
148
+ model.statistics[:tokens][:total].should == 0
149
+
150
+ model.options.check = true
151
+ model.label input
152
+
153
+ model.statistics[:tokens][:total].should == input.map(&:length).reduce(&:+)
154
+ model.statistics[:sequences][:total].should == input.length
155
+ end
156
+ end
157
+ end
158
+
159
+ describe '#label' do
160
+
161
+ context 'given an empty model' do
162
+
163
+ end
164
+
165
+ context 'given a trained model' do
166
+ let(:model) { Wapiti.load(File.expand_path('../../fixtures/ch.mod', __FILE__)) }
167
+
168
+ context 'when passed an array of arrays' do
169
+ let(:input) { [['Héllo NN B-VP', ', , O', 'world NN B-NP', '! ! O']] }
170
+
171
+ it 'returns an array of token-label pairs' do
172
+ labels = model.label(input)
173
+ labels[0].map(&:first).should == input[0]
174
+ labels[0].map(&:last).should == %w{ B-NP O B-NP O }
175
+ end
176
+
177
+ it 'yields each token/label pair to the supplied block' do
178
+ labels = model.label(input) do |token, label|
179
+ [token.downcase, label.downcase]
180
+ end
181
+ labels[0].map(&:last).should == %w{ b-np o b-np o }
182
+ end
183
+
184
+ context 'with the :score option set' do
185
+ before(:each) { model.options.score! }
186
+
187
+ it 'returns an array of token-label-score tuples' do
188
+ model.label(input)[0].map { |t,l,s| s.class }.uniq == [Float]
189
+ end
190
+ end
191
+
192
+ context 'with the :nbest option set to 2' do
193
+ before(:each) { model.options.nbest = 2 }
194
+
195
+ it 'returns an array of token-label-label tuples' do
196
+ model.label(input)[0][-1][1,2] == %w{ O O }
197
+ end
198
+ end
199
+
200
+ end
201
+
202
+
203
+ context 'when passed a filename' do
204
+ let(:input) { File.expand_path('../../fixtures/chtest.txt', __FILE__) }
205
+
206
+ it 'returns an array of token-label pairs' do
207
+ labels = model.label(input)
208
+ labels.should have(77).elements
209
+ labels[0].take(5).map(&:last).should == %w{ B-NP B-PP B-NP I-NP B-VP }
210
+ end
211
+ end
212
+
213
+ end
214
+
215
+ end
216
+
217
+ describe '#labels' do
218
+ it 'returns an empty list by default' do
219
+ Model.new.labels.should be_empty
220
+ end
221
+
222
+ context 'given a trained model' do
223
+ let(:model) { Model.load(File.expand_path('../../fixtures/ch.mod', __FILE__)) }
224
+
225
+ it 'returns a list of all known labels' do
226
+ model.labels.should have(model.nlbl).elements
227
+ end
228
+ end
229
+ end
230
+
231
+
232
+ end
233
+ end