natto 0.9.5 → 0.9.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,17 +1,17 @@
1
1
  # coding: utf-8
2
2
  module Natto
3
3
 
4
- # Module <tt>Binding</tt> encapsulates methods and behavior
5
- # which are made available via <tt>FFI</tt> bindings to
6
- # <tt>mecab</tt>.
4
+ # Module `Binding` encapsulates methods and behavior
5
+ # which are made available via `FFI` bindings to
6
+ # `mecab`.
7
7
  module Binding
8
8
  require 'ffi'
9
9
  require 'rbconfig'
10
10
  extend FFI::Library
11
11
 
12
12
  # String name for the environment variable used by
13
- # <tt>Natto</tt> to indicate the exact name / full path
14
- # to the <tt>mecab</tt> library.
13
+ # `Natto` to indicate the exact name / full path
14
+ # to the `mecab` library.
15
15
  MECAB_PATH = 'MECAB_PATH'.freeze
16
16
 
17
17
  # @private
@@ -19,14 +19,14 @@ module Natto
19
19
  base.extend(ClassMethods)
20
20
  end
21
21
 
22
- # Returns the name of the <tt>mecab</tt> library based on
22
+ # Returns the name of the `mecab` library based on
23
23
  # the runtime environment. The value of the environment
24
- # parameter <tt>MECAB_PATH</tt> is checked before this
24
+ # parameter `MECAB_PATH` is checked before this
25
25
  # function is invoked, and in the case of Windows, a
26
- # <tt>LoadError</tt> will be raised if <tt>MECAB_PATH</tt>
27
- # is <b>not</b> set to the full path of the <tt>mecab</tt>
26
+ # `LoadError` will be raised if `MECAB_PATH`
27
+ # is _not_ set to the full path of the `mecab`
28
28
  # library.
29
- # @return name of the <tt>mecab</tt> library
29
+ # @return name of the `mecab` library
30
30
  # @raise [LoadError] if MECAB_PATH environment variable is not set in Windows
31
31
  # <br/>
32
32
  # e.g., for bash on UNIX/Linux
@@ -37,20 +37,14 @@ module Natto
37
37
  #
38
38
  # set MECAB_PATH=C:\Program Files\MeCab\bin\libmecab.dll
39
39
  #
40
- # e.g., for Cygwin
41
- #
42
- # export MECAB_PATH=cygmecab-1
43
- #
44
40
  # e.g., from within a Ruby program
45
41
  #
46
- # ENV['MECAB_PATH']=/usr/local/lib/libmecab.so
42
+ # ENV['MECAB_PATH']='/usr/local/lib/libmecab.so'
47
43
  def self.find_library
48
44
  host_os = RbConfig::CONFIG['host_os']
49
45
 
50
46
  if host_os =~ /mswin|mingw/i
51
47
  raise LoadError, "Please set #{MECAB_PATH} to the full path to libmecab.dll"
52
- elsif host_os =~ /cygwin/i
53
- 'cygmecab-1'
54
48
  else
55
49
  'mecab'
56
50
  end
@@ -58,10 +52,17 @@ module Natto
58
52
 
59
53
  ffi_lib(ENV[MECAB_PATH] || find_library)
60
54
 
55
+ # new interface
56
+ attach_function :mecab_model_new2, [:string], :pointer
57
+ attach_function :mecab_model_destroy, [:pointer], :void
58
+ attach_function :mecab_model_dictionary_info, [:pointer], :pointer
59
+
60
+ # old interface
61
61
  attach_function :mecab_new2, [:string], :pointer
62
62
  attach_function :mecab_version, [], :string
63
63
  attach_function :mecab_strerror, [:pointer],:string
64
64
  attach_function :mecab_destroy, [:pointer], :void
65
+ attach_function :mecab_set_partial, [:pointer, :int], :void
65
66
  attach_function :mecab_set_theta, [:pointer, :float], :void
66
67
  attach_function :mecab_set_lattice_level, [:pointer, :int], :void
67
68
  attach_function :mecab_set_all_morphs, [:pointer, :int], :void
@@ -75,6 +76,20 @@ module Natto
75
76
 
76
77
  # @private
77
78
  module ClassMethods
79
+
80
+ def mecab_model_new2(options_str)
81
+ Natto::Binding.mecab_model_new2(options_str)
82
+ end
83
+
84
+ def mecab_model_destroy(m_ptr)
85
+ Natto::Binding.mecab_model_destroy(m_ptr)
86
+ end
87
+
88
+ def mecab_model_dictionary_info(m_ptr)
89
+ Natto::Binding.mecab_model_dictionary_info(m_ptr)
90
+ end
91
+
92
+ # ----------------------------------------
78
93
  def mecab_new2(options_str)
79
94
  Natto::Binding.mecab_new2(options_str)
80
95
  end
@@ -91,6 +106,10 @@ module Natto
91
106
  Natto::Binding.mecab_destroy(m_ptr)
92
107
  end
93
108
 
109
+ def mecab_set_partial(m_ptr, ll)
110
+ Natto::Binding.mecab_set_partial(m_ptr, ll)
111
+ end
112
+
94
113
  def mecab_set_theta(m_ptr, t)
95
114
  Natto::Binding.mecab_set_theta(m_ptr, t)
96
115
  end
@@ -0,0 +1,295 @@
1
+ # coding: utf-8
2
+ require 'natto/binding'
3
+ require 'natto/option_parse'
4
+ require 'natto/struct'
5
+
6
+ module Natto
7
+ # `MeCab` is a wrapper class for the `mecab` tagger.
8
+ # Options to the `mecab` tagger are passed in as a string
9
+ # (MeCab command-line style) or as a Ruby-style hash at
10
+ # initialization.
11
+ #
12
+ # ## Usage
13
+ #
14
+ # require 'rubygems' if RUBY_VERSION.to_f < 1.9
15
+ # require 'natto'
16
+ #
17
+ # nm = Natto::MeCab.new('-Ochasen')
18
+ # => #<Natto::MeCab:0x28d3bdc8 \
19
+ # @tagger=#<FFI::Pointer address=0x28afb980>, \
20
+ # @options={:output_format_type=>"chasen"}, \
21
+ # @dicts=[#<Natto::DictionaryInfo:0x289a1f14 \
22
+ # type="0", \
23
+ # filename="/usr/local/lib/mecab/dic/ipadic/sys.dic", \
24
+ # charset="utf8">], \
25
+ # @version="0.996">
26
+ #
27
+ # nm.parse('凡人にしか見えねえ風景ってのがあるんだよ。') do |n|
28
+ # puts "#{n.surface}\t#{n.feature}"
29
+ # end
30
+ # 凡人 名詞,一般,*,*,*,*,凡人,ボンジン,ボンジン
31
+ # に 助詞,格助詞,一般,*,*,*,に,ニ,ニ
32
+ # しか 助詞,係助詞,*,*,*,*,しか,シカ,シカ
33
+ # 見え 動詞,自立,*,*,一段,未然形,見える,ミエ,ミエ
34
+ # ねえ 助動詞,*,*,*,特殊・ナイ,音便基本形,ない,ネエ,ネー
35
+ # 風景 名詞,一般,*,*,*,*,風景,フウケイ,フーケイ
36
+ # って 助詞,格助詞,連語,*,*,*,って,ッテ,ッテ
37
+ # の 名詞,非自立,一般,*,*,*,の,ノ,ノ
38
+ # が 助詞,格助詞,一般,*,*,*,が,ガ,ガ
39
+ # ある 動詞,自立,*,*,五段・ラ行,基本形,ある,アル,アル
40
+ # ん 名詞,非自立,一般,*,*,*,ん,ン,ン
41
+ # だ 助動詞,*,*,*,特殊・ダ,基本形,だ,ダ,ダ
42
+ # よ 助詞,終助詞,*,*,*,*,よ,ヨ,ヨ
43
+ # 。 記号,句点,*,*,*,*,。,。,。
44
+ # BOS/EOS,*,*,*,*,*,*,*,*
45
+ #
46
+ class MeCab
47
+ include Natto::Binding
48
+ include Natto::OptionParse
49
+
50
+ attr_reader :tagger, :options, :dicts, :version
51
+
52
+ # Initializes the wrapped `mecab` instance with the
53
+ # given `options`.
54
+ #
55
+ # Options supported are:
56
+ #
57
+ # - :rcfile -- resource file
58
+ # - :dicdir -- system dicdir
59
+ # - :userdic -- user dictionary
60
+ # - :lattice_level -- lattice information level (DEPRECATED)
61
+ # - :output_format_type -- output format type (wakati, chasen, yomi, etc.)
62
+ # - :all_morphs -- output all morphs (default false)
63
+ # - :nbest -- output N best results (integer, default 1), requires lattice level >= 1
64
+ # - :partial -- partial parsing mode
65
+ # - :marginal -- output marginal probability
66
+ # - :max_grouping_size -- maximum grouping size for unknown words (default 24)
67
+ # - :node_format -- user-defined node format
68
+ # - :unk_format -- user-defined unknown node format
69
+ # - :bos_format -- user-defined beginning-of-sentence format
70
+ # - :eos_format -- user-defined end-of-sentence format
71
+ # - :eon_format -- user-defined end-of-NBest format
72
+ # - :unk_feature -- feature for unknown word
73
+ # - :input_buffer_size -- set input buffer size (default 8192)
74
+ # - :allocate_sentence -- allocate new memory for input sentence
75
+ # - :theta -- temperature parameter theta (float, default 0.75)
76
+ # - :cost_factor -- cost factor (integer, default 700)
77
+ #
78
+ # <p>MeCab command-line arguments, short (-F) or long (--node-format), may be used in
79
+ # addition to Ruby-style `Hash`es</p>
80
+ # <i>Use single-quotes to preserve format options that contain escape chars.</i><br/>
81
+ # e.g.<br/>
82
+ #
83
+ # nm = Natto::MeCab.new(:node_format=>'%m¥t%f[7]¥n')
84
+ # => #<Natto::MeCab:0x28d2ae10 \
85
+ # @tagger=#<FFI::Pointer address=0x28a97980>, \
86
+ # @options={:node_format=>"%m¥t%f[7]¥n"}, \
87
+ # @dicts=[#<Natto::DictionaryInfo:0x28d2a85c \
88
+ # type="0", \
89
+ # filename="/usr/local/lib/mecab/dic/ipadic/sys.dic", \
90
+ # charset="utf8">], \
91
+ # @version="0.996">
92
+ #
93
+ # puts nm.parse('才能とは求める人間に与えられるものではない。')
94
+ # 才能 サイノウ
95
+ # と ト
96
+ # は ハ
97
+ # 求める モトメル
98
+ # 人間 ニンゲン
99
+ # に ニ
100
+ # 与え アタエ
101
+ # られる ラレル
102
+ # もの モノ
103
+ # で デ
104
+ # は ハ
105
+ # ない ナイ
106
+ # 。 。
107
+ # EOS
108
+ #
109
+ # @param [Hash, String] options the `mecab` options, as a Ruby-style hash or a MeCab command-line style string
110
+ # @raise [MeCabError] if `mecab` cannot be initialized with the given `options`
111
+ def initialize(options={})
112
+ @options = self.class.parse_mecab_options(options)
113
+ @dicts = []
114
+
115
+ opt_str = self.class.build_options_str(@options)
116
+ @tagger = self.mecab_new2(opt_str)
117
+ raise MeCabError.new("Could not initialize MeCab with options: '#{opt_str}'") if @tagger.address == 0x0
118
+
119
+ self.mecab_set_theta(@tagger, @options[:theta]) if @options[:theta]
120
+ self.mecab_set_lattice_level(@tagger, @options[:lattice_level]) if @options[:lattice_level]
121
+ self.mecab_set_all_morphs(@tagger, 1) if @options[:all_morphs]
122
+ self.mecab_set_partial(@tagger, 1) if @options[:partial]
123
+
124
+ # Set mecab parsing implementations for N-best and regular parsing,
125
+ # for both parsing as string and yielding a node object
126
+ # N-Best parsing implementations
127
+ if @options[:nbest] && @options[:nbest] > 1
128
+ self.mecab_set_lattice_level(@tagger, (@options[:lattice_level] || 1))
129
+ @parse_tostr = lambda do |str|
130
+ return self.mecab_nbest_sparse_tostr(@tagger, @options[:nbest], str) ||
131
+ raise(MeCabError.new(self.mecab_strerror(@tagger)))
132
+ end
133
+ @parse_tonodes = lambda do |str|
134
+ nodes = []
135
+ if @options[:nbest] && @options[:nbest] > 1
136
+ self.mecab_nbest_init(@tagger, str)
137
+ n = self.mecab_nbest_next_tonode(@tagger)
138
+ raise(MeCabError.new(self.mecab_strerror(@tagger))) if n.nil? || n.address==0x0
139
+ nlen = @options[:nbest]
140
+ nlen.times do |i|
141
+ s = str.bytes.to_a
142
+ while n && n.address != 0x0
143
+ mn = Natto::MeCabNode.new(n)
144
+ s = s.drop_while {|e| (e==0xa || e==0x20)}
145
+ if !s.empty?
146
+ sarr = []
147
+ mn.length.times { sarr << s.shift }
148
+ surf = sarr.pack('C*')
149
+ #mn.surface = self.class.force_enc(surf)
150
+ mn.surface = surf.force_encoding(Encoding.default_external)
151
+ end
152
+ if @options[:output_format_type] || @options[:node_format]
153
+ mn.feature = self.mecab_format_node(@tagger, n).force_encoding(Encoding.default_external)
154
+ end
155
+ nodes << mn if !mn.is_bos?
156
+ n = mn.next
157
+ end
158
+ n = self.mecab_nbest_next_tonode(@tagger)
159
+ end
160
+ end
161
+ return nodes
162
+ end
163
+ else
164
+ # default parsing implementations
165
+ @parse_tostr = lambda do |str|
166
+ return self.mecab_sparse_tostr(@tagger, str) ||
167
+ raise(MeCabError.new(self.mecab_strerror(@tagger)))
168
+ end
169
+ @parse_tonodes = lambda do |str|
170
+ nodes = []
171
+ n = self.mecab_sparse_tonode(@tagger, str)
172
+ raise(MeCabError.new(self.mecab_strerror(@tagger))) if n.nil? || n.address==0x0
173
+ mn = Natto::MeCabNode.new(n)
174
+ n = mn.next if mn.next.address!=0x0
175
+ s = str.bytes.to_a
176
+ while n && n.address!=0x0
177
+ mn = Natto::MeCabNode.new(n)
178
+ s = s.drop_while {|e| (e==0xa || e==0x20)}
179
+ if !s.empty?
180
+ sarr = []
181
+ mn.length.times { sarr << s.shift }
182
+ surf = sarr.pack('C*')
183
+ mn.surface = surf.force_encoding(Encoding.default_external)
184
+ end
185
+ nodes << mn
186
+ n = mn.next
187
+ end
188
+ return nodes
189
+ end
190
+ end
191
+
192
+ @dicts << Natto::DictionaryInfo.new(Natto::Binding.mecab_dictionary_info(@tagger))
193
+ while @dicts.last.next.address != 0x0
194
+ @dicts << Natto::DictionaryInfo.new(@dicts.last.next)
195
+ end
196
+
197
+ @version = self.mecab_version
198
+
199
+ ObjectSpace.define_finalizer(self, self.class.create_free_proc(@tagger))
200
+ end
201
+
202
+ # Parses the given string `str`. If a block is passed to this method,
203
+ # then node parsing will be used and each node yielded to the given block.
204
+ #
205
+ # @param [String] str
206
+ # @return parsing result from `mecab`
207
+ # @raise [MeCabError] if the `mecab` tagger cannot parse the given string `str`
208
+ # @raise [ArgumentError] if the given string `str` argument is `nil`
209
+ # @see MeCabNode
210
+ def parse(str)
211
+ raise ArgumentError.new 'String to parse cannot be nil' if str.nil?
212
+ if block_given?
213
+ nodes = @parse_tonodes.call(str)
214
+ nodes.each {|n| yield n }
215
+ else
216
+ @parse_tostr.call(str).force_encoding(Encoding.default_external)
217
+ end
218
+ end
219
+
220
+ # Parses the given string `str`, and returns
221
+ # a list of `mecab` nodes.
222
+ # @param [String] str
223
+ # @return [Array] of parsed `mecab` nodes.
224
+ # @raise [MeCabError] if the `mecab` tagger cannot parse the given string `str`
225
+ # @raise [ArgumentError] if the given string `str` argument is `nil`
226
+ # @see MeCabNode
227
+ def parse_as_nodes(str)
228
+ raise ArgumentError.new 'String to parse cannot be nil' if str.nil?
229
+ @parse_tonodes.call(str)
230
+ end
231
+
232
+ # Parses the given string `str`, and returns
233
+ # a list of `mecab` result strings.
234
+ # @param [String] str
235
+ # @return [Array] of parsed `mecab` result strings.
236
+ # @raise [MeCabError] if the `mecab` tagger cannot parse the given string `str`
237
+ # @raise [ArgumentError] if the given string `str` argument is `nil`
238
+ def parse_as_strings(str)
239
+ raise ArgumentError.new 'String to parse cannot be nil' if str.nil?
240
+ @parse_tostr.call(str).force_encoding(Encoding.default_external).lines.to_a
241
+ end
242
+
243
+ # DEPRECATED: use parse_as_nodes instead.
244
+ def readnodes(str)
245
+ $stdout.puts 'DEPRECATED: use parse_as_nodes instead'
246
+ parse_as_nodes(str)
247
+ end
248
+
249
+ # DEPRECATED: use parse_as_strings instead.
250
+ def readlines(str)
251
+ $stdout.puts 'DEPRECATED: use parse_as_strings instead'
252
+ parse_as_strings(str)
253
+ end
254
+
255
+ # Returns human-readable details for the wrapped `mecab` tagger.
256
+ # Overrides `Object#to_s`.
257
+ #
258
+ # - encoded object id
259
+ # - underlying FFI pointer to the `mecab` tagger
260
+ # - options hash
261
+ # - list of dictionaries
262
+ # - MeCab version
263
+ #
264
+ # @return [String] encoded object id, underlying FFI pointer, options hash, list of dictionaries, and MeCab version
265
+ def to_s
266
+ %(#{super.chop} @tagger=#{@tagger}, @options=#{@options.inspect}, @dicts=#{@dicts.to_s}, @version="#{@version.to_s}">)
267
+ end
268
+
269
+ # Overrides `Object#inspect`.
270
+ #
271
+ # @return [String] encoded object id, FFI pointer, options hash, list of dictionaries, and MeCab version
272
+ # @see #to_s
273
+ def inspect
274
+ self.to_s
275
+ end
276
+
277
+ # Returns a `Proc` that will properly free resources
278
+ # when this `MeCab` instance is garbage collected.
279
+ # The `Proc` returned is registered to be invoked
280
+ # after the `MeCab` instance owning `ptr`
281
+ # has been destroyed.
282
+ #
283
+ # @param [FFI::Pointer] ptr
284
+ # @return [Proc] to release `mecab` resources properly
285
+ def self.create_free_proc(ptr)
286
+ Proc.new do
287
+ self.mecab_destroy(ptr)
288
+ end
289
+ end
290
+ end
291
+
292
+ # `MeCabError` is a general error class
293
+ # for the `Natto` module.
294
+ class MeCabError < RuntimeError; end
295
+ end
@@ -1,13 +1,16 @@
1
1
  module Natto
2
2
 
3
- # Module <tt>OptionParse</tt> encapsulates methods and behavior
4
- # for parsing the various <tt>mecab</tt> options supported by
5
- # <tt>Natto</tt>.
3
+ # Module `OptionParse` encapsulates methods and behavior
4
+ # for parsing the various `mecab` options supported by
5
+ # `Natto`.
6
6
  module OptionParse
7
7
  require 'optparse'
8
8
 
9
- # Mapping of mecab short-style configuration options to the <tt>mecab</tt> tagger.
10
- # See the <tt>mecab</tt> help for more details.
9
+ WARNING_LATTICE_LEVEL =
10
+ ":lattice-level is DEPRECATED, please use :marginal or :nbest\n".freeze
11
+
12
+ # Mapping of mecab short-style configuration options to the `mecab`
13
+ # tagger. See the `mecab` help for more details.
11
14
  SUPPORTED_OPTS = { '-r' => :rcfile,
12
15
  '-d' => :dicdir,
13
16
  '-u' => :userdic,
@@ -15,6 +18,9 @@ module Natto
15
18
  '-O' => :output_format_type,
16
19
  '-a' => :all_morphs,
17
20
  '-N' => :nbest,
21
+ '-p' => :partial,
22
+ '-m' => :marginal,
23
+ '-M' => :max_grouping_size,
18
24
  '-F' => :node_format,
19
25
  '-U' => :unk_format,
20
26
  '-B' => :bos_format,
@@ -42,35 +48,36 @@ module Natto
42
48
  h = {}
43
49
  if options.is_a? String
44
50
  opts = OptionParser.new do |opts|
45
- opts.on('-r', '--rcfile ARG') { |arg| h[:rcfile] = arg.strip }
46
- opts.on('-d', '--dicdir ARG') { |arg| h[:dicdir] = arg.strip }
47
- opts.on('-u', '--userdic ARG') { |arg| h[:userdic] = arg.strip }
48
- opts.on('-l', '--lattice-level ARG') { |arg| h[:lattice_level] = arg.strip.to_i } # !deprecated in 0.99!!!
49
- opts.on('-O', '--output-format-type ARG') { |arg| h[:output_format_type] = arg.strip }
50
- opts.on('-a', '--all-morphs') { |arg| h[:all_morphs] = true }
51
- opts.on('-N', '--nbest ARG') { |arg| h[:nbest] = arg.strip.to_i }
52
- #opts.on('-m', '--marginal') { |arg| h[:marginal] = true }
53
- opts.on('-F', '--node-format ARG') { |arg| h[:node_format] = arg.strip }
54
- opts.on('-U', '--unk-format ARG') { |arg| h[:unk_format] = arg.strip }
55
- opts.on('-B', '--bos-format ARG') { |arg| h[:bos_format] = arg.strip }
56
- opts.on('-E', '--eos-format ARG') { |arg| h[:eos_format] = arg.strip }
57
- opts.on('-S', '--eon-format ARG') { |arg| h[:eon_format] = arg.strip }
58
- opts.on('-x', '--unk-feature ARG') { |arg| h[:unk_feature] = arg.strip }
59
- opts.on('-b', '--input-buffer-size ARG') { |arg| h[:input_buffer_size] = arg.strip.to_i }
60
- #opts.on('-M', '--open-mutable-dictionary') { |arg| h[:open_mutable_dictionary] = true }
61
- opts.on('-C', '--allocate-sentence') { |arg| h[:allocate_sentence] = true }
62
- opts.on('-t', '--theta ARG') { |arg| h[:theta] = arg.strip.to_f }
63
- opts.on('-c', '--cost-factor ARG') { |arg| h[:cost_factor] = arg.strip.to_i }
51
+ opts.on('-r', '--rcfile ARG') { |arg| h[:rcfile] = arg.strip }
52
+ opts.on('-d', '--dicdir ARG') { |arg| h[:dicdir] = arg.strip }
53
+ opts.on('-u', '--userdic ARG') { |arg| h[:userdic] = arg.strip }
54
+ opts.on('-l', '--lattice-level ARG') { |arg| h[:lattice_level] = arg.strip.to_i } # !deprecated in 0.99!!!
55
+ opts.on('-O', '--output-format-type ARG') { |arg| h[:output_format_type] = arg.strip }
56
+ opts.on('-a', '--all-morphs') { |arg| h[:all_morphs] = true }
57
+ opts.on('-N', '--nbest ARG') { |arg| h[:nbest] = arg.strip.to_i }
58
+ opts.on('-p', '--partial') { |arg| h[:partial] = true }
59
+ opts.on('-m', '--marginal') { |arg| h[:marginal] = true }
60
+ opts.on('-M', '--max-grouping-size ARG'){ |arg| h[:max_grouping_size] = arg.strip.to_i }
61
+ opts.on('-F', '--node-format ARG') { |arg| h[:node_format] = arg.strip }
62
+ opts.on('-U', '--unk-format ARG') { |arg| h[:unk_format] = arg.strip }
63
+ opts.on('-B', '--bos-format ARG') { |arg| h[:bos_format] = arg.strip }
64
+ opts.on('-E', '--eos-format ARG') { |arg| h[:eos_format] = arg.strip }
65
+ opts.on('-S', '--eon-format ARG') { |arg| h[:eon_format] = arg.strip }
66
+ opts.on('-x', '--unk-feature ARG') { |arg| h[:unk_feature] = arg.strip }
67
+ opts.on('-b', '--input-buffer-size ARG'){ |arg| h[:input_buffer_size] = arg.strip.to_i }
68
+ opts.on('-C', '--allocate-sentence') { |arg| h[:allocate_sentence] = true }
69
+ opts.on('-t', '--theta ARG') { |arg| h[:theta] = arg.strip.to_f }
70
+ opts.on('-c', '--cost-factor ARG') { |arg| h[:cost_factor] = arg.strip.to_i }
64
71
  end
65
72
  opts.parse!(options.split)
66
73
  else
67
74
  SUPPORTED_OPTS.values.each do |k|
68
75
  if options.has_key?(k)
69
- if [ :all_morphs, :allocate_sentence ].include?(k)
76
+ if [ :all_morphs, :partial, :marginal, :allocate_sentence ].include?(k)
70
77
  h[k] = true
71
78
  else
72
79
  v = options[k]
73
- if [ :lattice_level, :input_buffer_size, :nbest, :cost_factor ].include?(k)
80
+ if [ :lattice_level, :nbest, :max_grouping_size, :input_buffer_size, :cost_factor ].include?(k)
74
81
  h[k] = v.to_i
75
82
  elsif k == :theta
76
83
  h[k] = v.to_f
@@ -81,15 +88,16 @@ module Natto
81
88
  end
82
89
  end
83
90
  end
91
+ $stderr.print WARNING_LATTICE_LEVEL if h.include? :lattice_level
84
92
  raise MeCabError.new("Invalid N value") if h[:nbest] && (h[:nbest] < 1 || h[:nbest] > 512)
85
93
  h
86
94
  end
87
95
 
88
96
  # Returns a string-representation of the options to
89
- # be passed in the construction of the <tt>mecab</tt> tagger.
97
+ # be passed in the construction of the `mecab` tagger.
90
98
  #
91
99
  # @param [Hash] options
92
- # @return [String] representation of the options to the <tt>mecab</tt> tagger
100
+ # @return [String] representation of the options to the `mecab` tagger
93
101
  def build_options_str(options={})
94
102
  opt = []
95
103
  SUPPORTED_OPTS.values.each do |k|