natto 0.9.5 → 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,17 +1,17 @@
1
1
  # coding: utf-8
2
2
  module Natto
3
3
 
4
- # Module <tt>Binding</tt> encapsulates methods and behavior
5
- # which are made available via <tt>FFI</tt> bindings to
6
- # <tt>mecab</tt>.
4
+ # Module `Binding` encapsulates methods and behavior
5
+ # which are made available via `FFI` bindings to
6
+ # `mecab`.
7
7
  module Binding
8
8
  require 'ffi'
9
9
  require 'rbconfig'
10
10
  extend FFI::Library
11
11
 
12
12
  # String name for the environment variable used by
13
- # <tt>Natto</tt> to indicate the exact name / full path
14
- # to the <tt>mecab</tt> library.
13
+ # `Natto` to indicate the exact name / full path
14
+ # to the `mecab` library.
15
15
  MECAB_PATH = 'MECAB_PATH'.freeze
16
16
 
17
17
  # @private
@@ -19,14 +19,14 @@ module Natto
19
19
  base.extend(ClassMethods)
20
20
  end
21
21
 
22
- # Returns the name of the <tt>mecab</tt> library based on
22
+ # Returns the name of the `mecab` library based on
23
23
  # the runtime environment. The value of the environment
24
- # parameter <tt>MECAB_PATH</tt> is checked before this
24
+ # parameter `MECAB_PATH` is checked before this
25
25
  # function is invoked, and in the case of Windows, a
26
- # <tt>LoadError</tt> will be raised if <tt>MECAB_PATH</tt>
27
- # is <b>not</b> set to the full path of the <tt>mecab</tt>
26
+ # `LoadError` will be raised if `MECAB_PATH`
27
+ # is _not_ set to the full path of the `mecab`
28
28
  # library.
29
- # @return name of the <tt>mecab</tt> library
29
+ # @return name of the `mecab` library
30
30
  # @raise [LoadError] if MECAB_PATH environment variable is not set in Windows
31
31
  # <br/>
32
32
  # e.g., for bash on UNIX/Linux
@@ -37,20 +37,14 @@ module Natto
37
37
  #
38
38
  # set MECAB_PATH=C:\Program Files\MeCab\bin\libmecab.dll
39
39
  #
40
- # e.g., for Cygwin
41
- #
42
- # export MECAB_PATH=cygmecab-1
43
- #
44
40
  # e.g., from within a Ruby program
45
41
  #
46
- # ENV['MECAB_PATH']=/usr/local/lib/libmecab.so
42
+ # ENV['MECAB_PATH']='/usr/local/lib/libmecab.so'
47
43
  def self.find_library
48
44
  host_os = RbConfig::CONFIG['host_os']
49
45
 
50
46
  if host_os =~ /mswin|mingw/i
51
47
  raise LoadError, "Please set #{MECAB_PATH} to the full path to libmecab.dll"
52
- elsif host_os =~ /cygwin/i
53
- 'cygmecab-1'
54
48
  else
55
49
  'mecab'
56
50
  end
@@ -58,10 +52,17 @@ module Natto
58
52
 
59
53
  ffi_lib(ENV[MECAB_PATH] || find_library)
60
54
 
55
+ # new interface
56
+ attach_function :mecab_model_new2, [:string], :pointer
57
+ attach_function :mecab_model_destroy, [:pointer], :void
58
+ attach_function :mecab_model_dictionary_info, [:pointer], :pointer
59
+
60
+ # old interface
61
61
  attach_function :mecab_new2, [:string], :pointer
62
62
  attach_function :mecab_version, [], :string
63
63
  attach_function :mecab_strerror, [:pointer],:string
64
64
  attach_function :mecab_destroy, [:pointer], :void
65
+ attach_function :mecab_set_partial, [:pointer, :int], :void
65
66
  attach_function :mecab_set_theta, [:pointer, :float], :void
66
67
  attach_function :mecab_set_lattice_level, [:pointer, :int], :void
67
68
  attach_function :mecab_set_all_morphs, [:pointer, :int], :void
@@ -75,6 +76,20 @@ module Natto
75
76
 
76
77
  # @private
77
78
  module ClassMethods
79
+
80
+ def mecab_model_new2(options_str)
81
+ Natto::Binding.mecab_model_new2(options_str)
82
+ end
83
+
84
+ def mecab_model_destroy(m_ptr)
85
+ Natto::Binding.mecab_model_destroy(m_ptr)
86
+ end
87
+
88
+ def mecab_model_dictionary_info(m_ptr)
89
+ Natto::Binding.mecab_model_dictionary_info(m_ptr)
90
+ end
91
+
92
+ # ----------------------------------------
78
93
  def mecab_new2(options_str)
79
94
  Natto::Binding.mecab_new2(options_str)
80
95
  end
@@ -91,6 +106,10 @@ module Natto
91
106
  Natto::Binding.mecab_destroy(m_ptr)
92
107
  end
93
108
 
109
+ def mecab_set_partial(m_ptr, ll)
110
+ Natto::Binding.mecab_set_partial(m_ptr, ll)
111
+ end
112
+
94
113
  def mecab_set_theta(m_ptr, t)
95
114
  Natto::Binding.mecab_set_theta(m_ptr, t)
96
115
  end
@@ -0,0 +1,295 @@
1
+ # coding: utf-8
2
+ require 'natto/binding'
3
+ require 'natto/option_parse'
4
+ require 'natto/struct'
5
+
6
+ module Natto
7
+ # `MeCab` is a wrapper class for the `mecab` tagger.
8
+ # Options to the `mecab` tagger are passed in as a string
9
+ # (MeCab command-line style) or as a Ruby-style hash at
10
+ # initialization.
11
+ #
12
+ # ## Usage
13
+ #
14
+ # require 'rubygems' if RUBY_VERSION.to_f < 1.9
15
+ # require 'natto'
16
+ #
17
+ # nm = Natto::MeCab.new('-Ochasen')
18
+ # => #<Natto::MeCab:0x28d3bdc8 \
19
+ # @tagger=#<FFI::Pointer address=0x28afb980>, \
20
+ # @options={:output_format_type=>"chasen"}, \
21
+ # @dicts=[#<Natto::DictionaryInfo:0x289a1f14 \
22
+ # type="0", \
23
+ # filename="/usr/local/lib/mecab/dic/ipadic/sys.dic", \
24
+ # charset="utf8">], \
25
+ # @version="0.996">
26
+ #
27
+ # nm.parse('凡人にしか見えねえ風景ってのがあるんだよ。') do |n|
28
+ # puts "#{n.surface}\t#{n.feature}"
29
+ # end
30
+ # 凡人 名詞,一般,*,*,*,*,凡人,ボンジン,ボンジン
31
+ # に 助詞,格助詞,一般,*,*,*,に,ニ,ニ
32
+ # しか 助詞,係助詞,*,*,*,*,しか,シカ,シカ
33
+ # 見え 動詞,自立,*,*,一段,未然形,見える,ミエ,ミエ
34
+ # ねえ 助動詞,*,*,*,特殊・ナイ,音便基本形,ない,ネエ,ネー
35
+ # 風景 名詞,一般,*,*,*,*,風景,フウケイ,フーケイ
36
+ # って 助詞,格助詞,連語,*,*,*,って,ッテ,ッテ
37
+ # の 名詞,非自立,一般,*,*,*,の,ノ,ノ
38
+ # が 助詞,格助詞,一般,*,*,*,が,ガ,ガ
39
+ # ある 動詞,自立,*,*,五段・ラ行,基本形,ある,アル,アル
40
+ # ん 名詞,非自立,一般,*,*,*,ん,ン,ン
41
+ # だ 助動詞,*,*,*,特殊・ダ,基本形,だ,ダ,ダ
42
+ # よ 助詞,終助詞,*,*,*,*,よ,ヨ,ヨ
43
+ # 。 記号,句点,*,*,*,*,。,。,。
44
+ # BOS/EOS,*,*,*,*,*,*,*,*BOS
45
+ #
46
+ class MeCab
47
+ include Natto::Binding
48
+ include Natto::OptionParse
49
+
50
+ attr_reader :tagger, :options, :dicts, :version
51
+
52
+ # Initializes the wrapped `mecab` instance with the
53
+ # given `options`.
54
+ #
55
+ # Options supported are:
56
+ #
57
+ # - :rcfile -- resource file
58
+ # - :dicdir -- system dicdir
59
+ # - :userdic -- user dictionary
60
+ # - :lattice_level -- lattice information level (DEPRECATED)
61
+ # - :output_format_type -- output format type (wakati, chasen, yomi, etc.)
62
+ # - :all_morphs -- output all morphs (default false)
63
+ # - :nbest -- output N best results (integer, default 1), requires lattice level >= 1
64
+ # - :partial -- partial parsing mode
65
+ # - :marginal -- output marginal probability
66
+ # - :max_grouping_size -- maximum grouping size for unknown words (default 24)
67
+ # - :node_format -- user-defined node format
68
+ # - :unk_format -- user-defined unknown node format
69
+ # - :bos_format -- user-defined beginning-of-sentence format
70
+ # - :eos_format -- user-defined end-of-sentence format
71
+ # - :eon_format -- user-defined end-of-NBest format
72
+ # - :unk_feature -- feature for unknown word
73
+ # - :input_buffer_size -- set input buffer size (default 8192)
74
+ # - :allocate_sentence -- allocate new memory for input sentence
75
+ # - :theta -- temperature parameter theta (float, default 0.75)
76
+ # - :cost_factor -- cost factor (integer, default 700)
77
+ #
78
+ # <p>MeCab command-line arguments, in short (-F) or long (--node-format) form, may be used in
79
+ # addition to Ruby-style `Hash`es.</p>
80
+ # <i>Use single-quotes to preserve format options that contain escape chars.</i><br/>
81
+ # e.g.<br/>
82
+ #
83
+ # nm = Natto::MeCab.new(:node_format=>'%m\t%f[7]\n')
84
+ # => #<Natto::MeCab:0x28d2ae10
85
+ # @tagger=#<FFI::Pointer address=0x28a97980>, \
86
+ # @options={:node_format=>"%m\\t%f[7]\\n"}, \
87
+ # @dicts=[#<Natto::DictionaryInfo:0x28d2a85c \
88
+ # type="0", \
89
+ # filename="/usr/local/lib/mecab/dic/ipadic/sys.dic", \
90
+ # charset="utf8">], \
91
+ # @version="0.996">
92
+ #
93
+ # puts nm.parse('才能とは求める人間に与えられるものではない。')
94
+ # 才能 サイノウ
95
+ # と ト
96
+ # は ハ
97
+ # 求める モトメル
98
+ # 人間 ニンゲン
99
+ # に ニ
100
+ # 与え アタエ
101
+ # られる ラレル
102
+ # もの モノ
103
+ # で デ
104
+ # は ハ
105
+ # ない ナイ
106
+ # 。 。
107
+ # EOS
108
+ #
109
+ # @param [Hash, String] options the `mecab` options, as a Ruby-style hash or a MeCab command-line style string
110
+ # @raise [MeCabError] if `mecab` cannot be initialized with the given `options`
111
+ def initialize(options={})
112
+ @options = self.class.parse_mecab_options(options)
113
+ @dicts = []
114
+
115
+ opt_str = self.class.build_options_str(@options)
116
+ @tagger = self.mecab_new2(opt_str)
117
+ raise MeCabError.new("Could not initialize MeCab with options: '#{opt_str}'") if @tagger.address == 0x0
118
+
119
+ self.mecab_set_theta(@tagger, @options[:theta]) if @options[:theta]
120
+ self.mecab_set_lattice_level(@tagger, @options[:lattice_level]) if @options[:lattice_level]
121
+ self.mecab_set_all_morphs(@tagger, 1) if @options[:all_morphs]
122
+ self.mecab_set_partial(@tagger, 1) if @options[:partial]
123
+
124
+ # Set mecab parsing implementations for N-best and regular parsing,
125
+ # for both parsing as string and yielding a node object
126
+ # N-Best parsing implementations
127
+ if @options[:nbest] && @options[:nbest] > 1
128
+ self.mecab_set_lattice_level(@tagger, (@options[:lattice_level] || 1))
129
+ @parse_tostr = lambda do |str|
130
+ return self.mecab_nbest_sparse_tostr(@tagger, @options[:nbest], str) ||
131
+ raise(MeCabError.new(self.mecab_strerror(@tagger)))
132
+ end
133
+ @parse_tonodes = lambda do |str|
134
+ nodes = []
135
+ if @options[:nbest] && @options[:nbest] > 1
136
+ self.mecab_nbest_init(@tagger, str)
137
+ n = self.mecab_nbest_next_tonode(@tagger)
138
+ raise(MeCabError.new(self.mecab_strerror(@tagger))) if n.nil? || n.address==0x0
139
+ nlen = @options[:nbest]
140
+ nlen.times do |i|
141
+ s = str.bytes.to_a
142
+ while n && n.address != 0x0
143
+ mn = Natto::MeCabNode.new(n)
144
+ s = s.drop_while {|e| (e==0xa || e==0x20)}
145
+ if !s.empty?
146
+ sarr = []
147
+ mn.length.times { sarr << s.shift }
148
+ surf = sarr.pack('C*')
149
+ #mn.surface = self.class.force_enc(surf)
150
+ mn.surface = surf.force_encoding(Encoding.default_external)
151
+ end
152
+ if @options[:output_format_type] || @options[:node_format]
153
+ mn.feature = self.mecab_format_node(@tagger, n).force_encoding(Encoding.default_external)
154
+ end
155
+ nodes << mn if !mn.is_bos?
156
+ n = mn.next
157
+ end
158
+ n = self.mecab_nbest_next_tonode(@tagger)
159
+ end
160
+ end
161
+ return nodes
162
+ end
163
+ else
164
+ # default parsing implementations
165
+ @parse_tostr = lambda do |str|
166
+ return self.mecab_sparse_tostr(@tagger, str) ||
167
+ raise(MeCabError.new(self.mecab_strerror(@tagger)))
168
+ end
169
+ @parse_tonodes = lambda do |str|
170
+ nodes = []
171
+ n = self.mecab_sparse_tonode(@tagger, str)
172
+ raise(MeCabError.new(self.mecab_strerror(@tagger))) if n.nil? || n.address==0x0
173
+ mn = Natto::MeCabNode.new(n)
174
+ n = mn.next if mn.next.address!=0x0
175
+ s = str.bytes.to_a
176
+ while n && n.address!=0x0
177
+ mn = Natto::MeCabNode.new(n)
178
+ s = s.drop_while {|e| (e==0xa || e==0x20)}
179
+ if !s.empty?
180
+ sarr = []
181
+ mn.length.times { sarr << s.shift }
182
+ surf = sarr.pack('C*')
183
+ mn.surface = surf.force_encoding(Encoding.default_external)
184
+ end
185
+ nodes << mn
186
+ n = mn.next
187
+ end
188
+ return nodes
189
+ end
190
+ end
191
+
192
+ @dicts << Natto::DictionaryInfo.new(Natto::Binding.mecab_dictionary_info(@tagger))
193
+ while @dicts.last.next.address != 0x0
194
+ @dicts << Natto::DictionaryInfo.new(@dicts.last.next)
195
+ end
196
+
197
+ @version = self.mecab_version
198
+
199
+ ObjectSpace.define_finalizer(self, self.class.create_free_proc(@tagger))
200
+ end
201
+
202
+ # Parses the given string `str`. If a block is passed to this method,
203
+ # then node parsing will be used and each node yielded to the given block.
204
+ #
205
+ # @param [String] str
206
+ # @return parsing result from `mecab`
207
+ # @raise [MeCabError] if the `mecab` tagger cannot parse the given string `str`
208
+ # @raise [ArgumentError] if the given string `str` argument is `nil`
209
+ # @see MeCabNode
210
+ def parse(str)
211
+ raise ArgumentError.new 'String to parse cannot be nil' if str.nil?
212
+ if block_given?
213
+ nodes = @parse_tonodes.call(str)
214
+ nodes.each {|n| yield n }
215
+ else
216
+ @parse_tostr.call(str).force_encoding(Encoding.default_external)
217
+ end
218
+ end
219
+
220
+ # Parses the given string `str`, and returns
221
+ # a list of `mecab` nodes.
222
+ # @param [String] str
223
+ # @return [Array] of parsed `mecab` nodes.
224
+ # @raise [MeCabError] if the `mecab` tagger cannot parse the given string `str`
225
+ # @raise [ArgumentError] if the given string `str` argument is `nil`
226
+ # @see MeCabNode
227
+ def parse_as_nodes(str)
228
+ raise ArgumentError.new 'String to parse cannot be nil' if str.nil?
229
+ @parse_tonodes.call(str)
230
+ end
231
+
232
+ # Parses the given string `str`, and returns
233
+ # a list of `mecab` result strings.
234
+ # @param [String] str
235
+ # @return [Array] of parsed `mecab` result strings.
236
+ # @raise [MeCabError] if the `mecab` tagger cannot parse the given string `str`
237
+ # @raise [ArgumentError] if the given string `str` argument is `nil`
238
+ def parse_as_strings(str)
239
+ raise ArgumentError.new 'String to parse cannot be nil' if str.nil?
240
+ @parse_tostr.call(str).force_encoding(Encoding.default_external).lines.to_a
241
+ end
242
+
243
+ # DEPRECATED: use parse_as_nodes instead.
244
+ def readnodes(str)
245
+ $stdout.puts 'DEPRECATED: use parse_as_nodes instead'
246
+ parse_as_nodes(str)
247
+ end
248
+
249
+ # DEPRECATED: use parse_as_strings instead.
250
+ def readlines(str)
251
+ $stdout.puts 'DEPRECATED: use parse_as_strings instead'
252
+ parse_as_strings(str)
253
+ end
254
+
255
+ # Returns human-readable details for the wrapped `mecab` tagger.
256
+ # Overrides `Object#to_s`.
257
+ #
258
+ # - encoded object id
259
+ # - underlying FFI pointer to the `mecab` tagger
260
+ # - options hash
261
+ # - list of dictionaries
262
+ # - MeCab version
263
+ #
264
+ # @return [String] encoded object id, underlying FFI pointer, options hash, list of dictionaries, and MeCab version
265
+ def to_s
266
+ %(#{super.chop} @tagger=#{@tagger}, @options=#{@options.inspect}, @dicts=#{@dicts.to_s}, @version="#{@version.to_s}">)
267
+ end
268
+
269
+ # Overrides `Object#inspect`.
270
+ #
271
+ # @return [String] encoded object id, FFI pointer, options hash, list of dictionaries, and MeCab version
272
+ # @see #to_s
273
+ def inspect
274
+ self.to_s
275
+ end
276
+
277
+ # Returns a `Proc` that will properly free resources
278
+ # when this `MeCab` instance is garbage collected.
279
+ # The `Proc` returned is registered to be invoked
280
+ # after the `MeCab` instance owning `ptr`
281
+ # has been destroyed.
282
+ #
283
+ # @param [FFI::Pointer] ptr
284
+ # @return [Proc] to release `mecab` resources properly
285
+ def self.create_free_proc(ptr)
286
+ Proc.new do
287
+ self.mecab_destroy(ptr)
288
+ end
289
+ end
290
+ end
291
+
292
+ # `MeCabError` is a general error class
293
+ # for the `Natto` module.
294
+ class MeCabError < RuntimeError; end
295
+ end
@@ -1,13 +1,16 @@
1
1
  module Natto
2
2
 
3
- # Module <tt>OptionParse</tt> encapsulates methods and behavior
4
- # for parsing the various <tt>mecab</tt> options supported by
5
- # <tt>Natto</tt>.
3
+ # Module `OptionParse` encapsulates methods and behavior
4
+ # for parsing the various `mecab` options supported by
5
+ # `Natto`.
6
6
  module OptionParse
7
7
  require 'optparse'
8
8
 
9
- # Mapping of mecab short-style configuration options to the <tt>mecab</tt> tagger.
10
- # See the <tt>mecab</tt> help for more details.
9
+ WARNING_LATTICE_LEVEL =
10
+ ":lattice-level is DEPRECATED, please use :marginal or :nbest\n".freeze
11
+
12
+ # Mapping of mecab short-style configuration options to the `mecab`
13
+ # tagger. See the `mecab` help for more details.
11
14
  SUPPORTED_OPTS = { '-r' => :rcfile,
12
15
  '-d' => :dicdir,
13
16
  '-u' => :userdic,
@@ -15,6 +18,9 @@ module Natto
15
18
  '-O' => :output_format_type,
16
19
  '-a' => :all_morphs,
17
20
  '-N' => :nbest,
21
+ '-p' => :partial,
22
+ '-m' => :marginal,
23
+ '-M' => :max_grouping_size,
18
24
  '-F' => :node_format,
19
25
  '-U' => :unk_format,
20
26
  '-B' => :bos_format,
@@ -42,35 +48,36 @@ module Natto
42
48
  h = {}
43
49
  if options.is_a? String
44
50
  opts = OptionParser.new do |opts|
45
- opts.on('-r', '--rcfile ARG') { |arg| h[:rcfile] = arg.strip }
46
- opts.on('-d', '--dicdir ARG') { |arg| h[:dicdir] = arg.strip }
47
- opts.on('-u', '--userdic ARG') { |arg| h[:userdic] = arg.strip }
48
- opts.on('-l', '--lattice-level ARG') { |arg| h[:lattice_level] = arg.strip.to_i } # !deprecated in 0.99!!!
49
- opts.on('-O', '--output-format-type ARG') { |arg| h[:output_format_type] = arg.strip }
50
- opts.on('-a', '--all-morphs') { |arg| h[:all_morphs] = true }
51
- opts.on('-N', '--nbest ARG') { |arg| h[:nbest] = arg.strip.to_i }
52
- #opts.on('-m', '--marginal') { |arg| h[:marginal] = true }
53
- opts.on('-F', '--node-format ARG') { |arg| h[:node_format] = arg.strip }
54
- opts.on('-U', '--unk-format ARG') { |arg| h[:unk_format] = arg.strip }
55
- opts.on('-B', '--bos-format ARG') { |arg| h[:bos_format] = arg.strip }
56
- opts.on('-E', '--eos-format ARG') { |arg| h[:eos_format] = arg.strip }
57
- opts.on('-S', '--eon-format ARG') { |arg| h[:eon_format] = arg.strip }
58
- opts.on('-x', '--unk-feature ARG') { |arg| h[:unk_feature] = arg.strip }
59
- opts.on('-b', '--input-buffer-size ARG') { |arg| h[:input_buffer_size] = arg.strip.to_i }
60
- #opts.on('-M', '--open-mutable-dictionary') { |arg| h[:open_mutable_dictionary] = true }
61
- opts.on('-C', '--allocate-sentence') { |arg| h[:allocate_sentence] = true }
62
- opts.on('-t', '--theta ARG') { |arg| h[:theta] = arg.strip.to_f }
63
- opts.on('-c', '--cost-factor ARG') { |arg| h[:cost_factor] = arg.strip.to_i }
51
+ opts.on('-r', '--rcfile ARG') { |arg| h[:rcfile] = arg.strip }
52
+ opts.on('-d', '--dicdir ARG') { |arg| h[:dicdir] = arg.strip }
53
+ opts.on('-u', '--userdic ARG') { |arg| h[:userdic] = arg.strip }
54
+ opts.on('-l', '--lattice-level ARG') { |arg| h[:lattice_level] = arg.strip.to_i } # !deprecated in 0.99!!!
55
+ opts.on('-O', '--output-format-type ARG') { |arg| h[:output_format_type] = arg.strip }
56
+ opts.on('-a', '--all-morphs') { |arg| h[:all_morphs] = true }
57
+ opts.on('-N', '--nbest ARG') { |arg| h[:nbest] = arg.strip.to_i }
58
+ opts.on('-p', '--partial') { |arg| h[:partial] = true }
59
+ opts.on('-m', '--marginal') { |arg| h[:marginal] = true }
60
+ opts.on('-M', '--max-grouping-size ARG'){ |arg| h[:max_grouping_size] = arg.strip.to_i }
61
+ opts.on('-F', '--node-format ARG') { |arg| h[:node_format] = arg.strip }
62
+ opts.on('-U', '--unk-format ARG') { |arg| h[:unk_format] = arg.strip }
63
+ opts.on('-B', '--bos-format ARG') { |arg| h[:bos_format] = arg.strip }
64
+ opts.on('-E', '--eos-format ARG') { |arg| h[:eos_format] = arg.strip }
65
+ opts.on('-S', '--eon-format ARG') { |arg| h[:eon_format] = arg.strip }
66
+ opts.on('-x', '--unk-feature ARG') { |arg| h[:unk_feature] = arg.strip }
67
+ opts.on('-b', '--input-buffer-size ARG'){ |arg| h[:input_buffer_size] = arg.strip.to_i }
68
+ opts.on('-C', '--allocate-sentence') { |arg| h[:allocate_sentence] = true }
69
+ opts.on('-t', '--theta ARG') { |arg| h[:theta] = arg.strip.to_f }
70
+ opts.on('-c', '--cost-factor ARG') { |arg| h[:cost_factor] = arg.strip.to_i }
64
71
  end
65
72
  opts.parse!(options.split)
66
73
  else
67
74
  SUPPORTED_OPTS.values.each do |k|
68
75
  if options.has_key?(k)
69
- if [ :all_morphs, :allocate_sentence ].include?(k)
76
+ if [ :all_morphs, :partial, :marginal, :allocate_sentence ].include?(k)
70
77
  h[k] = true
71
78
  else
72
79
  v = options[k]
73
- if [ :lattice_level, :input_buffer_size, :nbest, :cost_factor ].include?(k)
80
+ if [ :lattice_level, :nbest, :max_grouping_size, :input_buffer_size, :cost_factor ].include?(k)
74
81
  h[k] = v.to_i
75
82
  elsif k == :theta
76
83
  h[k] = v.to_f
@@ -81,15 +88,16 @@ module Natto
81
88
  end
82
89
  end
83
90
  end
91
+ $stderr.print WARNING_LATTICE_LEVEL if h.include? :lattice_level
84
92
  raise MeCabError.new("Invalid N value") if h[:nbest] && (h[:nbest] < 1 || h[:nbest] > 512)
85
93
  h
86
94
  end
87
95
 
88
96
  # Returns a string-representation of the options to
89
- # be passed in the construction of the <tt>mecab</tt> tagger.
97
+ # be passed in the construction of the `mecab` tagger.
90
98
  #
91
99
  # @param [Hash] options
92
- # @return [String] representation of the options to the <tt>mecab</tt> tagger
100
+ # @return [String] representation of the options to the `mecab` tagger
93
101
  def build_options_str(options={})
94
102
  opt = []
95
103
  SUPPORTED_OPTS.values.each do |k|