natto 0.9.0 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +12 -9
- data/README.md +1 -3
- data/lib/natto.rb +96 -17
- data/lib/natto/version.rb +1 -1
- data/test/natto/tc_mecab.rb +111 -60
- data/test/test_natto.rb +1 -1
- metadata +2 -2
data/CHANGELOG
CHANGED
@@ -1,14 +1,17 @@
|
|
1
1
|
## CHANGELOG
|
2
2
|
|
3
|
-
- __2011/
|
4
|
-
-
|
5
|
-
-
|
3
|
+
- __2011/10/23__: 0.9.1 release.
|
4
|
+
- MeCab configurations can now be given as a string, in MeCab command-line style
|
5
|
+
- Adding more tests around the configuration processing
|
6
|
+
- Pulling support for mecab option 'output', since it is more of a command-line feature
|
7
|
+
|
8
|
+
- __2011/09/11__: 0.9.0 release.
|
9
|
+
- Bumping up version to 0.9.0, as natto appears to be stable
|
10
|
+
- Also migrated project to github
|
6
11
|
|
7
12
|
- __2011/03/23__: 0.5.1 release.
|
8
|
-
- Corrected mojibake issue for surface and feature values
|
9
|
-
|
10
|
-
- Corrected call to NBest initializer in lambdas
|
11
|
-
for Natto::MeCab#initialize.
|
13
|
+
- Corrected mojibake issue for surface and feature values when node-parsing
|
14
|
+
- Corrected call to NBest initializer in lambdas for Natto::MeCab#initialize
|
12
15
|
|
13
16
|
- __2011/02/26__: 0.5.0 release.
|
14
17
|
- Added support for node parsing using blocks
|
@@ -73,6 +76,6 @@
|
|
73
76
|
- Continuing development on proper resource deallocation
|
74
77
|
- Adding options hash in object initializer
|
75
78
|
|
76
|
-
- __2010/12/13__: Released version 0.0.1.
|
77
|
-
an easy-to-use, production-level Ruby binding to MeCab.
|
79
|
+
- __2010/12/13__: Released version 0.0.1.
|
78
80
|
- Initial release
|
81
|
+
- The objective is to provide an easy-to-use, production-level Ruby binding to MeCab
|
data/README.md
CHANGED
@@ -8,8 +8,6 @@ natto is a gem bridging Ruby and MeCab using FFI (foreign function interface). N
|
|
8
8
|
|
9
9
|
You can learn more about [natto at github](https://github.com/buruzaemon/natto).
|
10
10
|
|
11
|
-
Comments and questions are welcome at the [natto-users Group](http://groups.google.com/group/natto-users).
|
12
|
-
|
13
11
|
## Requirements
|
14
12
|
natto requires the following:
|
15
13
|
|
@@ -76,7 +74,7 @@ e.g., from within a Ruby program
|
|
76
74
|
|
77
75
|
## Contributing to natto
|
78
76
|
- Use [git](http://git-scm.com/) and [check out the latest code at github](https://github.com/buruzaemon/natto) to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
79
|
-
- [Browse
|
77
|
+
- [Browse the issue tracker](https://github.com/buruzaemon/natto/issues) to make sure someone hasn't already requested it and/or contributed it.
|
80
78
|
- Fork the project.
|
81
79
|
- Start a feature/bugfix branch.
|
82
80
|
- Commit and push until you are happy with your contribution.
|
data/lib/natto.rb
CHANGED
@@ -7,7 +7,8 @@ module Natto
|
|
7
7
|
require 'ffi'
|
8
8
|
|
9
9
|
# <tt>MeCab</tt> is a wrapper class for the <tt>mecab</tt> parser.
|
10
|
-
# Options to the <tt>mecab</tt> parser are passed in as a
|
10
|
+
# Options to the <tt>mecab</tt> parser are passed in as a string
|
11
|
+
# (MeCab command-line style) or as a Ruby-style hash at
|
11
12
|
# initialization.
|
12
13
|
#
|
13
14
|
# <h2>Usage</h2>
|
@@ -15,7 +16,7 @@ module Natto
|
|
15
16
|
# require 'rubygems' if RUBY_VERSION.to_f < 1.9
|
16
17
|
# require 'natto'
|
17
18
|
#
|
18
|
-
# nm = Natto::MeCab.new(
|
19
|
+
# nm = Natto::MeCab.new('-O chasen2')
|
19
20
|
# => #<Natto::MeCab:0x28d3bdc8 \
|
20
21
|
# @ptr=#<FFI::Pointer address=0x28afb980>, \
|
21
22
|
# @options={:output_format_type=>"chasen2"}, \
|
@@ -40,16 +41,28 @@ module Natto
|
|
40
41
|
|
41
42
|
attr_reader :options, :dicts, :version
|
42
43
|
|
43
|
-
#
|
44
|
+
# Mapping of mecab short-style configuration options to the <tt>mecab</tt> parser.
|
44
45
|
# See the <tt>mecab</tt> help for more details.
|
45
|
-
SUPPORTED_OPTS =
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
:
|
46
|
+
SUPPORTED_OPTS = { '-r' => :rcfile,
|
47
|
+
'-d' => :dicdir,
|
48
|
+
'-u' => :userdic,
|
49
|
+
'-l' => :lattice_level,
|
50
|
+
'-a' => :all_morphs,
|
51
|
+
'-O' => :output_format_type,
|
52
|
+
'-F' => :node_format,
|
53
|
+
'-U' => :unk_format,
|
54
|
+
'-B' => :bos_format,
|
55
|
+
'-E' => :eos_format,
|
56
|
+
'-S' => :eon_format,
|
57
|
+
'-x' => :unk_feature,
|
58
|
+
'-b' => :input_buffer_size,
|
59
|
+
'-C' => :allocate_sentence,
|
60
|
+
'-N' => :nbest,
|
61
|
+
'-t' => :theta,
|
62
|
+
'-c' => :cost_factor }.freeze
|
50
63
|
|
51
64
|
# Initializes the wrapped <tt>mecab</tt> instance with the
|
52
|
-
# given <tt>options</tt
|
65
|
+
# given <tt>options</tt>.
|
53
66
|
#
|
54
67
|
# Options supported are:
|
55
68
|
#
|
@@ -70,8 +83,9 @@ module Natto
|
|
70
83
|
# - :nbest -- output N best results (integer, default 1), requires lattice level >= 1
|
71
84
|
# - :theta -- temperature parameter theta (float, default 0.75)
|
72
85
|
# - :cost_factor -- cost factor (integer, default 700)
|
73
|
-
# - :output -- set the output file name
|
74
86
|
#
|
87
|
+
# <p>MeCab command-line arguments, in short (-F) or long (--node-format) form, may be used in
|
88
|
+
# addition to Ruby-style <code>Hash</code>es</p>
|
75
89
|
# <i>Use single-quotes to preserve format options that contain escape chars.</i><br/>
|
76
90
|
# e.g.<br/>
|
77
91
|
#
|
@@ -95,11 +109,12 @@ module Natto
|
|
95
109
|
# EOS
|
96
110
|
# => nil
|
97
111
|
#
|
98
|
-
# @param [Hash]
|
112
|
+
# @param [Hash or String]
|
99
113
|
# @raise [MeCabError] if <tt>mecab</tt> cannot be initialized with the given <tt>options</tt>
|
100
114
|
# @see MeCab::SUPPORTED_OPTS
|
101
115
|
def initialize(options={})
|
102
|
-
@options =
|
116
|
+
@options = self.class.parse_mecab_options(options)
|
117
|
+
|
103
118
|
@dicts = []
|
104
119
|
|
105
120
|
opt_str = self.class.build_options_str(@options)
|
@@ -107,8 +122,8 @@ module Natto
|
|
107
122
|
raise MeCabError.new("Could not initialize MeCab with options: '#{opt_str}'") if @ptr.address == 0x0
|
108
123
|
|
109
124
|
# set mecab parsing options
|
110
|
-
self.mecab_set_theta(@ptr, @options[:theta]
|
111
|
-
self.mecab_set_lattice_level(@ptr, @options[:lattice_level]
|
125
|
+
self.mecab_set_theta(@ptr, @options[:theta]) if @options[:theta]
|
126
|
+
self.mecab_set_lattice_level(@ptr, @options[:lattice_level]) if @options[:lattice_level]
|
112
127
|
self.mecab_set_all_morphs(@ptr, 1) if @options[:all_morphs]
|
113
128
|
|
114
129
|
# Set mecab parsing implementations for N-best and regular parsing,
|
@@ -192,7 +207,7 @@ module Natto
|
|
192
207
|
#
|
193
208
|
# @return [String] encoded object id, FFI pointer, options hash, list of dictionaries, and MeCab version
|
194
209
|
def to_s
|
195
|
-
%(#{super.chop} @ptr=#{@ptr.to_s}, @options=#{@options.
|
210
|
+
%(#{super.chop} @ptr=#{@ptr.to_s}, @options=#{@options.inspect}, @dicts=#{@dicts.to_s}, @version="#{@version.to_s}">)
|
196
211
|
end
|
197
212
|
|
198
213
|
# Overrides <tt>Object#inspect</tt>.
|
@@ -217,6 +232,71 @@ module Natto
|
|
217
232
|
end
|
218
233
|
end
|
219
234
|
|
235
|
+
# Prepares and returns a hash mapping symbols for
|
236
|
+
# the specified, recognized MeCab options, and their
|
237
|
+
# values. Will parse and convert string (short or
|
238
|
+
# long argument styles) or hash.
|
239
|
+
def self.parse_mecab_options(options={})
|
240
|
+
h = {}
|
241
|
+
if options.is_a? String
|
242
|
+
tokens = options.split
|
243
|
+
t = tokens.shift
|
244
|
+
while t
|
245
|
+
if SUPPORTED_OPTS[t]
|
246
|
+
k = SUPPORTED_OPTS[t]
|
247
|
+
if [ :all_morphs, :allocate_sentence ].include?(k)
|
248
|
+
h[k] = true
|
249
|
+
else
|
250
|
+
v = tokens.shift
|
251
|
+
if [:lattice_level, :input_buffer_size, :nbest, :cost_factor ].include?(k)
|
252
|
+
h[k] = v.to_i
|
253
|
+
elsif k == :theta
|
254
|
+
h[k] = v.to_f
|
255
|
+
else
|
256
|
+
h[k] = v
|
257
|
+
end
|
258
|
+
end
|
259
|
+
elsif t.start_with?('--')
|
260
|
+
k = t.split('--').last
|
261
|
+
if k.include?('=')
|
262
|
+
k,v = k.split('=')
|
263
|
+
k = k.gsub('-','_').to_sym
|
264
|
+
if SUPPORTED_OPTS.values.include?(k)
|
265
|
+
if [:lattice_level, :input_buffer_size, :nbest, :cost_factor ].include?(k)
|
266
|
+
h[k] = v.to_i
|
267
|
+
elsif k == :theta
|
268
|
+
h[k] = v.to_f
|
269
|
+
else
|
270
|
+
h[k] = v
|
271
|
+
end
|
272
|
+
end
|
273
|
+
elsif %w( all-morphs allocate-sentence ).include?(k)
|
274
|
+
h[k.gsub('-','_').to_sym] = true
|
275
|
+
end
|
276
|
+
end
|
277
|
+
t = tokens.shift
|
278
|
+
end
|
279
|
+
else
|
280
|
+
SUPPORTED_OPTS.values.each do |k|
|
281
|
+
if options.has_key?(k)
|
282
|
+
if [ :all_morphs, :allocate_sentence ].include?(k)
|
283
|
+
h[k] = true
|
284
|
+
else
|
285
|
+
v = options[k]
|
286
|
+
if [ :lattice_level, :input_buffer_size, :nbest, :cost_factor ].include?(k)
|
287
|
+
h[k] = v.to_i
|
288
|
+
elsif k == :theta
|
289
|
+
h[k] = v.to_f
|
290
|
+
else
|
291
|
+
h[k] = v
|
292
|
+
end
|
293
|
+
end
|
294
|
+
end
|
295
|
+
end
|
296
|
+
end
|
297
|
+
h
|
298
|
+
end
|
299
|
+
|
220
300
|
# Returns a string-representation of the options to
|
221
301
|
# be passed in the construction of <tt>mecab</tt>.
|
222
302
|
#
|
@@ -224,10 +304,9 @@ module Natto
|
|
224
304
|
# @return [String] representation of the options to the <tt>mecab</tt> parser
|
225
305
|
def self.build_options_str(options={})
|
226
306
|
opt = []
|
227
|
-
SUPPORTED_OPTS.each do |k|
|
307
|
+
SUPPORTED_OPTS.values.each do |k|
|
228
308
|
if options.has_key? k
|
229
309
|
key = k.to_s.gsub('_', '-')
|
230
|
-
# all-morphs and allocate-sentence are just flags
|
231
310
|
if %w( all-morphs allocate-sentence ).include? key
|
232
311
|
opt << "--#{key}" if options[k]==true
|
233
312
|
else
|
data/lib/natto/version.rb
CHANGED
data/test/natto/tc_mecab.rb
CHANGED
@@ -22,70 +22,97 @@ class TestMeCab < Test::Unit::TestCase
|
|
22
22
|
@m = nil
|
23
23
|
end
|
24
24
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
assert_equal('',
|
29
|
-
|
30
|
-
res = Natto::MeCab.build_options_str(:unknown=>"ignore")
|
31
|
-
assert_equal('', res)
|
32
|
-
|
33
|
-
res = Natto::MeCab.build_options_str(:rcfile=>"/some/file")
|
34
|
-
assert_equal('--rcfile=/some/file', res)
|
35
|
-
|
36
|
-
res = Natto::MeCab.build_options_str(:dicdir=>"/some/other/file")
|
37
|
-
assert_equal('--dicdir=/some/other/file', res)
|
38
|
-
|
39
|
-
res = Natto::MeCab.build_options_str(:userdic=>"/yet/another/file")
|
40
|
-
assert_equal('--userdic=/yet/another/file', res)
|
41
|
-
|
42
|
-
res = Natto::MeCab.build_options_str(:lattice_level=>42)
|
43
|
-
assert_equal('--lattice-level=42', res)
|
44
|
-
|
45
|
-
res = Natto::MeCab.build_options_str(:all_morphs=>true)
|
46
|
-
assert_equal('--all-morphs', res)
|
47
|
-
|
48
|
-
res = Natto::MeCab.build_options_str(:output_format_type=>"natto")
|
49
|
-
assert_equal('--output-format-type=natto', res)
|
25
|
+
def test_parse_mecab_options
|
26
|
+
assert_equal({:rcfile => '/some/file'}, Natto::MeCab.parse_mecab_options('-r /some/file'))
|
27
|
+
assert_equal({:rcfile => '/some/file'}, Natto::MeCab.parse_mecab_options('--rcfile=/some/file'))
|
28
|
+
assert_equal({:rcfile => '/some/file'}, Natto::MeCab.parse_mecab_options(:rcfile=>"/some/file"))
|
50
29
|
|
51
|
-
|
52
|
-
assert_equal('
|
30
|
+
assert_equal({:dicdir => '/some/other/file'}, Natto::MeCab.parse_mecab_options('-d /some/other/file'))
|
31
|
+
assert_equal({:dicdir => '/some/other/file'}, Natto::MeCab.parse_mecab_options('--dicdir=/some/other/file'))
|
32
|
+
assert_equal({:dicdir => '/some/other/file'}, Natto::MeCab.parse_mecab_options(:dicdir=>"/some/other/file"))
|
53
33
|
|
54
|
-
|
55
|
-
assert_equal('
|
56
|
-
|
57
|
-
|
58
|
-
assert_equal('
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
assert_equal(
|
65
|
-
|
66
|
-
|
67
|
-
assert_equal('--
|
68
|
-
|
69
|
-
|
70
|
-
assert_equal('
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
assert_equal('
|
34
|
+
assert_equal({:userdic => '/yet/another/file'}, Natto::MeCab.parse_mecab_options('-u /yet/another/file'))
|
35
|
+
assert_equal({:userdic => '/yet/another/file'}, Natto::MeCab.parse_mecab_options('--userdic=/yet/another/file'))
|
36
|
+
assert_equal({:userdic => '/yet/another/file'}, Natto::MeCab.parse_mecab_options(:userdic=>"/yet/another/file"))
|
37
|
+
|
38
|
+
assert_equal({:lattice_level => 42}, Natto::MeCab.parse_mecab_options('-l 42'))
|
39
|
+
assert_equal({:lattice_level => 42}, Natto::MeCab.parse_mecab_options('--lattice-level=42'))
|
40
|
+
assert_equal({:lattice_level => 42}, Natto::MeCab.parse_mecab_options(:lattice_level=>42))
|
41
|
+
|
42
|
+
assert_equal({:all_morphs => true}, Natto::MeCab.parse_mecab_options('-a'))
|
43
|
+
assert_equal({:all_morphs => true}, Natto::MeCab.parse_mecab_options('--all-morphs'))
|
44
|
+
assert_equal({:all_morphs => true}, Natto::MeCab.parse_mecab_options(:all_morphs=>true))
|
45
|
+
|
46
|
+
assert_equal({:output_format_type => 'natto'}, Natto::MeCab.parse_mecab_options('-O natto'))
|
47
|
+
assert_equal({:output_format_type => 'natto'}, Natto::MeCab.parse_mecab_options('--output-format-type=natto'))
|
48
|
+
assert_equal({:output_format_type => 'natto'}, Natto::MeCab.parse_mecab_options(:output_format_type=>"natto"))
|
49
|
+
|
50
|
+
assert_equal({:node_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('-F %m\t%f[7]\n'))
|
51
|
+
assert_equal({:node_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('--node-format=%m\t%f[7]\n'))
|
52
|
+
assert_equal({:node_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(:node_format=>'%m\t%f[7]\n'))
|
53
|
+
|
54
|
+
assert_equal({:unk_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('-U %m\t%f[7]\n'))
|
55
|
+
assert_equal({:unk_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('--unk-format=%m\t%f[7]\n'))
|
56
|
+
assert_equal({:unk_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(:unk_format=>'%m\t%f[7]\n'))
|
57
|
+
|
58
|
+
assert_equal({:bos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('-B %m\t%f[7]\n'))
|
59
|
+
assert_equal({:bos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('--bos-format=%m\t%f[7]\n'))
|
60
|
+
assert_equal({:bos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(:bos_format=>'%m\t%f[7]\n'))
|
61
|
+
|
62
|
+
assert_equal({:eos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('-E %m\t%f[7]\n'))
|
63
|
+
assert_equal({:eos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('--eos-format=%m\t%f[7]\n'))
|
64
|
+
assert_equal({:eos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(:eos_format=>'%m\t%f[7]\n'))
|
65
|
+
|
66
|
+
assert_equal({:eon_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('-S %m\t%f[7]\n'))
|
67
|
+
assert_equal({:eon_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('--eon-format=%m\t%f[7]\n'))
|
68
|
+
assert_equal({:eon_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(:eon_format=>'%m\t%f[7]\n'))
|
69
|
+
|
70
|
+
assert_equal({:unk_feature => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('-x %m\t%f[7]\n'))
|
71
|
+
assert_equal({:unk_feature => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('--unk-feature=%m\t%f[7]\n'))
|
72
|
+
assert_equal({:unk_feature => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(:unk_feature=>'%m\t%f[7]\n'))
|
73
|
+
|
74
|
+
assert_equal({:input_buffer_size => 102400}, Natto::MeCab.parse_mecab_options('-b 102400'))
|
75
|
+
assert_equal({:input_buffer_size => 102400}, Natto::MeCab.parse_mecab_options('--input-buffer-size=102400'))
|
76
|
+
assert_equal({:input_buffer_size => 102400}, Natto::MeCab.parse_mecab_options(:input_buffer_size=>102400))
|
77
|
+
|
78
|
+
assert_equal({:allocate_sentence => true}, Natto::MeCab.parse_mecab_options('-C'))
|
79
|
+
assert_equal({:allocate_sentence => true}, Natto::MeCab.parse_mecab_options('--allocate-sentence'))
|
80
|
+
assert_equal({:allocate_sentence => true}, Natto::MeCab.parse_mecab_options(:allocate_sentence=>true))
|
81
|
+
|
82
|
+
assert_equal({:nbest => 42}, Natto::MeCab.parse_mecab_options('-N 42'))
|
83
|
+
assert_equal({:nbest => 42}, Natto::MeCab.parse_mecab_options('--nbest=42'))
|
84
|
+
assert_equal({:nbest => 42}, Natto::MeCab.parse_mecab_options(:nbest=>42))
|
85
|
+
|
86
|
+
assert_equal({:theta => 0.42}, Natto::MeCab.parse_mecab_options('-t 0.42'))
|
87
|
+
assert_equal({:theta => 0.42}, Natto::MeCab.parse_mecab_options('--theta=0.42'))
|
88
|
+
assert_equal({:theta => 0.42}, Natto::MeCab.parse_mecab_options(:theta=>0.42))
|
89
|
+
|
90
|
+
assert_equal({:cost_factor => 42}, Natto::MeCab.parse_mecab_options('-c 42'))
|
91
|
+
assert_equal({:cost_factor => 42}, Natto::MeCab.parse_mecab_options('--cost-factor=42'))
|
92
|
+
assert_equal({:cost_factor => 42}, Natto::MeCab.parse_mecab_options(:cost_factor=>42))
|
77
93
|
|
78
|
-
|
79
|
-
assert_equal(
|
94
|
+
assert_equal({}, Natto::MeCab.parse_mecab_options)
|
95
|
+
assert_equal({}, Natto::MeCab.parse_mecab_options(:unknown=>"ignore"))
|
96
|
+
end
|
80
97
|
|
81
|
-
|
82
|
-
assert_equal('--
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
assert_equal('--
|
98
|
+
def test_build_options_str
|
99
|
+
assert_equal('--rcfile=/some/file', Natto::MeCab.build_options_str(:rcfile=>"/some/file"))
|
100
|
+
assert_equal('--dicdir=/some/other/file', Natto::MeCab.build_options_str(:dicdir=>"/some/other/file"))
|
101
|
+
assert_equal('--userdic=/yet/another/file', Natto::MeCab.build_options_str(:userdic=>"/yet/another/file"))
|
102
|
+
assert_equal('--lattice-level=42', Natto::MeCab.build_options_str(:lattice_level=>42))
|
103
|
+
assert_equal('--all-morphs', Natto::MeCab.build_options_str(:all_morphs=>true))
|
104
|
+
assert_equal('--output-format-type=natto', Natto::MeCab.build_options_str(:output_format_type=>"natto"))
|
105
|
+
assert_equal('--node-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:node_format=>'%m\t%f[7]\n'))
|
106
|
+
assert_equal('--unk-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:unk_format=>'%m\t%f[7]\n'))
|
107
|
+
assert_equal('--bos-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:bos_format=>'%m\t%f[7]\n'))
|
108
|
+
assert_equal('--eos-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:eos_format=>'%m\t%f[7]\n'))
|
109
|
+
assert_equal('--eon-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:eon_format=>'%m\t%f[7]\n'))
|
110
|
+
assert_equal('--unk-feature=%m\t%f[7]\n', Natto::MeCab.build_options_str(:unk_feature=>'%m\t%f[7]\n'))
|
111
|
+
assert_equal('--input-buffer-size=102400',Natto::MeCab.build_options_str(:input_buffer_size=>102400))
|
112
|
+
assert_equal('--allocate-sentence', Natto::MeCab.build_options_str(:allocate_sentence=>true))
|
113
|
+
assert_equal('--nbest=42', Natto::MeCab.build_options_str(:nbest=>42))
|
114
|
+
assert_equal('--theta=0.42', Natto::MeCab.build_options_str(:theta=>0.42))
|
115
|
+
assert_equal('--cost-factor=42', Natto::MeCab.build_options_str(:cost_factor=>42))
|
89
116
|
end
|
90
117
|
|
91
118
|
# Tests the construction and initial state of a Natto::MeCab instance.
|
@@ -101,18 +128,42 @@ class TestMeCab < Test::Unit::TestCase
|
|
101
128
|
m = Natto::MeCab.new(opts)
|
102
129
|
end
|
103
130
|
assert_equal(opts, m.options)
|
131
|
+
assert_nothing_raised do
|
132
|
+
m = Natto::MeCab.new("-O chasen")
|
133
|
+
end
|
134
|
+
assert_equal(opts, m.options)
|
135
|
+
assert_nothing_raised do
|
136
|
+
m = Natto::MeCab.new("--output-format-type=chasen")
|
137
|
+
end
|
138
|
+
assert_equal(opts, m.options)
|
104
139
|
|
105
140
|
opts = {:all_morphs=>true, :allocate_sentence=>true}
|
106
141
|
assert_nothing_raised do
|
107
142
|
m = Natto::MeCab.new(opts)
|
108
143
|
end
|
109
144
|
assert_equal(opts, m.options)
|
145
|
+
assert_nothing_raised do
|
146
|
+
m = Natto::MeCab.new('-a -C')
|
147
|
+
end
|
148
|
+
assert_equal(opts, m.options)
|
149
|
+
assert_nothing_raised do
|
150
|
+
m = Natto::MeCab.new('--all-morphs --allocate-sentence')
|
151
|
+
end
|
152
|
+
assert_equal(opts, m.options)
|
110
153
|
|
111
154
|
opts = {:lattice_level=>999}
|
112
155
|
assert_nothing_raised do
|
113
156
|
m = Natto::MeCab.new(opts)
|
114
157
|
end
|
115
158
|
assert_equal(opts, m.options)
|
159
|
+
assert_nothing_raised do
|
160
|
+
m = Natto::MeCab.new('-l 999')
|
161
|
+
end
|
162
|
+
assert_equal(opts, m.options)
|
163
|
+
assert_nothing_raised do
|
164
|
+
m = Natto::MeCab.new('--lattice-level=999')
|
165
|
+
end
|
166
|
+
assert_equal(opts, m.options)
|
116
167
|
end
|
117
168
|
|
118
169
|
# Tests the initialize method for error cases for erroneous mecab options.
|
data/test/test_natto.rb
CHANGED
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: natto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.9.
|
5
|
+
version: 0.9.1
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Brooke M. Fujita
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-
|
13
|
+
date: 2011-10-23 00:00:00 +09:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|