natto 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +12 -9
- data/README.md +1 -3
- data/lib/natto.rb +96 -17
- data/lib/natto/version.rb +1 -1
- data/test/natto/tc_mecab.rb +111 -60
- data/test/test_natto.rb +1 -1
- metadata +2 -2
data/CHANGELOG
CHANGED
@@ -1,14 +1,17 @@
|
|
1
1
|
## CHANGELOG
|
2
2
|
|
3
|
-
- __2011/
|
4
|
-
-
|
5
|
-
-
|
3
|
+
- __2011/10/23__: 0.9.1 release.
|
4
|
+
- MeCab configurations can now be given as a string, in MeCab command-line style
|
5
|
+
- Adding more tests around the configuration processing
|
6
|
+
- Pulling support for mecab option 'output', since it is more of a command-line feature
|
7
|
+
|
8
|
+
- __2011/09/11__: 0.9.0 release.
|
9
|
+
- Bumping up version to 0.9.0, as natto appears to be stable
|
10
|
+
- Also migrated project to github
|
6
11
|
|
7
12
|
- __2011/03/23__: 0.5.1 release.
|
8
|
-
- Corrected mojibake issue for surface and feature values
|
9
|
-
|
10
|
-
- Corrected call to NBest initializer in lambdas
|
11
|
-
for Natto::MeCab#initialize.
|
13
|
+
- Corrected mojibake issue for surface and feature values when node-parsing
|
14
|
+
- Corrected call to NBest initializer in lambdas for Natto::MeCab#initialize
|
12
15
|
|
13
16
|
- __2011/02/26__: 0.5.0 release.
|
14
17
|
- Added support for node parsing using blocks
|
@@ -73,6 +76,6 @@
|
|
73
76
|
- Continuing development on proper resource deallocation
|
74
77
|
- Adding options hash in object initializer
|
75
78
|
|
76
|
-
- __2010/12/13__: Released version 0.0.1.
|
77
|
-
an easy-to-use, production-level Ruby binding to MeCab.
|
79
|
+
- __2010/12/13__: Released version 0.0.1.
|
78
80
|
- Initial release
|
81
|
+
- The objective is to provide an easy-to-use, production-level Ruby binding to MeCab
|
data/README.md
CHANGED
@@ -8,8 +8,6 @@ natto is a gem bridging Ruby and MeCab using FFI (foreign function interface). N
|
|
8
8
|
|
9
9
|
You can learn more about [natto at github](https://github.com/buruzaemon/natto).
|
10
10
|
|
11
|
-
Comments and questions are welcome at the [natto-users Group](http://groups.google.com/group/natto-users).
|
12
|
-
|
13
11
|
## Requirements
|
14
12
|
natto requires the following:
|
15
13
|
|
@@ -76,7 +74,7 @@ e.g., from within a Ruby program
|
|
76
74
|
|
77
75
|
## Contributing to natto
|
78
76
|
- Use [git](http://git-scm.com/) and [check out the latest code at github](https://github.com/buruzaemon/natto) to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
79
|
-
- [Browse
|
77
|
+
- [Browse the issue tracker](https://github.com/buruzaemon/natto/issues) to make sure someone hasn't already requested it and/or contributed it.
|
80
78
|
- Fork the project.
|
81
79
|
- Start a feature/bugfix branch.
|
82
80
|
- Commit and push until you are happy with your contribution.
|
data/lib/natto.rb
CHANGED
@@ -7,7 +7,8 @@ module Natto
|
|
7
7
|
require 'ffi'
|
8
8
|
|
9
9
|
# <tt>MeCab</tt> is a wrapper class for the <tt>mecab</tt> parser.
|
10
|
-
# Options to the <tt>mecab</tt> parser are passed in as a
|
10
|
+
# Options to the <tt>mecab</tt> parser are passed in as a string
|
11
|
+
# (MeCab command-line style) or as a Ruby-style hash at
|
11
12
|
# initialization.
|
12
13
|
#
|
13
14
|
# <h2>Usage</h2>
|
@@ -15,7 +16,7 @@ module Natto
|
|
15
16
|
# require 'rubygems' if RUBY_VERSION.to_f < 1.9
|
16
17
|
# require 'natto'
|
17
18
|
#
|
18
|
-
# nm = Natto::MeCab.new(
|
19
|
+
# nm = Natto::MeCab.new('-O chasen2')
|
19
20
|
# => #<Natto::MeCab:0x28d3bdc8 \
|
20
21
|
# @ptr=#<FFI::Pointer address=0x28afb980>, \
|
21
22
|
# @options={:output_format_type=>"chasen2"}, \
|
@@ -40,16 +41,28 @@ module Natto
|
|
40
41
|
|
41
42
|
attr_reader :options, :dicts, :version
|
42
43
|
|
43
|
-
#
|
44
|
+
# Mapping of mecab short-style configuration options to the <tt>mecab</tt> parser.
|
44
45
|
# See the <tt>mecab</tt> help for more details.
|
45
|
-
SUPPORTED_OPTS =
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
:
|
46
|
+
SUPPORTED_OPTS = { '-r' => :rcfile,
|
47
|
+
'-d' => :dicdir,
|
48
|
+
'-u' => :userdic,
|
49
|
+
'-l' => :lattice_level,
|
50
|
+
'-a' => :all_morphs,
|
51
|
+
'-O' => :output_format_type,
|
52
|
+
'-F' => :node_format,
|
53
|
+
'-U' => :unk_format,
|
54
|
+
'-B' => :bos_format,
|
55
|
+
'-E' => :eos_format,
|
56
|
+
'-S' => :eon_format,
|
57
|
+
'-x' => :unk_feature,
|
58
|
+
'-b' => :input_buffer_size,
|
59
|
+
'-C' => :allocate_sentence,
|
60
|
+
'-N' => :nbest,
|
61
|
+
'-t' => :theta,
|
62
|
+
'-c' => :cost_factor }.freeze
|
50
63
|
|
51
64
|
# Initializes the wrapped <tt>mecab</tt> instance with the
|
52
|
-
# given <tt>options</tt
|
65
|
+
# given <tt>options</tt>.
|
53
66
|
#
|
54
67
|
# Options supported are:
|
55
68
|
#
|
@@ -70,8 +83,9 @@ module Natto
|
|
70
83
|
# - :nbest -- output N best results (integer, default 1), requires lattice level >= 1
|
71
84
|
# - :theta -- temperature parameter theta (float, default 0.75)
|
72
85
|
# - :cost_factor -- cost factor (integer, default 700)
|
73
|
-
# - :output -- set the output file name
|
74
86
|
#
|
87
|
+
# <p>MeCab command-line arguments, short (-F) or long (--node-format), may be used in
|
88
|
+
# addition to Ruby-style <code>Hash</code>es</p>
|
75
89
|
# <i>Use single-quotes to preserve format options that contain escape chars.</i><br/>
|
76
90
|
# e.g.<br/>
|
77
91
|
#
|
@@ -95,11 +109,12 @@ module Natto
|
|
95
109
|
# EOS
|
96
110
|
# => nil
|
97
111
|
#
|
98
|
-
# @param [Hash]
|
112
|
+
# @param [Hash, String] options the MeCab options, as a Ruby-style hash or a MeCab command-line style string
|
99
113
|
# @raise [MeCabError] if <tt>mecab</tt> cannot be initialized with the given <tt>options</tt>
|
100
114
|
# @see MeCab::SUPPORTED_OPTS
|
101
115
|
def initialize(options={})
|
102
|
-
@options =
|
116
|
+
@options = self.class.parse_mecab_options(options)
|
117
|
+
|
103
118
|
@dicts = []
|
104
119
|
|
105
120
|
opt_str = self.class.build_options_str(@options)
|
@@ -107,8 +122,8 @@ module Natto
|
|
107
122
|
raise MeCabError.new("Could not initialize MeCab with options: '#{opt_str}'") if @ptr.address == 0x0
|
108
123
|
|
109
124
|
# set mecab parsing options
|
110
|
-
self.mecab_set_theta(@ptr, @options[:theta]
|
111
|
-
self.mecab_set_lattice_level(@ptr, @options[:lattice_level]
|
125
|
+
self.mecab_set_theta(@ptr, @options[:theta]) if @options[:theta]
|
126
|
+
self.mecab_set_lattice_level(@ptr, @options[:lattice_level]) if @options[:lattice_level]
|
112
127
|
self.mecab_set_all_morphs(@ptr, 1) if @options[:all_morphs]
|
113
128
|
|
114
129
|
# Set mecab parsing implementations for N-best and regular parsing,
|
@@ -192,7 +207,7 @@ module Natto
|
|
192
207
|
#
|
193
208
|
# @return [String] encoded object id, FFI pointer, options hash, list of dictionaries, and MeCab version
|
194
209
|
def to_s
|
195
|
-
%(#{super.chop} @ptr=#{@ptr.to_s}, @options=#{@options.
|
210
|
+
%(#{super.chop} @ptr=#{@ptr.to_s}, @options=#{@options.inspect}, @dicts=#{@dicts.to_s}, @version="#{@version.to_s}">)
|
196
211
|
end
|
197
212
|
|
198
213
|
# Overrides <tt>Object#inspect</tt>.
|
@@ -217,6 +232,71 @@ module Natto
|
|
217
232
|
end
|
218
233
|
end
|
219
234
|
|
235
|
+
# Prepares and returns a hash mapping symbols for
|
236
|
+
# the specified, recognized MeCab options, and their
|
237
|
+
# values. Will parse and convert string (short or
|
238
|
+
# long argument styles) or hash.
|
239
|
+
def self.parse_mecab_options(options={})
|
240
|
+
h = {}
|
241
|
+
if options.is_a? String
|
242
|
+
tokens = options.split
|
243
|
+
t = tokens.shift
|
244
|
+
while t
|
245
|
+
if SUPPORTED_OPTS[t]
|
246
|
+
k = SUPPORTED_OPTS[t]
|
247
|
+
if [ :all_morphs, :allocate_sentence ].include?(k)
|
248
|
+
h[k] = true
|
249
|
+
else
|
250
|
+
v = tokens.shift
|
251
|
+
if [:lattice_level, :input_buffer_size, :nbest, :cost_factor ].include?(k)
|
252
|
+
h[k] = v.to_i
|
253
|
+
elsif k == :theta
|
254
|
+
h[k] = v.to_f
|
255
|
+
else
|
256
|
+
h[k] = v
|
257
|
+
end
|
258
|
+
end
|
259
|
+
elsif t.start_with?('--')
|
260
|
+
k = t.split('--').last
|
261
|
+
if k.include?('=')
|
262
|
+
k,v = k.split('=')
|
263
|
+
k = k.gsub('-','_').to_sym
|
264
|
+
if SUPPORTED_OPTS.values.include?(k)
|
265
|
+
if [:lattice_level, :input_buffer_size, :nbest, :cost_factor ].include?(k)
|
266
|
+
h[k] = v.to_i
|
267
|
+
elsif k == :theta
|
268
|
+
h[k] = v.to_f
|
269
|
+
else
|
270
|
+
h[k] = v
|
271
|
+
end
|
272
|
+
end
|
273
|
+
elsif %w( all-morphs allocate-sentence ).include?(k)
|
274
|
+
h[k.gsub('-','_').to_sym] = true
|
275
|
+
end
|
276
|
+
end
|
277
|
+
t = tokens.shift
|
278
|
+
end
|
279
|
+
else
|
280
|
+
SUPPORTED_OPTS.values.each do |k|
|
281
|
+
if options.has_key?(k)
|
282
|
+
if [ :all_morphs, :allocate_sentence ].include?(k)
|
283
|
+
h[k] = true
|
284
|
+
else
|
285
|
+
v = options[k]
|
286
|
+
if [ :lattice_level, :input_buffer_size, :nbest, :cost_factor ].include?(k)
|
287
|
+
h[k] = v.to_i
|
288
|
+
elsif k == :theta
|
289
|
+
h[k] = v.to_f
|
290
|
+
else
|
291
|
+
h[k] = v
|
292
|
+
end
|
293
|
+
end
|
294
|
+
end
|
295
|
+
end
|
296
|
+
end
|
297
|
+
h
|
298
|
+
end
|
299
|
+
|
220
300
|
# Returns a string-representation of the options to
|
221
301
|
# be passed in the construction of <tt>mecab</tt>.
|
222
302
|
#
|
@@ -224,10 +304,9 @@ module Natto
|
|
224
304
|
# @return [String] representation of the options to the <tt>mecab</tt> parser
|
225
305
|
def self.build_options_str(options={})
|
226
306
|
opt = []
|
227
|
-
SUPPORTED_OPTS.each do |k|
|
307
|
+
SUPPORTED_OPTS.values.each do |k|
|
228
308
|
if options.has_key? k
|
229
309
|
key = k.to_s.gsub('_', '-')
|
230
|
-
# all-morphs and allocate-sentence are just flags
|
231
310
|
if %w( all-morphs allocate-sentence ).include? key
|
232
311
|
opt << "--#{key}" if options[k]==true
|
233
312
|
else
|
data/lib/natto/version.rb
CHANGED
data/test/natto/tc_mecab.rb
CHANGED
@@ -22,70 +22,97 @@ class TestMeCab < Test::Unit::TestCase
|
|
22
22
|
@m = nil
|
23
23
|
end
|
24
24
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
assert_equal('',
|
29
|
-
|
30
|
-
res = Natto::MeCab.build_options_str(:unknown=>"ignore")
|
31
|
-
assert_equal('', res)
|
32
|
-
|
33
|
-
res = Natto::MeCab.build_options_str(:rcfile=>"/some/file")
|
34
|
-
assert_equal('--rcfile=/some/file', res)
|
35
|
-
|
36
|
-
res = Natto::MeCab.build_options_str(:dicdir=>"/some/other/file")
|
37
|
-
assert_equal('--dicdir=/some/other/file', res)
|
38
|
-
|
39
|
-
res = Natto::MeCab.build_options_str(:userdic=>"/yet/another/file")
|
40
|
-
assert_equal('--userdic=/yet/another/file', res)
|
41
|
-
|
42
|
-
res = Natto::MeCab.build_options_str(:lattice_level=>42)
|
43
|
-
assert_equal('--lattice-level=42', res)
|
44
|
-
|
45
|
-
res = Natto::MeCab.build_options_str(:all_morphs=>true)
|
46
|
-
assert_equal('--all-morphs', res)
|
47
|
-
|
48
|
-
res = Natto::MeCab.build_options_str(:output_format_type=>"natto")
|
49
|
-
assert_equal('--output-format-type=natto', res)
|
25
|
+
def test_parse_mecab_options
|
26
|
+
assert_equal({:rcfile => '/some/file'}, Natto::MeCab.parse_mecab_options('-r /some/file'))
|
27
|
+
assert_equal({:rcfile => '/some/file'}, Natto::MeCab.parse_mecab_options('--rcfile=/some/file'))
|
28
|
+
assert_equal({:rcfile => '/some/file'}, Natto::MeCab.parse_mecab_options(:rcfile=>"/some/file"))
|
50
29
|
|
51
|
-
|
52
|
-
assert_equal('
|
30
|
+
assert_equal({:dicdir => '/some/other/file'}, Natto::MeCab.parse_mecab_options('-d /some/other/file'))
|
31
|
+
assert_equal({:dicdir => '/some/other/file'}, Natto::MeCab.parse_mecab_options('--dicdir=/some/other/file'))
|
32
|
+
assert_equal({:dicdir => '/some/other/file'}, Natto::MeCab.parse_mecab_options(:dicdir=>"/some/other/file"))
|
53
33
|
|
54
|
-
|
55
|
-
assert_equal('
|
56
|
-
|
57
|
-
|
58
|
-
assert_equal('
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
assert_equal(
|
65
|
-
|
66
|
-
|
67
|
-
assert_equal('--
|
68
|
-
|
69
|
-
|
70
|
-
assert_equal('
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
assert_equal('
|
34
|
+
assert_equal({:userdic => '/yet/another/file'}, Natto::MeCab.parse_mecab_options('-u /yet/another/file'))
|
35
|
+
assert_equal({:userdic => '/yet/another/file'}, Natto::MeCab.parse_mecab_options('--userdic=/yet/another/file'))
|
36
|
+
assert_equal({:userdic => '/yet/another/file'}, Natto::MeCab.parse_mecab_options(:userdic=>"/yet/another/file"))
|
37
|
+
|
38
|
+
assert_equal({:lattice_level => 42}, Natto::MeCab.parse_mecab_options('-l 42'))
|
39
|
+
assert_equal({:lattice_level => 42}, Natto::MeCab.parse_mecab_options('--lattice-level=42'))
|
40
|
+
assert_equal({:lattice_level => 42}, Natto::MeCab.parse_mecab_options(:lattice_level=>42))
|
41
|
+
|
42
|
+
assert_equal({:all_morphs => true}, Natto::MeCab.parse_mecab_options('-a'))
|
43
|
+
assert_equal({:all_morphs => true}, Natto::MeCab.parse_mecab_options('--all-morphs'))
|
44
|
+
assert_equal({:all_morphs => true}, Natto::MeCab.parse_mecab_options(:all_morphs=>true))
|
45
|
+
|
46
|
+
assert_equal({:output_format_type => 'natto'}, Natto::MeCab.parse_mecab_options('-O natto'))
|
47
|
+
assert_equal({:output_format_type => 'natto'}, Natto::MeCab.parse_mecab_options('--output-format-type=natto'))
|
48
|
+
assert_equal({:output_format_type => 'natto'}, Natto::MeCab.parse_mecab_options(:output_format_type=>"natto"))
|
49
|
+
|
50
|
+
assert_equal({:node_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('-F %m\t%f[7]\n'))
|
51
|
+
assert_equal({:node_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('--node-format=%m\t%f[7]\n'))
|
52
|
+
assert_equal({:node_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(:node_format=>'%m\t%f[7]\n'))
|
53
|
+
|
54
|
+
assert_equal({:unk_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('-U %m\t%f[7]\n'))
|
55
|
+
assert_equal({:unk_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('--unk-format=%m\t%f[7]\n'))
|
56
|
+
assert_equal({:unk_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(:unk_format=>'%m\t%f[7]\n'))
|
57
|
+
|
58
|
+
assert_equal({:bos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('-B %m\t%f[7]\n'))
|
59
|
+
assert_equal({:bos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('--bos-format=%m\t%f[7]\n'))
|
60
|
+
assert_equal({:bos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(:bos_format=>'%m\t%f[7]\n'))
|
61
|
+
|
62
|
+
assert_equal({:eos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('-E %m\t%f[7]\n'))
|
63
|
+
assert_equal({:eos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('--eos-format=%m\t%f[7]\n'))
|
64
|
+
assert_equal({:eos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(:eos_format=>'%m\t%f[7]\n'))
|
65
|
+
|
66
|
+
assert_equal({:eon_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('-S %m\t%f[7]\n'))
|
67
|
+
assert_equal({:eon_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('--eon-format=%m\t%f[7]\n'))
|
68
|
+
assert_equal({:eon_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(:eon_format=>'%m\t%f[7]\n'))
|
69
|
+
|
70
|
+
assert_equal({:unk_feature => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('-x %m\t%f[7]\n'))
|
71
|
+
assert_equal({:unk_feature => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options('--unk-feature=%m\t%f[7]\n'))
|
72
|
+
assert_equal({:unk_feature => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(:unk_feature=>'%m\t%f[7]\n'))
|
73
|
+
|
74
|
+
assert_equal({:input_buffer_size => 102400}, Natto::MeCab.parse_mecab_options('-b 102400'))
|
75
|
+
assert_equal({:input_buffer_size => 102400}, Natto::MeCab.parse_mecab_options('--input-buffer-size=102400'))
|
76
|
+
assert_equal({:input_buffer_size => 102400}, Natto::MeCab.parse_mecab_options(:input_buffer_size=>102400))
|
77
|
+
|
78
|
+
assert_equal({:allocate_sentence => true}, Natto::MeCab.parse_mecab_options('-C'))
|
79
|
+
assert_equal({:allocate_sentence => true}, Natto::MeCab.parse_mecab_options('--allocate-sentence'))
|
80
|
+
assert_equal({:allocate_sentence => true}, Natto::MeCab.parse_mecab_options(:allocate_sentence=>true))
|
81
|
+
|
82
|
+
assert_equal({:nbest => 42}, Natto::MeCab.parse_mecab_options('-N 42'))
|
83
|
+
assert_equal({:nbest => 42}, Natto::MeCab.parse_mecab_options('--nbest=42'))
|
84
|
+
assert_equal({:nbest => 42}, Natto::MeCab.parse_mecab_options(:nbest=>42))
|
85
|
+
|
86
|
+
assert_equal({:theta => 0.42}, Natto::MeCab.parse_mecab_options('-t 0.42'))
|
87
|
+
assert_equal({:theta => 0.42}, Natto::MeCab.parse_mecab_options('--theta=0.42'))
|
88
|
+
assert_equal({:theta => 0.42}, Natto::MeCab.parse_mecab_options(:theta=>0.42))
|
89
|
+
|
90
|
+
assert_equal({:cost_factor => 42}, Natto::MeCab.parse_mecab_options('-c 42'))
|
91
|
+
assert_equal({:cost_factor => 42}, Natto::MeCab.parse_mecab_options('--cost-factor=42'))
|
92
|
+
assert_equal({:cost_factor => 42}, Natto::MeCab.parse_mecab_options(:cost_factor=>42))
|
77
93
|
|
78
|
-
|
79
|
-
assert_equal(
|
94
|
+
assert_equal({}, Natto::MeCab.parse_mecab_options)
|
95
|
+
assert_equal({}, Natto::MeCab.parse_mecab_options(:unknown=>"ignore"))
|
96
|
+
end
|
80
97
|
|
81
|
-
|
82
|
-
assert_equal('--
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
assert_equal('--
|
98
|
+
def test_build_options_str
|
99
|
+
assert_equal('--rcfile=/some/file', Natto::MeCab.build_options_str(:rcfile=>"/some/file"))
|
100
|
+
assert_equal('--dicdir=/some/other/file', Natto::MeCab.build_options_str(:dicdir=>"/some/other/file"))
|
101
|
+
assert_equal('--userdic=/yet/another/file', Natto::MeCab.build_options_str(:userdic=>"/yet/another/file"))
|
102
|
+
assert_equal('--lattice-level=42', Natto::MeCab.build_options_str(:lattice_level=>42))
|
103
|
+
assert_equal('--all-morphs', Natto::MeCab.build_options_str(:all_morphs=>true))
|
104
|
+
assert_equal('--output-format-type=natto', Natto::MeCab.build_options_str(:output_format_type=>"natto"))
|
105
|
+
assert_equal('--node-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:node_format=>'%m\t%f[7]\n'))
|
106
|
+
assert_equal('--unk-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:unk_format=>'%m\t%f[7]\n'))
|
107
|
+
assert_equal('--bos-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:bos_format=>'%m\t%f[7]\n'))
|
108
|
+
assert_equal('--eos-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:eos_format=>'%m\t%f[7]\n'))
|
109
|
+
assert_equal('--eon-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:eon_format=>'%m\t%f[7]\n'))
|
110
|
+
assert_equal('--unk-feature=%m\t%f[7]\n', Natto::MeCab.build_options_str(:unk_feature=>'%m\t%f[7]\n'))
|
111
|
+
assert_equal('--input-buffer-size=102400',Natto::MeCab.build_options_str(:input_buffer_size=>102400))
|
112
|
+
assert_equal('--allocate-sentence', Natto::MeCab.build_options_str(:allocate_sentence=>true))
|
113
|
+
assert_equal('--nbest=42', Natto::MeCab.build_options_str(:nbest=>42))
|
114
|
+
assert_equal('--theta=0.42', Natto::MeCab.build_options_str(:theta=>0.42))
|
115
|
+
assert_equal('--cost-factor=42', Natto::MeCab.build_options_str(:cost_factor=>42))
|
89
116
|
end
|
90
117
|
|
91
118
|
# Tests the construction and initial state of a Natto::MeCab instance.
|
@@ -101,18 +128,42 @@ class TestMeCab < Test::Unit::TestCase
|
|
101
128
|
m = Natto::MeCab.new(opts)
|
102
129
|
end
|
103
130
|
assert_equal(opts, m.options)
|
131
|
+
assert_nothing_raised do
|
132
|
+
m = Natto::MeCab.new("-O chasen")
|
133
|
+
end
|
134
|
+
assert_equal(opts, m.options)
|
135
|
+
assert_nothing_raised do
|
136
|
+
m = Natto::MeCab.new("--output-format-type=chasen")
|
137
|
+
end
|
138
|
+
assert_equal(opts, m.options)
|
104
139
|
|
105
140
|
opts = {:all_morphs=>true, :allocate_sentence=>true}
|
106
141
|
assert_nothing_raised do
|
107
142
|
m = Natto::MeCab.new(opts)
|
108
143
|
end
|
109
144
|
assert_equal(opts, m.options)
|
145
|
+
assert_nothing_raised do
|
146
|
+
m = Natto::MeCab.new('-a -C')
|
147
|
+
end
|
148
|
+
assert_equal(opts, m.options)
|
149
|
+
assert_nothing_raised do
|
150
|
+
m = Natto::MeCab.new('--all-morphs --allocate-sentence')
|
151
|
+
end
|
152
|
+
assert_equal(opts, m.options)
|
110
153
|
|
111
154
|
opts = {:lattice_level=>999}
|
112
155
|
assert_nothing_raised do
|
113
156
|
m = Natto::MeCab.new(opts)
|
114
157
|
end
|
115
158
|
assert_equal(opts, m.options)
|
159
|
+
assert_nothing_raised do
|
160
|
+
m = Natto::MeCab.new('-l 999')
|
161
|
+
end
|
162
|
+
assert_equal(opts, m.options)
|
163
|
+
assert_nothing_raised do
|
164
|
+
m = Natto::MeCab.new('--lattice-level=999')
|
165
|
+
end
|
166
|
+
assert_equal(opts, m.options)
|
116
167
|
end
|
117
168
|
|
118
169
|
# Tests the initialize method for error cases for erroneous mecab options.
|
data/test/test_natto.rb
CHANGED
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: natto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.9.
|
5
|
+
version: 0.9.1
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Brooke M. Fujita
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-
|
13
|
+
date: 2011-10-23 00:00:00 +09:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|