natto 0.9.2 → 0.9.3
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +5 -0
- data/README.md +3 -3
- data/lib/natto.rb +32 -54
- data/lib/natto/version.rb +1 -1
- data/test/natto/tc_binding.rb +2 -1
- data/test/natto/tc_mecab.rb +132 -66
- data/test/natto/tc_mecabnode.rb +5 -10
- metadata +54 -37
data/CHANGELOG
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
## CHANGELOG
|
2
2
|
|
3
|
+
- __2012/02/13__: 0.9.3 release.
|
4
|
+
- Updating structures and tests following the latest release of MeCab (0.993)
|
5
|
+
- Ending support for MeCab 0.98.
|
6
|
+
- Support for MeCab 0.993 in Cygwin is still not confirmed.
|
7
|
+
|
3
8
|
- __2011/11/14__: 0.9.2 release.
|
4
9
|
- Bumping up version to 0.9.2, following the migration of this project to bitbucket (hg)
|
5
10
|
|
data/README.md
CHANGED
@@ -11,7 +11,7 @@ You can learn more about [natto at bitbucket](https://bitbucket.org/buruzaemon/n
|
|
11
11
|
## Requirements
|
12
12
|
natto requires the following:
|
13
13
|
|
14
|
-
- [MeCab _0.
|
14
|
+
- [MeCab _0.993_](http://code.google.com/p/mecab/downloads/list)
|
15
15
|
- [ffi _0.6.3 or greater_](http://rubygems.org/gems/ffi)
|
16
16
|
- Ruby _1.8.7 or greater_
|
17
17
|
|
@@ -43,10 +43,10 @@ e.g., from within a Ruby program
|
|
43
43
|
@dicts=[#<Natto::DictionaryInfo:0x28d3061c
|
44
44
|
filename="/usr/local/lib/mecab/dic/ipadic/sys.dic",
|
45
45
|
charset="utf8">],
|
46
|
-
@version="0.
|
46
|
+
@version="0.993">
|
47
47
|
|
48
48
|
puts nm.version
|
49
|
-
=> "0.
|
49
|
+
=> "0.993"
|
50
50
|
|
51
51
|
sysdic = nm.dicts.first
|
52
52
|
|
data/lib/natto.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
|
3
2
|
require 'rubygems' if RUBY_VERSION.to_f < 1.9
|
4
3
|
require 'natto/binding'
|
5
4
|
|
6
5
|
module Natto
|
7
6
|
require 'ffi'
|
7
|
+
require 'optparse'
|
8
8
|
|
9
9
|
# <tt>MeCab</tt> is a wrapper class for the <tt>mecab</tt> parser.
|
10
10
|
# Options to the <tt>mecab</tt> parser are passed in as a string
|
@@ -23,7 +23,7 @@ module Natto
|
|
23
23
|
# @dicts=[#<Natto::DictionaryInfo:0x289a1f14 \
|
24
24
|
# filename="/usr/local/lib/mecab/dic/ipadic/sys.dic", \
|
25
25
|
# charset="utf8">], \
|
26
|
-
# @version="0.
|
26
|
+
# @version="0.993">
|
27
27
|
#
|
28
28
|
# nm.parse('ネバネバの組み合わせ美味しいです。') do |n|
|
29
29
|
# puts "#{n.surface}\t#{n.feature}"
|
@@ -47,8 +47,9 @@ module Natto
|
|
47
47
|
'-d' => :dicdir,
|
48
48
|
'-u' => :userdic,
|
49
49
|
'-l' => :lattice_level,
|
50
|
-
'-a' => :all_morphs,
|
51
50
|
'-O' => :output_format_type,
|
51
|
+
'-a' => :all_morphs,
|
52
|
+
'-N' => :nbest,
|
52
53
|
'-F' => :node_format,
|
53
54
|
'-U' => :unk_format,
|
54
55
|
'-B' => :bos_format,
|
@@ -57,7 +58,6 @@ module Natto
|
|
57
58
|
'-x' => :unk_feature,
|
58
59
|
'-b' => :input_buffer_size,
|
59
60
|
'-C' => :allocate_sentence,
|
60
|
-
'-N' => :nbest,
|
61
61
|
'-t' => :theta,
|
62
62
|
'-c' => :cost_factor }.freeze
|
63
63
|
|
@@ -70,8 +70,9 @@ module Natto
|
|
70
70
|
# - :dicdir -- system dicdir
|
71
71
|
# - :userdic -- user dictionary
|
72
72
|
# - :lattice_level -- lattice information level (integer, default 0)
|
73
|
-
# - :all_morphs -- output all morphs (default false)
|
74
73
|
# - :output_format_type -- output format type (wakati, chasen, yomi, etc.)
|
74
|
+
# - :all_morphs -- output all morphs (default false)
|
75
|
+
# - :nbest -- output N best results (integer, default 1), requires lattice level >= 1
|
75
76
|
# - :node_format -- user-defined node format
|
76
77
|
# - :unk_format -- user-defined unknown node format
|
77
78
|
# - :bos_format -- user-defined beginning-of-sentence format
|
@@ -80,7 +81,6 @@ module Natto
|
|
80
81
|
# - :unk_feature -- feature for unknown word
|
81
82
|
# - :input_buffer_size -- set input buffer size (default 8192)
|
82
83
|
# - :allocate_sentence -- allocate new memory for input sentence
|
83
|
-
# - :nbest -- output N best results (integer, default 1), requires lattice level >= 1
|
84
84
|
# - :theta -- temperature parameter theta (float, default 0.75)
|
85
85
|
# - :cost_factor -- cost factor (integer, default 700)
|
86
86
|
#
|
@@ -95,7 +95,7 @@ module Natto
|
|
95
95
|
# @dicts=[#<Natto::DictionaryInfo:0x28d2a85c \
|
96
96
|
# filename="/usr/local/lib/mecab/dic/ipadic/sys.dic" \
|
97
97
|
# charset="utf8">], \
|
98
|
-
# @version="0.
|
98
|
+
# @version="0.993">
|
99
99
|
#
|
100
100
|
# puts nm.parse('簡単で美味しくて良いですよね。')
|
101
101
|
# 簡単 カンタン
|
@@ -178,7 +178,7 @@ module Natto
|
|
178
178
|
i = 0
|
179
179
|
while node.nil? == false
|
180
180
|
if node.length > 0
|
181
|
-
|
181
|
+
node.surface = str.bytes.to_a()[i, node.length].pack('C*')
|
182
182
|
end
|
183
183
|
yield node
|
184
184
|
if node[:next].address != 0x0
|
@@ -239,43 +239,28 @@ module Natto
|
|
239
239
|
def self.parse_mecab_options(options={})
|
240
240
|
h = {}
|
241
241
|
if options.is_a? String
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
if SUPPORTED_OPTS.values.include?(k)
|
265
|
-
if [:lattice_level, :input_buffer_size, :nbest, :cost_factor ].include?(k)
|
266
|
-
h[k] = v.to_i
|
267
|
-
elsif k == :theta
|
268
|
-
h[k] = v.to_f
|
269
|
-
else
|
270
|
-
h[k] = v
|
271
|
-
end
|
272
|
-
end
|
273
|
-
elsif %w( all-morphs allocate-sentence ).include?(k)
|
274
|
-
h[k.gsub('-','_').to_sym] = true
|
275
|
-
end
|
276
|
-
end
|
277
|
-
t = tokens.shift
|
278
|
-
end
|
242
|
+
opts = OptionParser.new do |opts|
|
243
|
+
opts.on('-r', '--rcfile ARG') { |arg| h[:rcfile] = arg.strip }
|
244
|
+
opts.on('-d', '--dicdir ARG') { |arg| h[:dicdir] = arg.strip }
|
245
|
+
opts.on('-u', '--userdic ARG') { |arg| h[:userdic] = arg.strip }
|
246
|
+
opts.on('-l', '--lattice-level ARG') { |arg| h[:lattice_level] = arg.strip.to_i } # !deprecated in 0.99!!!
|
247
|
+
opts.on('-O', '--output-format-type ARG') { |arg| h[:output_format_type] = arg.strip }
|
248
|
+
opts.on('-a', '--all-morphs') { |arg| h[:all_morphs] = true }
|
249
|
+
opts.on('-N', '--nbest ARG') { |arg| h[:nbest] = arg.strip.to_i }
|
250
|
+
#opts.on('-m', '--marginal') { |arg| h[:marginal] = true }
|
251
|
+
opts.on('-F', '--node-format ARG') { |arg| h[:node_format] = arg.strip }
|
252
|
+
opts.on('-U', '--unk-format ARG') { |arg| h[:unk_format] = arg.strip }
|
253
|
+
opts.on('-B', '--bos-format ARG') { |arg| h[:bos_format] = arg.strip }
|
254
|
+
opts.on('-E', '--eos-format ARG') { |arg| h[:eos_format] = arg.strip }
|
255
|
+
opts.on('-S', '--eon-format ARG') { |arg| h[:eon_format] = arg.strip }
|
256
|
+
opts.on('-x', '--unk-feature ARG') { |arg| h[:unk_feature] = arg.strip }
|
257
|
+
opts.on('-b', '--input-buffer-size ARG') { |arg| h[:input_buffer_size] = arg.strip.to_i }
|
258
|
+
#opts.on('-M', '--open-mutable-dictionary') { |arg| h[:open_mutable_dictionary] = true }
|
259
|
+
opts.on('-C', '--allocate-sentence') { |arg| h[:allocate_sentence] = true }
|
260
|
+
opts.on('-t', '--theta ARG') { |arg| h[:theta] = arg.strip.to_f }
|
261
|
+
opts.on('-c', '--cost-factor ARG') { |arg| h[:cost_factor] = arg.strip.to_i }
|
262
|
+
end
|
263
|
+
opts.parse!(options.split)
|
279
264
|
else
|
280
265
|
SUPPORTED_OPTS.values.each do |k|
|
281
266
|
if options.has_key?(k)
|
@@ -294,6 +279,7 @@ module Natto
|
|
294
279
|
end
|
295
280
|
end
|
296
281
|
end
|
282
|
+
raise MeCabError.new("Invalid N value") if h[:nbest] && (h[:nbest] < 1 || h[:nbest] > 512)
|
297
283
|
h
|
298
284
|
end
|
299
285
|
|
@@ -425,8 +411,6 @@ module Natto
|
|
425
411
|
# - :bnext
|
426
412
|
# - :rpath
|
427
413
|
# - :lpath
|
428
|
-
# - :begin_node_list
|
429
|
-
# - :end_node_list
|
430
414
|
# - :surface
|
431
415
|
# - :feature
|
432
416
|
# - :id
|
@@ -438,14 +422,12 @@ module Natto
|
|
438
422
|
# - :char_type
|
439
423
|
# - :stat
|
440
424
|
# - :isbest
|
441
|
-
# - :sentence_length
|
442
425
|
# - :alpha
|
443
426
|
# - :beta
|
444
427
|
# - :beta
|
445
428
|
# - :prob
|
446
429
|
# - :wcost
|
447
430
|
# - :cost
|
448
|
-
# - :token
|
449
431
|
#
|
450
432
|
# <h2>Usage</h2>
|
451
433
|
# An instance of <tt>MeCabNode</tt> is yielded to a block
|
@@ -511,8 +493,6 @@ module Natto
|
|
511
493
|
:bnext, :pointer,
|
512
494
|
:rpath, :pointer,
|
513
495
|
:lpath, :pointer,
|
514
|
-
:begin_node_list, :pointer,
|
515
|
-
:end_node_list, :pointer,
|
516
496
|
:surface, :string,
|
517
497
|
:feature, :string,
|
518
498
|
:id, :uint,
|
@@ -524,13 +504,11 @@ module Natto
|
|
524
504
|
:char_type, :uchar,
|
525
505
|
:stat, :uchar,
|
526
506
|
:isbest, :uchar,
|
527
|
-
:sentence_length, :uint,
|
528
507
|
:alpha, :float,
|
529
508
|
:beta, :float,
|
530
509
|
:prob, :float,
|
531
510
|
:wcost, :short,
|
532
|
-
:cost, :long
|
533
|
-
:token, :pointer
|
511
|
+
:cost, :long
|
534
512
|
|
535
513
|
if RUBY_VERSION.to_f < 1.9
|
536
514
|
alias_method :deprecated_id, :id
|
data/lib/natto/version.rb
CHANGED
data/test/natto/tc_binding.rb
CHANGED
@@ -15,7 +15,8 @@ class TestNattoBinding < Test::Unit::TestCase
|
|
15
15
|
|
16
16
|
# Tests the mecab_version function.
|
17
17
|
def test_mecab_version
|
18
|
-
|
18
|
+
mv = `mecab -v`.split.last
|
19
|
+
assert_equal(mv, @klass.mecab_version)
|
19
20
|
end
|
20
21
|
|
21
22
|
# Tests for the inclusion of mecab methods made available
|
data/test/natto/tc_mecab.rb
CHANGED
@@ -16,6 +16,7 @@ class TestMeCab < Test::Unit::TestCase
|
|
16
16
|
|
17
17
|
def setup
|
18
18
|
@m = Natto::MeCab.new
|
19
|
+
@ver = `mecab -v`.strip.split.last
|
19
20
|
end
|
20
21
|
|
21
22
|
def teardown
|
@@ -23,73 +24,144 @@ class TestMeCab < Test::Unit::TestCase
|
|
23
24
|
end
|
24
25
|
|
25
26
|
def test_parse_mecab_options
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
27
|
+
[ '-r /some/file',
|
28
|
+
'-r/some/file',
|
29
|
+
'--rcfile=/some/file',
|
30
|
+
'--rcfile /some/file',
|
31
|
+
{:rcfile=>"/some/file"} ].each do |opts|
|
32
|
+
assert_equal({:rcfile => '/some/file'}, Natto::MeCab.parse_mecab_options(opts))
|
33
|
+
end
|
34
|
+
|
35
|
+
[ '-d /some/other/file',
|
36
|
+
'-d/some/other/file',
|
37
|
+
'--dicdir=/some/other/file',
|
38
|
+
'--dicdir /some/other/file',
|
39
|
+
{:dicdir=>"/some/other/file"} ].each do |opts|
|
40
|
+
assert_equal({:dicdir => '/some/other/file'}, Natto::MeCab.parse_mecab_options(opts))
|
41
|
+
end
|
42
|
+
|
43
|
+
[ '-u /yet/another/file',
|
44
|
+
'-u/yet/another/file',
|
45
|
+
'--userdic=/yet/another/file',
|
46
|
+
'--userdic /yet/another/file',
|
47
|
+
{:userdic=>"/yet/another/file"} ].each do |opts|
|
48
|
+
assert_equal({:userdic => '/yet/another/file'}, Natto::MeCab.parse_mecab_options(opts))
|
49
|
+
end
|
50
|
+
|
51
|
+
[ '-l 42',
|
52
|
+
'-l42',
|
53
|
+
'--lattice-level=42',
|
54
|
+
'--lattice-level 42',
|
55
|
+
{:lattice_level=>42}
|
56
|
+
].each do |opts|
|
57
|
+
assert_equal({:lattice_level => 42}, Natto::MeCab.parse_mecab_options(opts))
|
58
|
+
end
|
59
|
+
|
60
|
+
[ '-a',
|
61
|
+
'--all-morphs',
|
62
|
+
{:all_morphs=>true} ].each do |opts|
|
63
|
+
assert_equal({:all_morphs => true}, Natto::MeCab.parse_mecab_options(opts))
|
64
|
+
end
|
65
|
+
|
66
|
+
[ '-O natto',
|
67
|
+
'-Onatto',
|
68
|
+
'--output-format-type=natto',
|
69
|
+
'--output-format-type natto',
|
70
|
+
{:output_format_type=>"natto"} ].each do |opts|
|
71
|
+
assert_equal({:output_format_type => 'natto'}, Natto::MeCab.parse_mecab_options(opts))
|
72
|
+
end
|
73
|
+
|
74
|
+
[ '-N 42',
|
75
|
+
'-N42',
|
76
|
+
'--nbest=42',
|
77
|
+
'--nbest 42',
|
78
|
+
{:nbest=>42}
|
79
|
+
].each do |opts|
|
80
|
+
assert_equal({:nbest => 42}, Natto::MeCab.parse_mecab_options(opts))
|
81
|
+
end
|
82
|
+
[ '--nbest=-1', '--nbest=0', '--nbest=513' ].each do |bad|
|
83
|
+
assert_raise Natto::MeCabError do
|
84
|
+
Natto::MeCab.parse_mecab_options(bad)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
[ '-F %m\t%f[7]\n',
|
89
|
+
'-F%m\t%f[7]\n',
|
90
|
+
'--node-format=%m\t%f[7]\n',
|
91
|
+
'--node-format %m\t%f[7]\n',
|
92
|
+
{:node_format=>'%m\t%f[7]\n'} ].each do |opts|
|
93
|
+
assert_equal({:node_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
94
|
+
end
|
95
|
+
|
96
|
+
[ '-U %m\t%f[7]\n',
|
97
|
+
'-U%m\t%f[7]\n',
|
98
|
+
'--unk-format=%m\t%f[7]\n',
|
99
|
+
'--unk-format %m\t%f[7]\n',
|
100
|
+
{:unk_format=>'%m\t%f[7]\n'} ].each do |opts|
|
101
|
+
assert_equal({:unk_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
102
|
+
end
|
61
103
|
|
62
|
-
|
63
|
-
|
64
|
-
|
104
|
+
[ '-B %m\t%f[7]\n',
|
105
|
+
'-B%m\t%f[7]\n',
|
106
|
+
'--bos-format=%m\t%f[7]\n',
|
107
|
+
'--bos-format %m\t%f[7]\n',
|
108
|
+
{:bos_format=>'%m\t%f[7]\n'} ].each do |opts|
|
109
|
+
assert_equal({:bos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
110
|
+
end
|
65
111
|
|
66
|
-
|
67
|
-
|
68
|
-
|
112
|
+
[ '-E %m\t%f[7]\n',
|
113
|
+
'-E%m\t%f[7]\n',
|
114
|
+
'--eos-format=%m\t%f[7]\n',
|
115
|
+
'--eos-format %m\t%f[7]\n',
|
116
|
+
{:eos_format=>'%m\t%f[7]\n'} ].each do |opts|
|
117
|
+
assert_equal({:eos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
118
|
+
end
|
69
119
|
|
70
|
-
|
71
|
-
|
72
|
-
|
120
|
+
[ '-S %m\t%f[7]\n',
|
121
|
+
'-S%m\t%f[7]\n',
|
122
|
+
'--eon-format=%m\t%f[7]\n',
|
123
|
+
'--eon-format %m\t%f[7]\n',
|
124
|
+
{:eon_format=>'%m\t%f[7]\n'} ].each do |opts|
|
125
|
+
assert_equal({:eon_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
126
|
+
end
|
73
127
|
|
74
|
-
|
75
|
-
|
76
|
-
|
128
|
+
[ '-x %m\t%f[7]\n',
|
129
|
+
'-x%m\t%f[7]\n',
|
130
|
+
'--unk-feature=%m\t%f[7]\n',
|
131
|
+
'--unk-feature %m\t%f[7]\n',
|
132
|
+
{:unk_feature=>'%m\t%f[7]\n'} ].each do |opts|
|
133
|
+
assert_equal({:unk_feature => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
134
|
+
end
|
77
135
|
|
78
|
-
|
79
|
-
|
80
|
-
|
136
|
+
[ '-b 102400',
|
137
|
+
'-b102400',
|
138
|
+
'--input-buffer-size=102400',
|
139
|
+
'--input-buffer-size 102400',
|
140
|
+
{:input_buffer_size=>102400} ].each do |opts|
|
141
|
+
assert_equal({:input_buffer_size => 102400}, Natto::MeCab.parse_mecab_options(opts))
|
142
|
+
end
|
81
143
|
|
82
|
-
|
83
|
-
|
84
|
-
|
144
|
+
[ '-C',
|
145
|
+
'--allocate-sentence',
|
146
|
+
{:allocate_sentence=>true} ].each do |opts|
|
147
|
+
assert_equal({:allocate_sentence => true}, Natto::MeCab.parse_mecab_options(opts))
|
148
|
+
end
|
85
149
|
|
86
|
-
|
87
|
-
|
88
|
-
|
150
|
+
[ '-t 0.42',
|
151
|
+
'-t0.42',
|
152
|
+
'--theta=0.42',
|
153
|
+
'--theta 0.42',
|
154
|
+
{:theta=>0.42} ].each do |opts|
|
155
|
+
assert_equal({:theta => 0.42}, Natto::MeCab.parse_mecab_options(opts))
|
156
|
+
end
|
89
157
|
|
90
|
-
|
91
|
-
|
92
|
-
|
158
|
+
[ '-c 42',
|
159
|
+
'-c42',
|
160
|
+
'--cost-factor=42',
|
161
|
+
'--cost-factor 42',
|
162
|
+
{:cost_factor=>42} ].each do |opts|
|
163
|
+
assert_equal({:cost_factor => 42}, Natto::MeCab.parse_mecab_options(opts))
|
164
|
+
end
|
93
165
|
|
94
166
|
assert_equal({}, Natto::MeCab.parse_mecab_options)
|
95
167
|
assert_equal({}, Natto::MeCab.parse_mecab_options(:unknown=>"ignore"))
|
@@ -115,7 +187,6 @@ class TestMeCab < Test::Unit::TestCase
|
|
115
187
|
assert_equal('--cost-factor=42', Natto::MeCab.build_options_str(:cost_factor=>42))
|
116
188
|
end
|
117
189
|
|
118
|
-
# Tests the construction and initial state of a Natto::MeCab instance.
|
119
190
|
def test_construction
|
120
191
|
m = nil
|
121
192
|
assert_nothing_raised do
|
@@ -166,7 +237,6 @@ class TestMeCab < Test::Unit::TestCase
|
|
166
237
|
assert_equal(opts, m.options)
|
167
238
|
end
|
168
239
|
|
169
|
-
# Tests the initialize method for error cases for erroneous mecab options.
|
170
240
|
def test_initialize_with_errors
|
171
241
|
assert_raise Natto::MeCabError do
|
172
242
|
Natto::MeCab.new(:output_format_type=>'not_defined_anywhere')
|
@@ -185,12 +255,10 @@ class TestMeCab < Test::Unit::TestCase
|
|
185
255
|
end
|
186
256
|
end
|
187
257
|
|
188
|
-
# Tests the mecab version string accessor class method of Natto::MeCab.
|
189
258
|
def test_version_accessor
|
190
|
-
assert_equal(
|
259
|
+
assert_equal(@ver, @m.version)
|
191
260
|
end
|
192
261
|
|
193
|
-
# Tests Natto::MeCab parsing using the --all-morphs option.
|
194
262
|
def test_all_morphs
|
195
263
|
m = Natto::MeCab.new(:all_morphs=>true)
|
196
264
|
expected = `echo #{TEST_STR} | mecab --all-morphs`.lines.to_a
|
@@ -202,7 +270,6 @@ class TestMeCab < Test::Unit::TestCase
|
|
202
270
|
assert_equal(expected, actual)
|
203
271
|
end
|
204
272
|
|
205
|
-
# Tests Natto::MeCab parsing (default parse_tostr).
|
206
273
|
def test_parse_tostr_default
|
207
274
|
expected = `echo #{TEST_STR} | mecab`.lines.to_a
|
208
275
|
expected.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
@@ -213,7 +280,6 @@ class TestMeCab < Test::Unit::TestCase
|
|
213
280
|
assert_equal(expected, actual)
|
214
281
|
end
|
215
282
|
|
216
|
-
# Tests Natto::MeCab parsing (default parse_tonode).
|
217
283
|
def test_parse_tonode_default
|
218
284
|
expected = `echo #{TEST_STR} | mecab`.lines.to_a
|
219
285
|
expected.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
data/test/natto/tc_mecabnode.rb
CHANGED
@@ -66,15 +66,12 @@ class TestMeCabNode < Test::Unit::TestCase
|
|
66
66
|
# in 1.8.n
|
67
67
|
def test_mecabnode_accessors
|
68
68
|
node = @nodes[0]
|
69
|
-
|
70
|
-
:prev,
|
69
|
+
[ :prev,
|
71
70
|
:next,
|
72
71
|
:enext,
|
73
72
|
:bnext,
|
74
73
|
:rpath,
|
75
74
|
:lpath,
|
76
|
-
:begin_node_list,
|
77
|
-
:end_node_list,
|
78
75
|
:surface,
|
79
76
|
:feature,
|
80
77
|
:id,
|
@@ -86,16 +83,14 @@ class TestMeCabNode < Test::Unit::TestCase
|
|
86
83
|
:char_type,
|
87
84
|
:stat,
|
88
85
|
:isbest,
|
89
|
-
:sentence_length,
|
90
86
|
:alpha,
|
91
87
|
:beta,
|
92
88
|
:prob,
|
93
89
|
:wcost,
|
94
|
-
:cost
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
assert_not_nil(node.respond_to? nomme )
|
90
|
+
:cost ].each do |nomme|
|
91
|
+
assert_nothing_raised do
|
92
|
+
node.send nomme
|
93
|
+
end
|
99
94
|
end
|
100
95
|
|
101
96
|
# NoMethodError will be raised for anything else!
|
metadata
CHANGED
@@ -1,39 +1,48 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: natto
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 61
|
5
5
|
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 9
|
9
|
+
- 3
|
10
|
+
version: 0.9.3
|
6
11
|
platform: ruby
|
7
|
-
authors:
|
12
|
+
authors:
|
8
13
|
- Brooke M. Fujita
|
9
14
|
autorequire:
|
10
15
|
bindir: bin
|
11
16
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
17
|
+
|
18
|
+
date: 2012-02-13 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
15
21
|
name: ffi
|
16
|
-
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
17
24
|
none: false
|
18
|
-
requirements:
|
19
|
-
- -
|
20
|
-
- !ruby/object:Gem::Version
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 1
|
29
|
+
segments:
|
30
|
+
- 0
|
31
|
+
- 6
|
32
|
+
- 3
|
21
33
|
version: 0.6.3
|
22
34
|
type: :runtime
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
interface). No compilation is necessary, and natto will run on CRuby (mri/yarv)
|
27
|
-
and JRuby (jvm) equally well, on any OS.
|
28
|
-
|
29
|
-
ruby で mecab を自由に利用できる natto をぜひ試してください。
|
30
|
-
|
31
|
-
'
|
35
|
+
version_requirements: *id001
|
36
|
+
description: "natto is a gem bridging Ruby and MeCab using FFI (foreign function interface). No compilation is necessary, and natto will run on CRuby (mri/yarv) and JRuby (jvm) equally well, on any OS.\n\
|
37
|
+
ruby \xE3\x81\xA7 mecab \xE3\x82\x92\xE8\x87\xAA\xE7\x94\xB1\xE3\x81\xAB\xE5\x88\xA9\xE7\x94\xA8\xE3\x81\xA7\xE3\x81\x8D\xE3\x82\x8B natto \xE3\x82\x92\xE3\x81\x9C\xE3\x81\xB2\xE8\xA9\xA6\xE3\x81\x97\xE3\x81\xA6\xE3\x81\x8F\xE3\x81\xA0\xE3\x81\x95\xE3\x81\x84\xE3\x80\x82\n"
|
32
38
|
email: buruzaemon@gmail.com
|
33
39
|
executables: []
|
40
|
+
|
34
41
|
extensions: []
|
42
|
+
|
35
43
|
extra_rdoc_files: []
|
36
|
-
|
44
|
+
|
45
|
+
files:
|
37
46
|
- lib/natto.rb
|
38
47
|
- lib/natto/binding.rb
|
39
48
|
- lib/natto/version.rb
|
@@ -47,32 +56,40 @@ files:
|
|
47
56
|
- CHANGELOG
|
48
57
|
- .yardopts
|
49
58
|
homepage: https://bitbucket.org/buruzaemon/natto/overview
|
50
|
-
licenses:
|
59
|
+
licenses:
|
51
60
|
- BSD
|
52
61
|
post_install_message:
|
53
62
|
rdoc_options: []
|
54
|
-
|
63
|
+
|
64
|
+
require_paths:
|
55
65
|
- lib
|
56
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
57
67
|
none: false
|
58
|
-
requirements:
|
59
|
-
- -
|
60
|
-
- !ruby/object:Gem::Version
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
hash: 57
|
72
|
+
segments:
|
73
|
+
- 1
|
74
|
+
- 8
|
75
|
+
- 7
|
61
76
|
version: 1.8.7
|
62
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
78
|
none: false
|
64
|
-
requirements:
|
65
|
-
- -
|
66
|
-
- !ruby/object:Gem::Version
|
67
|
-
|
68
|
-
|
69
|
-
-
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
hash: 3
|
83
|
+
segments:
|
84
|
+
- 0
|
85
|
+
version: "0"
|
86
|
+
requirements:
|
87
|
+
- MeCab, 0.993 or greater
|
70
88
|
- FFI, 0.6.3 or greater
|
71
89
|
rubyforge_project:
|
72
90
|
rubygems_version: 1.8.10
|
73
91
|
signing_key:
|
74
92
|
specification_version: 3
|
75
|
-
summary: natto combines the Ruby programming language with MeCab, the part-of-speech
|
76
|
-
|
77
|
-
test_files:
|
93
|
+
summary: natto combines the Ruby programming language with MeCab, the part-of-speech and morphological analyzer for the Japanese language.
|
94
|
+
test_files:
|
78
95
|
- test/test_natto.rb
|