natto 0.9.2 → 0.9.3
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +5 -0
- data/README.md +3 -3
- data/lib/natto.rb +32 -54
- data/lib/natto/version.rb +1 -1
- data/test/natto/tc_binding.rb +2 -1
- data/test/natto/tc_mecab.rb +132 -66
- data/test/natto/tc_mecabnode.rb +5 -10
- metadata +54 -37
data/CHANGELOG
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
## CHANGELOG
|
2
2
|
|
3
|
+
- __2012/02/xx__: 0.9.3 release.
|
4
|
+
- Updating structures, tests following the latest release of MeCab 0.993a
|
5
|
+
- Ending support for 0.98 MeCab.
|
6
|
+
- Support for MeCab 0.993 in Cygwin is still not confirmed.
|
7
|
+
|
3
8
|
- __2011/11/14__: 0.9.2 release.
|
4
9
|
- Bumping up version to 0.9.2, following the migration of this project to bitbucket (hg)
|
5
10
|
|
data/README.md
CHANGED
@@ -11,7 +11,7 @@ You can learn more about [natto at bitbucket](https://bitbucket.org/buruzaemon/n
|
|
11
11
|
## Requirements
|
12
12
|
natto requires the following:
|
13
13
|
|
14
|
-
- [MeCab _0.
|
14
|
+
- [MeCab _0.993_](http://code.google.com/p/mecab/downloads/list)
|
15
15
|
- [ffi _0.6.3 or greater_](http://rubygems.org/gems/ffi)
|
16
16
|
- Ruby _1.8.7 or greater_
|
17
17
|
|
@@ -43,10 +43,10 @@ e.g., from within a Ruby program
|
|
43
43
|
@dicts=[#<Natto::DictionaryInfo:0x28d3061c
|
44
44
|
filename="/usr/local/lib/mecab/dic/ipadic/sys.dic",
|
45
45
|
charset="utf8">],
|
46
|
-
@version="0.
|
46
|
+
@version="0.992">
|
47
47
|
|
48
48
|
puts nm.version
|
49
|
-
=> "0.
|
49
|
+
=> "0.992"
|
50
50
|
|
51
51
|
sysdic = nm.dicts.first
|
52
52
|
|
data/lib/natto.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
|
3
2
|
require 'rubygems' if RUBY_VERSION.to_f < 1.9
|
4
3
|
require 'natto/binding'
|
5
4
|
|
6
5
|
module Natto
|
7
6
|
require 'ffi'
|
7
|
+
require 'optparse'
|
8
8
|
|
9
9
|
# <tt>MeCab</tt> is a wrapper class for the <tt>mecab</tt> parser.
|
10
10
|
# Options to the <tt>mecab</tt> parser are passed in as a string
|
@@ -23,7 +23,7 @@ module Natto
|
|
23
23
|
# @dicts=[#<Natto::DictionaryInfo:0x289a1f14 \
|
24
24
|
# filename="/usr/local/lib/mecab/dic/ipadic/sys.dic", \
|
25
25
|
# charset="utf8">], \
|
26
|
-
# @version="0.
|
26
|
+
# @version="0.993">
|
27
27
|
#
|
28
28
|
# nm.parse('ネバネバの組み合わせ美味しいです。') do |n|
|
29
29
|
# puts "#{n.surface}\t#{n.feature}"
|
@@ -47,8 +47,9 @@ module Natto
|
|
47
47
|
'-d' => :dicdir,
|
48
48
|
'-u' => :userdic,
|
49
49
|
'-l' => :lattice_level,
|
50
|
-
'-a' => :all_morphs,
|
51
50
|
'-O' => :output_format_type,
|
51
|
+
'-a' => :all_morphs,
|
52
|
+
'-N' => :nbest,
|
52
53
|
'-F' => :node_format,
|
53
54
|
'-U' => :unk_format,
|
54
55
|
'-B' => :bos_format,
|
@@ -57,7 +58,6 @@ module Natto
|
|
57
58
|
'-x' => :unk_feature,
|
58
59
|
'-b' => :input_buffer_size,
|
59
60
|
'-C' => :allocate_sentence,
|
60
|
-
'-N' => :nbest,
|
61
61
|
'-t' => :theta,
|
62
62
|
'-c' => :cost_factor }.freeze
|
63
63
|
|
@@ -70,8 +70,9 @@ module Natto
|
|
70
70
|
# - :dicdir -- system dicdir
|
71
71
|
# - :userdic -- user dictionary
|
72
72
|
# - :lattice_level -- lattice information level (integer, default 0)
|
73
|
-
# - :all_morphs -- output all morphs (default false)
|
74
73
|
# - :output_format_type -- output format type (wakati, chasen, yomi, etc.)
|
74
|
+
# - :all_morphs -- output all morphs (default false)
|
75
|
+
# - :nbest -- output N best results (integer, default 1), requires lattice level >= 1
|
75
76
|
# - :node_format -- user-defined node format
|
76
77
|
# - :unk_format -- user-defined unknown node format
|
77
78
|
# - :bos_format -- user-defined beginning-of-sentence format
|
@@ -80,7 +81,6 @@ module Natto
|
|
80
81
|
# - :unk_feature -- feature for unknown word
|
81
82
|
# - :input_buffer_size -- set input buffer size (default 8192)
|
82
83
|
# - :allocate_sentence -- allocate new memory for input sentence
|
83
|
-
# - :nbest -- output N best results (integer, default 1), requires lattice level >= 1
|
84
84
|
# - :theta -- temperature parameter theta (float, default 0.75)
|
85
85
|
# - :cost_factor -- cost factor (integer, default 700)
|
86
86
|
#
|
@@ -95,7 +95,7 @@ module Natto
|
|
95
95
|
# @dicts=[#<Natto::DictionaryInfo:0x28d2a85c \
|
96
96
|
# filename="/usr/local/lib/mecab/dic/ipadic/sys.dic" \
|
97
97
|
# charset="utf8">], \
|
98
|
-
# @version="0.
|
98
|
+
# @version="0.993">
|
99
99
|
#
|
100
100
|
# puts nm.parse('簡単で美味しくて良いですよね。')
|
101
101
|
# 簡単 カンタン
|
@@ -178,7 +178,7 @@ module Natto
|
|
178
178
|
i = 0
|
179
179
|
while node.nil? == false
|
180
180
|
if node.length > 0
|
181
|
-
|
181
|
+
node.surface = str.bytes.to_a()[i, node.length].pack('C*')
|
182
182
|
end
|
183
183
|
yield node
|
184
184
|
if node[:next].address != 0x0
|
@@ -239,43 +239,28 @@ module Natto
|
|
239
239
|
def self.parse_mecab_options(options={})
|
240
240
|
h = {}
|
241
241
|
if options.is_a? String
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
if SUPPORTED_OPTS.values.include?(k)
|
265
|
-
if [:lattice_level, :input_buffer_size, :nbest, :cost_factor ].include?(k)
|
266
|
-
h[k] = v.to_i
|
267
|
-
elsif k == :theta
|
268
|
-
h[k] = v.to_f
|
269
|
-
else
|
270
|
-
h[k] = v
|
271
|
-
end
|
272
|
-
end
|
273
|
-
elsif %w( all-morphs allocate-sentence ).include?(k)
|
274
|
-
h[k.gsub('-','_').to_sym] = true
|
275
|
-
end
|
276
|
-
end
|
277
|
-
t = tokens.shift
|
278
|
-
end
|
242
|
+
opts = OptionParser.new do |opts|
|
243
|
+
opts.on('-r', '--rcfile ARG') { |arg| h[:rcfile] = arg.strip }
|
244
|
+
opts.on('-d', '--dicdir ARG') { |arg| h[:dicdir] = arg.strip }
|
245
|
+
opts.on('-u', '--userdic ARG') { |arg| h[:userdic] = arg.strip }
|
246
|
+
opts.on('-l', '--lattice-level ARG') { |arg| h[:lattice_level] = arg.strip.to_i } # !deprecated in 0.99!!!
|
247
|
+
opts.on('-O', '--output-format-type ARG') { |arg| h[:output_format_type] = arg.strip }
|
248
|
+
opts.on('-a', '--all-morphs') { |arg| h[:all_morphs] = true }
|
249
|
+
opts.on('-N', '--nbest ARG') { |arg| h[:nbest] = arg.strip.to_i }
|
250
|
+
#opts.on('-m', '--marginal') { |arg| h[:marginal] = true }
|
251
|
+
opts.on('-F', '--node-format ARG') { |arg| h[:node_format] = arg.strip }
|
252
|
+
opts.on('-U', '--unk-format ARG') { |arg| h[:unk_format] = arg.strip }
|
253
|
+
opts.on('-B', '--bos-format ARG') { |arg| h[:bos_format] = arg.strip }
|
254
|
+
opts.on('-E', '--eos-format ARG') { |arg| h[:eos_format] = arg.strip }
|
255
|
+
opts.on('-S', '--eon-format ARG') { |arg| h[:eon_format] = arg.strip }
|
256
|
+
opts.on('-x', '--unk-feature ARG') { |arg| h[:unk_feature] = arg.strip }
|
257
|
+
opts.on('-b', '--input-buffer-size ARG') { |arg| h[:input_buffer_size] = arg.strip.to_i }
|
258
|
+
#opts.on('-M', '--open-mutable-dictionary') { |arg| h[:open_mutable_dictionary] = true }
|
259
|
+
opts.on('-C', '--allocate-sentence') { |arg| h[:allocate_sentence] = true }
|
260
|
+
opts.on('-t', '--theta ARG') { |arg| h[:theta] = arg.strip.to_f }
|
261
|
+
opts.on('-c', '--cost-factor ARG') { |arg| h[:cost_factor] = arg.strip.to_i }
|
262
|
+
end
|
263
|
+
opts.parse!(options.split)
|
279
264
|
else
|
280
265
|
SUPPORTED_OPTS.values.each do |k|
|
281
266
|
if options.has_key?(k)
|
@@ -294,6 +279,7 @@ module Natto
|
|
294
279
|
end
|
295
280
|
end
|
296
281
|
end
|
282
|
+
raise MeCabError.new("Invalid N value") if h[:nbest] && (h[:nbest] < 1 || h[:nbest] > 512)
|
297
283
|
h
|
298
284
|
end
|
299
285
|
|
@@ -425,8 +411,6 @@ module Natto
|
|
425
411
|
# - :bnext
|
426
412
|
# - :rpath
|
427
413
|
# - :lpath
|
428
|
-
# - :begin_node_list
|
429
|
-
# - :end_node_list
|
430
414
|
# - :surface
|
431
415
|
# - :feature
|
432
416
|
# - :id
|
@@ -438,14 +422,12 @@ module Natto
|
|
438
422
|
# - :char_type
|
439
423
|
# - :stat
|
440
424
|
# - :isbest
|
441
|
-
# - :sentence_length
|
442
425
|
# - :alpha
|
443
426
|
# - :beta
|
444
427
|
# - :beta
|
445
428
|
# - :prob
|
446
429
|
# - :wcost
|
447
430
|
# - :cost
|
448
|
-
# - :token
|
449
431
|
#
|
450
432
|
# <h2>Usage</h2>
|
451
433
|
# An instance of <tt>MeCabNode</tt> is yielded to a block
|
@@ -511,8 +493,6 @@ module Natto
|
|
511
493
|
:bnext, :pointer,
|
512
494
|
:rpath, :pointer,
|
513
495
|
:lpath, :pointer,
|
514
|
-
:begin_node_list, :pointer,
|
515
|
-
:end_node_list, :pointer,
|
516
496
|
:surface, :string,
|
517
497
|
:feature, :string,
|
518
498
|
:id, :uint,
|
@@ -524,13 +504,11 @@ module Natto
|
|
524
504
|
:char_type, :uchar,
|
525
505
|
:stat, :uchar,
|
526
506
|
:isbest, :uchar,
|
527
|
-
:sentence_length, :uint,
|
528
507
|
:alpha, :float,
|
529
508
|
:beta, :float,
|
530
509
|
:prob, :float,
|
531
510
|
:wcost, :short,
|
532
|
-
:cost, :long
|
533
|
-
:token, :pointer
|
511
|
+
:cost, :long
|
534
512
|
|
535
513
|
if RUBY_VERSION.to_f < 1.9
|
536
514
|
alias_method :deprecated_id, :id
|
data/lib/natto/version.rb
CHANGED
data/test/natto/tc_binding.rb
CHANGED
@@ -15,7 +15,8 @@ class TestNattoBinding < Test::Unit::TestCase
|
|
15
15
|
|
16
16
|
# Tests the mecab_version function.
|
17
17
|
def test_mecab_version
|
18
|
-
|
18
|
+
mv = `mecab -v`.split.last
|
19
|
+
assert_equal(mv, @klass.mecab_version)
|
19
20
|
end
|
20
21
|
|
21
22
|
# Tests for the inclusion of mecab methods made available
|
data/test/natto/tc_mecab.rb
CHANGED
@@ -16,6 +16,7 @@ class TestMeCab < Test::Unit::TestCase
|
|
16
16
|
|
17
17
|
def setup
|
18
18
|
@m = Natto::MeCab.new
|
19
|
+
@ver = `mecab -v`.strip.split.last
|
19
20
|
end
|
20
21
|
|
21
22
|
def teardown
|
@@ -23,73 +24,144 @@ class TestMeCab < Test::Unit::TestCase
|
|
23
24
|
end
|
24
25
|
|
25
26
|
def test_parse_mecab_options
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
27
|
+
[ '-r /some/file',
|
28
|
+
'-r/some/file',
|
29
|
+
'--rcfile=/some/file',
|
30
|
+
'--rcfile /some/file',
|
31
|
+
{:rcfile=>"/some/file"} ].each do |opts|
|
32
|
+
assert_equal({:rcfile => '/some/file'}, Natto::MeCab.parse_mecab_options(opts))
|
33
|
+
end
|
34
|
+
|
35
|
+
[ '-d /some/other/file',
|
36
|
+
'-d/some/other/file',
|
37
|
+
'--dicdir=/some/other/file',
|
38
|
+
'--dicdir /some/other/file',
|
39
|
+
{:dicdir=>"/some/other/file"} ].each do |opts|
|
40
|
+
assert_equal({:dicdir => '/some/other/file'}, Natto::MeCab.parse_mecab_options(opts))
|
41
|
+
end
|
42
|
+
|
43
|
+
[ '-u /yet/another/file',
|
44
|
+
'-u/yet/another/file',
|
45
|
+
'--userdic=/yet/another/file',
|
46
|
+
'--userdic /yet/another/file',
|
47
|
+
{:userdic=>"/yet/another/file"} ].each do |opts|
|
48
|
+
assert_equal({:userdic => '/yet/another/file'}, Natto::MeCab.parse_mecab_options(opts))
|
49
|
+
end
|
50
|
+
|
51
|
+
[ '-l 42',
|
52
|
+
'-l42',
|
53
|
+
'--lattice-level=42',
|
54
|
+
'--lattice-level 42',
|
55
|
+
{:lattice_level=>42}
|
56
|
+
].each do |opts|
|
57
|
+
assert_equal({:lattice_level => 42}, Natto::MeCab.parse_mecab_options(opts))
|
58
|
+
end
|
59
|
+
|
60
|
+
[ '-a',
|
61
|
+
'--all-morphs',
|
62
|
+
{:all_morphs=>true} ].each do |opts|
|
63
|
+
assert_equal({:all_morphs => true}, Natto::MeCab.parse_mecab_options(opts))
|
64
|
+
end
|
65
|
+
|
66
|
+
[ '-O natto',
|
67
|
+
'-Onatto',
|
68
|
+
'--output-format-type=natto',
|
69
|
+
'--output-format-type natto',
|
70
|
+
{:output_format_type=>"natto"} ].each do |opts|
|
71
|
+
assert_equal({:output_format_type => 'natto'}, Natto::MeCab.parse_mecab_options(opts))
|
72
|
+
end
|
73
|
+
|
74
|
+
[ '-N 42',
|
75
|
+
'-N42',
|
76
|
+
'--nbest=42',
|
77
|
+
'--nbest 42',
|
78
|
+
{:nbest=>42}
|
79
|
+
].each do |opts|
|
80
|
+
assert_equal({:nbest => 42}, Natto::MeCab.parse_mecab_options(opts))
|
81
|
+
end
|
82
|
+
[ '--nbest=-1', '--nbest=0', '--nbest=513' ].each do |bad|
|
83
|
+
assert_raise Natto::MeCabError do
|
84
|
+
Natto::MeCab.parse_mecab_options(bad)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
[ '-F %m\t%f[7]\n',
|
89
|
+
'-F%m\t%f[7]\n',
|
90
|
+
'--node-format=%m\t%f[7]\n',
|
91
|
+
'--node-format %m\t%f[7]\n',
|
92
|
+
{:node_format=>'%m\t%f[7]\n'} ].each do |opts|
|
93
|
+
assert_equal({:node_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
94
|
+
end
|
95
|
+
|
96
|
+
[ '-U %m\t%f[7]\n',
|
97
|
+
'-U%m\t%f[7]\n',
|
98
|
+
'--unk-format=%m\t%f[7]\n',
|
99
|
+
'--unk-format %m\t%f[7]\n',
|
100
|
+
{:unk_format=>'%m\t%f[7]\n'} ].each do |opts|
|
101
|
+
assert_equal({:unk_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
102
|
+
end
|
61
103
|
|
62
|
-
|
63
|
-
|
64
|
-
|
104
|
+
[ '-B %m\t%f[7]\n',
|
105
|
+
'-B%m\t%f[7]\n',
|
106
|
+
'--bos-format=%m\t%f[7]\n',
|
107
|
+
'--bos-format %m\t%f[7]\n',
|
108
|
+
{:bos_format=>'%m\t%f[7]\n'} ].each do |opts|
|
109
|
+
assert_equal({:bos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
110
|
+
end
|
65
111
|
|
66
|
-
|
67
|
-
|
68
|
-
|
112
|
+
[ '-E %m\t%f[7]\n',
|
113
|
+
'-E%m\t%f[7]\n',
|
114
|
+
'--eos-format=%m\t%f[7]\n',
|
115
|
+
'--eos-format %m\t%f[7]\n',
|
116
|
+
{:eos_format=>'%m\t%f[7]\n'} ].each do |opts|
|
117
|
+
assert_equal({:eos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
118
|
+
end
|
69
119
|
|
70
|
-
|
71
|
-
|
72
|
-
|
120
|
+
[ '-S %m\t%f[7]\n',
|
121
|
+
'-S%m\t%f[7]\n',
|
122
|
+
'--eon-format=%m\t%f[7]\n',
|
123
|
+
'--eon-format %m\t%f[7]\n',
|
124
|
+
{:eon_format=>'%m\t%f[7]\n'} ].each do |opts|
|
125
|
+
assert_equal({:eon_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
126
|
+
end
|
73
127
|
|
74
|
-
|
75
|
-
|
76
|
-
|
128
|
+
[ '-x %m\t%f[7]\n',
|
129
|
+
'-x%m\t%f[7]\n',
|
130
|
+
'--unk-feature=%m\t%f[7]\n',
|
131
|
+
'--unk-feature %m\t%f[7]\n',
|
132
|
+
{:unk_feature=>'%m\t%f[7]\n'} ].each do |opts|
|
133
|
+
assert_equal({:unk_feature => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
134
|
+
end
|
77
135
|
|
78
|
-
|
79
|
-
|
80
|
-
|
136
|
+
[ '-b 102400',
|
137
|
+
'-b102400',
|
138
|
+
'--input-buffer-size=102400',
|
139
|
+
'--input-buffer-size 102400',
|
140
|
+
{:input_buffer_size=>102400} ].each do |opts|
|
141
|
+
assert_equal({:input_buffer_size => 102400}, Natto::MeCab.parse_mecab_options(opts))
|
142
|
+
end
|
81
143
|
|
82
|
-
|
83
|
-
|
84
|
-
|
144
|
+
[ '-C',
|
145
|
+
'--allocate-sentence',
|
146
|
+
{:allocate_sentence=>true} ].each do |opts|
|
147
|
+
assert_equal({:allocate_sentence => true}, Natto::MeCab.parse_mecab_options(opts))
|
148
|
+
end
|
85
149
|
|
86
|
-
|
87
|
-
|
88
|
-
|
150
|
+
[ '-t 0.42',
|
151
|
+
'-t0.42',
|
152
|
+
'--theta=0.42',
|
153
|
+
'--theta 0.42',
|
154
|
+
{:theta=>0.42} ].each do |opts|
|
155
|
+
assert_equal({:theta => 0.42}, Natto::MeCab.parse_mecab_options(opts))
|
156
|
+
end
|
89
157
|
|
90
|
-
|
91
|
-
|
92
|
-
|
158
|
+
[ '-c 42',
|
159
|
+
'-c42',
|
160
|
+
'--cost-factor=42',
|
161
|
+
'--cost-factor 42',
|
162
|
+
{:cost_factor=>42} ].each do |opts|
|
163
|
+
assert_equal({:cost_factor => 42}, Natto::MeCab.parse_mecab_options(opts))
|
164
|
+
end
|
93
165
|
|
94
166
|
assert_equal({}, Natto::MeCab.parse_mecab_options)
|
95
167
|
assert_equal({}, Natto::MeCab.parse_mecab_options(:unknown=>"ignore"))
|
@@ -115,7 +187,6 @@ class TestMeCab < Test::Unit::TestCase
|
|
115
187
|
assert_equal('--cost-factor=42', Natto::MeCab.build_options_str(:cost_factor=>42))
|
116
188
|
end
|
117
189
|
|
118
|
-
# Tests the construction and initial state of a Natto::MeCab instance.
|
119
190
|
def test_construction
|
120
191
|
m = nil
|
121
192
|
assert_nothing_raised do
|
@@ -166,7 +237,6 @@ class TestMeCab < Test::Unit::TestCase
|
|
166
237
|
assert_equal(opts, m.options)
|
167
238
|
end
|
168
239
|
|
169
|
-
# Tests the initialize method for error cases for erroneous mecab options.
|
170
240
|
def test_initialize_with_errors
|
171
241
|
assert_raise Natto::MeCabError do
|
172
242
|
Natto::MeCab.new(:output_format_type=>'not_defined_anywhere')
|
@@ -185,12 +255,10 @@ class TestMeCab < Test::Unit::TestCase
|
|
185
255
|
end
|
186
256
|
end
|
187
257
|
|
188
|
-
# Tests the mecab version string accessor class method of Natto::MeCab.
|
189
258
|
def test_version_accessor
|
190
|
-
assert_equal(
|
259
|
+
assert_equal(@ver, @m.version)
|
191
260
|
end
|
192
261
|
|
193
|
-
# Tests Natto::MeCab parsing using the --all-morphs option.
|
194
262
|
def test_all_morphs
|
195
263
|
m = Natto::MeCab.new(:all_morphs=>true)
|
196
264
|
expected = `echo #{TEST_STR} | mecab --all-morphs`.lines.to_a
|
@@ -202,7 +270,6 @@ class TestMeCab < Test::Unit::TestCase
|
|
202
270
|
assert_equal(expected, actual)
|
203
271
|
end
|
204
272
|
|
205
|
-
# Tests Natto::MeCab parsing (default parse_tostr).
|
206
273
|
def test_parse_tostr_default
|
207
274
|
expected = `echo #{TEST_STR} | mecab`.lines.to_a
|
208
275
|
expected.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
@@ -213,7 +280,6 @@ class TestMeCab < Test::Unit::TestCase
|
|
213
280
|
assert_equal(expected, actual)
|
214
281
|
end
|
215
282
|
|
216
|
-
# Tests Natto::MeCab parsing (default parse_tonode).
|
217
283
|
def test_parse_tonode_default
|
218
284
|
expected = `echo #{TEST_STR} | mecab`.lines.to_a
|
219
285
|
expected.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
data/test/natto/tc_mecabnode.rb
CHANGED
@@ -66,15 +66,12 @@ class TestMeCabNode < Test::Unit::TestCase
|
|
66
66
|
# in 1.8.n
|
67
67
|
def test_mecabnode_accessors
|
68
68
|
node = @nodes[0]
|
69
|
-
|
70
|
-
:prev,
|
69
|
+
[ :prev,
|
71
70
|
:next,
|
72
71
|
:enext,
|
73
72
|
:bnext,
|
74
73
|
:rpath,
|
75
74
|
:lpath,
|
76
|
-
:begin_node_list,
|
77
|
-
:end_node_list,
|
78
75
|
:surface,
|
79
76
|
:feature,
|
80
77
|
:id,
|
@@ -86,16 +83,14 @@ class TestMeCabNode < Test::Unit::TestCase
|
|
86
83
|
:char_type,
|
87
84
|
:stat,
|
88
85
|
:isbest,
|
89
|
-
:sentence_length,
|
90
86
|
:alpha,
|
91
87
|
:beta,
|
92
88
|
:prob,
|
93
89
|
:wcost,
|
94
|
-
:cost
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
assert_not_nil(node.respond_to? nomme )
|
90
|
+
:cost ].each do |nomme|
|
91
|
+
assert_nothing_raised do
|
92
|
+
node.send nomme
|
93
|
+
end
|
99
94
|
end
|
100
95
|
|
101
96
|
# NoMethodError will be raised for anything else!
|
metadata
CHANGED
@@ -1,39 +1,48 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: natto
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 61
|
5
5
|
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 9
|
9
|
+
- 3
|
10
|
+
version: 0.9.3
|
6
11
|
platform: ruby
|
7
|
-
authors:
|
12
|
+
authors:
|
8
13
|
- Brooke M. Fujita
|
9
14
|
autorequire:
|
10
15
|
bindir: bin
|
11
16
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
17
|
+
|
18
|
+
date: 2012-02-13 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
15
21
|
name: ffi
|
16
|
-
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
17
24
|
none: false
|
18
|
-
requirements:
|
19
|
-
- -
|
20
|
-
- !ruby/object:Gem::Version
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 1
|
29
|
+
segments:
|
30
|
+
- 0
|
31
|
+
- 6
|
32
|
+
- 3
|
21
33
|
version: 0.6.3
|
22
34
|
type: :runtime
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
interface). No compilation is necessary, and natto will run on CRuby (mri/yarv)
|
27
|
-
and JRuby (jvm) equally well, on any OS.
|
28
|
-
|
29
|
-
ruby で mecab を自由に利用できる natto をぜひ試してください。
|
30
|
-
|
31
|
-
'
|
35
|
+
version_requirements: *id001
|
36
|
+
description: "natto is a gem bridging Ruby and MeCab using FFI (foreign function interface). No compilation is necessary, and natto will run on CRuby (mri/yarv) and JRuby (jvm) equally well, on any OS.\n\
|
37
|
+
ruby \xE3\x81\xA7 mecab \xE3\x82\x92\xE8\x87\xAA\xE7\x94\xB1\xE3\x81\xAB\xE5\x88\xA9\xE7\x94\xA8\xE3\x81\xA7\xE3\x81\x8D\xE3\x82\x8B natto \xE3\x82\x92\xE3\x81\x9C\xE3\x81\xB2\xE8\xA9\xA6\xE3\x81\x97\xE3\x81\xA6\xE3\x81\x8F\xE3\x81\xA0\xE3\x81\x95\xE3\x81\x84\xE3\x80\x82\n"
|
32
38
|
email: buruzaemon@gmail.com
|
33
39
|
executables: []
|
40
|
+
|
34
41
|
extensions: []
|
42
|
+
|
35
43
|
extra_rdoc_files: []
|
36
|
-
|
44
|
+
|
45
|
+
files:
|
37
46
|
- lib/natto.rb
|
38
47
|
- lib/natto/binding.rb
|
39
48
|
- lib/natto/version.rb
|
@@ -47,32 +56,40 @@ files:
|
|
47
56
|
- CHANGELOG
|
48
57
|
- .yardopts
|
49
58
|
homepage: https://bitbucket.org/buruzaemon/natto/overview
|
50
|
-
licenses:
|
59
|
+
licenses:
|
51
60
|
- BSD
|
52
61
|
post_install_message:
|
53
62
|
rdoc_options: []
|
54
|
-
|
63
|
+
|
64
|
+
require_paths:
|
55
65
|
- lib
|
56
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
57
67
|
none: false
|
58
|
-
requirements:
|
59
|
-
- -
|
60
|
-
- !ruby/object:Gem::Version
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
hash: 57
|
72
|
+
segments:
|
73
|
+
- 1
|
74
|
+
- 8
|
75
|
+
- 7
|
61
76
|
version: 1.8.7
|
62
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
78
|
none: false
|
64
|
-
requirements:
|
65
|
-
- -
|
66
|
-
- !ruby/object:Gem::Version
|
67
|
-
|
68
|
-
|
69
|
-
-
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
hash: 3
|
83
|
+
segments:
|
84
|
+
- 0
|
85
|
+
version: "0"
|
86
|
+
requirements:
|
87
|
+
- MeCab, 0.993 or greater
|
70
88
|
- FFI, 0.6.3 or greater
|
71
89
|
rubyforge_project:
|
72
90
|
rubygems_version: 1.8.10
|
73
91
|
signing_key:
|
74
92
|
specification_version: 3
|
75
|
-
summary: natto combines the Ruby programming language with MeCab, the part-of-speech
|
76
|
-
|
77
|
-
test_files:
|
93
|
+
summary: natto combines the Ruby programming language with MeCab, the part-of-speech and morphological analyzer for the Japanese language.
|
94
|
+
test_files:
|
78
95
|
- test/test_natto.rb
|