natto 0.4.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +5 -0
- data/README.md +20 -13
- data/lib/natto/binding.rb +49 -8
- data/lib/natto/version.rb +10 -3
- data/lib/natto.rb +316 -62
- data/test/natto/tc_binding.rb +10 -3
- data/test/natto/tc_dictionaryinfo.rb +28 -12
- data/test/natto/tc_mecab.rb +56 -7
- data/test/natto/tc_mecabnode.rb +106 -0
- data/test/test_natto.rb +1 -0
- metadata +6 -5
data/CHANGELOG
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
## CHANGELOG
|
2
2
|
|
3
|
+
- __2011/02/2?__: 0.5.0 release.
|
4
|
+
- Added support for node parsing using blocks
|
5
|
+
- Added support for mecab options nbest, all-morphs
|
6
|
+
- Pulling support for mecab option partial, since it is more of a command-line feature
|
7
|
+
|
3
8
|
- __2011/01/27__: 0.4.1 release.
|
4
9
|
- Tweaking the description in natto.gemspec a bit
|
5
10
|
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@ A Tasty Ruby Binding with MeCab
|
|
4
4
|
## What is natto?
|
5
5
|
natto combines the [Ruby programming language](http://www.ruby-lang.org/) with [MeCab](http://mecab.sourceforge.net/), the part-of-speech and morphological analyzer for the Japanese language.
|
6
6
|
|
7
|
-
natto is a gem bridging Ruby and MeCab using FFI (foreign function interface). No compilation is necessary,
|
7
|
+
natto is a gem bridging Ruby and MeCab using FFI (foreign function interface). No compilation is necessary, as natto is _not_ a C extension. natto will run on CRuby (mri/yarv) and JRuby (jvm) equally well. natto will also run on Windows, Unix/Linux, and Mac.
|
8
8
|
|
9
9
|
You can learn more about [natto at Google Code Projects](http://code.google.com/p/natto/).
|
10
10
|
|
@@ -31,29 +31,36 @@ e.g., on Windows
|
|
31
31
|
set MECAB_PATH=C:\Program Files\MeCab\bin\libmecab.dll
|
32
32
|
e.g., for Cygwin
|
33
33
|
export MECAB_PATH=cygmecab-1
|
34
|
+
e.g., from within a Ruby program
|
35
|
+
ENV['MECAB_PATH']='/usr/local/lib/libmecab.so'
|
34
36
|
|
35
37
|
## Usage
|
36
38
|
require 'rubygems' if RUBY_VERSION.to_f < 1.9
|
37
39
|
require 'natto'
|
38
40
|
|
39
|
-
|
40
|
-
=> #<Natto::MeCab:
|
41
|
-
|
42
|
-
|
43
|
-
|
41
|
+
nm = Natto::MeCab.new
|
42
|
+
=> #<Natto::MeCab:0x28d30748
|
43
|
+
@ptr=#<FFI::Pointer address=0x28a97d50>, \
|
44
|
+
@options={}, \
|
45
|
+
@dicts=[#<Natto::DictionaryInfo:0x28d3061c
|
46
|
+
filename="/usr/local/lib/mecab/dic/ipadic/sys.dic",
|
47
|
+
charset="utf8">],
|
48
|
+
@version="0.98">
|
44
49
|
|
45
|
-
puts
|
46
|
-
=> 0.98
|
50
|
+
puts nm.version
|
51
|
+
=> "0.98"
|
47
52
|
|
48
|
-
sysdic =
|
53
|
+
sysdic = nm.dicts.first
|
49
54
|
|
50
55
|
puts sysdic.filename
|
51
|
-
=> /usr/local/lib/mecab/dic/ipadic/sys.dic
|
56
|
+
=> "/usr/local/lib/mecab/dic/ipadic/sys.dic"
|
52
57
|
|
53
58
|
puts sysdic.charset
|
54
|
-
=> utf8
|
59
|
+
=> "utf8"
|
55
60
|
|
56
|
-
|
61
|
+
nm.parse('暑い日にはもってこいの一品ですね。') do |n|
|
62
|
+
puts "#{n.surface}\t#{n.feature}"
|
63
|
+
end
|
57
64
|
暑い 形容詞,自立,*,*,形容詞・アウオ段,基本形,暑い,アツイ,アツイ
|
58
65
|
日 名詞,非自立,副詞可能,*,*,*,日,ヒ,ヒ
|
59
66
|
に 助詞,格助詞,一般,*,*,*,に,ニ,ニ
|
@@ -64,7 +71,7 @@ e.g., for Cygwin
|
|
64
71
|
です 助動詞,*,*,*,特殊・デス,基本形,です,デス,デス
|
65
72
|
ね 助詞,終助詞,*,*,*,*,ね,ネ,ネ
|
66
73
|
。 記号,句点,*,*,*,*,。,。,。
|
67
|
-
|
74
|
+
BOS/EOS,*,*,*,*,*,*,*,*
|
68
75
|
=> nil
|
69
76
|
|
70
77
|
## Contributing to natto
|
data/lib/natto/binding.rb
CHANGED
@@ -13,7 +13,7 @@ module Natto
|
|
13
13
|
# String name for the environment variable used by
|
14
14
|
# <tt>Natto</tt> to indicate the exact name / full path
|
15
15
|
# to the <tt>mecab</tt> library.
|
16
|
-
MECAB_PATH = 'MECAB_PATH'
|
16
|
+
MECAB_PATH = 'MECAB_PATH'.freeze
|
17
17
|
|
18
18
|
# @private
|
19
19
|
def self.included(base)
|
@@ -31,11 +31,13 @@ module Natto
|
|
31
31
|
# @raise [LoadError] if MECAB_PATH environment variable is not set in Windows
|
32
32
|
# <br/>
|
33
33
|
# e.g., for bash on UNIX/Linux
|
34
|
-
# export MECAB_PATH
|
34
|
+
# export MECAB_PATH=/usr/local/lib/libmecab.so
|
35
35
|
# e.g., on Windows
|
36
36
|
# set MECAB_PATH=C:\Program Files\MeCab\bin\libmecab.dll
|
37
37
|
# e.g., for Cygwin
|
38
38
|
# export MECAB_PATH=cygmecab-1
|
39
|
+
# e.g., from within a Ruby program
|
40
|
+
# ENV['MECAB_PATH']='/usr/local/lib/libmecab.so'
|
39
41
|
def self.find_library
|
40
42
|
host_os = RbConfig::CONFIG['host_os']
|
41
43
|
|
@@ -50,33 +52,72 @@ module Natto
|
|
50
52
|
|
51
53
|
ffi_lib(ENV[MECAB_PATH] || find_library)
|
52
54
|
|
53
|
-
attach_function :mecab_version, [], :string
|
54
55
|
attach_function :mecab_new2, [:string], :pointer
|
56
|
+
attach_function :mecab_version, [], :string
|
57
|
+
attach_function :mecab_strerror, [:pointer],:string
|
55
58
|
attach_function :mecab_destroy, [:pointer], :void
|
59
|
+
|
60
|
+
attach_function :mecab_set_theta, [:pointer, :float], :void
|
61
|
+
attach_function :mecab_set_lattice_level, [:pointer, :int], :void
|
62
|
+
attach_function :mecab_set_all_morphs, [:pointer, :int], :void
|
63
|
+
|
56
64
|
attach_function :mecab_sparse_tostr, [:pointer, :string], :string
|
57
|
-
attach_function :
|
65
|
+
attach_function :mecab_sparse_tonode, [:pointer, :string], :pointer
|
66
|
+
|
67
|
+
attach_function :mecab_nbest_init, [:pointer, :string], :int
|
68
|
+
attach_function :mecab_nbest_sparse_tostr, [:pointer, :int, :string], :string
|
69
|
+
attach_function :mecab_nbest_next_tonode, [:pointer], :pointer
|
70
|
+
|
58
71
|
attach_function :mecab_dictionary_info, [:pointer], :pointer
|
59
72
|
|
60
73
|
# @private
|
61
74
|
module ClassMethods
|
75
|
+
def mecab_new2(options_str)
|
76
|
+
Natto::Binding.mecab_new2(options_str)
|
77
|
+
end
|
78
|
+
|
62
79
|
def mecab_version
|
63
80
|
Natto::Binding.mecab_version
|
64
81
|
end
|
65
82
|
|
66
|
-
def
|
67
|
-
Natto::Binding.
|
83
|
+
def mecab_strerror(ptr)
|
84
|
+
Natto::Binding.mecab_strerror(ptr)
|
68
85
|
end
|
69
86
|
|
70
87
|
def mecab_destroy(ptr)
|
71
88
|
Natto::Binding.mecab_destroy(ptr)
|
72
89
|
end
|
73
90
|
|
91
|
+
def mecab_set_theta(ptr, t)
|
92
|
+
Natto::Binding.mecab_set_theta(ptr, t)
|
93
|
+
end
|
94
|
+
|
95
|
+
def mecab_set_lattice_level(ptr, ll)
|
96
|
+
Natto::Binding.mecab_set_lattice_level(ptr, ll)
|
97
|
+
end
|
98
|
+
|
99
|
+
def mecab_set_all_morphs(ptr, am)
|
100
|
+
Natto::Binding.mecab_set_all_morphs(ptr, am)
|
101
|
+
end
|
102
|
+
|
74
103
|
def mecab_sparse_tostr(ptr, str)
|
75
104
|
Natto::Binding.mecab_sparse_tostr(ptr, str)
|
76
105
|
end
|
106
|
+
|
107
|
+
def mecab_sparse_tonode(ptr, str)
|
108
|
+
Natto::Binding.mecab_sparse_tonode(ptr, str)
|
109
|
+
end
|
77
110
|
|
78
|
-
def
|
79
|
-
Natto::Binding.
|
111
|
+
def mecab_nbest_next_tonode(ptr)
|
112
|
+
Natto::Binding.mecab_nbest_next_tonode(ptr)
|
113
|
+
end
|
114
|
+
|
115
|
+
def mecab_nbest_init(ptr, str)
|
116
|
+
Natto::Binding.mecab_nbest_init(ptr, str)
|
117
|
+
end
|
118
|
+
|
119
|
+
def mecab_nbest_sparse_tostr(ptr, n, str)
|
120
|
+
Natto::Binding.mecab_nbest_sparse_tostr(ptr, n, str)
|
80
121
|
end
|
81
122
|
|
82
123
|
def mecab_dictionary_info(ptr)
|
data/lib/natto/version.rb
CHANGED
@@ -6,8 +6,15 @@
|
|
6
6
|
# <tt>Natto::MeCab</tt> is a wrapper class for the <tt>mecab</tt>
|
7
7
|
# parser.
|
8
8
|
#
|
9
|
-
# <tt>Natto::
|
10
|
-
#
|
9
|
+
# <tt>Natto::MeCabStruct</tt> is a base class for a <tt>mecab</tt>
|
10
|
+
# struct.
|
11
|
+
#
|
12
|
+
# <tt>Natto::MeCabNode</tt> is a wrapper for the struct representing
|
13
|
+
# a <tt>mecab</tt>-parsed node.
|
14
|
+
#
|
15
|
+
# <tt>Natto::DictionaryInfo</tt> is a wrapper for the struct
|
16
|
+
# representing a <tt>Natto::MeCab</tt> instance's related
|
17
|
+
# dictionary information.
|
11
18
|
#
|
12
19
|
# <tt>Natto::MeCabError</tt> is a general error class for the
|
13
20
|
# <tt>Natto</tt> module.
|
@@ -16,5 +23,5 @@
|
|
16
23
|
# which are made available via <tt>FFI</tt> bindings to <tt>mecab</tt>.
|
17
24
|
module Natto
|
18
25
|
# Version string for this Rubygem.
|
19
|
-
VERSION = "0.
|
26
|
+
VERSION = "0.5.0"
|
20
27
|
end
|
data/lib/natto.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
|
2
3
|
require 'rubygems' if RUBY_VERSION.to_f < 1.9
|
3
4
|
require 'natto/binding'
|
4
5
|
|
@@ -14,23 +15,25 @@ module Natto
|
|
14
15
|
# require 'rubygems' if RUBY_VERSION.to_f < 1.9
|
15
16
|
# require 'natto'
|
16
17
|
#
|
17
|
-
#
|
18
|
-
# => #<Natto::MeCab:
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
18
|
+
# nm = Natto::MeCab.new(:output_format_type=>'chasen2')
|
19
|
+
# => #<Natto::MeCab:0x28d3bdc8 \
|
20
|
+
# @ptr=#<FFI::Pointer address=0x28afb980>, \
|
21
|
+
# @options={:output_format_type=>"chasen2"}, \
|
22
|
+
# @dicts=[#<Natto::DictionaryInfo:0x289a1f14 \
|
23
|
+
# filename="/usr/local/lib/mecab/dic/ipadic/sys.dic", \
|
24
|
+
# charset="utf8">], \
|
25
|
+
# @version="0.98">
|
24
26
|
#
|
25
|
-
#
|
26
|
-
# puts
|
27
|
+
# nm.parse('ネバネバの組み合わせ美味しいです。') do |n|
|
28
|
+
# puts "#{n.surface}\t#{n.feature}"
|
27
29
|
# end
|
28
|
-
#
|
29
|
-
#
|
30
|
-
#
|
31
|
-
#
|
32
|
-
#
|
33
|
-
#
|
30
|
+
#
|
31
|
+
# ネバネバ 名詞,サ変接続,*,*,*,*,ネバネバ,ネバネバ,ネバネバ
|
32
|
+
# の 助詞,連体化,*,*,*,*,の,ノ,ノ
|
33
|
+
# 組み合わせ 名詞,一般,*,*,*,*,組み合わせ,クミアワセ,クミアワセ
|
34
|
+
# 美味しい 形容詞,自立,*,*,形容詞・イ段,基本形,美味しい,オイシイ,オイシイ
|
35
|
+
# です 助動詞,*,*,*,特殊・デス,基本形,です,デス,デス
|
36
|
+
# 。 記号,句点,*,*,*,*,。,。,。
|
34
37
|
#
|
35
38
|
class MeCab
|
36
39
|
include Natto::Binding
|
@@ -39,11 +42,11 @@ module Natto
|
|
39
42
|
|
40
43
|
# Supported options to the <tt>mecab</tt> parser.
|
41
44
|
# See the <tt>mecab</tt> help for more details.
|
42
|
-
SUPPORTED_OPTS = [
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
45
|
+
SUPPORTED_OPTS = [ :rcfile, :dicdir, :userdic, :lattice_level, :all_morphs,
|
46
|
+
:output_format_type, :node_format, :unk_format,
|
47
|
+
:bos_format, :eos_format, :eon_format, :unk_feature,
|
48
|
+
:input_buffer_size, :allocate_sentence, :nbest, :theta,
|
49
|
+
:cost_factor, :output ].freeze
|
47
50
|
|
48
51
|
# Initializes the wrapped <tt>mecab</tt> instance with the
|
49
52
|
# given <tt>options</tt> hash.
|
@@ -56,7 +59,6 @@ module Natto
|
|
56
59
|
# - :lattice_level -- lattice information level (integer, default 0)
|
57
60
|
# - :all_morphs -- output all morphs (default false)
|
58
61
|
# - :output_format_type -- output format type (wakati, chasen, yomi, etc.)
|
59
|
-
# - :partial -- partial parsing mode
|
60
62
|
# - :node_format -- user-defined node format
|
61
63
|
# - :unk_format -- user-defined unknown node format
|
62
64
|
# - :bos_format -- user-defined beginning-of-sentence format
|
@@ -65,20 +67,23 @@ module Natto
|
|
65
67
|
# - :unk_feature -- feature for unknown word
|
66
68
|
# - :input_buffer_size -- set input buffer size (default 8192)
|
67
69
|
# - :allocate_sentence -- allocate new memory for input sentence
|
68
|
-
# - :nbest -- output N best results (integer, default 1)
|
70
|
+
# - :nbest -- output N best results (integer, default 1), requires lattice level >= 1
|
69
71
|
# - :theta -- temperature parameter theta (float, default 0.75)
|
70
72
|
# - :cost_factor -- cost factor (integer, default 700)
|
73
|
+
# - :output -- set the output file name
|
71
74
|
#
|
72
75
|
# <i>Use single-quotes to preserve format options that contain escape chars.</i><br/>
|
73
76
|
# e.g.<br/>
|
74
77
|
#
|
75
|
-
#
|
76
|
-
# => #<Natto::MeCab:
|
77
|
-
#
|
78
|
-
#
|
79
|
-
#
|
78
|
+
#     nm = Natto::MeCab.new(:node_format=>'%m\t%f[7]\n')
|
79
|
+
# => #<Natto::MeCab:0x28d2ae10 @ptr=#<FFI::Pointer address=0x28a97980>, \
|
80
|
+
#       @options={:node_format=>"%m\\t%f[7]\\n"}, \
|
81
|
+
# @dicts=[#<Natto::DictionaryInfo:0x28d2a85c \
|
82
|
+
#         filename="/usr/local/lib/mecab/dic/ipadic/sys.dic", \
|
83
|
+
# charset="utf8">], \
|
84
|
+
# @version="0.98">
|
80
85
|
#
|
81
|
-
# puts
|
86
|
+
# puts nm.parse('簡単で美味しくて良いですよね。')
|
82
87
|
# 簡単 カンタン
|
83
88
|
# で デ
|
84
89
|
# 美味しくて オイシクテ
|
@@ -101,23 +106,92 @@ module Natto
|
|
101
106
|
@ptr = self.mecab_new2(opt_str)
|
102
107
|
raise MeCabError.new("Could not initialize MeCab with options: '#{opt_str}'") if @ptr.address == 0x0
|
103
108
|
|
109
|
+
# set mecab parsing options
|
110
|
+
self.mecab_set_theta(@ptr, @options[:theta].to_f) if @options[:theta]
|
111
|
+
self.mecab_set_lattice_level(@ptr, @options[:lattice_level].to_i) if @options[:lattice_level]
|
112
|
+
self.mecab_set_all_morphs(@ptr, 1) if @options[:all_morphs]
|
113
|
+
|
114
|
+
# set mecab parsing implementations
|
115
|
+
if @options[:nbest] && @options[:nbest] > 1
|
116
|
+
# N-Best parsing implementations
|
117
|
+
self.mecab_nbest_init(@ptr, str)
|
118
|
+
# nbest parsing requires lattice level >= 1
|
119
|
+
self.mecab_set_lattice_level(@ptr, (@options[:lattice_level] || 1))
|
120
|
+
@parse_tostr = lambda { |str|
|
121
|
+
return self.mecab_nbest_sparse_tostr(@ptr, @options[:nbest], str) ||
|
122
|
+
raise(MeCabError.new(self.mecab_strerror(@ptr)))
|
123
|
+
}
|
124
|
+
@parse_tonode = lambda { |str| return self.mecab_nbest_next_tonode(@ptr) }
|
125
|
+
else
|
126
|
+
# default parsing implementations
|
127
|
+
@parse_tostr = lambda { |str|
|
128
|
+
return self.mecab_sparse_tostr(@ptr, str) || raise(MeCabError.new(self.mecab_strerror(@ptr)))
|
129
|
+
}
|
130
|
+
@parse_tonode = lambda { |str| return self.mecab_sparse_tonode(@ptr, str) }
|
131
|
+
end
|
132
|
+
|
133
|
+
# set ref to dictionaries
|
104
134
|
@dicts << Natto::DictionaryInfo.new(Natto::Binding.mecab_dictionary_info(@ptr))
|
105
135
|
while @dicts.last.next.address != 0x0
|
106
136
|
@dicts << Natto::DictionaryInfo.new(@dicts.last.next)
|
107
137
|
end
|
108
138
|
|
139
|
+
# set ref to mecab version string
|
109
140
|
@version = self.mecab_version
|
110
141
|
|
142
|
+
# set Proc for freeing mecab pointer
|
111
143
|
ObjectSpace.define_finalizer(self, self.class.create_free_proc(@ptr))
|
112
144
|
end
|
113
|
-
|
114
|
-
# Parses the given string <tt>str</tt>.
|
145
|
+
|
146
|
+
# Parses the given string <tt>str</tt>. If a block is passed to this method,
|
147
|
+
# then node parsing will be used and each node yielded to the given block.
|
115
148
|
#
|
116
149
|
# @param [String] str
|
117
150
|
# @return parsing result from <tt>mecab</tt>
|
118
151
|
# @raise [MeCabError] if the <tt>mecab</tt> parser cannot parse the given string <tt>str</tt>
|
152
|
+
# @see MeCabNode
|
119
153
|
def parse(str)
|
120
|
-
|
154
|
+
if block_given?
|
155
|
+
m_node_ptr = @parse_tonode.call(str)
|
156
|
+
head = Natto::MeCabNode.new(m_node_ptr)
|
157
|
+
if head && head[:next].address != 0x0
|
158
|
+
node = Natto::MeCabNode.new(head[:next])
|
159
|
+
while (node.nil? == false)
|
160
|
+
yield node
|
161
|
+
if node[:next].address != 0x0
|
162
|
+
node = Natto::MeCabNode.new(node[:next])
|
163
|
+
else
|
164
|
+
break
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
else
|
169
|
+
result = @parse_tostr.call(str)
|
170
|
+
result.force_encoding(Encoding.default_external) if result.respond_to?(:encoding) && result.encoding!=Encoding.default_external
|
171
|
+
result
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
# Returns human-readable details for the wrapped <tt>mecab</tt> parser.
|
176
|
+
# Overrides <tt>Object#to_s</tt>.
|
177
|
+
#
|
178
|
+
# - encoded object id
|
179
|
+
# - FFI pointer to <tt>mecab</tt> object
|
180
|
+
# - options hash
|
181
|
+
# - list of dictionaries
|
182
|
+
# - MeCab version
|
183
|
+
#
|
184
|
+
# @return [String] encoded object id, FFI pointer, options hash, list of dictionaries, and MeCab version
|
185
|
+
def to_s
|
186
|
+
%(#{super.chop} @ptr=#{@ptr.to_s}, @options=#{@options.to_s}, @dicts=#{@dicts.to_s}, @version="#{@version.to_s}">)
|
187
|
+
end
|
188
|
+
|
189
|
+
# Overrides <tt>Object#inspect</tt>.
|
190
|
+
#
|
191
|
+
# @return [String] encoded object id, FFI pointer, options hash, list of dictionaries, and MeCab version
|
192
|
+
# @see #to_s
|
193
|
+
def inspect
|
194
|
+
self.to_s
|
121
195
|
end
|
122
196
|
|
123
197
|
# Returns a <tt>Proc</tt> that will properly free resources
|
@@ -138,14 +212,14 @@ module Natto
|
|
138
212
|
# be passed in the construction of <tt>mecab</tt>.
|
139
213
|
#
|
140
214
|
# @param [Hash] options
|
141
|
-
# @return
|
215
|
+
# @return [String] representation of the options to the <tt>mecab</tt> parser
|
142
216
|
def self.build_options_str(options={})
|
143
217
|
opt = []
|
144
218
|
SUPPORTED_OPTS.each do |k|
|
145
219
|
if options.has_key? k
|
146
220
|
key = k.to_s.gsub('_', '-')
|
147
|
-
# all-morphs
|
148
|
-
if %w( all-morphs
|
221
|
+
# all-morphs and allocate-sentence are just flags
|
222
|
+
if %w( all-morphs allocate-sentence ).include? key
|
149
223
|
opt << "--#{key}" if options[k]==true
|
150
224
|
else
|
151
225
|
opt << "--#{key}=#{options[k]}"
|
@@ -160,6 +234,21 @@ module Natto
|
|
160
234
|
# for the <tt>Natto</tt> module.
|
161
235
|
class MeCabError < RuntimeError; end
|
162
236
|
|
237
|
+
# <tt>MeCabStruct</tt> is a general base class
|
238
|
+
# for <tt>FFI::Struct</tt> objects in the <tt>Natto</tt> module.
|
239
|
+
class MeCabStruct < FFI::Struct
|
240
|
+
# Provides accessor methods for the members of the <tt>mecab</tt> struct.
|
241
|
+
#
|
242
|
+
# @param [String] attr_name
|
243
|
+
# @return member values for the <tt>mecab</tt> struct
|
244
|
+
# @raise [NoMethodError] if <tt>attr_name</tt> is not a member of this <tt>mecab</tt> struct
|
245
|
+
def method_missing(attr_name)
|
246
|
+
member_sym = attr_name.id2name.to_sym
|
247
|
+
return self[member_sym] if self.members.include?(member_sym)
|
248
|
+
raise(NoMethodError.new("undefined method '#{attr_name}' for #{self}"))
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
163
252
|
# <tt>DictionaryInfo</tt> is a wrapper for the structure holding
|
164
253
|
# the <tt>MeCab</tt> instance's related dictionary information.
|
165
254
|
#
|
@@ -180,27 +269,16 @@ module Natto
|
|
180
269
|
# <tt>mecab</tt> dictionary attributes can be obtained by
|
181
270
|
# using their corresponding accessor.
|
182
271
|
#
|
183
|
-
#
|
272
|
+
# nm = Natto::MeCab.new
|
184
273
|
#
|
185
|
-
# sysdic =
|
274
|
+
# sysdic = nm.dicts.first
|
186
275
|
#
|
187
276
|
# puts sysdic.filename
|
188
|
-
# => /usr/local/lib/mecab/dic/ipadic/sys.dic
|
277
|
+
# => "/usr/local/lib/mecab/dic/ipadic/sys.dic"
|
189
278
|
#
|
190
279
|
# puts sysdic.charset
|
191
|
-
# => utf8
|
192
|
-
|
193
|
-
# It is also possible to use the <tt>Symbol</tt> for the
|
194
|
-
# <tt>mecab</tt> dictionary member to index into the
|
195
|
-
# <tt>FFI::Struct</tt> layout associative array like so:
|
196
|
-
#
|
197
|
-
# puts sysdic[:filename]
|
198
|
-
# => /usr/local/lib/mecab/dic/ipadic/sys.dic
|
199
|
-
#
|
200
|
-
# puts sysdic[:charset]
|
201
|
-
# => utf8
|
202
|
-
#
|
203
|
-
class DictionaryInfo < FFI::Struct
|
280
|
+
# => "utf8"
|
281
|
+
class DictionaryInfo < MeCabStruct
|
204
282
|
|
205
283
|
layout :filename, :string,
|
206
284
|
:charset, :string,
|
@@ -211,7 +289,6 @@ module Natto
|
|
211
289
|
:version, :ushort,
|
212
290
|
:next, :pointer
|
213
291
|
|
214
|
-
# Hack to avoid that deprecation message Object#type thrown in Ruby 1.8.7.
|
215
292
|
if RUBY_VERSION.to_f < 1.9
|
216
293
|
alias_method :deprecated_type, :type
|
217
294
|
# <tt>Object#type</tt> override defined when <tt>RUBY_VERSION</tt> is
|
@@ -226,22 +303,199 @@ module Natto
|
|
226
303
|
end
|
227
304
|
end
|
228
305
|
|
229
|
-
#
|
306
|
+
# Returns human-readable details for this <tt>mecab</tt> dictionary.
|
307
|
+
# Overrides <tt>Object#to_s</tt>.
|
230
308
|
#
|
231
|
-
#
|
232
|
-
#
|
233
|
-
#
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
309
|
+
# - encoded object id
|
310
|
+
# - full-path dictionary filename
|
311
|
+
# - dictionary charset
|
312
|
+
#
|
313
|
+
# @return [String] encoded object id, dictionary filename, and charset
|
314
|
+
def to_s
|
315
|
+
%(#{super.chop} filename="#{self.filename}", charset="#{self.charset}">)
|
316
|
+
end
|
317
|
+
|
318
|
+
# Overrides <tt>Object#inspect</tt>.
|
319
|
+
#
|
320
|
+
# @return [String] encoded object id, dictionary filename, and charset
|
321
|
+
# @see #to_s
|
322
|
+
def inspect
|
323
|
+
self.to_s
|
238
324
|
end
|
325
|
+
end
|
326
|
+
|
327
|
+
# <tt>MeCabNode</tt> is a wrapper for the structure holding
|
328
|
+
# the parsed <tt>node</tt>.
|
329
|
+
#
|
330
|
+
# Values for the <tt>mecab</tt> node attributes may be
|
331
|
+
# obtained by using the following <tt>Symbol</tt>s as keys
|
332
|
+
# to the layout associative array of <tt>FFI::Struct</tt> members.
|
333
|
+
#
|
334
|
+
# - :prev
|
335
|
+
# - :next
|
336
|
+
# - :enext
|
337
|
+
# - :bnext
|
338
|
+
# - :rpath
|
339
|
+
# - :lpath
|
340
|
+
# - :begin_node_list
|
341
|
+
# - :end_node_list
|
342
|
+
# - :surface
|
343
|
+
# - :feature
|
344
|
+
# - :id
|
345
|
+
# - :length
|
346
|
+
# - :rlength
|
347
|
+
# - :rcAttr
|
348
|
+
# - :lcAttr
|
349
|
+
# - :posid
|
350
|
+
# - :char_type
|
351
|
+
# - :stat
|
352
|
+
# - :isbest
|
353
|
+
# - :sentence_length
|
354
|
+
# - :alpha
|
355
|
+
# - :beta
|
356
|
+
# - :beta
|
357
|
+
# - :prob
|
358
|
+
# - :wcost
|
359
|
+
# - :cost
|
360
|
+
# - :token
|
361
|
+
#
|
362
|
+
# <h2>Usage</h2>
|
363
|
+
# An instance of <tt>MeCabNode</tt> is yielded to a block
|
364
|
+
# used with <tt>MeCab#parse</tt>. Each resulting node is
|
365
|
+
# yielded to the block passed in, where the above-mentioned
|
366
|
+
# node attributes may be accessed.
|
367
|
+
#
|
368
|
+
# nm = Natto::MeCab.new
|
369
|
+
#
|
370
|
+
# nm.parse('めかぶの使い方がわからなくて困ってました。') do |n|
|
371
|
+
#     puts "#{n.surface}\t#{n.cost}"
|
372
|
+
# end
|
373
|
+
#
|
374
|
+
# め 7961
|
375
|
+
# かぶ 19303
|
376
|
+
# の 25995
|
377
|
+
# 使い方 29182
|
378
|
+
# が 28327
|
379
|
+
# わから 33625
|
380
|
+
# なく 34256
|
381
|
+
# て 36454
|
382
|
+
# 困っ 43797
|
383
|
+
# て 42178
|
384
|
+
# まし 46708
|
385
|
+
# た 46111
|
386
|
+
# 。 42677
|
387
|
+
# 41141
|
388
|
+
# => nil
|
389
|
+
#
|
390
|
+
# It is also possible to use the <tt>Symbol</tt> for the
|
391
|
+
# <tt>mecab</tt> node member to index into the
|
392
|
+
# <tt>FFI::Struct</tt> layout associative array like so:
|
393
|
+
#
|
394
|
+
# nm.parse('納豆に乗っけて頂きます!') {|n| puts n[:feature] }
|
395
|
+
#
|
396
|
+
# 名詞,一般,*,*,*,*,納豆,ナットウ,ナットー
|
397
|
+
# 助詞,格助詞,一般,*,*,*,に,ニ,ニ
|
398
|
+
# 動詞,自立,*,*,一段,連用形,乗っける,ノッケ,ノッケ
|
399
|
+
# 助詞,接続助詞,*,*,*,*,て,テ,テ
|
400
|
+
# 動詞,非自立,*,*,五段・カ行イ音便,連用形,頂く,イタダキ,イタダキ
|
401
|
+
# 助動詞,*,*,*,特殊・マス,基本形,ます,マス,マス
|
402
|
+
# 記号,一般,*,*,*,*,!,!,!
|
403
|
+
# BOS/EOS,*,*,*,*,*,*,*,*
|
404
|
+
# => nil
|
405
|
+
#
|
406
|
+
class MeCabNode < MeCabStruct
|
407
|
+
|
408
|
+
# Normal <tt>mecab</tt> node.
|
409
|
+
NOR_NODE = 0
|
410
|
+
# Unknown <tt>mecab</tt> node.
|
411
|
+
UNK_NODE = 1
|
412
|
+
# Beginning-of-sentence <tt>mecab</tt> node.
|
413
|
+
BOS_NODE = 2
|
414
|
+
# End-of-sentence <tt>mecab</tt> node.
|
415
|
+
EOS_NODE = 3
|
416
|
+
# End-of-NBest <tt>mecab</tt> node list.
|
417
|
+
EON_NODE = 4
|
239
418
|
|
240
|
-
|
419
|
+
layout :prev, :pointer,
|
420
|
+
:next, :pointer,
|
421
|
+
:enext, :pointer,
|
422
|
+
:bnext, :pointer,
|
423
|
+
:rpath, :pointer,
|
424
|
+
:lpath, :pointer,
|
425
|
+
:begin_node_list, :pointer,
|
426
|
+
:end_node_list, :pointer,
|
427
|
+
:surface, :string,
|
428
|
+
:feature, :string,
|
429
|
+
:id, :uint,
|
430
|
+
:length, :ushort,
|
431
|
+
:rlength, :ushort,
|
432
|
+
:rcAttr, :ushort,
|
433
|
+
:lcAttr, :ushort,
|
434
|
+
:posid, :ushort,
|
435
|
+
:char_type, :uchar,
|
436
|
+
:stat, :uchar,
|
437
|
+
:isbest, :uchar,
|
438
|
+
:sentence_length, :uint,
|
439
|
+
:alpha, :float,
|
440
|
+
:beta, :float,
|
441
|
+
:prob, :float,
|
442
|
+
:wcost, :short,
|
443
|
+
:cost, :long,
|
444
|
+
:token, :pointer
|
445
|
+
|
446
|
+
if RUBY_VERSION.to_f < 1.9
|
447
|
+
alias_method :deprecated_id, :id
|
448
|
+
# <tt>Object#id</tt> override defined when <tt>RUBY_VERSION</tt> is
|
449
|
+
# older than 1.9. This is a hack to avoid the <tt>Object#id</tt>
|
450
|
+
# deprecation warning thrown up in Ruby 1.8.7.
|
451
|
+
#
|
452
|
+
# <i>This method override is not defined when the Ruby interpreter
|
453
|
+
# is 1.9 or greater.</i>
|
454
|
+
# @return [Fixnum] <tt>mecab</tt> node id
|
455
|
+
def id
|
456
|
+
self[:id]
|
457
|
+
end
|
458
|
+
end
|
459
|
+
|
460
|
+
# Returns the <tt>surface</tt> value for this node.
|
461
|
+
#
|
462
|
+
# @return [String] <tt>mecab</tt> node surface value
|
463
|
+
def surface
|
464
|
+
if self[:surface] && self[:length] > 0
|
465
|
+
@surface ||= self[:surface].bytes.to_a()[0,self[:length]].pack('C*')
|
466
|
+
@surface.force_encoding(Encoding.default_external) if @surface.respond_to?(:encoding) && @surface.encoding!=Encoding.default_external
|
467
|
+
end
|
468
|
+
@surface
|
469
|
+
end
|
470
|
+
|
471
|
+
# Returns the <tt>feature</tt> value for this node.
|
472
|
+
#
|
473
|
+
# @return [String] <tt>mecab</tt> node feature value
|
474
|
+
def feature
|
475
|
+
@feature ||= self[:feature]
|
476
|
+
@feature.force_encoding(Encoding.default_external) if @feature.respond_to?(:encoding) && @feature.encoding!=Encoding.default_external
|
477
|
+
@feature
|
478
|
+
end
|
479
|
+
|
480
|
+
# Returns human-readable details for the <tt>mecab</tt> node.
|
481
|
+
# Overrides <tt>Object#to_s</tt>.
|
482
|
+
#
|
483
|
+
# - encoded object id
|
484
|
+
# - stat
|
485
|
+
# - surface
|
486
|
+
# - feature
|
241
487
|
#
|
242
|
-
# @return [String]
|
488
|
+
# @return [String] encoded object id, stat, surface, and feature
|
243
489
|
def to_s
|
244
|
-
self[:
|
490
|
+
%(#{super.chop} stat=#{self[:stat]}, surface="#{self.surface}", feature="#{self.feature}">)
|
491
|
+
end
|
492
|
+
|
493
|
+
# Overrides <tt>Object#inspect</tt>.
|
494
|
+
#
|
495
|
+
# @return [String] encoded object id, stat, surface, and feature
|
496
|
+
# @see #to_s
|
497
|
+
def inspect
|
498
|
+
self.to_s
|
245
499
|
end
|
246
500
|
end
|
247
501
|
end
|
data/test/natto/tc_binding.rb
CHANGED
@@ -21,11 +21,18 @@ class TestNattoBinding < Test::Unit::TestCase
|
|
21
21
|
# Tests for the inclusion of mecab methods made available
|
22
22
|
# to any classes including the Natto::Binding module.
|
23
23
|
def test_functions_included
|
24
|
-
[ :
|
25
|
-
:
|
24
|
+
[ :mecab_new2,
|
25
|
+
:mecab_version,
|
26
|
+
:mecab_strerror,
|
26
27
|
:mecab_destroy,
|
28
|
+
:mecab_set_theta,
|
29
|
+
:mecab_set_lattice_level,
|
30
|
+
:mecab_set_all_morphs,
|
27
31
|
:mecab_sparse_tostr,
|
28
|
-
:
|
32
|
+
:mecab_nbest_sparse_tostr,
|
33
|
+
:mecab_nbest_init,
|
34
|
+
:mecab_nbest_sparse_tostr,
|
35
|
+
:mecab_nbest_next_tonode,
|
29
36
|
:mecab_dictionary_info ].each do |f|
|
30
37
|
assert(@klass.respond_to? f)
|
31
38
|
end
|
@@ -4,11 +4,19 @@
|
|
4
4
|
# behavior of Natto::DictionaryInfo
|
5
5
|
class TestDictionaryInfo < Test::Unit::TestCase
|
6
6
|
def setup
|
7
|
-
|
7
|
+
m = Natto::MeCab.new
|
8
|
+
@dicts = m.dicts
|
9
|
+
|
10
|
+
out = `mecab -D`.lines.to_a
|
11
|
+
out.each do |l|
|
12
|
+
tokens = l.split("\t")
|
13
|
+
@sysdic_filename = tokens[1].strip if tokens[0] =~ /filename:/i
|
14
|
+
@sysdic_charset = tokens[1].strip if tokens[0] =~ /charset:/i
|
15
|
+
end
|
8
16
|
end
|
9
17
|
|
10
18
|
def teardown
|
11
|
-
@
|
19
|
+
@dicts = nil
|
12
20
|
end
|
13
21
|
|
14
22
|
# Tests the dictionaries accessor method of Natto::MeCab.
|
@@ -17,28 +25,36 @@ class TestDictionaryInfo < Test::Unit::TestCase
|
|
17
25
|
# b) system dictionary encoding is utf-8
|
18
26
|
# c) only dealing w/ case of 1 dictionary being used
|
19
27
|
def test_dictionaries_accessor
|
20
|
-
dicts
|
21
|
-
|
22
|
-
sysdic
|
23
|
-
assert_equal(
|
24
|
-
assert_equal('utf8', sysdic[:charset])
|
28
|
+
assert @dicts.empty? == false
|
29
|
+
sysdic = @dicts.first
|
30
|
+
assert_equal(@sysdic_filename, sysdic[:filename])
|
31
|
+
assert_equal(@sysdic_charset, sysdic[:charset])
|
25
32
|
assert_equal(0x0, sysdic[:next].address)
|
26
|
-
#assert_nil(sysdic.next)
|
27
33
|
end
|
28
34
|
|
29
35
|
# Tests the to_s method.
|
30
36
|
def test_to_s
|
31
|
-
|
37
|
+
# e.g., #<Natto::DictionaryInfo:0x288879bc filename="/usr/local/lib/mecab/dic/ipadic/sys.dic", charset="utf8">
|
38
|
+
assert(@dicts.first.to_s.include?("filename=\"#{@sysdic_filename}\", charset=\"#{@sysdic_charset}\""))
|
32
39
|
end
|
33
40
|
|
34
41
|
# Tests the accessors of Natto::DictionaryInfo.
|
35
42
|
# Note: Object#type is deprecated in 1.9.n, but comes with a warning
|
36
43
|
# in 1.8.n
|
37
44
|
def test_dictionary_info_member_accessors
|
38
|
-
sysdic = @
|
39
|
-
members =
|
45
|
+
sysdic = @dicts.first
|
46
|
+
members = [
|
47
|
+
:filename,
|
48
|
+
:charset,
|
49
|
+
:type,
|
50
|
+
:size,
|
51
|
+
:lsize,
|
52
|
+
:rsize,
|
53
|
+
:version,
|
54
|
+
:next
|
55
|
+
]
|
40
56
|
members.each do |nomme|
|
41
|
-
assert_not_nil(sysdic.send nomme
|
57
|
+
assert_not_nil(sysdic.send nomme )
|
42
58
|
end
|
43
59
|
|
44
60
|
# NoMethodError will be raised for anything else!
|
data/test/natto/tc_mecab.rb
CHANGED
@@ -1,8 +1,19 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
require 'rbconfig'
|
3
|
+
require 'nkf'
|
2
4
|
|
3
5
|
# TestMeCab encapsulates tests for the basic
|
4
6
|
# behavior of Natto::MeCab.
|
5
7
|
class TestMeCab < Test::Unit::TestCase
|
8
|
+
|
9
|
+
host_os = RbConfig::CONFIG['host_os']
|
10
|
+
# we need to transform from UTF-8 to SJIS if we are on Windows!
|
11
|
+
if host_os =~ /mswin|mingw/i
|
12
|
+
TEST_STR = NKF.nkf("-Ws", '試験ですよ、これが。')
|
13
|
+
else
|
14
|
+
TEST_STR = '試験ですよ、これが。'
|
15
|
+
end
|
16
|
+
|
6
17
|
def setup
|
7
18
|
@m = Natto::MeCab.new
|
8
19
|
end
|
@@ -37,9 +48,6 @@ class TestMeCab < Test::Unit::TestCase
|
|
37
48
|
res = Natto::MeCab.build_options_str(:output_format_type=>"natto")
|
38
49
|
assert_equal('--output-format-type=natto', res)
|
39
50
|
|
40
|
-
res = Natto::MeCab.build_options_str(:partial=>true)
|
41
|
-
assert_equal('--partial', res)
|
42
|
-
|
43
51
|
res = Natto::MeCab.build_options_str(:node_format=>'%m\t%f[7]\n')
|
44
52
|
assert_equal('--node-format=%m\t%f[7]\n', res)
|
45
53
|
|
@@ -76,10 +84,8 @@ class TestMeCab < Test::Unit::TestCase
|
|
76
84
|
res = Natto::MeCab.build_options_str(:output_format_type=>"natto",
|
77
85
|
:userdic=>"/some/file",
|
78
86
|
:dicdir=>"/some/other/file",
|
79
|
-
:partial=>true,
|
80
87
|
:all_morphs=>true)
|
81
|
-
assert_equal('--dicdir=/some/other/file --userdic=/some/file --all-morphs --output-format-type=natto
|
82
|
-
|
88
|
+
assert_equal('--dicdir=/some/other/file --userdic=/some/file --all-morphs --output-format-type=natto', res)
|
83
89
|
end
|
84
90
|
|
85
91
|
# Tests the construction and initial state of a Natto::MeCab instance.
|
@@ -96,7 +102,13 @@ class TestMeCab < Test::Unit::TestCase
|
|
96
102
|
end
|
97
103
|
assert_equal(opts, m.options)
|
98
104
|
|
99
|
-
opts = {:all_morphs=>true, :
|
105
|
+
opts = {:all_morphs=>true, :allocate_sentence=>true}
|
106
|
+
assert_nothing_raised do
|
107
|
+
m = Natto::MeCab.new(opts)
|
108
|
+
end
|
109
|
+
assert_equal(opts, m.options)
|
110
|
+
|
111
|
+
opts = {:lattice_level=>999}
|
100
112
|
assert_nothing_raised do
|
101
113
|
m = Natto::MeCab.new(opts)
|
102
114
|
end
|
@@ -126,4 +138,41 @@ class TestMeCab < Test::Unit::TestCase
|
|
126
138
|
def test_version_accessor
|
127
139
|
assert_equal('0.98', @m.version)
|
128
140
|
end
|
141
|
+
|
142
|
+
# Tests Natto::MeCab parsing using the --all-morphs option.
|
143
|
+
def test_all_morphs
|
144
|
+
m = Natto::MeCab.new(:all_morphs=>true)
|
145
|
+
expected = `echo #{TEST_STR} | mecab --all-morphs`.lines.to_a
|
146
|
+
expected.delete_if {|e| e =~ /^(EOS|BOS)/ }
|
147
|
+
|
148
|
+
actual = m.parse(TEST_STR).lines.to_a
|
149
|
+
actual.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
150
|
+
|
151
|
+
assert_equal(expected, actual)
|
152
|
+
end
|
153
|
+
|
154
|
+
# Tests Natto::MeCab parsing (default parse_tostr).
|
155
|
+
def test_parse_tostr_default
|
156
|
+
expected = `echo #{TEST_STR} | mecab`.lines.to_a
|
157
|
+
expected.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
158
|
+
|
159
|
+
actual = @m.parse(TEST_STR).lines.to_a
|
160
|
+
actual.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
161
|
+
|
162
|
+
assert_equal(expected, actual)
|
163
|
+
end
|
164
|
+
|
165
|
+
# Tests Natto::MeCab parsing (default parse_tonode).
|
166
|
+
def test_parse_tonode_default
|
167
|
+
expected = `echo #{TEST_STR} | mecab`.lines.to_a
|
168
|
+
expected.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
169
|
+
|
170
|
+
actual = []
|
171
|
+
@m.parse(TEST_STR) do |node|
|
172
|
+
actual << "#{node.surface}\t#{node.feature}\n"
|
173
|
+
end
|
174
|
+
actual.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
175
|
+
|
176
|
+
assert_equal(expected, actual)
|
177
|
+
end
|
129
178
|
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'rbconfig'
|
3
|
+
require 'nkf'
|
4
|
+
|
5
|
+
# TestMeCabNode encapsulates tests for the basic
|
6
|
+
# behavior of Natto::MeCabNode
|
7
|
+
class TestMeCabNode < Test::Unit::TestCase
|
8
|
+
|
9
|
+
host_os = RbConfig::CONFIG['host_os']
|
10
|
+
# we need to transform from UTF-8 to SJIS if we are on Windows!
|
11
|
+
if host_os =~ /mswin|mingw/i
|
12
|
+
TEST_STR = NKF.nkf("-Ws", '試験ですよ、これが。')
|
13
|
+
else
|
14
|
+
TEST_STR = '試験ですよ、これが。'
|
15
|
+
end
|
16
|
+
|
17
|
+
def setup
|
18
|
+
nm = Natto::MeCab.new
|
19
|
+
@nodes = []
|
20
|
+
nm.parse(TEST_STR) { |n| @nodes << n }
|
21
|
+
end
|
22
|
+
|
23
|
+
def teardown
|
24
|
+
@nodes = nil
|
25
|
+
end
|
26
|
+
|
27
|
+
# Tests the surface and feature accessors methods.
|
28
|
+
def test_surface_and_feature_accessors
|
29
|
+
raw = `echo #{TEST_STR} | mecab`.lines.to_a
|
30
|
+
raw.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
31
|
+
expected = {}
|
32
|
+
raw.each do |l|
|
33
|
+
tokens = l.split("\t")
|
34
|
+
expected[tokens[0]]=tokens[1].strip
|
35
|
+
end
|
36
|
+
|
37
|
+
actual = {}
|
38
|
+
@nodes.each do |n|
|
39
|
+
actual[n.surface]=n.feature if (n.stat==Natto::MeCabNode::NOR_NODE ||
|
40
|
+
n.stat==Natto::MeCabNode::UNK_NODE)
|
41
|
+
end
|
42
|
+
|
43
|
+
assert_equal(expected, actual)
|
44
|
+
end
|
45
|
+
|
46
|
+
# Tests MeCabNode#surface to show that it is consistent
|
47
|
+
# no matter how many times it is invoked.
|
48
|
+
def test_manysurfaces
|
49
|
+
@nodes.each do |n|
|
50
|
+
expected = n.surface
|
51
|
+
5.times { assert_equal(expected, n.surface) }
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
# Tests MeCabNode#feature to show that it is consistent
|
56
|
+
# no matter how many times it is invoked.
|
57
|
+
def test_manyfeature
|
58
|
+
@nodes.each do |n|
|
59
|
+
expected = n.feature
|
60
|
+
5.times { assert_equal(expected, n.feature) }
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
# Tests that the accessors of Natto::MeCabNode exist.
|
65
|
+
# Note: Object#id is deprecated in 1.9.n, but comes with a warning
|
66
|
+
# in 1.8.n
|
67
|
+
def test_mecabnode_accessors
|
68
|
+
node = @nodes[0]
|
69
|
+
members = [
|
70
|
+
:prev,
|
71
|
+
:next,
|
72
|
+
:enext,
|
73
|
+
:bnext,
|
74
|
+
:rpath,
|
75
|
+
:lpath,
|
76
|
+
:begin_node_list,
|
77
|
+
:end_node_list,
|
78
|
+
:surface,
|
79
|
+
:feature,
|
80
|
+
:id,
|
81
|
+
:length,
|
82
|
+
:rlength,
|
83
|
+
:rcAttr,
|
84
|
+
:lcAttr,
|
85
|
+
:posid,
|
86
|
+
:char_type,
|
87
|
+
:stat,
|
88
|
+
:isbest,
|
89
|
+
:sentence_length,
|
90
|
+
:alpha,
|
91
|
+
:beta,
|
92
|
+
:prob,
|
93
|
+
:wcost,
|
94
|
+
:cost,
|
95
|
+
:token
|
96
|
+
]
|
97
|
+
members.each do |nomme|
|
98
|
+
assert_not_nil(node.respond_to? nomme )
|
99
|
+
end
|
100
|
+
|
101
|
+
# NoMethodError will be raised for anything else!
|
102
|
+
assert_raise NoMethodError do
|
103
|
+
node.send :unknown_attr
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
data/test/test_natto.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: natto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 11
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.
|
8
|
+
- 5
|
9
|
+
- 0
|
10
|
+
version: 0.5.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Brooke M. Fujita
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-02-26 00:00:00 +09:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -51,6 +51,7 @@ files:
|
|
51
51
|
- test/natto/tc_binding.rb
|
52
52
|
- test/natto/tc_dictionaryinfo.rb
|
53
53
|
- test/natto/tc_mecab.rb
|
54
|
+
- test/natto/tc_mecabnode.rb
|
54
55
|
- README.md
|
55
56
|
- LICENSE
|
56
57
|
- CHANGELOG
|