natto 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +5 -0
- data/README.md +20 -13
- data/lib/natto/binding.rb +49 -8
- data/lib/natto/version.rb +10 -3
- data/lib/natto.rb +316 -62
- data/test/natto/tc_binding.rb +10 -3
- data/test/natto/tc_dictionaryinfo.rb +28 -12
- data/test/natto/tc_mecab.rb +56 -7
- data/test/natto/tc_mecabnode.rb +106 -0
- data/test/test_natto.rb +1 -0
- metadata +6 -5
data/CHANGELOG
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
## CHANGELOG
|
2
2
|
|
3
|
+
- __2011/02/2?__: 0.5.0 release.
|
4
|
+
- Added support for node parsing using blocks
|
5
|
+
- Added support for mecab options nbest, all-morphs
|
6
|
+
- Pulling support for mecab option partial, since it is more of a command-line feature
|
7
|
+
|
3
8
|
- __2011/01/27__: 0.4.1 release.
|
4
9
|
- Tweaking the description in natto.gemspec a bit
|
5
10
|
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@ A Tasty Ruby Binding with MeCab
|
|
4
4
|
## What is natto?
|
5
5
|
natto combines the [Ruby programming language](http://www.ruby-lang.org/) with [MeCab](http://mecab.sourceforge.net/), the part-of-speech and morphological analyzer for the Japanese language.
|
6
6
|
|
7
|
-
natto is a gem bridging Ruby and MeCab using FFI (foreign function interface). No compilation is necessary,
|
7
|
+
natto is a gem bridging Ruby and MeCab using FFI (foreign function interface). No compilation is necessary, as natto is _not_ a C extension. natto will run on CRuby (mri/yarv) and JRuby (jvm) equally well. natto will also run on Windows, Unix/Linux, and Mac.
|
8
8
|
|
9
9
|
You can learn more about [natto at Google Code Projects](http://code.google.com/p/natto/).
|
10
10
|
|
@@ -31,29 +31,36 @@ e.g., on Windows
|
|
31
31
|
set MECAB_PATH=C:\Program Files\MeCab\bin\libmecab.dll
|
32
32
|
e.g., for Cygwin
|
33
33
|
export MECAB_PATH=cygmecab-1
|
34
|
+
e.g., from within a Ruby program
|
35
|
+
ENV['MECAB_PATH']='/usr/local/lib/libmecab.so'
|
34
36
|
|
35
37
|
## Usage
|
36
38
|
require 'rubygems' if RUBY_VERSION.to_f < 1.9
|
37
39
|
require 'natto'
|
38
40
|
|
39
|
-
|
40
|
-
=> #<Natto::MeCab:
|
41
|
-
|
42
|
-
|
43
|
-
|
41
|
+
nm = Natto::MeCab.new
|
42
|
+
=> #<Natto::MeCab:0x28d30748
|
43
|
+
@ptr=#<FFI::Pointer address=0x28a97d50>, \
|
44
|
+
@options={}, \
|
45
|
+
@dicts=[#<Natto::DictionaryInfo:0x28d3061c
|
46
|
+
filename="/usr/local/lib/mecab/dic/ipadic/sys.dic",
|
47
|
+
charset="utf8">],
|
48
|
+
@version="0.98">
|
44
49
|
|
45
|
-
puts
|
46
|
-
=> 0.98
|
50
|
+
puts nm.version
|
51
|
+
=> "0.98"
|
47
52
|
|
48
|
-
sysdic =
|
53
|
+
sysdic = nm.dicts.first
|
49
54
|
|
50
55
|
puts sysdic.filename
|
51
|
-
=> /usr/local/lib/mecab/dic/ipadic/sys.dic
|
56
|
+
=> "/usr/local/lib/mecab/dic/ipadic/sys.dic"
|
52
57
|
|
53
58
|
puts sysdic.charset
|
54
|
-
=> utf8
|
59
|
+
=> "utf8"
|
55
60
|
|
56
|
-
|
61
|
+
nm.parse('暑い日にはもってこいの一品ですね。') do |n|
|
62
|
+
puts "#{n.surface}\t#{n.feature}"
|
63
|
+
end
|
57
64
|
暑い 形容詞,自立,*,*,形容詞・アウオ段,基本形,暑い,アツイ,アツイ
|
58
65
|
日 名詞,非自立,副詞可能,*,*,*,日,ヒ,ヒ
|
59
66
|
に 助詞,格助詞,一般,*,*,*,に,ニ,ニ
|
@@ -64,7 +71,7 @@ e.g., for Cygwin
|
|
64
71
|
です 助動詞,*,*,*,特殊・デス,基本形,です,デス,デス
|
65
72
|
ね 助詞,終助詞,*,*,*,*,ね,ネ,ネ
|
66
73
|
。 記号,句点,*,*,*,*,。,。,。
|
67
|
-
|
74
|
+
BOS/EOS,*,*,*,*,*,*,*,*
|
68
75
|
=> nil
|
69
76
|
|
70
77
|
## Contributing to natto
|
data/lib/natto/binding.rb
CHANGED
@@ -13,7 +13,7 @@ module Natto
|
|
13
13
|
# String name for the environment variable used by
|
14
14
|
# <tt>Natto</tt> to indicate the exact name / full path
|
15
15
|
# to the <tt>mecab</tt> library.
|
16
|
-
MECAB_PATH = 'MECAB_PATH'
|
16
|
+
MECAB_PATH = 'MECAB_PATH'.freeze
|
17
17
|
|
18
18
|
# @private
|
19
19
|
def self.included(base)
|
@@ -31,11 +31,13 @@ module Natto
|
|
31
31
|
# @raise [LoadError] if MECAB_PATH environment variable is not set in Windows
|
32
32
|
# <br/>
|
33
33
|
# e.g., for bash on UNIX/Linux
|
34
|
-
# export MECAB_PATH
|
34
|
+
# export MECAB_PATH=/usr/local/lib/libmecab.so
|
35
35
|
# e.g., on Windows
|
36
36
|
# set MECAB_PATH=C:\Program Files\MeCab\bin\libmecab.dll
|
37
37
|
# e.g., for Cygwin
|
38
38
|
# export MECAB_PATH=cygmecab-1
|
39
|
+
# e.g., from within a Ruby program
|
40
|
+
# ENV['MECAB_PATH']='/usr/local/lib/libmecab.so'
|
39
41
|
def self.find_library
|
40
42
|
host_os = RbConfig::CONFIG['host_os']
|
41
43
|
|
@@ -50,33 +52,72 @@ module Natto
|
|
50
52
|
|
51
53
|
ffi_lib(ENV[MECAB_PATH] || find_library)
|
52
54
|
|
53
|
-
attach_function :mecab_version, [], :string
|
54
55
|
attach_function :mecab_new2, [:string], :pointer
|
56
|
+
attach_function :mecab_version, [], :string
|
57
|
+
attach_function :mecab_strerror, [:pointer],:string
|
55
58
|
attach_function :mecab_destroy, [:pointer], :void
|
59
|
+
|
60
|
+
attach_function :mecab_set_theta, [:pointer, :float], :void
|
61
|
+
attach_function :mecab_set_lattice_level, [:pointer, :int], :void
|
62
|
+
attach_function :mecab_set_all_morphs, [:pointer, :int], :void
|
63
|
+
|
56
64
|
attach_function :mecab_sparse_tostr, [:pointer, :string], :string
|
57
|
-
attach_function :
|
65
|
+
attach_function :mecab_sparse_tonode, [:pointer, :string], :pointer
|
66
|
+
|
67
|
+
attach_function :mecab_nbest_init, [:pointer, :string], :int
|
68
|
+
attach_function :mecab_nbest_sparse_tostr, [:pointer, :int, :string], :string
|
69
|
+
attach_function :mecab_nbest_next_tonode, [:pointer], :pointer
|
70
|
+
|
58
71
|
attach_function :mecab_dictionary_info, [:pointer], :pointer
|
59
72
|
|
60
73
|
# @private
|
61
74
|
module ClassMethods
|
75
|
+
def mecab_new2(options_str)
|
76
|
+
Natto::Binding.mecab_new2(options_str)
|
77
|
+
end
|
78
|
+
|
62
79
|
def mecab_version
|
63
80
|
Natto::Binding.mecab_version
|
64
81
|
end
|
65
82
|
|
66
|
-
def
|
67
|
-
Natto::Binding.
|
83
|
+
def mecab_strerror(ptr)
|
84
|
+
Natto::Binding.mecab_strerror(ptr)
|
68
85
|
end
|
69
86
|
|
70
87
|
def mecab_destroy(ptr)
|
71
88
|
Natto::Binding.mecab_destroy(ptr)
|
72
89
|
end
|
73
90
|
|
91
|
+
def mecab_set_theta(ptr, t)
|
92
|
+
Natto::Binding.mecab_set_theta(ptr, t)
|
93
|
+
end
|
94
|
+
|
95
|
+
def mecab_set_lattice_level(ptr, ll)
|
96
|
+
Natto::Binding.mecab_set_lattice_level(ptr, ll)
|
97
|
+
end
|
98
|
+
|
99
|
+
def mecab_set_all_morphs(ptr, am)
|
100
|
+
Natto::Binding.mecab_set_all_morphs(ptr, am)
|
101
|
+
end
|
102
|
+
|
74
103
|
def mecab_sparse_tostr(ptr, str)
|
75
104
|
Natto::Binding.mecab_sparse_tostr(ptr, str)
|
76
105
|
end
|
106
|
+
|
107
|
+
def mecab_sparse_tonode(ptr, str)
|
108
|
+
Natto::Binding.mecab_sparse_tonode(ptr, str)
|
109
|
+
end
|
77
110
|
|
78
|
-
def
|
79
|
-
Natto::Binding.
|
111
|
+
def mecab_nbest_next_tonode(ptr)
|
112
|
+
Natto::Binding.mecab_nbest_next_tonode(ptr)
|
113
|
+
end
|
114
|
+
|
115
|
+
def mecab_nbest_init(ptr, str)
|
116
|
+
Natto::Binding.mecab_nbest_init(ptr, str)
|
117
|
+
end
|
118
|
+
|
119
|
+
def mecab_nbest_sparse_tostr(ptr, n, str)
|
120
|
+
Natto::Binding.mecab_nbest_sparse_tostr(ptr, n, str)
|
80
121
|
end
|
81
122
|
|
82
123
|
def mecab_dictionary_info(ptr)
|
data/lib/natto/version.rb
CHANGED
@@ -6,8 +6,15 @@
|
|
6
6
|
# <tt>Natto::MeCab</tt> is a wrapper class for the <tt>mecab</tt>
|
7
7
|
# parser.
|
8
8
|
#
|
9
|
-
# <tt>Natto::
|
10
|
-
#
|
9
|
+
# <tt>Natto::MeCabStruct</tt> is a base class for a <tt>mecab</tt>
|
10
|
+
# struct.
|
11
|
+
#
|
12
|
+
# <tt>Natto::MeCabNode</tt> is a wrapper for the struct representing
|
13
|
+
# a <tt>mecab</tt>-parsed node.
|
14
|
+
#
|
15
|
+
# <tt>Natto::DictionaryInfo</tt> is a wrapper for the struct
|
16
|
+
# representing a <tt>Natto::MeCab</tt> instance's related
|
17
|
+
# dictionary information.
|
11
18
|
#
|
12
19
|
# <tt>Natto::MeCabError</tt> is a general error class for the
|
13
20
|
# <tt>Natto</tt> module.
|
@@ -16,5 +23,5 @@
|
|
16
23
|
# which are made available via <tt>FFI</tt> bindings to <tt>mecab</tt>.
|
17
24
|
module Natto
|
18
25
|
# Version string for this Rubygem.
|
19
|
-
VERSION = "0.
|
26
|
+
VERSION = "0.5.0"
|
20
27
|
end
|
data/lib/natto.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
|
2
3
|
require 'rubygems' if RUBY_VERSION.to_f < 1.9
|
3
4
|
require 'natto/binding'
|
4
5
|
|
@@ -14,23 +15,25 @@ module Natto
|
|
14
15
|
# require 'rubygems' if RUBY_VERSION.to_f < 1.9
|
15
16
|
# require 'natto'
|
16
17
|
#
|
17
|
-
#
|
18
|
-
# => #<Natto::MeCab:
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
18
|
+
# nm = Natto::MeCab.new(:output_format_type=>'chasen2')
|
19
|
+
# => #<Natto::MeCab:0x28d3bdc8 \
|
20
|
+
# @ptr=#<FFI::Pointer address=0x28afb980>, \
|
21
|
+
# @options={:output_format_type=>"chasen2"}, \
|
22
|
+
# @dicts=[#<Natto::DictionaryInfo:0x289a1f14 \
|
23
|
+
# filename="/usr/local/lib/mecab/dic/ipadic/sys.dic", \
|
24
|
+
# charset="utf8">], \
|
25
|
+
# @version="0.98">
|
24
26
|
#
|
25
|
-
#
|
26
|
-
# puts
|
27
|
+
# nm.parse('ネバネバの組み合わせ美味しいです。') do |n|
|
28
|
+
# puts "#{n.surface}\t#{n.feature}"
|
27
29
|
# end
|
28
|
-
#
|
29
|
-
#
|
30
|
-
#
|
31
|
-
#
|
32
|
-
#
|
33
|
-
#
|
30
|
+
#
|
31
|
+
# ネバネバ 名詞,サ変接続,*,*,*,*,ネバネバ,ネバネバ,ネバネバ
|
32
|
+
# の 助詞,連体化,*,*,*,*,の,ノ,ノ
|
33
|
+
# 組み合わせ 名詞,一般,*,*,*,*,組み合わせ,クミアワセ,クミアワセ
|
34
|
+
# 美味しい 形容詞,自立,*,*,形容詞・イ段,基本形,美味しい,オイシイ,オイシイ
|
35
|
+
# です 助動詞,*,*,*,特殊・デス,基本形,です,デス,デス
|
36
|
+
# 。 記号,句点,*,*,*,*,。,。,。
|
34
37
|
#
|
35
38
|
class MeCab
|
36
39
|
include Natto::Binding
|
@@ -39,11 +42,11 @@ module Natto
|
|
39
42
|
|
40
43
|
# Supported options to the <tt>mecab</tt> parser.
|
41
44
|
# See the <tt>mecab</tt> help for more details.
|
42
|
-
SUPPORTED_OPTS = [
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
45
|
+
SUPPORTED_OPTS = [ :rcfile, :dicdir, :userdic, :lattice_level, :all_morphs,
|
46
|
+
:output_format_type, :node_format, :unk_format,
|
47
|
+
:bos_format, :eos_format, :eon_format, :unk_feature,
|
48
|
+
:input_buffer_size, :allocate_sentence, :nbest, :theta,
|
49
|
+
:cost_factor, :output ].freeze
|
47
50
|
|
48
51
|
# Initializes the wrapped <tt>mecab</tt> instance with the
|
49
52
|
# given <tt>options</tt> hash.
|
@@ -56,7 +59,6 @@ module Natto
|
|
56
59
|
# - :lattice_level -- lattice information level (integer, default 0)
|
57
60
|
# - :all_morphs -- output all morphs (default false)
|
58
61
|
# - :output_format_type -- output format type (wakati, chasen, yomi, etc.)
|
59
|
-
# - :partial -- partial parsing mode
|
60
62
|
# - :node_format -- user-defined node format
|
61
63
|
# - :unk_format -- user-defined unknown node format
|
62
64
|
# - :bos_format -- user-defined beginning-of-sentence format
|
@@ -65,20 +67,23 @@ module Natto
|
|
65
67
|
# - :unk_feature -- feature for unknown word
|
66
68
|
# - :input_buffer_size -- set input buffer size (default 8192)
|
67
69
|
# - :allocate_sentence -- allocate new memory for input sentence
|
68
|
-
# - :nbest -- output N best results (integer, default 1)
|
70
|
+
# - :nbest -- output N best results (integer, default 1), requires lattice level >= 1
|
69
71
|
# - :theta -- temperature parameter theta (float, default 0.75)
|
70
72
|
# - :cost_factor -- cost factor (integer, default 700)
|
73
|
+
# - :output -- set the output file name
|
71
74
|
#
|
72
75
|
# <i>Use single-quotes to preserve format options that contain escape chars.</i><br/>
|
73
76
|
# e.g.<br/>
|
74
77
|
#
|
75
|
-
#
|
76
|
-
# => #<Natto::MeCab:
|
77
|
-
#
|
78
|
-
#
|
79
|
-
#
|
78
|
+
# nm = Natto::MeCab.new(:node_format=>'%m¥t%f[7]¥n')
|
79
|
+
# => #<Natto::MeCab:0x28d2ae10 @ptr=#<FFI::Pointer address=0x28a97980>, \
|
80
|
+
# @options={:node_format=>"%m¥t%f[7]¥n"}, \
|
81
|
+
# @dicts=[#<Natto::DictionaryInfo:0x28d2a85c \
|
82
|
+
# filename="/usr/local/lib/mecab/dic/ipadic/sys.dic", \
|
83
|
+
# charset="utf8">], \
|
84
|
+
# @version="0.98">
|
80
85
|
#
|
81
|
-
# puts
|
86
|
+
# puts nm.parse('簡単で美味しくて良いですよね。')
|
82
87
|
# 簡単 カンタン
|
83
88
|
# で デ
|
84
89
|
# 美味しくて オイシクテ
|
@@ -101,23 +106,92 @@ module Natto
|
|
101
106
|
@ptr = self.mecab_new2(opt_str)
|
102
107
|
raise MeCabError.new("Could not initialize MeCab with options: '#{opt_str}'") if @ptr.address == 0x0
|
103
108
|
|
109
|
+
# set mecab parsing options
|
110
|
+
self.mecab_set_theta(@ptr, @options[:theta].to_f) if @options[:theta]
|
111
|
+
self.mecab_set_lattice_level(@ptr, @options[:lattice_level].to_i) if @options[:lattice_level]
|
112
|
+
self.mecab_set_all_morphs(@ptr, 1) if @options[:all_morphs]
|
113
|
+
|
114
|
+
# set mecab parsing implementations
|
115
|
+
if @options[:nbest] && @options[:nbest] > 1
|
116
|
+
# N-Best parsing implementations
|
117
|
+
self.mecab_nbest_init(@ptr, str)
|
118
|
+
# nbest parsing requires lattice level >= 1
|
119
|
+
self.mecab_set_lattice_level(@ptr, (@options[:lattice_level] || 1))
|
120
|
+
@parse_tostr = lambda { |str|
|
121
|
+
return self.mecab_nbest_sparse_tostr(@ptr, @options[:nbest], str) ||
|
122
|
+
raise(MeCabError.new(self.mecab_strerror(@ptr)))
|
123
|
+
}
|
124
|
+
@parse_tonode = lambda { |str| return self.mecab_nbest_next_tonode(@ptr) }
|
125
|
+
else
|
126
|
+
# default parsing implementations
|
127
|
+
@parse_tostr = lambda { |str|
|
128
|
+
return self.mecab_sparse_tostr(@ptr, str) || raise(MeCabError.new(self.mecab_strerror(@ptr)))
|
129
|
+
}
|
130
|
+
@parse_tonode = lambda { |str| return self.mecab_sparse_tonode(@ptr, str) }
|
131
|
+
end
|
132
|
+
|
133
|
+
# set ref to dictionaries
|
104
134
|
@dicts << Natto::DictionaryInfo.new(Natto::Binding.mecab_dictionary_info(@ptr))
|
105
135
|
while @dicts.last.next.address != 0x0
|
106
136
|
@dicts << Natto::DictionaryInfo.new(@dicts.last.next)
|
107
137
|
end
|
108
138
|
|
139
|
+
# set ref to mecab version string
|
109
140
|
@version = self.mecab_version
|
110
141
|
|
142
|
+
# set Proc for freeing mecab pointer
|
111
143
|
ObjectSpace.define_finalizer(self, self.class.create_free_proc(@ptr))
|
112
144
|
end
|
113
|
-
|
114
|
-
# Parses the given string <tt>str</tt>.
|
145
|
+
|
146
|
+
# Parses the given string <tt>str</tt>. If a block is passed to this method,
|
147
|
+
# then node parsing will be used and each node yielded to the given block.
|
115
148
|
#
|
116
149
|
# @param [String] str
|
117
150
|
# @return parsing result from <tt>mecab</tt>
|
118
151
|
# @raise [MeCabError] if the <tt>mecab</tt> parser cannot parse the given string <tt>str</tt>
|
152
|
+
# @see MeCabNode
|
119
153
|
def parse(str)
|
120
|
-
|
154
|
+
if block_given?
|
155
|
+
m_node_ptr = @parse_tonode.call(str)
|
156
|
+
head = Natto::MeCabNode.new(m_node_ptr)
|
157
|
+
if head && head[:next].address != 0x0
|
158
|
+
node = Natto::MeCabNode.new(head[:next])
|
159
|
+
while (node.nil? == false)
|
160
|
+
yield node
|
161
|
+
if node[:next].address != 0x0
|
162
|
+
node = Natto::MeCabNode.new(node[:next])
|
163
|
+
else
|
164
|
+
break
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
else
|
169
|
+
result = @parse_tostr.call(str)
|
170
|
+
result.force_encoding(Encoding.default_external) if result.respond_to?(:encoding) && result.encoding!=Encoding.default_external
|
171
|
+
result
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
# Returns human-readable details for the wrapped <tt>mecab</tt> parser.
|
176
|
+
# Overrides <tt>Object#to_s</tt>.
|
177
|
+
#
|
178
|
+
# - encoded object id
|
179
|
+
# - FFI pointer to <tt>mecab</tt> object
|
180
|
+
# - options hash
|
181
|
+
# - list of dictionaries
|
182
|
+
# - MeCab version
|
183
|
+
#
|
184
|
+
# @return [String] encoded object id, FFI pointer, options hash, list of dictionaries, and MeCab version
|
185
|
+
def to_s
|
186
|
+
%(#{super.chop} @ptr=#{@ptr.to_s}, @options=#{@options.to_s}, @dicts=#{@dicts.to_s}, @version="#{@version.to_s}">)
|
187
|
+
end
|
188
|
+
|
189
|
+
# Overrides <tt>Object#inspect</tt>.
|
190
|
+
#
|
191
|
+
# @return [String] encoded object id, FFI pointer, options hash, list of dictionaries, and MeCab version
|
192
|
+
# @see #to_s
|
193
|
+
def inspect
|
194
|
+
self.to_s
|
121
195
|
end
|
122
196
|
|
123
197
|
# Returns a <tt>Proc</tt> that will properly free resources
|
@@ -138,14 +212,14 @@ module Natto
|
|
138
212
|
# be passed in the construction of <tt>mecab</tt>.
|
139
213
|
#
|
140
214
|
# @param [Hash] options
|
141
|
-
# @return
|
215
|
+
# @return [String] representation of the options to the <tt>mecab</tt> parser
|
142
216
|
def self.build_options_str(options={})
|
143
217
|
opt = []
|
144
218
|
SUPPORTED_OPTS.each do |k|
|
145
219
|
if options.has_key? k
|
146
220
|
key = k.to_s.gsub('_', '-')
|
147
|
-
# all-morphs
|
148
|
-
if %w( all-morphs
|
221
|
+
# all-morphs and allocate-sentence are just flags
|
222
|
+
if %w( all-morphs allocate-sentence ).include? key
|
149
223
|
opt << "--#{key}" if options[k]==true
|
150
224
|
else
|
151
225
|
opt << "--#{key}=#{options[k]}"
|
@@ -160,6 +234,21 @@ module Natto
|
|
160
234
|
# for the <tt>Natto</tt> module.
|
161
235
|
class MeCabError < RuntimeError; end
|
162
236
|
|
237
|
+
# <tt>MeCabStruct</tt> is a general base class
|
238
|
+
# for <tt>FFI::Struct</tt> objects in the <tt>Natto</tt> module.
|
239
|
+
class MeCabStruct < FFI::Struct
|
240
|
+
# Provides accessor methods for the members of the <tt>mecab</tt> struct.
|
241
|
+
#
|
242
|
+
# @param [String] attr_name
|
243
|
+
# @return member values for the <tt>mecab</tt> struct
|
244
|
+
# @raise [NoMethodError] if <tt>attr_name</tt> is not a member of this <tt>mecab</tt> struct
|
245
|
+
def method_missing(attr_name)
|
246
|
+
member_sym = attr_name.id2name.to_sym
|
247
|
+
return self[member_sym] if self.members.include?(member_sym)
|
248
|
+
raise(NoMethodError.new("undefined method '#{attr_name}' for #{self}"))
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
163
252
|
# <tt>DictionaryInfo</tt> is a wrapper for the structure holding
|
164
253
|
# the <tt>MeCab</tt> instance's related dictionary information.
|
165
254
|
#
|
@@ -180,27 +269,16 @@ module Natto
|
|
180
269
|
# <tt>mecab</tt> dictionary attributes can be obtained by
|
181
270
|
# using their corresponding accessor.
|
182
271
|
#
|
183
|
-
#
|
272
|
+
# nm = Natto::MeCab.new
|
184
273
|
#
|
185
|
-
# sysdic =
|
274
|
+
# sysdic = nm.dicts.first
|
186
275
|
#
|
187
276
|
# puts sysdic.filename
|
188
|
-
# => /usr/local/lib/mecab/dic/ipadic/sys.dic
|
277
|
+
# => "/usr/local/lib/mecab/dic/ipadic/sys.dic"
|
189
278
|
#
|
190
279
|
# puts sysdic.charset
|
191
|
-
# => utf8
|
192
|
-
|
193
|
-
# It is also possible to use the <tt>Symbol</tt> for the
|
194
|
-
# <tt>mecab</tt> dictionary member to index into the
|
195
|
-
# <tt>FFI::Struct</tt> layout associative array like so:
|
196
|
-
#
|
197
|
-
# puts sysdic[:filename]
|
198
|
-
# => /usr/local/lib/mecab/dic/ipadic/sys.dic
|
199
|
-
#
|
200
|
-
# puts sysdic[:charset]
|
201
|
-
# => utf8
|
202
|
-
#
|
203
|
-
class DictionaryInfo < FFI::Struct
|
280
|
+
# => "utf8"
|
281
|
+
class DictionaryInfo < MeCabStruct
|
204
282
|
|
205
283
|
layout :filename, :string,
|
206
284
|
:charset, :string,
|
@@ -211,7 +289,6 @@ module Natto
|
|
211
289
|
:version, :ushort,
|
212
290
|
:next, :pointer
|
213
291
|
|
214
|
-
# Hack to avoid that deprecation message Object#type thrown in Ruby 1.8.7.
|
215
292
|
if RUBY_VERSION.to_f < 1.9
|
216
293
|
alias_method :deprecated_type, :type
|
217
294
|
# <tt>Object#type</tt> override defined when <tt>RUBY_VERSION</tt> is
|
@@ -226,22 +303,199 @@ module Natto
|
|
226
303
|
end
|
227
304
|
end
|
228
305
|
|
229
|
-
#
|
306
|
+
# Returns human-readable details for this <tt>mecab</tt> dictionary.
|
307
|
+
# Overrides <tt>Object#to_s</tt>.
|
230
308
|
#
|
231
|
-
#
|
232
|
-
#
|
233
|
-
#
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
309
|
+
# - encoded object id
|
310
|
+
# - full-path dictionary filename
|
311
|
+
# - dictionary charset
|
312
|
+
#
|
313
|
+
# @return [String] encoded object id, dictionary filename, and charset
|
314
|
+
def to_s
|
315
|
+
%(#{super.chop} filename="#{self.filename}", charset="#{self.charset}">)
|
316
|
+
end
|
317
|
+
|
318
|
+
# Overrides <tt>Object#inspect</tt>.
|
319
|
+
#
|
320
|
+
# @return [String] encoded object id, dictionary filename, and charset
|
321
|
+
# @see #to_s
|
322
|
+
def inspect
|
323
|
+
self.to_s
|
238
324
|
end
|
325
|
+
end
|
326
|
+
|
327
|
+
# <tt>MeCabNode</tt> is a wrapper for the structure holding
|
328
|
+
# the parsed <tt>node</tt>.
|
329
|
+
#
|
330
|
+
# Values for the <tt>mecab</tt> node attributes may be
|
331
|
+
# obtained by using the following <tt>Symbol</tt>s as keys
|
332
|
+
# to the layout associative array of <tt>FFI::Struct</tt> members.
|
333
|
+
#
|
334
|
+
# - :prev
|
335
|
+
# - :next
|
336
|
+
# - :enext
|
337
|
+
# - :bnext
|
338
|
+
# - :rpath
|
339
|
+
# - :lpath
|
340
|
+
# - :begin_node_list
|
341
|
+
# - :end_node_list
|
342
|
+
# - :surface
|
343
|
+
# - :feature
|
344
|
+
# - :id
|
345
|
+
# - :length
|
346
|
+
# - :rlength
|
347
|
+
# - :rcAttr
|
348
|
+
# - :lcAttr
|
349
|
+
# - :posid
|
350
|
+
# - :char_type
|
351
|
+
# - :stat
|
352
|
+
# - :isbest
|
353
|
+
# - :sentence_length
|
354
|
+
# - :alpha
|
355
|
+
# - :beta
|
356
|
+
# - :beta
|
357
|
+
# - :prob
|
358
|
+
# - :wcost
|
359
|
+
# - :cost
|
360
|
+
# - :token
|
361
|
+
#
|
362
|
+
# <h2>Usage</h2>
|
363
|
+
# An instance of <tt>MeCabNode</tt> is yielded to a block
|
364
|
+
# used with <tt>MeCab#parse</tt>. Each resulting node is
|
365
|
+
# yielded to the block passed in, where the above-mentioned
|
366
|
+
# node attributes may be accessed.
|
367
|
+
#
|
368
|
+
# nm = Natto::MeCab.new
|
369
|
+
#
|
370
|
+
# nm.parse('めかぶの使い方がわからなくて困ってました。') do |n|
|
371
|
+
# puts "#{n.surface}¥t#{n.cost}"
|
372
|
+
# end
|
373
|
+
#
|
374
|
+
# め 7961
|
375
|
+
# かぶ 19303
|
376
|
+
# の 25995
|
377
|
+
# 使い方 29182
|
378
|
+
# が 28327
|
379
|
+
# わから 33625
|
380
|
+
# なく 34256
|
381
|
+
# て 36454
|
382
|
+
# 困っ 43797
|
383
|
+
# て 42178
|
384
|
+
# まし 46708
|
385
|
+
# た 46111
|
386
|
+
# 。 42677
|
387
|
+
# 41141
|
388
|
+
# => nil
|
389
|
+
#
|
390
|
+
# It is also possible to use the <tt>Symbol</tt> for the
|
391
|
+
# <tt>mecab</tt> node member to index into the
|
392
|
+
# <tt>FFI::Struct</tt> layout associative array like so:
|
393
|
+
#
|
394
|
+
# nm.parse('納豆に乗っけて頂きます!') {|n| puts n[:feature] }
|
395
|
+
#
|
396
|
+
# 名詞,一般,*,*,*,*,納豆,ナットウ,ナットー
|
397
|
+
# 助詞,格助詞,一般,*,*,*,に,ニ,ニ
|
398
|
+
# 動詞,自立,*,*,一段,連用形,乗っける,ノッケ,ノッケ
|
399
|
+
# 助詞,接続助詞,*,*,*,*,て,テ,テ
|
400
|
+
# 動詞,非自立,*,*,五段・カ行イ音便,連用形,頂く,イタダキ,イタダキ
|
401
|
+
# 助動詞,*,*,*,特殊・マス,基本形,ます,マス,マス
|
402
|
+
# 記号,一般,*,*,*,*,!,!,!
|
403
|
+
# BOS/EOS,*,*,*,*,*,*,*,*
|
404
|
+
# => nil
|
405
|
+
#
|
406
|
+
class MeCabNode < MeCabStruct
|
407
|
+
|
408
|
+
# Normal <tt>mecab</tt> node.
|
409
|
+
NOR_NODE = 0
|
410
|
+
# Unknown <tt>mecab</tt> node.
|
411
|
+
UNK_NODE = 1
|
412
|
+
# Beginning-of-string <tt>mecab</tt> node.
|
413
|
+
BOS_NODE = 2
|
414
|
+
# End-of-string <tt>mecab</tt> node.
|
415
|
+
EOS_NODE = 3
|
416
|
+
# End-of-NBest <tt>mecab</tt> node list.
|
417
|
+
EON_NODE = 4
|
239
418
|
|
240
|
-
|
419
|
+
layout :prev, :pointer,
|
420
|
+
:next, :pointer,
|
421
|
+
:enext, :pointer,
|
422
|
+
:bnext, :pointer,
|
423
|
+
:rpath, :pointer,
|
424
|
+
:lpath, :pointer,
|
425
|
+
:begin_node_list, :pointer,
|
426
|
+
:end_node_list, :pointer,
|
427
|
+
:surface, :string,
|
428
|
+
:feature, :string,
|
429
|
+
:id, :uint,
|
430
|
+
:length, :ushort,
|
431
|
+
:rlength, :ushort,
|
432
|
+
:rcAttr, :ushort,
|
433
|
+
:lcAttr, :ushort,
|
434
|
+
:posid, :ushort,
|
435
|
+
:char_type, :uchar,
|
436
|
+
:stat, :uchar,
|
437
|
+
:isbest, :uchar,
|
438
|
+
:sentence_length, :uint,
|
439
|
+
:alpha, :float,
|
440
|
+
:beta, :float,
|
441
|
+
:prob, :float,
|
442
|
+
:wcost, :short,
|
443
|
+
:cost, :long,
|
444
|
+
:token, :pointer
|
445
|
+
|
446
|
+
if RUBY_VERSION.to_f < 1.9
|
447
|
+
alias_method :deprecated_id, :id
|
448
|
+
# <tt>Object#id</tt> override defined when <tt>RUBY_VERSION</tt> is
|
449
|
+
# older than 1.9. This is a hack to avoid the <tt>Object#id</tt>
|
450
|
+
# deprecation warning thrown up in Ruby 1.8.7.
|
451
|
+
#
|
452
|
+
# <i>This method override is not defined when the Ruby interpreter
|
453
|
+
# is 1.9 or greater.</i>
|
454
|
+
# @return [Fixnum] <tt>mecab</tt> node id
|
455
|
+
def id
|
456
|
+
self[:id]
|
457
|
+
end
|
458
|
+
end
|
459
|
+
|
460
|
+
# Returns the <tt>surface</tt> value for this node.
|
461
|
+
#
|
462
|
+
# @return [String] <tt>mecab</tt> node surface value
|
463
|
+
def surface
|
464
|
+
if self[:surface] && self[:length] > 0
|
465
|
+
@surface ||= self[:surface].bytes.to_a()[0,self[:length]].pack('C*')
|
466
|
+
@surface.force_encoding(Encoding.default_external) if @surface.respond_to?(:encoding) && @surface.encoding!=Encoding.default_external
|
467
|
+
end
|
468
|
+
@surface
|
469
|
+
end
|
470
|
+
|
471
|
+
# Returns the <tt>feature</tt> value for this node.
|
472
|
+
#
|
473
|
+
# @return [String] <tt>mecab</tt> node feature value
|
474
|
+
def feature
|
475
|
+
@feature ||= self[:feature]
|
476
|
+
@feature.force_encoding(Encoding.default_external) if @feature.respond_to?(:encoding) && @feature.encoding!=Encoding.default_external
|
477
|
+
@feature
|
478
|
+
end
|
479
|
+
|
480
|
+
# Returns human-readable details for the <tt>mecab</tt> node.
|
481
|
+
# Overrides <tt>Object#to_s</tt>.
|
482
|
+
#
|
483
|
+
# - encoded object id
|
484
|
+
# - stat
|
485
|
+
# - surface
|
486
|
+
# - feature
|
241
487
|
#
|
242
|
-
# @return [String]
|
488
|
+
# @return [String] encoded object id, stat, surface, and feature
|
243
489
|
def to_s
|
244
|
-
self[:
|
490
|
+
%(#{super.chop} stat=#{self[:stat]}, surface="#{self.surface}", feature="#{self.feature}">)
|
491
|
+
end
|
492
|
+
|
493
|
+
# Overrides <tt>Object#inspect</tt>.
|
494
|
+
#
|
495
|
+
# @return [String] encoded object id, stat, surface, and feature
|
496
|
+
# @see #to_s
|
497
|
+
def inspect
|
498
|
+
self.to_s
|
245
499
|
end
|
246
500
|
end
|
247
501
|
end
|
data/test/natto/tc_binding.rb
CHANGED
@@ -21,11 +21,18 @@ class TestNattoBinding < Test::Unit::TestCase
|
|
21
21
|
# Tests for the inclusion of mecab methods made available
|
22
22
|
# to any classes including the Natto::Binding module.
|
23
23
|
def test_functions_included
|
24
|
-
[ :
|
25
|
-
:
|
24
|
+
[ :mecab_new2,
|
25
|
+
:mecab_version,
|
26
|
+
:mecab_strerror,
|
26
27
|
:mecab_destroy,
|
28
|
+
:mecab_set_theta,
|
29
|
+
:mecab_set_lattice_level,
|
30
|
+
:mecab_set_all_morphs,
|
27
31
|
:mecab_sparse_tostr,
|
28
|
-
:
|
32
|
+
:mecab_nbest_sparse_tostr,
|
33
|
+
:mecab_nbest_init,
|
34
|
+
:mecab_nbest_sparse_tostr,
|
35
|
+
:mecab_nbest_next_tonode,
|
29
36
|
:mecab_dictionary_info ].each do |f|
|
30
37
|
assert(@klass.respond_to? f)
|
31
38
|
end
|
@@ -4,11 +4,19 @@
|
|
4
4
|
# behavior of Natto::DictionaryInfo
|
5
5
|
class TestDictionaryInfo < Test::Unit::TestCase
|
6
6
|
def setup
|
7
|
-
|
7
|
+
m = Natto::MeCab.new
|
8
|
+
@dicts = m.dicts
|
9
|
+
|
10
|
+
out = `mecab -D`.lines.to_a
|
11
|
+
out.each do |l|
|
12
|
+
tokens = l.split("\t")
|
13
|
+
@sysdic_filename = tokens[1].strip if tokens[0] =~ /filename:/i
|
14
|
+
@sysdic_charset = tokens[1].strip if tokens[0] =~ /charset:/i
|
15
|
+
end
|
8
16
|
end
|
9
17
|
|
10
18
|
def teardown
|
11
|
-
@
|
19
|
+
@dicts = nil
|
12
20
|
end
|
13
21
|
|
14
22
|
# Tests the dictionaries accessor method of Natto::MeCab.
|
@@ -17,28 +25,36 @@ class TestDictionaryInfo < Test::Unit::TestCase
|
|
17
25
|
# b) system dictionary encoding is utf-8
|
18
26
|
# c) only dealing w/ case of 1 dictionary being used
|
19
27
|
def test_dictionaries_accessor
|
20
|
-
dicts
|
21
|
-
|
22
|
-
sysdic
|
23
|
-
assert_equal(
|
24
|
-
assert_equal('utf8', sysdic[:charset])
|
28
|
+
assert @dicts.empty? == false
|
29
|
+
sysdic = @dicts.first
|
30
|
+
assert_equal(@sysdic_filename, sysdic[:filename])
|
31
|
+
assert_equal(@sysdic_charset, sysdic[:charset])
|
25
32
|
assert_equal(0x0, sysdic[:next].address)
|
26
|
-
#assert_nil(sysdic.next)
|
27
33
|
end
|
28
34
|
|
29
35
|
# Tests the to_s method.
|
30
36
|
def test_to_s
|
31
|
-
|
37
|
+
#<Natto::DictionaryInfo:0x288879bc @filename=\"/usr/local/lib/mecab/dic/ipadic/sys.dic\", @charset=\"utf8\">
|
38
|
+
assert(@dicts.first.to_s.include?("filename=\"#{@sysdic_filename}\", charset=\"#{@sysdic_charset}\""))
|
32
39
|
end
|
33
40
|
|
34
41
|
# Tests the accessors of Natto::DictionaryInfo.
|
35
42
|
# Note: Object#type is deprecated in 1.9.n, but comes with a warning
|
36
43
|
# in 1.8.n
|
37
44
|
def test_dictionary_info_member_accessors
|
38
|
-
sysdic = @
|
39
|
-
members =
|
45
|
+
sysdic = @dicts.first
|
46
|
+
members = [
|
47
|
+
:filename,
|
48
|
+
:charset,
|
49
|
+
:type,
|
50
|
+
:size,
|
51
|
+
:lsize,
|
52
|
+
:rsize,
|
53
|
+
:version,
|
54
|
+
:next
|
55
|
+
]
|
40
56
|
members.each do |nomme|
|
41
|
-
assert_not_nil(sysdic.send nomme
|
57
|
+
assert_not_nil(sysdic.send nomme )
|
42
58
|
end
|
43
59
|
|
44
60
|
# NoMethodError will be raised for anything else!
|
data/test/natto/tc_mecab.rb
CHANGED
@@ -1,8 +1,19 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
require 'rbconfig'
|
3
|
+
require 'nkf'
|
2
4
|
|
3
5
|
# TestMeCab encapsulates tests for the basic
|
4
6
|
# behavior of Natto::MeCab.
|
5
7
|
class TestMeCab < Test::Unit::TestCase
|
8
|
+
|
9
|
+
host_os = RbConfig::CONFIG['host_os']
|
10
|
+
# we need to transform from UTF-8 to SJIS if we are on Windows!
|
11
|
+
if host_os =~ /mswin|mingw/i
|
12
|
+
TEST_STR = NKF.nkf("-Ws", '試験ですよ、これが。')
|
13
|
+
else
|
14
|
+
TEST_STR = '試験ですよ、これが。'
|
15
|
+
end
|
16
|
+
|
6
17
|
def setup
|
7
18
|
@m = Natto::MeCab.new
|
8
19
|
end
|
@@ -37,9 +48,6 @@ class TestMeCab < Test::Unit::TestCase
|
|
37
48
|
res = Natto::MeCab.build_options_str(:output_format_type=>"natto")
|
38
49
|
assert_equal('--output-format-type=natto', res)
|
39
50
|
|
40
|
-
res = Natto::MeCab.build_options_str(:partial=>true)
|
41
|
-
assert_equal('--partial', res)
|
42
|
-
|
43
51
|
res = Natto::MeCab.build_options_str(:node_format=>'%m\t%f[7]\n')
|
44
52
|
assert_equal('--node-format=%m\t%f[7]\n', res)
|
45
53
|
|
@@ -76,10 +84,8 @@ class TestMeCab < Test::Unit::TestCase
|
|
76
84
|
res = Natto::MeCab.build_options_str(:output_format_type=>"natto",
|
77
85
|
:userdic=>"/some/file",
|
78
86
|
:dicdir=>"/some/other/file",
|
79
|
-
:partial=>true,
|
80
87
|
:all_morphs=>true)
|
81
|
-
assert_equal('--dicdir=/some/other/file --userdic=/some/file --all-morphs --output-format-type=natto
|
82
|
-
|
88
|
+
assert_equal('--dicdir=/some/other/file --userdic=/some/file --all-morphs --output-format-type=natto', res)
|
83
89
|
end
|
84
90
|
|
85
91
|
# Tests the construction and initial state of a Natto::MeCab instance.
|
@@ -96,7 +102,13 @@ class TestMeCab < Test::Unit::TestCase
|
|
96
102
|
end
|
97
103
|
assert_equal(opts, m.options)
|
98
104
|
|
99
|
-
opts = {:all_morphs=>true, :
|
105
|
+
opts = {:all_morphs=>true, :allocate_sentence=>true}
|
106
|
+
assert_nothing_raised do
|
107
|
+
m = Natto::MeCab.new(opts)
|
108
|
+
end
|
109
|
+
assert_equal(opts, m.options)
|
110
|
+
|
111
|
+
opts = {:lattice_level=>999}
|
100
112
|
assert_nothing_raised do
|
101
113
|
m = Natto::MeCab.new(opts)
|
102
114
|
end
|
@@ -126,4 +138,41 @@ class TestMeCab < Test::Unit::TestCase
|
|
126
138
|
def test_version_accessor
|
127
139
|
assert_equal('0.98', @m.version)
|
128
140
|
end
|
141
|
+
|
142
|
+
# Tests Natto::MeCab parsing using the --all-morphs option.
|
143
|
+
def test_all_morphs
|
144
|
+
m = Natto::MeCab.new(:all_morphs=>true)
|
145
|
+
expected = `echo #{TEST_STR} | mecab --all-morphs`.lines.to_a
|
146
|
+
expected.delete_if {|e| e =~ /^(EOS|BOS)/ }
|
147
|
+
|
148
|
+
actual = m.parse(TEST_STR).lines.to_a
|
149
|
+
actual.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
150
|
+
|
151
|
+
assert_equal(expected, actual)
|
152
|
+
end
|
153
|
+
|
154
|
+
# Tests Natto::MeCab parsing (default parse_tostr).
|
155
|
+
def test_parse_tostr_default
|
156
|
+
expected = `echo #{TEST_STR} | mecab`.lines.to_a
|
157
|
+
expected.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
158
|
+
|
159
|
+
actual = @m.parse(TEST_STR).lines.to_a
|
160
|
+
actual.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
161
|
+
|
162
|
+
assert_equal(expected, actual)
|
163
|
+
end
|
164
|
+
|
165
|
+
# Tests Natto::MeCab parsing (default parse_tonode).
|
166
|
+
def test_parse_tonode_default
|
167
|
+
expected = `echo #{TEST_STR} | mecab`.lines.to_a
|
168
|
+
expected.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
169
|
+
|
170
|
+
actual = []
|
171
|
+
@m.parse(TEST_STR) do |node|
|
172
|
+
actual << "#{node.surface}\t#{node.feature}\n"
|
173
|
+
end
|
174
|
+
actual.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
175
|
+
|
176
|
+
assert_equal(expected, actual)
|
177
|
+
end
|
129
178
|
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'rbconfig'
|
3
|
+
require 'nkf'
|
4
|
+
|
5
|
+
# TestMeCabNode encapsulates tests for the basic
|
6
|
+
# behavior of Natto::MeCabNode
|
7
|
+
class TestMeCabNode < Test::Unit::TestCase
|
8
|
+
|
9
|
+
host_os = RbConfig::CONFIG['host_os']
|
10
|
+
# we need to transform from UTF-8 to SJIS if we are on Windows!
|
11
|
+
if host_os =~ /mswin|mingw/i
|
12
|
+
TEST_STR = NKF.nkf("-Ws", '試験ですよ、これが。')
|
13
|
+
else
|
14
|
+
TEST_STR = '試験ですよ、これが。'
|
15
|
+
end
|
16
|
+
|
17
|
+
def setup
|
18
|
+
nm = Natto::MeCab.new
|
19
|
+
@nodes = []
|
20
|
+
nm.parse(TEST_STR) { |n| @nodes << n }
|
21
|
+
end
|
22
|
+
|
23
|
+
def teardown
|
24
|
+
@nodes = nil
|
25
|
+
end
|
26
|
+
|
27
|
+
# Tests the surface and feature accessor methods.
|
28
|
+
def test_surface_and_feature_accessors
|
29
|
+
raw = `echo #{TEST_STR} | mecab`.lines.to_a
|
30
|
+
raw.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
31
|
+
expected = {}
|
32
|
+
raw.each do |l|
|
33
|
+
tokens = l.split("\t")
|
34
|
+
expected[tokens[0]]=tokens[1].strip
|
35
|
+
end
|
36
|
+
|
37
|
+
actual = {}
|
38
|
+
@nodes.each do |n|
|
39
|
+
actual[n.surface]=n.feature if (n.stat==Natto::MeCabNode::NOR_NODE ||
|
40
|
+
n.stat==Natto::MeCabNode::UNK_NODE)
|
41
|
+
end
|
42
|
+
|
43
|
+
assert_equal(expected, actual)
|
44
|
+
end
|
45
|
+
|
46
|
+
# Tests MeCabNode#surface to show that it is consistent
|
47
|
+
# no matter how many times it is invoked.
|
48
|
+
def test_manysurfaces
|
49
|
+
@nodes.each do |n|
|
50
|
+
expected = n.surface
|
51
|
+
5.times { assert_equal(expected, n.surface) }
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
# Tests MeCabNode#feature to show that it is consistent
|
56
|
+
# no matter how many times it is invoked.
|
57
|
+
def test_manyfeature
|
58
|
+
@nodes.each do |n|
|
59
|
+
expected = n.feature
|
60
|
+
5.times { assert_equal(expected, n.feature) }
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
# Tests that the accessors of Natto::MeCabNode exist.
|
65
|
+
# Note: Object#id is deprecated in 1.9.n, but comes with a warning
|
66
|
+
# in 1.8.n
|
67
|
+
def test_mecabnode_accessors
|
68
|
+
node = @nodes[0]
|
69
|
+
members = [
|
70
|
+
:prev,
|
71
|
+
:next,
|
72
|
+
:enext,
|
73
|
+
:bnext,
|
74
|
+
:rpath,
|
75
|
+
:lpath,
|
76
|
+
:begin_node_list,
|
77
|
+
:end_node_list,
|
78
|
+
:surface,
|
79
|
+
:feature,
|
80
|
+
:id,
|
81
|
+
:length,
|
82
|
+
:rlength,
|
83
|
+
:rcAttr,
|
84
|
+
:lcAttr,
|
85
|
+
:posid,
|
86
|
+
:char_type,
|
87
|
+
:stat,
|
88
|
+
:isbest,
|
89
|
+
:sentence_length,
|
90
|
+
:alpha,
|
91
|
+
:beta,
|
92
|
+
:prob,
|
93
|
+
:wcost,
|
94
|
+
:cost,
|
95
|
+
:token
|
96
|
+
]
|
97
|
+
members.each do |nomme|
|
98
|
+
assert_not_nil(node.respond_to? nomme )
|
99
|
+
end
|
100
|
+
|
101
|
+
# NoMethodError will be raised for anything else!
|
102
|
+
assert_raise NoMethodError do
|
103
|
+
node.send :unknown_attr
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
data/test/test_natto.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: natto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 11
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.
|
8
|
+
- 5
|
9
|
+
- 0
|
10
|
+
version: 0.5.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Brooke M. Fujita
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-02-26 00:00:00 +09:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -51,6 +51,7 @@ files:
|
|
51
51
|
- test/natto/tc_binding.rb
|
52
52
|
- test/natto/tc_dictionaryinfo.rb
|
53
53
|
- test/natto/tc_mecab.rb
|
54
|
+
- test/natto/tc_mecabnode.rb
|
54
55
|
- README.md
|
55
56
|
- LICENSE
|
56
57
|
- CHANGELOG
|