natto 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.yardopts +6 -0
- data/README.md +12 -3
- data/lib/natto.rb +39 -37
- data/lib/natto/binding.rb +20 -0
- data/lib/natto/version.rb +1 -1
- data/test/natto/tc_binding.rb +33 -0
- data/test/natto/tc_dictionaryinfo.rb +48 -0
- data/test/natto/tc_mecab.rb +126 -0
- data/test/test_natto.rb +5 -165
- metadata +19 -16
data/.yardopts
ADDED
data/README.md
CHANGED
@@ -31,14 +31,16 @@ e.g., for Cygwin
|
|
31
31
|
require 'natto'
|
32
32
|
|
33
33
|
mecab = Natto::MeCab.new
|
34
|
-
=> #<Natto::MeCab:
|
35
|
-
@
|
36
|
-
@
|
34
|
+
=> #<Natto::MeCab:0x289b88e0 @ptr=#<FFI::Pointer address=0x288865c8>, \
|
35
|
+
@options={}, \
|
36
|
+
@version="0.98", \
|
37
|
+
@dicts=[/usr/local/lib/mecab/dic/ipadic/sys.dic]>
|
37
38
|
|
38
39
|
puts mecab.version
|
39
40
|
=> 0.98
|
40
41
|
|
41
42
|
sysdic = mecab.dicts.first
|
43
|
+
|
42
44
|
puts sysdic.filename
|
43
45
|
=> /usr/local/lib/mecab/dic/ipadic/sys.dic
|
44
46
|
|
@@ -70,6 +72,13 @@ e.g., for Cygwin
|
|
70
72
|
|
71
73
|
## Changelog
|
72
74
|
|
75
|
+
- __2011/01/22__: 0.3.0 release.
|
76
|
+
- Refactoring of Natto::Binding to make mecab methods available as class methods
|
77
|
+
- Refactoring of Natto::DictionaryInfo to override to_s method to return filename
|
78
|
+
- Refactoring of Natto::MeCab to use class methods in Natto::Binding
|
79
|
+
- Refactoring and logical separation of test cases
|
80
|
+
- Continuing update of documentation
|
81
|
+
|
73
82
|
- __2011/01/19__: 0.2.0 release.
|
74
83
|
- Added support for mecab option allocate-sentence
|
75
84
|
- Continuing update of documentation
|
data/lib/natto.rb
CHANGED
@@ -15,9 +15,10 @@ module Natto
|
|
15
15
|
# require 'natto'
|
16
16
|
#
|
17
17
|
# mecab = Natto::MeCab.new
|
18
|
-
# => #<Natto::MeCab:
|
19
|
-
# @
|
20
|
-
# @
|
18
|
+
# => #<Natto::MeCab:0x289b88e0 @ptr=#<FFI::Pointer address=0x288865c8>, \
|
19
|
+
# @options={}, \
|
20
|
+
# @version="0.98", \
|
21
|
+
# @dicts=[/usr/local/lib/mecab/dic/ipadic/sys.dic]>
|
21
22
|
#
|
22
23
|
# puts mecab.parse("ネバネバの組み合わせ美味しいです。")
|
23
24
|
# ネバネバ 名詞,サ変接続,*,*,*,*,ネバネバ,ネバネバ,ネバネバ
|
@@ -30,8 +31,9 @@ module Natto
|
|
30
31
|
# => nil
|
31
32
|
#
|
32
33
|
class MeCab
|
34
|
+
include Natto::Binding
|
33
35
|
|
34
|
-
attr_reader :options, :dicts
|
36
|
+
attr_reader :options, :dicts, :version
|
35
37
|
|
36
38
|
# Supported options to the <tt>mecab</tt> parser.
|
37
39
|
# See the <tt>mecab</tt> help for more details.
|
@@ -67,9 +69,10 @@ module Natto
|
|
67
69
|
# e.g.<br/>
|
68
70
|
#
|
69
71
|
# mecab = Natto::MeCab.new(:node_format=>'%m\t%f[7]\n')
|
70
|
-
# => #<Natto::MeCab:
|
71
|
-
# @
|
72
|
-
# @
|
72
|
+
# => #<Natto::MeCab:0x289b88e0 @ptr=#<FFI::Pointer address=0x288865c8>, \
|
73
|
+
# @options={:node_format=>"%m\\t%f[7]\\n"}, \
|
74
|
+
# @version="0.98", \
|
75
|
+
# @dicts=[/usr/local/lib/mecab/dic/ipadic/sys.dic]>
|
73
76
|
#
|
74
77
|
# puts mecab.parse('簡単で美味しくて良いですよね。')
|
75
78
|
# 簡単 カンタン
|
@@ -91,7 +94,7 @@ module Natto
|
|
91
94
|
@dicts = []
|
92
95
|
|
93
96
|
opt_str = self.class.build_options_str(@options)
|
94
|
-
@ptr =
|
97
|
+
@ptr = self.mecab_new2(opt_str)
|
95
98
|
raise MeCabError.new("Could not initialize MeCab with options: '#{opt_str}'") if @ptr.address == 0x0
|
96
99
|
|
97
100
|
@dicts << Natto::DictionaryInfo.new(Natto::Binding.mecab_dictionary_info(@ptr))
|
@@ -99,24 +102,18 @@ module Natto
|
|
99
102
|
@dicts << Natto::DictionaryInfo.new(@dicts.last[:next])
|
100
103
|
end
|
101
104
|
|
105
|
+
@version = self.mecab_version
|
106
|
+
|
102
107
|
ObjectSpace.define_finalizer(self, self.class.create_free_proc(@ptr))
|
103
108
|
end
|
104
109
|
|
105
|
-
# Parses the given string <tt>
|
110
|
+
# Parses the given string <tt>str</tt>.
|
106
111
|
#
|
107
|
-
# @param [String]
|
112
|
+
# @param [String] str
|
108
113
|
# @return parsing result from <tt>mecab</tt>
|
109
|
-
# @raise [MeCabError] if the <tt>mecab</tt> parser cannot parse the given string <tt>
|
110
|
-
def parse(
|
111
|
-
|
112
|
-
raise(MeCabError.new(Natto::Binding.mecab_strerror(@ptr)))
|
113
|
-
end
|
114
|
-
|
115
|
-
# Returns the <tt>mecab</tt> library version.
|
116
|
-
#
|
117
|
-
# @return [String] <tt>mecab</tt> library version
|
118
|
-
def version
|
119
|
-
Natto::Binding.mecab_version
|
114
|
+
# @raise [MeCabError] if the <tt>mecab</tt> parser cannot parse the given string <tt>str</tt>
|
115
|
+
def parse(str)
|
116
|
+
self.mecab_sparse_tostr(@ptr, str) || raise(MeCabError.new(self.mecab_strerror(@ptr)))
|
120
117
|
end
|
121
118
|
|
122
119
|
# Returns a <tt>Proc</tt> that will properly free resources
|
@@ -129,7 +126,7 @@ module Natto
|
|
129
126
|
# @return [Proc] to release <tt>mecab</tt> resources properly
|
130
127
|
def self.create_free_proc(ptr)
|
131
128
|
Proc.new do
|
132
|
-
|
129
|
+
self.mecab_destroy(ptr)
|
133
130
|
end
|
134
131
|
end
|
135
132
|
|
@@ -159,12 +156,12 @@ module Natto
|
|
159
156
|
# for the <tt>Natto</tt> module.
|
160
157
|
class MeCabError < RuntimeError; end
|
161
158
|
|
162
|
-
# <tt>DictionaryInfo</tt> is a wrapper for
|
163
|
-
# instance's related dictionary information.
|
159
|
+
# <tt>DictionaryInfo</tt> is a wrapper for the structure holding
|
160
|
+
# the <tt>MeCab</tt> instance's related dictionary information.
|
164
161
|
#
|
165
162
|
# Values for the <tt>mecab</tt> dictionary attributes may be
|
166
163
|
# obtained by using the following <tt>Symbol</tt>s as keys
|
167
|
-
# to the associative array of <tt>FFI::Struct</tt> members.
|
164
|
+
# to the layout associative array of <tt>FFI::Struct</tt> members.
|
168
165
|
#
|
169
166
|
# - :filename
|
170
167
|
# - :charset
|
@@ -177,26 +174,27 @@ module Natto
|
|
177
174
|
#
|
178
175
|
# <h2>Usage</h2>
|
179
176
|
# <tt>mecab</tt> dictionary attributes can be obtained by
|
180
|
-
# using
|
177
|
+
# using their corresponding accessor.
|
181
178
|
#
|
182
179
|
# mecab = Natto::MeCab.new
|
180
|
+
#
|
183
181
|
# sysdic = mecab.dicts.first
|
184
182
|
#
|
185
183
|
# puts sysdic.filename
|
186
|
-
# =>
|
184
|
+
# => /usr/local/lib/mecab/dic/ipadic/sys.dic
|
187
185
|
#
|
188
186
|
# puts sysdic.charset
|
189
|
-
# =>
|
187
|
+
# => utf8
|
190
188
|
#
|
191
189
|
# It is also possible to use the <tt>Symbol</tt> for the
|
192
190
|
# <tt>mecab</tt> dictionary member to index into the
|
193
191
|
# <tt>FFI::Struct</tt> layout associative array like so:
|
194
192
|
#
|
195
193
|
# puts sysdic[:filename]
|
196
|
-
# =>
|
194
|
+
# => /usr/local/lib/mecab/dic/ipadic/sys.dic
|
197
195
|
#
|
198
196
|
# puts sysdic[:charset]
|
199
|
-
# =>
|
197
|
+
# => utf8
|
200
198
|
#
|
201
199
|
class DictionaryInfo < FFI::Struct
|
202
200
|
|
@@ -209,11 +207,11 @@ module Natto
|
|
209
207
|
:version, :ushort,
|
210
208
|
:next, :pointer
|
211
209
|
|
212
|
-
# Hack to avoid that deprecation message Object#type.
|
210
|
+
# Hack to avoid the Object#type deprecation message thrown in Ruby 1.8.7.
|
213
211
|
if RUBY_VERSION.to_f < 1.9
|
214
212
|
alias_method :deprecated_type, :type
|
215
213
|
# <tt>Object#type</tt> override defined when <tt>RUBY_VERSION</tt> is
|
216
|
-
#
|
214
|
+
# older than 1.9. This is a hack to avoid the <tt>Object#type</tt>
|
217
215
|
# deprecation warning thrown up in Ruby 1.8.7.
|
218
216
|
#
|
219
217
|
# <i>This method override is not defined when the Ruby interpreter
|
@@ -226,16 +224,20 @@ module Natto
|
|
226
224
|
|
227
225
|
# Provides accessor methods for the members of the <tt>DictionaryInfo</tt> structure.
|
228
226
|
#
|
229
|
-
# @param [String]
|
227
|
+
# @param [String] attr_name
|
230
228
|
# @return member values for the <tt>mecab</tt> dictionary
|
231
|
-
# @raise [NoMethodError] if <tt>
|
232
|
-
def method_missing(
|
233
|
-
member_sym =
|
229
|
+
# @raise [NoMethodError] if <tt>attr_name</tt> is not a member of this <tt>mecab</tt> dictionary <tt>FFI::Struct</tt>
|
230
|
+
def method_missing(attr_name)
|
231
|
+
member_sym = attr_name.id2name.to_sym
|
234
232
|
if self.members.include?(member_sym)
|
235
233
|
self[member_sym]
|
236
234
|
else
|
237
|
-
raise(NoMethodError.new("undefined method '#{
|
235
|
+
raise(NoMethodError.new("undefined method '#{attr_name}' for #{self}"))
|
238
236
|
end
|
239
237
|
end
|
238
|
+
|
239
|
+
def to_s
|
240
|
+
self[:filename]
|
241
|
+
end
|
240
242
|
end
|
241
243
|
end
|
data/lib/natto/binding.rb
CHANGED
@@ -62,6 +62,26 @@ module Natto
|
|
62
62
|
def mecab_version
|
63
63
|
Natto::Binding.mecab_version
|
64
64
|
end
|
65
|
+
|
66
|
+
def mecab_new2(options_str)
|
67
|
+
Natto::Binding.mecab_new2(options_str)
|
68
|
+
end
|
69
|
+
|
70
|
+
def mecab_destroy(ptr)
|
71
|
+
Natto::Binding.mecab_destroy(ptr)
|
72
|
+
end
|
73
|
+
|
74
|
+
def mecab_sparse_tostr(ptr, str)
|
75
|
+
Natto::Binding.mecab_sparse_tostr(ptr, str)
|
76
|
+
end
|
77
|
+
|
78
|
+
def mecab_strerror(ptr)
|
79
|
+
Natto::Binding.mecab_strerror(ptr)
|
80
|
+
end
|
81
|
+
|
82
|
+
def mecab_dictionary_info(ptr)
|
83
|
+
Natto::Binding.mecab_dictionary_info(ptr)
|
84
|
+
end
|
65
85
|
end
|
66
86
|
end
|
67
87
|
end
|
data/lib/natto/version.rb
CHANGED
@@ -0,0 +1,33 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
# TestNattoBinding encapsulates tests for the basic
|
4
|
+
# behavior of the Natto::Binding module.
|
5
|
+
class TestNattoBinding < Test::Unit::TestCase
|
6
|
+
def setup
|
7
|
+
@klass = Class.new do
|
8
|
+
include Natto::Binding
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
def teardown
|
13
|
+
@klass = nil
|
14
|
+
end
|
15
|
+
|
16
|
+
# Tests the mecab_version function.
|
17
|
+
def test_mecab_version
|
18
|
+
assert_equal('0.98', @klass.mecab_version)
|
19
|
+
end
|
20
|
+
|
21
|
+
# Tests for the inclusion of mecab methods made available
|
22
|
+
# to any classes including the Natto::Binding module.
|
23
|
+
def test_functions_included
|
24
|
+
[ :mecab_version,
|
25
|
+
:mecab_new2,
|
26
|
+
:mecab_destroy,
|
27
|
+
:mecab_sparse_tostr,
|
28
|
+
:mecab_strerror,
|
29
|
+
:mecab_dictionary_info ].each do |f|
|
30
|
+
assert(@klass.respond_to? f)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
# TestDictionaryInfo encapsulates tests for the basic
|
4
|
+
# behavior of Natto::DictionaryInfo
|
5
|
+
class TestDictionaryInfo < Test::Unit::TestCase
|
6
|
+
def setup
|
7
|
+
@m = Natto::MeCab.new
|
8
|
+
end
|
9
|
+
|
10
|
+
def teardown
|
11
|
+
@m = nil
|
12
|
+
end
|
13
|
+
|
14
|
+
# Tests the dictionaries accessor method of Natto::MeCab.
|
15
|
+
# Assumes that:
|
16
|
+
# a) system dictionary is /usr/local/lib/mecab/dic/ipadic/sys.dic
|
17
|
+
# b) system dictionary encoding is utf-8
|
18
|
+
# c) only dealing w/ case of 1 dictionary being used
|
19
|
+
def test_dictionaries_accessor
|
20
|
+
dicts = @m.dicts
|
21
|
+
assert dicts.empty? == false
|
22
|
+
sysdic = dicts.first
|
23
|
+
assert_equal('/usr/local/lib/mecab/dic/ipadic/sys.dic', sysdic[:filename])
|
24
|
+
assert_equal('utf8', sysdic[:charset])
|
25
|
+
assert_equal(0x0, sysdic[:next].address)
|
26
|
+
end
|
27
|
+
|
28
|
+
# Tests the to_s method.
|
29
|
+
def test_to_s
|
30
|
+
assert_equal('/usr/local/lib/mecab/dic/ipadic/sys.dic', @m.dicts.first.to_s)
|
31
|
+
end
|
32
|
+
|
33
|
+
# Tests the accessors of Natto::DictionaryInfo.
|
34
|
+
# Note: Object#type is removed in 1.9.n, but comes with a deprecation warning
|
35
|
+
# in 1.8.n
|
36
|
+
def test_dictionary_info_member_accessors
|
37
|
+
sysdic = @m.dicts.first
|
38
|
+
members = %w( filename charset type size lsize rsize version next )
|
39
|
+
members.each do |nomme|
|
40
|
+
assert_not_nil(sysdic.send nomme.to_sym )
|
41
|
+
end
|
42
|
+
|
43
|
+
# NoMethodError will be raised for anything else!
|
44
|
+
assert_raise NoMethodError do
|
45
|
+
sysdic.send :nomethoderror
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
# TestMeCab encapsulates tests for the basic
|
4
|
+
# behavior of Natto::MeCab.
|
5
|
+
class TestMeCab < Test::Unit::TestCase
|
6
|
+
def setup
|
7
|
+
@m = Natto::MeCab.new
|
8
|
+
end
|
9
|
+
|
10
|
+
def teardown
|
11
|
+
@m = nil
|
12
|
+
end
|
13
|
+
|
14
|
+
# Tests the Natto::MeCab.build_options_str function.
|
15
|
+
def test_build_options_str
|
16
|
+
res = Natto::MeCab.build_options_str
|
17
|
+
assert_equal('', res)
|
18
|
+
|
19
|
+
res = Natto::MeCab.build_options_str(:unknown=>"ignore")
|
20
|
+
assert_equal('', res)
|
21
|
+
|
22
|
+
res = Natto::MeCab.build_options_str(:rcfile=>"/some/file")
|
23
|
+
assert_equal('--rcfile=/some/file', res)
|
24
|
+
|
25
|
+
res = Natto::MeCab.build_options_str(:dicdir=>"/some/other/file")
|
26
|
+
assert_equal('--dicdir=/some/other/file', res)
|
27
|
+
|
28
|
+
res = Natto::MeCab.build_options_str(:userdic=>"/yet/another/file")
|
29
|
+
assert_equal('--userdic=/yet/another/file', res)
|
30
|
+
|
31
|
+
res = Natto::MeCab.build_options_str(:lattice_level=>42)
|
32
|
+
assert_equal('--lattice-level=42', res)
|
33
|
+
|
34
|
+
res = Natto::MeCab.build_options_str(:all_morphs=>true)
|
35
|
+
assert_equal('--all-morphs', res)
|
36
|
+
|
37
|
+
res = Natto::MeCab.build_options_str(:output_format_type=>"natto")
|
38
|
+
assert_equal('--output-format-type=natto', res)
|
39
|
+
|
40
|
+
res = Natto::MeCab.build_options_str(:partial=>true)
|
41
|
+
assert_equal('--partial', res)
|
42
|
+
|
43
|
+
res = Natto::MeCab.build_options_str(:node_format=>'%m\t%f[7]\n')
|
44
|
+
assert_equal('--node-format=%m\t%f[7]\n', res)
|
45
|
+
|
46
|
+
res = Natto::MeCab.build_options_str(:unk_format=>'%m\t%f[7]\n')
|
47
|
+
assert_equal('--unk-format=%m\t%f[7]\n', res)
|
48
|
+
|
49
|
+
res = Natto::MeCab.build_options_str(:bos_format=>'%m\t%f[7]\n')
|
50
|
+
assert_equal('--bos-format=%m\t%f[7]\n', res)
|
51
|
+
|
52
|
+
res = Natto::MeCab.build_options_str(:eos_format=>'%m\t%f[7]\n')
|
53
|
+
assert_equal('--eos-format=%m\t%f[7]\n', res)
|
54
|
+
|
55
|
+
res = Natto::MeCab.build_options_str(:eon_format=>'%m\t%f[7]\n')
|
56
|
+
assert_equal('--eon-format=%m\t%f[7]\n', res)
|
57
|
+
|
58
|
+
res = Natto::MeCab.build_options_str(:unk_feature=>'%m\t%f[7]\n')
|
59
|
+
assert_equal('--unk-feature=%m\t%f[7]\n', res)
|
60
|
+
|
61
|
+
res = Natto::MeCab.build_options_str(:allocate_sentence=>true)
|
62
|
+
assert_equal('--allocate-sentence', res)
|
63
|
+
|
64
|
+
res = Natto::MeCab.build_options_str(:nbest=>42)
|
65
|
+
assert_equal('--nbest=42', res)
|
66
|
+
|
67
|
+
res = Natto::MeCab.build_options_str(:theta=>0.42)
|
68
|
+
assert_equal('--theta=0.42', res)
|
69
|
+
|
70
|
+
res = Natto::MeCab.build_options_str(:cost_factor=>42)
|
71
|
+
assert_equal('--cost-factor=42', res)
|
72
|
+
|
73
|
+
res = Natto::MeCab.build_options_str(:output_format_type=>"natto",
|
74
|
+
:userdic=>"/some/file",
|
75
|
+
:dicdir=>"/some/other/file",
|
76
|
+
:partial=>true,
|
77
|
+
:all_morphs=>true)
|
78
|
+
assert_equal('--dicdir=/some/other/file --userdic=/some/file --all-morphs --output-format-type=natto --partial', res)
|
79
|
+
|
80
|
+
end
|
81
|
+
|
82
|
+
# Tests the construction and initial state of a Natto::MeCab instance.
|
83
|
+
def test_construction
|
84
|
+
m = nil
|
85
|
+
assert_nothing_raised do
|
86
|
+
m = Natto::MeCab.new
|
87
|
+
end
|
88
|
+
assert_equal({}, m.options)
|
89
|
+
|
90
|
+
opts = {:output_format_type=>'chasen'}
|
91
|
+
assert_nothing_raised do
|
92
|
+
m = Natto::MeCab.new(opts)
|
93
|
+
end
|
94
|
+
assert_equal(opts, m.options)
|
95
|
+
|
96
|
+
opts = {:all_morphs=>true, :partial=>true, :allocate_sentence=>true}
|
97
|
+
assert_nothing_raised do
|
98
|
+
m = Natto::MeCab.new(opts)
|
99
|
+
end
|
100
|
+
assert_equal(opts, m.options)
|
101
|
+
end
|
102
|
+
|
103
|
+
# Tests the initialize method for error cases for erroneous mecab options.
|
104
|
+
def test_initialize_with_errors
|
105
|
+
assert_raise Natto::MeCabError do
|
106
|
+
Natto::MeCab.new(:output_format_type=>'not_defined_anywhere')
|
107
|
+
end
|
108
|
+
|
109
|
+
assert_raise Natto::MeCabError do
|
110
|
+
Natto::MeCab.new(:rcfile=>'/rcfile/does/not/exist')
|
111
|
+
end
|
112
|
+
|
113
|
+
assert_raise Natto::MeCabError do
|
114
|
+
Natto::MeCab.new(:dicdir=>'/dicdir/does/not/exist')
|
115
|
+
end
|
116
|
+
|
117
|
+
assert_raise Natto::MeCabError do
|
118
|
+
Natto::MeCab.new(:userdic=>'/userdic/does/not/exist')
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
# Tests the mecab version string accessor method of Natto::MeCab.
|
123
|
+
def test_version_accessor
|
124
|
+
assert_equal('0.98', @m.version)
|
125
|
+
end
|
126
|
+
end
|
data/test/test_natto.rb
CHANGED
@@ -1,171 +1,11 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
|
2
|
+
$: << 'lib'
|
3
3
|
require 'rubygems' if RUBY_VERSION.to_f < 1.9
|
4
4
|
require 'test/unit'
|
5
5
|
require 'natto'
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
@klass = Class.new do
|
12
|
-
include Natto::Binding
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
def teardown
|
17
|
-
@klass = nil
|
18
|
-
end
|
19
|
-
|
20
|
-
# Tests the class methods include callback.
|
21
|
-
def test_classmethods_include
|
22
|
-
assert_equal('0.98', @klass.mecab_version)
|
23
|
-
end
|
24
|
-
|
25
|
-
# Tests the build_options_str function.
|
26
|
-
def test_build_options_str
|
27
|
-
res = Natto::MeCab.build_options_str
|
28
|
-
assert_equal('', res)
|
29
|
-
|
30
|
-
res = Natto::MeCab.build_options_str(:unknown=>"ignore")
|
31
|
-
assert_equal('', res)
|
32
|
-
|
33
|
-
res = Natto::MeCab.build_options_str(:rcfile=>"/some/file")
|
34
|
-
assert_equal('--rcfile=/some/file', res)
|
35
|
-
|
36
|
-
res = Natto::MeCab.build_options_str(:dicdir=>"/some/other/file")
|
37
|
-
assert_equal('--dicdir=/some/other/file', res)
|
38
|
-
|
39
|
-
res = Natto::MeCab.build_options_str(:userdic=>"/yet/another/file")
|
40
|
-
assert_equal('--userdic=/yet/another/file', res)
|
41
|
-
|
42
|
-
res = Natto::MeCab.build_options_str(:lattice_level=>42)
|
43
|
-
assert_equal('--lattice-level=42', res)
|
44
|
-
|
45
|
-
res = Natto::MeCab.build_options_str(:all_morphs=>true)
|
46
|
-
assert_equal('--all-morphs', res)
|
47
|
-
|
48
|
-
res = Natto::MeCab.build_options_str(:output_format_type=>"natto")
|
49
|
-
assert_equal('--output-format-type=natto', res)
|
50
|
-
|
51
|
-
res = Natto::MeCab.build_options_str(:partial=>true)
|
52
|
-
assert_equal('--partial', res)
|
53
|
-
|
54
|
-
res = Natto::MeCab.build_options_str(:node_format=>'%m\t%f[7]\n')
|
55
|
-
assert_equal('--node-format=%m\t%f[7]\n', res)
|
56
|
-
|
57
|
-
res = Natto::MeCab.build_options_str(:unk_format=>'%m\t%f[7]\n')
|
58
|
-
assert_equal('--unk-format=%m\t%f[7]\n', res)
|
59
|
-
|
60
|
-
res = Natto::MeCab.build_options_str(:bos_format=>'%m\t%f[7]\n')
|
61
|
-
assert_equal('--bos-format=%m\t%f[7]\n', res)
|
62
|
-
|
63
|
-
res = Natto::MeCab.build_options_str(:eos_format=>'%m\t%f[7]\n')
|
64
|
-
assert_equal('--eos-format=%m\t%f[7]\n', res)
|
65
|
-
|
66
|
-
res = Natto::MeCab.build_options_str(:eon_format=>'%m\t%f[7]\n')
|
67
|
-
assert_equal('--eon-format=%m\t%f[7]\n', res)
|
68
|
-
|
69
|
-
res = Natto::MeCab.build_options_str(:unk_feature=>'%m\t%f[7]\n')
|
70
|
-
assert_equal('--unk-feature=%m\t%f[7]\n', res)
|
71
|
-
|
72
|
-
res = Natto::MeCab.build_options_str(:allocate_sentence=>true)
|
73
|
-
assert_equal('--allocate-sentence', res)
|
74
|
-
|
75
|
-
res = Natto::MeCab.build_options_str(:nbest=>42)
|
76
|
-
assert_equal('--nbest=42', res)
|
77
|
-
|
78
|
-
res = Natto::MeCab.build_options_str(:theta=>0.42)
|
79
|
-
assert_equal('--theta=0.42', res)
|
80
|
-
|
81
|
-
res = Natto::MeCab.build_options_str(:cost_factor=>42)
|
82
|
-
assert_equal('--cost-factor=42', res)
|
83
|
-
|
84
|
-
res = Natto::MeCab.build_options_str(:output_format_type=>"natto",
|
85
|
-
:userdic=>"/some/file",
|
86
|
-
:dicdir=>"/some/other/file",
|
87
|
-
:partial=>true,
|
88
|
-
:all_morphs=>true)
|
89
|
-
assert_equal('--dicdir=/some/other/file --userdic=/some/file --all-morphs --output-format-type=natto --partial', res)
|
90
|
-
|
91
|
-
end
|
92
|
-
|
93
|
-
# Tests the state of a Natto::MeCab instance
|
94
|
-
# after construction.
|
95
|
-
def test_construction
|
96
|
-
m = nil
|
97
|
-
assert_nothing_raised do
|
98
|
-
m = Natto::MeCab.new
|
99
|
-
end
|
100
|
-
assert_equal({}, m.options)
|
101
|
-
|
102
|
-
opts = {:output_format_type=>'chasen'}
|
103
|
-
assert_nothing_raised do
|
104
|
-
m = Natto::MeCab.new(opts)
|
105
|
-
end
|
106
|
-
assert_equal(opts, m.options)
|
107
|
-
|
108
|
-
opts = {:all_morphs=>true, :partial=>true, :allocate_sentence=>true}
|
109
|
-
assert_nothing_raised do
|
110
|
-
m = Natto::MeCab.new(opts)
|
111
|
-
end
|
112
|
-
assert_equal(opts, m.options)
|
113
|
-
end
|
114
|
-
|
115
|
-
# Tests the initialize method for error cases for
|
116
|
-
# erroneous mecab options.
|
117
|
-
def test_initialize_with_errors
|
118
|
-
assert_raise Natto::MeCabError do
|
119
|
-
Natto::MeCab.new(:output_format_type=>'not_defined_anywhere')
|
120
|
-
end
|
121
|
-
|
122
|
-
assert_raise Natto::MeCabError do
|
123
|
-
Natto::MeCab.new(:rcfile=>'/rcfile/does/not/exist')
|
124
|
-
end
|
125
|
-
|
126
|
-
assert_raise Natto::MeCabError do
|
127
|
-
Natto::MeCab.new(:dicdir=>'/dicdir/does/not/exist')
|
128
|
-
end
|
129
|
-
|
130
|
-
assert_raise Natto::MeCabError do
|
131
|
-
Natto::MeCab.new(:userdic=>'/userdic/does/not/exist')
|
132
|
-
end
|
133
|
-
end
|
134
|
-
|
135
|
-
# Tests the mecab version string accessor class method of Natto::MeCab.
|
136
|
-
def test_version_accessor
|
137
|
-
m = Natto::MeCab.new
|
138
|
-
assert_equal('0.98', m.version)
|
139
|
-
end
|
140
|
-
|
141
|
-
# Tests the dictionary accessor method of Natto::MeCab
|
142
|
-
# Assumes that:
|
143
|
-
# a) system dictionary is /usr/local/lib/mecab/dic/ipadic/sys.dic
|
144
|
-
# b) system dictionary encoding is utf-8
|
145
|
-
def test_dictionary_accessor
|
146
|
-
m = Natto::MeCab.new
|
147
|
-
dicts = m.dicts
|
148
|
-
assert dicts.empty? == false
|
149
|
-
sysdic = dicts.first
|
150
|
-
assert_equal('/usr/local/lib/mecab/dic/ipadic/sys.dic', sysdic[:filename])
|
151
|
-
assert_equal('utf8', sysdic[:charset])
|
152
|
-
assert_equal(0x0, sysdic[:next].address)
|
153
|
-
end
|
154
|
-
|
155
|
-
# Tests the accessors of Natto::DictionaryInfo
|
156
|
-
# Note: Object#type is deprecated in 1.9.n, but comes with a warning
|
157
|
-
# in 1.8.n
|
158
|
-
def test_dictionary_info_member_accessors
|
159
|
-
m = Natto::MeCab.new
|
160
|
-
sysdic = m.dicts.first
|
161
|
-
members = %w( filename charset type size lsize rsize version next )
|
162
|
-
members.each do |nomme|
|
163
|
-
assert_not_nil(sysdic.send nomme.to_sym )
|
164
|
-
end
|
165
|
-
|
166
|
-
# NoMethodError will be raised for anything else!
|
167
|
-
assert_raise NoMethodError do
|
168
|
-
sysdic.send :nomethoderror
|
169
|
-
end
|
170
|
-
end
|
7
|
+
[ '/test/natto/tc_mecab.rb',
|
8
|
+
'/test/natto/tc_dictionaryinfo.rb',
|
9
|
+
'/test/natto/tc_binding.rb' ].each do |tc|
|
10
|
+
require File.join(File.expand_path('.'), tc)
|
171
11
|
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
-
|
7
|
+
- 3
|
8
8
|
- 0
|
9
|
-
version: 0.
|
9
|
+
version: 0.3.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Brooke M. Fujita
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-01-
|
17
|
+
date: 2011-01-22 00:00:00 +09:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -32,35 +32,38 @@ dependencies:
|
|
32
32
|
version: 0.6.3
|
33
33
|
type: :runtime
|
34
34
|
version_requirements: *id001
|
35
|
-
description:
|
35
|
+
description: |
|
36
|
+
natto is a gem bridging Ruby and MeCab using FFI (foreign function interface).
|
37
|
+
No compilation is necessary, and natto works on any platform and on any OS.
|
38
|
+
|
39
|
+
Find out more about natto by visiting the
|
40
|
+
project homepage at http://code.google.com/p/natto/
|
41
|
+
|
36
42
|
email: buruzaemon@gmail.com
|
37
43
|
executables: []
|
38
44
|
|
39
45
|
extensions: []
|
40
46
|
|
41
|
-
extra_rdoc_files:
|
42
|
-
|
43
|
-
- README.md
|
47
|
+
extra_rdoc_files: []
|
48
|
+
|
44
49
|
files:
|
45
50
|
- lib/natto.rb
|
46
51
|
- lib/natto/binding.rb
|
47
52
|
- lib/natto/version.rb
|
48
53
|
- test/test_natto.rb
|
54
|
+
- test/natto/tc_binding.rb
|
55
|
+
- test/natto/tc_dictionaryinfo.rb
|
56
|
+
- test/natto/tc_mecab.rb
|
49
57
|
- LICENSE
|
50
58
|
- README.md
|
51
|
-
|
59
|
+
- .yardopts
|
60
|
+
has_rdoc: true
|
52
61
|
homepage: http://code.google.com/p/natto/
|
53
62
|
licenses:
|
54
63
|
- BSD
|
55
64
|
post_install_message:
|
56
|
-
rdoc_options:
|
57
|
-
|
58
|
-
- README.md
|
59
|
-
- --title
|
60
|
-
- natto 0.2.0 -- Ruby-Mecab binding
|
61
|
-
- --main
|
62
|
-
- README.md
|
63
|
-
- -c UTF-8
|
65
|
+
rdoc_options: []
|
66
|
+
|
64
67
|
require_paths:
|
65
68
|
- lib
|
66
69
|
required_ruby_version: !ruby/object:Gem::Requirement
|