natto 0.0.9 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +40 -10
- data/lib/natto.rb +70 -26
- data/lib/natto/version.rb +1 -1
- data/test/test_natto.rb +24 -1
- metadata +4 -4
data/README.md
CHANGED
@@ -33,14 +33,18 @@ e.g., for Cygwin
|
|
33
33
|
=> #<Natto::MeCab:0x28d93dd4 @options={}, \
|
34
34
|
@dicts=[#<Natto::DictionaryInfo:0x28d93d34>], \
|
35
35
|
@ptr=#<FFI::Pointer address=0x28af3e58>>
|
36
|
-
puts m.parse(
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
36
|
+
puts m.parse('暑い日にはもってこいの一品ですね。')
|
37
|
+
暑い日にはもってこいの一品ですね。
|
38
|
+
暑い 形容詞,自立,*,*,形容詞・アウオ段,基本形,暑い,アツイ,アツイ
|
39
|
+
日 名詞,非自立,副詞可能,*,*,*,日,ヒ,ヒ
|
40
|
+
に 助詞,格助詞,一般,*,*,*,に,ニ,ニ
|
41
|
+
は 助詞,係助詞,*,*,*,*,は,ハ,ワ
|
42
|
+
もってこい 名詞,一般,*,*,*,*,もってこい,モッテコイ,モッテコイ
|
42
43
|
の 助詞,連体化,*,*,*,*,の,ノ,ノ
|
43
|
-
|
44
|
+
一品 名詞,一般,*,*,*,*,一品,イッピン,イッピン
|
45
|
+
です 助動詞,*,*,*,特殊・デス,基本形,です,デス,デス
|
46
|
+
ね 助詞,終助詞,*,*,*,*,ね,ネ,ネ
|
47
|
+
。 記号,句点,*,*,*,*,。,。,。
|
44
48
|
EOS
|
45
49
|
=> nil
|
46
50
|
|
@@ -55,7 +59,12 @@ e.g., for Cygwin
|
|
55
59
|
|
56
60
|
## Changelog
|
57
61
|
|
58
|
-
- __2011/01/
|
62
|
+
- __2011/01/15__: 0.1.0 release.
|
63
|
+
- Added accessors to Natto::DictionaryInfo
|
64
|
+
- Added accessor for version in Natto::MeCab
|
65
|
+
- Continuing update of documentation
|
66
|
+
|
67
|
+
- __2011/01/13__: 0.0.9 release.
|
59
68
|
- Further development and testing for mecab dictionary access/destruction
|
60
69
|
- Continuing update of documentation
|
61
70
|
|
@@ -90,5 +99,26 @@ e.g., for Cygwin
|
|
90
99
|
- Initial release
|
91
100
|
|
92
101
|
## Copyright
|
93
|
-
|
94
|
-
|
102
|
+
Copyright © 2010-2013, Brooke M. Fujita.
|
103
|
+
All rights reserved.
|
104
|
+
|
105
|
+
Redistribution and use in source and binary forms, with or without modification, are
|
106
|
+
permitted provided that the following conditions are met:
|
107
|
+
|
108
|
+
* Redistributions of source code must retain the above
|
109
|
+
copyright notice, this list of conditions and the
|
110
|
+
following disclaimer.
|
111
|
+
|
112
|
+
* Redistributions in binary form must reproduce the above
|
113
|
+
copyright notice, this list of conditions and the
|
114
|
+
following disclaimer in the documentation and/or other
|
115
|
+
materials provided with the distribution.
|
116
|
+
|
117
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
|
118
|
+
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
119
|
+
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
120
|
+
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
121
|
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
122
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
123
|
+
TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
124
|
+
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/lib/natto.rb
CHANGED
@@ -9,25 +9,27 @@ module Natto
|
|
9
9
|
# Options to the <tt>mecab</tt> parser are passed in as a hash at
|
10
10
|
# initialization.
|
11
11
|
#
|
12
|
-
#
|
13
|
-
#
|
12
|
+
# <h2>Usage</h2>
|
13
|
+
# Here is how to use natto under Ruby 1.8.n:<br/>
|
14
|
+
#
|
15
|
+
# require 'rubygems'
|
14
16
|
# require 'natto'
|
15
17
|
#
|
16
18
|
# m = Natto::MeCab.new
|
17
19
|
# => #<Natto::MeCab:0x28d93dd4 @options={}, \
|
18
20
|
# @dicts=[#<Natto::DictionaryInfo:0x28d93d34>], \
|
19
21
|
# @ptr=#<FFI::Pointer address=0x28af3e58>>
|
20
|
-
# puts m.parse("
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
# うち 名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ
|
22
|
+
# puts m.parse("ネバネバの組み合わせ美味しいです。")
|
23
|
+
# ネバネバ 名詞,サ変接続,*,*,*,*,ネバネバ,ネバネバ,ネバネバ
|
24
|
+
# の 助詞,連体化,*,*,*,*,の,ノ,ノ
|
25
|
+
# 組み合わせ 名詞,一般,*,*,*,*,組み合わせ,クミアワセ,クミアワセ
|
26
|
+
# 美味しい 形容詞,自立,*,*,形容詞・イ段,基本形,美味しい,オイシイ,オイシイ
|
27
|
+
# です 助動詞,*,*,*,特殊・デス,基本形,です,デス,デス
|
28
|
+
# 。 記号,句点,*,*,*,*,。,。,。
|
28
29
|
# EOS
|
29
30
|
# => nil
|
30
|
-
#
|
31
|
+
#
|
32
|
+
# The <tt>require 'rubygems'</tt> can be omitted for Ruby 1.9.n.
|
31
33
|
class MeCab
|
32
34
|
|
33
35
|
attr_reader :options, :dicts
|
@@ -61,19 +63,22 @@ module Natto
|
|
61
63
|
# - :theta -- temperature parameter theta (float, default 0.75)
|
62
64
|
# - :cost_factor -- cost factor (integer, default 700)
|
63
65
|
#
|
64
|
-
#
|
65
|
-
#
|
66
|
-
#
|
66
|
+
# <i>Use single-quotes to preserve format options that contain escape chars.</i><br/>
|
67
|
+
# e.g.<br/>
|
68
|
+
#
|
67
69
|
# m = Natto::MeCab.new(:node_format=>'%m\t%f[7]\n')
|
68
70
|
# => #<Natto::MeCab:0x28d8886c @options={:node_format=>"%m\\t%f[7]\\n"}, \
|
69
|
-
#
|
70
|
-
#
|
71
|
-
# puts m.parse(
|
72
|
-
#
|
73
|
-
#
|
74
|
-
#
|
75
|
-
#
|
76
|
-
#
|
71
|
+
# @dicts=[#<Natto::DictionaryInfo:0x28d8863c>], \
|
72
|
+
# @ptr=#<FFI::Pointer address=0x28e3b268>>
|
73
|
+
# puts m.parse('簡単で美味しくて良いですよね。')
|
74
|
+
# 簡単 カンタン
|
75
|
+
# で デ
|
76
|
+
# 美味しくて オイシクテ
|
77
|
+
# 良い ヨイ
|
78
|
+
# です デス
|
79
|
+
# よ ヨ
|
80
|
+
# ね ネ
|
81
|
+
# 。
|
77
82
|
# EOS
|
78
83
|
# => nil
|
79
84
|
#
|
@@ -106,6 +111,12 @@ module Natto
|
|
106
111
|
raise(MeCabError.new(Natto::Binding.mecab_strerror(@ptr)))
|
107
112
|
end
|
108
113
|
|
114
|
+
# Returns the <tt>mecab</tt> version.
|
115
|
+
#
|
116
|
+
# @return <tt>mecab</tt> version
|
117
|
+
def version
|
118
|
+
Natto::Binding.mecab_version
|
119
|
+
end
|
109
120
|
|
110
121
|
# Returns a <tt>Proc</tt> that is registered to be invoked
|
111
122
|
# after the object owning <tt>ptr</tt> has been destroyed.
|
@@ -146,8 +157,9 @@ module Natto
|
|
146
157
|
# <tt>DictionaryInfo</tt> is a wrapper for a <tt>MeCab</tt>
|
147
158
|
# instance's related dictionary information.
|
148
159
|
#
|
149
|
-
# Values
|
150
|
-
#
|
160
|
+
# Values for the <tt>mecab</tt> dictionary attributes may be
|
161
|
+
# obtained by using the following <tt>Symbol</tt>s as keys
|
162
|
+
# to the associative array of <tt>FFI::Struct</tt> members.
|
151
163
|
#
|
152
164
|
# - :filename
|
153
165
|
# - :charset
|
@@ -158,14 +170,26 @@ module Natto
|
|
158
170
|
# - :version
|
159
171
|
# - :next
|
160
172
|
#
|
161
|
-
#
|
173
|
+
# <h2>Usage</h2>
|
174
|
+
#
|
162
175
|
# m = Natto::MeCab.new
|
163
176
|
# sysdic = m.dicts.first
|
177
|
+
# puts sysdic.filename
|
178
|
+
# => /usr/local/lib/mecab/dic/ipadic/sys.dic
|
179
|
+
# puts sysdic.charset
|
180
|
+
# => utf8
|
181
|
+
#
|
182
|
+
# It is also possible to use the <tt>Symbol</tt> for the
|
183
|
+
# <tt>mecab</tt> dictionary member to index into the
|
184
|
+
# <tt>FFI::Struct</tt> layout associative array like so:
|
185
|
+
#
|
164
186
|
# puts sysdic[:filename]
|
165
187
|
# => /usr/local/lib/mecab/dic/ipadic/sys.dic
|
166
188
|
# puts sysdic[:charset]
|
167
189
|
# => utf8
|
190
|
+
#
|
168
191
|
class DictionaryInfo < FFI::Struct
|
192
|
+
|
169
193
|
layout :filename, :string,
|
170
194
|
:charset, :string,
|
171
195
|
:size, :uint,
|
@@ -173,6 +197,26 @@ module Natto
|
|
173
197
|
:lsize, :uint,
|
174
198
|
:rsize, :uint,
|
175
199
|
:version, :ushort,
|
176
|
-
:next, :pointer
|
200
|
+
:next, :pointer
|
201
|
+
|
202
|
+
# Hack to avoid the Object#type deprecation warning under Ruby 1.8.
|
203
|
+
if RUBY_VERSION.to_f < 1.9
|
204
|
+
alias_method :deprecated_type, :type
|
205
|
+
# @private
|
206
|
+
def type
|
207
|
+
self[:type]
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
211
|
+
# Provides accessor methods for the members of the <tt>DictionaryInfo</tt> structure.
|
212
|
+
#
|
213
|
+
# @param [Symbol] methName
|
214
|
+
# @return member values for the <tt>mecab</tt> dictionary
|
215
|
+
# @raise [NoMethodError] if <tt>methName</tt> is not a member of this <tt>mecab</tt> dictionary <tt>FFI::Struct</tt>
|
216
|
+
def method_missing(methName)
|
217
|
+
member_sym = methName.id2name.to_sym
|
218
|
+
return self[member_sym] if self.members.include?(member_sym)
|
219
|
+
raise(NoMethodError.new("undefined method '#{methName}' for #{self}"))
|
220
|
+
end
|
177
221
|
end
|
178
222
|
end
|
data/lib/natto/version.rb
CHANGED
data/test/test_natto.rb
CHANGED
@@ -129,7 +129,13 @@ class TestNatto < Test::Unit::TestCase
|
|
129
129
|
end
|
130
130
|
end
|
131
131
|
|
132
|
-
# Tests the
|
132
|
+
# Tests the mecab version string accessor method of Natto::MeCab.
|
133
|
+
def test_version_accessor
|
134
|
+
m = Natto::MeCab.new
|
135
|
+
assert_equal('0.98', m.version)
|
136
|
+
end
|
137
|
+
|
138
|
+
# Tests the dictionary accessor method of Natto::MeCab
|
133
139
|
# Assumes that:
|
134
140
|
# a) system dictionary is /usr/local/lib/mecab/dic/ipadic/sys.dic
|
135
141
|
# b) system dictionary encoding is utf-8
|
@@ -142,4 +148,21 @@ class TestNatto < Test::Unit::TestCase
|
|
142
148
|
assert_equal('utf8', sysdic[:charset])
|
143
149
|
assert_equal(0x0, sysdic[:next].address)
|
144
150
|
end
|
151
|
+
|
152
|
+
# Tests the accessors of Natto::DictionaryInfo
|
153
|
+
# Note: Object#type was removed in 1.9.n, and emits a deprecation warning
|
154
|
+
# in 1.8.n
|
155
|
+
def test_dictionary_info_member_accessors
|
156
|
+
m = Natto::MeCab.new
|
157
|
+
sysdic = m.dicts.first
|
158
|
+
members = %w( filename charset type size lsize rsize version next )
|
159
|
+
members.each do |nomme|
|
160
|
+
assert_not_nil(sysdic.send nomme.to_sym )
|
161
|
+
end
|
162
|
+
|
163
|
+
# NoMethodError will be raised for anything else!
|
164
|
+
assert_raise NoMethodError do
|
165
|
+
sysdic.send :nomethoderror
|
166
|
+
end
|
167
|
+
end
|
145
168
|
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
+
- 1
|
7
8
|
- 0
|
8
|
-
|
9
|
-
version: 0.0.9
|
9
|
+
version: 0.1.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Brooke M. Fujita
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-01-
|
17
|
+
date: 2011-01-15 00:00:00 +09:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -55,7 +55,7 @@ licenses:
|
|
55
55
|
post_install_message:
|
56
56
|
rdoc_options:
|
57
57
|
- --title
|
58
|
-
-
|
58
|
+
- natto 0.1.0 -- Ruby-Mecab binding
|
59
59
|
- --main
|
60
60
|
- README.md
|
61
61
|
- -c UTF-8
|