natto 0.0.9 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +40 -10
- data/lib/natto.rb +70 -26
- data/lib/natto/version.rb +1 -1
- data/test/test_natto.rb +24 -1
- metadata +4 -4
data/README.md
CHANGED
@@ -33,14 +33,18 @@ e.g., for Cygwin
|
|
33
33
|
=> #<Natto::MeCab:0x28d93dd4 @options={}, \
|
34
34
|
@dicts=[#<Natto::DictionaryInfo:0x28d93d34>], \
|
35
35
|
@ptr=#<FFI::Pointer address=0x28af3e58>>
|
36
|
-
puts m.parse(
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
36
|
+
puts m.parse('暑い日にはもってこいの一品ですね。')
|
37
|
+
暑い日にはもってこいの一品ですね。
|
38
|
+
暑い 形容詞,自立,*,*,形容詞・アウオ段,基本形,暑い,アツイ,アツイ
|
39
|
+
日 名詞,非自立,副詞可能,*,*,*,日,ヒ,ヒ
|
40
|
+
に 助詞,格助詞,一般,*,*,*,に,ニ,ニ
|
41
|
+
は 助詞,係助詞,*,*,*,*,は,ハ,ワ
|
42
|
+
もってこい 名詞,一般,*,*,*,*,もってこい,モッテコイ,モッテコイ
|
42
43
|
の 助詞,連体化,*,*,*,*,の,ノ,ノ
|
43
|
-
|
44
|
+
一品 名詞,一般,*,*,*,*,一品,イッピン,イッピン
|
45
|
+
です 助動詞,*,*,*,特殊・デス,基本形,です,デス,デス
|
46
|
+
ね 助詞,終助詞,*,*,*,*,ね,ネ,ネ
|
47
|
+
。 記号,句点,*,*,*,*,。,。,。
|
44
48
|
EOS
|
45
49
|
=> nil
|
46
50
|
|
@@ -55,7 +59,12 @@ e.g., for Cygwin
|
|
55
59
|
|
56
60
|
## Changelog
|
57
61
|
|
58
|
-
- __2011/01/
|
62
|
+
- __2011/01/15__: 0.1.0 release.
|
63
|
+
- Added accessors to Natto::DictionaryInfo
|
64
|
+
- Added accessor for version in Natto::MeCab
|
65
|
+
- Continuing update of documentation
|
66
|
+
|
67
|
+
- __2011/01/13__: 0.0.9 release.
|
59
68
|
- Further development and testing for mecab dictionary access/destruction
|
60
69
|
- Continuing update of documentation
|
61
70
|
|
@@ -90,5 +99,26 @@ e.g., for Cygwin
|
|
90
99
|
- Initial release
|
91
100
|
|
92
101
|
## Copyright
|
93
|
-
|
94
|
-
|
102
|
+
Copyright © 2010-2013, Brooke M. Fujita.
|
103
|
+
All rights reserved.
|
104
|
+
|
105
|
+
Redistribution and use in source and binary forms, with or without modification, are
|
106
|
+
permitted provided that the following conditions are met:
|
107
|
+
|
108
|
+
* Redistributions of source code must retain the above
|
109
|
+
copyright notice, this list of conditions and the
|
110
|
+
following disclaimer.
|
111
|
+
|
112
|
+
* Redistributions in binary form must reproduce the above
|
113
|
+
copyright notice, this list of conditions and the
|
114
|
+
following disclaimer in the documentation and/or other
|
115
|
+
materials provided with the distribution.
|
116
|
+
|
117
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
|
118
|
+
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
119
|
+
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
120
|
+
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
121
|
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
122
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
123
|
+
TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
124
|
+
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/lib/natto.rb
CHANGED
@@ -9,25 +9,27 @@ module Natto
|
|
9
9
|
# Options to the <tt>mecab</tt> parser are passed in as a hash at
|
10
10
|
# initialization.
|
11
11
|
#
|
12
|
-
#
|
13
|
-
#
|
12
|
+
# <h2>Usage</h2>
|
13
|
+
# Here is how to use natto under Ruby 1.8.n:<br/>
|
14
|
+
#
|
15
|
+
# require 'rubygems'
|
14
16
|
# require 'natto'
|
15
17
|
#
|
16
18
|
# m = Natto::MeCab.new
|
17
19
|
# => #<Natto::MeCab:0x28d93dd4 @options={}, \
|
18
20
|
# @dicts=[#<Natto::DictionaryInfo:0x28d93d34>], \
|
19
21
|
# @ptr=#<FFI::Pointer address=0x28af3e58>>
|
20
|
-
# puts m.parse("
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
# うち 名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ
|
22
|
+
# puts m.parse("ネバネバの組み合わせ美味しいです。")
|
23
|
+
# ネバネバ 名詞,サ変接続,*,*,*,*,ネバネバ,ネバネバ,ネバネバ
|
24
|
+
# の 助詞,連体化,*,*,*,*,の,ノ,ノ
|
25
|
+
# 組み合わせ 名詞,一般,*,*,*,*,組み合わせ,クミアワセ,クミアワセ
|
26
|
+
# 美味しい 形容詞,自立,*,*,形容詞・イ段,基本形,美味しい,オイシイ,オイシイ
|
27
|
+
# です 助動詞,*,*,*,特殊・デス,基本形,です,デス,デス
|
28
|
+
# 。 記号,句点,*,*,*,*,。,。,。
|
28
29
|
# EOS
|
29
30
|
# => nil
|
30
|
-
#
|
31
|
+
#
|
32
|
+
# The <tt>require 'rubygems'</tt> can be omitted for Ruby 1.9.n.
|
31
33
|
class MeCab
|
32
34
|
|
33
35
|
attr_reader :options, :dicts
|
@@ -61,19 +63,22 @@ module Natto
|
|
61
63
|
# - :theta -- temperature parameter theta (float, default 0.75)
|
62
64
|
# - :cost_factor -- cost factor (integer, default 700)
|
63
65
|
#
|
64
|
-
#
|
65
|
-
#
|
66
|
-
#
|
66
|
+
# <i>Use single-quotes to preserve format options that contain escape chars.</i><br/>
|
67
|
+
# e.g.<br/>
|
68
|
+
#
|
67
69
|
# m = Natto::MeCab.new(:node_format=>'%m\t%f[7]\n')
|
68
70
|
# => #<Natto::MeCab:0x28d8886c @options={:node_format=>"%m\\t%f[7]\\n"}, \
|
69
|
-
#
|
70
|
-
#
|
71
|
-
# puts m.parse(
|
72
|
-
#
|
73
|
-
#
|
74
|
-
#
|
75
|
-
#
|
76
|
-
#
|
71
|
+
# @dicts=[#<Natto::DictionaryInfo:0x28d8863c>], \
|
72
|
+
# @ptr=#<FFI::Pointer address=0x28e3b268>>
|
73
|
+
# puts m.parse('簡単で美味しくて良いですよね。')
|
74
|
+
# 簡単 カンタン
|
75
|
+
# で デ
|
76
|
+
# 美味しくて オイシクテ
|
77
|
+
# 良い ヨイ
|
78
|
+
# です デス
|
79
|
+
# よ ヨ
|
80
|
+
# ね ネ
|
81
|
+
# 。
|
77
82
|
# EOS
|
78
83
|
# => nil
|
79
84
|
#
|
@@ -106,6 +111,12 @@ module Natto
|
|
106
111
|
raise(MeCabError.new(Natto::Binding.mecab_strerror(@ptr)))
|
107
112
|
end
|
108
113
|
|
114
|
+
# Returns the <tt>mecab</tt> version.
|
115
|
+
#
|
116
|
+
# @return <tt>mecab</tt> version
|
117
|
+
def version
|
118
|
+
Natto::Binding.mecab_version
|
119
|
+
end
|
109
120
|
|
110
121
|
# Returns a <tt>Proc</tt> that is registered to be invoked
|
111
122
|
# after the object owning <tt>ptr</tt> has been destroyed.
|
@@ -146,8 +157,9 @@ module Natto
|
|
146
157
|
# <tt>DictionaryInfo</tt> is a wrapper for a <tt>MeCab</tt>
|
147
158
|
# instance's related dictionary information.
|
148
159
|
#
|
149
|
-
# Values
|
150
|
-
#
|
160
|
+
# Values for the <tt>mecab</tt> dictionary attributes may be
|
161
|
+
# obtained by using the following <tt>Symbol</tt>s as keys
|
162
|
+
# to the associative array of <tt>FFI::Struct</tt> members.
|
151
163
|
#
|
152
164
|
# - :filename
|
153
165
|
# - :charset
|
@@ -158,14 +170,26 @@ module Natto
|
|
158
170
|
# - :version
|
159
171
|
# - :next
|
160
172
|
#
|
161
|
-
#
|
173
|
+
# <h2>Usage</h2>
|
174
|
+
#
|
162
175
|
# m = Natto::MeCab.new
|
163
176
|
# sysdic = m.dicts.first
|
177
|
+
# puts sysdic.filename
|
178
|
+
# => /usr/local/lib/mecab/dic/ipadic/sys.dic
|
179
|
+
# puts sysdic.charset
|
180
|
+
# => utf8
|
181
|
+
#
|
182
|
+
# It is also possible to use the <tt>Symbol</tt> for the
|
183
|
+
# <tt>mecab</tt> dictionary member to index into the
|
184
|
+
# <tt>FFI::Struct</tt> layout associative array like so:
|
185
|
+
#
|
164
186
|
# puts sysdic[:filename]
|
165
187
|
# => /usr/local/lib/mecab/dic/ipadic/sys.dic
|
166
188
|
# puts sysdic[:charset]
|
167
189
|
# => utf8
|
190
|
+
#
|
168
191
|
class DictionaryInfo < FFI::Struct
|
192
|
+
|
169
193
|
layout :filename, :string,
|
170
194
|
:charset, :string,
|
171
195
|
:size, :uint,
|
@@ -173,6 +197,26 @@ module Natto
|
|
173
197
|
:lsize, :uint,
|
174
198
|
:rsize, :uint,
|
175
199
|
:version, :ushort,
|
176
|
-
:next, :pointer
|
200
|
+
:next, :pointer
|
201
|
+
|
202
|
+
# Hack to avoid the deprecation message for Object#type.
|
203
|
+
if RUBY_VERSION.to_f < 1.9
|
204
|
+
alias_method :deprecated_type, :type
|
205
|
+
# @private
|
206
|
+
def type
|
207
|
+
self[:type]
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
211
|
+
# Provides accessor methods for the members of the <tt>DictionaryInfo</tt> structure.
|
212
|
+
#
|
213
|
+
# @param [Symbol] methName
|
214
|
+
# @return member values for the <tt>mecab</tt> dictionary
|
215
|
+
# @raise [NoMethodError] if <tt>methName</tt> is not a member of this <tt>mecab</tt> dictionary <tt>FFI::Struct</tt>
|
216
|
+
def method_missing(methName)
|
217
|
+
member_sym = methName.id2name.to_sym
|
218
|
+
return self[member_sym] if self.members.include?(member_sym)
|
219
|
+
raise(NoMethodError.new("undefined method '#{methName}' for #{self}"))
|
220
|
+
end
|
177
221
|
end
|
178
222
|
end
|
data/lib/natto/version.rb
CHANGED
data/test/test_natto.rb
CHANGED
@@ -129,7 +129,13 @@ class TestNatto < Test::Unit::TestCase
|
|
129
129
|
end
|
130
130
|
end
|
131
131
|
|
132
|
-
# Tests the
|
132
|
+
# Tests the mecab version string accessor method of Natto::MeCab.
|
133
|
+
def test_version_accessor
|
134
|
+
m = Natto::MeCab.new
|
135
|
+
assert_equal('0.98', m.version)
|
136
|
+
end
|
137
|
+
|
138
|
+
# Tests the dictionary accessor method of Natto::MeCab
|
133
139
|
# Assumes that:
|
134
140
|
# a) system dictionary is /usr/local/lib/mecab/dic/ipadic/sys.dic
|
135
141
|
# b) system dictionary encoding is utf-8
|
@@ -142,4 +148,21 @@ class TestNatto < Test::Unit::TestCase
|
|
142
148
|
assert_equal('utf8', sysdic[:charset])
|
143
149
|
assert_equal(0x0, sysdic[:next].address)
|
144
150
|
end
|
151
|
+
|
152
|
+
# Tests the accessors of Natto::DictionaryInfo
|
153
|
+
# Note: Object#type was removed in 1.9.n, but triggers a deprecation warning
|
154
|
+
# in 1.8.n
|
155
|
+
def test_dictionary_info_member_accessors
|
156
|
+
m = Natto::MeCab.new
|
157
|
+
sysdic = m.dicts.first
|
158
|
+
members = %w( filename charset type size lsize rsize version next )
|
159
|
+
members.each do |nomme|
|
160
|
+
assert_not_nil(sysdic.send nomme.to_sym )
|
161
|
+
end
|
162
|
+
|
163
|
+
# NoMethodError will be raised for anything else!
|
164
|
+
assert_raise NoMethodError do
|
165
|
+
sysdic.send :nomethoderror
|
166
|
+
end
|
167
|
+
end
|
145
168
|
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
+
- 1
|
7
8
|
- 0
|
8
|
-
|
9
|
-
version: 0.0.9
|
9
|
+
version: 0.1.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Brooke M. Fujita
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-01-
|
17
|
+
date: 2011-01-15 00:00:00 +09:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -55,7 +55,7 @@ licenses:
|
|
55
55
|
post_install_message:
|
56
56
|
rdoc_options:
|
57
57
|
- --title
|
58
|
-
-
|
58
|
+
- natto 0.1.0 -- Ruby-Mecab binding
|
59
59
|
- --main
|
60
60
|
- README.md
|
61
61
|
- -c UTF-8
|