natto 0.9.5 → 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG +11 -0
- data/README.md +17 -22
- data/lib/natto.rb +1 -599
- data/lib/natto/binding.rb +36 -17
- data/lib/natto/natto.rb +295 -0
- data/lib/natto/option_parse.rb +36 -28
- data/lib/natto/struct.rb +310 -0
- data/lib/natto/version.rb +16 -16
- metadata +30 -33
- data/lib/natto/utils.rb +0 -16
data/lib/natto/struct.rb
ADDED
@@ -0,0 +1,310 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'natto/binding'
|
3
|
+
require 'natto/option_parse'
|
4
|
+
|
5
|
+
module Natto
|
6
|
+
require 'ffi'
|
7
|
+
|
8
|
+
# `MeCabStruct` is a general base class for `FFI::Struct` objects in
|
9
|
+
# the `Natto` module. Please refer to
|
10
|
+
# [`mecab.h`](http://code.google.com/p/mecab/source/browse/trunk/mecab/src/mecab.h)
|
11
|
+
class MeCabStruct < FFI::Struct
|
12
|
+
# Provides accessor methods for the members of the `mecab` struct.
|
13
|
+
#
|
14
|
+
# @param [Symbol] attr_name name of the struct member being accessed
|
15
|
+
# @return member values for the `mecab` struct
|
16
|
+
# @raise [NoMethodError] if `attr_name` is not a member of this `mecab` struct
|
17
|
+
def method_missing(attr_name)
|
18
|
+
member_sym = attr_name.id2name.to_sym
|
19
|
+
return self[member_sym] if self.members.include?(member_sym)
|
20
|
+
raise(NoMethodError.new("undefined method '#{attr_name}' for #{self}"))
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
# `DictionaryInfo` is a wrapper for `struct mecab_dictionary_info_t`
|
25
|
+
# that holds the `MeCab` instance's related dictionary information.
|
26
|
+
#
|
27
|
+
# Values for the `mecab` dictionary attributes may be
|
28
|
+
# obtained by using the following `Symbol`s as keys
|
29
|
+
# to the layout associative array of `FFI::Struct` members.
|
30
|
+
#
|
31
|
+
# - :filename
|
32
|
+
# - :charset
|
33
|
+
# - :size
|
34
|
+
# - :type
|
35
|
+
# - :lsize
|
36
|
+
# - :rsize
|
37
|
+
# - :version
|
38
|
+
# - :next
|
39
|
+
#
|
40
|
+
# ## Usage
|
41
|
+
# `mecab` dictionary attributes can be obtained by
|
42
|
+
# using their corresponding accessor.
|
43
|
+
#
|
44
|
+
# nm = Natto::MeCab.new
|
45
|
+
#
|
46
|
+
# sysdic = nm.dicts.first
|
47
|
+
#
|
48
|
+
# puts sysdic.filename
|
49
|
+
# => "/usr/local/lib/mecab/dic/ipadic/sys.dic"
|
50
|
+
#
|
51
|
+
# puts sysdic.charset
|
52
|
+
# => "utf8"
|
53
|
+
#
|
54
|
+
# puts sysdic.is_sysdic?
|
55
|
+
# => true
|
56
|
+
class DictionaryInfo < MeCabStruct
|
57
|
+
# System dictionary.
|
58
|
+
SYS_DIC = 0
|
59
|
+
# User dictionary.
|
60
|
+
USR_DIC = 1
|
61
|
+
# Unknown dictionary.
|
62
|
+
UNK_DIC = 2
|
63
|
+
|
64
|
+
layout :filename, :string,
|
65
|
+
:charset, :string,
|
66
|
+
:size, :uint,
|
67
|
+
:type, :int,
|
68
|
+
:lsize, :uint,
|
69
|
+
:rsize, :uint,
|
70
|
+
:version, :ushort,
|
71
|
+
:next, :pointer
|
72
|
+
|
73
|
+
if Object.respond_to?(:type) && Object.respond_to?(:class)
|
74
|
+
alias_method :deprecated_type, :type
|
75
|
+
# `Object#type` override defined when both `type` and
|
76
|
+
# `class` are Object methods. This is a hack to avoid the
|
77
|
+
# `Object#type` deprecation warning thrown up in Ruby 1.8.7
|
78
|
+
# and in JRuby.
|
79
|
+
#
|
80
|
+
# @return [Fixnum] `mecab` dictionary type
|
81
|
+
def type
|
82
|
+
self[:type]
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
# Returns human-readable details for this `mecab` dictionary.
|
87
|
+
# Overrides `Object#to_s`.
|
88
|
+
#
|
89
|
+
# - encoded object id
|
90
|
+
# - dictionary type
|
91
|
+
# - full-path dictionary filename
|
92
|
+
# - dictionary charset
|
93
|
+
#
|
94
|
+
# @return [String] encoded object id, type, dictionary filename, and charset
|
95
|
+
def to_s
|
96
|
+
%(#{super.chop} type="#{self.type}", filename="#{self.filename}", charset="#{self.charset}">)
|
97
|
+
end
|
98
|
+
|
99
|
+
# Overrides `Object#inspect`.
|
100
|
+
#
|
101
|
+
# @return [String] encoded object id, type, dictionary filename, and charset
|
102
|
+
# @see #to_s
|
103
|
+
def inspect
|
104
|
+
self.to_s
|
105
|
+
end
|
106
|
+
|
107
|
+
# Returns `true` if this is a system dictionary.
|
108
|
+
# @return [Boolean]
|
109
|
+
def is_sysdic?
|
110
|
+
self.type == SYS_DIC
|
111
|
+
end
|
112
|
+
|
113
|
+
# Returns `true` if this is a user dictionary.
|
114
|
+
# @return [Boolean]
|
115
|
+
def is_usrdic?
|
116
|
+
self.type == USR_DIC
|
117
|
+
end
|
118
|
+
|
119
|
+
# Returns `true` if this is an unknown dictionary type.
|
120
|
+
# @return [Boolean]
|
121
|
+
def is_unkdic?
|
122
|
+
self.type == UNK_DIC
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
# `MeCabNode` is a wrapper for the structure holding
|
127
|
+
# the parsed `node`.
|
128
|
+
#
|
129
|
+
# Values for the `mecab` node attributes may be
|
130
|
+
# obtained by using the following `Symbol`s as keys
|
131
|
+
# to the layout associative array of `FFI::Struct` members.
|
132
|
+
#
|
133
|
+
# - :prev
|
134
|
+
# - :next
|
135
|
+
# - :enext
|
136
|
+
# - :bnext
|
137
|
+
# - :rpath
|
138
|
+
# - :lpath
|
139
|
+
# - :surface
|
140
|
+
# - :feature
|
141
|
+
# - :id
|
142
|
+
# - :length
|
143
|
+
# - :rlength
|
144
|
+
# - :rcAttr
|
145
|
+
# - :lcAttr
|
146
|
+
# - :posid
|
147
|
+
# - :char_type
|
148
|
+
# - :stat
|
149
|
+
# - :isbest
|
150
|
+
# - :alpha
|
151
|
+
# - :beta
|
152
|
+
# - :prob
|
153
|
+
# - :wcost
|
154
|
+
# - :cost
|
155
|
+
#
|
156
|
+
# ## Usage
|
157
|
+
# An instance of `MeCabNode` is yielded to the block
|
158
|
+
# used with `MeCab#parse`, where the above-mentioned
|
159
|
+
# node attributes may be accessed by name.
|
160
|
+
#
|
161
|
+
# nm = Natto::MeCab.new
|
162
|
+
#
|
163
|
+
# nm.parse('卓球なんて死ぬまでの暇つぶしだよ。') do |n|
|
164
|
+
# puts "#{n.surface}\t#{n.cost}" if n.is_nor?
|
165
|
+
# end
|
166
|
+
# 卓球 2874
|
167
|
+
# な 4398
|
168
|
+
# 死ぬ 9261
|
169
|
+
# まで 9386
|
170
|
+
# の 10007
|
171
|
+
# 暇つぶし 13324
|
172
|
+
# だ 15346
|
173
|
+
# よ 14396
|
174
|
+
# 。 10194
|
175
|
+
#
|
176
|
+
# It is also possible to use the `Symbol` for the
|
177
|
+
# `mecab` node member to index into the
|
178
|
+
# `FFI::Struct` layout associative array like so:
|
179
|
+
#
|
180
|
+
# nm.parse('あいつ笑うと結構可愛い顔してんよ。') {|n| puts n[:feature] }
|
181
|
+
# 名詞,代名詞,一般,*,*,*,あいつ,アイツ,アイツ
|
182
|
+
# 動詞,自立,*,*,五段・ワ行促音便,基本形,笑う,ワラウ,ワラウ
|
183
|
+
# 助詞,接続助詞,*,*,*,*,と,ト,ト
|
184
|
+
# 副詞,一般,*,*,*,*,結構,ケッコウ,ケッコー
|
185
|
+
# 形容詞,自立,*,*,形容詞・イ段,基本形,可愛い,カワイイ,カワイイ
|
186
|
+
# 名詞,一般,*,*,*,*,顔,カオ,カオ
|
187
|
+
# 動詞,自立,*,*,サ変・スル,連用形,する,シ,シ
|
188
|
+
# 動詞,非自立,*,*,一段,体言接続特殊,てる,テン,テン
|
189
|
+
# 助詞,終助詞,*,*,*,*,よ,ヨ,ヨ
|
190
|
+
# 記号,句点,*,*,*,*,。,。,。
|
191
|
+
# BOS/EOS,*,*,*,*,*,*,*,*
|
192
|
+
#
|
193
|
+
class MeCabNode < MeCabStruct
|
194
|
+
attr_accessor :surface, :feature
|
195
|
+
attr_reader :pointer
|
196
|
+
|
197
|
+
# Normal `mecab` node defined in the dictionary.
|
198
|
+
NOR_NODE = 0
|
199
|
+
# Unknown `mecab` node not defined in the dictionary.
|
200
|
+
UNK_NODE = 1
|
201
|
+
# Virtual node representing the beginning of the sentence.
|
202
|
+
BOS_NODE = 2
|
203
|
+
# Virtual node representing the end of the sentence.
|
204
|
+
EOS_NODE = 3
|
205
|
+
# Virtual node representing the end of an N-Best `mecab` node list.
|
206
|
+
EON_NODE = 4
|
207
|
+
|
208
|
+
layout :prev, :pointer,
|
209
|
+
:next, :pointer,
|
210
|
+
:enext, :pointer,
|
211
|
+
:bnext, :pointer,
|
212
|
+
:rpath, :pointer,
|
213
|
+
:lpath, :pointer,
|
214
|
+
:surface, :string,
|
215
|
+
:feature, :string,
|
216
|
+
:id, :uint,
|
217
|
+
:length, :ushort,
|
218
|
+
:rlength, :ushort,
|
219
|
+
:rcAttr, :ushort,
|
220
|
+
:lcAttr, :ushort,
|
221
|
+
:posid, :ushort,
|
222
|
+
:char_type, :uchar,
|
223
|
+
:stat, :uchar,
|
224
|
+
:isbest, :uchar,
|
225
|
+
:alpha, :float,
|
226
|
+
:beta, :float,
|
227
|
+
:prob, :float,
|
228
|
+
:wcost, :short,
|
229
|
+
:cost, :long
|
230
|
+
|
231
|
+
#if RUBY_VERSION.to_f < 1.9
|
232
|
+
# alias_method :deprecated_id, :id
|
233
|
+
# # `Object#id` override defined when `RUBY_VERSION` is
|
234
|
+
# # older than 1.9. This is a hack to avoid the `Object#id`
|
235
|
+
# # deprecation warning thrown up in Ruby 1.8.7.
|
236
|
+
# #
|
237
|
+
# # <i>This method override is not defined when the Ruby interpreter
|
238
|
+
# # is 1.9 or greater.</i>
|
239
|
+
# # @return [Fixnum] `mecab` node id
|
240
|
+
# def id
|
241
|
+
# self[:id]
|
242
|
+
# end
|
243
|
+
#end
|
244
|
+
|
245
|
+
# Initializes this node instance.
|
246
|
+
# Sets the `MeCab` feature value for this node.
|
247
|
+
#
|
248
|
+
# @param [FFI::Pointer] ptr pointer to the `mecab` node wrapped by this struct
|
249
|
+
def initialize(ptr)
|
250
|
+
super(ptr)
|
251
|
+
@pointer = ptr
|
252
|
+
|
253
|
+
if self[:feature]
|
254
|
+
@feature = self[:feature].force_encoding(Encoding.default_external)
|
255
|
+
end
|
256
|
+
end
|
257
|
+
|
258
|
+
# Returns human-readable details for the `mecab` node.
|
259
|
+
# Overrides `Object#to_s`.
|
260
|
+
#
|
261
|
+
# - encoded object id
|
262
|
+
# - underlying FFI pointer to MeCab Node
|
263
|
+
# - stat (node type: NOR, UNK, BOS/EOS, EON)
|
264
|
+
# - surface
|
265
|
+
# - feature
|
266
|
+
#
|
267
|
+
# @return [String] encoded object id, underlying FFI pointer, stat, surface, and feature
|
268
|
+
def to_s
|
269
|
+
%(#{super.chop} @pointer=#{@pointer}, stat=#{self[:stat]}, @surface="#{self.surface}", @feature="#{self.feature}">)
|
270
|
+
end
|
271
|
+
|
272
|
+
# Overrides `Object#inspect`.
|
273
|
+
#
|
274
|
+
# @return [String] encoded object id, underlying FFI pointer, stat, surface, and feature
|
275
|
+
# @see #to_s
|
276
|
+
def inspect
|
277
|
+
self.to_s
|
278
|
+
end
|
279
|
+
|
280
|
+
# Returns `true` if this is a normal `mecab` node found in the dictionary.
|
281
|
+
# @return [Boolean]
|
282
|
+
def is_nor?
|
283
|
+
self.stat == NOR_NODE
|
284
|
+
end
|
285
|
+
|
286
|
+
# Returns `true` if this is an unknown `mecab` node not found in the dictionary.
|
287
|
+
# @return [Boolean]
|
288
|
+
def is_unk?
|
289
|
+
self.stat == UNK_NODE
|
290
|
+
end
|
291
|
+
|
292
|
+
# Returns `true` if this is a virtual `mecab` node representing the beginning of the sentence.
|
293
|
+
# @return [Boolean]
|
294
|
+
def is_bos?
|
295
|
+
self.stat == BOS_NODE
|
296
|
+
end
|
297
|
+
|
298
|
+
# Returns `true` if this is a virtual `mecab` node representing the end of the sentence.
|
299
|
+
# @return [Boolean]
|
300
|
+
def is_eos?
|
301
|
+
self.stat == EOS_NODE
|
302
|
+
end
|
303
|
+
|
304
|
+
# Returns `true` if this is a virtual `mecab` node representing the end of the node list.
|
305
|
+
# @return [Boolean]
|
306
|
+
def is_eon?
|
307
|
+
self.stat == EON_NODE
|
308
|
+
end
|
309
|
+
end
|
310
|
+
end
|
data/lib/natto/version.rb
CHANGED
@@ -1,31 +1,31 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
|
3
|
-
#
|
4
|
-
# a binding to the
|
3
|
+
# `Natto` is the namespace for objects that provide
|
4
|
+
# a binding to the `mecab` tagger and related resources.
|
5
5
|
#
|
6
|
-
#
|
6
|
+
# `Natto::MeCab` is a wrapper class for the `mecab`
|
7
7
|
# tagger.
|
8
8
|
#
|
9
|
-
#
|
9
|
+
# `Natto::MeCabStruct` is a base class for a `mecab`
|
10
10
|
# struct.
|
11
11
|
#
|
12
|
-
#
|
13
|
-
# a
|
12
|
+
# `Natto::MeCabNode` is a wrapper for the struct representing
|
13
|
+
# a `mecab`-parsed node.
|
14
14
|
#
|
15
|
-
#
|
16
|
-
# representing a
|
15
|
+
# `Natto::DictionaryInfo` is a wrapper for the struct
|
16
|
+
# representing a `Natto::MeCab` instance's related
|
17
17
|
# dictionary information.
|
18
18
|
#
|
19
|
-
#
|
20
|
-
#
|
19
|
+
# `Natto::MeCabError` is a general error class for the
|
20
|
+
# `Natto` module.
|
21
21
|
#
|
22
|
-
# Module
|
23
|
-
# which are made available via
|
22
|
+
# Module `Natto::Binding` encapsulates methods and behavior
|
23
|
+
# which are made available via `FFI` bindings to `mecab`.
|
24
24
|
#
|
25
|
-
# Module
|
26
|
-
# for parsing the various
|
27
|
-
#
|
25
|
+
# Module `OptionParse` encapsulates methods and behavior
|
26
|
+
# for parsing the various `mecab` options supported by
|
27
|
+
# `Natto`.
|
28
28
|
module Natto
|
29
29
|
# Version string for this Rubygem.
|
30
|
-
VERSION = "0.9.
|
30
|
+
VERSION = "0.9.6"
|
31
31
|
end
|
metadata
CHANGED
@@ -1,34 +1,31 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: natto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
version: 0.9.5
|
4
|
+
version: 0.9.6
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Brooke M. Fujita
|
9
|
-
autorequire:
|
8
|
+
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2013-07-07 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: ffi
|
16
|
-
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
17
16
|
requirements:
|
18
|
-
- -
|
17
|
+
- - '>='
|
19
18
|
- !ruby/object:Gem::Version
|
20
|
-
version:
|
21
|
-
none: false
|
22
|
-
requirement: *2056
|
23
|
-
prerelease: false
|
19
|
+
version: 1.9.0
|
24
20
|
type: :runtime
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.9.0
|
27
|
+
description: |
|
28
|
+
natto bridges Ruby and MeCab via FFI (foreign function interface). No compiling is necessary, and natto will run on CRuby (mri/yarv) and JRuby (jvm) equally well, on any OS. natto provides the most natural, Ruby-esque API for MeCab.
|
32
29
|
email: buruzaemon@gmail.com
|
33
30
|
executables: []
|
34
31
|
extensions: []
|
@@ -36,39 +33,39 @@ extra_rdoc_files: []
|
|
36
33
|
files:
|
37
34
|
- lib/natto.rb
|
38
35
|
- lib/natto/binding.rb
|
36
|
+
- lib/natto/natto.rb
|
39
37
|
- lib/natto/option_parse.rb
|
40
|
-
- lib/natto/
|
38
|
+
- lib/natto/struct.rb
|
41
39
|
- lib/natto/version.rb
|
42
40
|
- README.md
|
43
41
|
- LICENSE
|
44
42
|
- CHANGELOG
|
45
43
|
- .yardopts
|
46
|
-
homepage: https://bitbucket.org/buruzaemon/natto
|
44
|
+
homepage: https://bitbucket.org/buruzaemon/natto
|
47
45
|
licenses:
|
48
46
|
- BSD
|
49
|
-
|
47
|
+
metadata: {}
|
48
|
+
post_install_message:
|
50
49
|
rdoc_options: []
|
51
50
|
require_paths:
|
52
51
|
- lib
|
53
52
|
required_ruby_version: !ruby/object:Gem::Requirement
|
54
53
|
requirements:
|
55
|
-
- -
|
54
|
+
- - '>='
|
56
55
|
- !ruby/object:Gem::Version
|
57
|
-
version: 1.
|
58
|
-
none: false
|
56
|
+
version: '1.9'
|
59
57
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
60
58
|
requirements:
|
61
|
-
- -
|
59
|
+
- - '>='
|
62
60
|
- !ruby/object:Gem::Version
|
63
61
|
version: '0'
|
64
|
-
none: false
|
65
62
|
requirements:
|
66
|
-
- MeCab, 0.
|
67
|
-
- FFI,
|
68
|
-
rubyforge_project:
|
69
|
-
rubygems_version:
|
70
|
-
signing_key:
|
71
|
-
specification_version:
|
72
|
-
summary: natto combines the Ruby programming language with MeCab, the part-of-speech
|
63
|
+
- MeCab, 0.996 or greater
|
64
|
+
- FFI, 1.9.0 or greater
|
65
|
+
rubyforge_project:
|
66
|
+
rubygems_version: 2.0.0
|
67
|
+
signing_key:
|
68
|
+
specification_version: 4
|
69
|
+
summary: natto combines the Ruby programming language with MeCab, the part-of-speech
|
70
|
+
and morphological analyzer for the Japanese language.
|
73
71
|
test_files: []
|
74
|
-
...
|