natto 0.9.5 → 0.9.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,310 @@
1
+ # coding: utf-8
2
+ require 'natto/binding'
3
+ require 'natto/option_parse'
4
+
5
+ module Natto
6
+ require 'ffi'
7
+
8
+ # `MeCabStruct` is a general base class for `FFI::Struct` objects in
9
+ # the `Natto` module. Please refer to
10
+ # [`mecab.h`](http://code.google.com/p/mecab/source/browse/trunk/mecab/src/mecab.h)
11
+ class MeCabStruct < FFI::Struct
12
+ # Provides accessor methods for the members of the `mecab` struct.
13
+ #
14
+ # @param [Symbol] attr_name
15
+ # @return member values for the `mecab` struct
16
+ # @raise [NoMethodError] if `attr_name` is not a member of this `mecab` struct
17
+ def method_missing(attr_name)
18
+ member_sym = attr_name.id2name.to_sym
19
+ return self[member_sym] if self.members.include?(member_sym)
20
+ raise(NoMethodError.new("undefined method '#{attr_name}' for #{self}"))
21
+ end
22
+ end
23
+
24
+ # `DictionaryInfo` is a wrapper for `struct mecab_dictionary_info_t`
25
+ # that holds the `MeCab` instance's related dictionary information.
26
+ #
27
+ # Values for the `mecab` dictionary attributes may be
28
+ # obtained by using the following `Symbol`s as keys
29
+ # to the layout associative array of `FFI::Struct` members.
30
+ #
31
+ # - :filename
32
+ # - :charset
33
+ # - :size
34
+ # - :type
35
+ # - :lsize
36
+ # - :rsize
37
+ # - :version
38
+ # - :next
39
+ #
40
+ # ## Usage
41
+ # `mecab` dictionary attributes can be obtained by
42
+ # using their corresponding accessor.
43
+ #
44
+ # nm = Natto::MeCab.new
45
+ #
46
+ # sysdic = nm.dicts.first
47
+ #
48
+ # puts sysdic.filename
49
+ # => "/usr/local/lib/mecab/dic/ipadic/sys.dic"
50
+ #
51
+ # puts sysdic.charset
52
+ # => "utf8"
53
+ #
54
+ # puts sysdic.is_sysdic?
55
+ # => true
56
+ class DictionaryInfo < MeCabStruct
57
+ # System dictionary.
58
+ SYS_DIC = 0
59
+ # User dictionary.
60
+ USR_DIC = 1
61
+ # Unknown dictionary.
62
+ UNK_DIC = 2
63
+
64
+ layout :filename, :string,
65
+ :charset, :string,
66
+ :size, :uint,
67
+ :type, :int,
68
+ :lsize, :uint,
69
+ :rsize, :uint,
70
+ :version, :ushort,
71
+ :next, :pointer
72
+
73
+ if Object.respond_to?(:type) && Object.respond_to?(:class)
74
+ alias_method :deprecated_type, :type
75
+ # `Object#type` override defined when both `type` and
76
+ # `class` are Object methods. This is a hack to avoid the
77
+ # `Object#type` deprecation warning thrown up in Ruby 1.8.7
78
+ # and in JRuby.
79
+ #
80
+ # @return [Fixnum] `mecab` dictionary type
81
+ def type
82
+ self[:type]
83
+ end
84
+ end
85
+
86
+ # Returns human-readable details for this `mecab` dictionary.
87
+ # Overrides `Object#to_s`.
88
+ #
89
+ # - encoded object id
90
+ # - dictionary type
91
+ # - full-path dictionary filename
92
+ # - dictionary charset
93
+ #
94
+ # @return [String] encoded object id, type, dictionary filename, and charset
95
+ def to_s
96
+ %(#{super.chop} type="#{self.type}", filename="#{self.filename}", charset="#{self.charset}">)
97
+ end
98
+
99
+ # Overrides `Object#inspect`.
100
+ #
101
+ # @return [String] encoded object id, type, dictionary filename, and charset
102
+ # @see #to_s
103
+ def inspect
104
+ self.to_s
105
+ end
106
+
107
+ # Returns `true` if this is a system dictionary.
108
+ # @return [Boolean]
109
+ def is_sysdic?
110
+ self.type == SYS_DIC
111
+ end
112
+
113
+ # Returns `true` if this is a user dictionary.
114
+ # @return [Boolean]
115
+ def is_usrdic?
116
+ self.type == USR_DIC
117
+ end
118
+
119
+ # Returns `true` if this is an unknown dictionary type.
120
+ # @return [Boolean]
121
+ def is_unkdic?
122
+ self.type == UNK_DIC
123
+ end
124
+ end
125
+
126
+ # `MeCabNode` is a wrapper for the structure holding
127
+ # the parsed `node`.
128
+ #
129
+ # Values for the `mecab` node attributes may be
130
+ # obtained by using the following `Symbol`s as keys
131
+ # to the layout associative array of `FFI::Struct` members.
132
+ #
133
+ # - :prev
134
+ # - :next
135
+ # - :enext
136
+ # - :bnext
137
+ # - :rpath
138
+ # - :lpath
139
+ # - :surface
140
+ # - :feature
141
+ # - :id
142
+ # - :length
143
+ # - :rlength
144
+ # - :rcAttr
145
+ # - :lcAttr
146
+ # - :posid
147
+ # - :char_type
148
+ # - :stat
149
+ # - :isbest
150
+ # - :alpha
151
+ # - :beta
152
+ # - :prob
153
+ # - :wcost
154
+ # - :cost
155
+ #
156
+ # ## Usage
157
+ # An instance of `MeCabNode` is yielded to the block
158
+ # used with `MeCab#parse`, where the above-mentioned
159
+ # node attributes may be accessed by name.
160
+ #
161
+ # nm = Natto::MeCab.new
162
+ #
163
+ # nm.parse('卓球なんて死ぬまでの暇つぶしだよ。') do |n|
164
+ # puts "#{n.surface}\t#{n.cost}" if n.is_nor?
165
+ # end
166
+ # 卓球 2874
167
+ # な 4398
168
+ # 死ぬ 9261
169
+ # まで 9386
170
+ # の 10007
171
+ # 暇つぶし 13324
172
+ # だ 15346
173
+ # よ 14396
174
+ # 。 10194
175
+ #
176
+ # It is also possible to use the `Symbol` for the
177
+ # `mecab` node member to index into the
178
+ # `FFI::Struct` layout associative array like so:
179
+ #
180
+ # nm.parse('あいつ笑うと結構可愛い顔してんよ。') {|n| puts n[:feature] }
181
+ # 名詞,代名詞,一般,*,*,*,あいつ,アイツ,アイツ
182
+ # 動詞,自立,*,*,五段・ワ行促音便,基本形,笑う,ワラウ,ワラウ
183
+ # 助詞,接続助詞,*,*,*,*,と,ト,ト
184
+ # 副詞,一般,*,*,*,*,結構,ケッコウ,ケッコー
185
+ # 形容詞,自立,*,*,形容詞・イ段,基本形,可愛い,カワイイ,カワイイ
186
+ # 名詞,一般,*,*,*,*,顔,カオ,カオ
187
+ # 動詞,自立,*,*,サ変・スル,連用形,する,シ,シ
188
+ # 動詞,非自立,*,*,一段,体言接続特殊,てる,テン,テン
189
+ # 助詞,終助詞,*,*,*,*,よ,ヨ,ヨ
190
+ # 記号,句点,*,*,*,*,。,。,。
191
+ # BOS/EOS,*,*,*,*,*,*,*,*
192
+ #
193
+ class MeCabNode < MeCabStruct
194
+ attr_accessor :surface, :feature
195
+ attr_reader :pointer
196
+
197
+ # Normal `mecab` node defined in the dictionary.
198
+ NOR_NODE = 0
199
+ # Unknown `mecab` node not defined in the dictionary.
200
+ UNK_NODE = 1
201
+ # Virtual node representing the beginning of the sentence.
202
+ BOS_NODE = 2
203
+ # Virtual node representing the end of the sentence.
204
+ EOS_NODE = 3
205
+ # Virtual node representing the end of an N-Best `mecab` node list.
206
+ EON_NODE = 4
207
+
208
+ layout :prev, :pointer,
209
+ :next, :pointer,
210
+ :enext, :pointer,
211
+ :bnext, :pointer,
212
+ :rpath, :pointer,
213
+ :lpath, :pointer,
214
+ :surface, :string,
215
+ :feature, :string,
216
+ :id, :uint,
217
+ :length, :ushort,
218
+ :rlength, :ushort,
219
+ :rcAttr, :ushort,
220
+ :lcAttr, :ushort,
221
+ :posid, :ushort,
222
+ :char_type, :uchar,
223
+ :stat, :uchar,
224
+ :isbest, :uchar,
225
+ :alpha, :float,
226
+ :beta, :float,
227
+ :prob, :float,
228
+ :wcost, :short,
229
+ :cost, :long
230
+
231
+ #if RUBY_VERSION.to_f < 1.9
232
+ # alias_method :deprecated_id, :id
233
+ # # `Object#id` override defined when `RUBY_VERSION` is
234
+ # # older than 1.9. This is a hack to avoid the `Object#id`
235
+ # # deprecation warning thrown up in Ruby 1.8.7.
236
+ # #
237
+ # # <i>This method override is not defined when the Ruby interpreter
238
+ # # is 1.9 or greater.</i>
239
+ # # @return [Fixnum] `mecab` node id
240
+ # def id
241
+ # self[:id]
242
+ # end
243
+ #end
244
+
245
+ # Initializes this node instance.
246
+ # Sets the `MeCab` feature value for this node.
247
+ #
248
+ # @param [FFI::Pointer] ptr pointer to the underlying `mecab` node
249
+ def initialize(ptr)
250
+ super(ptr)
251
+ @pointer = ptr
252
+
253
+ if self[:feature]
254
+ @feature = self[:feature].force_encoding(Encoding.default_external)
255
+ end
256
+ end
257
+
258
+ # Returns human-readable details for the `mecab` node.
259
+ # Overrides `Object#to_s`.
260
+ #
261
+ # - encoded object id
262
+ # - underlying FFI pointer to MeCab Node
263
+ # - stat (node type: NOR, UNK, BOS/EOS, EON)
264
+ # - surface
265
+ # - feature
266
+ #
267
+ # @return [String] encoded object id, underlying FFI pointer, stat, surface, and feature
268
+ def to_s
269
+ %(#{super.chop} @pointer=#{@pointer}, stat=#{self[:stat]}, @surface="#{self.surface}", @feature="#{self.feature}">)
270
+ end
271
+
272
+ # Overrides `Object#inspect`.
273
+ #
274
+ # @return [String] encoded object id, underlying FFI pointer, stat, surface, and feature
275
+ # @see #to_s
276
+ def inspect
277
+ self.to_s
278
+ end
279
+
280
+ # Returns `true` if this is a normal `mecab` node found in the dictionary.
281
+ # @return [Boolean]
282
+ def is_nor?
283
+ self.stat == NOR_NODE
284
+ end
285
+
286
+ # Returns `true` if this is an unknown `mecab` node not found in the dictionary.
287
+ # @return [Boolean]
288
+ def is_unk?
289
+ self.stat == UNK_NODE
290
+ end
291
+
292
+ # Returns `true` if this is a virtual `mecab` node representing the beginning of the sentence.
293
+ # @return [Boolean]
294
+ def is_bos?
295
+ self.stat == BOS_NODE
296
+ end
297
+
298
+ # Returns `true` if this is a virtual `mecab` node representing the end of the sentence.
299
+ # @return [Boolean]
300
+ def is_eos?
301
+ self.stat == EOS_NODE
302
+ end
303
+
304
+ # Returns `true` if this is a virtual `mecab` node representing the end of the node list.
305
+ # @return [Boolean]
306
+ def is_eon?
307
+ self.stat == EON_NODE
308
+ end
309
+ end
310
+ end
@@ -1,31 +1,31 @@
1
1
  # coding: utf-8
2
2
 
3
- # <tt>Natto</tt> is the namespace for objects that provide
4
- # a binding to the <tt>mecab</tt> tagger and related resources.
3
+ # `Natto` is the namespace for objects that provide
4
+ # a binding to the `mecab` tagger and related resources.
5
5
  #
6
- # <tt>Natto::MeCab</tt> is a wrapper class for the <tt>mecab</tt>
6
+ # `Natto::MeCab` is a wrapper class for the `mecab`
7
7
  # tagger.
8
8
  #
9
- # <tt>Natto::MeCabStruct</tt> is a base class for a <tt>mecab</tt>
9
+ # `Natto::MeCabStruct` is a base class for a `mecab`
10
10
  # struct.
11
11
  #
12
- # <tt>Natto::MeCabNode</tt> is a wrapper for the struct representing
13
- # a <tt>mecab</tt>-parsed node.
12
+ # `Natto::MeCabNode` is a wrapper for the struct representing
13
+ # a `mecab`-parsed node.
14
14
  #
15
- # <tt>Natto::DictionaryInfo</tt> is a wrapper for the struct
16
- # representing a <tt>Natto::MeCab</tt> instance's related
15
+ # `Natto::DictionaryInfo` is a wrapper for the struct
16
+ # representing a `Natto::MeCab` instance's related
17
17
  # dictionary information.
18
18
  #
19
- # <tt>Natto::MeCabError</tt> is a general error class for the
20
- # <tt>Natto</tt> module.
19
+ # `Natto::MeCabError` is a general error class for the
20
+ # `Natto` module.
21
21
  #
22
- # Module <tt>Natto::Binding</tt> encapsulates methods and behavior
23
- # which are made available via <tt>FFI</tt> bindings to <tt>mecab</tt>.
22
+ # Module `Natto::Binding` encapsulates methods and behavior
23
+ # which are made available via `FFI` bindings to `mecab`.
24
24
  #
25
- # Module <tt>OptionParse</tt> encapsulates methods and behavior
26
- # for parsing the various <tt>mecab</tt> options supported by
27
- # <tt>Natto</tt>.
25
+ # Module `OptionParse` encapsulates methods and behavior
26
+ # for parsing the various `mecab` options supported by
27
+ # `Natto`.
28
28
  module Natto
29
29
  # Version string for this Rubygem.
30
- VERSION = "0.9.5"
30
+ VERSION = "0.9.6"
31
31
  end
metadata CHANGED
@@ -1,34 +1,31 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: natto
3
3
  version: !ruby/object:Gem::Version
4
- prerelease:
5
- version: 0.9.5
4
+ version: 0.9.6
6
5
  platform: ruby
7
6
  authors:
8
7
  - Brooke M. Fujita
9
- autorequire:
8
+ autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-09-16 00:00:00.000000000Z
11
+ date: 2013-07-07 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: ffi
16
- version_requirements: &2056 !ruby/object:Gem::Requirement
15
+ requirement: !ruby/object:Gem::Requirement
17
16
  requirements:
18
- - - ! '>='
17
+ - - '>='
19
18
  - !ruby/object:Gem::Version
20
- version: 0.6.3
21
- none: false
22
- requirement: *2056
23
- prerelease: false
19
+ version: 1.9.0
24
20
  type: :runtime
25
- description: ! 'natto is a gem bridging Ruby and MeCab using FFI (foreign function
26
- interface). No compilation is necessary, and natto will run on CRuby (mri/yarv)
27
- and JRuby (jvm) equally well, on any OS.
28
-
29
- ruby ? mecab ????????? natto ???????????
30
-
31
- '
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 1.9.0
27
+ description: |
28
+ natto bridges Ruby and MeCab via FFI (foreign function interface). No compiling is necessary, and natto will run on CRuby (mri/yarv) and JRuby (jvm) equally well, on any OS. natto provides the most natural, Ruby-esque API for MeCab.
32
29
  email: buruzaemon@gmail.com
33
30
  executables: []
34
31
  extensions: []
@@ -36,39 +33,39 @@ extra_rdoc_files: []
36
33
  files:
37
34
  - lib/natto.rb
38
35
  - lib/natto/binding.rb
36
+ - lib/natto/natto.rb
39
37
  - lib/natto/option_parse.rb
40
- - lib/natto/utils.rb
38
+ - lib/natto/struct.rb
41
39
  - lib/natto/version.rb
42
40
  - README.md
43
41
  - LICENSE
44
42
  - CHANGELOG
45
43
  - .yardopts
46
- homepage: https://bitbucket.org/buruzaemon/natto/overview
44
+ homepage: https://bitbucket.org/buruzaemon/natto
47
45
  licenses:
48
46
  - BSD
49
- post_install_message:
47
+ metadata: {}
48
+ post_install_message:
50
49
  rdoc_options: []
51
50
  require_paths:
52
51
  - lib
53
52
  required_ruby_version: !ruby/object:Gem::Requirement
54
53
  requirements:
55
- - - ! '>='
54
+ - - '>='
56
55
  - !ruby/object:Gem::Version
57
- version: 1.8.7
58
- none: false
56
+ version: '1.9'
59
57
  required_rubygems_version: !ruby/object:Gem::Requirement
60
58
  requirements:
61
- - - ! '>='
59
+ - - '>='
62
60
  - !ruby/object:Gem::Version
63
61
  version: '0'
64
- none: false
65
62
  requirements:
66
- - MeCab, 0.994 or greater
67
- - FFI, 0.6.3 or greater
68
- rubyforge_project:
69
- rubygems_version: 1.8.15
70
- signing_key:
71
- specification_version: 3
72
- summary: natto combines the Ruby programming language with MeCab, the part-of-speech and morphological analyzer for the Japanese language.
63
+ - MeCab, 0.996 or greater
64
+ - FFI, 1.9.0 or greater
65
+ rubyforge_project:
66
+ rubygems_version: 2.0.0
67
+ signing_key:
68
+ specification_version: 4
69
+ summary: natto combines the Ruby programming language with MeCab, the part-of-speech
70
+ and morphological analyzer for the Japanese language.
73
71
  test_files: []
74
- ...