natto 0.9.5 → 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,310 @@
1
+ # coding: utf-8
2
+ require 'natto/binding'
3
+ require 'natto/option_parse'
4
+
5
+ module Natto
6
+ require 'ffi'
7
+
8
+ # `MeCabStruct` is a general base class for `FFI::Struct` objects in
9
+ # the `Natto` module. Please refer to
10
+ # [`mecab.h`](http://code.google.com/p/mecab/source/browse/trunk/mecab/src/mecab.h)
11
+ class MeCabStruct < FFI::Struct
12
+ # Provides accessor methods for the members of the `mecab` struct.
13
+ #
14
+ # @param [Symbol] attr_name name of the `mecab` struct member being accessed
15
+ # @return member values for the `mecab` struct
16
+ # @raise [NoMethodError] if `attr_name` is not a member of this `mecab` struct
17
+ def method_missing(attr_name)
18
+ member_sym = attr_name.id2name.to_sym
19
+ return self[member_sym] if self.members.include?(member_sym)
20
+ raise(NoMethodError.new("undefined method '#{attr_name}' for #{self}"))
21
+ end
22
+ end
23
+
24
+ # `DictionaryInfo` is a wrapper for `struct mecab_dictionary_info_t`
25
+ # that holds the `MeCab` instance's related dictionary information.
26
+ #
27
+ # Values for the `mecab` dictionary attributes may be
28
+ # obtained by using the following `Symbol`s as keys
29
+ # to the layout associative array of `FFI::Struct` members.
30
+ #
31
+ # - :filename
32
+ # - :charset
33
+ # - :size
34
+ # - :type
35
+ # - :lsize
36
+ # - :rsize
37
+ # - :version
38
+ # - :next
39
+ #
40
+ # ## Usage
41
+ # `mecab` dictionary attributes can be obtained by
42
+ # using their corresponding accessor.
43
+ #
44
+ # nm = Natto::MeCab.new
45
+ #
46
+ # sysdic = nm.dicts.first
47
+ #
48
+ # puts sysdic.filename
49
+ # => "/usr/local/lib/mecab/dic/ipadic/sys.dic"
50
+ #
51
+ # puts sysdic.charset
52
+ # => "utf8"
53
+ #
54
+ # puts sysdic.is_sysdic?
55
+ # => true
56
+ class DictionaryInfo < MeCabStruct
57
+ # System dictionary.
58
+ SYS_DIC = 0
59
+ # User dictionary.
60
+ USR_DIC = 1
61
+ # Unknown dictionary.
62
+ UNK_DIC = 2
63
+
64
+ layout :filename, :string,
65
+ :charset, :string,
66
+ :size, :uint,
67
+ :type, :int,
68
+ :lsize, :uint,
69
+ :rsize, :uint,
70
+ :version, :ushort,
71
+ :next, :pointer
72
+
73
+ if Object.respond_to?(:type) && Object.respond_to?(:class)
74
+ alias_method :deprecated_type, :type
75
+ # `Object#type` override defined when both `type` and
76
+ # `class` are Object methods. This is a hack to avoid the
77
+ # `Object#type` deprecation warning thrown up in Ruby 1.8.7
78
+ # and in JRuby.
79
+ #
80
+ # @return [Fixnum] `mecab` dictionary type
81
+ def type
82
+ self[:type]
83
+ end
84
+ end
85
+
86
+ # Returns human-readable details for this `mecab` dictionary.
87
+ # Overrides `Object#to_s`.
88
+ #
89
+ # - encoded object id
90
+ # - dictionary type
91
+ # - full-path dictionary filename
92
+ # - dictionary charset
93
+ #
94
+ # @return [String] encoded object id, type, dictionary filename, and charset
95
+ def to_s
96
+ %(#{super.chop} type="#{self.type}", filename="#{self.filename}", charset="#{self.charset}">)
97
+ end
98
+
99
+ # Overrides `Object#inspect`.
100
+ #
101
+ # @return [String] encoded object id, type, dictionary filename, and charset
102
+ # @see #to_s
103
+ def inspect
104
+ self.to_s
105
+ end
106
+
107
+ # Returns `true` if this is a system dictionary.
108
+ # @return [Boolean]
109
+ def is_sysdic?
110
+ self.type == SYS_DIC
111
+ end
112
+
113
+ # Returns `true` if this is a user dictionary.
114
+ # @return [Boolean]
115
+ def is_usrdic?
116
+ self.type == USR_DIC
117
+ end
118
+
119
+ # Returns `true` if this is an unknown dictionary type.
120
+ # @return [Boolean]
121
+ def is_unkdic?
122
+ self.type == UNK_DIC
123
+ end
124
+ end
125
+
126
+ # `MeCabNode` is a wrapper for the structure holding
127
+ # the parsed `node`.
128
+ #
129
+ # Values for the `mecab` node attributes may be
130
+ # obtained by using the following `Symbol`s as keys
131
+ # to the layout associative array of `FFI::Struct` members.
132
+ #
133
+ # - :prev
134
+ # - :next
135
+ # - :enext
136
+ # - :bnext
137
+ # - :rpath
138
+ # - :lpath
139
+ # - :surface
140
+ # - :feature
141
+ # - :id
142
+ # - :length
143
+ # - :rlength
144
+ # - :rcAttr
145
+ # - :lcAttr
146
+ # - :posid
147
+ # - :char_type
148
+ # - :stat
149
+ # - :isbest
150
+ # - :alpha
151
+ # - :beta
152
+ # - :prob
153
+ # - :wcost
154
+ # - :cost
155
+ #
156
+ # ## Usage
157
+ # An instance of `MeCabNode` is yielded to the block
158
+ # used with `MeCab#parse`, where the above-mentioned
159
+ # node attributes may be accessed by name.
160
+ #
161
+ # nm = Natto::MeCab.new
162
+ #
163
+ # nm.parse('卓球なんて死ぬまでの暇つぶしだよ。') do |n|
164
+ # puts "#{n.surface}\t#{n.cost}" if n.is_nor?
165
+ # end
166
+ # 卓球 2874
167
+ # な 4398
168
+ # 死ぬ 9261
169
+ # まで 9386
170
+ # の 10007
171
+ # 暇つぶし 13324
172
+ # だ 15346
173
+ # よ 14396
174
+ # 。 10194
175
+ #
176
+ # It is also possible to use the `Symbol` for the
177
+ # `mecab` node member to index into the
178
+ # `FFI::Struct` layout associative array like so:
179
+ #
180
+ # nm.parse('あいつ笑うと結構可愛い顔してんよ。') {|n| puts n[:feature] }
181
+ # 名詞,代名詞,一般,*,*,*,あいつ,アイツ,アイツ
182
+ # 動詞,自立,*,*,五段・ワ行促音便,基本形,笑う,ワラウ,ワラウ
183
+ # 助詞,接続助詞,*,*,*,*,と,ト,ト
184
+ # 副詞,一般,*,*,*,*,結構,ケッコウ,ケッコー
185
+ # 形容詞,自立,*,*,形容詞・イ段,基本形,可愛い,カワイイ,カワイイ
186
+ # 名詞,一般,*,*,*,*,顔,カオ,カオ
187
+ # 動詞,自立,*,*,サ変・スル,連用形,する,シ,シ
188
+ # 動詞,非自立,*,*,一段,体言接続特殊,てる,テン,テン
189
+ # 助詞,終助詞,*,*,*,*,よ,ヨ,ヨ
190
+ # 記号,句点,*,*,*,*,。,。,。
191
+ # BOS/EOS,*,*,*,*,*,*,*,*
192
+ #
193
+ class MeCabNode < MeCabStruct
194
+ attr_accessor :surface, :feature
195
+ attr_reader :pointer
196
+
197
+ # Normal `mecab` node defined in the dictionary.
198
+ NOR_NODE = 0
199
+ # Unknown `mecab` node not defined in the dictionary.
200
+ UNK_NODE = 1
201
+ # Virtual node representing the beginning of the sentence.
202
+ BOS_NODE = 2
203
+ # Virtual node representing the end of the sentence.
204
+ EOS_NODE = 3
205
+ # Virtual node representing the end of an N-Best `mecab` node list.
206
+ EON_NODE = 4
207
+
208
+ layout :prev, :pointer,
209
+ :next, :pointer,
210
+ :enext, :pointer,
211
+ :bnext, :pointer,
212
+ :rpath, :pointer,
213
+ :lpath, :pointer,
214
+ :surface, :string,
215
+ :feature, :string,
216
+ :id, :uint,
217
+ :length, :ushort,
218
+ :rlength, :ushort,
219
+ :rcAttr, :ushort,
220
+ :lcAttr, :ushort,
221
+ :posid, :ushort,
222
+ :char_type, :uchar,
223
+ :stat, :uchar,
224
+ :isbest, :uchar,
225
+ :alpha, :float,
226
+ :beta, :float,
227
+ :prob, :float,
228
+ :wcost, :short,
229
+ :cost, :long
230
+
231
+ #if RUBY_VERSION.to_f < 1.9
232
+ # alias_method :deprecated_id, :id
233
+ # # `Object#id` override defined when `RUBY_VERSION` is
234
+ # # older than 1.9. This is a hack to avoid the `Object#id`
235
+ # # deprecation warning thrown up in Ruby 1.8.7.
236
+ # #
237
+ # # <i>This method override is not defined when the Ruby interpreter
238
+ # # is 1.9 or greater.</i>
239
+ # # @return [Fixnum] `mecab` node id
240
+ # def id
241
+ # self[:id]
242
+ # end
243
+ #end
244
+
245
+ # Initializes this node instance.
246
+ # Sets the `MeCab` feature value for this node.
247
+ #
248
+ # @param [FFI::Pointer] ptr pointer to the underlying `mecab` node
249
+ def initialize(ptr)
250
+ super(ptr)
251
+ @pointer = ptr
252
+
253
+ if self[:feature]
254
+ @feature = self[:feature].force_encoding(Encoding.default_external)
255
+ end
256
+ end
257
+
258
+ # Returns human-readable details for the `mecab` node.
259
+ # Overrides `Object#to_s`.
260
+ #
261
+ # - encoded object id
262
+ # - underlying FFI pointer to MeCab Node
263
+ # - stat (node type: NOR, UNK, BOS/EOS, EON)
264
+ # - surface
265
+ # - feature
266
+ #
267
+ # @return [String] encoded object id, underlying FFI pointer, stat, surface, and feature
268
+ def to_s
269
+ %(#{super.chop} @pointer=#{@pointer}, stat=#{self[:stat]}, @surface="#{self.surface}", @feature="#{self.feature}">)
270
+ end
271
+
272
+ # Overrides `Object#inspect`.
273
+ #
274
+ # @return [String] encoded object id, underlying FFI pointer, stat, surface, and feature
275
+ # @see #to_s
276
+ def inspect
277
+ self.to_s
278
+ end
279
+
280
+ # Returns `true` if this is a normal `mecab` node found in the dictionary.
281
+ # @return [Boolean]
282
+ def is_nor?
283
+ self.stat == NOR_NODE
284
+ end
285
+
286
+ # Returns `true` if this is an unknown `mecab` node not found in the dictionary.
287
+ # @return [Boolean]
288
+ def is_unk?
289
+ self.stat == UNK_NODE
290
+ end
291
+
292
+ # Returns `true` if this is a virtual `mecab` node representing the beginning of the sentence.
293
+ # @return [Boolean]
294
+ def is_bos?
295
+ self.stat == BOS_NODE
296
+ end
297
+
298
+ # Returns `true` if this is a virtual `mecab` node representing the end of the sentence.
299
+ # @return [Boolean]
300
+ def is_eos?
301
+ self.stat == EOS_NODE
302
+ end
303
+
304
+ # Returns `true` if this is a virtual `mecab` node representing the end of the node list.
305
+ # @return [Boolean]
306
+ def is_eon?
307
+ self.stat == EON_NODE
308
+ end
309
+ end
310
+ end
@@ -1,31 +1,31 @@
1
1
  # coding: utf-8
2
2
 
3
- # <tt>Natto</tt> is the namespace for objects that provide
4
- # a binding to the <tt>mecab</tt> tagger and related resources.
3
+ # `Natto` is the namespace for objects that provide
4
+ # a binding to the `mecab` tagger and related resources.
5
5
  #
6
- # <tt>Natto::MeCab</tt> is a wrapper class for the <tt>mecab</tt>
6
+ # `Natto::MeCab` is a wrapper class for the `mecab`
7
7
  # tagger.
8
8
  #
9
- # <tt>Natto::MeCabStruct</tt> is a base class for a <tt>mecab</tt>
9
+ # `Natto::MeCabStruct` is a base class for a `mecab`
10
10
  # struct.
11
11
  #
12
- # <tt>Natto::MeCabNode</tt> is a wrapper for the struct representing
13
- # a <tt>mecab</tt>-parsed node.
12
+ # `Natto::MeCabNode` is a wrapper for the struct representing
13
+ # a `mecab`-parsed node.
14
14
  #
15
- # <tt>Natto::DictionaryInfo</tt> is a wrapper for the struct
16
- # representing a <tt>Natto::MeCab</tt> instance's related
15
+ # `Natto::DictionaryInfo` is a wrapper for the struct
16
+ # representing a `Natto::MeCab` instance's related
17
17
  # dictionary information.
18
18
  #
19
- # <tt>Natto::MeCabError</tt> is a general error class for the
20
- # <tt>Natto</tt> module.
19
+ # `Natto::MeCabError` is a general error class for the
20
+ # `Natto` module.
21
21
  #
22
- # Module <tt>Natto::Binding</tt> encapsulates methods and behavior
23
- # which are made available via <tt>FFI</tt> bindings to <tt>mecab</tt>.
22
+ # Module `Natto::Binding` encapsulates methods and behavior
23
+ # which are made available via `FFI` bindings to `mecab`.
24
24
  #
25
- # Module <tt>OptionParse</tt> encapsulates methods and behavior
26
- # for parsing the various <tt>mecab</tt> options supported by
27
- # <tt>Natto</tt>.
25
+ # Module `OptionParse` encapsulates methods and behavior
26
+ # for parsing the various `mecab` options supported by
27
+ # `Natto`.
28
28
  module Natto
29
29
  # Version string for this Rubygem.
30
- VERSION = "0.9.5"
30
+ VERSION = "0.9.6"
31
31
  end
metadata CHANGED
@@ -1,34 +1,31 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: natto
3
3
  version: !ruby/object:Gem::Version
4
- prerelease:
5
- version: 0.9.5
4
+ version: 0.9.6
6
5
  platform: ruby
7
6
  authors:
8
7
  - Brooke M. Fujita
9
- autorequire:
8
+ autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-09-16 00:00:00.000000000Z
11
+ date: 2013-07-07 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: ffi
16
- version_requirements: &2056 !ruby/object:Gem::Requirement
15
+ requirement: !ruby/object:Gem::Requirement
17
16
  requirements:
18
- - - ! '>='
17
+ - - '>='
19
18
  - !ruby/object:Gem::Version
20
- version: 0.6.3
21
- none: false
22
- requirement: *2056
23
- prerelease: false
19
+ version: 1.9.0
24
20
  type: :runtime
25
- description: ! 'natto is a gem bridging Ruby and MeCab using FFI (foreign function
26
- interface). No compilation is necessary, and natto will run on CRuby (mri/yarv)
27
- and JRuby (jvm) equally well, on any OS.
28
-
29
- ruby ? mecab ????????? natto ???????????
30
-
31
- '
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 1.9.0
27
+ description: |
28
+ natto bridges Ruby and MeCab via FFI (foreign function interface). No compiling is necessary, and natto will run on CRuby (mri/yarv) and JRuby (jvm) equally well, on any OS. natto provides the most natural, Ruby-esque API for MeCab.
32
29
  email: buruzaemon@gmail.com
33
30
  executables: []
34
31
  extensions: []
@@ -36,39 +33,39 @@ extra_rdoc_files: []
36
33
  files:
37
34
  - lib/natto.rb
38
35
  - lib/natto/binding.rb
36
+ - lib/natto/natto.rb
39
37
  - lib/natto/option_parse.rb
40
- - lib/natto/utils.rb
38
+ - lib/natto/struct.rb
41
39
  - lib/natto/version.rb
42
40
  - README.md
43
41
  - LICENSE
44
42
  - CHANGELOG
45
43
  - .yardopts
46
- homepage: https://bitbucket.org/buruzaemon/natto/overview
44
+ homepage: https://bitbucket.org/buruzaemon/natto
47
45
  licenses:
48
46
  - BSD
49
- post_install_message:
47
+ metadata: {}
48
+ post_install_message:
50
49
  rdoc_options: []
51
50
  require_paths:
52
51
  - lib
53
52
  required_ruby_version: !ruby/object:Gem::Requirement
54
53
  requirements:
55
- - - ! '>='
54
+ - - '>='
56
55
  - !ruby/object:Gem::Version
57
- version: 1.8.7
58
- none: false
56
+ version: '1.9'
59
57
  required_rubygems_version: !ruby/object:Gem::Requirement
60
58
  requirements:
61
- - - ! '>='
59
+ - - '>='
62
60
  - !ruby/object:Gem::Version
63
61
  version: '0'
64
- none: false
65
62
  requirements:
66
- - MeCab, 0.994 or greater
67
- - FFI, 0.6.3 or greater
68
- rubyforge_project:
69
- rubygems_version: 1.8.15
70
- signing_key:
71
- specification_version: 3
72
- summary: natto combines the Ruby programming language with MeCab, the part-of-speech and morphological analyzer for the Japanese language.
63
+ - MeCab, 0.996 or greater
64
+ - FFI, 1.9.0 or greater
65
+ rubyforge_project:
66
+ rubygems_version: 2.0.0
67
+ signing_key:
68
+ specification_version: 4
69
+ summary: natto combines the Ruby programming language with MeCab, the part-of-speech
70
+ and morphological analyzer for the Japanese language.
73
71
  test_files: []
74
- ...