natto 0.9.9 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG +13 -0
- data/README.md +49 -12
- data/lib/natto.rb +1 -0
- data/lib/natto/binding.rb +114 -131
- data/lib/natto/natto.rb +266 -210
- data/lib/natto/option_parse.rb +6 -7
- data/lib/natto/struct.rb +21 -29
- data/lib/natto/version.rb +5 -6
- metadata +21 -31
data/lib/natto/option_parse.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
module Natto
|
3
3
|
|
4
4
|
# Module `OptionParse` encapsulates methods and behavior
|
5
|
-
# for parsing the various
|
5
|
+
# for parsing the various MeCab options supported by
|
6
6
|
# `Natto`.
|
7
7
|
module OptionParse
|
8
8
|
require 'optparse'
|
@@ -10,8 +10,8 @@ module Natto
|
|
10
10
|
WARNING_LATTICE_LEVEL =
|
11
11
|
":lattice-level is DEPRECATED, please use :marginal or :nbest\n".freeze
|
12
12
|
|
13
|
-
# Mapping of
|
14
|
-
#
|
13
|
+
# Mapping of MeCab short-style configuration options to the MeCab
|
14
|
+
# Tagger. See the MeCab help for more details.
|
15
15
|
SUPPORTED_OPTS = { '-r' => :rcfile,
|
16
16
|
'-d' => :dicdir,
|
17
17
|
'-u' => :userdic,
|
@@ -95,16 +95,15 @@ module Natto
|
|
95
95
|
end
|
96
96
|
|
97
97
|
# Returns a string-representation of the options to
|
98
|
-
# be passed in the construction of the
|
99
|
-
#
|
98
|
+
# be passed in the construction of the MeCab Tagger.
|
100
99
|
# @param options[Hash] options for MeCab
|
101
|
-
# @return [String] representation of the options to the
|
100
|
+
# @return [String] representation of the options to the MeCab Tagger
|
102
101
|
def build_options_str(options={})
|
103
102
|
opt = []
|
104
103
|
SUPPORTED_OPTS.values.each do |k|
|
105
104
|
if options.has_key? k
|
106
105
|
key = k.to_s.gsub('_', '-')
|
107
|
-
if
|
106
|
+
if [ :all_morphs, :partial, :marginal, :allocate_sentence ].include?(k)
|
108
107
|
opt << "--#{key}" if options[k]==true
|
109
108
|
else
|
110
109
|
opt << "--#{key}=#{options[k]}"
|
data/lib/natto/struct.rb
CHANGED
@@ -9,11 +9,10 @@ module Natto
|
|
9
9
|
# the `Natto` module. Please refer to `mecab.h` in the source code
|
10
10
|
# distribution.
|
11
11
|
class MeCabStruct < FFI::Struct
|
12
|
-
# Provides accessor methods for the members of the
|
13
|
-
#
|
12
|
+
# Provides accessor methods for the members of the MeCab struct.
|
14
13
|
# @param attr_name [String] attribute name
|
15
|
-
# @return member values for the
|
16
|
-
# @raise [NoMethodError] if `attr_name` is not a member of this
|
14
|
+
# @return member values for the MeCab struct
|
15
|
+
# @raise [NoMethodError] if `attr_name` is not a member of this MeCab struct
|
17
16
|
def method_missing(attr_name)
|
18
17
|
member_sym = attr_name.id2name.to_sym
|
19
18
|
return self[member_sym] if self.members.include?(member_sym)
|
@@ -22,9 +21,9 @@ module Natto
|
|
22
21
|
end
|
23
22
|
|
24
23
|
# `DictionaryInfo` is a wrapper for the `struct mecab_dictionary_info_t`
|
25
|
-
# structure holding the
|
24
|
+
# structure holding the MeCab instance's related dictionary information.
|
26
25
|
#
|
27
|
-
# Values for the
|
26
|
+
# Values for the MeCab dictionary attributes may be
|
28
27
|
# obtained by using the following `Symbol`s as keys
|
29
28
|
# to the layout associative array of `FFI::Struct` members.
|
30
29
|
#
|
@@ -38,7 +37,7 @@ module Natto
|
|
38
37
|
# - :next - pointer to next dictionary in list
|
39
38
|
#
|
40
39
|
# ## Usage
|
41
|
-
#
|
40
|
+
# MeCab dictionary attributes can be obtained by
|
42
41
|
# using their corresponding accessor.
|
43
42
|
#
|
44
43
|
# nm = Natto::MeCab.new
|
@@ -84,8 +83,7 @@ module Natto
|
|
84
83
|
# `class` are Object methods. This is a hack to avoid the
|
85
84
|
# `Object#type` deprecation warning thrown up in Ruby 1.8.7
|
86
85
|
# and in JRuby.
|
87
|
-
#
|
88
|
-
# @return [Fixnum] `mecab` dictionary type
|
86
|
+
# @return [Fixnum] MeCab dictionary type
|
89
87
|
def type
|
90
88
|
self[:type]
|
91
89
|
end
|
@@ -93,7 +91,6 @@ module Natto
|
|
93
91
|
|
94
92
|
# Initializes this dictionary info instance.
|
95
93
|
# Sets the `DictionaryInfo` filepath value.
|
96
|
-
#
|
97
94
|
# @param ptr [FFI::Pointer] pointer to MeCab dictionary
|
98
95
|
def initialize(ptr)
|
99
96
|
super(ptr)
|
@@ -101,14 +98,13 @@ module Natto
|
|
101
98
|
@filepath = File.absolute_path(self[:filename])
|
102
99
|
end
|
103
100
|
|
104
|
-
# Returns human-readable details for this
|
101
|
+
# Returns human-readable details for this MeCab dictionary.
|
105
102
|
# Overrides `Object#to_s`.
|
106
103
|
#
|
107
104
|
# - encoded object id
|
108
105
|
# - real file path to this dictionary
|
109
106
|
# - dictionary charset
|
110
107
|
# - dictionary type
|
111
|
-
#
|
112
108
|
# @return [String] encoded object id, file path to dictionary, charset and
|
113
109
|
# type
|
114
110
|
def to_s
|
@@ -119,7 +115,6 @@ module Natto
|
|
119
115
|
end
|
120
116
|
|
121
117
|
# Overrides `Object#inspect`.
|
122
|
-
#
|
123
118
|
# @return [String] encoded object id, dictionary filename, and charset
|
124
119
|
# @see #to_s
|
125
120
|
def inspect
|
@@ -148,7 +143,7 @@ module Natto
|
|
148
143
|
# `MeCabNode` is a wrapper for the `struct mecab_node_t`
|
149
144
|
# structure holding the parsed `node`.
|
150
145
|
#
|
151
|
-
# Values for the
|
146
|
+
# Values for the MeCab node attributes may be
|
152
147
|
# obtained by using the following `Symbol`s as keys
|
153
148
|
# to the layout associative array of `FFI::Struct` members.
|
154
149
|
#
|
@@ -196,9 +191,9 @@ module Natto
|
|
196
191
|
# 。 10194
|
197
192
|
#
|
198
193
|
# While it is also possible to use the `Symbol` for the
|
199
|
-
#
|
194
|
+
# MeCab node member to index into the
|
200
195
|
# `FFI::Struct` layout associative array, please use the attribute
|
201
|
-
# accessors. In the case of `:surface` and `:feature`,
|
196
|
+
# accessors. In the case of `:surface` and `:feature`, MeCab
|
202
197
|
# returns the raw bytes, so `natto` will convert that into
|
203
198
|
# a string using the default encoding.
|
204
199
|
class MeCabNode < MeCabStruct
|
@@ -209,15 +204,15 @@ module Natto
|
|
209
204
|
# @return [FFI::Pointer] pointer to MeCab node struct.
|
210
205
|
attr_reader :pointer
|
211
206
|
|
212
|
-
# Normal
|
207
|
+
# Normal MeCab node defined in the dictionary, c.f. `stat`.
|
213
208
|
NOR_NODE = 0
|
214
|
-
# Unknown
|
209
|
+
# Unknown MeCab node not defined in the dictionary, c.f. `stat`.
|
215
210
|
UNK_NODE = 1
|
216
211
|
# Virtual node representing the beginning of the sentence, c.f. `stat`.
|
217
212
|
BOS_NODE = 2
|
218
213
|
# Virtual node representing the end of the sentence, c.f. `stat`.
|
219
214
|
EOS_NODE = 3
|
220
|
-
# Virtual node representing the end of an N-Best
|
215
|
+
# Virtual node representing the end of an N-Best MeCab node list, c.f. `stat`.
|
221
216
|
EON_NODE = 4
|
222
217
|
|
223
218
|
layout :prev, :pointer,
|
@@ -244,8 +239,7 @@ module Natto
|
|
244
239
|
:cost, :long
|
245
240
|
|
246
241
|
# Initializes this node instance.
|
247
|
-
# Sets the
|
248
|
-
#
|
242
|
+
# Sets the MeCab feature value for this node.
|
249
243
|
# @param nptr [FFI::Pointer] pointer to MeCab node
|
250
244
|
def initialize(nptr)
|
251
245
|
super(nptr)
|
@@ -256,7 +250,7 @@ module Natto
|
|
256
250
|
end
|
257
251
|
end
|
258
252
|
|
259
|
-
# Returns human-readable details for the
|
253
|
+
# Returns human-readable details for the MeCab node.
|
260
254
|
# Overrides `Object#to_s`.
|
261
255
|
#
|
262
256
|
# - encoded object id
|
@@ -264,7 +258,6 @@ module Natto
|
|
264
258
|
# - stat (node type: NOR, UNK, BOS/EOS, EON)
|
265
259
|
# - surface
|
266
260
|
# - feature
|
267
|
-
#
|
268
261
|
# @return [String] encoded object id, underlying FFI pointer, stat, surface, and feature
|
269
262
|
def to_s
|
270
263
|
[ super.chop,
|
@@ -275,38 +268,37 @@ module Natto
|
|
275
268
|
end
|
276
269
|
|
277
270
|
# Overrides `Object#inspect`.
|
278
|
-
#
|
279
271
|
# @return [String] encoded object id, stat, surface, and feature
|
280
272
|
# @see #to_s
|
281
273
|
def inspect
|
282
274
|
self.to_s
|
283
275
|
end
|
284
276
|
|
285
|
-
# Returns `true` if this is a normal
|
277
|
+
# Returns `true` if this is a normal MeCab node found in the dictionary.
|
286
278
|
# @return [Boolean]
|
287
279
|
def is_nor?
|
288
280
|
self.stat == NOR_NODE
|
289
281
|
end
|
290
282
|
|
291
|
-
# Returns `true` if this is an unknown
|
283
|
+
# Returns `true` if this is an unknown MeCab node not found in the dictionary.
|
292
284
|
# @return [Boolean]
|
293
285
|
def is_unk?
|
294
286
|
self.stat == UNK_NODE
|
295
287
|
end
|
296
288
|
|
297
|
-
# Returns `true` if this is a virtual
|
289
|
+
# Returns `true` if this is a virtual MeCab node representing the beginning of the sentence.
|
298
290
|
# @return [Boolean]
|
299
291
|
def is_bos?
|
300
292
|
self.stat == BOS_NODE
|
301
293
|
end
|
302
294
|
|
303
|
-
# Returns `true` if this is a virtual
|
295
|
+
# Returns `true` if this is a virtual MeCab node representing the end of the sentence.
|
304
296
|
# @return [Boolean]
|
305
297
|
def is_eos?
|
306
298
|
self.stat == EOS_NODE
|
307
299
|
end
|
308
300
|
|
309
|
-
# Returns `true` if this is a virtual
|
301
|
+
# Returns `true` if this is a virtual MeCab node representing the end of the node list.
|
310
302
|
# @return [Boolean]
|
311
303
|
def is_eon?
|
312
304
|
self.stat == EON_NODE
|
data/lib/natto/version.rb
CHANGED
@@ -4,10 +4,9 @@
|
|
4
4
|
# a binding to MeCab and an API for the `Tagger`,
|
5
5
|
# `Node` and `Lattice` objects.
|
6
6
|
#
|
7
|
-
# `Natto::MeCab` is a wrapper class for the MeCab
|
7
|
+
# `Natto::MeCab` is a wrapper class for the MeCab Tagger.
|
8
8
|
#
|
9
|
-
# `Natto::MeCabStruct` is a base class for a
|
10
|
-
# struct.
|
9
|
+
# `Natto::MeCabStruct` is a base class for a MeCab struct.
|
11
10
|
#
|
12
11
|
# `Natto::MeCabNode` is a wrapper for the struct representing
|
13
12
|
# a MeCab `Node`.
|
@@ -20,14 +19,14 @@
|
|
20
19
|
# `Natto` module.
|
21
20
|
#
|
22
21
|
# Module `Natto::Binding` encapsulates methods and behavior
|
23
|
-
# which are made available via `FFI` bindings to
|
22
|
+
# which are made available via `FFI` bindings to MeCab.
|
24
23
|
#
|
25
24
|
# Module `OptionParse` encapsulates methods and behavior
|
26
|
-
# for parsing the various
|
25
|
+
# for parsing the various MeCab options supported by
|
27
26
|
# `Natto`.
|
28
27
|
module Natto
|
29
28
|
# Version string for this Rubygem.
|
30
|
-
VERSION = "0.
|
29
|
+
VERSION = "1.0.0"
|
31
30
|
end
|
32
31
|
|
33
32
|
# Copyright (c) 2015, Brooke M. Fujita.
|
metadata
CHANGED
@@ -1,111 +1,101 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: natto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: 1.0.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Brooke M. Fujita
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2015-
|
11
|
+
date: 2015-04-14 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: ffi
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - ">="
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: 1.9.0
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- -
|
24
|
+
- - ">="
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: 1.9.0
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: rake
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- -
|
31
|
+
- - ">="
|
36
32
|
- !ruby/object:Gem::Version
|
37
33
|
version: '0'
|
38
34
|
type: :development
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- -
|
38
|
+
- - ">="
|
44
39
|
- !ruby/object:Gem::Version
|
45
40
|
version: '0'
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: minitest
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
|
-
- -
|
45
|
+
- - ">="
|
52
46
|
- !ruby/object:Gem::Version
|
53
47
|
version: '0'
|
54
48
|
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
|
-
- -
|
52
|
+
- - ">="
|
60
53
|
- !ruby/object:Gem::Version
|
61
54
|
version: '0'
|
62
|
-
description:
|
63
|
-
on both CRuby (mri/yarv) and JRuby (jvm). It works with MeCab installations on Windows,
|
64
|
-
Unix/Linux, and OS X. No compiler is necessary, as natto is not a C extension.
|
65
|
-
|
66
|
-
'
|
55
|
+
description: |
|
56
|
+
natto provides a naturally Ruby-esque interface to MeCab. It runs on both CRuby (mri/yarv) and JRuby (jvm). It works with MeCab installations on Windows, Unix/Linux, and OS X. No compiler is necessary, as natto is not a C extension.
|
67
57
|
email: buruzaemon@gmail.com
|
68
58
|
executables: []
|
69
59
|
extensions: []
|
70
60
|
extra_rdoc_files: []
|
71
61
|
files:
|
62
|
+
- ".yardopts"
|
63
|
+
- CHANGELOG
|
64
|
+
- LICENSE
|
65
|
+
- README.md
|
72
66
|
- lib/natto.rb
|
73
67
|
- lib/natto/binding.rb
|
74
68
|
- lib/natto/natto.rb
|
75
69
|
- lib/natto/option_parse.rb
|
76
70
|
- lib/natto/struct.rb
|
77
71
|
- lib/natto/version.rb
|
78
|
-
- README.md
|
79
|
-
- LICENSE
|
80
|
-
- CHANGELOG
|
81
|
-
- .yardopts
|
82
72
|
homepage: https://github.com/buruzaemon/natto
|
83
73
|
licenses:
|
84
74
|
- BSD
|
75
|
+
metadata: {}
|
85
76
|
post_install_message:
|
86
77
|
rdoc_options: []
|
87
78
|
require_paths:
|
88
79
|
- lib
|
89
80
|
required_ruby_version: !ruby/object:Gem::Requirement
|
90
|
-
none: false
|
91
81
|
requirements:
|
92
|
-
- -
|
82
|
+
- - ">="
|
93
83
|
- !ruby/object:Gem::Version
|
94
84
|
version: '1.9'
|
95
85
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
|
-
none: false
|
97
86
|
requirements:
|
98
|
-
- -
|
87
|
+
- - ">="
|
99
88
|
- !ruby/object:Gem::Version
|
100
89
|
version: '0'
|
101
90
|
requirements:
|
102
|
-
- MeCab
|
91
|
+
- MeCab 0.996
|
103
92
|
- FFI, 1.9.0 or greater
|
104
93
|
rubyforge_project:
|
105
|
-
rubygems_version:
|
94
|
+
rubygems_version: 2.4.5
|
106
95
|
signing_key:
|
107
|
-
specification_version:
|
96
|
+
specification_version: 4
|
108
97
|
summary: A gem leveraging FFI (foreign function interface), natto combines the Ruby
|
109
98
|
programming language with MeCab, the part-of-speech and morphological analyzer for
|
110
99
|
the Japanese language.
|
111
100
|
test_files: []
|
101
|
+
has_rdoc:
|