natto 0.9.9 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG +13 -0
- data/README.md +49 -12
- data/lib/natto.rb +1 -0
- data/lib/natto/binding.rb +114 -131
- data/lib/natto/natto.rb +266 -210
- data/lib/natto/option_parse.rb +6 -7
- data/lib/natto/struct.rb +21 -29
- data/lib/natto/version.rb +5 -6
- metadata +21 -31
data/lib/natto/option_parse.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
module Natto
|
3
3
|
|
4
4
|
# Module `OptionParse` encapsulates methods and behavior
|
5
|
-
# for parsing the various
|
5
|
+
# for parsing the various MeCab options supported by
|
6
6
|
# `Natto`.
|
7
7
|
module OptionParse
|
8
8
|
require 'optparse'
|
@@ -10,8 +10,8 @@ module Natto
|
|
10
10
|
WARNING_LATTICE_LEVEL =
|
11
11
|
":lattice-level is DEPRECATED, please use :marginal or :nbest\n".freeze
|
12
12
|
|
13
|
-
# Mapping of
|
14
|
-
#
|
13
|
+
# Mapping of MeCab short-style configuration options to the MeCab
|
14
|
+
# Tagger. See the MeCab help for more details.
|
15
15
|
SUPPORTED_OPTS = { '-r' => :rcfile,
|
16
16
|
'-d' => :dicdir,
|
17
17
|
'-u' => :userdic,
|
@@ -95,16 +95,15 @@ module Natto
|
|
95
95
|
end
|
96
96
|
|
97
97
|
# Returns a string-representation of the options to
|
98
|
-
# be passed in the construction of the
|
99
|
-
#
|
98
|
+
# be passed in the construction of the MeCab Tagger.
|
100
99
|
# @param options[Hash] options for MeCab
|
101
|
-
# @return [String] representation of the options to the
|
100
|
+
# @return [String] representation of the options to the MeCab Tagger
|
102
101
|
def build_options_str(options={})
|
103
102
|
opt = []
|
104
103
|
SUPPORTED_OPTS.values.each do |k|
|
105
104
|
if options.has_key? k
|
106
105
|
key = k.to_s.gsub('_', '-')
|
107
|
-
if
|
106
|
+
if [ :all_morphs, :partial, :marginal, :allocate_sentence ].include?(k)
|
108
107
|
opt << "--#{key}" if options[k]==true
|
109
108
|
else
|
110
109
|
opt << "--#{key}=#{options[k]}"
|
data/lib/natto/struct.rb
CHANGED
@@ -9,11 +9,10 @@ module Natto
|
|
9
9
|
# the `Natto` module. Please refer to `mecab.h` in the source code
|
10
10
|
# distribution.
|
11
11
|
class MeCabStruct < FFI::Struct
|
12
|
-
# Provides accessor methods for the members of the
|
13
|
-
#
|
12
|
+
# Provides accessor methods for the members of the MeCab struct.
|
14
13
|
# @param attr_name [String] attribute name
|
15
|
-
# @return member values for the
|
16
|
-
# @raise [NoMethodError] if `attr_name` is not a member of this
|
14
|
+
# @return member values for the MeCab struct
|
15
|
+
# @raise [NoMethodError] if `attr_name` is not a member of this MeCab struct
|
17
16
|
def method_missing(attr_name)
|
18
17
|
member_sym = attr_name.id2name.to_sym
|
19
18
|
return self[member_sym] if self.members.include?(member_sym)
|
@@ -22,9 +21,9 @@ module Natto
|
|
22
21
|
end
|
23
22
|
|
24
23
|
# `DictionaryInfo` is a wrapper for the `struct mecab_dictionary_info_t`
|
25
|
-
# structure holding the
|
24
|
+
# structure holding the MeCab instance's related dictionary information.
|
26
25
|
#
|
27
|
-
# Values for the
|
26
|
+
# Values for the MeCab dictionary attributes may be
|
28
27
|
# obtained by using the following `Symbol`s as keys
|
29
28
|
# to the layout associative array of `FFI::Struct` members.
|
30
29
|
#
|
@@ -38,7 +37,7 @@ module Natto
|
|
38
37
|
# - :next - pointer to next dictionary in list
|
39
38
|
#
|
40
39
|
# ## Usage
|
41
|
-
#
|
40
|
+
# MeCab dictionary attributes can be obtained by
|
42
41
|
# using their corresponding accessor.
|
43
42
|
#
|
44
43
|
# nm = Natto::MeCab.new
|
@@ -84,8 +83,7 @@ module Natto
|
|
84
83
|
# `class` are Object methods. This is a hack to avoid the
|
85
84
|
# `Object#type` deprecation warning thrown up in Ruby 1.8.7
|
86
85
|
# and in JRuby.
|
87
|
-
#
|
88
|
-
# @return [Fixnum] `mecab` dictionary type
|
86
|
+
# @return [Fixnum] MeCab dictionary type
|
89
87
|
def type
|
90
88
|
self[:type]
|
91
89
|
end
|
@@ -93,7 +91,6 @@ module Natto
|
|
93
91
|
|
94
92
|
# Initializes this dictionary info instance.
|
95
93
|
# Sets the `DictionaryInfo` filepath value.
|
96
|
-
#
|
97
94
|
# @param ptr [FFI::Pointer] pointer to MeCab dictionary
|
98
95
|
def initialize(ptr)
|
99
96
|
super(ptr)
|
@@ -101,14 +98,13 @@ module Natto
|
|
101
98
|
@filepath = File.absolute_path(self[:filename])
|
102
99
|
end
|
103
100
|
|
104
|
-
# Returns human-readable details for this
|
101
|
+
# Returns human-readable details for this MeCab dictionary.
|
105
102
|
# Overrides `Object#to_s`.
|
106
103
|
#
|
107
104
|
# - encoded object id
|
108
105
|
# - real file path to this dictionary
|
109
106
|
# - dictionary charset
|
110
107
|
# - dictionary type
|
111
|
-
#
|
112
108
|
# @return [String] encoded object id, file path to dictionary, charset and
|
113
109
|
# type
|
114
110
|
def to_s
|
@@ -119,7 +115,6 @@ module Natto
|
|
119
115
|
end
|
120
116
|
|
121
117
|
# Overrides `Object#inspect`.
|
122
|
-
#
|
123
118
|
# @return [String] encoded object id, dictionary filename, and charset
|
124
119
|
# @see #to_s
|
125
120
|
def inspect
|
@@ -148,7 +143,7 @@ module Natto
|
|
148
143
|
# `MeCabNode` is a wrapper for the `struct mecab_node_t`
|
149
144
|
# structure holding the parsed `node`.
|
150
145
|
#
|
151
|
-
# Values for the
|
146
|
+
# Values for the MeCab node attributes may be
|
152
147
|
# obtained by using the following `Symbol`s as keys
|
153
148
|
# to the layout associative array of `FFI::Struct` members.
|
154
149
|
#
|
@@ -196,9 +191,9 @@ module Natto
|
|
196
191
|
# 。 10194
|
197
192
|
#
|
198
193
|
# While it is also possible to use the `Symbol` for the
|
199
|
-
#
|
194
|
+
# MeCab node member to index into the
|
200
195
|
# `FFI::Struct` layout associative array, please use the attribute
|
201
|
-
# accessors. In the case of `:surface` and `:feature`,
|
196
|
+
# accessors. In the case of `:surface` and `:feature`, MeCab
|
202
197
|
# returns the raw bytes, so `natto` will convert that into
|
203
198
|
# a string using the default encoding.
|
204
199
|
class MeCabNode < MeCabStruct
|
@@ -209,15 +204,15 @@ module Natto
|
|
209
204
|
# @return [FFI::Pointer] pointer to MeCab node struct.
|
210
205
|
attr_reader :pointer
|
211
206
|
|
212
|
-
# Normal
|
207
|
+
# Normal MeCab node defined in the dictionary, c.f. `stat`.
|
213
208
|
NOR_NODE = 0
|
214
|
-
# Unknown
|
209
|
+
# Unknown MeCab node not defined in the dictionary, c.f. `stat`.
|
215
210
|
UNK_NODE = 1
|
216
211
|
# Virtual node representing the beginning of the sentence, c.f. `stat`.
|
217
212
|
BOS_NODE = 2
|
218
213
|
# Virtual node representing the end of the sentence, c.f. `stat`.
|
219
214
|
EOS_NODE = 3
|
220
|
-
# Virtual node representing the end of an N-Best
|
215
|
+
# Virtual node representing the end of an N-Best MeCab node list, c.f. `stat`.
|
221
216
|
EON_NODE = 4
|
222
217
|
|
223
218
|
layout :prev, :pointer,
|
@@ -244,8 +239,7 @@ module Natto
|
|
244
239
|
:cost, :long
|
245
240
|
|
246
241
|
# Initializes this node instance.
|
247
|
-
# Sets the
|
248
|
-
#
|
242
|
+
# Sets the MeCab feature value for this node.
|
249
243
|
# @param nptr [FFI::Pointer] pointer to MeCab node
|
250
244
|
def initialize(nptr)
|
251
245
|
super(nptr)
|
@@ -256,7 +250,7 @@ module Natto
|
|
256
250
|
end
|
257
251
|
end
|
258
252
|
|
259
|
-
# Returns human-readable details for the
|
253
|
+
# Returns human-readable details for the MeCab node.
|
260
254
|
# Overrides `Object#to_s`.
|
261
255
|
#
|
262
256
|
# - encoded object id
|
@@ -264,7 +258,6 @@ module Natto
|
|
264
258
|
# - stat (node type: NOR, UNK, BOS/EOS, EON)
|
265
259
|
# - surface
|
266
260
|
# - feature
|
267
|
-
#
|
268
261
|
# @return [String] encoded object id, underlying FFI pointer, stat, surface, and feature
|
269
262
|
def to_s
|
270
263
|
[ super.chop,
|
@@ -275,38 +268,37 @@ module Natto
|
|
275
268
|
end
|
276
269
|
|
277
270
|
# Overrides `Object#inspect`.
|
278
|
-
#
|
279
271
|
# @return [String] encoded object id, stat, surface, and feature
|
280
272
|
# @see #to_s
|
281
273
|
def inspect
|
282
274
|
self.to_s
|
283
275
|
end
|
284
276
|
|
285
|
-
# Returns `true` if this is a normal
|
277
|
+
# Returns `true` if this is a normal MeCab node found in the dictionary.
|
286
278
|
# @return [Boolean]
|
287
279
|
def is_nor?
|
288
280
|
self.stat == NOR_NODE
|
289
281
|
end
|
290
282
|
|
291
|
-
# Returns `true` if this is an unknown
|
283
|
+
# Returns `true` if this is an unknown MeCab node not found in the dictionary.
|
292
284
|
# @return [Boolean]
|
293
285
|
def is_unk?
|
294
286
|
self.stat == UNK_NODE
|
295
287
|
end
|
296
288
|
|
297
|
-
# Returns `true` if this is a virtual
|
289
|
+
# Returns `true` if this is a virtual MeCab node representing the beginning of the sentence.
|
298
290
|
# @return [Boolean]
|
299
291
|
def is_bos?
|
300
292
|
self.stat == BOS_NODE
|
301
293
|
end
|
302
294
|
|
303
|
-
# Returns `true` if this is a virtual
|
295
|
+
# Returns `true` if this is a virtual MeCab node representing the end of the sentence.
|
304
296
|
# @return [Boolean]
|
305
297
|
def is_eos?
|
306
298
|
self.stat == EOS_NODE
|
307
299
|
end
|
308
300
|
|
309
|
-
# Returns `true` if this is a virtual
|
301
|
+
# Returns `true` if this is a virtual MeCab node representing the end of the node list.
|
310
302
|
# @return [Boolean]
|
311
303
|
def is_eon?
|
312
304
|
self.stat == EON_NODE
|
data/lib/natto/version.rb
CHANGED
@@ -4,10 +4,9 @@
|
|
4
4
|
# a binding to MeCab and an API for the `Tagger`,
|
5
5
|
# `Node` and `Lattice` objects.
|
6
6
|
#
|
7
|
-
# `Natto::MeCab` is a wrapper class for the MeCab
|
7
|
+
# `Natto::MeCab` is a wrapper class for the MeCab Tagger.
|
8
8
|
#
|
9
|
-
# `Natto::MeCabStruct` is a base class for a
|
10
|
-
# struct.
|
9
|
+
# `Natto::MeCabStruct` is a base class for a MeCab struct.
|
11
10
|
#
|
12
11
|
# `Natto::MeCabNode` is a wrapper for the struct representing
|
13
12
|
# a MeCab `Node`.
|
@@ -20,14 +19,14 @@
|
|
20
19
|
# `Natto` module.
|
21
20
|
#
|
22
21
|
# Module `Natto::Binding` encapsulates methods and behavior
|
23
|
-
# which are made available via `FFI` bindings to
|
22
|
+
# which are made available via `FFI` bindings to MeCab.
|
24
23
|
#
|
25
24
|
# Module `OptionParse` encapsulates methods and behavior
|
26
|
-
# for parsing the various
|
25
|
+
# for parsing the various MeCab options supported by
|
27
26
|
# `Natto`.
|
28
27
|
module Natto
|
29
28
|
# Version string for this Rubygem.
|
30
|
-
VERSION = "0.
|
29
|
+
VERSION = "1.0.0"
|
31
30
|
end
|
32
31
|
|
33
32
|
# Copyright (c) 2015, Brooke M. Fujita.
|
metadata
CHANGED
@@ -1,111 +1,101 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: natto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: 1.0.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Brooke M. Fujita
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2015-
|
11
|
+
date: 2015-04-14 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: ffi
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - ">="
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: 1.9.0
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- -
|
24
|
+
- - ">="
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: 1.9.0
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: rake
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- -
|
31
|
+
- - ">="
|
36
32
|
- !ruby/object:Gem::Version
|
37
33
|
version: '0'
|
38
34
|
type: :development
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- -
|
38
|
+
- - ">="
|
44
39
|
- !ruby/object:Gem::Version
|
45
40
|
version: '0'
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: minitest
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
|
-
- -
|
45
|
+
- - ">="
|
52
46
|
- !ruby/object:Gem::Version
|
53
47
|
version: '0'
|
54
48
|
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
|
-
- -
|
52
|
+
- - ">="
|
60
53
|
- !ruby/object:Gem::Version
|
61
54
|
version: '0'
|
62
|
-
description:
|
63
|
-
on both CRuby (mri/yarv) and JRuby (jvm). It works with MeCab installations on Windows,
|
64
|
-
Unix/Linux, and OS X. No compiler is necessary, as natto is not a C extension.
|
65
|
-
|
66
|
-
'
|
55
|
+
description: |
|
56
|
+
natto provides a naturally Ruby-esque interface to MeCab. It runs on both CRuby (mri/yarv) and JRuby (jvm). It works with MeCab installations on Windows, Unix/Linux, and OS X. No compiler is necessary, as natto is not a C extension.
|
67
57
|
email: buruzaemon@gmail.com
|
68
58
|
executables: []
|
69
59
|
extensions: []
|
70
60
|
extra_rdoc_files: []
|
71
61
|
files:
|
62
|
+
- ".yardopts"
|
63
|
+
- CHANGELOG
|
64
|
+
- LICENSE
|
65
|
+
- README.md
|
72
66
|
- lib/natto.rb
|
73
67
|
- lib/natto/binding.rb
|
74
68
|
- lib/natto/natto.rb
|
75
69
|
- lib/natto/option_parse.rb
|
76
70
|
- lib/natto/struct.rb
|
77
71
|
- lib/natto/version.rb
|
78
|
-
- README.md
|
79
|
-
- LICENSE
|
80
|
-
- CHANGELOG
|
81
|
-
- .yardopts
|
82
72
|
homepage: https://github.com/buruzaemon/natto
|
83
73
|
licenses:
|
84
74
|
- BSD
|
75
|
+
metadata: {}
|
85
76
|
post_install_message:
|
86
77
|
rdoc_options: []
|
87
78
|
require_paths:
|
88
79
|
- lib
|
89
80
|
required_ruby_version: !ruby/object:Gem::Requirement
|
90
|
-
none: false
|
91
81
|
requirements:
|
92
|
-
- -
|
82
|
+
- - ">="
|
93
83
|
- !ruby/object:Gem::Version
|
94
84
|
version: '1.9'
|
95
85
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
|
-
none: false
|
97
86
|
requirements:
|
98
|
-
- -
|
87
|
+
- - ">="
|
99
88
|
- !ruby/object:Gem::Version
|
100
89
|
version: '0'
|
101
90
|
requirements:
|
102
|
-
- MeCab
|
91
|
+
- MeCab 0.996
|
103
92
|
- FFI, 1.9.0 or greater
|
104
93
|
rubyforge_project:
|
105
|
-
rubygems_version:
|
94
|
+
rubygems_version: 2.4.5
|
106
95
|
signing_key:
|
107
|
-
specification_version:
|
96
|
+
specification_version: 4
|
108
97
|
summary: A gem leveraging FFI (foreign function interface), natto combines the Ruby
|
109
98
|
programming language with MeCab, the part-of-speech and morphological analyzer for
|
110
99
|
the Japanese language.
|
111
100
|
test_files: []
|
101
|
+
has_rdoc:
|