natto 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +7 -1
- data/lib/natto.rb +34 -21
- data/lib/natto/version.rb +1 -1
- metadata +5 -5
data/CHANGELOG
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
## CHANGELOG
|
2
2
|
|
3
|
-
- __2011/
|
3
|
+
- __2011/03/23__: 0.5.1 release.
|
4
|
+
- Corrected mojibake issue for surface and feature values
|
5
|
+
when node-parsing.
|
6
|
+
- Corrected call to NBest initializer in lambdas
|
7
|
+
for Natto::MeCab#initialize.
|
8
|
+
|
9
|
+
- __2011/02/26__: 0.5.0 release.
|
4
10
|
- Added support for node parsing using blocks
|
5
11
|
- Added support for mecab options nbest, all-morphs
|
6
12
|
- Pulling support for mecab option partial, since it is more of a command-line feature
|
data/lib/natto.rb
CHANGED
@@ -99,7 +99,7 @@ module Natto
|
|
99
99
|
# @raise [MeCabError] if <tt>mecab</tt> cannot be initialized with the given <tt>options</tt>
|
100
100
|
# @see MeCab::SUPPORTED_OPTS
|
101
101
|
def initialize(options={})
|
102
|
-
@options = options
|
102
|
+
@options = {}.merge(options)
|
103
103
|
@dicts = []
|
104
104
|
|
105
105
|
opt_str = self.class.build_options_str(@options)
|
@@ -111,17 +111,21 @@ module Natto
|
|
111
111
|
self.mecab_set_lattice_level(@ptr, @options[:lattice_level].to_i) if @options[:lattice_level]
|
112
112
|
self.mecab_set_all_morphs(@ptr, 1) if @options[:all_morphs]
|
113
113
|
|
114
|
-
#
|
114
|
+
# Set mecab parsing implementations for N-best and regular parsing,
|
115
|
+
# for both parsing as string and yielding a node object
|
116
|
+
# N-Best parsing implementations
|
115
117
|
if @options[:nbest] && @options[:nbest] > 1
|
116
|
-
# N-Best parsing implementations
|
117
|
-
self.mecab_nbest_init(@ptr, str)
|
118
118
|
# nbest parsing requires lattice level >= 1
|
119
119
|
self.mecab_set_lattice_level(@ptr, (@options[:lattice_level] || 1))
|
120
120
|
@parse_tostr = lambda { |str|
|
121
|
+
self.mecab_nbest_init(@ptr, str)
|
121
122
|
return self.mecab_nbest_sparse_tostr(@ptr, @options[:nbest], str) ||
|
122
123
|
raise(MeCabError.new(self.mecab_strerror(@ptr)))
|
123
124
|
}
|
124
|
-
@parse_tonode = lambda { |str|
|
125
|
+
@parse_tonode = lambda { |str|
|
126
|
+
self.mecab_nbest_init(@ptr, str)
|
127
|
+
return self.mecab_nbest_next_tonode(@ptr)
|
128
|
+
}
|
125
129
|
else
|
126
130
|
# default parsing implementations
|
127
131
|
@parse_tostr = lambda { |str|
|
@@ -156,9 +160,14 @@ module Natto
|
|
156
160
|
head = Natto::MeCabNode.new(m_node_ptr)
|
157
161
|
if head && head[:next].address != 0x0
|
158
162
|
node = Natto::MeCabNode.new(head[:next])
|
159
|
-
|
163
|
+
i = 0
|
164
|
+
while node.nil? == false
|
165
|
+
if node.length > 0
|
166
|
+
node.surface = str.bytes.to_a()[i, node.length].pack('C*')
|
167
|
+
end
|
160
168
|
yield node
|
161
169
|
if node[:next].address != 0x0
|
170
|
+
i += node.length
|
162
171
|
node = Natto::MeCabNode.new(node[:next])
|
163
172
|
else
|
164
173
|
break
|
@@ -226,7 +235,7 @@ module Natto
|
|
226
235
|
end
|
227
236
|
end
|
228
237
|
end
|
229
|
-
opt.join(" ")
|
238
|
+
opt.empty? ? "" : opt.join(" ")
|
230
239
|
end
|
231
240
|
end
|
232
241
|
|
@@ -404,6 +413,7 @@ module Natto
|
|
404
413
|
# => nil
|
405
414
|
#
|
406
415
|
class MeCabNode < MeCabStruct
|
416
|
+
attr_accessor :surface, :feature
|
407
417
|
|
408
418
|
# Normal <tt>mecab</tt> node.
|
409
419
|
NOR_NODE = 0
|
@@ -457,24 +467,27 @@ module Natto
|
|
457
467
|
end
|
458
468
|
end
|
459
469
|
|
460
|
-
#
|
470
|
+
# Initializes this node instance.
|
471
|
+
# Sets the <tt>MeCab</tt> feature value for this node.
|
461
472
|
#
|
462
|
-
# @
|
463
|
-
def
|
464
|
-
|
465
|
-
|
466
|
-
|
473
|
+
# @param [FFI::Pointer] ptr
|
474
|
+
def initialize(ptr)
|
475
|
+
super(ptr)
|
476
|
+
|
477
|
+
if self[:feature]
|
478
|
+
@feature = self[:feature]
|
479
|
+
@feature.force_encoding(Encoding.default_external) if @feature.respond_to?(:encoding) && @feature.encoding!=Encoding.default_external
|
467
480
|
end
|
468
|
-
@surface
|
469
481
|
end
|
470
|
-
|
471
|
-
#
|
482
|
+
|
483
|
+
# Sets the morpheme surface value for this node.
|
472
484
|
#
|
473
|
-
# @
|
474
|
-
def
|
475
|
-
|
476
|
-
|
477
|
-
|
485
|
+
# @param [String] str
|
486
|
+
def surface=(str)
|
487
|
+
if str && self[:length] > 0
|
488
|
+
@surface = str
|
489
|
+
@surface.force_encoding(Encoding.default_external) if @surface.respond_to?(:encoding) && @surface.encoding!=Encoding.default_external
|
490
|
+
end
|
478
491
|
end
|
479
492
|
|
480
493
|
# Returns human-readable details for the <tt>mecab</tt> node.
|
data/lib/natto/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: natto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 9
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 5
|
9
|
-
-
|
10
|
-
version: 0.5.
|
9
|
+
- 1
|
10
|
+
version: 0.5.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Brooke M. Fujita
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-03-23 00:00:00 +09:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -89,7 +89,7 @@ requirements:
|
|
89
89
|
- MeCab, 0.98 or greater
|
90
90
|
- FFI, 0.6.3 or greater
|
91
91
|
rubyforge_project:
|
92
|
-
rubygems_version: 1.
|
92
|
+
rubygems_version: 1.6.2
|
93
93
|
signing_key:
|
94
94
|
specification_version: 3
|
95
95
|
summary: natto combines the Ruby programming language with MeCab, the part-of-speech and morphological analyzer for the Japanese language.
|