natto 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +7 -1
- data/lib/natto.rb +34 -21
- data/lib/natto/version.rb +1 -1
- metadata +5 -5
data/CHANGELOG
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
## CHANGELOG
|
2
2
|
|
3
|
-
- __2011/02/26__: 0.5.0 release.
|
3
|
+
- __2011/03/23__: 0.5.1 release.
|
4
|
+
- Corrected mojibake issue for surface and feature values
|
5
|
+
when node-parsing.
|
6
|
+
- Corrected call to NBest initializer in lambdas
|
7
|
+
for Natto::MeCab#initialize.
|
8
|
+
|
9
|
+
- __2011/02/26__: 0.5.0 release.
|
4
10
|
- Added support for node parsing using blocks
|
5
11
|
- Added support for mecab options nbest, all-morphs
|
6
12
|
- Pulling support for mecab option partial, since it is more of a command-line feature
|
data/lib/natto.rb
CHANGED
@@ -99,7 +99,7 @@ module Natto
|
|
99
99
|
# @raise [MeCabError] if <tt>mecab</tt> cannot be initialized with the given <tt>options</tt>
|
100
100
|
# @see MeCab::SUPPORTED_OPTS
|
101
101
|
def initialize(options={})
|
102
|
-
@options = options
|
102
|
+
@options = {}.merge(options)
|
103
103
|
@dicts = []
|
104
104
|
|
105
105
|
opt_str = self.class.build_options_str(@options)
|
@@ -111,17 +111,21 @@ module Natto
|
|
111
111
|
self.mecab_set_lattice_level(@ptr, @options[:lattice_level].to_i) if @options[:lattice_level]
|
112
112
|
self.mecab_set_all_morphs(@ptr, 1) if @options[:all_morphs]
|
113
113
|
|
114
|
-
#
|
114
|
+
# Set mecab parsing implementations for N-best and regular parsing,
|
115
|
+
# for both parsing as string and yielding a node object
|
116
|
+
# N-Best parsing implementations
|
115
117
|
if @options[:nbest] && @options[:nbest] > 1
|
116
|
-
# N-Best parsing implementations
|
117
|
-
self.mecab_nbest_init(@ptr, str)
|
118
118
|
# nbest parsing require lattice level >= 1
|
119
119
|
self.mecab_set_lattice_level(@ptr, (@options[:lattice_level] || 1))
|
120
120
|
@parse_tostr = lambda { |str|
|
121
|
+
self.mecab_nbest_init(@ptr, str)
|
121
122
|
return self.mecab_nbest_sparse_tostr(@ptr, @options[:nbest], str) ||
|
122
123
|
raise(MeCabError.new(self.mecab_strerror(@ptr)))
|
123
124
|
}
|
124
|
-
@parse_tonode = lambda { |str|
|
125
|
+
@parse_tonode = lambda { |str|
|
126
|
+
self.mecab_nbest_init(@ptr, str)
|
127
|
+
return self.mecab_nbest_next_tonode(@ptr)
|
128
|
+
}
|
125
129
|
else
|
126
130
|
# default parsing implementations
|
127
131
|
@parse_tostr = lambda { |str|
|
@@ -156,9 +160,14 @@ module Natto
|
|
156
160
|
head = Natto::MeCabNode.new(m_node_ptr)
|
157
161
|
if head && head[:next].address != 0x0
|
158
162
|
node = Natto::MeCabNode.new(head[:next])
|
159
|
-
|
163
|
+
i = 0
|
164
|
+
while node.nil? == false
|
165
|
+
if node.length > 0
|
166
|
+
node.surface = str.bytes.to_a()[i, node.length].pack('C*')
|
167
|
+
end
|
160
168
|
yield node
|
161
169
|
if node[:next].address != 0x0
|
170
|
+
i += node.length
|
162
171
|
node = Natto::MeCabNode.new(node[:next])
|
163
172
|
else
|
164
173
|
break
|
@@ -226,7 +235,7 @@ module Natto
|
|
226
235
|
end
|
227
236
|
end
|
228
237
|
end
|
229
|
-
opt.join(" ")
|
238
|
+
opt.empty? ? "" : opt.join(" ")
|
230
239
|
end
|
231
240
|
end
|
232
241
|
|
@@ -404,6 +413,7 @@ module Natto
|
|
404
413
|
# => nil
|
405
414
|
#
|
406
415
|
class MeCabNode < MeCabStruct
|
416
|
+
attr_accessor :surface, :feature
|
407
417
|
|
408
418
|
# Normal <tt>mecab</tt> node.
|
409
419
|
NOR_NODE = 0
|
@@ -457,24 +467,27 @@ module Natto
|
|
457
467
|
end
|
458
468
|
end
|
459
469
|
|
460
|
-
#
|
470
|
+
# Initializes this node instance.
|
471
|
+
# Sets the <tt>MeCab</tt> feature value for this node.
|
461
472
|
#
|
462
|
-
# @
|
463
|
-
def
|
464
|
-
|
465
|
-
|
466
|
-
|
473
|
+
# @param [FFI::Pointer]
|
474
|
+
def initialize(ptr)
|
475
|
+
super(ptr)
|
476
|
+
|
477
|
+
if self[:feature]
|
478
|
+
@feature = self[:feature]
|
479
|
+
@feature.force_encoding(Encoding.default_external) if @feature.respond_to?(:encoding) && @feature.encoding!=Encoding.default_external
|
467
480
|
end
|
468
|
-
@surface
|
469
481
|
end
|
470
|
-
|
471
|
-
#
|
482
|
+
|
483
|
+
# Sets the morpheme surface value for this node.
|
472
484
|
#
|
473
|
-
# @
|
474
|
-
def
|
475
|
-
|
476
|
-
|
477
|
-
|
485
|
+
# @param [String]
|
486
|
+
def surface=(str)
|
487
|
+
if str && self[:length] > 0
|
488
|
+
@surface = str
|
489
|
+
@surface.force_encoding(Encoding.default_external) if @surface.respond_to?(:encoding) && @surface.encoding!=Encoding.default_external
|
490
|
+
end
|
478
491
|
end
|
479
492
|
|
480
493
|
# Returns human-readable details for the <tt>mecab</tt> node.
|
data/lib/natto/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: natto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 9
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 5
|
9
|
-
- 0
|
10
|
-
version: 0.5.0
|
9
|
+
- 1
|
10
|
+
version: 0.5.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Brooke M. Fujita
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-03-23 00:00:00 +09:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -89,7 +89,7 @@ requirements:
|
|
89
89
|
- MeCab, 0.98 or greater
|
90
90
|
- FFI, 0.6.3 or greater
|
91
91
|
rubyforge_project:
|
92
|
-
rubygems_version: 1.
|
92
|
+
rubygems_version: 1.6.2
|
93
93
|
signing_key:
|
94
94
|
specification_version: 3
|
95
95
|
summary: natto combines the Ruby programming language with MeCab, the part-of-speech and morphological analyzer for the Japanese language.
|