natto 0.5.0 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. data/CHANGELOG +7 -1
  2. data/lib/natto.rb +34 -21
  3. data/lib/natto/version.rb +1 -1
  4. metadata +5 -5
data/CHANGELOG CHANGED
@@ -1,6 +1,12 @@
1
1
  ## CHANGELOG
2
2
 
3
- - __2011/02/2?__: 0.5.0 release.
3
+ - __2011/03/23__: 0.5.1 release.
4
+ - Corrected mojibake issue for surface and feature values
5
+ when node-parsing.
6
+ - Corrected call to NBest initializer in lambdas
7
+ for Natto::MeCab#initialize.
8
+
9
+ - __2011/02/26__: 0.5.0 release.
4
10
  - Added support for node parsing using blocks
5
11
  - Added support for mecab options nbest, all-morphs
6
12
  - Pulling support for mecab option partial, since it is more of a command-line feature
data/lib/natto.rb CHANGED
@@ -99,7 +99,7 @@ module Natto
99
99
  # @raise [MeCabError] if <tt>mecab</tt> cannot be initialized with the given <tt>options</tt>
100
100
  # @see MeCab::SUPPORTED_OPTS
101
101
  def initialize(options={})
102
- @options = options
102
+ @options = {}.merge(options)
103
103
  @dicts = []
104
104
 
105
105
  opt_str = self.class.build_options_str(@options)
@@ -111,17 +111,21 @@ module Natto
111
111
  self.mecab_set_lattice_level(@ptr, @options[:lattice_level].to_i) if @options[:lattice_level]
112
112
  self.mecab_set_all_morphs(@ptr, 1) if @options[:all_morphs]
113
113
 
114
- # set mecab parsing implementations
114
+ # Set mecab parsing implementations for N-best and regular parsing,
115
+ # for both parsing as string and yielding a node object
116
+ # N-Best parsing implementations
115
117
  if @options[:nbest] && @options[:nbest] > 1
116
- # N-Best parsing implementations
117
- self.mecab_nbest_init(@ptr, str)
118
118
  # nbest parsing requires a lattice level >= 1
119
119
  self.mecab_set_lattice_level(@ptr, (@options[:lattice_level] || 1))
120
120
  @parse_tostr = lambda { |str|
121
+ self.mecab_nbest_init(@ptr, str)
121
122
  return self.mecab_nbest_sparse_tostr(@ptr, @options[:nbest], str) ||
122
123
  raise(MeCabError.new(self.mecab_strerror(@ptr)))
123
124
  }
124
- @parse_tonode = lambda { |str| return self.mecab_nbest_next_tonode(@ptr) }
125
+ @parse_tonode = lambda { |str|
126
+ self.mecab_nbest_init(@ptr, str)
127
+ return self.mecab_nbest_next_tonode(@ptr)
128
+ }
125
129
  else
126
130
  # default parsing implementations
127
131
  @parse_tostr = lambda { |str|
@@ -156,9 +160,14 @@ module Natto
156
160
  head = Natto::MeCabNode.new(m_node_ptr)
157
161
  if head && head[:next].address != 0x0
158
162
  node = Natto::MeCabNode.new(head[:next])
159
- while (node.nil? == false)
163
+ i = 0
164
+ while node.nil? == false
165
+ if node.length > 0
166
+ node.surface = str.bytes.to_a()[i, node.length].pack('C*')
167
+ end
160
168
  yield node
161
169
  if node[:next].address != 0x0
170
+ i += node.length
162
171
  node = Natto::MeCabNode.new(node[:next])
163
172
  else
164
173
  break
@@ -226,7 +235,7 @@ module Natto
226
235
  end
227
236
  end
228
237
  end
229
- opt.join(" ")
238
+ opt.empty? ? "" : opt.join(" ")
230
239
  end
231
240
  end
232
241
 
@@ -404,6 +413,7 @@ module Natto
404
413
  # => nil
405
414
  #
406
415
  class MeCabNode < MeCabStruct
416
+ attr_accessor :surface, :feature
407
417
 
408
418
  # Normal <tt>mecab</tt> node.
409
419
  NOR_NODE = 0
@@ -457,24 +467,27 @@ module Natto
457
467
  end
458
468
  end
459
469
 
460
- # Returns the <tt>surface</tt> value for this node.
470
+ # Initializes this node instance.
471
+ # Sets the <tt>MeCab</tt> feature value for this node.
461
472
  #
462
- # @return [String] <tt>mecab</tt> node surface value
463
- def surface
464
- if self[:surface] && self[:length] > 0
465
- @surface ||= self[:surface].bytes.to_a()[0,self[:length]].pack('C*')
466
- @surface.force_encoding(Encoding.default_external) if @surface.respond_to?(:encoding) && @surface.encoding!=Encoding.default_external
473
+ # @param [FFI::Pointer] ptr pointer handed to the superclass initializer
474
+ def initialize(ptr)
475
+ super(ptr)
476
+
477
+ if self[:feature]
478
+ @feature = self[:feature]
479
+ @feature.force_encoding(Encoding.default_external) if @feature.respond_to?(:encoding) && @feature.encoding!=Encoding.default_external
467
480
  end
468
- @surface
469
481
  end
470
-
471
- # Returns the <tt>feature</tt> value for this node.
482
+
483
+ # Sets the morpheme surface value for this node.
472
484
  #
473
- # @return [String] <tt>mecab</tt> node feature value
474
- def feature
475
- @feature ||= self[:feature]
476
- @feature.force_encoding(Encoding.default_external) if @feature.respond_to?(:encoding) && @feature.encoding!=Encoding.default_external
477
- @feature
485
+ # @param [String] str the surface value to assign to this node
486
+ def surface=(str)
487
+ if str && self[:length] > 0
488
+ @surface = str
489
+ @surface.force_encoding(Encoding.default_external) if @surface.respond_to?(:encoding) && @surface.encoding!=Encoding.default_external
490
+ end
478
491
  end
479
492
 
480
493
  # Returns human-readable details for the <tt>mecab</tt> node.
data/lib/natto/version.rb CHANGED
@@ -23,5 +23,5 @@
23
23
  # which are made available via <tt>FFI</tt> bindings to <tt>mecab</tt>.
24
24
  module Natto
25
25
  # Version string for this Rubygem.
26
- VERSION = "0.5.0"
26
+ VERSION = "0.5.1"
27
27
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: natto
3
3
  version: !ruby/object:Gem::Version
4
- hash: 11
4
+ hash: 9
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 5
9
- - 0
10
- version: 0.5.0
9
+ - 1
10
+ version: 0.5.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Brooke M. Fujita
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-02-26 00:00:00 +09:00
18
+ date: 2011-03-23 00:00:00 +09:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -89,7 +89,7 @@ requirements:
89
89
  - MeCab, 0.98 or greater
90
90
  - FFI, 0.6.3 or greater
91
91
  rubyforge_project:
92
- rubygems_version: 1.4.2
92
+ rubygems_version: 1.6.2
93
93
  signing_key:
94
94
  specification_version: 3
95
95
  summary: natto combines the Ruby programming language with MeCab, the part-of-speech and morphological analyzer for the Japanese language.