natto 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. data/CHANGELOG +7 -1
  2. data/lib/natto.rb +34 -21
  3. data/lib/natto/version.rb +1 -1
  4. metadata +5 -5
data/CHANGELOG CHANGED
@@ -1,6 +1,12 @@
1
1
  ## CHANGELOG
2
2
 
3
- - __2011/02/2?__: 0.5.0 release.
3
+ - __2011/03/23__: 0.5.1 release.
4
+ - Corrected mojibake issue for surface and feature values
5
+ when node-parsing.
6
+ - Corrected call to NBest initializer in lambdas
7
+ for Natto::MeCab#initialize.
8
+
9
+ - __2011/02/26__: 0.5.0 release.
4
10
  - Added support for node parsing using blocks
5
11
  - Added support for mecab options nbest, all-morphs
6
12
  - Pulling support for mecab option partial, since it is more of a command-line feature
data/lib/natto.rb CHANGED
@@ -99,7 +99,7 @@ module Natto
99
99
  # @raise [MeCabError] if <tt>mecab</tt> cannot be initialized with the given <tt>options</tt>
100
100
  # @see MeCab::SUPPORTED_OPTS
101
101
  def initialize(options={})
102
- @options = options
102
+ @options = {}.merge(options)
103
103
  @dicts = []
104
104
 
105
105
  opt_str = self.class.build_options_str(@options)
@@ -111,17 +111,21 @@ module Natto
111
111
  self.mecab_set_lattice_level(@ptr, @options[:lattice_level].to_i) if @options[:lattice_level]
112
112
  self.mecab_set_all_morphs(@ptr, 1) if @options[:all_morphs]
113
113
 
114
- # set mecab parsing implementations
114
+ # Set mecab parsing implementations for N-best and regular parsing,
115
+ # for both parsing as string and yielding a node object
116
+ # N-Best parsing implementations
115
117
  if @options[:nbest] && @options[:nbest] > 1
116
- # N-Best parsing implementations
117
- self.mecab_nbest_init(@ptr, str)
118
118
  # nbest parsing requires lattice level >= 1
119
119
  self.mecab_set_lattice_level(@ptr, (@options[:lattice_level] || 1))
120
120
  @parse_tostr = lambda { |str|
121
+ self.mecab_nbest_init(@ptr, str)
121
122
  return self.mecab_nbest_sparse_tostr(@ptr, @options[:nbest], str) ||
122
123
  raise(MeCabError.new(self.mecab_strerror(@ptr)))
123
124
  }
124
- @parse_tonode = lambda { |str| return self.mecab_nbest_next_tonode(@ptr) }
125
+ @parse_tonode = lambda { |str|
126
+ self.mecab_nbest_init(@ptr, str)
127
+ return self.mecab_nbest_next_tonode(@ptr)
128
+ }
125
129
  else
126
130
  # default parsing implementations
127
131
  @parse_tostr = lambda { |str|
@@ -156,9 +160,14 @@ module Natto
156
160
  head = Natto::MeCabNode.new(m_node_ptr)
157
161
  if head && head[:next].address != 0x0
158
162
  node = Natto::MeCabNode.new(head[:next])
159
- while (node.nil? == false)
163
+ i = 0
164
+ while node.nil? == false
165
+ if node.length > 0
166
+ node.surface = str.bytes.to_a()[i, node.length].pack('C*')
167
+ end
160
168
  yield node
161
169
  if node[:next].address != 0x0
170
+ i += node.length
162
171
  node = Natto::MeCabNode.new(node[:next])
163
172
  else
164
173
  break
@@ -226,7 +235,7 @@ module Natto
226
235
  end
227
236
  end
228
237
  end
229
- opt.join(" ")
238
+ opt.empty? ? "" : opt.join(" ")
230
239
  end
231
240
  end
232
241
 
@@ -404,6 +413,7 @@ module Natto
404
413
  # => nil
405
414
  #
406
415
  class MeCabNode < MeCabStruct
416
+ attr_accessor :surface, :feature
407
417
 
408
418
  # Normal <tt>mecab</tt> node.
409
419
  NOR_NODE = 0
@@ -457,24 +467,27 @@ module Natto
457
467
  end
458
468
  end
459
469
 
460
- # Returns the <tt>surface</tt> value for this node.
470
+ # Initializes this node instance.
471
+ # Sets the <tt>MeCab</tt> feature value for this node.
461
472
  #
462
- # @return [String] <tt>mecab</tt> node surface value
463
- def surface
464
- if self[:surface] && self[:length] > 0
465
- @surface ||= self[:surface].bytes.to_a()[0,self[:length]].pack('C*')
466
- @surface.force_encoding(Encoding.default_external) if @surface.respond_to?(:encoding) && @surface.encoding!=Encoding.default_external
473
+ # @param [FFI::Pointer]
474
+ def initialize(ptr)
475
+ super(ptr)
476
+
477
+ if self[:feature]
478
+ @feature = self[:feature]
479
+ @feature.force_encoding(Encoding.default_external) if @feature.respond_to?(:encoding) && @feature.encoding!=Encoding.default_external
467
480
  end
468
- @surface
469
481
  end
470
-
471
- # Returns the <tt>feature</tt> value for this node.
482
+
483
+ # Sets the morpheme surface value for this node.
472
484
  #
473
- # @return [String] <tt>mecab</tt> node feature value
474
- def feature
475
- @feature ||= self[:feature]
476
- @feature.force_encoding(Encoding.default_external) if @feature.respond_to?(:encoding) && @feature.encoding!=Encoding.default_external
477
- @feature
485
+ # @param [String]
486
+ def surface=(str)
487
+ if str && self[:length] > 0
488
+ @surface = str
489
+ @surface.force_encoding(Encoding.default_external) if @surface.respond_to?(:encoding) && @surface.encoding!=Encoding.default_external
490
+ end
478
491
  end
479
492
 
480
493
  # Returns human-readable details for the <tt>mecab</tt> node.
data/lib/natto/version.rb CHANGED
@@ -23,5 +23,5 @@
23
23
  # which are made available via <tt>FFI</tt> bindings to <tt>mecab</tt>.
24
24
  module Natto
25
25
  # Version string for this Rubygem.
26
- VERSION = "0.5.0"
26
+ VERSION = "0.5.1"
27
27
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: natto
3
3
  version: !ruby/object:Gem::Version
4
- hash: 11
4
+ hash: 9
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 5
9
- - 0
10
- version: 0.5.0
9
+ - 1
10
+ version: 0.5.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Brooke M. Fujita
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-02-26 00:00:00 +09:00
18
+ date: 2011-03-23 00:00:00 +09:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -89,7 +89,7 @@ requirements:
89
89
  - MeCab, 0.98 or greater
90
90
  - FFI, 0.6.3 or greater
91
91
  rubyforge_project:
92
- rubygems_version: 1.4.2
92
+ rubygems_version: 1.6.2
93
93
  signing_key:
94
94
  specification_version: 3
95
95
  summary: natto combines the Ruby programming language with MeCab, the part-of-speech and morphological analyzer for the Japanese language.