natto 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -2,7 +2,6 @@
2
2
  A Tasty Ruby Binding with MeCab
3
3
 
4
4
  ## What is natto?
5
-
6
5
  natto combines the [Ruby programming language](http://www.ruby-lang.org/) with [MeCab](http://mecab.sourceforge.net/), the part-of-speech and morphological analyzer for the Japanese language.
7
6
 
8
7
  ## Requirements
@@ -31,6 +30,9 @@ e.g., for Cygwin
31
30
  require 'natto'
32
31
 
33
32
  m = Natto::MeCab.new
33
+ => #<Natto::MeCab:0x28d93dd4 @options={}, \
34
+ @dicts=[#<Natto::DictionaryInfo:0x28d93d34>], \
35
+ @ptr=#<FFI::Pointer address=0x28af3e58>>
34
36
  puts m.parse("すもももももももものうち")
35
37
  すもも 名詞,一般,*,*,*,*,すもも,スモモ,スモモ
36
38
  も 助詞,係助詞,*,*,*,*,も,モ,モ
@@ -53,7 +55,11 @@ e.g., for Cygwin
53
55
 
54
56
  ## Changelog
55
57
 
56
- - __2010/12/30: 0.0.7 release.
58
+ - __2011/01/07__: 0.0.8 release.
59
+ - Adding support for accessing dictionaries
60
+ - Further tweaking of documentation with markdown
61
+
62
+ - __2010/12/30__: 0.0.7 release.
57
63
  - Adding support for all-morphs and partial options
58
64
  - Further updating of documentation with markdown
59
65
 
data/lib/natto/binding.rb CHANGED
@@ -1,44 +1,5 @@
1
- # natto combines the Ruby programming language with MeCab,
2
- # the part-of-speech and morphological analyzer for the
3
- # Japanese language.
4
- #
5
- # ## Requirements
6
- # natto requires the following:
7
- #
8
- # - [MeCab _0.98_](http://sourceforge.net/projects/mecab/files/mecab/0.98/)
9
- # - [ffi _0.6.3 or greater_](http://rubygems.org/gems/ffi)
10
- # - Ruby _1.8.7 or greater_
11
- #
12
- # ## Installation
13
- # Install natto with the following gem command:
14
- # gem install natto
15
- #
16
- # ## Configuration
17
- # - natto will try to locate the <tt>mecab</tt> library based upon its runtime environment.
18
- # - In case of <tt>LoadError</tt>, please set the <tt>MECAB_PATH</tt> environment variable to the exact name/path to your <tt>mecab</tt> library.
19
- #
20
- # e.g., for bash on UNIX/Linux
21
- # export MECAB_PATH=mecab.so
22
- # e.g., on Windows
23
- # set MECAB_PATH=C:\Program Files\MeCab\bin\libmecab.dll
24
- # e.g., for Cygwin
25
- # export MECAB_PATH=cygmecab-1
26
- #
27
- # ## Usage
28
- # require 'natto'
29
- #
30
- # m = Natto::MeCab.new
31
- # puts m.parse("すもももももももものうち")
32
- # すもも 名詞,一般,*,*,*,*,すもも,スモモ,スモモ
33
- # も 助詞,係助詞,*,*,*,*,も,モ,モ
34
- # もも 名詞,一般,*,*,*,*,もも,モモ,モモ
35
- # も 助詞,係助詞,*,*,*,*,も,モ,モ
36
- # もも 名詞,一般,*,*,*,*,もも,モモ,モモ
37
- # の 助詞,連体化,*,*,*,*,の,ノ,ノ
38
- # うち 名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ
39
- # EOS
40
- # => nil
41
- #
1
+ # coding: utf-8
2
+
42
3
  module Natto
43
4
 
44
5
  # Module <tt>Binding</tt> encapsulates methods and behavior
data/lib/natto/version.rb CHANGED
@@ -1,5 +1,50 @@
1
- # -*- encoding: utf-8 -*-
1
+ # coding: utf-8
2
+
3
+ # natto combines the Ruby programming language with MeCab,
4
+ # the part-of-speech and morphological analyzer for the
5
+ # Japanese language.
6
+ #
7
+ # ## Requirements
8
+ # natto requires the following:
9
+ #
10
+ # - [MeCab _0.98_](http://sourceforge.net/projects/mecab/files/mecab/0.98/)
11
+ # - [ffi _0.6.3 or greater_](http://rubygems.org/gems/ffi)
12
+ # - Ruby _1.8.7 or greater_
13
+ #
14
+ # ## Installation
15
+ # Install natto with the following gem command:
16
+ # gem install natto
17
+ #
18
+ # ## Configuration
19
+ # - natto will try to locate the <tt>mecab</tt> library based upon its runtime environment.
20
+ # - In case of <tt>LoadError</tt>, please set the <tt>MECAB_PATH</tt> environment variable to the exact name/path to your <tt>mecab</tt> library.
21
+ #
22
+ # e.g., for bash on UNIX/Linux
23
+ # export MECAB_PATH=mecab.so
24
+ # e.g., on Windows
25
+ # set MECAB_PATH=C:\Program Files\MeCab\bin\libmecab.dll
26
+ # e.g., for Cygwin
27
+ # export MECAB_PATH=cygmecab-1
28
+ #
29
+ # ## Usage
30
+ #
31
+ # require 'natto'
32
+ #
33
+ # m = Natto::MeCab.new
34
+ # => #<Natto::MeCab:0x28d93dd4 @options={}, \
35
+ # @dicts=[#<Natto::DictionaryInfo:0x28d93d34>], \
36
+ # @ptr=#<FFI::Pointer address=0x28af3e58>>
37
+ # puts m.parse("すもももももももものうち")
38
+ # すもも 名詞,一般,*,*,*,*,すもも,スモモ,スモモ
39
+ # も 助詞,係助詞,*,*,*,*,も,モ,モ
40
+ # もも 名詞,一般,*,*,*,*,もも,モモ,モモ
41
+ # も 助詞,係助詞,*,*,*,*,も,モ,モ
42
+ # もも 名詞,一般,*,*,*,*,もも,モモ,モモ
43
+ # の 助詞,連体化,*,*,*,*,の,ノ,ノ
44
+ # うち 名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ
45
+ # EOS
46
+ # => nil
2
47
  module Natto
3
48
  # Version string for this Rubygem.
4
- VERSION = "0.0.7"
49
+ VERSION = "0.0.8"
5
50
  end
data/lib/natto.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # coding: utf-8
1
2
  require 'rubygems' if RUBY_VERSION.to_f < 1.9
2
3
  require 'natto/binding'
3
4
 
@@ -9,9 +10,13 @@ module Natto
9
10
  # initialization.
10
11
  #
11
12
  # ## Usage
13
+ # _Here is how to use natto under Ruby 1.9:_
12
14
  # require 'natto'
13
15
  #
14
16
  # m = Natto::MeCab.new
17
+ # => #<Natto::MeCab:0x28d93dd4 @options={}, \
18
+ # @dicts=[#<Natto::DictionaryInfo:0x28d93d34>], \
19
+ # @ptr=#<FFI::Pointer address=0x28af3e58>>
15
20
  # puts m.parse("すもももももももものうち")
16
21
  # すもも 名詞,一般,*,*,*,*,すもも,スモモ,スモモ
17
22
  # も 助詞,係助詞,*,*,*,*,も,モ,モ
@@ -23,10 +28,9 @@ module Natto
23
28
  # EOS
24
29
  # => nil
25
30
  #
26
- # @see SUPPORTED_OPTS
27
31
  class MeCab
28
32
 
29
- attr_reader :options
33
+ attr_reader :options, :dicts
30
34
 
31
35
  # Supported options to the <tt>mecab</tt> parser.
32
36
  # See the <tt>mecab</tt> help for more details.
@@ -34,10 +38,6 @@ module Natto
34
38
  :output_format_type, :partial, :node_format, :unk_format,
35
39
  :bos_format, :eos_format, :eon_format, :unk_feature,
36
40
  :nbest, :theta, :cost_factor ].freeze
37
- # :allocate_sentence ]
38
-
39
- #OPTION_DEFAULTS = { :lattice_level=>0, :all_morphs=>false, :nbest=>1,
40
- # :theta=>0.75, :cost_factor=>700 }.freeze
41
41
 
42
42
  # Initializes the wrapped <tt>mecab</tt> instance with the
43
43
  # given <tt>options</tt> hash.
@@ -65,6 +65,9 @@ module Natto
65
65
  #
66
66
  # e.g.
67
67
  # m = Natto::MeCab.new(:node_format=>'%m\t%f[7]\n')
68
+ # => #<Natto::MeCab:0x28d8886c @options={:node_format=>"%m\\t%f[7]\\n"}, \
69
+ # @dicts=[#<Natto::DictionaryInfo:0x28d8863c>], \
70
+ # @ptr=#<FFI::Pointer address=0x28e3b268>>
68
71
  # puts m.parse("日本語は難しいです。")
69
72
  # 日本語 ニホンゴ
70
73
  # は ハ
@@ -79,10 +82,17 @@ module Natto
79
82
  # @see MeCab::SUPPORTED_OPTS
80
83
  def initialize(options={})
81
84
  @options = options
85
+ @dicts = []
86
+
82
87
  opt_str = self.class.build_options_str(@options)
83
88
  @ptr = Natto::Binding.mecab_new2(opt_str)
84
- raise MeCabError.new("Could not initialize MeCab with options: '#{opt_str}'") if @ptr.address == 0
85
- #@dict = Natto::DictionaryInfo.new(Natto::Binding.mecab_dictionary_info(@ptr))
89
+ raise MeCabError.new("Could not initialize MeCab with options: '#{opt_str}'") if @ptr.address == 0x0
90
+
91
+ @dicts << Natto::DictionaryInfo.new(Natto::Binding.mecab_dictionary_info(@ptr))
92
+ while @dicts.last[:next].address != 0x0
93
+ @dicts << Natto::DictionaryInfo.new(@dicts.last[:next])
94
+ end
95
+
86
96
  ObjectSpace.define_finalizer(self, self.class.create_free_proc(@ptr))
87
97
  end
88
98
 
@@ -123,12 +133,6 @@ module Natto
123
133
  else
124
134
  opt << "--#{key}=#{options[k]}"
125
135
  end
126
-
127
- #if key.end_with? '_format_' or key.end_with? '_feature'
128
- # opt << "--#{key}="+options[k]
129
- #else
130
- # opt << "--#{key}=#{options[k]}"
131
- #end
132
136
  end
133
137
  end
134
138
  opt.join(" ")
@@ -155,11 +159,11 @@ module Natto
155
159
  # - :next
156
160
  #
157
161
  # # Usage:
158
- #
159
- # dict = Natto::DictionaryInfo.new(mecab_ptr)
160
- # puts dict[:filename]
162
+ # m = Natto::MeCab.new
163
+ # sysdic = m.dicts.first
164
+ # puts sysdic[:filename]
161
165
  # => /usr/local/lib/mecab/dic/ipadic/sys.dic
162
- # puts dict[:charset]
166
+ # puts sysdic[:charset]
163
167
  # => utf8
164
168
  class DictionaryInfo < FFI::Struct
165
169
  layout :filename, :string,
data/test/test_natto.rb CHANGED
@@ -119,4 +119,14 @@ class TestNatto < Test::Unit::TestCase
119
119
  Natto::MeCab.new(:userdic=>'/userdic/does/not/exist')
120
120
  end
121
121
  end
122
+
123
+ def test_dictionary_accessor
124
+ m = Natto::MeCab.new
125
+ dicts = m.dicts
126
+ assert dicts.empty? == false
127
+ sysdic = dicts.first
128
+ assert_equal('/usr/local/lib/mecab/dic/ipadic/sys.dic', sysdic[:filename])
129
+ assert_equal('utf8', sysdic[:charset])
130
+ assert_equal(0x0, sysdic[:next].address)
131
+ end
122
132
  end
metadata CHANGED
@@ -1,13 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: natto
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
5
4
  prerelease: false
6
5
  segments:
7
6
  - 0
8
7
  - 0
9
- - 7
10
- version: 0.0.7
8
+ - 8
9
+ version: 0.0.8
11
10
  platform: ruby
12
11
  authors:
13
12
  - Brooke M. Fujita
@@ -15,7 +14,7 @@ autorequire:
15
14
  bindir: bin
16
15
  cert_chain: []
17
16
 
18
- date: 2010-12-30 00:00:00 +09:00
17
+ date: 2011-01-07 00:00:00 +09:00
19
18
  default_executable:
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
@@ -26,7 +25,6 @@ dependencies:
26
25
  requirements:
27
26
  - - ">="
28
27
  - !ruby/object:Gem::Version
29
- hash: 1
30
28
  segments:
31
29
  - 0
32
30
  - 6
@@ -55,8 +53,12 @@ homepage: http://code.google.com/p/natto/
55
53
  licenses:
56
54
  - BSD
57
55
  post_install_message:
58
- rdoc_options: []
59
-
56
+ rdoc_options:
57
+ - --title
58
+ - "natto #{Natto::VERSION} -- Ruby-Mecab binding"
59
+ - --main
60
+ - README
61
+ - -c UTF-8
60
62
  require_paths:
61
63
  - lib
62
64
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -64,7 +66,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
64
66
  requirements:
65
67
  - - ">="
66
68
  - !ruby/object:Gem::Version
67
- hash: 57
68
69
  segments:
69
70
  - 1
70
71
  - 8
@@ -75,7 +76,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
75
76
  requirements:
76
77
  - - ">="
77
78
  - !ruby/object:Gem::Version
78
- hash: 3
79
79
  segments:
80
80
  - 0
81
81
  version: "0"