natto 0.0.7 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -2,7 +2,6 @@
2
2
  A Tasty Ruby Binding with MeCab
3
3
 
4
4
  ## What is natto?
5
-
6
5
  natto combines the [Ruby programming language](http://www.ruby-lang.org/) with [MeCab](http://mecab.sourceforge.net/), the part-of-speech and morphological analyzer for the Japanese language.
7
6
 
8
7
  ## Requirements
@@ -31,6 +30,9 @@ e.g., for Cygwin
31
30
  require 'natto'
32
31
 
33
32
  m = Natto::MeCab.new
33
+ => #<Natto::MeCab:0x28d93dd4 @options={}, \
34
+ @dicts=[#<Natto::DictionaryInfo:0x28d93d34>], \
35
+ @ptr=#<FFI::Pointer address=0x28af3e58>>
34
36
  puts m.parse("すもももももももものうち")
35
37
  すもも 名詞,一般,*,*,*,*,すもも,スモモ,スモモ
36
38
  も 助詞,係助詞,*,*,*,*,も,モ,モ
@@ -53,7 +55,11 @@ e.g., for Cygwin
53
55
 
54
56
  ## Changelog
55
57
 
56
- - __2010/12/30: 0.0.7 release.
58
+ - __2011/01/07__: 0.0.8 release.
59
+ - Adding support for accessing dictionaries
60
+ - Further tweaking of documentation with markdown
61
+
62
+ - __2010/12/30__: 0.0.7 release.
57
63
  - Adding support for all-morphs and partial options
58
64
  - Further updating of documentation with markdown
59
65
 
data/lib/natto/binding.rb CHANGED
@@ -1,44 +1,5 @@
1
- # natto combines the Ruby programming language with MeCab,
2
- # the part-of-speech and morphological analyzer for the
3
- # Japanese language.
4
- #
5
- # ## Requirements
6
- # natto requires the following:
7
- #
8
- # - [MeCab _0.98_](http://sourceforge.net/projects/mecab/files/mecab/0.98/)
9
- # - [ffi _0.6.3 or greater_](http://rubygems.org/gems/ffi)
10
- # - Ruby _1.8.7 or greater_
11
- #
12
- # ## Installation
13
- # Install natto with the following gem command:
14
- # gem install natto
15
- #
16
- # ## Configuration
17
- # - natto will try to locate the <tt>mecab</tt> library based upon its runtime environment.
18
- # - In case of <tt>LoadError</tt>, please set the <tt>MECAB_PATH</tt> environment variable to the exact name/path to your <tt>mecab</tt> library.
19
- #
20
- # e.g., for bash on UNIX/Linux
21
- # export MECAB_PATH=mecab.so
22
- # e.g., on Windows
23
- # set MECAB_PATH=C:\Program Files\MeCab\bin\libmecab.dll
24
- # e.g., for Cygwin
25
- # export MECAB_PATH=cygmecab-1
26
- #
27
- # ## Usage
28
- # require 'natto'
29
- #
30
- # m = Natto::MeCab.new
31
- # puts m.parse("すもももももももものうち")
32
- # すもも 名詞,一般,*,*,*,*,すもも,スモモ,スモモ
33
- # も 助詞,係助詞,*,*,*,*,も,モ,モ
34
- # もも 名詞,一般,*,*,*,*,もも,モモ,モモ
35
- # も 助詞,係助詞,*,*,*,*,も,モ,モ
36
- # もも 名詞,一般,*,*,*,*,もも,モモ,モモ
37
- # の 助詞,連体化,*,*,*,*,の,ノ,ノ
38
- # うち 名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ
39
- # EOS
40
- # => nil
41
- #
1
+ # coding: utf-8
2
+
42
3
  module Natto
43
4
 
44
5
  # Module <tt>Binding</tt> encapsulates methods and behavior
data/lib/natto/version.rb CHANGED
@@ -1,5 +1,50 @@
1
- # -*- encoding: utf-8 -*-
1
+ # coding: utf-8
2
+
3
+ # natto combines the Ruby programming language with MeCab,
4
+ # the part-of-speech and morphological analyzer for the
5
+ # Japanese language.
6
+ #
7
+ # ## Requirements
8
+ # natto requires the following:
9
+ #
10
+ # - [MeCab _0.98_](http://sourceforge.net/projects/mecab/files/mecab/0.98/)
11
+ # - [ffi _0.6.3 or greater_](http://rubygems.org/gems/ffi)
12
+ # - Ruby _1.8.7 or greater_
13
+ #
14
+ # ## Installation
15
+ # Install natto with the following gem command:
16
+ # gem install natto
17
+ #
18
+ # ## Configuration
19
+ # - natto will try to locate the <tt>mecab</tt> library based upon its runtime environment.
20
+ # - In case of <tt>LoadError</tt>, please set the <tt>MECAB_PATH</tt> environment variable to the exact name/path to your <tt>mecab</tt> library.
21
+ #
22
+ # e.g., for bash on UNIX/Linux
23
+ # export MECAB_PATH=mecab.so
24
+ # e.g., on Windows
25
+ # set MECAB_PATH=C:\Program Files\MeCab\bin\libmecab.dll
26
+ # e.g., for Cygwin
27
+ # export MECAB_PATH=cygmecab-1
28
+ #
29
+ # ## Usage
30
+ #
31
+ # require 'natto'
32
+ #
33
+ # m = Natto::MeCab.new
34
+ # => #<Natto::MeCab:0x28d93dd4 @options={}, \
35
+ # @dicts=[#<Natto::DictionaryInfo:0x28d93d34>], \
36
+ # @ptr=#<FFI::Pointer address=0x28af3e58>>
37
+ # puts m.parse("すもももももももものうち")
38
+ # すもも 名詞,一般,*,*,*,*,すもも,スモモ,スモモ
39
+ # も 助詞,係助詞,*,*,*,*,も,モ,モ
40
+ # もも 名詞,一般,*,*,*,*,もも,モモ,モモ
41
+ # も 助詞,係助詞,*,*,*,*,も,モ,モ
42
+ # もも 名詞,一般,*,*,*,*,もも,モモ,モモ
43
+ # の 助詞,連体化,*,*,*,*,の,ノ,ノ
44
+ # うち 名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ
45
+ # EOS
46
+ # => nil
2
47
  module Natto
3
48
  # Version string for this Rubygem.
4
- VERSION = "0.0.7"
49
+ VERSION = "0.0.8"
5
50
  end
data/lib/natto.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # coding: utf-8
1
2
  require 'rubygems' if RUBY_VERSION.to_f < 1.9
2
3
  require 'natto/binding'
3
4
 
@@ -9,9 +10,13 @@ module Natto
9
10
  # initialization.
10
11
  #
11
12
  # ## Usage
13
+ # _Here is how to use natto under Ruby 1.9:_
12
14
  # require 'natto'
13
15
  #
14
16
  # m = Natto::MeCab.new
17
+ # => #<Natto::MeCab:0x28d93dd4 @options={}, \
18
+ # @dicts=[#<Natto::DictionaryInfo:0x28d93d34>], \
19
+ # @ptr=#<FFI::Pointer address=0x28af3e58>>
15
20
  # puts m.parse("すもももももももものうち")
16
21
  # すもも 名詞,一般,*,*,*,*,すもも,スモモ,スモモ
17
22
  # も 助詞,係助詞,*,*,*,*,も,モ,モ
@@ -23,10 +28,9 @@ module Natto
23
28
  # EOS
24
29
  # => nil
25
30
  #
26
- # @see SUPPORTED_OPTS
27
31
  class MeCab
28
32
 
29
- attr_reader :options
33
+ attr_reader :options, :dicts
30
34
 
31
35
  # Supported options to the <tt>mecab</tt> parser.
32
36
  # See the <tt>mecab</tt> help for more details.
@@ -34,10 +38,6 @@ module Natto
34
38
  :output_format_type, :partial, :node_format, :unk_format,
35
39
  :bos_format, :eos_format, :eon_format, :unk_feature,
36
40
  :nbest, :theta, :cost_factor ].freeze
37
- # :allocate_sentence ]
38
-
39
- #OPTION_DEFAULTS = { :lattice_level=>0, :all_morphs=>false, :nbest=>1,
40
- # :theta=>0.75, :cost_factor=>700 }.freeze
41
41
 
42
42
  # Initializes the wrapped <tt>mecab</tt> instance with the
43
43
  # given <tt>options</tt> hash.
@@ -65,6 +65,9 @@ module Natto
65
65
  #
66
66
  # e.g.
67
67
  # m = Natto::MeCab.new(:node_format=>'%m\t%f[7]\n')
68
+ # => #<Natto::MeCab:0x28d8886c @options={:node_format=>"%m\\t%f[7]\\n"}, \
69
+ # @dicts=[#<Natto::DictionaryInfo:0x28d8863c>], \
70
+ # @ptr=#<FFI::Pointer address=0x28e3b268>>
68
71
  # puts m.parse("日本語は難しいです。")
69
72
  # 日本語 ニホンゴ
70
73
  # は ハ
@@ -79,10 +82,17 @@ module Natto
79
82
  # @see MeCab::SUPPORTED_OPTS
80
83
  def initialize(options={})
81
84
  @options = options
85
+ @dicts = []
86
+
82
87
  opt_str = self.class.build_options_str(@options)
83
88
  @ptr = Natto::Binding.mecab_new2(opt_str)
84
- raise MeCabError.new("Could not initialize MeCab with options: '#{opt_str}'") if @ptr.address == 0
85
- #@dict = Natto::DictionaryInfo.new(Natto::Binding.mecab_dictionary_info(@ptr))
89
+ raise MeCabError.new("Could not initialize MeCab with options: '#{opt_str}'") if @ptr.address == 0x0
90
+
91
+ @dicts << Natto::DictionaryInfo.new(Natto::Binding.mecab_dictionary_info(@ptr))
92
+ while @dicts.last[:next].address != 0x0
93
+ @dicts << Natto::DictionaryInfo.new(@dicts.last[:next])
94
+ end
95
+
86
96
  ObjectSpace.define_finalizer(self, self.class.create_free_proc(@ptr))
87
97
  end
88
98
 
@@ -123,12 +133,6 @@ module Natto
123
133
  else
124
134
  opt << "--#{key}=#{options[k]}"
125
135
  end
126
-
127
- #if key.end_with? '_format_' or key.end_with? '_feature'
128
- # opt << "--#{key}="+options[k]
129
- #else
130
- # opt << "--#{key}=#{options[k]}"
131
- #end
132
136
  end
133
137
  end
134
138
  opt.join(" ")
@@ -155,11 +159,11 @@ module Natto
155
159
  # - :next
156
160
  #
157
161
  # # Usage:
158
- #
159
- # dict = Natto::DictionaryInfo.new(mecab_ptr)
160
- # puts dict[:filename]
162
+ # m = Natto::MeCab.new
163
+ # sysdic = m.dicts.first
164
+ # puts sysdic[:filename]
161
165
  # => /usr/local/lib/mecab/dic/ipadic/sys.dic
162
- # puts dict[:charset]
166
+ # puts sysdic[:charset]
163
167
  # => utf8
164
168
  class DictionaryInfo < FFI::Struct
165
169
  layout :filename, :string,
data/test/test_natto.rb CHANGED
@@ -119,4 +119,14 @@ class TestNatto < Test::Unit::TestCase
119
119
  Natto::MeCab.new(:userdic=>'/userdic/does/not/exist')
120
120
  end
121
121
  end
122
+
123
+ def test_dictionary_accessor
124
+ m = Natto::MeCab.new
125
+ dicts = m.dicts
126
+ assert dicts.empty? == false
127
+ sysdic = dicts.first
128
+ assert_equal('/usr/local/lib/mecab/dic/ipadic/sys.dic', sysdic[:filename])
129
+ assert_equal('utf8', sysdic[:charset])
130
+ assert_equal(0x0, sysdic[:next].address)
131
+ end
122
132
  end
metadata CHANGED
@@ -1,13 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: natto
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
5
4
  prerelease: false
6
5
  segments:
7
6
  - 0
8
7
  - 0
9
- - 7
10
- version: 0.0.7
8
+ - 8
9
+ version: 0.0.8
11
10
  platform: ruby
12
11
  authors:
13
12
  - Brooke M. Fujita
@@ -15,7 +14,7 @@ autorequire:
15
14
  bindir: bin
16
15
  cert_chain: []
17
16
 
18
- date: 2010-12-30 00:00:00 +09:00
17
+ date: 2011-01-07 00:00:00 +09:00
19
18
  default_executable:
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
@@ -26,7 +25,6 @@ dependencies:
26
25
  requirements:
27
26
  - - ">="
28
27
  - !ruby/object:Gem::Version
29
- hash: 1
30
28
  segments:
31
29
  - 0
32
30
  - 6
@@ -55,8 +53,12 @@ homepage: http://code.google.com/p/natto/
55
53
  licenses:
56
54
  - BSD
57
55
  post_install_message:
58
- rdoc_options: []
59
-
56
+ rdoc_options:
57
+ - --title
58
+ - "natto #{Natto::VERSION} -- Ruby-Mecab binding"
59
+ - --main
60
+ - README
61
+ - -c UTF-8
60
62
  require_paths:
61
63
  - lib
62
64
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -64,7 +66,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
64
66
  requirements:
65
67
  - - ">="
66
68
  - !ruby/object:Gem::Version
67
- hash: 57
68
69
  segments:
69
70
  - 1
70
71
  - 8
@@ -75,7 +76,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
75
76
  requirements:
76
77
  - - ">="
77
78
  - !ruby/object:Gem::Version
78
- hash: 3
79
79
  segments:
80
80
  - 0
81
81
  version: "0"