natto 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +8 -2
- data/lib/natto/binding.rb +2 -41
- data/lib/natto/version.rb +47 -2
- data/lib/natto.rb +22 -18
- data/test/test_natto.rb +10 -0
- metadata +9 -9
data/README.md
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
A Tasty Ruby Binding with MeCab
|
3
3
|
|
4
4
|
## What is natto?
|
5
|
-
|
6
5
|
natto combines the [Ruby programming language](http://www.ruby-lang.org/) with [MeCab](http://mecab.sourceforge.net/), the part-of-speech and morphological analyzer for the Japanese language.
|
7
6
|
|
8
7
|
## Requirements
|
@@ -31,6 +30,9 @@ e.g., for Cygwin
|
|
31
30
|
require 'natto'
|
32
31
|
|
33
32
|
m = Natto::MeCab.new
|
33
|
+
=> #<Natto::MeCab:0x28d93dd4 @options={}, \
|
34
|
+
@dicts=[#<Natto::DictionaryInfo:0x28d93d34>], \
|
35
|
+
@ptr=#<FFI::Pointer address=0x28af3e58>>
|
34
36
|
puts m.parse("すもももももももものうち")
|
35
37
|
すもも 名詞,一般,*,*,*,*,すもも,スモモ,スモモ
|
36
38
|
も 助詞,係助詞,*,*,*,*,も,モ,モ
|
@@ -53,7 +55,11 @@ e.g., for Cygwin
|
|
53
55
|
|
54
56
|
## Changelog
|
55
57
|
|
56
|
-
-
|
58
|
+
- __2011/01/07__: 0.0.8 release.
|
59
|
+
- Adding support for accessing dictionaries
|
60
|
+
- Further tweaking of documentation with markdown
|
61
|
+
|
62
|
+
- __2010/12/30__: 0.0.7 release.
|
57
63
|
- Adding support for all-morphs and partial options
|
58
64
|
- Further updating of documentation with markdown
|
59
65
|
|
data/lib/natto/binding.rb
CHANGED
@@ -1,44 +1,5 @@
|
|
1
|
-
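# natto combines the Ruby programming language with MeCab,
|
2
|
-
# the part-of-speech and morphological analyzer for the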
|
3
|
-
# Japanese language.
|
4
|
-
#
|
5
|
-
# ## Requirements
|
6
|
-
# natto requires the following:
|
7
|
-
#
|
8
|
-
# - [MeCab _0.98_](http://sourceforge.net/projects/mecab/files/mecab/0.98/)
|
9
|
-
# - [ffi _0.6.3 or greater_](http://rubygems.org/gems/ffi)
|
10
|
-
# - Ruby _1.8.7 or greater_
|
11
|
-
#
|
12
|
-
# ## Installation
|
13
|
-
# Install natto with the following gem command:
|
14
|
-
# gem install natto
|
15
|
-
#
|
16
|
-
# ## Configuration
|
17
|
-
# - natto will try to locate the <tt>mecab</tt> library based upon its runtime environment.
|
18
|
-
# - In case of <tt>LoadError</tt>, please set the <tt>MECAB_PATH</tt> environment variable to the exact name/path to your <tt>mecab</tt> library.
|
19
|
-
#
|
20
|
-
# e.g., for bash on UNIX/Linux
|
21
|
-
# export MECAB_PATH=mecab.so
|
22
|
-
# e.g., on Windows
|
23
|
-
# set MECAB_PATH=C:\Program Files\MeCab\bin\libmecab.dll
|
24
|
-
# e.g., for Cygwin
|
25
|
-
# export MECAB_PATH=cygmecab-1
|
26
|
-
#
|
27
|
-
# ## Usage
|
28
|
-
# require 'natto'
|
29
|
-
#
|
30
|
-
# m = Natto::MeCab.new
|
31
|
-
# puts m.parse("すもももももももものうち")
|
32
|
-
# すもも 名詞,一般,*,*,*,*,すもも,スモモ,スモモ
|
33
|
-
# も 助詞,係助詞,*,*,*,*,も,モ,モ
|
34
|
-
# もも 名詞,一般,*,*,*,*,もも,モモ,モモ
|
35
|
-
# も 助詞,係助詞,*,*,*,*,も,モ,モ
|
36
|
-
# もも 名詞,一般,*,*,*,*,もも,モモ,モモ
|
37
|
-
# の 助詞,連体化,*,*,*,*,の,ノ,ノ
|
38
|
-
# うち 名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ
|
39
|
-
# EOS
|
40
|
-
# => nil
|
41
|
-
#
|
1
|
+
# coding: utf-8
|
2
|
+
|
42
3
|
module Natto
|
43
4
|
|
44
5
|
# Module <tt>Binding</tt> encapsulates methods and behavior
|
data/lib/natto/version.rb
CHANGED
@@ -1,5 +1,50 @@
|
|
1
|
-
#
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
# natto combines the Ruby programming language with MeCab,
|
4
|
+
# the part-of-speech and morphological analyzer for the
|
5
|
+
# Japanese language.
|
6
|
+
#
|
7
|
+
# ## Requirements
|
8
|
+
# natto requires the following:
|
9
|
+
#
|
10
|
+
# - [MeCab _0.98_](http://sourceforge.net/projects/mecab/files/mecab/0.98/)
|
11
|
+
# - [ffi _0.6.3 or greater_](http://rubygems.org/gems/ffi)
|
12
|
+
# - Ruby _1.8.7 or greater_
|
13
|
+
#
|
14
|
+
# ## Installation
|
15
|
+
# Install natto with the following gem command:
|
16
|
+
# gem install natto
|
17
|
+
#
|
18
|
+
# ## Configuration
|
19
|
+
# - natto will try to locate the <tt>mecab</tt> library based upon its runtime environment.
|
20
|
+
# - In case of <tt>LoadError</tt>, please set the <tt>MECAB_PATH</tt> environment variable to the exact name/path to your <tt>mecab</tt> library.
|
21
|
+
#
|
22
|
+
# e.g., for bash on UNIX/Linux
|
23
|
+
# export MECAB_PATH=mecab.so
|
24
|
+
# e.g., on Windows
|
25
|
+
# set MECAB_PATH=C:\Program Files\MeCab\bin\libmecab.dll
|
26
|
+
# e.g., for Cygwin
|
27
|
+
# export MECAB_PATH=cygmecab-1
|
28
|
+
#
|
29
|
+
# ## Usage
|
30
|
+
#
|
31
|
+
# require 'natto'
|
32
|
+
#
|
33
|
+
# m = Natto::MeCab.new
|
34
|
+
# => #<Natto::MeCab:0x28d93dd4 @options={}, \
|
35
|
+
# @dicts=[#<Natto::DictionaryInfo:0x28d93d34>], \
|
36
|
+
# @ptr=#<FFI::Pointer address=0x28af3e58>>
|
37
|
+
# puts m.parse("すもももももももものうち")
|
38
|
+
# すもも 名詞,一般,*,*,*,*,すもも,スモモ,スモモ
|
39
|
+
# も 助詞,係助詞,*,*,*,*,も,モ,モ
|
40
|
+
# もも 名詞,一般,*,*,*,*,もも,モモ,モモ
|
41
|
+
# も 助詞,係助詞,*,*,*,*,も,モ,モ
|
42
|
+
# もも 名詞,一般,*,*,*,*,もも,モモ,モモ
|
43
|
+
# の 助詞,連体化,*,*,*,*,の,ノ,ノ
|
44
|
+
# うち 名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ
|
45
|
+
# EOS
|
46
|
+
# => nil
|
2
47
|
module Natto
|
3
48
|
# Version string for this Rubygem.
|
4
|
-
VERSION = "0.0.7"
|
49
|
+
VERSION = "0.0.8"
|
5
50
|
end
|
data/lib/natto.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
require 'rubygems' if RUBY_VERSION.to_f < 1.9
|
2
3
|
require 'natto/binding'
|
3
4
|
|
@@ -9,9 +10,13 @@ module Natto
|
|
9
10
|
# initialization.
|
10
11
|
#
|
11
12
|
# ## Usage
|
13
|
+
# _Here is how to use natto under Ruby 1.9:_
|
12
14
|
# require 'natto'
|
13
15
|
#
|
14
16
|
# m = Natto::MeCab.new
|
17
|
+
# => #<Natto::MeCab:0x28d93dd4 @options={}, \
|
18
|
+
# @dicts=[#<Natto::DictionaryInfo:0x28d93d34>], \
|
19
|
+
# @ptr=#<FFI::Pointer address=0x28af3e58>>
|
15
20
|
# puts m.parse("すもももももももものうち")
|
16
21
|
# すもも 名詞,一般,*,*,*,*,すもも,スモモ,スモモ
|
17
22
|
# も 助詞,係助詞,*,*,*,*,も,モ,モ
|
@@ -23,10 +28,9 @@ module Natto
|
|
23
28
|
# EOS
|
24
29
|
# => nil
|
25
30
|
#
|
26
|
-
# @see SUPPORTED_OPTS
|
27
31
|
class MeCab
|
28
32
|
|
29
|
-
attr_reader :options
|
33
|
+
attr_reader :options, :dicts
|
30
34
|
|
31
35
|
# Supported options to the <tt>mecab</tt> parser.
|
32
36
|
# See the <tt>mecab</tt> help for more details.
|
@@ -34,10 +38,6 @@ module Natto
|
|
34
38
|
:output_format_type, :partial, :node_format, :unk_format,
|
35
39
|
:bos_format, :eos_format, :eon_format, :unk_feature,
|
36
40
|
:nbest, :theta, :cost_factor ].freeze
|
37
|
-
# :allocate_sentence ]
|
38
|
-
|
39
|
-
#OPTION_DEFAULTS = { :lattice_level=>0, :all_morphs=>false, :nbest=>1,
|
40
|
-
# :theta=>0.75, :cost_factor=>700 }.freeze
|
41
41
|
|
42
42
|
# Initializes the wrapped <tt>mecab</tt> instance with the
|
43
43
|
# given <tt>options</tt> hash.
|
@@ -65,6 +65,9 @@ module Natto
|
|
65
65
|
#
|
66
66
|
# e.g.
|
67
67
|
# m = Natto::MeCab.new(:node_format=>'%m\t%f[7]\n')
|
68
|
+
# => #<Natto::MeCab:0x28d8886c @options={:node_format=>"%m\\t%f[7]\\n"}, \
|
69
|
+
# @dicts=[#<Natto::DictionaryInfo:0x28d8863c>], \
|
70
|
+
# @ptr=#<FFI::Pointer address=0x28e3b268>>
|
68
71
|
# puts m.parse("日本語は難しいです。")
|
69
72
|
# 日本語 ニホンゴ
|
70
73
|
# は ハ
|
@@ -79,10 +82,17 @@ module Natto
|
|
79
82
|
# @see MeCab::SUPPORTED_OPTS
|
80
83
|
def initialize(options={})
|
81
84
|
@options = options
|
85
|
+
@dicts = []
|
86
|
+
|
82
87
|
opt_str = self.class.build_options_str(@options)
|
83
88
|
@ptr = Natto::Binding.mecab_new2(opt_str)
|
84
|
-
raise MeCabError.new("Could not initialize MeCab with options: '#{opt_str}'") if @ptr.address ==
|
85
|
-
|
89
|
+
raise MeCabError.new("Could not initialize MeCab with options: '#{opt_str}'") if @ptr.address == 0x0
|
90
|
+
|
91
|
+
@dicts << Natto::DictionaryInfo.new(Natto::Binding.mecab_dictionary_info(@ptr))
|
92
|
+
while @dicts.last[:next].address != 0x0
|
93
|
+
@dicts << Natto::DictionaryInfo.new(@dicts.last[:next])
|
94
|
+
end
|
95
|
+
|
86
96
|
ObjectSpace.define_finalizer(self, self.class.create_free_proc(@ptr))
|
87
97
|
end
|
88
98
|
|
@@ -123,12 +133,6 @@ module Natto
|
|
123
133
|
else
|
124
134
|
opt << "--#{key}=#{options[k]}"
|
125
135
|
end
|
126
|
-
|
127
|
-
#if key.end_with? '_format_' or key.end_with? '_feature'
|
128
|
-
# opt << "--#{key}="+options[k]
|
129
|
-
#else
|
130
|
-
# opt << "--#{key}=#{options[k]}"
|
131
|
-
#end
|
132
136
|
end
|
133
137
|
end
|
134
138
|
opt.join(" ")
|
@@ -155,11 +159,11 @@ module Natto
|
|
155
159
|
# - :next
|
156
160
|
#
|
157
161
|
# # Usage:
|
158
|
-
#
|
159
|
-
#
|
160
|
-
# puts
|
162
|
+
# m = Natto::MeCab.new
|
163
|
+
# sysdic = m.dicts.first
|
164
|
+
# puts sysdic[:filename]
|
161
165
|
# => /usr/local/lib/mecab/dic/ipadic/sys.dic
|
162
|
-
# puts
|
166
|
+
# puts sysdic[:charset]
|
163
167
|
# => utf8
|
164
168
|
class DictionaryInfo < FFI::Struct
|
165
169
|
layout :filename, :string,
|
data/test/test_natto.rb
CHANGED
@@ -119,4 +119,14 @@ class TestNatto < Test::Unit::TestCase
|
|
119
119
|
Natto::MeCab.new(:userdic=>'/userdic/does/not/exist')
|
120
120
|
end
|
121
121
|
end
|
122
|
+
|
123
|
+
def test_dictionary_accessor
|
124
|
+
m = Natto::MeCab.new
|
125
|
+
dicts = m.dicts
|
126
|
+
assert dicts.empty? == false
|
127
|
+
sysdic = dicts.first
|
128
|
+
assert_equal('/usr/local/lib/mecab/dic/ipadic/sys.dic', sysdic[:filename])
|
129
|
+
assert_equal('utf8', sysdic[:charset])
|
130
|
+
assert_equal(0x0, sysdic[:next].address)
|
131
|
+
end
|
122
132
|
end
|
metadata
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: natto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash: 17
|
5
4
|
prerelease: false
|
6
5
|
segments:
|
7
6
|
- 0
|
8
7
|
- 0
|
9
|
-
- 7
|
10
|
-
version: 0.0.7
|
8
|
+
- 8
|
9
|
+
version: 0.0.8
|
11
10
|
platform: ruby
|
12
11
|
authors:
|
13
12
|
- Brooke M. Fujita
|
@@ -15,7 +14,7 @@ autorequire:
|
|
15
14
|
bindir: bin
|
16
15
|
cert_chain: []
|
17
16
|
|
18
|
-
date: 2010-12-30 00:00:00 +09:00
|
17
|
+
date: 2011-01-07 00:00:00 +09:00
|
19
18
|
default_executable:
|
20
19
|
dependencies:
|
21
20
|
- !ruby/object:Gem::Dependency
|
@@ -26,7 +25,6 @@ dependencies:
|
|
26
25
|
requirements:
|
27
26
|
- - ">="
|
28
27
|
- !ruby/object:Gem::Version
|
29
|
-
hash: 1
|
30
28
|
segments:
|
31
29
|
- 0
|
32
30
|
- 6
|
@@ -55,8 +53,12 @@ homepage: http://code.google.com/p/natto/
|
|
55
53
|
licenses:
|
56
54
|
- BSD
|
57
55
|
post_install_message:
|
58
|
-
rdoc_options:
|
59
|
-
|
56
|
+
rdoc_options:
|
57
|
+
- --title
|
58
|
+
- "natto #{Natto::VERSION} -- Ruby-Mecab binding"
|
59
|
+
- --main
|
60
|
+
- README
|
61
|
+
- -c UTF-8
|
60
62
|
require_paths:
|
61
63
|
- lib
|
62
64
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -64,7 +66,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
64
66
|
requirements:
|
65
67
|
- - ">="
|
66
68
|
- !ruby/object:Gem::Version
|
67
|
-
hash: 57
|
68
69
|
segments:
|
69
70
|
- 1
|
70
71
|
- 8
|
@@ -75,7 +76,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
75
76
|
requirements:
|
76
77
|
- - ">="
|
77
78
|
- !ruby/object:Gem::Version
|
78
|
-
hash: 3
|
79
79
|
segments:
|
80
80
|
- 0
|
81
81
|
version: "0"
|