natto 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +8 -2
- data/lib/natto/binding.rb +2 -41
- data/lib/natto/version.rb +47 -2
- data/lib/natto.rb +22 -18
- data/test/test_natto.rb +10 -0
- metadata +9 -9
data/README.md
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
A Tasty Ruby Binding with MeCab
|
3
3
|
|
4
4
|
## What is natto?
|
5
|
-
|
6
5
|
natto combines the [Ruby programming language](http://www.ruby-lang.org/) with [MeCab](http://mecab.sourceforge.net/), the part-of-speech and morphological analyzer for the Japanese language.
|
7
6
|
|
8
7
|
## Requirements
|
@@ -31,6 +30,9 @@ e.g., for Cygwin
|
|
31
30
|
require 'natto'
|
32
31
|
|
33
32
|
m = Natto::MeCab.new
|
33
|
+
=> #<Natto::MeCab:0x28d93dd4 @options={}, \
|
34
|
+
@dicts=[#<Natto::DictionaryInfo:0x28d93d34>], \
|
35
|
+
@ptr=#<FFI::Pointer address=0x28af3e58>>
|
34
36
|
puts m.parse("すもももももももものうち")
|
35
37
|
すもも 名詞,一般,*,*,*,*,すもも,スモモ,スモモ
|
36
38
|
も 助詞,係助詞,*,*,*,*,も,モ,モ
|
@@ -53,7 +55,11 @@ e.g., for Cygwin
|
|
53
55
|
|
54
56
|
## Changelog
|
55
57
|
|
56
|
-
- __2010/12/30__: 0.0.7 release.
|
58
|
+
- __2011/01/07__: 0.0.8 release.
|
59
|
+
- Adding support for accessing dictionaries
|
60
|
+
- Further tweaking of documentation with markdown
|
61
|
+
|
62
|
+
- __2010/12/30__: 0.0.7 release.
|
57
63
|
- Adding support for all-morphs and partial options
|
58
64
|
- Further updating of documentation with markdown
|
59
65
|
|
data/lib/natto/binding.rb
CHANGED
@@ -1,44 +1,5 @@
|
|
1
|
-
# natto combines the Ruby programming language with MeCab,
|
2
|
-
# the part-of-speech and morphological analyzer for the
|
3
|
-
# Japanese language.
|
4
|
-
#
|
5
|
-
# ## Requirements
|
6
|
-
# natto requires the following:
|
7
|
-
#
|
8
|
-
# - [MeCab _0.98_](http://sourceforge.net/projects/mecab/files/mecab/0.98/)
|
9
|
-
# - [ffi _0.6.3 or greater_](http://rubygems.org/gems/ffi)
|
10
|
-
# - Ruby _1.8.7 or greater_
|
11
|
-
#
|
12
|
-
# ## Installation
|
13
|
-
# Install natto with the following gem command:
|
14
|
-
# gem install natto
|
15
|
-
#
|
16
|
-
# ## Configuration
|
17
|
-
# - natto will try to locate the <tt>mecab</tt> library based upon its runtime environment.
|
18
|
-
# - In case of <tt>LoadError</tt>, please set the <tt>MECAB_PATH</tt> environment variable to the exact name/path to your <tt>mecab</tt> library.
|
19
|
-
#
|
20
|
-
# e.g., for bash on UNIX/Linux
|
21
|
-
# export MECAB_PATH=mecab.so
|
22
|
-
# e.g., on Windows
|
23
|
-
# set MECAB_PATH=C:\Program Files\MeCab\bin\libmecab.dll
|
24
|
-
# e.g., for Cygwin
|
25
|
-
# export MECAB_PATH=cygmecab-1
|
26
|
-
#
|
27
|
-
# ## Usage
|
28
|
-
# require 'natto'
|
29
|
-
#
|
30
|
-
# m = Natto::MeCab.new
|
31
|
-
# puts m.parse("すもももももももものうち")
|
32
|
-
# すもも 名詞,一般,*,*,*,*,すもも,スモモ,スモモ
|
33
|
-
# も 助詞,係助詞,*,*,*,*,も,モ,モ
|
34
|
-
# もも 名詞,一般,*,*,*,*,もも,モモ,モモ
|
35
|
-
# も 助詞,係助詞,*,*,*,*,も,モ,モ
|
36
|
-
# もも 名詞,一般,*,*,*,*,もも,モモ,モモ
|
37
|
-
# の 助詞,連体化,*,*,*,*,の,ノ,ノ
|
38
|
-
# うち 名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ
|
39
|
-
# EOS
|
40
|
-
# => nil
|
41
|
-
#
|
1
|
+
# coding: utf-8
|
2
|
+
|
42
3
|
module Natto
|
43
4
|
|
44
5
|
# Module <tt>Binding</tt> encapsulates methods and behavior
|
data/lib/natto/version.rb
CHANGED
@@ -1,5 +1,50 @@
|
|
1
|
-
#
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
# natto combines the Ruby programming language with MeCab,
|
4
|
+
# the part-of-speech and morphological analyzer for the
|
5
|
+
# Japanese language.
|
6
|
+
#
|
7
|
+
# ## Requirements
|
8
|
+
# natto requires the following:
|
9
|
+
#
|
10
|
+
# - [MeCab _0.98_](http://sourceforge.net/projects/mecab/files/mecab/0.98/)
|
11
|
+
# - [ffi _0.6.3 or greater_](http://rubygems.org/gems/ffi)
|
12
|
+
# - Ruby _1.8.7 or greater_
|
13
|
+
#
|
14
|
+
# ## Installation
|
15
|
+
# Install natto with the following gem command:
|
16
|
+
# gem install natto
|
17
|
+
#
|
18
|
+
# ## Configuration
|
19
|
+
# - natto will try to locate the <tt>mecab</tt> library based upon its runtime environment.
|
20
|
+
# - In case of <tt>LoadError</tt>, please set the <tt>MECAB_PATH</tt> environment variable to the exact name/path to your <tt>mecab</tt> library.
|
21
|
+
#
|
22
|
+
# e.g., for bash on UNIX/Linux
|
23
|
+
# export MECAB_PATH=mecab.so
|
24
|
+
# e.g., on Windows
|
25
|
+
# set MECAB_PATH=C:\Program Files\MeCab\bin\libmecab.dll
|
26
|
+
# e.g., for Cygwin
|
27
|
+
# export MECAB_PATH=cygmecab-1
|
28
|
+
#
|
29
|
+
# ## Usage
|
30
|
+
#
|
31
|
+
# require 'natto'
|
32
|
+
#
|
33
|
+
# m = Natto::MeCab.new
|
34
|
+
# => #<Natto::MeCab:0x28d93dd4 @options={}, \
|
35
|
+
# @dicts=[#<Natto::DictionaryInfo:0x28d93d34>], \
|
36
|
+
# @ptr=#<FFI::Pointer address=0x28af3e58>>
|
37
|
+
# puts m.parse("すもももももももものうち")
|
38
|
+
# すもも 名詞,一般,*,*,*,*,すもも,スモモ,スモモ
|
39
|
+
# も 助詞,係助詞,*,*,*,*,も,モ,モ
|
40
|
+
# もも 名詞,一般,*,*,*,*,もも,モモ,モモ
|
41
|
+
# も 助詞,係助詞,*,*,*,*,も,モ,モ
|
42
|
+
# もも 名詞,一般,*,*,*,*,もも,モモ,モモ
|
43
|
+
# の 助詞,連体化,*,*,*,*,の,ノ,ノ
|
44
|
+
# うち 名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ
|
45
|
+
# EOS
|
46
|
+
# => nil
|
2
47
|
module Natto
|
3
48
|
# Version string for this Rubygem.
|
4
|
-
VERSION = "0.0.7"
|
49
|
+
VERSION = "0.0.8"
|
5
50
|
end
|
data/lib/natto.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
require 'rubygems' if RUBY_VERSION.to_f < 1.9
|
2
3
|
require 'natto/binding'
|
3
4
|
|
@@ -9,9 +10,13 @@ module Natto
|
|
9
10
|
# initialization.
|
10
11
|
#
|
11
12
|
# ## Usage
|
13
|
+
# _Here is how to use natto under Ruby 1.9:_
|
12
14
|
# require 'natto'
|
13
15
|
#
|
14
16
|
# m = Natto::MeCab.new
|
17
|
+
# => #<Natto::MeCab:0x28d93dd4 @options={}, \
|
18
|
+
# @dicts=[#<Natto::DictionaryInfo:0x28d93d34>], \
|
19
|
+
# @ptr=#<FFI::Pointer address=0x28af3e58>>
|
15
20
|
# puts m.parse("すもももももももものうち")
|
16
21
|
# すもも 名詞,一般,*,*,*,*,すもも,スモモ,スモモ
|
17
22
|
# も 助詞,係助詞,*,*,*,*,も,モ,モ
|
@@ -23,10 +28,9 @@ module Natto
|
|
23
28
|
# EOS
|
24
29
|
# => nil
|
25
30
|
#
|
26
|
-
# @see SUPPORTED_OPTS
|
27
31
|
class MeCab
|
28
32
|
|
29
|
-
attr_reader :options
|
33
|
+
attr_reader :options, :dicts
|
30
34
|
|
31
35
|
# Supported options to the <tt>mecab</tt> parser.
|
32
36
|
# See the <tt>mecab</tt> help for more details.
|
@@ -34,10 +38,6 @@ module Natto
|
|
34
38
|
:output_format_type, :partial, :node_format, :unk_format,
|
35
39
|
:bos_format, :eos_format, :eon_format, :unk_feature,
|
36
40
|
:nbest, :theta, :cost_factor ].freeze
|
37
|
-
# :allocate_sentence ]
|
38
|
-
|
39
|
-
#OPTION_DEFAULTS = { :lattice_level=>0, :all_morphs=>false, :nbest=>1,
|
40
|
-
# :theta=>0.75, :cost_factor=>700 }.freeze
|
41
41
|
|
42
42
|
# Initializes the wrapped <tt>mecab</tt> instance with the
|
43
43
|
# given <tt>options</tt> hash.
|
@@ -65,6 +65,9 @@ module Natto
|
|
65
65
|
#
|
66
66
|
# e.g.
|
67
67
|
# m = Natto::MeCab.new(:node_format=>'%m\t%f[7]\n')
|
68
|
+
# => #<Natto::MeCab:0x28d8886c @options={:node_format=>"%m\\t%f[7]\\n"}, \
|
69
|
+
# @dicts=[#<Natto::DictionaryInfo:0x28d8863c>], \
|
70
|
+
# @ptr=#<FFI::Pointer address=0x28e3b268>>
|
68
71
|
# puts m.parse("日本語は難しいです。")
|
69
72
|
# 日本語 ニホンゴ
|
70
73
|
# は ハ
|
@@ -79,10 +82,17 @@ module Natto
|
|
79
82
|
# @see MeCab::SUPPORTED_OPTS
|
80
83
|
def initialize(options={})
|
81
84
|
@options = options
|
85
|
+
@dicts = []
|
86
|
+
|
82
87
|
opt_str = self.class.build_options_str(@options)
|
83
88
|
@ptr = Natto::Binding.mecab_new2(opt_str)
|
84
|
-
raise MeCabError.new("Could not initialize MeCab with options: '#{opt_str}'") if @ptr.address == 0x0
|
85
|
-
|
89
|
+
raise MeCabError.new("Could not initialize MeCab with options: '#{opt_str}'") if @ptr.address == 0x0
|
90
|
+
|
91
|
+
@dicts << Natto::DictionaryInfo.new(Natto::Binding.mecab_dictionary_info(@ptr))
|
92
|
+
while @dicts.last[:next].address != 0x0
|
93
|
+
@dicts << Natto::DictionaryInfo.new(@dicts.last[:next])
|
94
|
+
end
|
95
|
+
|
86
96
|
ObjectSpace.define_finalizer(self, self.class.create_free_proc(@ptr))
|
87
97
|
end
|
88
98
|
|
@@ -123,12 +133,6 @@ module Natto
|
|
123
133
|
else
|
124
134
|
opt << "--#{key}=#{options[k]}"
|
125
135
|
end
|
126
|
-
|
127
|
-
#if key.end_with? '_format_' or key.end_with? '_feature'
|
128
|
-
# opt << "--#{key}="+options[k]
|
129
|
-
#else
|
130
|
-
# opt << "--#{key}=#{options[k]}"
|
131
|
-
#end
|
132
136
|
end
|
133
137
|
end
|
134
138
|
opt.join(" ")
|
@@ -155,11 +159,11 @@ module Natto
|
|
155
159
|
# - :next
|
156
160
|
#
|
157
161
|
# # Usage:
|
158
|
-
#
|
159
|
-
#
|
160
|
-
# puts
|
162
|
+
# m = Natto::MeCab.new
|
163
|
+
# sysdic = m.dicts.first
|
164
|
+
# puts sysdic[:filename]
|
161
165
|
# => /usr/local/lib/mecab/dic/ipadic/sys.dic
|
162
|
-
# puts
|
166
|
+
# puts sysdic[:charset]
|
163
167
|
# => utf8
|
164
168
|
class DictionaryInfo < FFI::Struct
|
165
169
|
layout :filename, :string,
|
data/test/test_natto.rb
CHANGED
@@ -119,4 +119,14 @@ class TestNatto < Test::Unit::TestCase
|
|
119
119
|
Natto::MeCab.new(:userdic=>'/userdic/does/not/exist')
|
120
120
|
end
|
121
121
|
end
|
122
|
+
|
123
|
+
def test_dictionary_accessor
|
124
|
+
m = Natto::MeCab.new
|
125
|
+
dicts = m.dicts
|
126
|
+
assert dicts.empty? == false
|
127
|
+
sysdic = dicts.first
|
128
|
+
assert_equal('/usr/local/lib/mecab/dic/ipadic/sys.dic', sysdic[:filename])
|
129
|
+
assert_equal('utf8', sysdic[:charset])
|
130
|
+
assert_equal(0x0, sysdic[:next].address)
|
131
|
+
end
|
122
132
|
end
|
metadata
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: natto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash: 17
|
5
4
|
prerelease: false
|
6
5
|
segments:
|
7
6
|
- 0
|
8
7
|
- 0
|
9
|
-
- 7
|
10
|
-
version: 0.0.7
|
8
|
+
- 8
|
9
|
+
version: 0.0.8
|
11
10
|
platform: ruby
|
12
11
|
authors:
|
13
12
|
- Brooke M. Fujita
|
@@ -15,7 +14,7 @@ autorequire:
|
|
15
14
|
bindir: bin
|
16
15
|
cert_chain: []
|
17
16
|
|
18
|
-
date: 2010-12-30 00:00:00 +09:00
|
17
|
+
date: 2011-01-07 00:00:00 +09:00
|
19
18
|
default_executable:
|
20
19
|
dependencies:
|
21
20
|
- !ruby/object:Gem::Dependency
|
@@ -26,7 +25,6 @@ dependencies:
|
|
26
25
|
requirements:
|
27
26
|
- - ">="
|
28
27
|
- !ruby/object:Gem::Version
|
29
|
-
hash: 1
|
30
28
|
segments:
|
31
29
|
- 0
|
32
30
|
- 6
|
@@ -55,8 +53,12 @@ homepage: http://code.google.com/p/natto/
|
|
55
53
|
licenses:
|
56
54
|
- BSD
|
57
55
|
post_install_message:
|
58
|
-
rdoc_options:
|
59
|
-
|
56
|
+
rdoc_options:
|
57
|
+
- --title
|
58
|
+
- "natto #{Natto::VERSION} -- Ruby-Mecab binding"
|
59
|
+
- --main
|
60
|
+
- README
|
61
|
+
- -c UTF-8
|
60
62
|
require_paths:
|
61
63
|
- lib
|
62
64
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -64,7 +66,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
64
66
|
requirements:
|
65
67
|
- - ">="
|
66
68
|
- !ruby/object:Gem::Version
|
67
|
-
hash: 57
|
68
69
|
segments:
|
69
70
|
- 1
|
70
71
|
- 8
|
@@ -75,7 +76,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
75
76
|
requirements:
|
76
77
|
- - ">="
|
77
78
|
- !ruby/object:Gem::Version
|
78
|
-
hash: 3
|
79
79
|
segments:
|
80
80
|
- 0
|
81
81
|
version: "0"
|