natto 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/natto.rb +15 -4
- data/test/test_natto.rb +42 -6
- metadata +8 -6
data/lib/natto.rb
CHANGED
@@ -9,6 +9,7 @@ require 'rubygems' if RUBY_VERSION.to_f < 1.9
|
|
9
9
|
# natto requires the following:
|
10
10
|
# * {http://sourceforge.net/projects/mecab/files/mecab/ MeCab 0.98}
|
11
11
|
# * {http://rubygems.org/gems/ffi ffi 0.6.3 or greater}
|
12
|
+
# * Ruby 1.8.7 or greater
|
12
13
|
#
|
13
14
|
# === Installation
|
14
15
|
# Install natto with the following gem command:
|
@@ -58,7 +59,7 @@ module Natto
|
|
58
59
|
# * :dicdir -- system dicdir
|
59
60
|
# * :userdic -- user dictionary
|
60
61
|
# * :lattice_level -- lattice information level (integer, default 0)
|
61
|
-
# * :output_format_type -- output format type (wakati, chasen, yomi,
|
62
|
+
# * :output_format_type -- output format type (wakati, chasen, yomi, etc.)
|
62
63
|
# * :node_format -- user-defined node format
|
63
64
|
# * :unk_format -- user-defined unknown node format
|
64
65
|
# * :bos_format -- user-defined beginning-of-sentence format
|
@@ -68,13 +69,18 @@ module Natto
|
|
68
69
|
# * :nbest -- output N best results (integer, default 1)
|
69
70
|
# * :theta -- temperature parameter theta (float, default 0.75)
|
70
71
|
# * :cost_factor -- cost factor (integer, default 700)
|
72
|
+
# <br/>
|
73
|
+
# Use single-quotes to preserve format options that contain escape chars.
|
74
|
+
# <br/>
|
75
|
+
# e.g.
|
76
|
+
# m = Natto::MeCab.new(:node_format=>'%m\t%f[7]\n')
|
71
77
|
#
|
72
78
|
# @param [Hash] options
|
73
79
|
# @see {SUPPORTED_OPTS}
|
74
80
|
def initialize(options={})
|
75
81
|
opt_str = self.class.build_options_str(options)
|
76
82
|
@ptr = Natto::Binding.mecab_new2(opt_str)
|
77
|
-
raise MeCabError.new("
|
83
|
+
raise MeCabError.new("Could not initialize MeCab with options: '#{opt_str}'") if @ptr.address == 0
|
78
84
|
#@dict = Natto::DictionaryInfo.new(Natto::Binding.mecab_dictionary_info(@ptr))
|
79
85
|
ObjectSpace.define_finalizer(self, self.class.create_free_proc(@ptr))
|
80
86
|
end
|
@@ -87,6 +93,7 @@ module Natto
|
|
87
93
|
raise(MeCabError.new(Natto::Binding.mecab_strerror(@ptr)))
|
88
94
|
end
|
89
95
|
|
96
|
+
|
90
97
|
# Returns a <tt>Proc</tt> that is registered to be invoked
|
91
98
|
# after the object owning <tt>ptr</tt> has been destroyed.
|
92
99
|
#
|
@@ -106,7 +113,11 @@ module Natto
|
|
106
113
|
SUPPORTED_OPTS.each do |k|
|
107
114
|
if options.has_key? k
|
108
115
|
key = k.to_s.gsub('_', '-')
|
109
|
-
|
116
|
+
if key.end_with? '_format_' or key.end_with? '_feature'
|
117
|
+
opt << "--#{key}="+options[k]
|
118
|
+
else
|
119
|
+
opt << "--#{key}=#{options[k]}"
|
120
|
+
end
|
110
121
|
end
|
111
122
|
end
|
112
123
|
opt.join(" ")
|
@@ -131,7 +142,7 @@ module Natto
|
|
131
142
|
# * :version
|
132
143
|
# * :next
|
133
144
|
# <br>
|
134
|
-
#
|
145
|
+
# Usage:
|
135
146
|
# dict = Natto::DictionaryInfo.new(mecab_ptr)
|
136
147
|
# puts dict[:filename]
|
137
148
|
# => /usr/local/lib/mecab/dic/ipadic/sys.dic
|
data/test/test_natto.rb
CHANGED
@@ -25,17 +25,52 @@ class TestNatto < Test::Unit::TestCase
|
|
25
25
|
res = Natto::MeCab.build_options_str(:unknown=>"ignore")
|
26
26
|
assert_equal('', res)
|
27
27
|
|
28
|
-
res = Natto::MeCab.build_options_str(:
|
29
|
-
assert_equal('--
|
28
|
+
res = Natto::MeCab.build_options_str(:rcfile=>"/some/file")
|
29
|
+
assert_equal('--rcfile=/some/file', res)
|
30
30
|
|
31
|
-
res = Natto::MeCab.build_options_str(:
|
32
|
-
assert_equal('--
|
31
|
+
res = Natto::MeCab.build_options_str(:dicdir=>"/some/other/file")
|
32
|
+
assert_equal('--dicdir=/some/other/file', res)
|
33
|
+
|
34
|
+
res = Natto::MeCab.build_options_str(:userdic=>"/yet/another/file")
|
35
|
+
assert_equal('--userdic=/yet/another/file', res)
|
33
36
|
|
34
37
|
res = Natto::MeCab.build_options_str(:output_format_type=>"c")
|
35
38
|
assert_equal('--output-format-type=c', res)
|
36
39
|
|
37
|
-
res = Natto::MeCab.build_options_str(:output_format_type=>"c",
|
38
|
-
|
40
|
+
res = Natto::MeCab.build_options_str(:output_format_type=>"c",
|
41
|
+
:userdic=>"/some/file",
|
42
|
+
:dicdir=>"/some/other/file")
|
43
|
+
assert_equal('--dicdir=/some/other/file --userdic=/some/file --output-format-type=c', res)
|
44
|
+
|
45
|
+
res = Natto::MeCab.build_options_str(:node_format=>'%m\t%f[7]\n')
|
46
|
+
assert_equal('--node-format=%m\t%f[7]\n', res)
|
47
|
+
|
48
|
+
res = Natto::MeCab.build_options_str(:unk_format=>'%m\t%f[7]\n')
|
49
|
+
assert_equal('--unk-format=%m\t%f[7]\n', res)
|
50
|
+
|
51
|
+
res = Natto::MeCab.build_options_str(:bos_format=>'%m\t%f[7]\n')
|
52
|
+
assert_equal('--bos-format=%m\t%f[7]\n', res)
|
53
|
+
|
54
|
+
res = Natto::MeCab.build_options_str(:eos_format=>'%m\t%f[7]\n')
|
55
|
+
assert_equal('--eos-format=%m\t%f[7]\n', res)
|
56
|
+
|
57
|
+
res = Natto::MeCab.build_options_str(:eon_format=>'%m\t%f[7]\n')
|
58
|
+
assert_equal('--eon-format=%m\t%f[7]\n', res)
|
59
|
+
|
60
|
+
res = Natto::MeCab.build_options_str(:unk_feature=>'%m\t%f[7]\n')
|
61
|
+
assert_equal('--unk-feature=%m\t%f[7]\n', res)
|
62
|
+
|
63
|
+
res = Natto::MeCab.build_options_str(:lattice_level=>42)
|
64
|
+
assert_equal('--lattice-level=42', res)
|
65
|
+
|
66
|
+
res = Natto::MeCab.build_options_str(:nbest=>42)
|
67
|
+
assert_equal('--nbest=42', res)
|
68
|
+
|
69
|
+
res = Natto::MeCab.build_options_str(:theta=>0.42)
|
70
|
+
assert_equal('--theta=0.42', res)
|
71
|
+
|
72
|
+
res = Natto::MeCab.build_options_str(:cost_factor=>42)
|
73
|
+
assert_equal('--cost-factor=42', res)
|
39
74
|
end
|
40
75
|
|
41
76
|
def test_initialize
|
@@ -43,4 +78,5 @@ class TestNatto < Test::Unit::TestCase
|
|
43
78
|
Natto::MeCab.new(:output_format_type=>'UNDEFINED')
|
44
79
|
end
|
45
80
|
end
|
81
|
+
|
46
82
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: 0.0.
|
8
|
+
- 4
|
9
|
+
version: 0.0.4
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Brooke M. Fujita
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-12-
|
17
|
+
date: 2010-12-27 00:00:00 +09:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -38,14 +38,15 @@ executables: []
|
|
38
38
|
|
39
39
|
extensions: []
|
40
40
|
|
41
|
-
extra_rdoc_files:
|
42
|
-
|
41
|
+
extra_rdoc_files:
|
42
|
+
- LICENSE
|
43
|
+
- README
|
43
44
|
files:
|
44
45
|
- lib/natto.rb
|
45
46
|
- test/test_natto.rb
|
46
47
|
- LICENSE
|
47
48
|
- README
|
48
|
-
has_rdoc:
|
49
|
+
has_rdoc: yard
|
49
50
|
homepage: http://code.google.com/p/natto/
|
50
51
|
licenses:
|
51
52
|
- BSD
|
@@ -74,6 +75,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
74
75
|
version: "0"
|
75
76
|
requirements:
|
76
77
|
- MeCab, 0.98 or greater
|
78
|
+
- FFI, 0.6.3 or greater
|
77
79
|
rubyforge_project:
|
78
80
|
rubygems_version: 1.3.7
|
79
81
|
signing_key:
|