natto 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/natto.rb +15 -4
- data/test/test_natto.rb +42 -6
- metadata +8 -6
data/lib/natto.rb
CHANGED
@@ -9,6 +9,7 @@ require 'rubygems' if RUBY_VERSION.to_f < 1.9
|
|
9
9
|
# natto requires the following:
|
10
10
|
# * {http://sourceforge.net/projects/mecab/files/mecab/ MeCab 0.98}
|
11
11
|
# * {http://rubygems.org/gems/ffi ffi 0.6.3 or greater}
|
12
|
+
# * Ruby 1.8.7 or greater
|
12
13
|
#
|
13
14
|
# === Installation
|
14
15
|
# Install natto with the following gem command:
|
@@ -58,7 +59,7 @@ module Natto
|
|
58
59
|
# * :dicdir -- system dicdir
|
59
60
|
# * :userdic -- user dictionary
|
60
61
|
# * :lattice_level -- lattice information level (integer, default 0)
|
61
|
-
# * :output_format_type -- output format type (wakati, chasen, yomi,
|
62
|
+
# * :output_format_type -- output format type (wakati, chasen, yomi, etc.)
|
62
63
|
# * :node_format -- user-defined node format
|
63
64
|
# * :unk_format -- user-defined unknown node format
|
64
65
|
# * :bos_format -- user-defined beginning-of-sentence format
|
@@ -68,13 +69,18 @@ module Natto
|
|
68
69
|
# * :nbest -- output N best results (integer, default 1)
|
69
70
|
# * :theta -- temperature parameter theta (float, default 0.75)
|
70
71
|
# * :cost_factor -- cost factor (integer, default 700)
|
72
|
+
# <br/>
|
73
|
+
# Use single-quotes to preserve format options that contain escape chars.
|
74
|
+
# <br/>
|
75
|
+
# e.g.
|
76
|
+
# m = Natto::MeCab.new(:node_format=>'%m\t%f[7]\n')
|
71
77
|
#
|
72
78
|
# @param [Hash] options
|
73
79
|
# @see {SUPPORTED_OPTS}
|
74
80
|
def initialize(options={})
|
75
81
|
opt_str = self.class.build_options_str(options)
|
76
82
|
@ptr = Natto::Binding.mecab_new2(opt_str)
|
77
|
-
raise MeCabError.new("
|
83
|
+
raise MeCabError.new("Could not initialize MeCab with options: '#{opt_str}'") if @ptr.address == 0
|
78
84
|
#@dict = Natto::DictionaryInfo.new(Natto::Binding.mecab_dictionary_info(@ptr))
|
79
85
|
ObjectSpace.define_finalizer(self, self.class.create_free_proc(@ptr))
|
80
86
|
end
|
@@ -87,6 +93,7 @@ module Natto
|
|
87
93
|
raise(MeCabError.new(Natto::Binding.mecab_strerror(@ptr)))
|
88
94
|
end
|
89
95
|
|
96
|
+
|
90
97
|
# Returns a <tt>Proc</tt> that is registered to be invoked
|
91
98
|
# after the object owning <tt>ptr</tt> has been destroyed.
|
92
99
|
#
|
@@ -106,7 +113,11 @@ module Natto
|
|
106
113
|
SUPPORTED_OPTS.each do |k|
|
107
114
|
if options.has_key? k
|
108
115
|
key = k.to_s.gsub('_', '-')
|
109
|
-
|
116
|
+
if key.end_with? '_format_' or key.end_with? '_feature'
|
117
|
+
opt << "--#{key}="+options[k]
|
118
|
+
else
|
119
|
+
opt << "--#{key}=#{options[k]}"
|
120
|
+
end
|
110
121
|
end
|
111
122
|
end
|
112
123
|
opt.join(" ")
|
@@ -131,7 +142,7 @@ module Natto
|
|
131
142
|
# * :version
|
132
143
|
# * :next
|
133
144
|
# <br>
|
134
|
-
#
|
145
|
+
# Usage:
|
135
146
|
# dict = Natto::DictionaryInfo.new(mecab_ptr)
|
136
147
|
# puts dict[:filename]
|
137
148
|
# => /usr/local/lib/mecab/dic/ipadic/sys.dic
|
data/test/test_natto.rb
CHANGED
@@ -25,17 +25,52 @@ class TestNatto < Test::Unit::TestCase
|
|
25
25
|
res = Natto::MeCab.build_options_str(:unknown=>"ignore")
|
26
26
|
assert_equal('', res)
|
27
27
|
|
28
|
-
res = Natto::MeCab.build_options_str(:
|
29
|
-
assert_equal('--
|
28
|
+
res = Natto::MeCab.build_options_str(:rcfile=>"/some/file")
|
29
|
+
assert_equal('--rcfile=/some/file', res)
|
30
30
|
|
31
|
-
res = Natto::MeCab.build_options_str(:
|
32
|
-
assert_equal('--
|
31
|
+
res = Natto::MeCab.build_options_str(:dicdir=>"/some/other/file")
|
32
|
+
assert_equal('--dicdir=/some/other/file', res)
|
33
|
+
|
34
|
+
res = Natto::MeCab.build_options_str(:userdic=>"/yet/another/file")
|
35
|
+
assert_equal('--userdic=/yet/another/file', res)
|
33
36
|
|
34
37
|
res = Natto::MeCab.build_options_str(:output_format_type=>"c")
|
35
38
|
assert_equal('--output-format-type=c', res)
|
36
39
|
|
37
|
-
res = Natto::MeCab.build_options_str(:output_format_type=>"c",
|
38
|
-
|
40
|
+
res = Natto::MeCab.build_options_str(:output_format_type=>"c",
|
41
|
+
:userdic=>"/some/file",
|
42
|
+
:dicdir=>"/some/other/file")
|
43
|
+
assert_equal('--dicdir=/some/other/file --userdic=/some/file --output-format-type=c', res)
|
44
|
+
|
45
|
+
res = Natto::MeCab.build_options_str(:node_format=>'%m\t%f[7]\n')
|
46
|
+
assert_equal('--node-format=%m\t%f[7]\n', res)
|
47
|
+
|
48
|
+
res = Natto::MeCab.build_options_str(:unk_format=>'%m\t%f[7]\n')
|
49
|
+
assert_equal('--unk-format=%m\t%f[7]\n', res)
|
50
|
+
|
51
|
+
res = Natto::MeCab.build_options_str(:bos_format=>'%m\t%f[7]\n')
|
52
|
+
assert_equal('--bos-format=%m\t%f[7]\n', res)
|
53
|
+
|
54
|
+
res = Natto::MeCab.build_options_str(:eos_format=>'%m\t%f[7]\n')
|
55
|
+
assert_equal('--eos-format=%m\t%f[7]\n', res)
|
56
|
+
|
57
|
+
res = Natto::MeCab.build_options_str(:eon_format=>'%m\t%f[7]\n')
|
58
|
+
assert_equal('--eon-format=%m\t%f[7]\n', res)
|
59
|
+
|
60
|
+
res = Natto::MeCab.build_options_str(:unk_feature=>'%m\t%f[7]\n')
|
61
|
+
assert_equal('--unk-feature=%m\t%f[7]\n', res)
|
62
|
+
|
63
|
+
res = Natto::MeCab.build_options_str(:lattice_level=>42)
|
64
|
+
assert_equal('--lattice-level=42', res)
|
65
|
+
|
66
|
+
res = Natto::MeCab.build_options_str(:nbest=>42)
|
67
|
+
assert_equal('--nbest=42', res)
|
68
|
+
|
69
|
+
res = Natto::MeCab.build_options_str(:theta=>0.42)
|
70
|
+
assert_equal('--theta=0.42', res)
|
71
|
+
|
72
|
+
res = Natto::MeCab.build_options_str(:cost_factor=>42)
|
73
|
+
assert_equal('--cost-factor=42', res)
|
39
74
|
end
|
40
75
|
|
41
76
|
def test_initialize
|
@@ -43,4 +78,5 @@ class TestNatto < Test::Unit::TestCase
|
|
43
78
|
Natto::MeCab.new(:output_format_type=>'UNDEFINED')
|
44
79
|
end
|
45
80
|
end
|
81
|
+
|
46
82
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: 0.0.
|
8
|
+
- 4
|
9
|
+
version: 0.0.4
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Brooke M. Fujita
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-12-
|
17
|
+
date: 2010-12-27 00:00:00 +09:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -38,14 +38,15 @@ executables: []
|
|
38
38
|
|
39
39
|
extensions: []
|
40
40
|
|
41
|
-
extra_rdoc_files:
|
42
|
-
|
41
|
+
extra_rdoc_files:
|
42
|
+
- LICENSE
|
43
|
+
- README
|
43
44
|
files:
|
44
45
|
- lib/natto.rb
|
45
46
|
- test/test_natto.rb
|
46
47
|
- LICENSE
|
47
48
|
- README
|
48
|
-
has_rdoc:
|
49
|
+
has_rdoc: yard
|
49
50
|
homepage: http://code.google.com/p/natto/
|
50
51
|
licenses:
|
51
52
|
- BSD
|
@@ -74,6 +75,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
74
75
|
version: "0"
|
75
76
|
requirements:
|
76
77
|
- MeCab, 0.98 or greater
|
78
|
+
- FFI, 0.6.3 or greater
|
77
79
|
rubyforge_project:
|
78
80
|
rubygems_version: 1.3.7
|
79
81
|
signing_key:
|