natto 0.9.3 → 0.9.4
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +13 -2
- data/README.md +47 -22
- data/lib/natto.rb +242 -140
- data/lib/natto/binding.rb +40 -29
- data/lib/natto/utils.rb +16 -0
- data/lib/natto/version.rb +3 -3
- metadata +39 -60
- data/test/natto/tc_binding.rb +0 -41
- data/test/natto/tc_dictionaryinfo.rb +0 -65
- data/test/natto/tc_mecab.rb +0 -295
- data/test/natto/tc_mecabnode.rb +0 -101
- data/test/test_natto.rb +0 -12
data/lib/natto/binding.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
|
3
2
|
module Natto
|
4
3
|
|
5
4
|
# Module <tt>Binding</tt> encapsulates methods and behavior
|
@@ -31,18 +30,25 @@ module Natto
|
|
31
30
|
# @raise [LoadError] if MECAB_PATH environment variable is not set in Windows
|
32
31
|
# <br/>
|
33
32
|
# e.g., for bash on UNIX/Linux
|
33
|
+
#
|
34
34
|
# export MECAB_PATH=/usr/local/lib/libmecab.so
|
35
|
+
#
|
35
36
|
# e.g., on Windows
|
37
|
+
#
|
36
38
|
# set MECAB_PATH=C:\Program Files\MeCab\bin\libmecab.dll
|
39
|
+
#
|
37
40
|
# e.g., for Cygwin
|
41
|
+
#
|
38
42
|
# export MECAB_PATH=cygmecab-1
|
43
|
+
#
|
39
44
|
# e.g., from within a Ruby program
|
45
|
+
#
|
40
46
|
# ENV['MECAB_PATH']=/usr/local/lib/libmecab.so
|
41
47
|
def self.find_library
|
42
48
|
host_os = RbConfig::CONFIG['host_os']
|
43
49
|
|
44
50
|
if host_os =~ /mswin|mingw/i
|
45
|
-
raise LoadError, "Please set #{MECAB_PATH} to full path to libmecab.dll"
|
51
|
+
raise LoadError, "Please set #{MECAB_PATH} to the full path to libmecab.dll"
|
46
52
|
elsif host_os =~ /cygwin/i
|
47
53
|
'cygmecab-1'
|
48
54
|
else
|
@@ -56,18 +62,15 @@ module Natto
|
|
56
62
|
attach_function :mecab_version, [], :string
|
57
63
|
attach_function :mecab_strerror, [:pointer],:string
|
58
64
|
attach_function :mecab_destroy, [:pointer], :void
|
59
|
-
|
60
65
|
attach_function :mecab_set_theta, [:pointer, :float], :void
|
61
66
|
attach_function :mecab_set_lattice_level, [:pointer, :int], :void
|
62
67
|
attach_function :mecab_set_all_morphs, [:pointer, :int], :void
|
63
|
-
|
64
68
|
attach_function :mecab_sparse_tostr, [:pointer, :string], :string
|
65
69
|
attach_function :mecab_sparse_tonode, [:pointer, :string], :pointer
|
66
|
-
|
67
70
|
attach_function :mecab_nbest_init, [:pointer, :string], :int
|
68
71
|
attach_function :mecab_nbest_sparse_tostr, [:pointer, :int, :string], :string
|
69
72
|
attach_function :mecab_nbest_next_tonode, [:pointer], :pointer
|
70
|
-
|
73
|
+
attach_function :mecab_format_node, [:pointer, :pointer], :string
|
71
74
|
attach_function :mecab_dictionary_info, [:pointer], :pointer
|
72
75
|
|
73
76
|
# @private
|
@@ -80,48 +83,56 @@ module Natto
|
|
80
83
|
Natto::Binding.mecab_version
|
81
84
|
end
|
82
85
|
|
83
|
-
def mecab_strerror(
|
84
|
-
Natto::Binding.mecab_strerror(
|
86
|
+
def mecab_strerror(m_ptr)
|
87
|
+
Natto::Binding.mecab_strerror(m_ptr)
|
85
88
|
end
|
86
89
|
|
87
|
-
def mecab_destroy(
|
88
|
-
Natto::Binding.mecab_destroy(
|
90
|
+
def mecab_destroy(m_ptr)
|
91
|
+
Natto::Binding.mecab_destroy(m_ptr)
|
89
92
|
end
|
90
93
|
|
91
|
-
def mecab_set_theta(
|
92
|
-
Natto::Binding.mecab_set_theta(
|
94
|
+
def mecab_set_theta(m_ptr, t)
|
95
|
+
Natto::Binding.mecab_set_theta(m_ptr, t)
|
93
96
|
end
|
94
97
|
|
95
|
-
def mecab_set_lattice_level(
|
96
|
-
Natto::Binding.mecab_set_lattice_level(
|
98
|
+
def mecab_set_lattice_level(m_ptr, ll)
|
99
|
+
Natto::Binding.mecab_set_lattice_level(m_ptr, ll)
|
97
100
|
end
|
98
|
-
|
99
|
-
def mecab_set_all_morphs(
|
100
|
-
Natto::Binding.mecab_set_all_morphs(
|
101
|
+
|
102
|
+
def mecab_set_all_morphs(m_ptr, am)
|
103
|
+
Natto::Binding.mecab_set_all_morphs(m_ptr, am)
|
101
104
|
end
|
102
105
|
|
103
|
-
def mecab_sparse_tostr(
|
104
|
-
Natto::Binding.mecab_sparse_tostr(
|
106
|
+
def mecab_sparse_tostr(m_ptr, str)
|
107
|
+
Natto::Binding.mecab_sparse_tostr(m_ptr, str)
|
105
108
|
end
|
106
109
|
|
107
|
-
def mecab_sparse_tonode(
|
108
|
-
Natto::Binding.mecab_sparse_tonode(
|
110
|
+
def mecab_sparse_tonode(m_ptr, str)
|
111
|
+
Natto::Binding.mecab_sparse_tonode(m_ptr, str)
|
112
|
+
end
|
113
|
+
|
114
|
+
def mecab_nbest_next_tonode(m_ptr)
|
115
|
+
Natto::Binding.mecab_nbest_next_tonode(m_ptr)
|
109
116
|
end
|
110
117
|
|
111
|
-
def
|
112
|
-
Natto::Binding.
|
118
|
+
def mecab_nbest_init(m_ptr, str)
|
119
|
+
Natto::Binding.mecab_nbest_init(m_ptr, str)
|
113
120
|
end
|
114
121
|
|
115
|
-
def
|
116
|
-
Natto::Binding.
|
122
|
+
def mecab_nbest_sparse_tostr(m_ptr, n, str)
|
123
|
+
Natto::Binding.mecab_nbest_sparse_tostr(m_ptr, n, str)
|
117
124
|
end
|
118
125
|
|
119
|
-
def
|
120
|
-
Natto::Binding.
|
126
|
+
def mecab_nbest_next_tonode(m_ptr)
|
127
|
+
Natto::Binding.mecab_nbest_next_tonode(m_ptr)
|
121
128
|
end
|
122
129
|
|
123
|
-
def
|
124
|
-
Natto::Binding.
|
130
|
+
def mecab_format_node(m_ptr, n_ptr)
|
131
|
+
Natto::Binding.mecab_format_node(m_ptr, n_ptr)
|
132
|
+
end
|
133
|
+
|
134
|
+
def mecab_dictionary_info(m_ptr)
|
135
|
+
Natto::Binding.mecab_dictionary_info(m_ptr)
|
125
136
|
end
|
126
137
|
end
|
127
138
|
end
|
data/lib/natto/utils.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
module Natto
|
3
|
+
module Utils
|
4
|
+
# @private
|
5
|
+
def self.included(base)
|
6
|
+
base.extend(ClassMethods)
|
7
|
+
end
|
8
|
+
|
9
|
+
module ClassMethods
|
10
|
+
def force_enc(str)
|
11
|
+
str.force_encoding(Encoding.default_external) if str.respond_to?(:encoding) && str.encoding!=Encoding.default_external
|
12
|
+
str
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
data/lib/natto/version.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
|
3
3
|
# <tt>Natto</tt> is the namespace for objects that provide
|
4
|
-
# a binding to the <tt>mecab</tt>
|
4
|
+
# a binding to the <tt>mecab</tt> tagger and related resources.
|
5
5
|
#
|
6
6
|
# <tt>Natto::MeCab</tt> is a wrapper class for the <tt>mecab</tt>
|
7
|
-
#
|
7
|
+
# tagger.
|
8
8
|
#
|
9
9
|
# <tt>Natto::MeCabStruct</tt> is a base class for a <tt>mecab</tt>
|
10
10
|
# struct.
|
@@ -23,5 +23,5 @@
|
|
23
23
|
# which are made available via <tt>FFI</tt> bindings to <tt>mecab</tt>.
|
24
24
|
module Natto
|
25
25
|
# Version string for this Rubygem.
|
26
|
-
VERSION = "0.9.3"
|
26
|
+
VERSION = "0.9.4"
|
27
27
|
end
|
metadata
CHANGED
@@ -1,95 +1,74 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: natto
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.9.4
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 9
|
9
|
-
- 3
|
10
|
-
version: 0.9.3
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Brooke M. Fujita
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2012-02-26 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: ffi
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: &341055920 !ruby/object:Gem::Requirement
|
24
17
|
none: false
|
25
|
-
requirements:
|
26
|
-
- -
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
hash: 1
|
29
|
-
segments:
|
30
|
-
- 0
|
31
|
-
- 6
|
32
|
-
- 3
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
33
21
|
version: 0.6.3
|
34
22
|
type: :runtime
|
35
|
-
|
36
|
-
|
37
|
-
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *341055920
|
25
|
+
description: ! 'natto is a gem bridging Ruby and MeCab using FFI (foreign function
|
26
|
+
interface). No compilation is necessary, and natto will run on CRuby (mri/yarv)
|
27
|
+
and JRuby (jvm) equally well, on any OS.
|
28
|
+
|
29
|
+
ruby で mecab を自由に利用できる natto をぜひ試してください。
|
30
|
+
|
31
|
+
'
|
38
32
|
email: buruzaemon@gmail.com
|
39
33
|
executables: []
|
40
|
-
|
41
34
|
extensions: []
|
42
|
-
|
43
35
|
extra_rdoc_files: []
|
44
|
-
|
45
|
-
files:
|
36
|
+
files:
|
46
37
|
- lib/natto.rb
|
47
38
|
- lib/natto/binding.rb
|
48
39
|
- lib/natto/version.rb
|
49
|
-
-
|
50
|
-
- test/natto/tc_binding.rb
|
51
|
-
- test/natto/tc_dictionaryinfo.rb
|
52
|
-
- test/natto/tc_mecab.rb
|
53
|
-
- test/natto/tc_mecabnode.rb
|
40
|
+
- lib/natto/utils.rb
|
54
41
|
- README.md
|
55
42
|
- LICENSE
|
56
43
|
- CHANGELOG
|
57
44
|
- .yardopts
|
58
45
|
homepage: https://bitbucket.org/buruzaemon/natto/overview
|
59
|
-
licenses:
|
46
|
+
licenses:
|
60
47
|
- BSD
|
61
48
|
post_install_message:
|
62
49
|
rdoc_options: []
|
63
|
-
|
64
|
-
require_paths:
|
50
|
+
require_paths:
|
65
51
|
- lib
|
66
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
67
53
|
none: false
|
68
|
-
requirements:
|
69
|
-
- -
|
70
|
-
- !ruby/object:Gem::Version
|
71
|
-
hash: 57
|
72
|
-
segments:
|
73
|
-
- 1
|
74
|
-
- 8
|
75
|
-
- 7
|
54
|
+
requirements:
|
55
|
+
- - ! '>='
|
56
|
+
- !ruby/object:Gem::Version
|
76
57
|
version: 1.8.7
|
77
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
59
|
none: false
|
79
|
-
requirements:
|
80
|
-
- -
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
|
83
|
-
|
84
|
-
- 0
|
85
|
-
version: "0"
|
86
|
-
requirements:
|
60
|
+
requirements:
|
61
|
+
- - ! '>='
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
64
|
+
requirements:
|
87
65
|
- MeCab, 0.993 or greater
|
88
66
|
- FFI, 0.6.3 or greater
|
89
67
|
rubyforge_project:
|
90
|
-
rubygems_version: 1.8.
|
68
|
+
rubygems_version: 1.8.17
|
91
69
|
signing_key:
|
92
70
|
specification_version: 3
|
93
|
-
summary: natto combines the Ruby programming language with MeCab, the part-of-speech
|
94
|
-
|
95
|
-
|
71
|
+
summary: natto combines the Ruby programming language with MeCab, the part-of-speech
|
72
|
+
and morphological analyzer for the Japanese language.
|
73
|
+
test_files: []
|
74
|
+
has_rdoc:
|
data/test/natto/tc_binding.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
|
3
|
-
# TestNatto encapsulates tests for the basic
|
4
|
-
# behavior of the Natto::Binding module.
|
5
|
-
class TestNattoBinding < Test::Unit::TestCase
|
6
|
-
def setup
|
7
|
-
@klass = Class.new do
|
8
|
-
include Natto::Binding
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
def teardown
|
13
|
-
@klass = nil
|
14
|
-
end
|
15
|
-
|
16
|
-
# Tests the mecab_version function.
|
17
|
-
def test_mecab_version
|
18
|
-
mv = `mecab -v`.split.last
|
19
|
-
assert_equal(mv, @klass.mecab_version)
|
20
|
-
end
|
21
|
-
|
22
|
-
# Tests for the inclusion of mecab methods made available
|
23
|
-
# to any classes including the Natto::Binding module.
|
24
|
-
def test_functions_included
|
25
|
-
[ :mecab_new2,
|
26
|
-
:mecab_version,
|
27
|
-
:mecab_strerror,
|
28
|
-
:mecab_destroy,
|
29
|
-
:mecab_set_theta,
|
30
|
-
:mecab_set_lattice_level,
|
31
|
-
:mecab_set_all_morphs,
|
32
|
-
:mecab_sparse_tostr,
|
33
|
-
:mecab_nbest_sparse_tostr,
|
34
|
-
:mecab_nbest_init,
|
35
|
-
:mecab_nbest_sparse_tostr,
|
36
|
-
:mecab_nbest_next_tonode,
|
37
|
-
:mecab_dictionary_info ].each do |f|
|
38
|
-
assert(@klass.respond_to? f)
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
@@ -1,65 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
|
3
|
-
# TestDictionaryInfo encapsulates tests for the basic
|
4
|
-
# behavior of Natto::DictionaryInfo
|
5
|
-
class TestDictionaryInfo < Test::Unit::TestCase
|
6
|
-
def setup
|
7
|
-
m = Natto::MeCab.new
|
8
|
-
@dicts = m.dicts
|
9
|
-
|
10
|
-
out = `mecab -D`.lines.to_a
|
11
|
-
out.each do |l|
|
12
|
-
tokens = l.split("\t")
|
13
|
-
@sysdic_filename = tokens[1].strip if tokens[0] =~ /filename:/i
|
14
|
-
@sysdic_charset = tokens[1].strip if tokens[0] =~ /charset:/i
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def teardown
|
19
|
-
@dicts = nil
|
20
|
-
end
|
21
|
-
|
22
|
-
# Tests the dictionaries accessor method of Natto::MeCab.
|
23
|
-
# Assumes that:
|
24
|
-
# a) system dictionary is /usr/local/lib/mecab/dic/ipadic/sys.dic
|
25
|
-
# b) system dictionary encoding is utf-8
|
26
|
-
# c) only dealing w/ case of 1 dictionary being used
|
27
|
-
def test_dictionaries_accessor
|
28
|
-
assert @dicts.empty? == false
|
29
|
-
sysdic = @dicts.first
|
30
|
-
assert_equal(@sysdic_filename, sysdic[:filename])
|
31
|
-
assert_equal(@sysdic_charset, sysdic[:charset])
|
32
|
-
assert_equal(0x0, sysdic[:next].address)
|
33
|
-
end
|
34
|
-
|
35
|
-
# Tests the to_s method.
|
36
|
-
def test_to_s
|
37
|
-
#<Natto::DictionaryInfo:0x288879bc @filename=\"/usr/local/lib/mecab/dic/ipadic/sys.dic\", @charset=\"utf8\">
|
38
|
-
assert(@dicts.first.to_s.include?("filename=\"#{@sysdic_filename}\", charset=\"#{@sysdic_charset}\""))
|
39
|
-
end
|
40
|
-
|
41
|
-
# Tests the accessors of Natto::DictionaryInfo.
|
42
|
-
# Note: Object#type is deprecated in 1.9.n, but comes with a warning
|
43
|
-
# in 1.8.n
|
44
|
-
def test_dictionary_info_member_accessors
|
45
|
-
sysdic = @dicts.first
|
46
|
-
members = [
|
47
|
-
:filename,
|
48
|
-
:charset,
|
49
|
-
:type,
|
50
|
-
:size,
|
51
|
-
:lsize,
|
52
|
-
:rsize,
|
53
|
-
:version,
|
54
|
-
:next
|
55
|
-
]
|
56
|
-
members.each do |nomme|
|
57
|
-
assert_not_nil(sysdic.send nomme )
|
58
|
-
end
|
59
|
-
|
60
|
-
# NoMethodError will be raised for anything else!
|
61
|
-
assert_raise NoMethodError do
|
62
|
-
sysdic.send :unknown_attr
|
63
|
-
end
|
64
|
-
end
|
65
|
-
end
|
data/test/natto/tc_mecab.rb
DELETED
@@ -1,295 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
require 'rbconfig'
|
3
|
-
require 'nkf'
|
4
|
-
|
5
|
-
# TestMeCab encapsulates tests for the basic
|
6
|
-
# behavior of Natto::MeCab.
|
7
|
-
class TestMeCab < Test::Unit::TestCase
|
8
|
-
|
9
|
-
host_os = RbConfig::CONFIG['host_os']
|
10
|
-
# we need to transform from UTF-8 to SJIS if we are on Windows!
|
11
|
-
if host_os =~ /mswin|mingw/i
|
12
|
-
TEST_STR = NKF.nkf("-Ws", '試験ですよ、これが。')
|
13
|
-
else
|
14
|
-
TEST_STR = '試験ですよ、これが。'
|
15
|
-
end
|
16
|
-
|
17
|
-
def setup
|
18
|
-
@m = Natto::MeCab.new
|
19
|
-
@ver = `mecab -v`.strip.split.last
|
20
|
-
end
|
21
|
-
|
22
|
-
def teardown
|
23
|
-
@m = nil
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_parse_mecab_options
|
27
|
-
[ '-r /some/file',
|
28
|
-
'-r/some/file',
|
29
|
-
'--rcfile=/some/file',
|
30
|
-
'--rcfile /some/file',
|
31
|
-
{:rcfile=>"/some/file"} ].each do |opts|
|
32
|
-
assert_equal({:rcfile => '/some/file'}, Natto::MeCab.parse_mecab_options(opts))
|
33
|
-
end
|
34
|
-
|
35
|
-
[ '-d /some/other/file',
|
36
|
-
'-d/some/other/file',
|
37
|
-
'--dicdir=/some/other/file',
|
38
|
-
'--dicdir /some/other/file',
|
39
|
-
{:dicdir=>"/some/other/file"} ].each do |opts|
|
40
|
-
assert_equal({:dicdir => '/some/other/file'}, Natto::MeCab.parse_mecab_options(opts))
|
41
|
-
end
|
42
|
-
|
43
|
-
[ '-u /yet/another/file',
|
44
|
-
'-u/yet/another/file',
|
45
|
-
'--userdic=/yet/another/file',
|
46
|
-
'--userdic /yet/another/file',
|
47
|
-
{:userdic=>"/yet/another/file"} ].each do |opts|
|
48
|
-
assert_equal({:userdic => '/yet/another/file'}, Natto::MeCab.parse_mecab_options(opts))
|
49
|
-
end
|
50
|
-
|
51
|
-
[ '-l 42',
|
52
|
-
'-l42',
|
53
|
-
'--lattice-level=42',
|
54
|
-
'--lattice-level 42',
|
55
|
-
{:lattice_level=>42}
|
56
|
-
].each do |opts|
|
57
|
-
assert_equal({:lattice_level => 42}, Natto::MeCab.parse_mecab_options(opts))
|
58
|
-
end
|
59
|
-
|
60
|
-
[ '-a',
|
61
|
-
'--all-morphs',
|
62
|
-
{:all_morphs=>true} ].each do |opts|
|
63
|
-
assert_equal({:all_morphs => true}, Natto::MeCab.parse_mecab_options(opts))
|
64
|
-
end
|
65
|
-
|
66
|
-
[ '-O natto',
|
67
|
-
'-Onatto',
|
68
|
-
'--output-format-type=natto',
|
69
|
-
'--output-format-type natto',
|
70
|
-
{:output_format_type=>"natto"} ].each do |opts|
|
71
|
-
assert_equal({:output_format_type => 'natto'}, Natto::MeCab.parse_mecab_options(opts))
|
72
|
-
end
|
73
|
-
|
74
|
-
[ '-N 42',
|
75
|
-
'-N42',
|
76
|
-
'--nbest=42',
|
77
|
-
'--nbest 42',
|
78
|
-
{:nbest=>42}
|
79
|
-
].each do |opts|
|
80
|
-
assert_equal({:nbest => 42}, Natto::MeCab.parse_mecab_options(opts))
|
81
|
-
end
|
82
|
-
[ '--nbest=-1', '--nbest=0', '--nbest=513' ].each do |bad|
|
83
|
-
assert_raise Natto::MeCabError do
|
84
|
-
Natto::MeCab.parse_mecab_options(bad)
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
[ '-F %m\t%f[7]\n',
|
89
|
-
'-F%m\t%f[7]\n',
|
90
|
-
'--node-format=%m\t%f[7]\n',
|
91
|
-
'--node-format %m\t%f[7]\n',
|
92
|
-
{:node_format=>'%m\t%f[7]\n'} ].each do |opts|
|
93
|
-
assert_equal({:node_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
94
|
-
end
|
95
|
-
|
96
|
-
[ '-U %m\t%f[7]\n',
|
97
|
-
'-U%m\t%f[7]\n',
|
98
|
-
'--unk-format=%m\t%f[7]\n',
|
99
|
-
'--unk-format %m\t%f[7]\n',
|
100
|
-
{:unk_format=>'%m\t%f[7]\n'} ].each do |opts|
|
101
|
-
assert_equal({:unk_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
102
|
-
end
|
103
|
-
|
104
|
-
[ '-B %m\t%f[7]\n',
|
105
|
-
'-B%m\t%f[7]\n',
|
106
|
-
'--bos-format=%m\t%f[7]\n',
|
107
|
-
'--bos-format %m\t%f[7]\n',
|
108
|
-
{:bos_format=>'%m\t%f[7]\n'} ].each do |opts|
|
109
|
-
assert_equal({:bos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
110
|
-
end
|
111
|
-
|
112
|
-
[ '-E %m\t%f[7]\n',
|
113
|
-
'-E%m\t%f[7]\n',
|
114
|
-
'--eos-format=%m\t%f[7]\n',
|
115
|
-
'--eos-format %m\t%f[7]\n',
|
116
|
-
{:eos_format=>'%m\t%f[7]\n'} ].each do |opts|
|
117
|
-
assert_equal({:eos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
118
|
-
end
|
119
|
-
|
120
|
-
[ '-S %m\t%f[7]\n',
|
121
|
-
'-S%m\t%f[7]\n',
|
122
|
-
'--eon-format=%m\t%f[7]\n',
|
123
|
-
'--eon-format %m\t%f[7]\n',
|
124
|
-
{:eon_format=>'%m\t%f[7]\n'} ].each do |opts|
|
125
|
-
assert_equal({:eon_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
126
|
-
end
|
127
|
-
|
128
|
-
[ '-x %m\t%f[7]\n',
|
129
|
-
'-x%m\t%f[7]\n',
|
130
|
-
'--unk-feature=%m\t%f[7]\n',
|
131
|
-
'--unk-feature %m\t%f[7]\n',
|
132
|
-
{:unk_feature=>'%m\t%f[7]\n'} ].each do |opts|
|
133
|
-
assert_equal({:unk_feature => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
134
|
-
end
|
135
|
-
|
136
|
-
[ '-b 102400',
|
137
|
-
'-b102400',
|
138
|
-
'--input-buffer-size=102400',
|
139
|
-
'--input-buffer-size 102400',
|
140
|
-
{:input_buffer_size=>102400} ].each do |opts|
|
141
|
-
assert_equal({:input_buffer_size => 102400}, Natto::MeCab.parse_mecab_options(opts))
|
142
|
-
end
|
143
|
-
|
144
|
-
[ '-C',
|
145
|
-
'--allocate-sentence',
|
146
|
-
{:allocate_sentence=>true} ].each do |opts|
|
147
|
-
assert_equal({:allocate_sentence => true}, Natto::MeCab.parse_mecab_options(opts))
|
148
|
-
end
|
149
|
-
|
150
|
-
[ '-t 0.42',
|
151
|
-
'-t0.42',
|
152
|
-
'--theta=0.42',
|
153
|
-
'--theta 0.42',
|
154
|
-
{:theta=>0.42} ].each do |opts|
|
155
|
-
assert_equal({:theta => 0.42}, Natto::MeCab.parse_mecab_options(opts))
|
156
|
-
end
|
157
|
-
|
158
|
-
[ '-c 42',
|
159
|
-
'-c42',
|
160
|
-
'--cost-factor=42',
|
161
|
-
'--cost-factor 42',
|
162
|
-
{:cost_factor=>42} ].each do |opts|
|
163
|
-
assert_equal({:cost_factor => 42}, Natto::MeCab.parse_mecab_options(opts))
|
164
|
-
end
|
165
|
-
|
166
|
-
assert_equal({}, Natto::MeCab.parse_mecab_options)
|
167
|
-
assert_equal({}, Natto::MeCab.parse_mecab_options(:unknown=>"ignore"))
|
168
|
-
end
|
169
|
-
|
170
|
-
def test_build_options_str
|
171
|
-
assert_equal('--rcfile=/some/file', Natto::MeCab.build_options_str(:rcfile=>"/some/file"))
|
172
|
-
assert_equal('--dicdir=/some/other/file', Natto::MeCab.build_options_str(:dicdir=>"/some/other/file"))
|
173
|
-
assert_equal('--userdic=/yet/another/file', Natto::MeCab.build_options_str(:userdic=>"/yet/another/file"))
|
174
|
-
assert_equal('--lattice-level=42', Natto::MeCab.build_options_str(:lattice_level=>42))
|
175
|
-
assert_equal('--all-morphs', Natto::MeCab.build_options_str(:all_morphs=>true))
|
176
|
-
assert_equal('--output-format-type=natto', Natto::MeCab.build_options_str(:output_format_type=>"natto"))
|
177
|
-
assert_equal('--node-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:node_format=>'%m\t%f[7]\n'))
|
178
|
-
assert_equal('--unk-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:unk_format=>'%m\t%f[7]\n'))
|
179
|
-
assert_equal('--bos-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:bos_format=>'%m\t%f[7]\n'))
|
180
|
-
assert_equal('--eos-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:eos_format=>'%m\t%f[7]\n'))
|
181
|
-
assert_equal('--eon-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:eon_format=>'%m\t%f[7]\n'))
|
182
|
-
assert_equal('--unk-feature=%m\t%f[7]\n', Natto::MeCab.build_options_str(:unk_feature=>'%m\t%f[7]\n'))
|
183
|
-
assert_equal('--input-buffer-size=102400',Natto::MeCab.build_options_str(:input_buffer_size=>102400))
|
184
|
-
assert_equal('--allocate-sentence', Natto::MeCab.build_options_str(:allocate_sentence=>true))
|
185
|
-
assert_equal('--nbest=42', Natto::MeCab.build_options_str(:nbest=>42))
|
186
|
-
assert_equal('--theta=0.42', Natto::MeCab.build_options_str(:theta=>0.42))
|
187
|
-
assert_equal('--cost-factor=42', Natto::MeCab.build_options_str(:cost_factor=>42))
|
188
|
-
end
|
189
|
-
|
190
|
-
def test_construction
|
191
|
-
m = nil
|
192
|
-
assert_nothing_raised do
|
193
|
-
m = Natto::MeCab.new
|
194
|
-
end
|
195
|
-
assert_equal({}, m.options)
|
196
|
-
|
197
|
-
opts = {:output_format_type=>'chasen'}
|
198
|
-
assert_nothing_raised do
|
199
|
-
m = Natto::MeCab.new(opts)
|
200
|
-
end
|
201
|
-
assert_equal(opts, m.options)
|
202
|
-
assert_nothing_raised do
|
203
|
-
m = Natto::MeCab.new("-O chasen")
|
204
|
-
end
|
205
|
-
assert_equal(opts, m.options)
|
206
|
-
assert_nothing_raised do
|
207
|
-
m = Natto::MeCab.new("--output-format-type=chasen")
|
208
|
-
end
|
209
|
-
assert_equal(opts, m.options)
|
210
|
-
|
211
|
-
opts = {:all_morphs=>true, :allocate_sentence=>true}
|
212
|
-
assert_nothing_raised do
|
213
|
-
m = Natto::MeCab.new(opts)
|
214
|
-
end
|
215
|
-
assert_equal(opts, m.options)
|
216
|
-
assert_nothing_raised do
|
217
|
-
m = Natto::MeCab.new('-a -C')
|
218
|
-
end
|
219
|
-
assert_equal(opts, m.options)
|
220
|
-
assert_nothing_raised do
|
221
|
-
m = Natto::MeCab.new('--all-morphs --allocate-sentence')
|
222
|
-
end
|
223
|
-
assert_equal(opts, m.options)
|
224
|
-
|
225
|
-
opts = {:lattice_level=>999}
|
226
|
-
assert_nothing_raised do
|
227
|
-
m = Natto::MeCab.new(opts)
|
228
|
-
end
|
229
|
-
assert_equal(opts, m.options)
|
230
|
-
assert_nothing_raised do
|
231
|
-
m = Natto::MeCab.new('-l 999')
|
232
|
-
end
|
233
|
-
assert_equal(opts, m.options)
|
234
|
-
assert_nothing_raised do
|
235
|
-
m = Natto::MeCab.new('--lattice-level=999')
|
236
|
-
end
|
237
|
-
assert_equal(opts, m.options)
|
238
|
-
end
|
239
|
-
|
240
|
-
def test_initialize_with_errors
|
241
|
-
assert_raise Natto::MeCabError do
|
242
|
-
Natto::MeCab.new(:output_format_type=>'not_defined_anywhere')
|
243
|
-
end
|
244
|
-
|
245
|
-
assert_raise Natto::MeCabError do
|
246
|
-
Natto::MeCab.new(:rcfile=>'/rcfile/does/not/exist')
|
247
|
-
end
|
248
|
-
|
249
|
-
assert_raise Natto::MeCabError do
|
250
|
-
Natto::MeCab.new(:dicdir=>'/dicdir/does/not/exist')
|
251
|
-
end
|
252
|
-
|
253
|
-
assert_raise Natto::MeCabError do
|
254
|
-
Natto::MeCab.new(:userdic=>'/userdic/does/not/exist')
|
255
|
-
end
|
256
|
-
end
|
257
|
-
|
258
|
-
def test_version_accessor
|
259
|
-
assert_equal(@ver, @m.version)
|
260
|
-
end
|
261
|
-
|
262
|
-
def test_all_morphs
|
263
|
-
m = Natto::MeCab.new(:all_morphs=>true)
|
264
|
-
expected = `echo #{TEST_STR} | mecab --all-morphs`.lines.to_a
|
265
|
-
expected.delete_if {|e| e =~ /^(EOS|BOS)/ }
|
266
|
-
|
267
|
-
actual = m.parse(TEST_STR).lines.to_a
|
268
|
-
actual.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
269
|
-
|
270
|
-
assert_equal(expected, actual)
|
271
|
-
end
|
272
|
-
|
273
|
-
def test_parse_tostr_default
|
274
|
-
expected = `echo #{TEST_STR} | mecab`.lines.to_a
|
275
|
-
expected.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
276
|
-
|
277
|
-
actual = @m.parse(TEST_STR).lines.to_a
|
278
|
-
actual.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
279
|
-
|
280
|
-
assert_equal(expected, actual)
|
281
|
-
end
|
282
|
-
|
283
|
-
def test_parse_tonode_default
|
284
|
-
expected = `echo #{TEST_STR} | mecab`.lines.to_a
|
285
|
-
expected.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
286
|
-
|
287
|
-
actual = []
|
288
|
-
@m.parse(TEST_STR) do |node|
|
289
|
-
actual << "#{node.surface}\t#{node.feature}\n"
|
290
|
-
end
|
291
|
-
actual.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
292
|
-
|
293
|
-
assert_equal(expected, actual)
|
294
|
-
end
|
295
|
-
end
|