natto 0.9.3 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +13 -2
- data/README.md +47 -22
- data/lib/natto.rb +242 -140
- data/lib/natto/binding.rb +40 -29
- data/lib/natto/utils.rb +16 -0
- data/lib/natto/version.rb +3 -3
- metadata +39 -60
- data/test/natto/tc_binding.rb +0 -41
- data/test/natto/tc_dictionaryinfo.rb +0 -65
- data/test/natto/tc_mecab.rb +0 -295
- data/test/natto/tc_mecabnode.rb +0 -101
- data/test/test_natto.rb +0 -12
data/lib/natto/binding.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
|
3
2
|
module Natto
|
4
3
|
|
5
4
|
# Module <tt>Binding</tt> encapsulates methods and behavior
|
@@ -31,18 +30,25 @@ module Natto
|
|
31
30
|
# @raise [LoadError] if MECAB_PATH environment variable is not set in Windows
|
32
31
|
# <br/>
|
33
32
|
# e.g., for bash on UNIX/Linux
|
33
|
+
#
|
34
34
|
# export MECAB_PATH=/usr/local/lib/libmecab.so
|
35
|
+
#
|
35
36
|
# e.g., on Windows
|
37
|
+
#
|
36
38
|
# set MECAB_PATH=C:\Program Files\MeCab\bin\libmecab.dll
|
39
|
+
#
|
37
40
|
# e.g., for Cygwin
|
41
|
+
#
|
38
42
|
# export MECAB_PATH=cygmecab-1
|
43
|
+
#
|
39
44
|
# e.g., from within a Ruby program
|
45
|
+
#
|
40
46
|
# ENV['MECAB_PATH']=/usr/local/lib/libmecab.so
|
41
47
|
def self.find_library
|
42
48
|
host_os = RbConfig::CONFIG['host_os']
|
43
49
|
|
44
50
|
if host_os =~ /mswin|mingw/i
|
45
|
-
raise LoadError, "Please set #{MECAB_PATH} to full path to libmecab.dll"
|
51
|
+
raise LoadError, "Please set #{MECAB_PATH} to the full path to libmecab.dll"
|
46
52
|
elsif host_os =~ /cygwin/i
|
47
53
|
'cygmecab-1'
|
48
54
|
else
|
@@ -56,18 +62,15 @@ module Natto
|
|
56
62
|
attach_function :mecab_version, [], :string
|
57
63
|
attach_function :mecab_strerror, [:pointer],:string
|
58
64
|
attach_function :mecab_destroy, [:pointer], :void
|
59
|
-
|
60
65
|
attach_function :mecab_set_theta, [:pointer, :float], :void
|
61
66
|
attach_function :mecab_set_lattice_level, [:pointer, :int], :void
|
62
67
|
attach_function :mecab_set_all_morphs, [:pointer, :int], :void
|
63
|
-
|
64
68
|
attach_function :mecab_sparse_tostr, [:pointer, :string], :string
|
65
69
|
attach_function :mecab_sparse_tonode, [:pointer, :string], :pointer
|
66
|
-
|
67
70
|
attach_function :mecab_nbest_init, [:pointer, :string], :int
|
68
71
|
attach_function :mecab_nbest_sparse_tostr, [:pointer, :int, :string], :string
|
69
72
|
attach_function :mecab_nbest_next_tonode, [:pointer], :pointer
|
70
|
-
|
73
|
+
attach_function :mecab_format_node, [:pointer, :pointer], :string
|
71
74
|
attach_function :mecab_dictionary_info, [:pointer], :pointer
|
72
75
|
|
73
76
|
# @private
|
@@ -80,48 +83,56 @@ module Natto
|
|
80
83
|
Natto::Binding.mecab_version
|
81
84
|
end
|
82
85
|
|
83
|
-
def mecab_strerror(
|
84
|
-
Natto::Binding.mecab_strerror(
|
86
|
+
def mecab_strerror(m_ptr)
|
87
|
+
Natto::Binding.mecab_strerror(m_ptr)
|
85
88
|
end
|
86
89
|
|
87
|
-
def mecab_destroy(
|
88
|
-
Natto::Binding.mecab_destroy(
|
90
|
+
def mecab_destroy(m_ptr)
|
91
|
+
Natto::Binding.mecab_destroy(m_ptr)
|
89
92
|
end
|
90
93
|
|
91
|
-
def mecab_set_theta(
|
92
|
-
Natto::Binding.mecab_set_theta(
|
94
|
+
def mecab_set_theta(m_ptr, t)
|
95
|
+
Natto::Binding.mecab_set_theta(m_ptr, t)
|
93
96
|
end
|
94
97
|
|
95
|
-
def mecab_set_lattice_level(
|
96
|
-
Natto::Binding.mecab_set_lattice_level(
|
98
|
+
def mecab_set_lattice_level(m_ptr, ll)
|
99
|
+
Natto::Binding.mecab_set_lattice_level(m_ptr, ll)
|
97
100
|
end
|
98
|
-
|
99
|
-
def mecab_set_all_morphs(
|
100
|
-
Natto::Binding.mecab_set_all_morphs(
|
101
|
+
|
102
|
+
def mecab_set_all_morphs(m_ptr, am)
|
103
|
+
Natto::Binding.mecab_set_all_morphs(m_ptr, am)
|
101
104
|
end
|
102
105
|
|
103
|
-
def mecab_sparse_tostr(
|
104
|
-
Natto::Binding.mecab_sparse_tostr(
|
106
|
+
def mecab_sparse_tostr(m_ptr, str)
|
107
|
+
Natto::Binding.mecab_sparse_tostr(m_ptr, str)
|
105
108
|
end
|
106
109
|
|
107
|
-
def mecab_sparse_tonode(
|
108
|
-
Natto::Binding.mecab_sparse_tonode(
|
110
|
+
def mecab_sparse_tonode(m_ptr, str)
|
111
|
+
Natto::Binding.mecab_sparse_tonode(m_ptr, str)
|
112
|
+
end
|
113
|
+
|
114
|
+
def mecab_nbest_next_tonode(m_ptr)
|
115
|
+
Natto::Binding.mecab_nbest_next_tonode(m_ptr)
|
109
116
|
end
|
110
117
|
|
111
|
-
def
|
112
|
-
Natto::Binding.
|
118
|
+
def mecab_nbest_init(m_ptr, str)
|
119
|
+
Natto::Binding.mecab_nbest_init(m_ptr, str)
|
113
120
|
end
|
114
121
|
|
115
|
-
def
|
116
|
-
Natto::Binding.
|
122
|
+
def mecab_nbest_sparse_tostr(m_ptr, n, str)
|
123
|
+
Natto::Binding.mecab_nbest_sparse_tostr(m_ptr, n, str)
|
117
124
|
end
|
118
125
|
|
119
|
-
def
|
120
|
-
Natto::Binding.
|
126
|
+
def mecab_nbest_next_tonode(m_ptr)
|
127
|
+
Natto::Binding.mecab_nbest_next_tonode(m_ptr)
|
121
128
|
end
|
122
129
|
|
123
|
-
def
|
124
|
-
Natto::Binding.
|
130
|
+
def mecab_format_node(m_ptr, n_ptr)
|
131
|
+
Natto::Binding.mecab_format_node(m_ptr, n_ptr)
|
132
|
+
end
|
133
|
+
|
134
|
+
def mecab_dictionary_info(m_ptr)
|
135
|
+
Natto::Binding.mecab_dictionary_info(m_ptr)
|
125
136
|
end
|
126
137
|
end
|
127
138
|
end
|
data/lib/natto/utils.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
module Natto
|
3
|
+
module Utils
|
4
|
+
# @private
|
5
|
+
def self.included(base)
|
6
|
+
base.extend(ClassMethods)
|
7
|
+
end
|
8
|
+
|
9
|
+
module ClassMethods
|
10
|
+
def force_enc(str)
|
11
|
+
str.force_encoding(Encoding.default_external) if str.respond_to?(:encoding) && str.encoding!=Encoding.default_external
|
12
|
+
str
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
data/lib/natto/version.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
|
3
3
|
# <tt>Natto</tt> is the namespace for objects that provide
|
4
|
-
# a binding to the <tt>mecab</tt>
|
4
|
+
# a binding to the <tt>mecab</tt> tagger and related resources.
|
5
5
|
#
|
6
6
|
# <tt>Natto::MeCab</tt> is a wrapper class for the <tt>mecab</tt>
|
7
|
-
#
|
7
|
+
# tagger.
|
8
8
|
#
|
9
9
|
# <tt>Natto::MeCabStruct</tt> is a base class for a <tt>mecab</tt>
|
10
10
|
# struct.
|
@@ -23,5 +23,5 @@
|
|
23
23
|
# which are made available via <tt>FFI</tt> bindings to <tt>mecab</tt>.
|
24
24
|
module Natto
|
25
25
|
# Version string for this Rubygem.
|
26
|
-
VERSION = "0.9.3"
|
26
|
+
VERSION = "0.9.4"
|
27
27
|
end
|
metadata
CHANGED
@@ -1,95 +1,74 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: natto
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.9.4
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 9
|
9
|
-
- 3
|
10
|
-
version: 0.9.3
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Brooke M. Fujita
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2012-02-26 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: ffi
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: &341055920 !ruby/object:Gem::Requirement
|
24
17
|
none: false
|
25
|
-
requirements:
|
26
|
-
- -
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
hash: 1
|
29
|
-
segments:
|
30
|
-
- 0
|
31
|
-
- 6
|
32
|
-
- 3
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
33
21
|
version: 0.6.3
|
34
22
|
type: :runtime
|
35
|
-
|
36
|
-
|
37
|
-
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *341055920
|
25
|
+
description: ! 'natto is a gem bridging Ruby and MeCab using FFI (foreign function
|
26
|
+
interface). No compilation is necessary, and natto will run on CRuby (mri/yarv)
|
27
|
+
and JRuby (jvm) equally well, on any OS.
|
28
|
+
|
29
|
+
ruby で mecab を自由に利用できる natto をぜひ試してください。
|
30
|
+
|
31
|
+
'
|
38
32
|
email: buruzaemon@gmail.com
|
39
33
|
executables: []
|
40
|
-
|
41
34
|
extensions: []
|
42
|
-
|
43
35
|
extra_rdoc_files: []
|
44
|
-
|
45
|
-
files:
|
36
|
+
files:
|
46
37
|
- lib/natto.rb
|
47
38
|
- lib/natto/binding.rb
|
48
39
|
- lib/natto/version.rb
|
49
|
-
-
|
50
|
-
- test/natto/tc_binding.rb
|
51
|
-
- test/natto/tc_dictionaryinfo.rb
|
52
|
-
- test/natto/tc_mecab.rb
|
53
|
-
- test/natto/tc_mecabnode.rb
|
40
|
+
- lib/natto/utils.rb
|
54
41
|
- README.md
|
55
42
|
- LICENSE
|
56
43
|
- CHANGELOG
|
57
44
|
- .yardopts
|
58
45
|
homepage: https://bitbucket.org/buruzaemon/natto/overview
|
59
|
-
licenses:
|
46
|
+
licenses:
|
60
47
|
- BSD
|
61
48
|
post_install_message:
|
62
49
|
rdoc_options: []
|
63
|
-
|
64
|
-
require_paths:
|
50
|
+
require_paths:
|
65
51
|
- lib
|
66
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
67
53
|
none: false
|
68
|
-
requirements:
|
69
|
-
- -
|
70
|
-
- !ruby/object:Gem::Version
|
71
|
-
hash: 57
|
72
|
-
segments:
|
73
|
-
- 1
|
74
|
-
- 8
|
75
|
-
- 7
|
54
|
+
requirements:
|
55
|
+
- - ! '>='
|
56
|
+
- !ruby/object:Gem::Version
|
76
57
|
version: 1.8.7
|
77
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
59
|
none: false
|
79
|
-
requirements:
|
80
|
-
- -
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
|
83
|
-
|
84
|
-
- 0
|
85
|
-
version: "0"
|
86
|
-
requirements:
|
60
|
+
requirements:
|
61
|
+
- - ! '>='
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
64
|
+
requirements:
|
87
65
|
- MeCab, 0.993 or greater
|
88
66
|
- FFI, 0.6.3 or greater
|
89
67
|
rubyforge_project:
|
90
|
-
rubygems_version: 1.8.
|
68
|
+
rubygems_version: 1.8.17
|
91
69
|
signing_key:
|
92
70
|
specification_version: 3
|
93
|
-
summary: natto combines the Ruby programming language with MeCab, the part-of-speech
|
94
|
-
|
95
|
-
|
71
|
+
summary: natto combines the Ruby programming language with MeCab, the part-of-speech
|
72
|
+
and morphological analyzer for the Japanese language.
|
73
|
+
test_files: []
|
74
|
+
has_rdoc:
|
data/test/natto/tc_binding.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
|
3
|
-
# TestNatto encapsulates tests for the basic
|
4
|
-
# behavior of the Natto::Binding module.
|
5
|
-
class TestNattoBinding < Test::Unit::TestCase
|
6
|
-
def setup
|
7
|
-
@klass = Class.new do
|
8
|
-
include Natto::Binding
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
def teardown
|
13
|
-
@klass = nil
|
14
|
-
end
|
15
|
-
|
16
|
-
# Tests the mecab_version function.
|
17
|
-
def test_mecab_version
|
18
|
-
mv = `mecab -v`.split.last
|
19
|
-
assert_equal(mv, @klass.mecab_version)
|
20
|
-
end
|
21
|
-
|
22
|
-
# Tests for the inclusion of mecab methods made available
|
23
|
-
# to any classes including the Natto::Binding module.
|
24
|
-
def test_functions_included
|
25
|
-
[ :mecab_new2,
|
26
|
-
:mecab_version,
|
27
|
-
:mecab_strerror,
|
28
|
-
:mecab_destroy,
|
29
|
-
:mecab_set_theta,
|
30
|
-
:mecab_set_lattice_level,
|
31
|
-
:mecab_set_all_morphs,
|
32
|
-
:mecab_sparse_tostr,
|
33
|
-
:mecab_nbest_sparse_tostr,
|
34
|
-
:mecab_nbest_init,
|
35
|
-
:mecab_nbest_sparse_tostr,
|
36
|
-
:mecab_nbest_next_tonode,
|
37
|
-
:mecab_dictionary_info ].each do |f|
|
38
|
-
assert(@klass.respond_to? f)
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
@@ -1,65 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
|
3
|
-
# TestDictionaryInfo encapsulates tests for the basic
|
4
|
-
# behavior of Natto::DictionaryInfo
|
5
|
-
class TestDictionaryInfo < Test::Unit::TestCase
|
6
|
-
def setup
|
7
|
-
m = Natto::MeCab.new
|
8
|
-
@dicts = m.dicts
|
9
|
-
|
10
|
-
out = `mecab -D`.lines.to_a
|
11
|
-
out.each do |l|
|
12
|
-
tokens = l.split("\t")
|
13
|
-
@sysdic_filename = tokens[1].strip if tokens[0] =~ /filename:/i
|
14
|
-
@sysdic_charset = tokens[1].strip if tokens[0] =~ /charset:/i
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def teardown
|
19
|
-
@dicts = nil
|
20
|
-
end
|
21
|
-
|
22
|
-
# Tests the dictionaries accessor method of Natto::MeCab.
|
23
|
-
# Assumes that:
|
24
|
-
# a) system dictionary is /usr/local/lib/mecab/dic/ipadic/sys.dic
|
25
|
-
# b) system dictionary encoding is utf-8
|
26
|
-
# c) only dealing w/ case of 1 dictionary being used
|
27
|
-
def test_dictionaries_accessor
|
28
|
-
assert @dicts.empty? == false
|
29
|
-
sysdic = @dicts.first
|
30
|
-
assert_equal(@sysdic_filename, sysdic[:filename])
|
31
|
-
assert_equal(@sysdic_charset, sysdic[:charset])
|
32
|
-
assert_equal(0x0, sysdic[:next].address)
|
33
|
-
end
|
34
|
-
|
35
|
-
# Tests the to_s method.
|
36
|
-
def test_to_s
|
37
|
-
#<Natto::DictionaryInfo:0x288879bc @filename=\"/usr/local/lib/mecab/dic/ipadic/sys.dic\", @charset=\"utf8\">
|
38
|
-
assert(@dicts.first.to_s.include?("filename=\"#{@sysdic_filename}\", charset=\"#{@sysdic_charset}\""))
|
39
|
-
end
|
40
|
-
|
41
|
-
# Tests the accessors of Natto::DictionaryInfo.
|
42
|
-
# Note: Object#type is deprecated in 1.9.n, but comes with a warning
|
43
|
-
# in 1.8.n
|
44
|
-
def test_dictionary_info_member_accessors
|
45
|
-
sysdic = @dicts.first
|
46
|
-
members = [
|
47
|
-
:filename,
|
48
|
-
:charset,
|
49
|
-
:type,
|
50
|
-
:size,
|
51
|
-
:lsize,
|
52
|
-
:rsize,
|
53
|
-
:version,
|
54
|
-
:next
|
55
|
-
]
|
56
|
-
members.each do |nomme|
|
57
|
-
assert_not_nil(sysdic.send nomme )
|
58
|
-
end
|
59
|
-
|
60
|
-
# NoMethodError will be raised for anything else!
|
61
|
-
assert_raise NoMethodError do
|
62
|
-
sysdic.send :unknown_attr
|
63
|
-
end
|
64
|
-
end
|
65
|
-
end
|
data/test/natto/tc_mecab.rb
DELETED
@@ -1,295 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
require 'rbconfig'
|
3
|
-
require 'nkf'
|
4
|
-
|
5
|
-
# TestMeCab encapsulates tests for the basic
|
6
|
-
# behavior of Natto::MeCab.
|
7
|
-
class TestMeCab < Test::Unit::TestCase
|
8
|
-
|
9
|
-
host_os = RbConfig::CONFIG['host_os']
|
10
|
-
# we need to transfrom from UTF-8 ot SJIS if we are on Windows!
|
11
|
-
if host_os =~ /mswin|mingw/i
|
12
|
-
TEST_STR = NKF.nkf("-Ws", '試験ですよ、これが。')
|
13
|
-
else
|
14
|
-
TEST_STR = '試験ですよ、これが。'
|
15
|
-
end
|
16
|
-
|
17
|
-
def setup
|
18
|
-
@m = Natto::MeCab.new
|
19
|
-
@ver = `mecab -v`.strip.split.last
|
20
|
-
end
|
21
|
-
|
22
|
-
def teardown
|
23
|
-
@m = nil
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_parse_mecab_options
|
27
|
-
[ '-r /some/file',
|
28
|
-
'-r/some/file',
|
29
|
-
'--rcfile=/some/file',
|
30
|
-
'--rcfile /some/file',
|
31
|
-
{:rcfile=>"/some/file"} ].each do |opts|
|
32
|
-
assert_equal({:rcfile => '/some/file'}, Natto::MeCab.parse_mecab_options(opts))
|
33
|
-
end
|
34
|
-
|
35
|
-
[ '-d /some/other/file',
|
36
|
-
'-d/some/other/file',
|
37
|
-
'--dicdir=/some/other/file',
|
38
|
-
'--dicdir /some/other/file',
|
39
|
-
{:dicdir=>"/some/other/file"} ].each do |opts|
|
40
|
-
assert_equal({:dicdir => '/some/other/file'}, Natto::MeCab.parse_mecab_options(opts))
|
41
|
-
end
|
42
|
-
|
43
|
-
[ '-u /yet/another/file',
|
44
|
-
'-u/yet/another/file',
|
45
|
-
'--userdic=/yet/another/file',
|
46
|
-
'--userdic /yet/another/file',
|
47
|
-
{:userdic=>"/yet/another/file"} ].each do |opts|
|
48
|
-
assert_equal({:userdic => '/yet/another/file'}, Natto::MeCab.parse_mecab_options(opts))
|
49
|
-
end
|
50
|
-
|
51
|
-
[ '-l 42',
|
52
|
-
'-l42',
|
53
|
-
'--lattice-level=42',
|
54
|
-
'--lattice-level 42',
|
55
|
-
{:lattice_level=>42}
|
56
|
-
].each do |opts|
|
57
|
-
assert_equal({:lattice_level => 42}, Natto::MeCab.parse_mecab_options(opts))
|
58
|
-
end
|
59
|
-
|
60
|
-
[ '-a',
|
61
|
-
'--all-morphs',
|
62
|
-
{:all_morphs=>true} ].each do |opts|
|
63
|
-
assert_equal({:all_morphs => true}, Natto::MeCab.parse_mecab_options(opts))
|
64
|
-
end
|
65
|
-
|
66
|
-
[ '-O natto',
|
67
|
-
'-Onatto',
|
68
|
-
'--output-format-type=natto',
|
69
|
-
'--output-format-type natto',
|
70
|
-
{:output_format_type=>"natto"} ].each do |opts|
|
71
|
-
assert_equal({:output_format_type => 'natto'}, Natto::MeCab.parse_mecab_options(opts))
|
72
|
-
end
|
73
|
-
|
74
|
-
[ '-N 42',
|
75
|
-
'-N42',
|
76
|
-
'--nbest=42',
|
77
|
-
'--nbest 42',
|
78
|
-
{:nbest=>42}
|
79
|
-
].each do |opts|
|
80
|
-
assert_equal({:nbest => 42}, Natto::MeCab.parse_mecab_options(opts))
|
81
|
-
end
|
82
|
-
[ '--nbest=-1', '--nbest=0', '--nbest=513' ].each do |bad|
|
83
|
-
assert_raise Natto::MeCabError do
|
84
|
-
Natto::MeCab.parse_mecab_options(bad)
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
[ '-F %m\t%f[7]\n',
|
89
|
-
'-F%m\t%f[7]\n',
|
90
|
-
'--node-format=%m\t%f[7]\n',
|
91
|
-
'--node-format %m\t%f[7]\n',
|
92
|
-
{:node_format=>'%m\t%f[7]\n'} ].each do |opts|
|
93
|
-
assert_equal({:node_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
94
|
-
end
|
95
|
-
|
96
|
-
[ '-U %m\t%f[7]\n',
|
97
|
-
'-U%m\t%f[7]\n',
|
98
|
-
'--unk-format=%m\t%f[7]\n',
|
99
|
-
'--unk-format %m\t%f[7]\n',
|
100
|
-
{:unk_format=>'%m\t%f[7]\n'} ].each do |opts|
|
101
|
-
assert_equal({:unk_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
102
|
-
end
|
103
|
-
|
104
|
-
[ '-B %m\t%f[7]\n',
|
105
|
-
'-B%m\t%f[7]\n',
|
106
|
-
'--bos-format=%m\t%f[7]\n',
|
107
|
-
'--bos-format %m\t%f[7]\n',
|
108
|
-
{:bos_format=>'%m\t%f[7]\n'} ].each do |opts|
|
109
|
-
assert_equal({:bos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
110
|
-
end
|
111
|
-
|
112
|
-
[ '-E %m\t%f[7]\n',
|
113
|
-
'-E%m\t%f[7]\n',
|
114
|
-
'--eos-format=%m\t%f[7]\n',
|
115
|
-
'--eos-format %m\t%f[7]\n',
|
116
|
-
{:eos_format=>'%m\t%f[7]\n'} ].each do |opts|
|
117
|
-
assert_equal({:eos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
118
|
-
end
|
119
|
-
|
120
|
-
[ '-S %m\t%f[7]\n',
|
121
|
-
'-S%m\t%f[7]\n',
|
122
|
-
'--eon-format=%m\t%f[7]\n',
|
123
|
-
'--eon-format %m\t%f[7]\n',
|
124
|
-
{:eon_format=>'%m\t%f[7]\n'} ].each do |opts|
|
125
|
-
assert_equal({:eon_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
126
|
-
end
|
127
|
-
|
128
|
-
[ '-x %m\t%f[7]\n',
|
129
|
-
'-x%m\t%f[7]\n',
|
130
|
-
'--unk-feature=%m\t%f[7]\n',
|
131
|
-
'--unk-feature %m\t%f[7]\n',
|
132
|
-
{:unk_feature=>'%m\t%f[7]\n'} ].each do |opts|
|
133
|
-
assert_equal({:unk_feature => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
|
134
|
-
end
|
135
|
-
|
136
|
-
[ '-b 102400',
|
137
|
-
'-b102400',
|
138
|
-
'--input-buffer-size=102400',
|
139
|
-
'--input-buffer-size 102400',
|
140
|
-
{:input_buffer_size=>102400} ].each do |opts|
|
141
|
-
assert_equal({:input_buffer_size => 102400}, Natto::MeCab.parse_mecab_options(opts))
|
142
|
-
end
|
143
|
-
|
144
|
-
[ '-C',
|
145
|
-
'--allocate-sentence',
|
146
|
-
{:allocate_sentence=>true} ].each do |opts|
|
147
|
-
assert_equal({:allocate_sentence => true}, Natto::MeCab.parse_mecab_options(opts))
|
148
|
-
end
|
149
|
-
|
150
|
-
[ '-t 0.42',
|
151
|
-
'-t0.42',
|
152
|
-
'--theta=0.42',
|
153
|
-
'--theta 0.42',
|
154
|
-
{:theta=>0.42} ].each do |opts|
|
155
|
-
assert_equal({:theta => 0.42}, Natto::MeCab.parse_mecab_options(opts))
|
156
|
-
end
|
157
|
-
|
158
|
-
[ '-c 42',
|
159
|
-
'-c42',
|
160
|
-
'--cost-factor=42',
|
161
|
-
'--cost-factor 42',
|
162
|
-
{:cost_factor=>42} ].each do |opts|
|
163
|
-
assert_equal({:cost_factor => 42}, Natto::MeCab.parse_mecab_options(opts))
|
164
|
-
end
|
165
|
-
|
166
|
-
assert_equal({}, Natto::MeCab.parse_mecab_options)
|
167
|
-
assert_equal({}, Natto::MeCab.parse_mecab_options(:unknown=>"ignore"))
|
168
|
-
end
|
169
|
-
|
170
|
-
def test_build_options_str
|
171
|
-
assert_equal('--rcfile=/some/file', Natto::MeCab.build_options_str(:rcfile=>"/some/file"))
|
172
|
-
assert_equal('--dicdir=/some/other/file', Natto::MeCab.build_options_str(:dicdir=>"/some/other/file"))
|
173
|
-
assert_equal('--userdic=/yet/another/file', Natto::MeCab.build_options_str(:userdic=>"/yet/another/file"))
|
174
|
-
assert_equal('--lattice-level=42', Natto::MeCab.build_options_str(:lattice_level=>42))
|
175
|
-
assert_equal('--all-morphs', Natto::MeCab.build_options_str(:all_morphs=>true))
|
176
|
-
assert_equal('--output-format-type=natto', Natto::MeCab.build_options_str(:output_format_type=>"natto"))
|
177
|
-
assert_equal('--node-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:node_format=>'%m\t%f[7]\n'))
|
178
|
-
assert_equal('--unk-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:unk_format=>'%m\t%f[7]\n'))
|
179
|
-
assert_equal('--bos-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:bos_format=>'%m\t%f[7]\n'))
|
180
|
-
assert_equal('--eos-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:eos_format=>'%m\t%f[7]\n'))
|
181
|
-
assert_equal('--eon-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:eon_format=>'%m\t%f[7]\n'))
|
182
|
-
assert_equal('--unk-feature=%m\t%f[7]\n', Natto::MeCab.build_options_str(:unk_feature=>'%m\t%f[7]\n'))
|
183
|
-
assert_equal('--input-buffer-size=102400',Natto::MeCab.build_options_str(:input_buffer_size=>102400))
|
184
|
-
assert_equal('--allocate-sentence', Natto::MeCab.build_options_str(:allocate_sentence=>true))
|
185
|
-
assert_equal('--nbest=42', Natto::MeCab.build_options_str(:nbest=>42))
|
186
|
-
assert_equal('--theta=0.42', Natto::MeCab.build_options_str(:theta=>0.42))
|
187
|
-
assert_equal('--cost-factor=42', Natto::MeCab.build_options_str(:cost_factor=>42))
|
188
|
-
end
|
189
|
-
|
190
|
-
def test_construction
|
191
|
-
m = nil
|
192
|
-
assert_nothing_raised do
|
193
|
-
m = Natto::MeCab.new
|
194
|
-
end
|
195
|
-
assert_equal({}, m.options)
|
196
|
-
|
197
|
-
opts = {:output_format_type=>'chasen'}
|
198
|
-
assert_nothing_raised do
|
199
|
-
m = Natto::MeCab.new(opts)
|
200
|
-
end
|
201
|
-
assert_equal(opts, m.options)
|
202
|
-
assert_nothing_raised do
|
203
|
-
m = Natto::MeCab.new("-O chasen")
|
204
|
-
end
|
205
|
-
assert_equal(opts, m.options)
|
206
|
-
assert_nothing_raised do
|
207
|
-
m = Natto::MeCab.new("--output-format-type=chasen")
|
208
|
-
end
|
209
|
-
assert_equal(opts, m.options)
|
210
|
-
|
211
|
-
opts = {:all_morphs=>true, :allocate_sentence=>true}
|
212
|
-
assert_nothing_raised do
|
213
|
-
m = Natto::MeCab.new(opts)
|
214
|
-
end
|
215
|
-
assert_equal(opts, m.options)
|
216
|
-
assert_nothing_raised do
|
217
|
-
m = Natto::MeCab.new('-a -C')
|
218
|
-
end
|
219
|
-
assert_equal(opts, m.options)
|
220
|
-
assert_nothing_raised do
|
221
|
-
m = Natto::MeCab.new('--all-morphs --allocate-sentence')
|
222
|
-
end
|
223
|
-
assert_equal(opts, m.options)
|
224
|
-
|
225
|
-
opts = {:lattice_level=>999}
|
226
|
-
assert_nothing_raised do
|
227
|
-
m = Natto::MeCab.new(opts)
|
228
|
-
end
|
229
|
-
assert_equal(opts, m.options)
|
230
|
-
assert_nothing_raised do
|
231
|
-
m = Natto::MeCab.new('-l 999')
|
232
|
-
end
|
233
|
-
assert_equal(opts, m.options)
|
234
|
-
assert_nothing_raised do
|
235
|
-
m = Natto::MeCab.new('--lattice-level=999')
|
236
|
-
end
|
237
|
-
assert_equal(opts, m.options)
|
238
|
-
end
|
239
|
-
|
240
|
-
def test_initialize_with_errors
|
241
|
-
assert_raise Natto::MeCabError do
|
242
|
-
Natto::MeCab.new(:output_format_type=>'not_defined_anywhere')
|
243
|
-
end
|
244
|
-
|
245
|
-
assert_raise Natto::MeCabError do
|
246
|
-
Natto::MeCab.new(:rcfile=>'/rcfile/does/not/exist')
|
247
|
-
end
|
248
|
-
|
249
|
-
assert_raise Natto::MeCabError do
|
250
|
-
Natto::MeCab.new(:dicdir=>'/dicdir/does/not/exist')
|
251
|
-
end
|
252
|
-
|
253
|
-
assert_raise Natto::MeCabError do
|
254
|
-
Natto::MeCab.new(:userdic=>'/userdic/does/not/exist')
|
255
|
-
end
|
256
|
-
end
|
257
|
-
|
258
|
-
def test_version_accessor
|
259
|
-
assert_equal(@ver, @m.version)
|
260
|
-
end
|
261
|
-
|
262
|
-
def test_all_morphs
|
263
|
-
m = Natto::MeCab.new(:all_morphs=>true)
|
264
|
-
expected = `echo #{TEST_STR} | mecab --all-morphs`.lines.to_a
|
265
|
-
expected.delete_if {|e| e =~ /^(EOS|BOS)/ }
|
266
|
-
|
267
|
-
actual = m.parse(TEST_STR).lines.to_a
|
268
|
-
actual.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
269
|
-
|
270
|
-
assert_equal(expected, actual)
|
271
|
-
end
|
272
|
-
|
273
|
-
def test_parse_tostr_default
|
274
|
-
expected = `echo #{TEST_STR} | mecab`.lines.to_a
|
275
|
-
expected.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
276
|
-
|
277
|
-
actual = @m.parse(TEST_STR).lines.to_a
|
278
|
-
actual.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
279
|
-
|
280
|
-
assert_equal(expected, actual)
|
281
|
-
end
|
282
|
-
|
283
|
-
def test_parse_tonode_default
|
284
|
-
expected = `echo #{TEST_STR} | mecab`.lines.to_a
|
285
|
-
expected.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
286
|
-
|
287
|
-
actual = []
|
288
|
-
@m.parse(TEST_STR) do |node|
|
289
|
-
actual << "#{node.surface}\t#{node.feature}\n"
|
290
|
-
end
|
291
|
-
actual.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
|
292
|
-
|
293
|
-
assert_equal(expected, actual)
|
294
|
-
end
|
295
|
-
end
|