natto 0.9.3 → 0.9.4

Sign up to get free protection for your applications and to get access to all the features.
data/lib/natto/binding.rb CHANGED
@@ -1,5 +1,4 @@
1
1
  # coding: utf-8
2
-
3
2
  module Natto
4
3
 
5
4
  # Module <tt>Binding</tt> encapsulates methods and behavior
@@ -31,18 +30,25 @@ module Natto
31
30
  # @raise [LoadError] if MECAB_PATH environment variable is not set in Windows
32
31
  # <br/>
33
32
  # e.g., for bash on UNIX/Linux
33
+ #
34
34
  # export MECAB_PATH=/usr/local/lib/libmecab.so
35
+ #
35
36
  # e.g., on Windows
37
+ #
36
38
  # set MECAB_PATH=C:\Program Files\MeCab\bin\libmecab.dll
39
+ #
37
40
  # e.g., for Cygwin
41
+ #
38
42
  # export MECAB_PATH=cygmecab-1
43
+ #
39
44
  # e.g., from within a Ruby program
45
+ #
40
46
  # ENV['MECAB_PATH']=/usr/local/lib/libmecab.so
41
47
  def self.find_library
42
48
  host_os = RbConfig::CONFIG['host_os']
43
49
 
44
50
  if host_os =~ /mswin|mingw/i
45
- raise LoadError, "Please set #{MECAB_PATH} to full path to libmecab.dll"
51
+ raise LoadError, "Please set #{MECAB_PATH} to the full path to libmecab.dll"
46
52
  elsif host_os =~ /cygwin/i
47
53
  'cygmecab-1'
48
54
  else
@@ -56,18 +62,15 @@ module Natto
56
62
  attach_function :mecab_version, [], :string
57
63
  attach_function :mecab_strerror, [:pointer],:string
58
64
  attach_function :mecab_destroy, [:pointer], :void
59
-
60
65
  attach_function :mecab_set_theta, [:pointer, :float], :void
61
66
  attach_function :mecab_set_lattice_level, [:pointer, :int], :void
62
67
  attach_function :mecab_set_all_morphs, [:pointer, :int], :void
63
-
64
68
  attach_function :mecab_sparse_tostr, [:pointer, :string], :string
65
69
  attach_function :mecab_sparse_tonode, [:pointer, :string], :pointer
66
-
67
70
  attach_function :mecab_nbest_init, [:pointer, :string], :int
68
71
  attach_function :mecab_nbest_sparse_tostr, [:pointer, :int, :string], :string
69
72
  attach_function :mecab_nbest_next_tonode, [:pointer], :pointer
70
-
73
+ attach_function :mecab_format_node, [:pointer, :pointer], :string
71
74
  attach_function :mecab_dictionary_info, [:pointer], :pointer
72
75
 
73
76
  # @private
@@ -80,48 +83,56 @@ module Natto
80
83
  Natto::Binding.mecab_version
81
84
  end
82
85
 
83
- def mecab_strerror(ptr)
84
- Natto::Binding.mecab_strerror(ptr)
86
+ def mecab_strerror(m_ptr)
87
+ Natto::Binding.mecab_strerror(m_ptr)
85
88
  end
86
89
 
87
- def mecab_destroy(ptr)
88
- Natto::Binding.mecab_destroy(ptr)
90
+ def mecab_destroy(m_ptr)
91
+ Natto::Binding.mecab_destroy(m_ptr)
89
92
  end
90
93
 
91
- def mecab_set_theta(ptr, t)
92
- Natto::Binding.mecab_set_theta(ptr, t)
94
+ def mecab_set_theta(m_ptr, t)
95
+ Natto::Binding.mecab_set_theta(m_ptr, t)
93
96
  end
94
97
 
95
- def mecab_set_lattice_level(ptr, ll)
96
- Natto::Binding.mecab_set_lattice_level(ptr, ll)
98
+ def mecab_set_lattice_level(m_ptr, ll)
99
+ Natto::Binding.mecab_set_lattice_level(m_ptr, ll)
97
100
  end
98
-
99
- def mecab_set_all_morphs(ptr, am)
100
- Natto::Binding.mecab_set_all_morphs(ptr, am)
101
+
102
+ def mecab_set_all_morphs(m_ptr, am)
103
+ Natto::Binding.mecab_set_all_morphs(m_ptr, am)
101
104
  end
102
105
 
103
- def mecab_sparse_tostr(ptr, str)
104
- Natto::Binding.mecab_sparse_tostr(ptr, str)
106
+ def mecab_sparse_tostr(m_ptr, str)
107
+ Natto::Binding.mecab_sparse_tostr(m_ptr, str)
105
108
  end
106
109
 
107
- def mecab_sparse_tonode(ptr, str)
108
- Natto::Binding.mecab_sparse_tonode(ptr, str)
110
+ def mecab_sparse_tonode(m_ptr, str)
111
+ Natto::Binding.mecab_sparse_tonode(m_ptr, str)
112
+ end
113
+
114
+ def mecab_nbest_next_tonode(m_ptr)
115
+ Natto::Binding.mecab_nbest_next_tonode(m_ptr)
109
116
  end
110
117
 
111
- def mecab_nbest_next_tonode(ptr)
112
- Natto::Binding.mecab_nbest_next_tonode(ptr)
118
+ def mecab_nbest_init(m_ptr, str)
119
+ Natto::Binding.mecab_nbest_init(m_ptr, str)
113
120
  end
114
121
 
115
- def mecab_nbest_init(ptr, str)
116
- Natto::Binding.mecab_nbest_init(ptr, str)
122
+ def mecab_nbest_sparse_tostr(m_ptr, n, str)
123
+ Natto::Binding.mecab_nbest_sparse_tostr(m_ptr, n, str)
117
124
  end
118
125
 
119
- def mecab_nbest_sparse_tostr(ptr, n, str)
120
- Natto::Binding.mecab_nbest_sparse_tostr(ptr, n, str)
126
+ def mecab_nbest_next_tonode(m_ptr)
127
+ Natto::Binding.mecab_nbest_next_tonode(m_ptr)
121
128
  end
122
129
 
123
- def mecab_dictionary_info(ptr)
124
- Natto::Binding.mecab_dictionary_info(ptr)
130
+ def mecab_format_node(m_ptr, n_ptr)
131
+ Natto::Binding.mecab_format_node(m_ptr, n_ptr)
132
+ end
133
+
134
+ def mecab_dictionary_info(m_ptr)
135
+ Natto::Binding.mecab_dictionary_info(m_ptr)
125
136
  end
126
137
  end
127
138
  end
@@ -0,0 +1,16 @@
1
+ # coding: utf-8
2
+ module Natto
3
+ module Utils
4
+ # @private
5
+ def self.included(base)
6
+ base.extend(ClassMethods)
7
+ end
8
+
9
+ module ClassMethods
10
+ def force_enc(str)
11
+ str.force_encoding(Encoding.default_external) if str.respond_to?(:encoding) && str.encoding!=Encoding.default_external
12
+ str
13
+ end
14
+ end
15
+ end
16
+ end
data/lib/natto/version.rb CHANGED
@@ -1,10 +1,10 @@
1
1
  # coding: utf-8
2
2
 
3
3
  # <tt>Natto</tt> is the namespace for objects that provide
4
- # a binding to the <tt>mecab</tt> parser and related resources.
4
+ # a binding to the <tt>mecab</tt> tagger and related resources.
5
5
  #
6
6
  # <tt>Natto::MeCab</tt> is a wrapper class for the <tt>mecab</tt>
7
- # parser.
7
+ # tagger.
8
8
  #
9
9
  # <tt>Natto::MeCabStruct</tt> is a base class for a <tt>mecab</tt>
10
10
  # struct.
@@ -23,5 +23,5 @@
23
23
  # which are made available via <tt>FFI</tt> bindings to <tt>mecab</tt>.
24
24
  module Natto
25
25
  # Version string for this Rubygem.
26
- VERSION = "0.9.3"
26
+ VERSION = "0.9.4"
27
27
  end
metadata CHANGED
@@ -1,95 +1,74 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: natto
3
- version: !ruby/object:Gem::Version
4
- hash: 61
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.9.4
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 9
9
- - 3
10
- version: 0.9.3
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Brooke M. Fujita
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2012-02-13 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2012-02-26 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: ffi
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &341055920 !ruby/object:Gem::Requirement
24
17
  none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- hash: 1
29
- segments:
30
- - 0
31
- - 6
32
- - 3
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
33
21
  version: 0.6.3
34
22
  type: :runtime
35
- version_requirements: *id001
36
- description: "natto is a gem bridging Ruby and MeCab using FFI (foreign function interface). No compilation is necessary, and natto will run on CRuby (mri/yarv) and JRuby (jvm) equally well, on any OS.\n\
37
- ruby \xE3\x81\xA7 mecab \xE3\x82\x92\xE8\x87\xAA\xE7\x94\xB1\xE3\x81\xAB\xE5\x88\xA9\xE7\x94\xA8\xE3\x81\xA7\xE3\x81\x8D\xE3\x82\x8B natto \xE3\x82\x92\xE3\x81\x9C\xE3\x81\xB2\xE8\xA9\xA6\xE3\x81\x97\xE3\x81\xA6\xE3\x81\x8F\xE3\x81\xA0\xE3\x81\x95\xE3\x81\x84\xE3\x80\x82\n"
23
+ prerelease: false
24
+ version_requirements: *341055920
25
+ description: ! 'natto is a gem bridging Ruby and MeCab using FFI (foreign function
26
+ interface). No compilation is necessary, and natto will run on CRuby (mri/yarv)
27
+ and JRuby (jvm) equally well, on any OS.
28
+
29
+ ruby で mecab を自由に利用できる natto をぜひ試してください。
30
+
31
+ '
38
32
  email: buruzaemon@gmail.com
39
33
  executables: []
40
-
41
34
  extensions: []
42
-
43
35
  extra_rdoc_files: []
44
-
45
- files:
36
+ files:
46
37
  - lib/natto.rb
47
38
  - lib/natto/binding.rb
48
39
  - lib/natto/version.rb
49
- - test/test_natto.rb
50
- - test/natto/tc_binding.rb
51
- - test/natto/tc_dictionaryinfo.rb
52
- - test/natto/tc_mecab.rb
53
- - test/natto/tc_mecabnode.rb
40
+ - lib/natto/utils.rb
54
41
  - README.md
55
42
  - LICENSE
56
43
  - CHANGELOG
57
44
  - .yardopts
58
45
  homepage: https://bitbucket.org/buruzaemon/natto/overview
59
- licenses:
46
+ licenses:
60
47
  - BSD
61
48
  post_install_message:
62
49
  rdoc_options: []
63
-
64
- require_paths:
50
+ require_paths:
65
51
  - lib
66
- required_ruby_version: !ruby/object:Gem::Requirement
52
+ required_ruby_version: !ruby/object:Gem::Requirement
67
53
  none: false
68
- requirements:
69
- - - ">="
70
- - !ruby/object:Gem::Version
71
- hash: 57
72
- segments:
73
- - 1
74
- - 8
75
- - 7
54
+ requirements:
55
+ - - ! '>='
56
+ - !ruby/object:Gem::Version
76
57
  version: 1.8.7
77
- required_rubygems_version: !ruby/object:Gem::Requirement
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
59
  none: false
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- hash: 3
83
- segments:
84
- - 0
85
- version: "0"
86
- requirements:
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ requirements:
87
65
  - MeCab, 0.993 or greater
88
66
  - FFI, 0.6.3 or greater
89
67
  rubyforge_project:
90
- rubygems_version: 1.8.10
68
+ rubygems_version: 1.8.17
91
69
  signing_key:
92
70
  specification_version: 3
93
- summary: natto combines the Ruby programming language with MeCab, the part-of-speech and morphological analyzer for the Japanese language.
94
- test_files:
95
- - test/test_natto.rb
71
+ summary: natto combines the Ruby programming language with MeCab, the part-of-speech
72
+ and morphological analyzer for the Japanese language.
73
+ test_files: []
74
+ has_rdoc:
@@ -1,41 +0,0 @@
1
- # coding: utf-8
2
-
3
- # TestNattoBinding encapsulates tests for the basic
4
- # behavior of the Natto::Binding module.
5
- class TestNattoBinding < Test::Unit::TestCase
6
- def setup
7
- @klass = Class.new do
8
- include Natto::Binding
9
- end
10
- end
11
-
12
- def teardown
13
- @klass = nil
14
- end
15
-
16
- # Tests the mecab_version function.
17
- def test_mecab_version
18
- mv = `mecab -v`.split.last
19
- assert_equal(mv, @klass.mecab_version)
20
- end
21
-
22
- # Tests for the inclusion of mecab methods made available
23
- # to any classes including the Natto::Binding module.
24
- def test_functions_included
25
- [ :mecab_new2,
26
- :mecab_version,
27
- :mecab_strerror,
28
- :mecab_destroy,
29
- :mecab_set_theta,
30
- :mecab_set_lattice_level,
31
- :mecab_set_all_morphs,
32
- :mecab_sparse_tostr,
33
- :mecab_nbest_sparse_tostr,
34
- :mecab_nbest_init,
35
- :mecab_nbest_sparse_tostr,
36
- :mecab_nbest_next_tonode,
37
- :mecab_dictionary_info ].each do |f|
38
- assert(@klass.respond_to? f)
39
- end
40
- end
41
- end
@@ -1,65 +0,0 @@
1
- # coding: utf-8
2
-
3
- # TestDictionaryInfo encapsulates tests for the basic
4
- # behavior of Natto::DictionaryInfo
5
- class TestDictionaryInfo < Test::Unit::TestCase
6
- def setup
7
- m = Natto::MeCab.new
8
- @dicts = m.dicts
9
-
10
- out = `mecab -D`.lines.to_a
11
- out.each do |l|
12
- tokens = l.split("\t")
13
- @sysdic_filename = tokens[1].strip if tokens[0] =~ /filename:/i
14
- @sysdic_charset = tokens[1].strip if tokens[0] =~ /charset:/i
15
- end
16
- end
17
-
18
- def teardown
19
- @dicts = nil
20
- end
21
-
22
- # Tests the dictionaries accessor method of Natto::MeCab.
23
- # Assumes that:
24
- # a) system dictionary is /usr/local/lib/mecab/dic/ipadic/sys.dic
25
- # b) system dictionary encoding is utf-8
26
- # c) only dealing w/ case of 1 dictionary being used
27
- def test_dictionaries_accessor
28
- assert @dicts.empty? == false
29
- sysdic = @dicts.first
30
- assert_equal(@sysdic_filename, sysdic[:filename])
31
- assert_equal(@sysdic_charset, sysdic[:charset])
32
- assert_equal(0x0, sysdic[:next].address)
33
- end
34
-
35
- # Tests the to_s method.
36
- def test_to_s
37
- #<Natto::DictionaryInfo:0x288879bc @filename=\"/usr/local/lib/mecab/dic/ipadic/sys.dic\", @charset=\"utf8\">
38
- assert(@dicts.first.to_s.include?("filename=\"#{@sysdic_filename}\", charset=\"#{@sysdic_charset}\""))
39
- end
40
-
41
- # Tests the accessors of Natto::DictionaryInfo.
42
- # Note: Object#type is deprecated in 1.9.n, but comes with a warning
43
- # in 1.8.n
44
- def test_dictionary_info_member_accessors
45
- sysdic = @dicts.first
46
- members = [
47
- :filename,
48
- :charset,
49
- :type,
50
- :size,
51
- :lsize,
52
- :rsize,
53
- :version,
54
- :next
55
- ]
56
- members.each do |nomme|
57
- assert_not_nil(sysdic.send nomme )
58
- end
59
-
60
- # NoMethodError will be raised for anything else!
61
- assert_raise NoMethodError do
62
- sysdic.send :unknown_attr
63
- end
64
- end
65
- end
@@ -1,295 +0,0 @@
1
- # coding: utf-8
2
- require 'rbconfig'
3
- require 'nkf'
4
-
5
- # TestMeCab encapsulates tests for the basic
6
- # behavior of Natto::MeCab.
7
- class TestMeCab < Test::Unit::TestCase
8
-
9
- host_os = RbConfig::CONFIG['host_os']
10
- # we need to transform from UTF-8 to SJIS if we are on Windows!
11
- if host_os =~ /mswin|mingw/i
12
- TEST_STR = NKF.nkf("-Ws", '試験ですよ、これが。')
13
- else
14
- TEST_STR = '試験ですよ、これが。'
15
- end
16
-
17
- def setup
18
- @m = Natto::MeCab.new
19
- @ver = `mecab -v`.strip.split.last
20
- end
21
-
22
- def teardown
23
- @m = nil
24
- end
25
-
26
- def test_parse_mecab_options
27
- [ '-r /some/file',
28
- '-r/some/file',
29
- '--rcfile=/some/file',
30
- '--rcfile /some/file',
31
- {:rcfile=>"/some/file"} ].each do |opts|
32
- assert_equal({:rcfile => '/some/file'}, Natto::MeCab.parse_mecab_options(opts))
33
- end
34
-
35
- [ '-d /some/other/file',
36
- '-d/some/other/file',
37
- '--dicdir=/some/other/file',
38
- '--dicdir /some/other/file',
39
- {:dicdir=>"/some/other/file"} ].each do |opts|
40
- assert_equal({:dicdir => '/some/other/file'}, Natto::MeCab.parse_mecab_options(opts))
41
- end
42
-
43
- [ '-u /yet/another/file',
44
- '-u/yet/another/file',
45
- '--userdic=/yet/another/file',
46
- '--userdic /yet/another/file',
47
- {:userdic=>"/yet/another/file"} ].each do |opts|
48
- assert_equal({:userdic => '/yet/another/file'}, Natto::MeCab.parse_mecab_options(opts))
49
- end
50
-
51
- [ '-l 42',
52
- '-l42',
53
- '--lattice-level=42',
54
- '--lattice-level 42',
55
- {:lattice_level=>42}
56
- ].each do |opts|
57
- assert_equal({:lattice_level => 42}, Natto::MeCab.parse_mecab_options(opts))
58
- end
59
-
60
- [ '-a',
61
- '--all-morphs',
62
- {:all_morphs=>true} ].each do |opts|
63
- assert_equal({:all_morphs => true}, Natto::MeCab.parse_mecab_options(opts))
64
- end
65
-
66
- [ '-O natto',
67
- '-Onatto',
68
- '--output-format-type=natto',
69
- '--output-format-type natto',
70
- {:output_format_type=>"natto"} ].each do |opts|
71
- assert_equal({:output_format_type => 'natto'}, Natto::MeCab.parse_mecab_options(opts))
72
- end
73
-
74
- [ '-N 42',
75
- '-N42',
76
- '--nbest=42',
77
- '--nbest 42',
78
- {:nbest=>42}
79
- ].each do |opts|
80
- assert_equal({:nbest => 42}, Natto::MeCab.parse_mecab_options(opts))
81
- end
82
- [ '--nbest=-1', '--nbest=0', '--nbest=513' ].each do |bad|
83
- assert_raise Natto::MeCabError do
84
- Natto::MeCab.parse_mecab_options(bad)
85
- end
86
- end
87
-
88
- [ '-F %m\t%f[7]\n',
89
- '-F%m\t%f[7]\n',
90
- '--node-format=%m\t%f[7]\n',
91
- '--node-format %m\t%f[7]\n',
92
- {:node_format=>'%m\t%f[7]\n'} ].each do |opts|
93
- assert_equal({:node_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
94
- end
95
-
96
- [ '-U %m\t%f[7]\n',
97
- '-U%m\t%f[7]\n',
98
- '--unk-format=%m\t%f[7]\n',
99
- '--unk-format %m\t%f[7]\n',
100
- {:unk_format=>'%m\t%f[7]\n'} ].each do |opts|
101
- assert_equal({:unk_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
102
- end
103
-
104
- [ '-B %m\t%f[7]\n',
105
- '-B%m\t%f[7]\n',
106
- '--bos-format=%m\t%f[7]\n',
107
- '--bos-format %m\t%f[7]\n',
108
- {:bos_format=>'%m\t%f[7]\n'} ].each do |opts|
109
- assert_equal({:bos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
110
- end
111
-
112
- [ '-E %m\t%f[7]\n',
113
- '-E%m\t%f[7]\n',
114
- '--eos-format=%m\t%f[7]\n',
115
- '--eos-format %m\t%f[7]\n',
116
- {:eos_format=>'%m\t%f[7]\n'} ].each do |opts|
117
- assert_equal({:eos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
118
- end
119
-
120
- [ '-S %m\t%f[7]\n',
121
- '-S%m\t%f[7]\n',
122
- '--eon-format=%m\t%f[7]\n',
123
- '--eon-format %m\t%f[7]\n',
124
- {:eon_format=>'%m\t%f[7]\n'} ].each do |opts|
125
- assert_equal({:eon_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
126
- end
127
-
128
- [ '-x %m\t%f[7]\n',
129
- '-x%m\t%f[7]\n',
130
- '--unk-feature=%m\t%f[7]\n',
131
- '--unk-feature %m\t%f[7]\n',
132
- {:unk_feature=>'%m\t%f[7]\n'} ].each do |opts|
133
- assert_equal({:unk_feature => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
134
- end
135
-
136
- [ '-b 102400',
137
- '-b102400',
138
- '--input-buffer-size=102400',
139
- '--input-buffer-size 102400',
140
- {:input_buffer_size=>102400} ].each do |opts|
141
- assert_equal({:input_buffer_size => 102400}, Natto::MeCab.parse_mecab_options(opts))
142
- end
143
-
144
- [ '-C',
145
- '--allocate-sentence',
146
- {:allocate_sentence=>true} ].each do |opts|
147
- assert_equal({:allocate_sentence => true}, Natto::MeCab.parse_mecab_options(opts))
148
- end
149
-
150
- [ '-t 0.42',
151
- '-t0.42',
152
- '--theta=0.42',
153
- '--theta 0.42',
154
- {:theta=>0.42} ].each do |opts|
155
- assert_equal({:theta => 0.42}, Natto::MeCab.parse_mecab_options(opts))
156
- end
157
-
158
- [ '-c 42',
159
- '-c42',
160
- '--cost-factor=42',
161
- '--cost-factor 42',
162
- {:cost_factor=>42} ].each do |opts|
163
- assert_equal({:cost_factor => 42}, Natto::MeCab.parse_mecab_options(opts))
164
- end
165
-
166
- assert_equal({}, Natto::MeCab.parse_mecab_options)
167
- assert_equal({}, Natto::MeCab.parse_mecab_options(:unknown=>"ignore"))
168
- end
169
-
170
- def test_build_options_str
171
- assert_equal('--rcfile=/some/file', Natto::MeCab.build_options_str(:rcfile=>"/some/file"))
172
- assert_equal('--dicdir=/some/other/file', Natto::MeCab.build_options_str(:dicdir=>"/some/other/file"))
173
- assert_equal('--userdic=/yet/another/file', Natto::MeCab.build_options_str(:userdic=>"/yet/another/file"))
174
- assert_equal('--lattice-level=42', Natto::MeCab.build_options_str(:lattice_level=>42))
175
- assert_equal('--all-morphs', Natto::MeCab.build_options_str(:all_morphs=>true))
176
- assert_equal('--output-format-type=natto', Natto::MeCab.build_options_str(:output_format_type=>"natto"))
177
- assert_equal('--node-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:node_format=>'%m\t%f[7]\n'))
178
- assert_equal('--unk-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:unk_format=>'%m\t%f[7]\n'))
179
- assert_equal('--bos-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:bos_format=>'%m\t%f[7]\n'))
180
- assert_equal('--eos-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:eos_format=>'%m\t%f[7]\n'))
181
- assert_equal('--eon-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:eon_format=>'%m\t%f[7]\n'))
182
- assert_equal('--unk-feature=%m\t%f[7]\n', Natto::MeCab.build_options_str(:unk_feature=>'%m\t%f[7]\n'))
183
- assert_equal('--input-buffer-size=102400',Natto::MeCab.build_options_str(:input_buffer_size=>102400))
184
- assert_equal('--allocate-sentence', Natto::MeCab.build_options_str(:allocate_sentence=>true))
185
- assert_equal('--nbest=42', Natto::MeCab.build_options_str(:nbest=>42))
186
- assert_equal('--theta=0.42', Natto::MeCab.build_options_str(:theta=>0.42))
187
- assert_equal('--cost-factor=42', Natto::MeCab.build_options_str(:cost_factor=>42))
188
- end
189
-
190
- def test_construction
191
- m = nil
192
- assert_nothing_raised do
193
- m = Natto::MeCab.new
194
- end
195
- assert_equal({}, m.options)
196
-
197
- opts = {:output_format_type=>'chasen'}
198
- assert_nothing_raised do
199
- m = Natto::MeCab.new(opts)
200
- end
201
- assert_equal(opts, m.options)
202
- assert_nothing_raised do
203
- m = Natto::MeCab.new("-O chasen")
204
- end
205
- assert_equal(opts, m.options)
206
- assert_nothing_raised do
207
- m = Natto::MeCab.new("--output-format-type=chasen")
208
- end
209
- assert_equal(opts, m.options)
210
-
211
- opts = {:all_morphs=>true, :allocate_sentence=>true}
212
- assert_nothing_raised do
213
- m = Natto::MeCab.new(opts)
214
- end
215
- assert_equal(opts, m.options)
216
- assert_nothing_raised do
217
- m = Natto::MeCab.new('-a -C')
218
- end
219
- assert_equal(opts, m.options)
220
- assert_nothing_raised do
221
- m = Natto::MeCab.new('--all-morphs --allocate-sentence')
222
- end
223
- assert_equal(opts, m.options)
224
-
225
- opts = {:lattice_level=>999}
226
- assert_nothing_raised do
227
- m = Natto::MeCab.new(opts)
228
- end
229
- assert_equal(opts, m.options)
230
- assert_nothing_raised do
231
- m = Natto::MeCab.new('-l 999')
232
- end
233
- assert_equal(opts, m.options)
234
- assert_nothing_raised do
235
- m = Natto::MeCab.new('--lattice-level=999')
236
- end
237
- assert_equal(opts, m.options)
238
- end
239
-
240
- def test_initialize_with_errors
241
- assert_raise Natto::MeCabError do
242
- Natto::MeCab.new(:output_format_type=>'not_defined_anywhere')
243
- end
244
-
245
- assert_raise Natto::MeCabError do
246
- Natto::MeCab.new(:rcfile=>'/rcfile/does/not/exist')
247
- end
248
-
249
- assert_raise Natto::MeCabError do
250
- Natto::MeCab.new(:dicdir=>'/dicdir/does/not/exist')
251
- end
252
-
253
- assert_raise Natto::MeCabError do
254
- Natto::MeCab.new(:userdic=>'/userdic/does/not/exist')
255
- end
256
- end
257
-
258
- def test_version_accessor
259
- assert_equal(@ver, @m.version)
260
- end
261
-
262
- def test_all_morphs
263
- m = Natto::MeCab.new(:all_morphs=>true)
264
- expected = `echo #{TEST_STR} | mecab --all-morphs`.lines.to_a
265
- expected.delete_if {|e| e =~ /^(EOS|BOS)/ }
266
-
267
- actual = m.parse(TEST_STR).lines.to_a
268
- actual.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
269
-
270
- assert_equal(expected, actual)
271
- end
272
-
273
- def test_parse_tostr_default
274
- expected = `echo #{TEST_STR} | mecab`.lines.to_a
275
- expected.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
276
-
277
- actual = @m.parse(TEST_STR).lines.to_a
278
- actual.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
279
-
280
- assert_equal(expected, actual)
281
- end
282
-
283
- def test_parse_tonode_default
284
- expected = `echo #{TEST_STR} | mecab`.lines.to_a
285
- expected.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
286
-
287
- actual = []
288
- @m.parse(TEST_STR) do |node|
289
- actual << "#{node.surface}\t#{node.feature}\n"
290
- end
291
- actual.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
292
-
293
- assert_equal(expected, actual)
294
- end
295
- end