natto 0.9.3 → 0.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/natto/binding.rb CHANGED
@@ -1,5 +1,4 @@
1
1
  # coding: utf-8
2
-
3
2
  module Natto
4
3
 
5
4
  # Module <tt>Binding</tt> encapsulates methods and behavior
@@ -31,18 +30,25 @@ module Natto
31
30
  # @raise [LoadError] if MECAB_PATH environment variable is not set in Windows
32
31
  # <br/>
33
32
  # e.g., for bash on UNIX/Linux
33
+ #
34
34
  # export MECAB_PATH=/usr/local/lib/libmecab.so
35
+ #
35
36
  # e.g., on Windows
37
+ #
36
38
  # set MECAB_PATH=C:\Program Files\MeCab\bin\libmecab.dll
39
+ #
37
40
  # e.g., for Cygwin
41
+ #
38
42
  # export MECAB_PATH=cygmecab-1
43
+ #
39
44
  # e.g., from within a Ruby program
45
+ #
40
46
  # ENV['MECAB_PATH']='/usr/local/lib/libmecab.so'
41
47
  def self.find_library
42
48
  host_os = RbConfig::CONFIG['host_os']
43
49
 
44
50
  if host_os =~ /mswin|mingw/i
45
- raise LoadError, "Please set #{MECAB_PATH} to full path to libmecab.dll"
51
+ raise LoadError, "Please set #{MECAB_PATH} to the full path to libmecab.dll"
46
52
  elsif host_os =~ /cygwin/i
47
53
  'cygmecab-1'
48
54
  else
@@ -56,18 +62,15 @@ module Natto
56
62
  attach_function :mecab_version, [], :string
57
63
  attach_function :mecab_strerror, [:pointer],:string
58
64
  attach_function :mecab_destroy, [:pointer], :void
59
-
60
65
  attach_function :mecab_set_theta, [:pointer, :float], :void
61
66
  attach_function :mecab_set_lattice_level, [:pointer, :int], :void
62
67
  attach_function :mecab_set_all_morphs, [:pointer, :int], :void
63
-
64
68
  attach_function :mecab_sparse_tostr, [:pointer, :string], :string
65
69
  attach_function :mecab_sparse_tonode, [:pointer, :string], :pointer
66
-
67
70
  attach_function :mecab_nbest_init, [:pointer, :string], :int
68
71
  attach_function :mecab_nbest_sparse_tostr, [:pointer, :int, :string], :string
69
72
  attach_function :mecab_nbest_next_tonode, [:pointer], :pointer
70
-
73
+ attach_function :mecab_format_node, [:pointer, :pointer], :string
71
74
  attach_function :mecab_dictionary_info, [:pointer], :pointer
72
75
 
73
76
  # @private
@@ -80,48 +83,56 @@ module Natto
80
83
  Natto::Binding.mecab_version
81
84
  end
82
85
 
83
- def mecab_strerror(ptr)
84
- Natto::Binding.mecab_strerror(ptr)
86
+ def mecab_strerror(m_ptr)
87
+ Natto::Binding.mecab_strerror(m_ptr)
85
88
  end
86
89
 
87
- def mecab_destroy(ptr)
88
- Natto::Binding.mecab_destroy(ptr)
90
+ def mecab_destroy(m_ptr)
91
+ Natto::Binding.mecab_destroy(m_ptr)
89
92
  end
90
93
 
91
- def mecab_set_theta(ptr, t)
92
- Natto::Binding.mecab_set_theta(ptr, t)
94
+ def mecab_set_theta(m_ptr, t)
95
+ Natto::Binding.mecab_set_theta(m_ptr, t)
93
96
  end
94
97
 
95
- def mecab_set_lattice_level(ptr, ll)
96
- Natto::Binding.mecab_set_lattice_level(ptr, ll)
98
+ def mecab_set_lattice_level(m_ptr, ll)
99
+ Natto::Binding.mecab_set_lattice_level(m_ptr, ll)
97
100
  end
98
-
99
- def mecab_set_all_morphs(ptr, am)
100
- Natto::Binding.mecab_set_all_morphs(ptr, am)
101
+
102
+ def mecab_set_all_morphs(m_ptr, am)
103
+ Natto::Binding.mecab_set_all_morphs(m_ptr, am)
101
104
  end
102
105
 
103
- def mecab_sparse_tostr(ptr, str)
104
- Natto::Binding.mecab_sparse_tostr(ptr, str)
106
+ def mecab_sparse_tostr(m_ptr, str)
107
+ Natto::Binding.mecab_sparse_tostr(m_ptr, str)
105
108
  end
106
109
 
107
- def mecab_sparse_tonode(ptr, str)
108
- Natto::Binding.mecab_sparse_tonode(ptr, str)
110
+ def mecab_sparse_tonode(m_ptr, str)
111
+ Natto::Binding.mecab_sparse_tonode(m_ptr, str)
112
+ end
113
+
114
+ def mecab_nbest_next_tonode(m_ptr)
115
+ Natto::Binding.mecab_nbest_next_tonode(m_ptr)
109
116
  end
110
117
 
111
- def mecab_nbest_next_tonode(ptr)
112
- Natto::Binding.mecab_nbest_next_tonode(ptr)
118
+ def mecab_nbest_init(m_ptr, str)
119
+ Natto::Binding.mecab_nbest_init(m_ptr, str)
113
120
  end
114
121
 
115
- def mecab_nbest_init(ptr, str)
116
- Natto::Binding.mecab_nbest_init(ptr, str)
122
+ def mecab_nbest_sparse_tostr(m_ptr, n, str)
123
+ Natto::Binding.mecab_nbest_sparse_tostr(m_ptr, n, str)
117
124
  end
118
125
 
119
- def mecab_nbest_sparse_tostr(ptr, n, str)
120
- Natto::Binding.mecab_nbest_sparse_tostr(ptr, n, str)
126
+ def mecab_nbest_next_tonode(m_ptr)
127
+ Natto::Binding.mecab_nbest_next_tonode(m_ptr)
121
128
  end
122
129
 
123
- def mecab_dictionary_info(ptr)
124
- Natto::Binding.mecab_dictionary_info(ptr)
130
+ def mecab_format_node(m_ptr, n_ptr)
131
+ Natto::Binding.mecab_format_node(m_ptr, n_ptr)
132
+ end
133
+
134
+ def mecab_dictionary_info(m_ptr)
135
+ Natto::Binding.mecab_dictionary_info(m_ptr)
125
136
  end
126
137
  end
127
138
  end
@@ -0,0 +1,16 @@
1
+ # coding: utf-8
2
+ module Natto
3
+ module Utils
4
+ # @private
5
+ def self.included(base)
6
+ base.extend(ClassMethods)
7
+ end
8
+
9
+ module ClassMethods
10
+ def force_enc(str)
11
+ str.force_encoding(Encoding.default_external) if str.respond_to?(:encoding) && str.encoding!=Encoding.default_external
12
+ str
13
+ end
14
+ end
15
+ end
16
+ end
data/lib/natto/version.rb CHANGED
@@ -1,10 +1,10 @@
1
1
  # coding: utf-8
2
2
 
3
3
  # <tt>Natto</tt> is the namespace for objects that provide
4
- # a binding to the <tt>mecab</tt> parser and related resources.
4
+ # a binding to the <tt>mecab</tt> tagger and related resources.
5
5
  #
6
6
  # <tt>Natto::MeCab</tt> is a wrapper class for the <tt>mecab</tt>
7
- # parser.
7
+ # tagger.
8
8
  #
9
9
  # <tt>Natto::MeCabStruct</tt> is a base class for a <tt>mecab</tt>
10
10
  # struct.
@@ -23,5 +23,5 @@
23
23
  # which are made available via <tt>FFI</tt> bindings to <tt>mecab</tt>.
24
24
  module Natto
25
25
  # Version string for this Rubygem.
26
- VERSION = "0.9.3"
26
+ VERSION = "0.9.4"
27
27
  end
metadata CHANGED
@@ -1,95 +1,74 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: natto
3
- version: !ruby/object:Gem::Version
4
- hash: 61
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.9.4
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 9
9
- - 3
10
- version: 0.9.3
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Brooke M. Fujita
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2012-02-13 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2012-02-26 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: ffi
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &341055920 !ruby/object:Gem::Requirement
24
17
  none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- hash: 1
29
- segments:
30
- - 0
31
- - 6
32
- - 3
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
33
21
  version: 0.6.3
34
22
  type: :runtime
35
- version_requirements: *id001
36
- description: "natto is a gem bridging Ruby and MeCab using FFI (foreign function interface). No compilation is necessary, and natto will run on CRuby (mri/yarv) and JRuby (jvm) equally well, on any OS.\n\
37
- ruby \xE3\x81\xA7 mecab \xE3\x82\x92\xE8\x87\xAA\xE7\x94\xB1\xE3\x81\xAB\xE5\x88\xA9\xE7\x94\xA8\xE3\x81\xA7\xE3\x81\x8D\xE3\x82\x8B natto \xE3\x82\x92\xE3\x81\x9C\xE3\x81\xB2\xE8\xA9\xA6\xE3\x81\x97\xE3\x81\xA6\xE3\x81\x8F\xE3\x81\xA0\xE3\x81\x95\xE3\x81\x84\xE3\x80\x82\n"
23
+ prerelease: false
24
+ version_requirements: *341055920
25
+ description: ! 'natto is a gem bridging Ruby and MeCab using FFI (foreign function
26
+ interface). No compilation is necessary, and natto will run on CRuby (mri/yarv)
27
+ and JRuby (jvm) equally well, on any OS.
28
+
29
+ ruby で mecab を自由に利用できる natto をぜひ試してください。
30
+
31
+ '
38
32
  email: buruzaemon@gmail.com
39
33
  executables: []
40
-
41
34
  extensions: []
42
-
43
35
  extra_rdoc_files: []
44
-
45
- files:
36
+ files:
46
37
  - lib/natto.rb
47
38
  - lib/natto/binding.rb
48
39
  - lib/natto/version.rb
49
- - test/test_natto.rb
50
- - test/natto/tc_binding.rb
51
- - test/natto/tc_dictionaryinfo.rb
52
- - test/natto/tc_mecab.rb
53
- - test/natto/tc_mecabnode.rb
40
+ - lib/natto/utils.rb
54
41
  - README.md
55
42
  - LICENSE
56
43
  - CHANGELOG
57
44
  - .yardopts
58
45
  homepage: https://bitbucket.org/buruzaemon/natto/overview
59
- licenses:
46
+ licenses:
60
47
  - BSD
61
48
  post_install_message:
62
49
  rdoc_options: []
63
-
64
- require_paths:
50
+ require_paths:
65
51
  - lib
66
- required_ruby_version: !ruby/object:Gem::Requirement
52
+ required_ruby_version: !ruby/object:Gem::Requirement
67
53
  none: false
68
- requirements:
69
- - - ">="
70
- - !ruby/object:Gem::Version
71
- hash: 57
72
- segments:
73
- - 1
74
- - 8
75
- - 7
54
+ requirements:
55
+ - - ! '>='
56
+ - !ruby/object:Gem::Version
76
57
  version: 1.8.7
77
- required_rubygems_version: !ruby/object:Gem::Requirement
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
59
  none: false
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- hash: 3
83
- segments:
84
- - 0
85
- version: "0"
86
- requirements:
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ requirements:
87
65
  - MeCab, 0.993 or greater
88
66
  - FFI, 0.6.3 or greater
89
67
  rubyforge_project:
90
- rubygems_version: 1.8.10
68
+ rubygems_version: 1.8.17
91
69
  signing_key:
92
70
  specification_version: 3
93
- summary: natto combines the Ruby programming language with MeCab, the part-of-speech and morphological analyzer for the Japanese language.
94
- test_files:
95
- - test/test_natto.rb
71
+ summary: natto combines the Ruby programming language with MeCab, the part-of-speech
72
+ and morphological analyzer for the Japanese language.
73
+ test_files: []
74
+ has_rdoc:
@@ -1,41 +0,0 @@
1
- # coding: utf-8
2
-
3
- # TestNattoBinding encapsulates tests for the basic
4
- # behavior of the Natto::Binding module.
5
- class TestNattoBinding < Test::Unit::TestCase
6
- def setup
7
- @klass = Class.new do
8
- include Natto::Binding
9
- end
10
- end
11
-
12
- def teardown
13
- @klass = nil
14
- end
15
-
16
- # Tests the mecab_version function.
17
- def test_mecab_version
18
- mv = `mecab -v`.split.last
19
- assert_equal(mv, @klass.mecab_version)
20
- end
21
-
22
- # Tests for the inclusion of mecab methods made available
23
- # to any classes including the Natto::Binding module.
24
- def test_functions_included
25
- [ :mecab_new2,
26
- :mecab_version,
27
- :mecab_strerror,
28
- :mecab_destroy,
29
- :mecab_set_theta,
30
- :mecab_set_lattice_level,
31
- :mecab_set_all_morphs,
32
- :mecab_sparse_tostr,
33
- :mecab_nbest_sparse_tostr,
34
- :mecab_nbest_init,
35
- :mecab_nbest_sparse_tostr,
36
- :mecab_nbest_next_tonode,
37
- :mecab_dictionary_info ].each do |f|
38
- assert(@klass.respond_to? f)
39
- end
40
- end
41
- end
@@ -1,65 +0,0 @@
1
- # coding: utf-8
2
-
3
- # TestDictionaryInfo encapsulates tests for the basic
4
- # behavior of Natto::DictionaryInfo
5
- class TestDictionaryInfo < Test::Unit::TestCase
6
- def setup
7
- m = Natto::MeCab.new
8
- @dicts = m.dicts
9
-
10
- out = `mecab -D`.lines.to_a
11
- out.each do |l|
12
- tokens = l.split("\t")
13
- @sysdic_filename = tokens[1].strip if tokens[0] =~ /filename:/i
14
- @sysdic_charset = tokens[1].strip if tokens[0] =~ /charset:/i
15
- end
16
- end
17
-
18
- def teardown
19
- @dicts = nil
20
- end
21
-
22
- # Tests the dictionaries accessor method of Natto::MeCab.
23
- # Assumes that:
24
- # a) system dictionary is /usr/local/lib/mecab/dic/ipadic/sys.dic
25
- # b) system dictionary encoding is utf-8
26
- # c) only dealing w/ case of 1 dictionary being used
27
- def test_dictionaries_accessor
28
- assert @dicts.empty? == false
29
- sysdic = @dicts.first
30
- assert_equal(@sysdic_filename, sysdic[:filename])
31
- assert_equal(@sysdic_charset, sysdic[:charset])
32
- assert_equal(0x0, sysdic[:next].address)
33
- end
34
-
35
- # Tests the to_s method.
36
- def test_to_s
37
- #<Natto::DictionaryInfo:0x288879bc @filename=\"/usr/local/lib/mecab/dic/ipadic/sys.dic\", @charset=\"utf8\">
38
- assert(@dicts.first.to_s.include?("filename=\"#{@sysdic_filename}\", charset=\"#{@sysdic_charset}\""))
39
- end
40
-
41
- # Tests the accessors of Natto::DictionaryInfo.
42
- # Note: Object#type is deprecated in 1.9.n, but comes with a warning
43
- # in 1.8.n
44
- def test_dictionary_info_member_accessors
45
- sysdic = @dicts.first
46
- members = [
47
- :filename,
48
- :charset,
49
- :type,
50
- :size,
51
- :lsize,
52
- :rsize,
53
- :version,
54
- :next
55
- ]
56
- members.each do |nomme|
57
- assert_not_nil(sysdic.send nomme )
58
- end
59
-
60
- # NoMethodError will be raised for anything else!
61
- assert_raise NoMethodError do
62
- sysdic.send :unknown_attr
63
- end
64
- end
65
- end
@@ -1,295 +0,0 @@
1
- # coding: utf-8
2
- require 'rbconfig'
3
- require 'nkf'
4
-
5
- # TestMeCab encapsulates tests for the basic
6
- # behavior of Natto::MeCab.
7
- class TestMeCab < Test::Unit::TestCase
8
-
9
- host_os = RbConfig::CONFIG['host_os']
10
- # we need to transform from UTF-8 to SJIS if we are on Windows!
11
- if host_os =~ /mswin|mingw/i
12
- TEST_STR = NKF.nkf("-Ws", '試験ですよ、これが。')
13
- else
14
- TEST_STR = '試験ですよ、これが。'
15
- end
16
-
17
- def setup
18
- @m = Natto::MeCab.new
19
- @ver = `mecab -v`.strip.split.last
20
- end
21
-
22
- def teardown
23
- @m = nil
24
- end
25
-
26
- def test_parse_mecab_options
27
- [ '-r /some/file',
28
- '-r/some/file',
29
- '--rcfile=/some/file',
30
- '--rcfile /some/file',
31
- {:rcfile=>"/some/file"} ].each do |opts|
32
- assert_equal({:rcfile => '/some/file'}, Natto::MeCab.parse_mecab_options(opts))
33
- end
34
-
35
- [ '-d /some/other/file',
36
- '-d/some/other/file',
37
- '--dicdir=/some/other/file',
38
- '--dicdir /some/other/file',
39
- {:dicdir=>"/some/other/file"} ].each do |opts|
40
- assert_equal({:dicdir => '/some/other/file'}, Natto::MeCab.parse_mecab_options(opts))
41
- end
42
-
43
- [ '-u /yet/another/file',
44
- '-u/yet/another/file',
45
- '--userdic=/yet/another/file',
46
- '--userdic /yet/another/file',
47
- {:userdic=>"/yet/another/file"} ].each do |opts|
48
- assert_equal({:userdic => '/yet/another/file'}, Natto::MeCab.parse_mecab_options(opts))
49
- end
50
-
51
- [ '-l 42',
52
- '-l42',
53
- '--lattice-level=42',
54
- '--lattice-level 42',
55
- {:lattice_level=>42}
56
- ].each do |opts|
57
- assert_equal({:lattice_level => 42}, Natto::MeCab.parse_mecab_options(opts))
58
- end
59
-
60
- [ '-a',
61
- '--all-morphs',
62
- {:all_morphs=>true} ].each do |opts|
63
- assert_equal({:all_morphs => true}, Natto::MeCab.parse_mecab_options(opts))
64
- end
65
-
66
- [ '-O natto',
67
- '-Onatto',
68
- '--output-format-type=natto',
69
- '--output-format-type natto',
70
- {:output_format_type=>"natto"} ].each do |opts|
71
- assert_equal({:output_format_type => 'natto'}, Natto::MeCab.parse_mecab_options(opts))
72
- end
73
-
74
- [ '-N 42',
75
- '-N42',
76
- '--nbest=42',
77
- '--nbest 42',
78
- {:nbest=>42}
79
- ].each do |opts|
80
- assert_equal({:nbest => 42}, Natto::MeCab.parse_mecab_options(opts))
81
- end
82
- [ '--nbest=-1', '--nbest=0', '--nbest=513' ].each do |bad|
83
- assert_raise Natto::MeCabError do
84
- Natto::MeCab.parse_mecab_options(bad)
85
- end
86
- end
87
-
88
- [ '-F %m\t%f[7]\n',
89
- '-F%m\t%f[7]\n',
90
- '--node-format=%m\t%f[7]\n',
91
- '--node-format %m\t%f[7]\n',
92
- {:node_format=>'%m\t%f[7]\n'} ].each do |opts|
93
- assert_equal({:node_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
94
- end
95
-
96
- [ '-U %m\t%f[7]\n',
97
- '-U%m\t%f[7]\n',
98
- '--unk-format=%m\t%f[7]\n',
99
- '--unk-format %m\t%f[7]\n',
100
- {:unk_format=>'%m\t%f[7]\n'} ].each do |opts|
101
- assert_equal({:unk_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
102
- end
103
-
104
- [ '-B %m\t%f[7]\n',
105
- '-B%m\t%f[7]\n',
106
- '--bos-format=%m\t%f[7]\n',
107
- '--bos-format %m\t%f[7]\n',
108
- {:bos_format=>'%m\t%f[7]\n'} ].each do |opts|
109
- assert_equal({:bos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
110
- end
111
-
112
- [ '-E %m\t%f[7]\n',
113
- '-E%m\t%f[7]\n',
114
- '--eos-format=%m\t%f[7]\n',
115
- '--eos-format %m\t%f[7]\n',
116
- {:eos_format=>'%m\t%f[7]\n'} ].each do |opts|
117
- assert_equal({:eos_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
118
- end
119
-
120
- [ '-S %m\t%f[7]\n',
121
- '-S%m\t%f[7]\n',
122
- '--eon-format=%m\t%f[7]\n',
123
- '--eon-format %m\t%f[7]\n',
124
- {:eon_format=>'%m\t%f[7]\n'} ].each do |opts|
125
- assert_equal({:eon_format => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
126
- end
127
-
128
- [ '-x %m\t%f[7]\n',
129
- '-x%m\t%f[7]\n',
130
- '--unk-feature=%m\t%f[7]\n',
131
- '--unk-feature %m\t%f[7]\n',
132
- {:unk_feature=>'%m\t%f[7]\n'} ].each do |opts|
133
- assert_equal({:unk_feature => '%m\t%f[7]\n'}, Natto::MeCab.parse_mecab_options(opts))
134
- end
135
-
136
- [ '-b 102400',
137
- '-b102400',
138
- '--input-buffer-size=102400',
139
- '--input-buffer-size 102400',
140
- {:input_buffer_size=>102400} ].each do |opts|
141
- assert_equal({:input_buffer_size => 102400}, Natto::MeCab.parse_mecab_options(opts))
142
- end
143
-
144
- [ '-C',
145
- '--allocate-sentence',
146
- {:allocate_sentence=>true} ].each do |opts|
147
- assert_equal({:allocate_sentence => true}, Natto::MeCab.parse_mecab_options(opts))
148
- end
149
-
150
- [ '-t 0.42',
151
- '-t0.42',
152
- '--theta=0.42',
153
- '--theta 0.42',
154
- {:theta=>0.42} ].each do |opts|
155
- assert_equal({:theta => 0.42}, Natto::MeCab.parse_mecab_options(opts))
156
- end
157
-
158
- [ '-c 42',
159
- '-c42',
160
- '--cost-factor=42',
161
- '--cost-factor 42',
162
- {:cost_factor=>42} ].each do |opts|
163
- assert_equal({:cost_factor => 42}, Natto::MeCab.parse_mecab_options(opts))
164
- end
165
-
166
- assert_equal({}, Natto::MeCab.parse_mecab_options)
167
- assert_equal({}, Natto::MeCab.parse_mecab_options(:unknown=>"ignore"))
168
- end
169
-
170
- def test_build_options_str
171
- assert_equal('--rcfile=/some/file', Natto::MeCab.build_options_str(:rcfile=>"/some/file"))
172
- assert_equal('--dicdir=/some/other/file', Natto::MeCab.build_options_str(:dicdir=>"/some/other/file"))
173
- assert_equal('--userdic=/yet/another/file', Natto::MeCab.build_options_str(:userdic=>"/yet/another/file"))
174
- assert_equal('--lattice-level=42', Natto::MeCab.build_options_str(:lattice_level=>42))
175
- assert_equal('--all-morphs', Natto::MeCab.build_options_str(:all_morphs=>true))
176
- assert_equal('--output-format-type=natto', Natto::MeCab.build_options_str(:output_format_type=>"natto"))
177
- assert_equal('--node-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:node_format=>'%m\t%f[7]\n'))
178
- assert_equal('--unk-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:unk_format=>'%m\t%f[7]\n'))
179
- assert_equal('--bos-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:bos_format=>'%m\t%f[7]\n'))
180
- assert_equal('--eos-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:eos_format=>'%m\t%f[7]\n'))
181
- assert_equal('--eon-format=%m\t%f[7]\n', Natto::MeCab.build_options_str(:eon_format=>'%m\t%f[7]\n'))
182
- assert_equal('--unk-feature=%m\t%f[7]\n', Natto::MeCab.build_options_str(:unk_feature=>'%m\t%f[7]\n'))
183
- assert_equal('--input-buffer-size=102400',Natto::MeCab.build_options_str(:input_buffer_size=>102400))
184
- assert_equal('--allocate-sentence', Natto::MeCab.build_options_str(:allocate_sentence=>true))
185
- assert_equal('--nbest=42', Natto::MeCab.build_options_str(:nbest=>42))
186
- assert_equal('--theta=0.42', Natto::MeCab.build_options_str(:theta=>0.42))
187
- assert_equal('--cost-factor=42', Natto::MeCab.build_options_str(:cost_factor=>42))
188
- end
189
-
190
- def test_construction
191
- m = nil
192
- assert_nothing_raised do
193
- m = Natto::MeCab.new
194
- end
195
- assert_equal({}, m.options)
196
-
197
- opts = {:output_format_type=>'chasen'}
198
- assert_nothing_raised do
199
- m = Natto::MeCab.new(opts)
200
- end
201
- assert_equal(opts, m.options)
202
- assert_nothing_raised do
203
- m = Natto::MeCab.new("-O chasen")
204
- end
205
- assert_equal(opts, m.options)
206
- assert_nothing_raised do
207
- m = Natto::MeCab.new("--output-format-type=chasen")
208
- end
209
- assert_equal(opts, m.options)
210
-
211
- opts = {:all_morphs=>true, :allocate_sentence=>true}
212
- assert_nothing_raised do
213
- m = Natto::MeCab.new(opts)
214
- end
215
- assert_equal(opts, m.options)
216
- assert_nothing_raised do
217
- m = Natto::MeCab.new('-a -C')
218
- end
219
- assert_equal(opts, m.options)
220
- assert_nothing_raised do
221
- m = Natto::MeCab.new('--all-morphs --allocate-sentence')
222
- end
223
- assert_equal(opts, m.options)
224
-
225
- opts = {:lattice_level=>999}
226
- assert_nothing_raised do
227
- m = Natto::MeCab.new(opts)
228
- end
229
- assert_equal(opts, m.options)
230
- assert_nothing_raised do
231
- m = Natto::MeCab.new('-l 999')
232
- end
233
- assert_equal(opts, m.options)
234
- assert_nothing_raised do
235
- m = Natto::MeCab.new('--lattice-level=999')
236
- end
237
- assert_equal(opts, m.options)
238
- end
239
-
240
- def test_initialize_with_errors
241
- assert_raise Natto::MeCabError do
242
- Natto::MeCab.new(:output_format_type=>'not_defined_anywhere')
243
- end
244
-
245
- assert_raise Natto::MeCabError do
246
- Natto::MeCab.new(:rcfile=>'/rcfile/does/not/exist')
247
- end
248
-
249
- assert_raise Natto::MeCabError do
250
- Natto::MeCab.new(:dicdir=>'/dicdir/does/not/exist')
251
- end
252
-
253
- assert_raise Natto::MeCabError do
254
- Natto::MeCab.new(:userdic=>'/userdic/does/not/exist')
255
- end
256
- end
257
-
258
- def test_version_accessor
259
- assert_equal(@ver, @m.version)
260
- end
261
-
262
- def test_all_morphs
263
- m = Natto::MeCab.new(:all_morphs=>true)
264
- expected = `echo #{TEST_STR} | mecab --all-morphs`.lines.to_a
265
- expected.delete_if {|e| e =~ /^(EOS|BOS)/ }
266
-
267
- actual = m.parse(TEST_STR).lines.to_a
268
- actual.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
269
-
270
- assert_equal(expected, actual)
271
- end
272
-
273
- def test_parse_tostr_default
274
- expected = `echo #{TEST_STR} | mecab`.lines.to_a
275
- expected.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
276
-
277
- actual = @m.parse(TEST_STR).lines.to_a
278
- actual.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
279
-
280
- assert_equal(expected, actual)
281
- end
282
-
283
- def test_parse_tonode_default
284
- expected = `echo #{TEST_STR} | mecab`.lines.to_a
285
- expected.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
286
-
287
- actual = []
288
- @m.parse(TEST_STR) do |node|
289
- actual << "#{node.surface}\t#{node.feature}\n"
290
- end
291
- actual.delete_if {|e| e =~ /^(EOS|BOS|\t)/ }
292
-
293
- assert_equal(expected, actual)
294
- end
295
- end