ve 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,21 @@
1
+ # Encoding: UTF-8
2
+
3
+ require_relative 'test_helper'
4
+
5
+ class MecabIpadicProviderTest < MiniTest::Unit::TestCase
6
+ # TODO: make these run without running mecab
7
+
8
+ def test_should_be_able_to_start
9
+ skip
10
+ mecab = Ve::Provider::MecabIpadic.new
11
+ assert mecab.works?
12
+ end
13
+
14
+ def test_can_parse
15
+ skip
16
+ mecab = Ve::Provider::MecabIpadic.new
17
+ parse = mecab.parse('')
18
+ assert_equal Ve::Parse::MecabIpadic, parse.class
19
+ end
20
+
21
+ end
data/tests/test_helper.rb CHANGED
@@ -2,14 +2,15 @@ require 'rubygems'
2
2
  require 'bundler/setup'
3
3
 
4
4
  require File.expand_path(File.dirname(__FILE__) + "/../lib/ve")
5
- require 'test/unit'
5
+ require 'minitest/autorun'
6
+ require 'mocha'
6
7
 
7
- class Test::Unit::TestCase
8
+ class MiniTest::Unit::TestCase
8
9
 
9
10
  private
10
11
 
11
- def assert_parses_into_words(provider, expected, text)
12
- parse = provider.parse(text)
12
+ def assert_parses_into_words(parse_klass, expected, text, raw)
13
+ parse = parse_klass.new(text, raw)
13
14
  words = parse.words
14
15
  tokens = parse.tokens
15
16
 
data/tests/ve_test.rb CHANGED
@@ -2,17 +2,21 @@
2
2
 
3
3
  require_relative 'test_helper'
4
4
 
5
- class VeTest < Test::Unit::TestCase
5
+ class VeTest < MiniTest::Unit::TestCase
6
+ # TODO: Set these up to run properly
6
7
 
7
8
  def test_get
9
+ skip
8
10
  assert_equal ['日本語', 'です'], Ve.get('日本語です', :ja, :words).collect(&:word)
9
11
  end
10
12
 
11
13
  def test_in
14
+ skip
12
15
  assert_equal ['日本語', 'です'], Ve.in(:ja).words('日本語です').collect(&:word)
13
16
  end
14
17
 
15
18
  def test_http_interface
19
+ skip
16
20
  Ve.config(Ve::HTTPInterface, :url => 'http://localhost:4567')
17
21
  assert_equal ['日本語', 'です'], Ve.in(:ja).words('日本語です').collect(&:word)
18
22
  end
data/ve.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 've'
3
- s.version = '0.0.2'
3
+ s.version = '0.0.3'
4
4
  s.platform = Gem::Platform::RUBY
5
5
  s.authors = ["Kim Ahlström"]
6
6
  s.email = ["kim.ahlstrom@gmail.com"]
metadata CHANGED
@@ -1,29 +1,24 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: ve
3
- version: !ruby/object:Gem::Version
4
- prerelease:
5
- version: 0.0.2
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
6
5
  platform: ruby
7
- authors:
8
- - "Kim Ahlstr\xC3\xB6m"
6
+ authors:
7
+ - Kim Ahlström
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
-
13
- date: 2011-11-12 00:00:00 Z
11
+ date: 2014-06-06 00:00:00.000000000 Z
14
12
  dependencies: []
15
-
16
13
  description: Ve is a linguistic framework for programmers.
17
- email:
14
+ email:
18
15
  - kim.ahlstrom@gmail.com
19
16
  executables: []
20
-
21
17
  extensions: []
22
-
23
18
  extra_rdoc_files: []
24
-
25
- files:
19
+ files:
26
20
  - .gitignore
21
+ - .travis.yml
27
22
  - Gemfile
28
23
  - Gemfile.lock
29
24
  - Rakefile
@@ -43,38 +38,35 @@ files:
43
38
  - lib/ve.rb
44
39
  - lib/word.rb
45
40
  - sinatra/server.rb
46
- - tests/freeling_en_test.rb
41
+ - tests/freeling_en_parse_test.rb
42
+ - tests/freeling_en_provider_test.rb
47
43
  - tests/japanese_transliterators_test.rb
48
- - tests/mecab_ipadic_test.rb
44
+ - tests/mecab_ipadic_parse_test.rb
45
+ - tests/mecab_ipadic_provider_test.rb
49
46
  - tests/test_helper.rb
50
47
  - tests/ve_test.rb
51
48
  - ve.gemspec
52
49
  homepage: http://github.com/kimtaro/ve
53
50
  licenses: []
54
-
51
+ metadata: {}
55
52
  post_install_message:
56
53
  rdoc_options: []
57
-
58
- require_paths:
54
+ require_paths:
59
55
  - lib
60
- required_ruby_version: !ruby/object:Gem::Requirement
61
- none: false
62
- requirements:
63
- - - ">="
64
- - !ruby/object:Gem::Version
65
- version: "0"
66
- required_rubygems_version: !ruby/object:Gem::Requirement
67
- none: false
68
- requirements:
69
- - - ">="
70
- - !ruby/object:Gem::Version
71
- version: "0"
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - '>='
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
72
66
  requirements: []
73
-
74
67
  rubyforge_project:
75
- rubygems_version: 1.8.11
68
+ rubygems_version: 2.0.3
76
69
  signing_key:
77
- specification_version: 3
70
+ specification_version: 4
78
71
  summary: Ve is a linguistic framework for programmers
79
72
  test_files: []
80
-
@@ -1,452 +0,0 @@
1
- # Encoding: UTF-8
2
-
3
- require_relative 'test_helper'
4
-
5
- class MecabIpadicTest < Test::Unit::TestCase
6
-
7
- def test_should_be_able_to_start
8
- mecab = Ve::Provider::MecabIpadic.new
9
- assert mecab.works?
10
- end
11
-
12
- def test_can_parse
13
- mecab = Ve::Provider::MecabIpadic.new
14
- parse = mecab.parse('')
15
- assert_equal Ve::Parse::MecabIpadic, parse.class
16
- end
17
-
18
- def test_all_literals_should_equal_the_input_text
19
- text = <<-EOS
20
- 古池や
21
- 蛙飛び込む
22
- 水の音
23
-
24
- EOS
25
- mecab = Ve::Provider::MecabIpadic.new
26
- parse = mecab.parse(text)
27
- assert_equal text, parse.tokens.collect { |t| t[:literal] }.join
28
- end
29
-
30
- def test_tokens_must_be_created_for_parsed_and_unparsed_text
31
- mecab = Ve::Provider::MecabIpadic.new
32
- parse = mecab.parse(' A B ')
33
- assert_equal [:unparsed, :parsed, :unparsed, :parsed, :unparsed, :sentence_split], parse.tokens.collect { |t| t[:type] }
34
- assert_equal [' ', 'A', ' ', 'B', ' ', ''], parse.tokens.collect { |t| t[:literal] }
35
- assert_equal [0..0, 1..1, 2..4, 5..5, 6..7, nil], parse.tokens.collect { |t| t[:characters] }
36
- end
37
-
38
- def test_tokens_should_not_be_modified_when_attached_to_words
39
- mecab = Ve::Provider::MecabIpadic.new
40
- parse = mecab.parse('悪化する')
41
- tokens = parse.tokens
42
- assert_equal '悪化', tokens[0][:literal]
43
- assert_equal '悪化', tokens[0][:lemma]
44
- end
45
-
46
- def test_sentences
47
- mecab = Ve::Provider::MecabIpadic.new
48
- parse = mecab.parse('これは文章である。で、also containing some Englishですね')
49
- assert_equal ['これは文章である。', 'で、also containing some Englishですね'], parse.sentences
50
- end
51
-
52
- def test_this_shouldnt_crash
53
- mecab = Ve::Provider::MecabIpadic.new
54
- parse = mecab.parse('チューたろうは田中さんの犬です。')
55
- assert_equal 11, parse.tokens.size
56
- end
57
-
58
- def test_this_shouldnt_crash_either
59
- mecab = Ve::Provider::MecabIpadic.new
60
- parse = mecab.parse('三十年式歩兵銃')
61
- assert_equal 7, parse.tokens.size
62
- end
63
-
64
- def test_words
65
- mecab = Ve::Provider::MecabIpadic.new
66
-
67
- # Meishi
68
- assert_parses_into_words(mecab, {:words => ['車'],
69
- :lemmas => ['車'],
70
- :pos => [Ve::PartOfSpeech::Noun],
71
- :extra => [{:reading => 'クルマ', :transcription => 'クルマ', :grammar => nil}],
72
- :tokens => [0..0]},
73
- '車')
74
-
75
- # Koyuumeishi
76
- assert_parses_into_words(mecab, {:words => ['太郎'],
77
- :lemmas => ['太郎'],
78
- :pos => [Ve::PartOfSpeech::ProperNoun],
79
- :extra => [{:reading => 'タロウ', :transcription => 'タロー', :grammar => nil}],
80
- :tokens => [0..0]},
81
- '太郎')
82
-
83
- # Daimeishi
84
- assert_parses_into_words(mecab, {:words => ['彼'],
85
- :lemmas => ['彼'],
86
- :pos => [Ve::PartOfSpeech::Pronoun],
87
- :extra => [{:reading => 'カレ', :transcription => 'カレ', :grammar => nil}],
88
- :tokens => [0..0]},
89
- '彼')
90
-
91
- # Fukushikanou
92
- assert_parses_into_words(mecab, {:words => ['午後に'],
93
- :lemmas => ['午後に'],
94
- :pos => [Ve::PartOfSpeech::Adverb],
95
- :extra => [{:reading => 'ゴゴニ', :transcription => 'ゴゴニ', :grammar => nil}],
96
- :tokens => [0..1]},
97
- '午後に')
98
-
99
- # Kazu
100
- assert_parses_into_words(mecab, {:words => ['一'],
101
- :lemmas => ['一'],
102
- :pos => [Ve::PartOfSpeech::Number],
103
- :extra => [{:reading => 'イチ', :transcription => 'イチ', :grammar => nil}],
104
- :tokens => [0..0]},
105
- '一')
106
-
107
- assert_parses_into_words(mecab, {:words => ['123'],
108
- :lemmas => ['123'],
109
- :pos => [Ve::PartOfSpeech::Number],
110
- :extra => [{:reading => 'イチニサン', :transcription => 'イチニサン', :grammar => nil}],
111
- :tokens => [0..2]},
112
- '123')
113
-
114
- # Sahensetsuzoku + tokumi ta
115
- assert_parses_into_words(mecab, {:words => ['悪化した'],
116
- :lemmas => ['悪化する'],
117
- :pos => [Ve::PartOfSpeech::Verb],
118
- :extra => [{:reading => 'アッカシタ', :transcription => 'アッカシタ', :grammar => nil}],
119
- :tokens => [0..2]},
120
- '悪化した')
121
-
122
- # Keiyoudoushigokan
123
- assert_parses_into_words(mecab, {:words => ['重要な'],
124
- :lemmas => ['重要'],
125
- :pos => [Ve::PartOfSpeech::Adjective],
126
- :extra => [{:reading => 'ジュウヨウナ', :transcription => 'ジューヨーナ', :grammar => nil}],
127
- :tokens => [0..1]},
128
- '重要な')
129
-
130
- # Naikeiyoushigokan
131
- assert_parses_into_words(mecab, {:words => ['とんでもない'],
132
- :lemmas => ['とんでもない'],
133
- :pos => [Ve::PartOfSpeech::Adjective],
134
- :extra => [{:reading => 'トンデモナイ', :transcription => 'トンデモナイ', :grammar => nil}],
135
- :tokens => [0..1]},
136
- 'とんでもない')
137
-
138
- # Meishi hijiritsu fukushikanou
139
- assert_parses_into_words(mecab, {:words => ['の', 'うちに'],
140
- :lemmas => ['の', 'うちに'],
141
- :pos => [Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Adverb],
142
- :extra => [{:reading => 'ノ', :transcription => 'ノ', :grammar => nil},
143
- {:reading => 'ウチニ', :transcription => 'ウチニ', :grammar => nil}],
144
- :tokens => [0..0, 1..2]},
145
- 'のうちに')
146
-
147
- # Meishi hijiritsu jodoushigokan
148
- assert_parses_into_words(mecab, {:words => ['の', 'ような'],
149
- :lemmas => ['の', 'ようだ'],
150
- :pos => [Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Verb],
151
- :extra => [{:reading => 'ノ', :transcription => 'ノ', :grammar => nil},
152
- {:reading => 'ヨウナ', :transcription => 'ヨーナ', :grammar => :auxillary}],
153
- :tokens => [0..0, 1..2]},
154
- 'のような')
155
-
156
- assert_parses_into_words(mecab, {:words => ['の', 'ように'],
157
- :lemmas => ['の', 'ように'],
158
- :pos => [Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Adverb],
159
- :extra => [{:reading => 'ノ', :transcription => 'ノ', :grammar => nil},
160
- {:reading => 'ヨウニ', :transcription => 'ヨーニ', :grammar => nil}],
161
- :tokens => [0..0, 1..2]},
162
- 'のように')
163
-
164
- # Meishi hijiritsu keiyoudoushigokan
165
- assert_parses_into_words(mecab, {:words => ['みたいな'],
166
- :lemmas => ['みたいだ'],
167
- :pos => [Ve::PartOfSpeech::Adjective],
168
- :extra => [{:reading => 'ミタイナ', :transcription => 'ミタイナ', :grammar => nil}],
169
- :tokens => [0..1]},
170
- 'みたいな')
171
-
172
- assert_parses_into_words(mecab, {:words => ['みたいの'],
173
- :lemmas => ['みたいの'],
174
- :pos => [Ve::PartOfSpeech::Adjective],
175
- :extra => [{:reading => 'ミタイノ', :transcription => 'ミタイノ', :grammar => nil}],
176
- :tokens => [0..1]},
177
- 'みたいの')
178
-
179
- assert_parses_into_words(mecab, {:words => ['みたい', 'だ'],
180
- :lemmas => ['みたい', 'だ'],
181
- :pos => [Ve::PartOfSpeech::Adjective, Ve::PartOfSpeech::Verb],
182
- :extra => [{:reading => 'ミタイ', :transcription => 'ミタイ', :grammar => nil},
183
- {:reading => 'ダ', :transcription => 'ダ', :grammar => nil}],
184
- :tokens => [0..0, 1..1]},
185
- 'みたいだ')
186
-
187
- # Meishi tokushu jodoushigokan
188
- assert_parses_into_words(mecab, {:words => ['行く', 'そう', 'だ'],
189
- :lemmas => ['行く', 'そう', 'だ'],
190
- :pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb],
191
- :extra => [{:reading => 'イク', :transcription => 'イク', :grammar => nil},
192
- {:reading => 'ソウ', :transcription => 'ソー', :grammar => :auxillary},
193
- {:reading => 'ダ', :transcription => 'ダ', :grammar => nil}],
194
- :tokens => [0..0, 1..1, 2..2]},
195
- '行くそうだ')
196
-
197
- # Meishi setsubi
198
- # TODO: This should maybe be parsed as one noun instead
199
- assert_parses_into_words(mecab, {:words => ['楽し', 'さ'],
200
- :lemmas => ['楽しい', 'さ'],
201
- :pos => [Ve::PartOfSpeech::Adjective, Ve::PartOfSpeech::Suffix],
202
- :extra => [{:reading => 'タノシ', :transcription => 'タノシ', :grammar => nil},
203
- {:reading => 'サ', :transcription => 'サ', :grammar => nil}],
204
- :tokens => [0..0, 1..1]},
205
- '楽しさ')
206
-
207
- # Meishi setsuzokushiteki
208
- assert_parses_into_words(mecab, {:words => ['日本', '対', 'アメリカ'],
209
- :lemmas => ['日本', '対', 'アメリカ'],
210
- :pos => [Ve::PartOfSpeech::ProperNoun, Ve::PartOfSpeech::Conjunction, Ve::PartOfSpeech::ProperNoun],
211
- :extra => [{:reading => 'ニッポン', :transcription => 'ニッポン', :grammar => nil},
212
- {:reading => 'タイ', :transcription => 'タイ', :grammar => nil},
213
- {:reading => 'アメリカ', :transcription => 'アメリカ', :grammar => nil}],
214
- :tokens => [0..0, 1..1, 2..2]},
215
- '日本対アメリカ')
216
-
217
- # Meishi doushihijiritsuteki
218
- assert_parses_into_words(mecab, {:words => ['見て', 'ごらん'],
219
- :lemmas => ['見る', 'ごらん'],
220
- :pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb],
221
- :extra => [{:reading => 'ミテ', :transcription => 'ミテ', :grammar => nil},
222
- {:reading => 'ゴラン', :transcription => 'ゴラン', :grammar => :nominal}],
223
- :tokens => [0..1, 2..2]},
224
- '見てごらん')
225
-
226
- # Settoushi
227
- assert_parses_into_words(mecab, {:words => ['お', '座り'],
228
- :lemmas => ['お', '座り'],
229
- :pos => [Ve::PartOfSpeech::Prefix, Ve::PartOfSpeech::Noun],
230
- :extra => [{:reading => 'オ', :transcription => 'オ', :grammar => nil},
231
- {:reading => 'スワリ', :transcription => 'スワリ', :grammar => nil}],
232
- :tokens => [0..0, 1..1]},
233
- 'お座り')
234
-
235
- # Kigou
236
- assert_parses_into_words(mecab, {:words => ['。'],
237
- :lemmas => ['。'],
238
- :pos => [Ve::PartOfSpeech::Symbol],
239
- :extra => [{:reading => '。', :transcription => '。', :grammar => nil}],
240
- :tokens => [0..0]},
241
- '。')
242
-
243
- # Firaa
244
- assert_parses_into_words(mecab, {:words => ['えと'],
245
- :lemmas => ['えと'],
246
- :pos => [Ve::PartOfSpeech::Interjection],
247
- :extra => [{:reading => 'エト', :transcription => 'エト', :grammar => nil}],
248
- :tokens => [0..0]},
249
- 'えと')
250
-
251
- # Sonota
252
- assert_parses_into_words(mecab, {:words => ['だ', 'ァ'],
253
- :lemmas => ['だ', 'ァ'],
254
- :pos => [Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Other],
255
- :extra => [{:reading => 'ダ', :transcription => 'ダ', :grammar => nil},
256
- {:reading => 'ァ', :transcription => 'ア', :grammar => nil}],
257
- :tokens => [0..0, 1..1]},
258
- 'だァ')
259
-
260
- # Kandoushi
261
- assert_parses_into_words(mecab, {:words => ['おはよう'],
262
- :lemmas => ['おはよう'],
263
- :pos => [Ve::PartOfSpeech::Interjection],
264
- :extra => [{:reading => 'オハヨウ', :transcription => 'オハヨー', :grammar => nil}],
265
- :tokens => [0..0]},
266
- 'おはよう')
267
-
268
- # Rentaishi
269
- assert_parses_into_words(mecab, {:words => ['この'],
270
- :lemmas => ['この'],
271
- :pos => [Ve::PartOfSpeech::Determiner],
272
- :extra => [{:reading => 'コノ', :transcription => 'コノ', :grammar => nil}],
273
- :tokens => [0..0]},
274
- 'この')
275
-
276
- # Setsuzokushi
277
- assert_parses_into_words(mecab, {:words => ['そして'],
278
- :lemmas => ['そして'],
279
- :pos => [Ve::PartOfSpeech::Conjunction],
280
- :extra => [{:reading => 'ソシテ', :transcription => 'ソシテ', :grammar => nil}],
281
- :tokens => [0..0]},
282
- 'そして')
283
-
284
- # Fukushi
285
- assert_parses_into_words(mecab, {:words => ['多分'],
286
- :lemmas => ['多分'],
287
- :pos => [Ve::PartOfSpeech::Adverb],
288
- :extra => [{:reading => 'タブン', :transcription => 'タブン', :grammar => nil}],
289
- :tokens => [0..0]},
290
- '多分')
291
-
292
- # Doushi
293
- assert_parses_into_words(mecab, {:words => ['行く'],
294
- :lemmas => ['行く'],
295
- :pos => [Ve::PartOfSpeech::Verb],
296
- :extra => [{:reading => 'イク', :transcription => 'イク', :grammar => nil}],
297
- :tokens => [0..0]},
298
- '行く')
299
-
300
- assert_parses_into_words(mecab, {:words => ['行かない'],
301
- :lemmas => ['行く'],
302
- :pos => [Ve::PartOfSpeech::Verb],
303
- :extra => [{:reading => 'イカナイ', :transcription => 'イカナイ', :grammar => nil}],
304
- :tokens => [0..1]},
305
- '行かない')
306
-
307
- assert_parses_into_words(mecab, {:words => ['行って', 'きて'],
308
- :lemmas => ['行く', 'くる'],
309
- :pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb],
310
- :extra => [{:reading => 'イッテ', :transcription => 'イッテ', :grammar => nil},
311
- {:reading => 'キテ', :transcription => 'キテ', :grammar => :auxillary}],
312
- :tokens => [0..1, 2..3]},
313
- '行ってきて')
314
-
315
- # Doushi setsubi
316
- assert_parses_into_words(mecab, {:words => ['行かれる'],
317
- :lemmas => ['行く'],
318
- :pos => [Ve::PartOfSpeech::Verb],
319
- :extra => [{:reading => 'イカレル', :transcription => 'イカレル', :grammar => nil}],
320
- :tokens => [0..1]},
321
- '行かれる')
322
-
323
- assert_parses_into_words(mecab, {:words => ['食べさせられた'],
324
- :lemmas => ['食べる'],
325
- :pos => [Ve::PartOfSpeech::Verb],
326
- :extra => [{:reading => 'タベサセラレタ', :transcription => 'タベサセラレタ', :grammar => nil}],
327
- :tokens => [0..3]},
328
- '食べさせられた')
329
-
330
- # Doushi + jodoushi
331
- assert_parses_into_words(mecab, {:words => ['食べました'],
332
- :lemmas => ['食べる'],
333
- :pos => [Ve::PartOfSpeech::Verb],
334
- :extra => [{:reading => 'タベマシタ', :transcription => 'タベマシタ', :grammar => nil}],
335
- :tokens => [0..2]},
336
- '食べました')
337
-
338
- # Keiyoushi
339
- assert_parses_into_words(mecab, {:words => ['寒い'],
340
- :lemmas => ['寒い'],
341
- :pos => [Ve::PartOfSpeech::Adjective],
342
- :extra => [{:reading => 'サムイ', :transcription => 'サムイ', :grammar => nil}],
343
- :tokens => [0..0]},
344
- '寒い')
345
-
346
- assert_parses_into_words(mecab, {:words => ['寒くて'],
347
- :lemmas => ['寒い'],
348
- :pos => [Ve::PartOfSpeech::Adjective],
349
- :extra => [{:reading => 'サムクテ', :transcription => 'サムクテ', :grammar => nil}],
350
- :tokens => [0..1]},
351
- '寒くて')
352
-
353
- assert_parses_into_words(mecab, {:words => ['寒かった'],
354
- :lemmas => ['寒い'],
355
- :pos => [Ve::PartOfSpeech::Adjective],
356
- :extra => [{:reading => 'サムカッタ', :transcription => 'サムカッタ', :grammar => nil}],
357
- :tokens => [0..1]},
358
- '寒かった')
359
-
360
- assert_parses_into_words(mecab, {:words => ['寒ければ'],
361
- :lemmas => ['寒い'],
362
- :pos => [Ve::PartOfSpeech::Adjective],
363
- :extra => [{:reading => 'サムケレバ', :transcription => 'サムケレバ', :grammar => nil}],
364
- :tokens => [0..1]},
365
- '寒ければ')
366
-
367
- assert_parses_into_words(mecab, {:words => ['寒けりゃ'],
368
- :lemmas => ['寒い'],
369
- :pos => [Ve::PartOfSpeech::Adjective],
370
- :extra => [{:reading => 'サムケリャ', :transcription => 'サムケリャ', :grammar => nil}],
371
- :tokens => [0..0]},
372
- '寒けりゃ')
373
-
374
- assert_parses_into_words(mecab, {:words => ['食べたい'],
375
- :lemmas => ['食べる'],
376
- :pos => [Ve::PartOfSpeech::Verb],
377
- :extra => [{:reading => 'タベタイ', :transcription => 'タベタイ', :grammar => nil}],
378
- :tokens => [0..1]},
379
- '食べたい')
380
-
381
- # Joshi
382
- assert_parses_into_words(mecab, {:words => ['日本', 'から'],
383
- :lemmas => ['日本', 'から'],
384
- :pos => [Ve::PartOfSpeech::ProperNoun, Ve::PartOfSpeech::Postposition],
385
- :extra => [{:reading => 'ニッポン', :transcription => 'ニッポン', :grammar => nil},
386
- {:reading => 'カラ', :transcription => 'カラ', :grammar => nil}],
387
- :tokens => [0..0, 1..1]},
388
- '日本から')
389
-
390
- # The copula
391
- assert_parses_into_words(mecab, {:words => ['日本', 'です'],
392
- :lemmas => ['日本', 'です'],
393
- :pos => [Ve::PartOfSpeech::ProperNoun, Ve::PartOfSpeech::Verb],
394
- :extra => [{:reading => 'ニッポン', :transcription => 'ニッポン', :grammar => nil},
395
- {:reading => 'デス', :transcription => 'デス', :grammar => nil}],
396
- :tokens => [0..0, 1..1]},
397
- '日本です')
398
-
399
- assert_parses_into_words(mecab, {:words => ['日本', 'だった'],
400
- :lemmas => ['日本', 'だ'],
401
- :pos => [Ve::PartOfSpeech::ProperNoun, Ve::PartOfSpeech::Verb],
402
- :extra => [{:reading => 'ニッポン', :transcription => 'ニッポン', :grammar => nil},
403
- {:reading => 'ダッタ', :transcription => 'ダッタ', :grammar => nil}],
404
- :tokens => [0..0, 1..2]},
405
- '日本だった')
406
-
407
- # いるから
408
- assert_parses_into_words(mecab, {:words => ['いる', 'から'],
409
- :lemmas => ['いる', 'から'],
410
- :pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Postposition],
411
- :extra => [{:reading => 'イル', :transcription => 'イル', :grammar => nil},
412
- {:reading => 'カラ', :transcription => 'カラ', :grammar => nil}],
413
- :tokens => [0..0, 1..1]},
414
- 'いるから')
415
-
416
- # しているから
417
- assert_parses_into_words(mecab, {:words => ['して', 'いる', 'から'],
418
- :lemmas => ['する', 'いる', 'から'],
419
- :pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Postposition],
420
- :extra => [{:reading => 'シテ', :transcription => 'シテ', :grammar => nil},
421
- {:reading => 'イル', :transcription => 'イル', :grammar => :auxillary},
422
- {:reading => 'カラ', :transcription => 'カラ', :grammar => nil}],
423
- :tokens => [0..0, 1..1, 2..2]},
424
- 'しているから')
425
-
426
- # 基準があるが、
427
- assert_parses_into_words(mecab, {:words => ['して', 'いる', 'から'],
428
- :lemmas => ['する', 'いる', 'から'],
429
- :pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Postposition],
430
- :extra => [{:reading => 'シテ', :transcription => 'シテ', :grammar => nil},
431
- {:reading => 'イル', :transcription => 'イル', :grammar => :auxillary},
432
- {:reading => 'カラ', :transcription => 'カラ', :grammar => nil}],
433
- :tokens => [0..0, 1..1, 2..2]},
434
- '基準があるが、')
435
-
436
- # TODO: xした should parse as adjective?
437
- assert_parses_into_words(mecab, {:words => [],
438
- :lemmas => [],
439
- :pos => [],
440
- :extra => [],
441
- :tokens => []},
442
- '')
443
- end
444
-
445
- def todo_test_word_transliteration
446
- mecab = Ve::Provider::MecabIpadic.new
447
- parse = mecab.parse('日本', :transliterate_words => :latn)
448
-
449
- assert_equal 'nihon', parse.words.first.transliteration(:latn)
450
- end
451
-
452
- end