ve 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ # Encoding: UTF-8
2
+
3
+ require_relative 'test_helper'
4
+
5
+ class MecabIpadicProviderTest < MiniTest::Unit::TestCase
6
+ # TODO: make these run without running mecab
7
+
8
+ def test_should_be_able_to_start
9
+ skip
10
+ mecab = Ve::Provider::MecabIpadic.new
11
+ assert mecab.works?
12
+ end
13
+
14
+ def test_can_parse
15
+ skip
16
+ mecab = Ve::Provider::MecabIpadic.new
17
+ parse = mecab.parse('')
18
+ assert_equal Ve::Parse::MecabIpadic, parse.class
19
+ end
20
+
21
+ end
data/tests/test_helper.rb CHANGED
@@ -2,14 +2,15 @@ require 'rubygems'
2
2
  require 'bundler/setup'
3
3
 
4
4
  require File.expand_path(File.dirname(__FILE__) + "/../lib/ve")
5
- require 'test/unit'
5
+ require 'minitest/autorun'
6
+ require 'mocha'
6
7
 
7
- class Test::Unit::TestCase
8
+ class MiniTest::Unit::TestCase
8
9
 
9
10
  private
10
11
 
11
- def assert_parses_into_words(provider, expected, text)
12
- parse = provider.parse(text)
12
+ def assert_parses_into_words(parse_klass, expected, text, raw)
13
+ parse = parse_klass.new(text, raw)
13
14
  words = parse.words
14
15
  tokens = parse.tokens
15
16
 
data/tests/ve_test.rb CHANGED
@@ -2,17 +2,21 @@
2
2
 
3
3
  require_relative 'test_helper'
4
4
 
5
- class VeTest < Test::Unit::TestCase
5
+ class VeTest < MiniTest::Unit::TestCase
6
+ # TODO: Set these up to run properly
6
7
 
7
8
  def test_get
9
+ skip
8
10
  assert_equal ['日本語', 'です'], Ve.get('日本語です', :ja, :words).collect(&:word)
9
11
  end
10
12
 
11
13
  def test_in
14
+ skip
12
15
  assert_equal ['日本語', 'です'], Ve.in(:ja).words('日本語です').collect(&:word)
13
16
  end
14
17
 
15
18
  def test_http_interface
19
+ skip
16
20
  Ve.config(Ve::HTTPInterface, :url => 'http://localhost:4567')
17
21
  assert_equal ['日本語', 'です'], Ve.in(:ja).words('日本語です').collect(&:word)
18
22
  end
data/ve.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 've'
3
- s.version = '0.0.2'
3
+ s.version = '0.0.3'
4
4
  s.platform = Gem::Platform::RUBY
5
5
  s.authors = ["Kim Ahlström"]
6
6
  s.email = ["kim.ahlstrom@gmail.com"]
metadata CHANGED
@@ -1,29 +1,24 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: ve
3
- version: !ruby/object:Gem::Version
4
- prerelease:
5
- version: 0.0.2
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
6
5
  platform: ruby
7
- authors:
8
- - "Kim Ahlstr\xC3\xB6m"
6
+ authors:
7
+ - Kim Ahlström
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
-
13
- date: 2011-11-12 00:00:00 Z
11
+ date: 2014-06-06 00:00:00.000000000 Z
14
12
  dependencies: []
15
-
16
13
  description: Ve is a linguistic framework for programmers.
17
- email:
14
+ email:
18
15
  - kim.ahlstrom@gmail.com
19
16
  executables: []
20
-
21
17
  extensions: []
22
-
23
18
  extra_rdoc_files: []
24
-
25
- files:
19
+ files:
26
20
  - .gitignore
21
+ - .travis.yml
27
22
  - Gemfile
28
23
  - Gemfile.lock
29
24
  - Rakefile
@@ -43,38 +38,35 @@ files:
43
38
  - lib/ve.rb
44
39
  - lib/word.rb
45
40
  - sinatra/server.rb
46
- - tests/freeling_en_test.rb
41
+ - tests/freeling_en_parse_test.rb
42
+ - tests/freeling_en_provider_test.rb
47
43
  - tests/japanese_transliterators_test.rb
48
- - tests/mecab_ipadic_test.rb
44
+ - tests/mecab_ipadic_parse_test.rb
45
+ - tests/mecab_ipadic_provider_test.rb
49
46
  - tests/test_helper.rb
50
47
  - tests/ve_test.rb
51
48
  - ve.gemspec
52
49
  homepage: http://github.com/kimtaro/ve
53
50
  licenses: []
54
-
51
+ metadata: {}
55
52
  post_install_message:
56
53
  rdoc_options: []
57
-
58
- require_paths:
54
+ require_paths:
59
55
  - lib
60
- required_ruby_version: !ruby/object:Gem::Requirement
61
- none: false
62
- requirements:
63
- - - ">="
64
- - !ruby/object:Gem::Version
65
- version: "0"
66
- required_rubygems_version: !ruby/object:Gem::Requirement
67
- none: false
68
- requirements:
69
- - - ">="
70
- - !ruby/object:Gem::Version
71
- version: "0"
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - '>='
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
72
66
  requirements: []
73
-
74
67
  rubyforge_project:
75
- rubygems_version: 1.8.11
68
+ rubygems_version: 2.0.3
76
69
  signing_key:
77
- specification_version: 3
70
+ specification_version: 4
78
71
  summary: Ve is a linguistic framework for programmers
79
72
  test_files: []
80
-
@@ -1,452 +0,0 @@
1
- # Encoding: UTF-8
2
-
3
- require_relative 'test_helper'
4
-
5
- class MecabIpadicTest < Test::Unit::TestCase
6
-
7
- def test_should_be_able_to_start
8
- mecab = Ve::Provider::MecabIpadic.new
9
- assert mecab.works?
10
- end
11
-
12
- def test_can_parse
13
- mecab = Ve::Provider::MecabIpadic.new
14
- parse = mecab.parse('')
15
- assert_equal Ve::Parse::MecabIpadic, parse.class
16
- end
17
-
18
- def test_all_literals_should_equal_the_input_text
19
- text = <<-EOS
20
- 古池や
21
- 蛙飛び込む
22
- 水の音
23
-
24
- EOS
25
- mecab = Ve::Provider::MecabIpadic.new
26
- parse = mecab.parse(text)
27
- assert_equal text, parse.tokens.collect { |t| t[:literal] }.join
28
- end
29
-
30
- def test_tokens_must_be_created_for_parsed_and_unparsed_text
31
- mecab = Ve::Provider::MecabIpadic.new
32
- parse = mecab.parse(' A B ')
33
- assert_equal [:unparsed, :parsed, :unparsed, :parsed, :unparsed, :sentence_split], parse.tokens.collect { |t| t[:type] }
34
- assert_equal [' ', 'A', ' ', 'B', ' ', ''], parse.tokens.collect { |t| t[:literal] }
35
- assert_equal [0..0, 1..1, 2..4, 5..5, 6..7, nil], parse.tokens.collect { |t| t[:characters] }
36
- end
37
-
38
- def test_tokens_should_not_be_modified_when_attached_to_words
39
- mecab = Ve::Provider::MecabIpadic.new
40
- parse = mecab.parse('悪化する')
41
- tokens = parse.tokens
42
- assert_equal '悪化', tokens[0][:literal]
43
- assert_equal '悪化', tokens[0][:lemma]
44
- end
45
-
46
- def test_sentences
47
- mecab = Ve::Provider::MecabIpadic.new
48
- parse = mecab.parse('これは文章である。で、also containing some Englishですね')
49
- assert_equal ['これは文章である。', 'で、also containing some Englishですね'], parse.sentences
50
- end
51
-
52
- def test_this_shouldnt_crash
53
- mecab = Ve::Provider::MecabIpadic.new
54
- parse = mecab.parse('チューたろうは田中さんの犬です。')
55
- assert_equal 11, parse.tokens.size
56
- end
57
-
58
- def test_this_shouldnt_crash_either
59
- mecab = Ve::Provider::MecabIpadic.new
60
- parse = mecab.parse('三十年式歩兵銃')
61
- assert_equal 7, parse.tokens.size
62
- end
63
-
64
- def test_words
65
- mecab = Ve::Provider::MecabIpadic.new
66
-
67
- # Meishi
68
- assert_parses_into_words(mecab, {:words => ['車'],
69
- :lemmas => ['車'],
70
- :pos => [Ve::PartOfSpeech::Noun],
71
- :extra => [{:reading => 'クルマ', :transcription => 'クルマ', :grammar => nil}],
72
- :tokens => [0..0]},
73
- '車')
74
-
75
- # Koyuumeishi
76
- assert_parses_into_words(mecab, {:words => ['太郎'],
77
- :lemmas => ['太郎'],
78
- :pos => [Ve::PartOfSpeech::ProperNoun],
79
- :extra => [{:reading => 'タロウ', :transcription => 'タロー', :grammar => nil}],
80
- :tokens => [0..0]},
81
- '太郎')
82
-
83
- # Daimeishi
84
- assert_parses_into_words(mecab, {:words => ['彼'],
85
- :lemmas => ['彼'],
86
- :pos => [Ve::PartOfSpeech::Pronoun],
87
- :extra => [{:reading => 'カレ', :transcription => 'カレ', :grammar => nil}],
88
- :tokens => [0..0]},
89
- '彼')
90
-
91
- # Fukushikanou
92
- assert_parses_into_words(mecab, {:words => ['午後に'],
93
- :lemmas => ['午後に'],
94
- :pos => [Ve::PartOfSpeech::Adverb],
95
- :extra => [{:reading => 'ゴゴニ', :transcription => 'ゴゴニ', :grammar => nil}],
96
- :tokens => [0..1]},
97
- '午後に')
98
-
99
- # Kazu
100
- assert_parses_into_words(mecab, {:words => ['一'],
101
- :lemmas => ['一'],
102
- :pos => [Ve::PartOfSpeech::Number],
103
- :extra => [{:reading => 'イチ', :transcription => 'イチ', :grammar => nil}],
104
- :tokens => [0..0]},
105
- '一')
106
-
107
- assert_parses_into_words(mecab, {:words => ['123'],
108
- :lemmas => ['123'],
109
- :pos => [Ve::PartOfSpeech::Number],
110
- :extra => [{:reading => 'イチニサン', :transcription => 'イチニサン', :grammar => nil}],
111
- :tokens => [0..2]},
112
- '123')
113
-
114
- # Sahensetsuzoku + tokumi ta
115
- assert_parses_into_words(mecab, {:words => ['悪化した'],
116
- :lemmas => ['悪化する'],
117
- :pos => [Ve::PartOfSpeech::Verb],
118
- :extra => [{:reading => 'アッカシタ', :transcription => 'アッカシタ', :grammar => nil}],
119
- :tokens => [0..2]},
120
- '悪化した')
121
-
122
- # Keiyoudoushigokan
123
- assert_parses_into_words(mecab, {:words => ['重要な'],
124
- :lemmas => ['重要'],
125
- :pos => [Ve::PartOfSpeech::Adjective],
126
- :extra => [{:reading => 'ジュウヨウナ', :transcription => 'ジューヨーナ', :grammar => nil}],
127
- :tokens => [0..1]},
128
- '重要な')
129
-
130
- # Naikeiyoushigokan
131
- assert_parses_into_words(mecab, {:words => ['とんでもない'],
132
- :lemmas => ['とんでもない'],
133
- :pos => [Ve::PartOfSpeech::Adjective],
134
- :extra => [{:reading => 'トンデモナイ', :transcription => 'トンデモナイ', :grammar => nil}],
135
- :tokens => [0..1]},
136
- 'とんでもない')
137
-
138
- # Meishi hijiritsu fukushikanou
139
- assert_parses_into_words(mecab, {:words => ['の', 'うちに'],
140
- :lemmas => ['の', 'うちに'],
141
- :pos => [Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Adverb],
142
- :extra => [{:reading => 'ノ', :transcription => 'ノ', :grammar => nil},
143
- {:reading => 'ウチニ', :transcription => 'ウチニ', :grammar => nil}],
144
- :tokens => [0..0, 1..2]},
145
- 'のうちに')
146
-
147
- # Meishi hijiritsu jodoushigokan
148
- assert_parses_into_words(mecab, {:words => ['の', 'ような'],
149
- :lemmas => ['の', 'ようだ'],
150
- :pos => [Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Verb],
151
- :extra => [{:reading => 'ノ', :transcription => 'ノ', :grammar => nil},
152
- {:reading => 'ヨウナ', :transcription => 'ヨーナ', :grammar => :auxillary}],
153
- :tokens => [0..0, 1..2]},
154
- 'のような')
155
-
156
- assert_parses_into_words(mecab, {:words => ['の', 'ように'],
157
- :lemmas => ['の', 'ように'],
158
- :pos => [Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Adverb],
159
- :extra => [{:reading => 'ノ', :transcription => 'ノ', :grammar => nil},
160
- {:reading => 'ヨウニ', :transcription => 'ヨーニ', :grammar => nil}],
161
- :tokens => [0..0, 1..2]},
162
- 'のように')
163
-
164
- # Meishi hijiritsu keiyoudoushigokan
165
- assert_parses_into_words(mecab, {:words => ['みたいな'],
166
- :lemmas => ['みたいだ'],
167
- :pos => [Ve::PartOfSpeech::Adjective],
168
- :extra => [{:reading => 'ミタイナ', :transcription => 'ミタイナ', :grammar => nil}],
169
- :tokens => [0..1]},
170
- 'みたいな')
171
-
172
- assert_parses_into_words(mecab, {:words => ['みたいの'],
173
- :lemmas => ['みたいの'],
174
- :pos => [Ve::PartOfSpeech::Adjective],
175
- :extra => [{:reading => 'ミタイノ', :transcription => 'ミタイノ', :grammar => nil}],
176
- :tokens => [0..1]},
177
- 'みたいの')
178
-
179
- assert_parses_into_words(mecab, {:words => ['みたい', 'だ'],
180
- :lemmas => ['みたい', 'だ'],
181
- :pos => [Ve::PartOfSpeech::Adjective, Ve::PartOfSpeech::Verb],
182
- :extra => [{:reading => 'ミタイ', :transcription => 'ミタイ', :grammar => nil},
183
- {:reading => 'ダ', :transcription => 'ダ', :grammar => nil}],
184
- :tokens => [0..0, 1..1]},
185
- 'みたいだ')
186
-
187
- # Meishi tokushu jodoushigokan
188
- assert_parses_into_words(mecab, {:words => ['行く', 'そう', 'だ'],
189
- :lemmas => ['行く', 'そう', 'だ'],
190
- :pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb],
191
- :extra => [{:reading => 'イク', :transcription => 'イク', :grammar => nil},
192
- {:reading => 'ソウ', :transcription => 'ソー', :grammar => :auxillary},
193
- {:reading => 'ダ', :transcription => 'ダ', :grammar => nil}],
194
- :tokens => [0..0, 1..1, 2..2]},
195
- '行くそうだ')
196
-
197
- # Meishi setsubi
198
- # TODO: This should maybe be parsed as one noun instead
199
- assert_parses_into_words(mecab, {:words => ['楽し', 'さ'],
200
- :lemmas => ['楽しい', 'さ'],
201
- :pos => [Ve::PartOfSpeech::Adjective, Ve::PartOfSpeech::Suffix],
202
- :extra => [{:reading => 'タノシ', :transcription => 'タノシ', :grammar => nil},
203
- {:reading => 'サ', :transcription => 'サ', :grammar => nil}],
204
- :tokens => [0..0, 1..1]},
205
- '楽しさ')
206
-
207
- # Meishi setsuzokushiteki
208
- assert_parses_into_words(mecab, {:words => ['日本', '対', 'アメリカ'],
209
- :lemmas => ['日本', '対', 'アメリカ'],
210
- :pos => [Ve::PartOfSpeech::ProperNoun, Ve::PartOfSpeech::Conjunction, Ve::PartOfSpeech::ProperNoun],
211
- :extra => [{:reading => 'ニッポン', :transcription => 'ニッポン', :grammar => nil},
212
- {:reading => 'タイ', :transcription => 'タイ', :grammar => nil},
213
- {:reading => 'アメリカ', :transcription => 'アメリカ', :grammar => nil}],
214
- :tokens => [0..0, 1..1, 2..2]},
215
- '日本対アメリカ')
216
-
217
- # Meishi doushihijiritsuteki
218
- assert_parses_into_words(mecab, {:words => ['見て', 'ごらん'],
219
- :lemmas => ['見る', 'ごらん'],
220
- :pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb],
221
- :extra => [{:reading => 'ミテ', :transcription => 'ミテ', :grammar => nil},
222
- {:reading => 'ゴラン', :transcription => 'ゴラン', :grammar => :nominal}],
223
- :tokens => [0..1, 2..2]},
224
- '見てごらん')
225
-
226
- # Settoushi
227
- assert_parses_into_words(mecab, {:words => ['お', '座り'],
228
- :lemmas => ['お', '座り'],
229
- :pos => [Ve::PartOfSpeech::Prefix, Ve::PartOfSpeech::Noun],
230
- :extra => [{:reading => 'オ', :transcription => 'オ', :grammar => nil},
231
- {:reading => 'スワリ', :transcription => 'スワリ', :grammar => nil}],
232
- :tokens => [0..0, 1..1]},
233
- 'お座り')
234
-
235
- # Kigou
236
- assert_parses_into_words(mecab, {:words => ['。'],
237
- :lemmas => ['。'],
238
- :pos => [Ve::PartOfSpeech::Symbol],
239
- :extra => [{:reading => '。', :transcription => '。', :grammar => nil}],
240
- :tokens => [0..0]},
241
- '。')
242
-
243
- # Firaa
244
- assert_parses_into_words(mecab, {:words => ['えと'],
245
- :lemmas => ['えと'],
246
- :pos => [Ve::PartOfSpeech::Interjection],
247
- :extra => [{:reading => 'エト', :transcription => 'エト', :grammar => nil}],
248
- :tokens => [0..0]},
249
- 'えと')
250
-
251
- # Sonota
252
- assert_parses_into_words(mecab, {:words => ['だ', 'ァ'],
253
- :lemmas => ['だ', 'ァ'],
254
- :pos => [Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Other],
255
- :extra => [{:reading => 'ダ', :transcription => 'ダ', :grammar => nil},
256
- {:reading => 'ァ', :transcription => 'ア', :grammar => nil}],
257
- :tokens => [0..0, 1..1]},
258
- 'だァ')
259
-
260
- # Kandoushi
261
- assert_parses_into_words(mecab, {:words => ['おはよう'],
262
- :lemmas => ['おはよう'],
263
- :pos => [Ve::PartOfSpeech::Interjection],
264
- :extra => [{:reading => 'オハヨウ', :transcription => 'オハヨー', :grammar => nil}],
265
- :tokens => [0..0]},
266
- 'おはよう')
267
-
268
- # Rentaishi
269
- assert_parses_into_words(mecab, {:words => ['この'],
270
- :lemmas => ['この'],
271
- :pos => [Ve::PartOfSpeech::Determiner],
272
- :extra => [{:reading => 'コノ', :transcription => 'コノ', :grammar => nil}],
273
- :tokens => [0..0]},
274
- 'この')
275
-
276
- # Setsuzokushi
277
- assert_parses_into_words(mecab, {:words => ['そして'],
278
- :lemmas => ['そして'],
279
- :pos => [Ve::PartOfSpeech::Conjunction],
280
- :extra => [{:reading => 'ソシテ', :transcription => 'ソシテ', :grammar => nil}],
281
- :tokens => [0..0]},
282
- 'そして')
283
-
284
- # Fukushi
285
- assert_parses_into_words(mecab, {:words => ['多分'],
286
- :lemmas => ['多分'],
287
- :pos => [Ve::PartOfSpeech::Adverb],
288
- :extra => [{:reading => 'タブン', :transcription => 'タブン', :grammar => nil}],
289
- :tokens => [0..0]},
290
- '多分')
291
-
292
- # Doushi
293
- assert_parses_into_words(mecab, {:words => ['行く'],
294
- :lemmas => ['行く'],
295
- :pos => [Ve::PartOfSpeech::Verb],
296
- :extra => [{:reading => 'イク', :transcription => 'イク', :grammar => nil}],
297
- :tokens => [0..0]},
298
- '行く')
299
-
300
- assert_parses_into_words(mecab, {:words => ['行かない'],
301
- :lemmas => ['行く'],
302
- :pos => [Ve::PartOfSpeech::Verb],
303
- :extra => [{:reading => 'イカナイ', :transcription => 'イカナイ', :grammar => nil}],
304
- :tokens => [0..1]},
305
- '行かない')
306
-
307
- assert_parses_into_words(mecab, {:words => ['行って', 'きて'],
308
- :lemmas => ['行く', 'くる'],
309
- :pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb],
310
- :extra => [{:reading => 'イッテ', :transcription => 'イッテ', :grammar => nil},
311
- {:reading => 'キテ', :transcription => 'キテ', :grammar => :auxillary}],
312
- :tokens => [0..1, 2..3]},
313
- '行ってきて')
314
-
315
- # Doushi setsubi
316
- assert_parses_into_words(mecab, {:words => ['行かれる'],
317
- :lemmas => ['行く'],
318
- :pos => [Ve::PartOfSpeech::Verb],
319
- :extra => [{:reading => 'イカレル', :transcription => 'イカレル', :grammar => nil}],
320
- :tokens => [0..1]},
321
- '行かれる')
322
-
323
- assert_parses_into_words(mecab, {:words => ['食べさせられた'],
324
- :lemmas => ['食べる'],
325
- :pos => [Ve::PartOfSpeech::Verb],
326
- :extra => [{:reading => 'タベサセラレタ', :transcription => 'タベサセラレタ', :grammar => nil}],
327
- :tokens => [0..3]},
328
- '食べさせられた')
329
-
330
- # Doushi + jodoushi
331
- assert_parses_into_words(mecab, {:words => ['食べました'],
332
- :lemmas => ['食べる'],
333
- :pos => [Ve::PartOfSpeech::Verb],
334
- :extra => [{:reading => 'タベマシタ', :transcription => 'タベマシタ', :grammar => nil}],
335
- :tokens => [0..2]},
336
- '食べました')
337
-
338
- # Keiyoushi
339
- assert_parses_into_words(mecab, {:words => ['寒い'],
340
- :lemmas => ['寒い'],
341
- :pos => [Ve::PartOfSpeech::Adjective],
342
- :extra => [{:reading => 'サムイ', :transcription => 'サムイ', :grammar => nil}],
343
- :tokens => [0..0]},
344
- '寒い')
345
-
346
- assert_parses_into_words(mecab, {:words => ['寒くて'],
347
- :lemmas => ['寒い'],
348
- :pos => [Ve::PartOfSpeech::Adjective],
349
- :extra => [{:reading => 'サムクテ', :transcription => 'サムクテ', :grammar => nil}],
350
- :tokens => [0..1]},
351
- '寒くて')
352
-
353
- assert_parses_into_words(mecab, {:words => ['寒かった'],
354
- :lemmas => ['寒い'],
355
- :pos => [Ve::PartOfSpeech::Adjective],
356
- :extra => [{:reading => 'サムカッタ', :transcription => 'サムカッタ', :grammar => nil}],
357
- :tokens => [0..1]},
358
- '寒かった')
359
-
360
- assert_parses_into_words(mecab, {:words => ['寒ければ'],
361
- :lemmas => ['寒い'],
362
- :pos => [Ve::PartOfSpeech::Adjective],
363
- :extra => [{:reading => 'サムケレバ', :transcription => 'サムケレバ', :grammar => nil}],
364
- :tokens => [0..1]},
365
- '寒ければ')
366
-
367
- assert_parses_into_words(mecab, {:words => ['寒けりゃ'],
368
- :lemmas => ['寒い'],
369
- :pos => [Ve::PartOfSpeech::Adjective],
370
- :extra => [{:reading => 'サムケリャ', :transcription => 'サムケリャ', :grammar => nil}],
371
- :tokens => [0..0]},
372
- '寒けりゃ')
373
-
374
- assert_parses_into_words(mecab, {:words => ['食べたい'],
375
- :lemmas => ['食べる'],
376
- :pos => [Ve::PartOfSpeech::Verb],
377
- :extra => [{:reading => 'タベタイ', :transcription => 'タベタイ', :grammar => nil}],
378
- :tokens => [0..1]},
379
- '食べたい')
380
-
381
- # Joshi
382
- assert_parses_into_words(mecab, {:words => ['日本', 'から'],
383
- :lemmas => ['日本', 'から'],
384
- :pos => [Ve::PartOfSpeech::ProperNoun, Ve::PartOfSpeech::Postposition],
385
- :extra => [{:reading => 'ニッポン', :transcription => 'ニッポン', :grammar => nil},
386
- {:reading => 'カラ', :transcription => 'カラ', :grammar => nil}],
387
- :tokens => [0..0, 1..1]},
388
- '日本から')
389
-
390
- # The copula
391
- assert_parses_into_words(mecab, {:words => ['日本', 'です'],
392
- :lemmas => ['日本', 'です'],
393
- :pos => [Ve::PartOfSpeech::ProperNoun, Ve::PartOfSpeech::Verb],
394
- :extra => [{:reading => 'ニッポン', :transcription => 'ニッポン', :grammar => nil},
395
- {:reading => 'デス', :transcription => 'デス', :grammar => nil}],
396
- :tokens => [0..0, 1..1]},
397
- '日本です')
398
-
399
- assert_parses_into_words(mecab, {:words => ['日本', 'だった'],
400
- :lemmas => ['日本', 'だ'],
401
- :pos => [Ve::PartOfSpeech::ProperNoun, Ve::PartOfSpeech::Verb],
402
- :extra => [{:reading => 'ニッポン', :transcription => 'ニッポン', :grammar => nil},
403
- {:reading => 'ダッタ', :transcription => 'ダッタ', :grammar => nil}],
404
- :tokens => [0..0, 1..2]},
405
- '日本だった')
406
-
407
- # いるから
408
- assert_parses_into_words(mecab, {:words => ['いる', 'から'],
409
- :lemmas => ['いる', 'から'],
410
- :pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Postposition],
411
- :extra => [{:reading => 'イル', :transcription => 'イル', :grammar => nil},
412
- {:reading => 'カラ', :transcription => 'カラ', :grammar => nil}],
413
- :tokens => [0..0, 1..1]},
414
- 'いるから')
415
-
416
- # しているから
417
- assert_parses_into_words(mecab, {:words => ['して', 'いる', 'から'],
418
- :lemmas => ['する', 'いる', 'から'],
419
- :pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Postposition],
420
- :extra => [{:reading => 'シテ', :transcription => 'シテ', :grammar => nil},
421
- {:reading => 'イル', :transcription => 'イル', :grammar => :auxillary},
422
- {:reading => 'カラ', :transcription => 'カラ', :grammar => nil}],
423
- :tokens => [0..0, 1..1, 2..2]},
424
- 'しているから')
425
-
426
- # 基準があるが、
427
- assert_parses_into_words(mecab, {:words => ['して', 'いる', 'から'],
428
- :lemmas => ['する', 'いる', 'から'],
429
- :pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Postposition],
430
- :extra => [{:reading => 'シテ', :transcription => 'シテ', :grammar => nil},
431
- {:reading => 'イル', :transcription => 'イル', :grammar => :auxillary},
432
- {:reading => 'カラ', :transcription => 'カラ', :grammar => nil}],
433
- :tokens => [0..0, 1..1, 2..2]},
434
- '基準があるが、')
435
-
436
- # TODO: xした should parse as adjective?
437
- assert_parses_into_words(mecab, {:words => [],
438
- :lemmas => [],
439
- :pos => [],
440
- :extra => [],
441
- :tokens => []},
442
- '')
443
- end
444
-
445
- def todo_test_word_transliteration
446
- mecab = Ve::Provider::MecabIpadic.new
447
- parse = mecab.parse('日本', :transliterate_words => :latn)
448
-
449
- assert_equal 'nihon', parse.words.first.transliteration(:latn)
450
- end
451
-
452
- end