ve 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.travis.yml +9 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +6 -0
- data/lib/providers/freeling_en.rb +4 -2
- data/lib/providers/mecab_ipadic.rb +52 -32
- data/lib/ve.rb +15 -4
- data/tests/{freeling_en_test.rb → freeling_en_parse_test.rb} +37 -48
- data/tests/freeling_en_provider_test.rb +38 -0
- data/tests/japanese_transliterators_test.rb +1 -1
- data/tests/mecab_ipadic_parse_test.rb +772 -0
- data/tests/mecab_ipadic_provider_test.rb +21 -0
- data/tests/test_helper.rb +5 -4
- data/tests/ve_test.rb +5 -1
- data/ve.gemspec +1 -1
- metadata +27 -35
- data/tests/mecab_ipadic_test.rb +0 -452
@@ -0,0 +1,38 @@
|
|
1
|
+
# Encoding: UTF-8
|
2
|
+
|
3
|
+
require_relative 'test_helper'
|
4
|
+
|
5
|
+
class FreelingEnProviderTest < MiniTest::Unit::TestCase
|
6
|
+
# TODO: Make these tests not require running freeling
|
7
|
+
|
8
|
+
def test_should_be_able_to_start
|
9
|
+
skip
|
10
|
+
Ve::Provider::FreelingEn.any_instance.expects(:start!).returns({})
|
11
|
+
Ve::Provider::FreelingEn.any_instance.expects(:parse).returns(Ve::Parse::FreelingEn.new("Wrote", ["Wrote write VBD 1", ""]))
|
12
|
+
freeling = Ve::Provider::FreelingEn.new
|
13
|
+
assert freeling.works?
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_doesnt_die_on_japanese
|
17
|
+
skip
|
18
|
+
freeling = Ve::Provider::FreelingEn.new
|
19
|
+
parse = freeling.parse('これは日本語です')
|
20
|
+
assert_equal Ve::Parse::FreelingEn, parse.class
|
21
|
+
end
|
22
|
+
|
23
|
+
# TODO: UTF-8 handling
|
24
|
+
def test_can_handle_utf8
|
25
|
+
skip
|
26
|
+
freeling = Ve::Provider::FreelingEn.new
|
27
|
+
parse = freeling.parse('I’m')
|
28
|
+
assert_equal ['I\'m'], parse.tokens.collect { |t| t[:literal] }
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_can_parse
|
32
|
+
skip
|
33
|
+
freeling = Ve::Provider::FreelingEn.new
|
34
|
+
parse = freeling.parse('')
|
35
|
+
assert_equal Ve::Parse::FreelingEn, parse.class
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
require_relative 'test_helper'
|
4
4
|
|
5
|
-
class JapaneseTransliteratorsTest <
|
5
|
+
class JapaneseTransliteratorsTest < MiniTest::Unit::TestCase
|
6
6
|
|
7
7
|
KATAKANA = "ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶ"
|
8
8
|
HIRAGANA = "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ"
|
@@ -0,0 +1,772 @@
|
|
1
|
+
# Encoding: UTF-8
|
2
|
+
|
3
|
+
require_relative 'test_helper'
|
4
|
+
|
5
|
+
class MecabIpadicParseTest < MiniTest::Unit::TestCase
|
6
|
+
|
7
|
+
def test_all_literals_should_equal_the_input_text
|
8
|
+
text = <<-EOS
|
9
|
+
古池や
|
10
|
+
蛙飛び込む
|
11
|
+
水の音
|
12
|
+
|
13
|
+
EOS
|
14
|
+
raw = <<-EOR.split("\n")
|
15
|
+
古池 名詞,固有名詞,一般,*,*,*,古池,フルイケ,フルイケ
|
16
|
+
や 助動詞,*,*,*,特殊・ヤ,基本形,や,ヤ,ヤ
|
17
|
+
EOS
|
18
|
+
蛙 名詞,一般,*,*,*,*,蛙,カエル,カエル
|
19
|
+
飛び込む 動詞,自立,*,*,五段・マ行,基本形,飛び込む,トビコム,トビコム
|
20
|
+
EOS
|
21
|
+
水 名詞,一般,*,*,*,*,水,ミズ,ミズ
|
22
|
+
の 助詞,連体化,*,*,*,*,の,ノ,ノ
|
23
|
+
音 名詞,一般,*,*,*,*,音,オト,オト
|
24
|
+
EOS
|
25
|
+
EOR
|
26
|
+
parse = Ve::Parse::MecabIpadic.new(text, raw)
|
27
|
+
assert_equal text, parse.tokens.collect { |t| t[:literal] }.join
|
28
|
+
end
|
29
|
+
|
30
|
+
def test_tokens_must_be_created_for_parsed_and_unparsed_text
|
31
|
+
text = " A   B  "
|
32
|
+
raw = <<-EOR.split("\n")
|
33
|
+
A 名詞,固有名詞,組織,*,*,*,*
|
34
|
+
B 名詞,一般,*,*,*,*,*
|
35
|
+
EOS
|
36
|
+
EOR
|
37
|
+
parse = Ve::Parse::MecabIpadic.new(text, raw)
|
38
|
+
assert_equal [:unparsed, :parsed, :unparsed, :parsed, :unparsed, :sentence_split], parse.tokens.collect { |t| t[:type] }
|
39
|
+
assert_equal [' ', 'A', '   ', 'B', '  ', ''], parse.tokens.collect { |t| t[:literal] }
|
40
|
+
assert_equal [0..0, 1..1, 2..4, 5..5, 6..7, nil], parse.tokens.collect { |t| t[:characters] }
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_tokens_should_not_be_modified_when_attached_to_words
|
44
|
+
text = '悪化する'
|
45
|
+
raw = <<-EOR.split("\n")
|
46
|
+
悪化 名詞,サ変接続,*,*,*,*,悪化,アッカ,アッカ
|
47
|
+
する 動詞,自立,*,*,サ変・スル,基本形,する,スル,スル
|
48
|
+
EOS
|
49
|
+
EOR
|
50
|
+
parse = Ve::Parse::MecabIpadic.new(text, raw)
|
51
|
+
tokens = parse.tokens
|
52
|
+
assert_equal '悪化', tokens[0][:literal]
|
53
|
+
assert_equal '悪化', tokens[0][:lemma]
|
54
|
+
end
|
55
|
+
|
56
|
+
def test_sentences
|
57
|
+
text = "これは文章である。で、also containing some Englishですね"
|
58
|
+
raw = <<-EOR.split("\n")
|
59
|
+
これ 名詞,代名詞,一般,*,*,*,これ,コレ,コレ
|
60
|
+
は 助詞,係助詞,*,*,*,*,は,ハ,ワ
|
61
|
+
文章 名詞,一般,*,*,*,*,文章,ブンショウ,ブンショー
|
62
|
+
で 助動詞,*,*,*,特殊・ダ,連用形,だ,デ,デ
|
63
|
+
ある 助動詞,*,*,*,五段・ラ行アル,基本形,ある,アル,アル
|
64
|
+
。 記号,句点,*,*,*,*,。,。,。
|
65
|
+
で 助動詞,*,*,*,特殊・ダ,連用形,だ,デ,デ
|
66
|
+
、 記号,読点,*,*,*,*,、,、,、
|
67
|
+
also 名詞,固有名詞,組織,*,*,*,*
|
68
|
+
containing 名詞,一般,*,*,*,*,*
|
69
|
+
some 名詞,一般,*,*,*,*,*
|
70
|
+
English 名詞,一般,*,*,*,*,*
|
71
|
+
です 助動詞,*,*,*,特殊・デス,基本形,です,デス,デス
|
72
|
+
ね 助詞,終助詞,*,*,*,*,ね,ネ,ネ
|
73
|
+
EOS
|
74
|
+
EOR
|
75
|
+
parse = Ve::Parse::MecabIpadic.new(text, raw)
|
76
|
+
assert_equal ['これは文章である。', 'で、also containing some Englishですね'], parse.sentences
|
77
|
+
end
|
78
|
+
|
79
|
+
def test_this_shouldnt_crash
|
80
|
+
text = 'チューたろうは田中さんの犬です。'
|
81
|
+
raw = <<-EOR.split("\n")
|
82
|
+
チュー 名詞,一般,*,*,*,*,*
|
83
|
+
たろ 助動詞,*,*,*,特殊・タ,未然形,た,タロ,タロ
|
84
|
+
う 助動詞,*,*,*,不変化型,基本形,う,ウ,ウ
|
85
|
+
は 助詞,係助詞,*,*,*,*,は,ハ,ワ
|
86
|
+
田中 名詞,固有名詞,人名,姓,*,*,田中,タナカ,タナカ
|
87
|
+
さん 名詞,接尾,人名,*,*,*,さん,サン,サン
|
88
|
+
の 助詞,連体化,*,*,*,*,の,ノ,ノ
|
89
|
+
犬 名詞,一般,*,*,*,*,犬,イヌ,イヌ
|
90
|
+
です 助動詞,*,*,*,特殊・デス,基本形,です,デス,デス
|
91
|
+
。 記号,句点,*,*,*,*,。,。,。
|
92
|
+
EOS
|
93
|
+
EOR
|
94
|
+
parse = Ve::Parse::MecabIpadic.new(text, raw)
|
95
|
+
assert_equal 11, parse.tokens.size
|
96
|
+
end
|
97
|
+
|
98
|
+
def test_this_shouldnt_crash_either
|
99
|
+
text = '三十年式歩兵銃'
|
100
|
+
raw = <<-EOR.split("\n")
|
101
|
+
三 名詞,数,*,*,*,*,三,サン,サン
|
102
|
+
十 名詞,数,*,*,*,*,十,ジュウ,ジュー
|
103
|
+
年 名詞,接尾,助数詞,*,*,*,年,ネン,ネン
|
104
|
+
式 名詞,接尾,一般,*,*,*,式,シキ,シキ
|
105
|
+
歩兵 名詞,一般,*,*,*,*,歩兵,ホヘイ,ホヘイ
|
106
|
+
銃 名詞,一般,*,*,*,*,銃,ジュウ,ジュー
|
107
|
+
EOS
|
108
|
+
EOR
|
109
|
+
parse = Ve::Parse::MecabIpadic.new(text, raw)
|
110
|
+
assert_equal 7, parse.tokens.size
|
111
|
+
end
|
112
|
+
|
113
|
+
def test_words
|
114
|
+
# Meishi
|
115
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['車'],
|
116
|
+
:lemmas => ['車'],
|
117
|
+
:pos => [Ve::PartOfSpeech::Noun],
|
118
|
+
:extra => [{:reading => 'クルマ', :transcription => 'クルマ', :grammar => nil}],
|
119
|
+
:tokens => [0..0]},
|
120
|
+
'車', <<-EOR.split("\n"))
|
121
|
+
車 名詞,一般,*,*,*,*,車,クルマ,クルマ
|
122
|
+
EOS
|
123
|
+
EOR
|
124
|
+
|
125
|
+
# Koyuumeishi
|
126
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['太郎'],
|
127
|
+
:lemmas => ['太郎'],
|
128
|
+
:pos => [Ve::PartOfSpeech::ProperNoun],
|
129
|
+
:extra => [{:reading => 'タロウ', :transcription => 'タロー', :grammar => nil}],
|
130
|
+
:tokens => [0..0]},
|
131
|
+
'太郎', <<-EOR.split("\n"))
|
132
|
+
太郎 名詞,固有名詞,人名,名,*,*,太郎,タロウ,タロー
|
133
|
+
EOS
|
134
|
+
EOR
|
135
|
+
|
136
|
+
# Daimeishi
|
137
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['彼'],
|
138
|
+
:lemmas => ['彼'],
|
139
|
+
:pos => [Ve::PartOfSpeech::Pronoun],
|
140
|
+
:extra => [{:reading => 'カレ', :transcription => 'カレ', :grammar => nil}],
|
141
|
+
:tokens => [0..0]},
|
142
|
+
'彼', <<-EOR.split("\n"))
|
143
|
+
彼 名詞,代名詞,一般,*,*,*,彼,カレ,カレ
|
144
|
+
EOS
|
145
|
+
EOR
|
146
|
+
|
147
|
+
# Fukushikanou
|
148
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['午後', 'に'],
|
149
|
+
:lemmas => ['午後', 'に'],
|
150
|
+
:pos => [Ve::PartOfSpeech::Adverb, Ve::PartOfSpeech::Postposition],
|
151
|
+
:extra => [{:reading => 'ゴゴ', :transcription => 'ゴゴ', :grammar => nil}, {:reading=>"ニ", :transcription=>"ニ", :grammar=>nil}],
|
152
|
+
:tokens => [0..0, 1..1]},
|
153
|
+
'午後に', <<-EOR.split("\n"))
|
154
|
+
午後 名詞,副詞可能,*,*,*,*,午後,ゴゴ,ゴゴ
|
155
|
+
に 助詞,格助詞,一般,*,*,*,に,ニ,ニ
|
156
|
+
EOS
|
157
|
+
EOR
|
158
|
+
|
159
|
+
# Akirakani shita should be "akiraka ni shita"
|
160
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ["明らか", "に", "した"],
|
161
|
+
:lemmas => ["明らか", "に", "する"],
|
162
|
+
:pos => [Ve::PartOfSpeech::Adverb, Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Verb],
|
163
|
+
:extra => [{:reading=>"アキラカ", :transcription=>"アキラカ", :grammar=>nil}, {:reading=>"ニ", :transcription=>"ニ", :grammar=>nil}, {:reading=>"シタ", :transcription=>"シタ", :grammar=>nil}],
|
164
|
+
:tokens => [0..0, 1..1, 2..3]},
|
165
|
+
'明らかにした', <<-EOR.split("\n"))
|
166
|
+
明らか 名詞,形容動詞語幹,*,*,*,*,明らか,アキラカ,アキラカ
|
167
|
+
に 助詞,格助詞,一般,*,*,*,に,ニ,ニ
|
168
|
+
し 動詞,自立,*,*,サ変・スル,連用形,する,シ,シ
|
169
|
+
た 助動詞,*,*,*,特殊・タ,基本形,た,タ,タ
|
170
|
+
EOS
|
171
|
+
EOR
|
172
|
+
|
173
|
+
# Kazu
|
174
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['一'],
|
175
|
+
:lemmas => ['一'],
|
176
|
+
:pos => [Ve::PartOfSpeech::Number],
|
177
|
+
:extra => [{:reading => 'イチ', :transcription => 'イチ', :grammar => nil}],
|
178
|
+
:tokens => [0..0]},
|
179
|
+
'一', <<-EOR.split("\n"))
|
180
|
+
一 名詞,数,*,*,*,*,一,イチ,イチ
|
181
|
+
EOS
|
182
|
+
EOR
|
183
|
+
|
184
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['123'],
|
185
|
+
:lemmas => ['123'],
|
186
|
+
:pos => [Ve::PartOfSpeech::Number],
|
187
|
+
:extra => [{:reading => 'イチニサン', :transcription => 'イチニサン', :grammar => nil}],
|
188
|
+
:tokens => [0..2]},
|
189
|
+
'123', <<-EOR.split("\n"))
|
190
|
+
1 名詞,数,*,*,*,*,1,イチ,イチ
|
191
|
+
2 名詞,数,*,*,*,*,2,ニ,ニ
|
192
|
+
3 名詞,数,*,*,*,*,3,サン,サン
|
193
|
+
EOS
|
194
|
+
EOR
|
195
|
+
|
196
|
+
# Sahensetsuzoku + tokushu ta
|
197
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['悪化した'],
|
198
|
+
:lemmas => ['悪化する'],
|
199
|
+
:pos => [Ve::PartOfSpeech::Verb],
|
200
|
+
:extra => [{:reading => 'アッカシタ', :transcription => 'アッカシタ', :grammar => nil}],
|
201
|
+
:tokens => [0..2]},
|
202
|
+
'悪化した', <<-EOR.split("\n"))
|
203
|
+
悪化 名詞,サ変接続,*,*,*,*,悪化,アッカ,アッカ
|
204
|
+
し 動詞,自立,*,*,サ変・スル,連用形,する,シ,シ
|
205
|
+
た 助動詞,*,*,*,特殊・タ,基本形,た,タ,タ
|
206
|
+
EOS
|
207
|
+
EOR
|
208
|
+
|
209
|
+
# Keiyoudoushigokan
|
210
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['重要な'],
|
211
|
+
:lemmas => ['重要'],
|
212
|
+
:pos => [Ve::PartOfSpeech::Adjective],
|
213
|
+
:extra => [{:reading => 'ジュウヨウナ', :transcription => 'ジューヨーナ', :grammar => nil}],
|
214
|
+
:tokens => [0..1]},
|
215
|
+
'重要な', <<-EOR.split("\n"))
|
216
|
+
重要 名詞,形容動詞語幹,*,*,*,*,重要,ジュウヨウ,ジューヨー
|
217
|
+
な 助動詞,*,*,*,特殊・ダ,体言接続,だ,ナ,ナ
|
218
|
+
EOS
|
219
|
+
EOR
|
220
|
+
|
221
|
+
# Naikeiyoushigokan
|
222
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['とんでもない'],
|
223
|
+
:lemmas => ['とんでもない'],
|
224
|
+
:pos => [Ve::PartOfSpeech::Adjective],
|
225
|
+
:extra => [{:reading => 'トンデモナイ', :transcription => 'トンデモナイ', :grammar => nil}],
|
226
|
+
:tokens => [0..1]},
|
227
|
+
'とんでもない', <<-EOR.split("\n"))
|
228
|
+
とんでも 名詞,ナイ形容詞語幹,*,*,*,*,とんでも,トンデモ,トンデモ
|
229
|
+
ない 助動詞,*,*,*,特殊・ナイ,基本形,ない,ナイ,ナイ
|
230
|
+
EOS
|
231
|
+
EOR
|
232
|
+
|
233
|
+
# Meishi hijiritsu fukushikanou
|
234
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['の', 'うちに'],
|
235
|
+
:lemmas => ['の', 'うちに'],
|
236
|
+
:pos => [Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Adverb],
|
237
|
+
:extra => [{:reading => 'ノ', :transcription => 'ノ', :grammar => nil},
|
238
|
+
{:reading => 'ウチニ', :transcription => 'ウチニ', :grammar => nil}],
|
239
|
+
:tokens => [0..0, 1..2]},
|
240
|
+
'のうちに', <<-EOR.split("\n"))
|
241
|
+
の 助詞,連体化,*,*,*,*,の,ノ,ノ
|
242
|
+
うち 名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ
|
243
|
+
に 助詞,格助詞,一般,*,*,*,に,ニ,ニ
|
244
|
+
EOS
|
245
|
+
EOR
|
246
|
+
|
247
|
+
# Meishi hijiritsu jodoushigokan
|
248
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['の', 'ような'],
|
249
|
+
:lemmas => ['の', 'ようだ'],
|
250
|
+
:pos => [Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Verb],
|
251
|
+
:extra => [{:reading => 'ノ', :transcription => 'ノ', :grammar => nil},
|
252
|
+
{:reading => 'ヨウナ', :transcription => 'ヨーナ', :grammar => :auxillary}],
|
253
|
+
:tokens => [0..0, 1..2]},
|
254
|
+
'のような', <<-EOR.split("\n"))
|
255
|
+
の 助詞,連体化,*,*,*,*,の,ノ,ノ
|
256
|
+
よう 名詞,非自立,助動詞語幹,*,*,*,よう,ヨウ,ヨー
|
257
|
+
な 助動詞,*,*,*,特殊・ダ,体言接続,だ,ナ,ナ
|
258
|
+
EOS
|
259
|
+
EOR
|
260
|
+
|
261
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['の', 'ように'],
|
262
|
+
:lemmas => ['の', 'ように'],
|
263
|
+
:pos => [Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Adverb],
|
264
|
+
:extra => [{:reading => 'ノ', :transcription => 'ノ', :grammar => nil},
|
265
|
+
{:reading => 'ヨウニ', :transcription => 'ヨーニ', :grammar => nil}],
|
266
|
+
:tokens => [0..0, 1..2]},
|
267
|
+
'のように', <<-EOR.split("\n"))
|
268
|
+
の 助詞,連体化,*,*,*,*,の,ノ,ノ
|
269
|
+
よう 名詞,非自立,助動詞語幹,*,*,*,よう,ヨウ,ヨー
|
270
|
+
に 助詞,副詞化,*,*,*,*,に,ニ,ニ
|
271
|
+
EOS
|
272
|
+
EOR
|
273
|
+
|
274
|
+
# Meishi hijiritsu keiyoudoushigokan
|
275
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['みたいな'],
|
276
|
+
:lemmas => ['みたいだ'],
|
277
|
+
:pos => [Ve::PartOfSpeech::Adjective],
|
278
|
+
:extra => [{:reading => 'ミタイナ', :transcription => 'ミタイナ', :grammar => nil}],
|
279
|
+
:tokens => [0..1]},
|
280
|
+
'みたいな', <<-EOR.split("\n"))
|
281
|
+
みたい 名詞,非自立,形容動詞語幹,*,*,*,みたい,ミタイ,ミタイ
|
282
|
+
な 助動詞,*,*,*,特殊・ダ,体言接続,だ,ナ,ナ
|
283
|
+
EOS
|
284
|
+
EOR
|
285
|
+
|
286
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['みたいの'],
|
287
|
+
:lemmas => ['みたいの'],
|
288
|
+
:pos => [Ve::PartOfSpeech::Adjective],
|
289
|
+
:extra => [{:reading => 'ミタイノ', :transcription => 'ミタイノ', :grammar => nil}],
|
290
|
+
:tokens => [0..1]},
|
291
|
+
'みたいの', <<-EOR.split("\n"))
|
292
|
+
みたい 名詞,非自立,形容動詞語幹,*,*,*,みたい,ミタイ,ミタイ
|
293
|
+
の 助詞,連体化,*,*,*,*,の,ノ,ノ
|
294
|
+
EOS
|
295
|
+
EOR
|
296
|
+
|
297
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['みたい', 'だ'],
|
298
|
+
:lemmas => ['みたい', 'だ'],
|
299
|
+
:pos => [Ve::PartOfSpeech::Adjective, Ve::PartOfSpeech::Verb],
|
300
|
+
:extra => [{:reading => 'ミタイ', :transcription => 'ミタイ', :grammar => nil},
|
301
|
+
{:reading => 'ダ', :transcription => 'ダ', :grammar => nil}],
|
302
|
+
:tokens => [0..0, 1..1]},
|
303
|
+
'みたいだ', <<-EOR.split("\n"))
|
304
|
+
みたい 名詞,非自立,形容動詞語幹,*,*,*,みたい,ミタイ,ミタイ
|
305
|
+
だ 助動詞,*,*,*,特殊・ダ,基本形,だ,ダ,ダ
|
306
|
+
EOS
|
307
|
+
EOR
|
308
|
+
|
309
|
+
# Meishi tokushu jodoushigokan
|
310
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['行く', 'そう', 'だ'],
|
311
|
+
:lemmas => ['行く', 'そう', 'だ'],
|
312
|
+
:pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb],
|
313
|
+
:extra => [{:reading => 'イク', :transcription => 'イク', :grammar => nil},
|
314
|
+
{:reading => 'ソウ', :transcription => 'ソー', :grammar => :auxillary},
|
315
|
+
{:reading => 'ダ', :transcription => 'ダ', :grammar => nil}],
|
316
|
+
:tokens => [0..0, 1..1, 2..2]},
|
317
|
+
'行くそうだ', <<-EOR.split("\n"))
|
318
|
+
行く 動詞,自立,*,*,五段・カ行促音便,基本形,行く,イク,イク
|
319
|
+
そう 名詞,特殊,助動詞語幹,*,*,*,そう,ソウ,ソー
|
320
|
+
だ 助動詞,*,*,*,特殊・ダ,基本形,だ,ダ,ダ
|
321
|
+
EOS
|
322
|
+
EOR
|
323
|
+
|
324
|
+
# Meishi setsubi
|
325
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['楽しさ'],
|
326
|
+
:lemmas => ['楽しい'],
|
327
|
+
:pos => [Ve::PartOfSpeech::Noun],
|
328
|
+
:extra => [{:reading => 'タノシサ', :transcription => 'タノシサ', :grammar => nil}],
|
329
|
+
:tokens => [0..1]},
|
330
|
+
'楽しさ', <<-EOR.split("\n"))
|
331
|
+
楽し 形容詞,自立,*,*,形容詞・イ段,ガル接続,楽しい,タノシ,タノシ
|
332
|
+
さ 名詞,接尾,特殊,*,*,*,さ,サ,サ
|
333
|
+
EOS
|
334
|
+
EOR
|
335
|
+
|
336
|
+
# Meishi setsuzokushiteki
|
337
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['日本', '対', 'アメリカ'],
|
338
|
+
:lemmas => ['日本', '対', 'アメリカ'],
|
339
|
+
:pos => [Ve::PartOfSpeech::ProperNoun, Ve::PartOfSpeech::Conjunction, Ve::PartOfSpeech::ProperNoun],
|
340
|
+
:extra => [{:reading => 'ニッポン', :transcription => 'ニッポン', :grammar => nil},
|
341
|
+
{:reading => 'タイ', :transcription => 'タイ', :grammar => nil},
|
342
|
+
{:reading => 'アメリカ', :transcription => 'アメリカ', :grammar => nil}],
|
343
|
+
:tokens => [0..0, 1..1, 2..2]},
|
344
|
+
'日本対アメリカ', <<-EOR.split("\n"))
|
345
|
+
日本 名詞,固有名詞,地域,国,*,*,日本,ニッポン,ニッポン
|
346
|
+
対 名詞,接続詞的,*,*,*,*,対,タイ,タイ
|
347
|
+
アメリカ 名詞,固有名詞,地域,国,*,*,アメリカ,アメリカ,アメリカ
|
348
|
+
EOS
|
349
|
+
EOR
|
350
|
+
|
351
|
+
# Meishi doushihijiritsuteki
|
352
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['見て', 'ごらん'],
|
353
|
+
:lemmas => ['見る', 'ごらん'],
|
354
|
+
:pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb],
|
355
|
+
:extra => [{:reading => 'ミテ', :transcription => 'ミテ', :grammar => nil},
|
356
|
+
{:reading => 'ゴラン', :transcription => 'ゴラン', :grammar => :nominal}],
|
357
|
+
:tokens => [0..1, 2..2]},
|
358
|
+
'見てごらん', <<-EOR.split("\n"))
|
359
|
+
見 動詞,自立,*,*,一段,連用形,見る,ミ,ミ
|
360
|
+
て 助詞,接続助詞,*,*,*,*,て,テ,テ
|
361
|
+
ごらん 名詞,動詞非自立的,*,*,*,*,ごらん,ゴラン,ゴラン
|
362
|
+
EOS
|
363
|
+
EOR
|
364
|
+
|
365
|
+
# Settoushi
|
366
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['お', '座り'],
|
367
|
+
:lemmas => ['お', '座り'],
|
368
|
+
:pos => [Ve::PartOfSpeech::Prefix, Ve::PartOfSpeech::Noun],
|
369
|
+
:extra => [{:reading => 'オ', :transcription => 'オ', :grammar => nil},
|
370
|
+
{:reading => 'スワリ', :transcription => 'スワリ', :grammar => nil}],
|
371
|
+
:tokens => [0..0, 1..1]},
|
372
|
+
'お座り', <<-EOR.split("\n"))
|
373
|
+
お 接頭詞,名詞接続,*,*,*,*,お,オ,オ
|
374
|
+
座り 名詞,一般,*,*,*,*,座り,スワリ,スワリ
|
375
|
+
EOS
|
376
|
+
EOR
|
377
|
+
|
378
|
+
# Kigou
|
379
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['。'],
|
380
|
+
:lemmas => ['。'],
|
381
|
+
:pos => [Ve::PartOfSpeech::Symbol],
|
382
|
+
:extra => [{:reading => '。', :transcription => '。', :grammar => nil}],
|
383
|
+
:tokens => [0..0]},
|
384
|
+
'。', <<-EOR.split("\n"))
|
385
|
+
。 記号,句点,*,*,*,*,。,。,。
|
386
|
+
EOS
|
387
|
+
EOR
|
388
|
+
|
389
|
+
# Firaa
|
390
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['えと'],
|
391
|
+
:lemmas => ['えと'],
|
392
|
+
:pos => [Ve::PartOfSpeech::Interjection],
|
393
|
+
:extra => [{:reading => 'エト', :transcription => 'エト', :grammar => nil}],
|
394
|
+
:tokens => [0..0]},
|
395
|
+
'えと', <<-EOR.split("\n"))
|
396
|
+
えと フィラー,*,*,*,*,*,えと,エト,エト
|
397
|
+
EOS
|
398
|
+
EOR
|
399
|
+
|
400
|
+
# Sonota
|
401
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['だ', 'ァ'],
|
402
|
+
:lemmas => ['だ', 'ァ'],
|
403
|
+
:pos => [Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Other],
|
404
|
+
:extra => [{:reading => 'ダ', :transcription => 'ダ', :grammar => nil},
|
405
|
+
{:reading => 'ァ', :transcription => 'ア', :grammar => nil}],
|
406
|
+
:tokens => [0..0, 1..1]},
|
407
|
+
'だァ', <<-EOR.split("\n"))
|
408
|
+
だ 助動詞,*,*,*,特殊・タ,基本形,だ,ダ,ダ
|
409
|
+
ァ その他,間投,*,*,*,*,ァ,ァ,ア
|
410
|
+
EOS
|
411
|
+
EOR
|
412
|
+
|
413
|
+
# Kandoushi
|
414
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['おはよう'],
|
415
|
+
:lemmas => ['おはよう'],
|
416
|
+
:pos => [Ve::PartOfSpeech::Interjection],
|
417
|
+
:extra => [{:reading => 'オハヨウ', :transcription => 'オハヨー', :grammar => nil}],
|
418
|
+
:tokens => [0..0]},
|
419
|
+
'おはよう', <<-EOR.split("\n"))
|
420
|
+
おはよう 感動詞,*,*,*,*,*,おはよう,オハヨウ,オハヨー
|
421
|
+
EOS
|
422
|
+
EOR
|
423
|
+
|
424
|
+
# Rentaishi
|
425
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['この'],
|
426
|
+
:lemmas => ['この'],
|
427
|
+
:pos => [Ve::PartOfSpeech::Determiner],
|
428
|
+
:extra => [{:reading => 'コノ', :transcription => 'コノ', :grammar => nil}],
|
429
|
+
:tokens => [0..0]},
|
430
|
+
'この', <<-EOR.split("\n"))
|
431
|
+
この 連体詞,*,*,*,*,*,この,コノ,コノ
|
432
|
+
EOS
|
433
|
+
EOR
|
434
|
+
|
435
|
+
# Setsuzokushi
|
436
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['そして'],
|
437
|
+
:lemmas => ['そして'],
|
438
|
+
:pos => [Ve::PartOfSpeech::Conjunction],
|
439
|
+
:extra => [{:reading => 'ソシテ', :transcription => 'ソシテ', :grammar => nil}],
|
440
|
+
:tokens => [0..0]},
|
441
|
+
'そして', <<-EOR.split("\n"))
|
442
|
+
そして 接続詞,*,*,*,*,*,そして,ソシテ,ソシテ
|
443
|
+
EOS
|
444
|
+
EOR
|
445
|
+
|
446
|
+
# Fukushi
|
447
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['多分'],
|
448
|
+
:lemmas => ['多分'],
|
449
|
+
:pos => [Ve::PartOfSpeech::Adverb],
|
450
|
+
:extra => [{:reading => 'タブン', :transcription => 'タブン', :grammar => nil}],
|
451
|
+
:tokens => [0..0]},
|
452
|
+
'多分', <<-EOR.split("\n"))
|
453
|
+
多分 副詞,一般,*,*,*,*,多分,タブン,タブン
|
454
|
+
EOS
|
455
|
+
EOR
|
456
|
+
|
457
|
+
# Doushi
|
458
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['行く'],
|
459
|
+
:lemmas => ['行く'],
|
460
|
+
:pos => [Ve::PartOfSpeech::Verb],
|
461
|
+
:extra => [{:reading => 'イク', :transcription => 'イク', :grammar => nil}],
|
462
|
+
:tokens => [0..0]},
|
463
|
+
'行く', <<-EOR.split("\n"))
|
464
|
+
行く 動詞,自立,*,*,五段・カ行促音便,基本形,行く,イク,イク
|
465
|
+
EOS
|
466
|
+
EOR
|
467
|
+
|
468
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['行かない'],
|
469
|
+
:lemmas => ['行く'],
|
470
|
+
:pos => [Ve::PartOfSpeech::Verb],
|
471
|
+
:extra => [{:reading => 'イカナイ', :transcription => 'イカナイ', :grammar => nil}],
|
472
|
+
:tokens => [0..1]},
|
473
|
+
'行かない', <<-EOR.split("\n"))
|
474
|
+
行か 動詞,自立,*,*,五段・カ行促音便,未然形,行く,イカ,イカ
|
475
|
+
ない 助動詞,*,*,*,特殊・ナイ,基本形,ない,ナイ,ナイ
|
476
|
+
EOS
|
477
|
+
EOR
|
478
|
+
|
479
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['行ってきて'],
|
480
|
+
:lemmas => ['行く'],
|
481
|
+
:pos => [Ve::PartOfSpeech::Verb],
|
482
|
+
:extra => [{:reading => 'イッテキテ', :transcription => 'イッテキテ', :grammar => nil}],
|
483
|
+
:tokens => [0..3]},
|
484
|
+
'行ってきて', <<-EOR.split("\n"))
|
485
|
+
行っ 動詞,自立,*,*,五段・カ行促音便,連用タ接続,行く,イッ,イッ
|
486
|
+
て 助詞,接続助詞,*,*,*,*,て,テ,テ
|
487
|
+
き 動詞,非自立,*,*,カ変・クル,連用形,くる,キ,キ
|
488
|
+
て 助詞,接続助詞,*,*,*,*,て,テ,テ
|
489
|
+
EOS
|
490
|
+
EOR
|
491
|
+
|
492
|
+
# Doushi setsubi
|
493
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['行かれる'],
|
494
|
+
:lemmas => ['行く'],
|
495
|
+
:pos => [Ve::PartOfSpeech::Verb],
|
496
|
+
:extra => [{:reading => 'イカレル', :transcription => 'イカレル', :grammar => nil}],
|
497
|
+
:tokens => [0..1]},
|
498
|
+
'行かれる', <<-EOR.split("\n"))
|
499
|
+
行か 動詞,自立,*,*,五段・カ行促音便,未然形,行く,イカ,イカ
|
500
|
+
れる 動詞,接尾,*,*,一段,基本形,れる,レル,レル
|
501
|
+
EOS
|
502
|
+
EOR
|
503
|
+
|
504
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['食べさせられた'],
|
505
|
+
:lemmas => ['食べる'],
|
506
|
+
:pos => [Ve::PartOfSpeech::Verb],
|
507
|
+
:extra => [{:reading => 'タベサセラレタ', :transcription => 'タベサセラレタ', :grammar => nil}],
|
508
|
+
:tokens => [0..3]},
|
509
|
+
'食べさせられた', <<-EOR.split("\n"))
|
510
|
+
食べ 動詞,自立,*,*,一段,未然形,食べる,タベ,タベ
|
511
|
+
させ 動詞,接尾,*,*,一段,未然形,させる,サセ,サセ
|
512
|
+
られ 動詞,接尾,*,*,一段,連用形,られる,ラレ,ラレ
|
513
|
+
た 助動詞,*,*,*,特殊・タ,基本形,た,タ,タ
|
514
|
+
EOS
|
515
|
+
EOR
|
516
|
+
|
517
|
+
# Doushi + jodoushi
|
518
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['食べました'],
|
519
|
+
:lemmas => ['食べる'],
|
520
|
+
:pos => [Ve::PartOfSpeech::Verb],
|
521
|
+
:extra => [{:reading => 'タベマシタ', :transcription => 'タベマシタ', :grammar => nil}],
|
522
|
+
:tokens => [0..2]},
|
523
|
+
'食べました', <<-EOR.split("\n"))
|
524
|
+
食べ 動詞,自立,*,*,一段,連用形,食べる,タベ,タベ
|
525
|
+
まし 助動詞,*,*,*,特殊・マス,連用形,ます,マシ,マシ
|
526
|
+
た 助動詞,*,*,*,特殊・タ,基本形,た,タ,タ
|
527
|
+
EOS
|
528
|
+
EOR
|
529
|
+
|
530
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['食べません'],
|
531
|
+
:lemmas => ['食べる'],
|
532
|
+
:pos => [Ve::PartOfSpeech::Verb],
|
533
|
+
:extra => [{:reading => 'タベマセン', :transcription => 'タベマセン', :grammar => nil}],
|
534
|
+
:tokens => [0..2]},
|
535
|
+
'食べません', <<-EOR.split("\n"))
|
536
|
+
食べ 動詞,自立,*,*,一段,連用形,食べる,タベ,タベ,たべ/食/食べ,
|
537
|
+
ませ 助動詞,*,*,*,特殊・マス,未然形,ます,マセ,マセ,,
|
538
|
+
ん 助動詞,*,*,*,不変化型,基本形,ん,ン,ン,,
|
539
|
+
EOS
|
540
|
+
EOR
|
541
|
+
|
542
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['食べている'],
|
543
|
+
:lemmas => ['食べる'],
|
544
|
+
:pos => [Ve::PartOfSpeech::Verb],
|
545
|
+
:extra => [{:reading => 'タベテイル', :transcription => 'タベテイル', :grammar => nil}],
|
546
|
+
:tokens => [0..2]},
|
547
|
+
'食べている', <<-EOR.split("\n"))
|
548
|
+
食べ 動詞,自立,*,*,一段,連用形,食べる,タベ,タベ,たべ/食/食べ,
|
549
|
+
て 助詞,接続助詞,*,*,*,*,て,テ,テ,,
|
550
|
+
いる 動詞,非自立,*,*,一段,基本形,いる,イル,イル,,
|
551
|
+
EOS
|
552
|
+
EOR
|
553
|
+
|
554
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['食べてる'],
|
555
|
+
:lemmas => ['食べる'],
|
556
|
+
:pos => [Ve::PartOfSpeech::Verb],
|
557
|
+
:extra => [{:reading => 'タベテル', :transcription => 'タベテル', :grammar => nil}],
|
558
|
+
:tokens => [0..1]},
|
559
|
+
'食べてる', <<-EOR.split("\n"))
|
560
|
+
食べ 動詞,自立,*,*,一段,連用形,食べる,タベ,タベ,たべ/食/食べ,
|
561
|
+
てる 動詞,非自立,*,*,一段,基本形,てる,テル,テル,,
|
562
|
+
EOS
|
563
|
+
EOR
|
564
|
+
|
565
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['食べず'],
|
566
|
+
:lemmas => ['食べる'],
|
567
|
+
:pos => [Ve::PartOfSpeech::Verb],
|
568
|
+
:extra => [{:reading => 'タベズ', :transcription => 'タベズ', :grammar => nil}],
|
569
|
+
:tokens => [0..1]},
|
570
|
+
'食べず', <<-EOR.split("\n"))
|
571
|
+
食べ 動詞,自立,*,*,一段,未然形,食べる,タベ,タベ,たべ/食/食べ,
|
572
|
+
ず 助動詞,*,*,*,特殊・ヌ,連用ニ接続,ぬ,ズ,ズ,,
|
573
|
+
EOS
|
574
|
+
EOR
|
575
|
+
|
576
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['長光', 'さん'],
|
577
|
+
:lemmas => ['長光', 'さん'],
|
578
|
+
:pos => [Ve::PartOfSpeech::ProperNoun, Ve::PartOfSpeech::Suffix],
|
579
|
+
:extra => [{:reading => 'ナガミツ', :transcription => 'ナガミツ', :grammar => nil},
|
580
|
+
{:reading => 'サン', :transcription => 'サン', :grammar => nil}],
|
581
|
+
:tokens => [0..0, 1..1]},
|
582
|
+
'長光さん', <<-EOR.split("\n"))
|
583
|
+
長光 名詞,固有名詞,人名,名,*,*,長光,ナガミツ,ナガミツ,,
|
584
|
+
さん 名詞,接尾,人名,*,*,*,さん,サン,サン,,
|
585
|
+
EOS
|
586
|
+
EOR
|
587
|
+
|
588
|
+
|
589
|
+
# Keiyoushi
|
590
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['寒い'],
|
591
|
+
:lemmas => ['寒い'],
|
592
|
+
:pos => [Ve::PartOfSpeech::Adjective],
|
593
|
+
:extra => [{:reading => 'サムイ', :transcription => 'サムイ', :grammar => nil}],
|
594
|
+
:tokens => [0..0]},
|
595
|
+
'寒い', <<-EOR.split("\n"))
|
596
|
+
寒い 形容詞,自立,*,*,形容詞・アウオ段,基本形,寒い,サムイ,サムイ
|
597
|
+
EOS
|
598
|
+
EOR
|
599
|
+
|
600
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['寒くて'],
|
601
|
+
:lemmas => ['寒い'],
|
602
|
+
:pos => [Ve::PartOfSpeech::Adjective],
|
603
|
+
:extra => [{:reading => 'サムクテ', :transcription => 'サムクテ', :grammar => nil}],
|
604
|
+
:tokens => [0..1]},
|
605
|
+
'寒くて', <<-EOR.split("\n"))
|
606
|
+
寒く 形容詞,自立,*,*,形容詞・アウオ段,連用テ接続,寒い,サムク,サムク
|
607
|
+
て 助詞,接続助詞,*,*,*,*,て,テ,テ
|
608
|
+
EOS
|
609
|
+
EOR
|
610
|
+
|
611
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['寒かった'],
|
612
|
+
:lemmas => ['寒い'],
|
613
|
+
:pos => [Ve::PartOfSpeech::Adjective],
|
614
|
+
:extra => [{:reading => 'サムカッタ', :transcription => 'サムカッタ', :grammar => nil}],
|
615
|
+
:tokens => [0..1]},
|
616
|
+
'寒かった', <<-EOR.split("\n"))
|
617
|
+
寒かっ 形容詞,自立,*,*,形容詞・アウオ段,連用タ接続,寒い,サムカッ,サムカッ
|
618
|
+
た 助動詞,*,*,*,特殊・タ,基本形,た,タ,タ
|
619
|
+
EOS
|
620
|
+
EOR
|
621
|
+
|
622
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['寒ければ'],
|
623
|
+
:lemmas => ['寒い'],
|
624
|
+
:pos => [Ve::PartOfSpeech::Adjective],
|
625
|
+
:extra => [{:reading => 'サムケレバ', :transcription => 'サムケレバ', :grammar => nil}],
|
626
|
+
:tokens => [0..1]},
|
627
|
+
'寒ければ', <<-EOR.split("\n"))
|
628
|
+
寒けれ 形容詞,自立,*,*,形容詞・アウオ段,仮定形,寒い,サムケレ,サムケレ
|
629
|
+
ば 助詞,接続助詞,*,*,*,*,ば,バ,バ
|
630
|
+
EOS
|
631
|
+
EOR
|
632
|
+
|
633
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['寒けりゃ'],
|
634
|
+
:lemmas => ['寒い'],
|
635
|
+
:pos => [Ve::PartOfSpeech::Adjective],
|
636
|
+
:extra => [{:reading => 'サムケリャ', :transcription => 'サムケリャ', :grammar => nil}],
|
637
|
+
:tokens => [0..0]},
|
638
|
+
'寒けりゃ', <<-EOR.split("\n"))
|
639
|
+
寒けりゃ 形容詞,自立,*,*,形容詞・アウオ段,仮定縮約1,寒い,サムケリャ,サムケリャ
|
640
|
+
EOS
|
641
|
+
EOR
|
642
|
+
|
643
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['食べたい'],
|
644
|
+
:lemmas => ['食べる'],
|
645
|
+
:pos => [Ve::PartOfSpeech::Verb],
|
646
|
+
:extra => [{:reading => 'タベタイ', :transcription => 'タベタイ', :grammar => nil}],
|
647
|
+
:tokens => [0..1]},
|
648
|
+
'食べたい', <<-EOR.split("\n"))
|
649
|
+
食べ 動詞,自立,*,*,一段,連用形,食べる,タベ,タベ
|
650
|
+
たい 助動詞,*,*,*,特殊・タイ,基本形,たい,タイ,タイ
|
651
|
+
EOS
|
652
|
+
EOR
|
653
|
+
|
654
|
+
# Joshi
|
655
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['日本', 'から'],
|
656
|
+
:lemmas => ['日本', 'から'],
|
657
|
+
:pos => [Ve::PartOfSpeech::ProperNoun, Ve::PartOfSpeech::Postposition],
|
658
|
+
:extra => [{:reading => 'ニッポン', :transcription => 'ニッポン', :grammar => nil},
|
659
|
+
{:reading => 'カラ', :transcription => 'カラ', :grammar => nil}],
|
660
|
+
:tokens => [0..0, 1..1]},
|
661
|
+
'日本から', <<-EOR.split("\n"))
|
662
|
+
日本 名詞,固有名詞,地域,国,*,*,日本,ニッポン,ニッポン
|
663
|
+
から 助詞,格助詞,一般,*,*,*,から,カラ,カラ
|
664
|
+
EOS
|
665
|
+
EOR
|
666
|
+
|
667
|
+
# The copula
|
668
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['日本', 'です'],
|
669
|
+
:lemmas => ['日本', 'です'],
|
670
|
+
:pos => [Ve::PartOfSpeech::ProperNoun, Ve::PartOfSpeech::Verb],
|
671
|
+
:extra => [{:reading => 'ニッポン', :transcription => 'ニッポン', :grammar => nil},
|
672
|
+
{:reading => 'デス', :transcription => 'デス', :grammar => nil}],
|
673
|
+
:tokens => [0..0, 1..1]},
|
674
|
+
'日本です', <<-EOR.split("\n"))
|
675
|
+
日本 名詞,固有名詞,地域,国,*,*,日本,ニッポン,ニッポン
|
676
|
+
です 助動詞,*,*,*,特殊・デス,基本形,です,デス,デス
|
677
|
+
EOS
|
678
|
+
EOR
|
679
|
+
|
680
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['日本', 'だった'],
|
681
|
+
:lemmas => ['日本', 'だ'],
|
682
|
+
:pos => [Ve::PartOfSpeech::ProperNoun, Ve::PartOfSpeech::Verb],
|
683
|
+
:extra => [{:reading => 'ニッポン', :transcription => 'ニッポン', :grammar => nil},
|
684
|
+
{:reading => 'ダッタ', :transcription => 'ダッタ', :grammar => nil}],
|
685
|
+
:tokens => [0..0, 1..2]},
|
686
|
+
'日本だった', <<-EOR.split("\n"))
|
687
|
+
日本 名詞,固有名詞,地域,国,*,*,日本,ニッポン,ニッポン
|
688
|
+
だっ 助動詞,*,*,*,特殊・ダ,連用タ接続,だ,ダッ,ダッ
|
689
|
+
た 助動詞,*,*,*,特殊・タ,基本形,た,タ,タ
|
690
|
+
EOS
|
691
|
+
EOR
|
692
|
+
|
693
|
+
# いるから
|
694
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['いる', 'から'],
|
695
|
+
:lemmas => ['いる', 'から'],
|
696
|
+
:pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Postposition],
|
697
|
+
:extra => [{:reading => 'イル', :transcription => 'イル', :grammar => nil},
|
698
|
+
{:reading => 'カラ', :transcription => 'カラ', :grammar => nil}],
|
699
|
+
:tokens => [0..0, 1..1]},
|
700
|
+
'いるから', <<-EOR.split("\n"))
|
701
|
+
いる 動詞,自立,*,*,一段,基本形,いる,イル,イル
|
702
|
+
から 助詞,接続助詞,*,*,*,*,から,カラ,カラ
|
703
|
+
EOS
|
704
|
+
EOR
|
705
|
+
|
706
|
+
# しているから
|
707
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ['している', 'から'],
|
708
|
+
:lemmas => ['する', 'から'],
|
709
|
+
:pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Postposition],
|
710
|
+
:extra => [{:reading => 'シテイル', :transcription => 'シテイル', :grammar => nil},
|
711
|
+
{:reading => 'カラ', :transcription => 'カラ', :grammar => nil}],
|
712
|
+
:tokens => [0..2, 3..3]},
|
713
|
+
'しているから', <<-EOR.split("\n"))
|
714
|
+
し 動詞,自立,*,*,サ変・スル,連用形,する,シ,シ
|
715
|
+
て 助詞,接続助詞,*,*,*,*,て,テ,テ
|
716
|
+
いる 動詞,非自立,*,*,一段,基本形,いる,イル,イル
|
717
|
+
から 助詞,接続助詞,*,*,*,*,から,カラ,カラ
|
718
|
+
EOS
|
719
|
+
EOR
|
720
|
+
|
721
|
+
# 基準があるが、
|
722
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ["基準", "が", "ある", "が", "、"],
|
723
|
+
:lemmas => ["基準", "が", "ある", "が", "、"],
|
724
|
+
:pos => [Ve::PartOfSpeech::Noun, Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Symbol],
|
725
|
+
:extra => [{:reading=>"キジュン", :transcription=>"キジュン", :grammar=>nil}, {:reading=>"ガ", :transcription=>"ガ", :grammar=>nil}, {:reading=>"アル", :transcription=>"アル", :grammar=>nil}, {:reading=>"ガ", :transcription=>"ガ", :grammar=>nil}, {:reading=>"、", :transcription=>"、", :grammar=>nil}],
|
726
|
+
:tokens => [0..0, 1..1, 2..2, 3..3, 4..4]},
|
727
|
+
'基準があるが、', <<-EOR.split("\n"))
|
728
|
+
基準 名詞,一般,*,*,*,*,基準,キジュン,キジュン
|
729
|
+
が 助詞,格助詞,一般,*,*,*,が,ガ,ガ
|
730
|
+
ある 動詞,自立,*,*,五段・ラ行,基本形,ある,アル,アル
|
731
|
+
が 助詞,接続助詞,*,*,*,*,が,ガ,ガ
|
732
|
+
、 記号,読点,*,*,*,*,、,、,、
|
733
|
+
EOS
|
734
|
+
EOR
|
735
|
+
|
736
|
+
# 教えてください
|
737
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ["教えて", "ください",],
|
738
|
+
:lemmas => ["教える", "くださる"],
|
739
|
+
:pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb],
|
740
|
+
:extra => [{:reading=>"オシエテ", :transcription=>"オシエテ", :grammar=>nil}, {:reading=>"クダサイ", :transcription=>"クダサイ", :grammar=>nil}],
|
741
|
+
:tokens => [0..1, 2..2]},
|
742
|
+
'教えてください', <<-EOR.split("\n"))
|
743
|
+
教え 動詞,自立,*,*,一段,連用形,教える,オシエ,オシエ,おしえ/教え,
|
744
|
+
て 助詞,接続助詞,*,*,*,*,て,テ,テ,,
|
745
|
+
ください 動詞,非自立,*,*,五段・ラ行特殊,命令i,くださる,クダサイ,クダサイ,,
|
746
|
+
EOS
|
747
|
+
EOR
|
748
|
+
|
749
|
+
# はない
|
750
|
+
assert_parses_into_words(Ve::Parse::MecabIpadic, {:words => ["は", "ない"],
|
751
|
+
:lemmas => ["は", "ない"],
|
752
|
+
:pos => [Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Postposition],
|
753
|
+
:extra => [{:reading=>"ハ", :transcription=>"ワ", :grammar=>nil}, {:reading=>"ナイ", :transcription=>"ナイ", :grammar=>nil}],
|
754
|
+
:tokens => [0..0, 1..1]},
|
755
|
+
'はない', <<-EOR.split("\n"))
|
756
|
+
は 助詞,係助詞,*,*,*,*,は,ハ,ワ,,
|
757
|
+
ない 助動詞,*,*,*,特殊・ナイ,基本形,ない,ナイ,ナイ,,
|
758
|
+
EOS
|
759
|
+
EOR
|
760
|
+
|
761
|
+
# TODO: xした should parse as adjective?
|
762
|
+
end
|
763
|
+
|
764
|
+
def test_word_transliteration
|
765
|
+
skip
|
766
|
+
mecab = Ve::Provider::MecabIpadic.new
|
767
|
+
parse = mecab.parse('日本', :transliterate_words => :latn)
|
768
|
+
|
769
|
+
assert_equal 'nihon', parse.words.first.transliteration(:latn)
|
770
|
+
end
|
771
|
+
|
772
|
+
end
|