ve 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.travis.yml +9 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +6 -0
- data/lib/providers/freeling_en.rb +4 -2
- data/lib/providers/mecab_ipadic.rb +52 -32
- data/lib/ve.rb +15 -4
- data/tests/{freeling_en_test.rb → freeling_en_parse_test.rb} +37 -48
- data/tests/freeling_en_provider_test.rb +38 -0
- data/tests/japanese_transliterators_test.rb +1 -1
- data/tests/mecab_ipadic_parse_test.rb +772 -0
- data/tests/mecab_ipadic_provider_test.rb +21 -0
- data/tests/test_helper.rb +5 -4
- data/tests/ve_test.rb +5 -1
- data/ve.gemspec +1 -1
- metadata +27 -35
- data/tests/mecab_ipadic_test.rb +0 -452
@@ -0,0 +1,21 @@
|
|
1
|
+
# Encoding: UTF-8
|
2
|
+
|
3
|
+
require_relative 'test_helper'
|
4
|
+
|
5
|
+
class MecabIpadicProviderTest < MiniTest::Unit::TestCase
|
6
|
+
# TODO: make these run without running mecab
|
7
|
+
|
8
|
+
def test_should_be_able_to_start
|
9
|
+
skip
|
10
|
+
mecab = Ve::Provider::MecabIpadic.new
|
11
|
+
assert mecab.works?
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_can_parse
|
15
|
+
skip
|
16
|
+
mecab = Ve::Provider::MecabIpadic.new
|
17
|
+
parse = mecab.parse('')
|
18
|
+
assert_equal Ve::Parse::MecabIpadic, parse.class
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
data/tests/test_helper.rb
CHANGED
@@ -2,14 +2,15 @@ require 'rubygems'
|
|
2
2
|
require 'bundler/setup'
|
3
3
|
|
4
4
|
require File.expand_path(File.dirname(__FILE__) + "/../lib/ve")
|
5
|
-
require 'test/unit'
|
5
|
+
require 'minitest/autorun'
|
6
|
+
require 'mocha'
|
6
7
|
|
7
|
-
class Test::Unit::TestCase
|
8
|
+
class MiniTest::Unit::TestCase
|
8
9
|
|
9
10
|
private
|
10
11
|
|
11
|
-
def assert_parses_into_words(
|
12
|
-
parse =
|
12
|
+
def assert_parses_into_words(parse_klass, expected, text, raw)
|
13
|
+
parse = parse_klass.new(text, raw)
|
13
14
|
words = parse.words
|
14
15
|
tokens = parse.tokens
|
15
16
|
|
data/tests/ve_test.rb
CHANGED
@@ -2,17 +2,21 @@
|
|
2
2
|
|
3
3
|
require_relative 'test_helper'
|
4
4
|
|
5
|
-
class VeTest < Test::Unit::TestCase
|
5
|
+
class VeTest < MiniTest::Unit::TestCase
|
6
|
+
# TODO: Set these up to run properly
|
6
7
|
|
7
8
|
def test_get
|
9
|
+
skip
|
8
10
|
assert_equal ['日本語', 'です'], Ve.get('日本語です', :ja, :words).collect(&:word)
|
9
11
|
end
|
10
12
|
|
11
13
|
def test_in
|
14
|
+
skip
|
12
15
|
assert_equal ['日本語', 'です'], Ve.in(:ja).words('日本語です').collect(&:word)
|
13
16
|
end
|
14
17
|
|
15
18
|
def test_http_interface
|
19
|
+
skip
|
16
20
|
Ve.config(Ve::HTTPInterface, :url => 'http://localhost:4567')
|
17
21
|
assert_equal ['日本語', 'です'], Ve.in(:ja).words('日本語です').collect(&:word)
|
18
22
|
end
|
data/ve.gemspec
CHANGED
metadata
CHANGED
@@ -1,29 +1,24 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: ve
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
version: 0.0.2
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.3
|
6
5
|
platform: ruby
|
7
|
-
authors:
|
8
|
-
-
|
6
|
+
authors:
|
7
|
+
- Kim Ahlström
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
|
13
|
-
date: 2011-11-12 00:00:00 Z
|
11
|
+
date: 2014-06-06 00:00:00.000000000 Z
|
14
12
|
dependencies: []
|
15
|
-
|
16
13
|
description: Ve is a linguistic framework for programmers.
|
17
|
-
email:
|
14
|
+
email:
|
18
15
|
- kim.ahlstrom@gmail.com
|
19
16
|
executables: []
|
20
|
-
|
21
17
|
extensions: []
|
22
|
-
|
23
18
|
extra_rdoc_files: []
|
24
|
-
|
25
|
-
files:
|
19
|
+
files:
|
26
20
|
- .gitignore
|
21
|
+
- .travis.yml
|
27
22
|
- Gemfile
|
28
23
|
- Gemfile.lock
|
29
24
|
- Rakefile
|
@@ -43,38 +38,35 @@ files:
|
|
43
38
|
- lib/ve.rb
|
44
39
|
- lib/word.rb
|
45
40
|
- sinatra/server.rb
|
46
|
-
- tests/freeling_en_test.rb
|
41
|
+
- tests/freeling_en_parse_test.rb
|
42
|
+
- tests/freeling_en_provider_test.rb
|
47
43
|
- tests/japanese_transliterators_test.rb
|
48
|
-
- tests/mecab_ipadic_test.rb
|
44
|
+
- tests/mecab_ipadic_parse_test.rb
|
45
|
+
- tests/mecab_ipadic_provider_test.rb
|
49
46
|
- tests/test_helper.rb
|
50
47
|
- tests/ve_test.rb
|
51
48
|
- ve.gemspec
|
52
49
|
homepage: http://github.com/kimtaro/ve
|
53
50
|
licenses: []
|
54
|
-
|
51
|
+
metadata: {}
|
55
52
|
post_install_message:
|
56
53
|
rdoc_options: []
|
57
|
-
|
58
|
-
require_paths:
|
54
|
+
require_paths:
|
59
55
|
- lib
|
60
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
- !ruby/object:Gem::Version
|
71
|
-
version: "0"
|
56
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - '>='
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
72
66
|
requirements: []
|
73
|
-
|
74
67
|
rubyforge_project:
|
75
|
-
rubygems_version:
|
68
|
+
rubygems_version: 2.0.3
|
76
69
|
signing_key:
|
77
|
-
specification_version:
|
70
|
+
specification_version: 4
|
78
71
|
summary: Ve is a linguistic framework for programmers
|
79
72
|
test_files: []
|
80
|
-
|
data/tests/mecab_ipadic_test.rb
DELETED
@@ -1,452 +0,0 @@
|
|
1
|
-
# Encoding: UTF-8
|
2
|
-
|
3
|
-
require_relative 'test_helper'
|
4
|
-
|
5
|
-
class MecabIpadicTest < Test::Unit::TestCase
|
6
|
-
|
7
|
-
def test_should_be_able_to_start
|
8
|
-
mecab = Ve::Provider::MecabIpadic.new
|
9
|
-
assert mecab.works?
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_can_parse
|
13
|
-
mecab = Ve::Provider::MecabIpadic.new
|
14
|
-
parse = mecab.parse('')
|
15
|
-
assert_equal Ve::Parse::MecabIpadic, parse.class
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_all_literals_should_equal_the_input_text
|
19
|
-
text = <<-EOS
|
20
|
-
古池や
|
21
|
-
蛙飛び込む
|
22
|
-
水の音
|
23
|
-
|
24
|
-
EOS
|
25
|
-
mecab = Ve::Provider::MecabIpadic.new
|
26
|
-
parse = mecab.parse(text)
|
27
|
-
assert_equal text, parse.tokens.collect { |t| t[:literal] }.join
|
28
|
-
end
|
29
|
-
|
30
|
-
def test_tokens_must_be_created_for_parsed_and_unparsed_text
|
31
|
-
mecab = Ve::Provider::MecabIpadic.new
|
32
|
-
parse = mecab.parse(' A B ')
|
33
|
-
assert_equal [:unparsed, :parsed, :unparsed, :parsed, :unparsed, :sentence_split], parse.tokens.collect { |t| t[:type] }
|
34
|
-
assert_equal [' ', 'A', ' ', 'B', ' ', ''], parse.tokens.collect { |t| t[:literal] }
|
35
|
-
assert_equal [0..0, 1..1, 2..4, 5..5, 6..7, nil], parse.tokens.collect { |t| t[:characters] }
|
36
|
-
end
|
37
|
-
|
38
|
-
def test_tokens_should_not_be_modified_when_attached_to_words
|
39
|
-
mecab = Ve::Provider::MecabIpadic.new
|
40
|
-
parse = mecab.parse('悪化する')
|
41
|
-
tokens = parse.tokens
|
42
|
-
assert_equal '悪化', tokens[0][:literal]
|
43
|
-
assert_equal '悪化', tokens[0][:lemma]
|
44
|
-
end
|
45
|
-
|
46
|
-
def test_sentences
|
47
|
-
mecab = Ve::Provider::MecabIpadic.new
|
48
|
-
parse = mecab.parse('これは文章である。で、also containing some Englishですね')
|
49
|
-
assert_equal ['これは文章である。', 'で、also containing some Englishですね'], parse.sentences
|
50
|
-
end
|
51
|
-
|
52
|
-
def test_this_shouldnt_crash
|
53
|
-
mecab = Ve::Provider::MecabIpadic.new
|
54
|
-
parse = mecab.parse('チューたろうは田中さんの犬です。')
|
55
|
-
assert_equal 11, parse.tokens.size
|
56
|
-
end
|
57
|
-
|
58
|
-
def test_this_shouldnt_crash_either
|
59
|
-
mecab = Ve::Provider::MecabIpadic.new
|
60
|
-
parse = mecab.parse('三十年式歩兵銃')
|
61
|
-
assert_equal 7, parse.tokens.size
|
62
|
-
end
|
63
|
-
|
64
|
-
def test_words
|
65
|
-
mecab = Ve::Provider::MecabIpadic.new
|
66
|
-
|
67
|
-
# Meishi
|
68
|
-
assert_parses_into_words(mecab, {:words => ['車'],
|
69
|
-
:lemmas => ['車'],
|
70
|
-
:pos => [Ve::PartOfSpeech::Noun],
|
71
|
-
:extra => [{:reading => 'クルマ', :transcription => 'クルマ', :grammar => nil}],
|
72
|
-
:tokens => [0..0]},
|
73
|
-
'車')
|
74
|
-
|
75
|
-
# Koyuumeishi
|
76
|
-
assert_parses_into_words(mecab, {:words => ['太郎'],
|
77
|
-
:lemmas => ['太郎'],
|
78
|
-
:pos => [Ve::PartOfSpeech::ProperNoun],
|
79
|
-
:extra => [{:reading => 'タロウ', :transcription => 'タロー', :grammar => nil}],
|
80
|
-
:tokens => [0..0]},
|
81
|
-
'太郎')
|
82
|
-
|
83
|
-
# Daimeishi
|
84
|
-
assert_parses_into_words(mecab, {:words => ['彼'],
|
85
|
-
:lemmas => ['彼'],
|
86
|
-
:pos => [Ve::PartOfSpeech::Pronoun],
|
87
|
-
:extra => [{:reading => 'カレ', :transcription => 'カレ', :grammar => nil}],
|
88
|
-
:tokens => [0..0]},
|
89
|
-
'彼')
|
90
|
-
|
91
|
-
# Fukushikanou
|
92
|
-
assert_parses_into_words(mecab, {:words => ['午後に'],
|
93
|
-
:lemmas => ['午後に'],
|
94
|
-
:pos => [Ve::PartOfSpeech::Adverb],
|
95
|
-
:extra => [{:reading => 'ゴゴニ', :transcription => 'ゴゴニ', :grammar => nil}],
|
96
|
-
:tokens => [0..1]},
|
97
|
-
'午後に')
|
98
|
-
|
99
|
-
# Kazu
|
100
|
-
assert_parses_into_words(mecab, {:words => ['一'],
|
101
|
-
:lemmas => ['一'],
|
102
|
-
:pos => [Ve::PartOfSpeech::Number],
|
103
|
-
:extra => [{:reading => 'イチ', :transcription => 'イチ', :grammar => nil}],
|
104
|
-
:tokens => [0..0]},
|
105
|
-
'一')
|
106
|
-
|
107
|
-
assert_parses_into_words(mecab, {:words => ['123'],
|
108
|
-
:lemmas => ['123'],
|
109
|
-
:pos => [Ve::PartOfSpeech::Number],
|
110
|
-
:extra => [{:reading => 'イチニサン', :transcription => 'イチニサン', :grammar => nil}],
|
111
|
-
:tokens => [0..2]},
|
112
|
-
'123')
|
113
|
-
|
114
|
-
# Sahensetsuzoku + tokumi ta
|
115
|
-
assert_parses_into_words(mecab, {:words => ['悪化した'],
|
116
|
-
:lemmas => ['悪化する'],
|
117
|
-
:pos => [Ve::PartOfSpeech::Verb],
|
118
|
-
:extra => [{:reading => 'アッカシタ', :transcription => 'アッカシタ', :grammar => nil}],
|
119
|
-
:tokens => [0..2]},
|
120
|
-
'悪化した')
|
121
|
-
|
122
|
-
# Keiyoudoushigokan
|
123
|
-
assert_parses_into_words(mecab, {:words => ['重要な'],
|
124
|
-
:lemmas => ['重要'],
|
125
|
-
:pos => [Ve::PartOfSpeech::Adjective],
|
126
|
-
:extra => [{:reading => 'ジュウヨウナ', :transcription => 'ジューヨーナ', :grammar => nil}],
|
127
|
-
:tokens => [0..1]},
|
128
|
-
'重要な')
|
129
|
-
|
130
|
-
# Naikeiyoushigokan
|
131
|
-
assert_parses_into_words(mecab, {:words => ['とんでもない'],
|
132
|
-
:lemmas => ['とんでもない'],
|
133
|
-
:pos => [Ve::PartOfSpeech::Adjective],
|
134
|
-
:extra => [{:reading => 'トンデモナイ', :transcription => 'トンデモナイ', :grammar => nil}],
|
135
|
-
:tokens => [0..1]},
|
136
|
-
'とんでもない')
|
137
|
-
|
138
|
-
# Meishi hijiritsu fukushikanou
|
139
|
-
assert_parses_into_words(mecab, {:words => ['の', 'うちに'],
|
140
|
-
:lemmas => ['の', 'うちに'],
|
141
|
-
:pos => [Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Adverb],
|
142
|
-
:extra => [{:reading => 'ノ', :transcription => 'ノ', :grammar => nil},
|
143
|
-
{:reading => 'ウチニ', :transcription => 'ウチニ', :grammar => nil}],
|
144
|
-
:tokens => [0..0, 1..2]},
|
145
|
-
'のうちに')
|
146
|
-
|
147
|
-
# Meishi hijiritsu jodoushigokan
|
148
|
-
assert_parses_into_words(mecab, {:words => ['の', 'ような'],
|
149
|
-
:lemmas => ['の', 'ようだ'],
|
150
|
-
:pos => [Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Verb],
|
151
|
-
:extra => [{:reading => 'ノ', :transcription => 'ノ', :grammar => nil},
|
152
|
-
{:reading => 'ヨウナ', :transcription => 'ヨーナ', :grammar => :auxillary}],
|
153
|
-
:tokens => [0..0, 1..2]},
|
154
|
-
'のような')
|
155
|
-
|
156
|
-
assert_parses_into_words(mecab, {:words => ['の', 'ように'],
|
157
|
-
:lemmas => ['の', 'ように'],
|
158
|
-
:pos => [Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Adverb],
|
159
|
-
:extra => [{:reading => 'ノ', :transcription => 'ノ', :grammar => nil},
|
160
|
-
{:reading => 'ヨウニ', :transcription => 'ヨーニ', :grammar => nil}],
|
161
|
-
:tokens => [0..0, 1..2]},
|
162
|
-
'のように')
|
163
|
-
|
164
|
-
# Meishi hijiritsu keiyoudoushigokan
|
165
|
-
assert_parses_into_words(mecab, {:words => ['みたいな'],
|
166
|
-
:lemmas => ['みたいだ'],
|
167
|
-
:pos => [Ve::PartOfSpeech::Adjective],
|
168
|
-
:extra => [{:reading => 'ミタイナ', :transcription => 'ミタイナ', :grammar => nil}],
|
169
|
-
:tokens => [0..1]},
|
170
|
-
'みたいな')
|
171
|
-
|
172
|
-
assert_parses_into_words(mecab, {:words => ['みたいの'],
|
173
|
-
:lemmas => ['みたいの'],
|
174
|
-
:pos => [Ve::PartOfSpeech::Adjective],
|
175
|
-
:extra => [{:reading => 'ミタイノ', :transcription => 'ミタイノ', :grammar => nil}],
|
176
|
-
:tokens => [0..1]},
|
177
|
-
'みたいの')
|
178
|
-
|
179
|
-
assert_parses_into_words(mecab, {:words => ['みたい', 'だ'],
|
180
|
-
:lemmas => ['みたい', 'だ'],
|
181
|
-
:pos => [Ve::PartOfSpeech::Adjective, Ve::PartOfSpeech::Verb],
|
182
|
-
:extra => [{:reading => 'ミタイ', :transcription => 'ミタイ', :grammar => nil},
|
183
|
-
{:reading => 'ダ', :transcription => 'ダ', :grammar => nil}],
|
184
|
-
:tokens => [0..0, 1..1]},
|
185
|
-
'みたいだ')
|
186
|
-
|
187
|
-
# Meishi tokushu jodoushigokan
|
188
|
-
assert_parses_into_words(mecab, {:words => ['行く', 'そう', 'だ'],
|
189
|
-
:lemmas => ['行く', 'そう', 'だ'],
|
190
|
-
:pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb],
|
191
|
-
:extra => [{:reading => 'イク', :transcription => 'イク', :grammar => nil},
|
192
|
-
{:reading => 'ソウ', :transcription => 'ソー', :grammar => :auxillary},
|
193
|
-
{:reading => 'ダ', :transcription => 'ダ', :grammar => nil}],
|
194
|
-
:tokens => [0..0, 1..1, 2..2]},
|
195
|
-
'行くそうだ')
|
196
|
-
|
197
|
-
# Meishi setsubi
|
198
|
-
# TODO: This should maybe be parsed as one noun instead
|
199
|
-
assert_parses_into_words(mecab, {:words => ['楽し', 'さ'],
|
200
|
-
:lemmas => ['楽しい', 'さ'],
|
201
|
-
:pos => [Ve::PartOfSpeech::Adjective, Ve::PartOfSpeech::Suffix],
|
202
|
-
:extra => [{:reading => 'タノシ', :transcription => 'タノシ', :grammar => nil},
|
203
|
-
{:reading => 'サ', :transcription => 'サ', :grammar => nil}],
|
204
|
-
:tokens => [0..0, 1..1]},
|
205
|
-
'楽しさ')
|
206
|
-
|
207
|
-
# Meishi setsuzokushiteki
|
208
|
-
assert_parses_into_words(mecab, {:words => ['日本', '対', 'アメリカ'],
|
209
|
-
:lemmas => ['日本', '対', 'アメリカ'],
|
210
|
-
:pos => [Ve::PartOfSpeech::ProperNoun, Ve::PartOfSpeech::Conjunction, Ve::PartOfSpeech::ProperNoun],
|
211
|
-
:extra => [{:reading => 'ニッポン', :transcription => 'ニッポン', :grammar => nil},
|
212
|
-
{:reading => 'タイ', :transcription => 'タイ', :grammar => nil},
|
213
|
-
{:reading => 'アメリカ', :transcription => 'アメリカ', :grammar => nil}],
|
214
|
-
:tokens => [0..0, 1..1, 2..2]},
|
215
|
-
'日本対アメリカ')
|
216
|
-
|
217
|
-
# Meishi doushihijiritsuteki
|
218
|
-
assert_parses_into_words(mecab, {:words => ['見て', 'ごらん'],
|
219
|
-
:lemmas => ['見る', 'ごらん'],
|
220
|
-
:pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb],
|
221
|
-
:extra => [{:reading => 'ミテ', :transcription => 'ミテ', :grammar => nil},
|
222
|
-
{:reading => 'ゴラン', :transcription => 'ゴラン', :grammar => :nominal}],
|
223
|
-
:tokens => [0..1, 2..2]},
|
224
|
-
'見てごらん')
|
225
|
-
|
226
|
-
# Settoushi
|
227
|
-
assert_parses_into_words(mecab, {:words => ['お', '座り'],
|
228
|
-
:lemmas => ['お', '座り'],
|
229
|
-
:pos => [Ve::PartOfSpeech::Prefix, Ve::PartOfSpeech::Noun],
|
230
|
-
:extra => [{:reading => 'オ', :transcription => 'オ', :grammar => nil},
|
231
|
-
{:reading => 'スワリ', :transcription => 'スワリ', :grammar => nil}],
|
232
|
-
:tokens => [0..0, 1..1]},
|
233
|
-
'お座り')
|
234
|
-
|
235
|
-
# Kigou
|
236
|
-
assert_parses_into_words(mecab, {:words => ['。'],
|
237
|
-
:lemmas => ['。'],
|
238
|
-
:pos => [Ve::PartOfSpeech::Symbol],
|
239
|
-
:extra => [{:reading => '。', :transcription => '。', :grammar => nil}],
|
240
|
-
:tokens => [0..0]},
|
241
|
-
'。')
|
242
|
-
|
243
|
-
# Firaa
|
244
|
-
assert_parses_into_words(mecab, {:words => ['えと'],
|
245
|
-
:lemmas => ['えと'],
|
246
|
-
:pos => [Ve::PartOfSpeech::Interjection],
|
247
|
-
:extra => [{:reading => 'エト', :transcription => 'エト', :grammar => nil}],
|
248
|
-
:tokens => [0..0]},
|
249
|
-
'えと')
|
250
|
-
|
251
|
-
# Sonota
|
252
|
-
assert_parses_into_words(mecab, {:words => ['だ', 'ァ'],
|
253
|
-
:lemmas => ['だ', 'ァ'],
|
254
|
-
:pos => [Ve::PartOfSpeech::Postposition, Ve::PartOfSpeech::Other],
|
255
|
-
:extra => [{:reading => 'ダ', :transcription => 'ダ', :grammar => nil},
|
256
|
-
{:reading => 'ァ', :transcription => 'ア', :grammar => nil}],
|
257
|
-
:tokens => [0..0, 1..1]},
|
258
|
-
'だァ')
|
259
|
-
|
260
|
-
# Kandoushi
|
261
|
-
assert_parses_into_words(mecab, {:words => ['おはよう'],
|
262
|
-
:lemmas => ['おはよう'],
|
263
|
-
:pos => [Ve::PartOfSpeech::Interjection],
|
264
|
-
:extra => [{:reading => 'オハヨウ', :transcription => 'オハヨー', :grammar => nil}],
|
265
|
-
:tokens => [0..0]},
|
266
|
-
'おはよう')
|
267
|
-
|
268
|
-
# Rentaishi
|
269
|
-
assert_parses_into_words(mecab, {:words => ['この'],
|
270
|
-
:lemmas => ['この'],
|
271
|
-
:pos => [Ve::PartOfSpeech::Determiner],
|
272
|
-
:extra => [{:reading => 'コノ', :transcription => 'コノ', :grammar => nil}],
|
273
|
-
:tokens => [0..0]},
|
274
|
-
'この')
|
275
|
-
|
276
|
-
# Setsuzokushi
|
277
|
-
assert_parses_into_words(mecab, {:words => ['そして'],
|
278
|
-
:lemmas => ['そして'],
|
279
|
-
:pos => [Ve::PartOfSpeech::Conjunction],
|
280
|
-
:extra => [{:reading => 'ソシテ', :transcription => 'ソシテ', :grammar => nil}],
|
281
|
-
:tokens => [0..0]},
|
282
|
-
'そして')
|
283
|
-
|
284
|
-
# Fukushi
|
285
|
-
assert_parses_into_words(mecab, {:words => ['多分'],
|
286
|
-
:lemmas => ['多分'],
|
287
|
-
:pos => [Ve::PartOfSpeech::Adverb],
|
288
|
-
:extra => [{:reading => 'タブン', :transcription => 'タブン', :grammar => nil}],
|
289
|
-
:tokens => [0..0]},
|
290
|
-
'多分')
|
291
|
-
|
292
|
-
# Doushi
|
293
|
-
assert_parses_into_words(mecab, {:words => ['行く'],
|
294
|
-
:lemmas => ['行く'],
|
295
|
-
:pos => [Ve::PartOfSpeech::Verb],
|
296
|
-
:extra => [{:reading => 'イク', :transcription => 'イク', :grammar => nil}],
|
297
|
-
:tokens => [0..0]},
|
298
|
-
'行く')
|
299
|
-
|
300
|
-
assert_parses_into_words(mecab, {:words => ['行かない'],
|
301
|
-
:lemmas => ['行く'],
|
302
|
-
:pos => [Ve::PartOfSpeech::Verb],
|
303
|
-
:extra => [{:reading => 'イカナイ', :transcription => 'イカナイ', :grammar => nil}],
|
304
|
-
:tokens => [0..1]},
|
305
|
-
'行かない')
|
306
|
-
|
307
|
-
assert_parses_into_words(mecab, {:words => ['行って', 'きて'],
|
308
|
-
:lemmas => ['行く', 'くる'],
|
309
|
-
:pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb],
|
310
|
-
:extra => [{:reading => 'イッテ', :transcription => 'イッテ', :grammar => nil},
|
311
|
-
{:reading => 'キテ', :transcription => 'キテ', :grammar => :auxillary}],
|
312
|
-
:tokens => [0..1, 2..3]},
|
313
|
-
'行ってきて')
|
314
|
-
|
315
|
-
# Doushi setsubi
|
316
|
-
assert_parses_into_words(mecab, {:words => ['行かれる'],
|
317
|
-
:lemmas => ['行く'],
|
318
|
-
:pos => [Ve::PartOfSpeech::Verb],
|
319
|
-
:extra => [{:reading => 'イカレル', :transcription => 'イカレル', :grammar => nil}],
|
320
|
-
:tokens => [0..1]},
|
321
|
-
'行かれる')
|
322
|
-
|
323
|
-
assert_parses_into_words(mecab, {:words => ['食べさせられた'],
|
324
|
-
:lemmas => ['食べる'],
|
325
|
-
:pos => [Ve::PartOfSpeech::Verb],
|
326
|
-
:extra => [{:reading => 'タベサセラレタ', :transcription => 'タベサセラレタ', :grammar => nil}],
|
327
|
-
:tokens => [0..3]},
|
328
|
-
'食べさせられた')
|
329
|
-
|
330
|
-
# Doushi + jodoushi
|
331
|
-
assert_parses_into_words(mecab, {:words => ['食べました'],
|
332
|
-
:lemmas => ['食べる'],
|
333
|
-
:pos => [Ve::PartOfSpeech::Verb],
|
334
|
-
:extra => [{:reading => 'タベマシタ', :transcription => 'タベマシタ', :grammar => nil}],
|
335
|
-
:tokens => [0..2]},
|
336
|
-
'食べました')
|
337
|
-
|
338
|
-
# Keiyoushi
|
339
|
-
assert_parses_into_words(mecab, {:words => ['寒い'],
|
340
|
-
:lemmas => ['寒い'],
|
341
|
-
:pos => [Ve::PartOfSpeech::Adjective],
|
342
|
-
:extra => [{:reading => 'サムイ', :transcription => 'サムイ', :grammar => nil}],
|
343
|
-
:tokens => [0..0]},
|
344
|
-
'寒い')
|
345
|
-
|
346
|
-
assert_parses_into_words(mecab, {:words => ['寒くて'],
|
347
|
-
:lemmas => ['寒い'],
|
348
|
-
:pos => [Ve::PartOfSpeech::Adjective],
|
349
|
-
:extra => [{:reading => 'サムクテ', :transcription => 'サムクテ', :grammar => nil}],
|
350
|
-
:tokens => [0..1]},
|
351
|
-
'寒くて')
|
352
|
-
|
353
|
-
assert_parses_into_words(mecab, {:words => ['寒かった'],
|
354
|
-
:lemmas => ['寒い'],
|
355
|
-
:pos => [Ve::PartOfSpeech::Adjective],
|
356
|
-
:extra => [{:reading => 'サムカッタ', :transcription => 'サムカッタ', :grammar => nil}],
|
357
|
-
:tokens => [0..1]},
|
358
|
-
'寒かった')
|
359
|
-
|
360
|
-
assert_parses_into_words(mecab, {:words => ['寒ければ'],
|
361
|
-
:lemmas => ['寒い'],
|
362
|
-
:pos => [Ve::PartOfSpeech::Adjective],
|
363
|
-
:extra => [{:reading => 'サムケレバ', :transcription => 'サムケレバ', :grammar => nil}],
|
364
|
-
:tokens => [0..1]},
|
365
|
-
'寒ければ')
|
366
|
-
|
367
|
-
assert_parses_into_words(mecab, {:words => ['寒けりゃ'],
|
368
|
-
:lemmas => ['寒い'],
|
369
|
-
:pos => [Ve::PartOfSpeech::Adjective],
|
370
|
-
:extra => [{:reading => 'サムケリャ', :transcription => 'サムケリャ', :grammar => nil}],
|
371
|
-
:tokens => [0..0]},
|
372
|
-
'寒けりゃ')
|
373
|
-
|
374
|
-
assert_parses_into_words(mecab, {:words => ['食べたい'],
|
375
|
-
:lemmas => ['食べる'],
|
376
|
-
:pos => [Ve::PartOfSpeech::Verb],
|
377
|
-
:extra => [{:reading => 'タベタイ', :transcription => 'タベタイ', :grammar => nil}],
|
378
|
-
:tokens => [0..1]},
|
379
|
-
'食べたい')
|
380
|
-
|
381
|
-
# Joshi
|
382
|
-
assert_parses_into_words(mecab, {:words => ['日本', 'から'],
|
383
|
-
:lemmas => ['日本', 'から'],
|
384
|
-
:pos => [Ve::PartOfSpeech::ProperNoun, Ve::PartOfSpeech::Postposition],
|
385
|
-
:extra => [{:reading => 'ニッポン', :transcription => 'ニッポン', :grammar => nil},
|
386
|
-
{:reading => 'カラ', :transcription => 'カラ', :grammar => nil}],
|
387
|
-
:tokens => [0..0, 1..1]},
|
388
|
-
'日本から')
|
389
|
-
|
390
|
-
# The copula
|
391
|
-
assert_parses_into_words(mecab, {:words => ['日本', 'です'],
|
392
|
-
:lemmas => ['日本', 'です'],
|
393
|
-
:pos => [Ve::PartOfSpeech::ProperNoun, Ve::PartOfSpeech::Verb],
|
394
|
-
:extra => [{:reading => 'ニッポン', :transcription => 'ニッポン', :grammar => nil},
|
395
|
-
{:reading => 'デス', :transcription => 'デス', :grammar => nil}],
|
396
|
-
:tokens => [0..0, 1..1]},
|
397
|
-
'日本です')
|
398
|
-
|
399
|
-
assert_parses_into_words(mecab, {:words => ['日本', 'だった'],
|
400
|
-
:lemmas => ['日本', 'だ'],
|
401
|
-
:pos => [Ve::PartOfSpeech::ProperNoun, Ve::PartOfSpeech::Verb],
|
402
|
-
:extra => [{:reading => 'ニッポン', :transcription => 'ニッポン', :grammar => nil},
|
403
|
-
{:reading => 'ダッタ', :transcription => 'ダッタ', :grammar => nil}],
|
404
|
-
:tokens => [0..0, 1..2]},
|
405
|
-
'日本だった')
|
406
|
-
|
407
|
-
# いるから
|
408
|
-
assert_parses_into_words(mecab, {:words => ['いる', 'から'],
|
409
|
-
:lemmas => ['いる', 'から'],
|
410
|
-
:pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Postposition],
|
411
|
-
:extra => [{:reading => 'イル', :transcription => 'イル', :grammar => nil},
|
412
|
-
{:reading => 'カラ', :transcription => 'カラ', :grammar => nil}],
|
413
|
-
:tokens => [0..0, 1..1]},
|
414
|
-
'いるから')
|
415
|
-
|
416
|
-
# しているから
|
417
|
-
assert_parses_into_words(mecab, {:words => ['して', 'いる', 'から'],
|
418
|
-
:lemmas => ['する', 'いる', 'から'],
|
419
|
-
:pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Postposition],
|
420
|
-
:extra => [{:reading => 'シテ', :transcription => 'シテ', :grammar => nil},
|
421
|
-
{:reading => 'イル', :transcription => 'イル', :grammar => :auxillary},
|
422
|
-
{:reading => 'カラ', :transcription => 'カラ', :grammar => nil}],
|
423
|
-
:tokens => [0..0, 1..1, 2..2]},
|
424
|
-
'しているから')
|
425
|
-
|
426
|
-
# 基準があるが、
|
427
|
-
assert_parses_into_words(mecab, {:words => ['して', 'いる', 'から'],
|
428
|
-
:lemmas => ['する', 'いる', 'から'],
|
429
|
-
:pos => [Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Verb, Ve::PartOfSpeech::Postposition],
|
430
|
-
:extra => [{:reading => 'シテ', :transcription => 'シテ', :grammar => nil},
|
431
|
-
{:reading => 'イル', :transcription => 'イル', :grammar => :auxillary},
|
432
|
-
{:reading => 'カラ', :transcription => 'カラ', :grammar => nil}],
|
433
|
-
:tokens => [0..0, 1..1, 2..2]},
|
434
|
-
'基準があるが、')
|
435
|
-
|
436
|
-
# TODO: xした should parse as adjective?
|
437
|
-
assert_parses_into_words(mecab, {:words => [],
|
438
|
-
:lemmas => [],
|
439
|
-
:pos => [],
|
440
|
-
:extra => [],
|
441
|
-
:tokens => []},
|
442
|
-
'')
|
443
|
-
end
|
444
|
-
|
445
|
-
def todo_test_word_transliteration
|
446
|
-
mecab = Ve::Provider::MecabIpadic.new
|
447
|
-
parse = mecab.parse('日本', :transliterate_words => :latn)
|
448
|
-
|
449
|
-
assert_equal 'nihon', parse.words.first.transliteration(:latn)
|
450
|
-
end
|
451
|
-
|
452
|
-
end
|