langue-japanese 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. data/Gemfile +1 -0
  2. data/lib/langue/japanese/inflector.rb +44 -0
  3. data/lib/langue/japanese/inflector/default.rb +454 -0
  4. data/lib/langue/japanese/inflector/inflection.rb +59 -0
  5. data/lib/langue/japanese/inflector/inflections.rb +56 -0
  6. data/lib/langue/japanese/language.rb +9 -0
  7. data/lib/langue/japanese/parser.rb +24 -7
  8. data/lib/langue/japanese/shaper.rb +2 -2
  9. data/lib/langue/japanese/structurer.rb +6 -1
  10. data/lib/langue/japanese/version.rb +1 -1
  11. data/lib/langue/japanese/words/adjectival_noun.rb +67 -0
  12. data/lib/langue/japanese/words/adjective.rb +37 -33
  13. data/lib/langue/japanese/words/adverb.rb +24 -0
  14. data/lib/langue/japanese/words/attribute.rb +52 -28
  15. data/lib/langue/japanese/words/classifier.rb +37 -5
  16. data/lib/langue/japanese/words/conjunction.rb +18 -0
  17. data/lib/langue/japanese/words/determiner.rb +24 -0
  18. data/lib/langue/japanese/words/interjection.rb +18 -0
  19. data/lib/langue/japanese/words/morpheme_filter.rb +40 -17
  20. data/lib/langue/japanese/words/noun.rb +50 -43
  21. data/lib/langue/japanese/words/particle.rb +24 -0
  22. data/lib/langue/japanese/words/period.rb +26 -26
  23. data/lib/langue/japanese/words/prefix.rb +13 -5
  24. data/lib/langue/japanese/words/pronoun.rb +5 -7
  25. data/lib/langue/japanese/words/verb.rb +59 -64
  26. data/spec/langue/japanese/data.yaml +53 -5
  27. data/spec/langue/japanese/inflector/inflection_spec.rb +80 -0
  28. data/spec/langue/japanese/inflector/inflections_spec.rb +83 -0
  29. data/spec/langue/japanese/inflector_spec.rb +1551 -0
  30. data/spec/langue/japanese/language_spec.rb +36 -0
  31. data/spec/langue/japanese/parser_spec.rb +100 -28
  32. data/spec/langue/japanese/structurer_spec.rb +8 -2
  33. data/spec/langue/japanese/words/{adjective_noun_spec.rb → adjectival_noun_spec.rb} +18 -12
  34. data/spec/langue/japanese/words/adjective_spec.rb +15 -0
  35. data/spec/langue/japanese/words/adverb_spec.rb +25 -0
  36. data/spec/langue/japanese/words/conjunction_spec.rb +25 -0
  37. data/spec/langue/japanese/words/determiner_spec.rb +25 -0
  38. data/spec/langue/japanese/words/interjection_spec.rb +25 -0
  39. data/spec/langue/japanese/words/noun_spec.rb +19 -0
  40. data/spec/langue/japanese/words/particle_spec.rb +26 -0
  41. data/spec/langue/japanese/words/period_spec.rb +6 -0
  42. data/spec/langue/japanese/words/pronoun_spec.rb +6 -0
  43. data/spec/langue/japanese/words/verb_spec.rb +79 -48
  44. data/spec/spec_helper.rb +23 -3
  45. metadata +22 -5
  46. data/lib/langue/japanese/words/adjective_noun.rb +0 -76
data/Gemfile CHANGED
@@ -1,4 +1,5 @@
1
1
  source 'https://rubygems.org'
2
+ source 'http://gems.atedesign.co.jp' # credentials removed: set them with `bundle config gems.atedesign.co.jp USER:PASS` (or the matching BUNDLE_ env var), never in the Gemfile. NOTE(review): plain HTTP sends them unencrypted - switch to https if the host supports it
2
3
 
3
4
  # Specify your gem's dependencies in langue-japanese.gemspec
4
5
  gemspec
@@ -0,0 +1,44 @@
1
+ require 'langue/japanese/inflector/inflections'
2
+ require 'langue/japanese/logging'
3
+
4
+ module Langue
5
+ module Japanese
6
+ class Inflector
7
+ include Logging
8
+
9
+ # Get the class-level inflections registry, creating it on first use.
10
+ #
11
+ # If given a block, instance_eval it against the registry to define the inflections.
12
+ # @yield [] define the inflections (self is the Inflections registry inside the block)
13
+ # @return [Inflections] the registry itself
14
+ def self.inflections(&define)
15
+ (@inflections ||= Inflections.new).tap do |inflections| # memoized in a class instance variable
16
+ inflections.instance_eval(&define) if block_given?
17
+ end
18
+ end
19
+
20
+ # @param [Hash] options
21
+ # @option options [Logger] :logger logger to use; falls back to null_logger when absent
22
+ def initialize(options = {})
23
+ @logger = options[:logger] || null_logger
24
+ end
25
+
26
+ # Inflect the word using the inflection registered for the classification.
27
+ # @raise [ArgumentError] if no inflection is registered for the classification
28
+ # @param [String] classification the inflectional classification
29
+ # @param [String] word root form of the word to inflect
30
+ # @param [String] form the inflectional form
31
+ # @param [Hash] options passed through unchanged to the inflection
32
+ # @option options [String] :following NOTE(review): semantics are defined by the inflection, not here - confirm
33
+ # @option options [Boolean] :desu selects です-style endings in inflections that check it
34
+ # @return [String] the inflected word
35
+ def inflect(classification, word, form, options = {})
36
+ inflection = self.class.inflections[classification] # look up by classification name
37
+ raise ArgumentError, %("#{classification}" inflection does not exist) unless inflection
38
+ inflection.inflect(word, form, options)
39
+ end
40
+ end
41
+ end
42
+ end
43
+
44
+ require 'langue/japanese/inflector/default'
@@ -0,0 +1,454 @@
1
+ # -*- coding: utf-8 -*-
2
+ Langue::Japanese::Inflector.inflections do
3
+ adjective_forms = %w(
4
+ 未然形
5
+ 未然ウ接続
6
+ 未然ヌ接続
7
+ 連用タ接続
8
+ 連用テ接続
9
+ 連用ゴザイ接続
10
+ 終止形
11
+ 終止形-感動
12
+ 連体形
13
+ 体言接続
14
+ 仮定形
15
+ 仮定縮約1
16
+ 仮定縮約2
17
+ ガル接続
18
+ )
19
+
20
+ category *adjective_forms do
21
+ inflection '形容詞・アウオ段', 'い', {
22
+ '未然形' => 'く',
23
+ '未然ウ接続' => 'かろ',
24
+ '未然ヌ接続' => 'から',
25
+ '連用タ接続' => 'かっ',
26
+ '連用テ接続' => 'く',
27
+ '連用ゴザイ接続' => 'う',
28
+ '終止形' => 'い',
29
+ '終止形-感動' => '',
30
+ '連体形' => 'い',
31
+ '体言接続' => 'き',
32
+ '仮定形' => 'けれ',
33
+ '仮定縮約1' => 'けりゃ',
34
+ '仮定縮約2' => 'きゃ',
35
+ 'ガル接続' => ''
36
+ }
37
+
38
+ inflection '形容詞・イ段', 'い', {
39
+ '未然形' => 'く',
40
+ '未然ウ接続' => 'かろ',
41
+ '未然ヌ接続' => 'から',
42
+ '連用タ接続' => 'かっ',
43
+ '連用テ接続' => 'く',
44
+ '連用ゴザイ接続' => 'ゅう',
45
+ '終止形' => 'い',
46
+ '終止形-感動' => '',
47
+ '連体形' => 'い',
48
+ '体言接続' => 'き',
49
+ '仮定形' => 'けれ',
50
+ '仮定縮約1' => 'けりゃ',
51
+ '仮定縮約2' => 'きゃ',
52
+ 'ガル接続' => ''
53
+ }
54
+
55
+ inflection '形容動詞', '', {
56
+ '未然形' => 'じゃ',
57
+ '未然ウ接続' => lambda { |options| options[:desu] ? 'でしょ' : 'だろ' },
58
+ '未然ヌ接続' => 'なら',
59
+ '連用タ接続' => lambda { |options| options[:desu] ? 'でし' : 'だっ' },
60
+ '連用テ接続' => lambda { |options| options[:desu] ? 'でし' : ['', true] },
61
+ '連用ゴザイ接続' => 'で',
62
+ '終止形' => lambda { |options| options[:desu] ? 'です' : 'だ' },
63
+ '終止形-感動' => '',
64
+ '連体形' => 'な',
65
+ '体言接続' => 'な',
66
+ '仮定形' => 'なら',
67
+ '仮定縮約1' => 'なら',
68
+ '仮定縮約2' => 'なら',
69
+ 'ガル接続' => ''
70
+ }
71
+ end
72
+
73
+ verb_forms = %w(
74
+ 未然形
75
+ 未然ウ接続
76
+ 未然ヌ接続
77
+ 未然レル接続
78
+ 連用形
79
+ 連用タ接続
80
+ 終止形
81
+ 仮定形
82
+ 仮定縮約
83
+ 命令形
84
+ )
85
+
86
+ category *verb_forms do
87
+ inflection '一段', 'る', {
88
+ '未然形' => '',
89
+ '未然ウ接続' => 'よ',
90
+ '未然ヌ接続' => '',
91
+ '未然レル接続' => '',
92
+ '連用形' => '',
93
+ '連用タ接続' => '',
94
+ '終止形' => 'る',
95
+ '仮定形' => 'れ',
96
+ '仮定縮約' => 'りゃ',
97
+ '命令形' => 'ろ'
98
+ }
99
+
100
+ inflection '一段・クレル', 'る', {
101
+ '未然形' => '',
102
+ '未然ウ接続' => 'よ',
103
+ '未然ヌ接続' => '',
104
+ '未然レル接続' => '',
105
+ '連用形' => '',
106
+ '連用タ接続' => '',
107
+ '終止形' => 'る',
108
+ '仮定形' => 'れ',
109
+ '仮定縮約' => 'りゃ',
110
+ '命令形' => ''
111
+ }
112
+
113
+ inflection '五段・カ行イ音便', 'く', {
114
+ '未然形' => 'か',
115
+ '未然ウ接続' => 'こ',
116
+ '未然ヌ接続' => 'か',
117
+ '未然レル接続' => 'か',
118
+ '連用形' => 'き',
119
+ '連用タ接続' => 'い',
120
+ '終止形' => 'く',
121
+ '仮定形' => 'け',
122
+ '仮定縮約' => 'きゃ',
123
+ '命令形' => 'け'
124
+ }
125
+
126
+ inflection '五段・カ行促音便', 'く', {
127
+ '未然形' => 'か',
128
+ '未然ウ接続' => 'こ',
129
+ '未然ヌ接続' => 'か',
130
+ '未然レル接続' => 'か',
131
+ '連用形' => 'き',
132
+ '連用タ接続' => 'っ',
133
+ '終止形' => 'く',
134
+ '仮定形' => 'け',
135
+ '仮定縮約' => 'きゃ',
136
+ '命令形' => 'け'
137
+ }
138
+
139
+ inflection '五段・カ行促音便ユク', 'く', {
140
+ '未然形' => 'か',
141
+ '未然ウ接続' => 'こ',
142
+ '未然ヌ接続' => 'か',
143
+ '未然レル接続' => 'か',
144
+ '連用形' => 'き',
145
+ '連用タ接続' => 'っ',
146
+ '終止形' => 'く',
147
+ '仮定形' => 'け',
148
+ '仮定縮約' => 'きゃ',
149
+ '命令形' => 'け'
150
+ }
151
+
152
+ inflection 'カ変・クル', 'くる', {
153
+ '未然形' => 'こ',
154
+ '未然ウ接続' => 'こよ',
155
+ '未然ヌ接続' => 'こ',
156
+ '未然レル接続' => 'こ',
157
+ '連用形' => 'き',
158
+ '連用タ接続' => 'き',
159
+ '終止形' => 'くる',
160
+ '仮定形' => 'くれ',
161
+ '仮定縮約' => 'くりゃ',
162
+ '命令形' => 'こい'
163
+ }
164
+
165
+ inflection 'カ変・来ル', 'る', {
166
+ '未然形' => '',
167
+ '未然ウ接続' => 'よ',
168
+ '未然ヌ接続' => '',
169
+ '未然レル接続' => '',
170
+ '連用形' => '',
171
+ '連用タ接続' => '',
172
+ '終止形' => 'る',
173
+ '仮定形' => 'れ',
174
+ '仮定縮約' => 'りゃ',
175
+ '命令形' => 'い'
176
+ }
177
+
178
+ inflection '五段・ガ行', 'ぐ', {
179
+ '未然形' => 'が',
180
+ '未然ウ接続' => 'ご',
181
+ '未然ヌ接続' => 'が',
182
+ '未然レル接続' => 'が',
183
+ '連用形' => 'ぎ',
184
+ '連用タ接続' => ['い', true],
185
+ '終止形' => 'ぐ',
186
+ '仮定形' => 'げ',
187
+ '仮定縮約' => 'ぎゃ',
188
+ '命令形' => 'げ'
189
+ }
190
+
191
+ inflection '五段・サ行', 'す', {
192
+ '未然形' => 'さ',
193
+ '未然ウ接続' => 'そ',
194
+ '未然ヌ接続' => 'さ',
195
+ '未然レル接続' => 'さ',
196
+ '連用形' => 'し',
197
+ '連用タ接続' => 'し',
198
+ '終止形' => 'す',
199
+ '仮定形' => 'せ',
200
+ '仮定縮約' => 'しゃ',
201
+ '命令形' => 'せ'
202
+ }
203
+
204
+ inflection '四段・サ行', 'す', {
205
+ '未然形' => 'さ',
206
+ '未然ウ接続' => 'そ',
207
+ '未然ヌ接続' => 'さ',
208
+ '未然レル接続' => 'さ',
209
+ '連用形' => 'し',
210
+ '連用タ接続' => 'し',
211
+ '終止形' => 'す',
212
+ '仮定形' => 'せ',
213
+ '仮定縮約' => 'しゃ',
214
+ '命令形' => 'せ'
215
+ }
216
+
217
+ inflection 'サ変・スル', 'する', {
218
+ '未然形' => 'し',
219
+ '未然ウ接続' => 'しよ',
220
+ '未然ヌ接続' => 'せ',
221
+ '未然レル接続' => 'さ',
222
+ '連用形' => 'し',
223
+ '連用タ接続' => 'し',
224
+ '終止形' => 'する',
225
+ '仮定形' => 'すれ',
226
+ '仮定縮約' => 'すりゃ',
227
+ '命令形' => 'しろ'
228
+ }
229
+
230
+ inflection 'サ変・−スル', 'する', {
231
+ '未然形' => 'し',
232
+ '未然ウ接続' => 'しよ',
233
+ '未然ヌ接続' => 'し',
234
+ '未然レル接続' => 'せ',
235
+ '連用形' => 'し',
236
+ '連用タ接続' => 'し',
237
+ '終止形' => 'する',
238
+ '仮定形' => 'すれ',
239
+ '仮定縮約' => 'すりゃ',
240
+ '命令形' => 'しろ'
241
+ }
242
+
243
+ inflection 'サ変・−ズル', 'ずる', {
244
+ '未然形' => 'ぜ',
245
+ '未然ウ接続' => 'ぜよ',
246
+ '未然ヌ接続' => 'ぜ',
247
+ '未然レル接続' => 'ぜ',
248
+ '連用形' => 'じ',
249
+ '連用タ接続' => 'じ',
250
+ '終止形' => 'ずる',
251
+ '仮定形' => 'ずれ',
252
+ '仮定縮約' => 'ずりゃ',
253
+ '命令形' => 'じろ'
254
+ }
255
+
256
+ inflection '五段・タ行', 'つ', {
257
+ '未然形' => 'た',
258
+ '未然ウ接続' => 'と',
259
+ '未然ヌ接続' => 'た',
260
+ '未然レル接続' => 'た',
261
+ '連用形' => 'ち',
262
+ '連用タ接続' => 'っ',
263
+ '終止形' => 'つ',
264
+ '仮定形' => 'て',
265
+ '仮定縮約' => 'ちゃ',
266
+ '命令形' => 'て'
267
+ }
268
+
269
+ inflection '四段・タ行', 'つ', {
270
+ '未然形' => 'た',
271
+ '未然ウ接続' => 'と',
272
+ '未然ヌ接続' => 'た',
273
+ '未然レル接続' => 'た',
274
+ '連用形' => 'ち',
275
+ '連用タ接続' => 'っ',
276
+ '終止形' => 'つ',
277
+ '仮定形' => 'て',
278
+ '仮定縮約' => 'ちゃ',
279
+ '命令形' => 'て'
280
+ }
281
+
282
+ inflection '五段・ナ行', 'ぬ', {
283
+ '未然形' => 'な',
284
+ '未然ウ接続' => 'の',
285
+ '未然ヌ接続' => 'な',
286
+ '未然レル接続' => 'な',
287
+ '連用形' => 'に',
288
+ '連用タ接続' => ['ん', true],
289
+ '終止形' => 'ぬ',
290
+ '仮定形' => 'ね',
291
+ '仮定縮約' => 'にゃ',
292
+ '命令形' => 'ね'
293
+ }
294
+
295
+ inflection '四段・ハ行', 'ふ', {
296
+ '未然形' => 'は',
297
+ '未然ウ接続' => 'ほ',
298
+ '未然ヌ接続' => 'は',
299
+ '未然レル接続' => 'は',
300
+ '連用形' => 'ひ',
301
+ '連用タ接続' => 'っ',
302
+ '終止形' => 'ふ',
303
+ '仮定形' => 'へ',
304
+ '仮定縮約' => 'ひゃ',
305
+ '命令形' => 'へ'
306
+ }
307
+
308
+ inflection '五段・バ行', 'ぶ', {
309
+ '未然形' => 'ば',
310
+ '未然ウ接続' => 'ぼ',
311
+ '未然ヌ接続' => 'ば',
312
+ '未然レル接続' => 'ば',
313
+ '連用形' => 'び',
314
+ '連用タ接続' => ['ん', true],
315
+ '終止形' => 'ぶ',
316
+ '仮定形' => 'べ',
317
+ '仮定縮約' => 'びゃ',
318
+ '命令形' => 'べ'
319
+ }
320
+
321
+ inflection '五段・マ行', 'む', {
322
+ '未然形' => 'ま',
323
+ '未然ウ接続' => 'も',
324
+ '未然ヌ接続' => 'ま',
325
+ '未然レル接続' => 'ま',
326
+ '連用形' => 'み',
327
+ '連用タ接続' => ['ん', true],
328
+ '終止形' => 'む',
329
+ '仮定形' => 'め',
330
+ '仮定縮約' => 'みゃ',
331
+ '命令形' => 'め'
332
+ }
333
+
334
+ inflection '五段・ラ行', 'る', {
335
+ '未然形' => 'ら',
336
+ '未然ウ接続' => 'ろ',
337
+ '未然ヌ接続' => 'ら',
338
+ '未然レル接続' => 'ら',
339
+ '連用形' => 'り',
340
+ '連用タ接続' => 'っ',
341
+ '終止形' => 'る',
342
+ '仮定形' => 'れ',
343
+ '仮定縮約' => 'りゃ',
344
+ '命令形' => 'れ'
345
+ }
346
+
347
+ inflection '五段・ラ行特殊', 'る', {
348
+ '未然形' => 'ら',
349
+ '未然ウ接続' => 'ろ',
350
+ '未然ヌ接続' => 'ら',
351
+ '未然レル接続' => 'ら',
352
+ '連用形' => 'い',
353
+ '連用タ接続' => 'っ',
354
+ '終止形' => 'る',
355
+ '仮定形' => 'れ',
356
+ '仮定縮約' => 'りゃ',
357
+ '命令形' => 'い'
358
+ }
359
+
360
+ inflection '五段・ワ行ウ音便', 'う', {
361
+ '未然形' => 'わ',
362
+ '未然ウ接続' => 'お',
363
+ '未然ヌ接続' => 'わ',
364
+ '未然レル接続' => 'わ',
365
+ '連用形' => 'い',
366
+ '連用タ接続' => 'う',
367
+ '終止形' => 'う',
368
+ '仮定形' => 'え',
369
+ '仮定縮約' => 'や',
370
+ '命令形' => 'え'
371
+ }
372
+
373
+ inflection '五段・ワ行促音便', 'う', {
374
+ '未然形' => 'わ',
375
+ '未然ウ接続' => 'お',
376
+ '未然ヌ接続' => 'わ',
377
+ '未然レル接続' => 'わ',
378
+ '連用形' => 'い',
379
+ '連用タ接続' => 'っ',
380
+ '終止形' => 'う',
381
+ '仮定形' => 'え',
382
+ '仮定縮約' => 'や',
383
+ '命令形' => 'え'
384
+ }
385
+ end
386
+
387
+ auxiliary_verb_forms = %w()
388
+
389
+ category *auxiliary_verb_forms do
390
+ end
391
+
392
+ inflection '特殊・ナイ', 'ない', {
393
+ '未然ウ接続' => 'なかろ',
394
+ '未然ヌ接続' => 'なから',
395
+ '連用タ接続' => 'なかっ',
396
+ '連用テ接続' => 'なく',
397
+ '連用デ接続' => 'ない',
398
+ '連用ゴザイ接続' => 'のう',
399
+ '終止形' => 'ない',
400
+ '音便終止形' => 'ねえ',
401
+ '体言接続' => 'なき',
402
+ '仮定形' => 'なけれ',
403
+ '仮定縮約1' => 'なけりゃ',
404
+ '仮定縮約2' => 'なきゃ',
405
+ 'ガル接続' => 'な',
406
+ '命令形' => 'なかれ'
407
+ }
408
+
409
+ inflection '特殊・タイ', 'たい', {
410
+ '未然ウ接続' => 'たかろ',
411
+ '未然ヌ接続' => 'たから',
412
+ '連用タ接続' => 'たかっ',
413
+ '連用テ接続' => 'たく',
414
+ '連用ゴザイ接続' => 'とう',
415
+ '終止形' => 'たい',
416
+ '音便終止形' => 'てえ',
417
+ '体言接続' => 'たき',
418
+ '仮定形' => 'たけれ',
419
+ '仮定縮約1' => 'たけりゃ',
420
+ '仮定縮約2' => 'たきゃ',
421
+ 'ガル接続' => 'た'
422
+ }
423
+
424
+ inflection '特殊・デス', 'す', {
425
+ '未然形' => 'しょ',
426
+ '連用形' => 'し',
427
+ '終止形' => 'す'
428
+ }
429
+
430
+ inflection '特殊・マス', 'す', {
431
+ '終止形' => 'す',
432
+ '未然形' => 'せ',
433
+ '未然ウ接続' => 'しょ',
434
+ '連用形' => 'し',
435
+ '仮定形' => 'すれ',
436
+ '命令形' => 'せ'
437
+ }
438
+
439
+ inflection '特殊・タ', '', {
440
+ '未然形' => 'ろ',
441
+ '終止形' => '',
442
+ '仮定形' => 'ら'
443
+ }
444
+
445
+ inflection '特殊・ダ', 'だ', {
446
+ '未然形' => 'だろ',
447
+ '連用形' => 'で',
448
+ '連用タ接続' => 'だっ',
449
+ '終止形' => 'だ',
450
+ '体言接続' => 'な',
451
+ '仮定形' => 'なら',
452
+ '命令形' => 'なれ'
453
+ }
454
+ end