japanese_deinflector 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -0
- data/.rspec +2 -0
- data/.travis.yml +7 -0
- data/Gemfile +4 -0
- data/Rakefile +8 -0
- data/japanese_deinflector.gemspec +21 -0
- data/lib/data/deinflect.dat +336 -0
- data/lib/data/deinflect.json +1 -0
- data/lib/deinflect_to_json.rb +32 -0
- data/lib/japanese_deinflector/version.rb +3 -0
- data/lib/japanese_deinflector.rb +41 -0
- data/spec/japanese_deinflector_spec.rb +70 -0
- data/spec/spec_helper.rb +13 -0
- metadata +108 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require 'japanese_deinflector/version'
|
4
|
+
|
5
|
+
# Gem specification for japanese_deinflector.
Gem::Specification.new do |spec|
  # Identity
  spec.name        = 'japanese_deinflector'
  spec.version     = JapaneseDeinflector::VERSION
  spec.date        = '2012-08-28'

  # Description and contact details
  spec.summary     = "Deinflect (unconjugate/undecline) Japanese words."
  spec.description = "Deinflect (unconjugate/undecline) Japanese words."
  spec.authors     = ["Peter Graham"]
  spec.email       = ["pete@gigadrill.com"]
  spec.homepage    = 'http://github.com/6/japanese_deinflector'

  # Packaged files are whatever git currently tracks
  tracked_files      = `git ls-files`.split("\n")
  tracked_test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.files         = tracked_files
  spec.test_files    = tracked_test_files
  spec.require_paths = ["lib"]

  # Development-only dependencies
  ["json", "rake", "rspec"].each do |gem_name|
    spec.add_development_dependency gem_name
  end
end
|
@@ -0,0 +1,336 @@
|
|
1
|
+
Deinflect Rules 20081220-0509 | by Jonathan Zarate | http://www.polarcloud.com
|
2
|
+
polite past negative
|
3
|
+
polite negative
|
4
|
+
polite volitional
|
5
|
+
-chau
|
6
|
+
-sugiru
|
7
|
+
-nasai
|
8
|
+
polite past
|
9
|
+
-tara
|
10
|
+
-tari
|
11
|
+
causative
|
12
|
+
potential or passive
|
13
|
+
-sou
|
14
|
+
-tai
|
15
|
+
polite
|
16
|
+
past
|
17
|
+
negative
|
18
|
+
passive
|
19
|
+
-ba
|
20
|
+
volitional
|
21
|
+
potential
|
22
|
+
passive or causative
|
23
|
+
-te
|
24
|
+
-zu
|
25
|
+
imperative
|
26
|
+
masu stem
|
27
|
+
adv
|
28
|
+
noun
|
29
|
+
imperative negative
|
30
|
+
くありませんでした い 1152 0
|
31
|
+
いませんでした う 640 0
|
32
|
+
きませんでした く 640 0
|
33
|
+
きませんでした くる 2176 0
|
34
|
+
ぎませんでした ぐ 640 0
|
35
|
+
しませんでした す 640 0
|
36
|
+
しませんでした する 4224 0
|
37
|
+
ちませんでした つ 640 0
|
38
|
+
にませんでした ぬ 640 0
|
39
|
+
びませんでした ぶ 640 0
|
40
|
+
みませんでした む 640 0
|
41
|
+
りませんでした る 640 0
|
42
|
+
くありません い 1152 1
|
43
|
+
ませんでした る 2432 0
|
44
|
+
いましょう う 640 2
|
45
|
+
きましょう く 640 2
|
46
|
+
きましょう くる 2176 2
|
47
|
+
ぎましょう ぐ 640 2
|
48
|
+
しましょう す 640 2
|
49
|
+
しましょう する 4224 2
|
50
|
+
ちましょう つ 640 2
|
51
|
+
にましょう ぬ 640 2
|
52
|
+
びましょう ぶ 640 2
|
53
|
+
みましょう む 640 2
|
54
|
+
りましょう る 640 2
|
55
|
+
いじゃう ぐ 514 3
|
56
|
+
いすぎる う 513 4
|
57
|
+
いちゃう く 514 3
|
58
|
+
いなさい う 640 5
|
59
|
+
いました う 640 6
|
60
|
+
いません う 640 1
|
61
|
+
かったら い 1152 7
|
62
|
+
かったり い 1152 8
|
63
|
+
きすぎる く 513 4
|
64
|
+
きすぎる くる 2049 4
|
65
|
+
ぎすぎる ぐ 513 4
|
66
|
+
きちゃう くる 2050 3
|
67
|
+
きなさい く 640 5
|
68
|
+
きなさい くる 2176 5
|
69
|
+
ぎなさい ぐ 640 5
|
70
|
+
きました く 640 6
|
71
|
+
きました くる 2176 6
|
72
|
+
ぎました ぐ 640 6
|
73
|
+
きません く 640 1
|
74
|
+
きません くる 2176 1
|
75
|
+
ぎません ぐ 640 1
|
76
|
+
こさせる くる 2049 9
|
77
|
+
こられる くる 2049 10
|
78
|
+
しすぎる す 513 4
|
79
|
+
しすぎる する 4097 4
|
80
|
+
しちゃう す 514 3
|
81
|
+
しちゃう する 4098 3
|
82
|
+
しなさい す 640 5
|
83
|
+
しなさい する 4224 5
|
84
|
+
しました す 640 6
|
85
|
+
しました する 4224 6
|
86
|
+
しません す 640 1
|
87
|
+
しません する 4224 1
|
88
|
+
ちすぎる つ 513 4
|
89
|
+
ちなさい つ 640 5
|
90
|
+
ちました つ 640 6
|
91
|
+
ちません つ 640 1
|
92
|
+
っちゃう う 514 3
|
93
|
+
っちゃう く 514 3
|
94
|
+
っちゃう つ 514 3
|
95
|
+
っちゃう る 514 3
|
96
|
+
にすぎる ぬ 513 4
|
97
|
+
になさい ぬ 640 5
|
98
|
+
にました ぬ 640 6
|
99
|
+
にません ぬ 640 1
|
100
|
+
びすぎる ぶ 513 4
|
101
|
+
びなさい ぶ 640 5
|
102
|
+
びました ぶ 640 6
|
103
|
+
びません ぶ 640 1
|
104
|
+
ましょう る 2432 2
|
105
|
+
みすぎる む 513 4
|
106
|
+
みなさい む 640 5
|
107
|
+
みました む 640 6
|
108
|
+
みません む 640 1
|
109
|
+
りすぎる る 513 4
|
110
|
+
りなさい る 640 5
|
111
|
+
りました る 640 6
|
112
|
+
りません る 640 1
|
113
|
+
んじゃう ぬ 514 3
|
114
|
+
んじゃう ぶ 514 3
|
115
|
+
んじゃう む 514 3
|
116
|
+
いそう う 640 11
|
117
|
+
いたい う 516 12
|
118
|
+
いたら く 640 7
|
119
|
+
いだら ぐ 640 7
|
120
|
+
いたり く 640 8
|
121
|
+
いだり ぐ 640 8
|
122
|
+
います う 640 13
|
123
|
+
かせる く 513 9
|
124
|
+
がせる ぐ 513 9
|
125
|
+
かった い 1152 14
|
126
|
+
かない く 516 15
|
127
|
+
がない ぐ 516 15
|
128
|
+
かれる く 513 16
|
129
|
+
がれる ぐ 513 16
|
130
|
+
きそう く 640 11
|
131
|
+
きそう くる 2176 11
|
132
|
+
ぎそう ぐ 640 11
|
133
|
+
きたい く 516 12
|
134
|
+
きたい くる 2052 12
|
135
|
+
ぎたい ぐ 516 12
|
136
|
+
きたら くる 2176 7
|
137
|
+
きたり くる 2176 8
|
138
|
+
きます く 640 13
|
139
|
+
きます くる 2176 13
|
140
|
+
ぎます ぐ 640 13
|
141
|
+
くない い 1028 15
|
142
|
+
ければ い 1152 17
|
143
|
+
こない くる 2052 15
|
144
|
+
こよう くる 2176 18
|
145
|
+
これる くる 2049 19
|
146
|
+
させる する 4097 9
|
147
|
+
させる る 2305 9
|
148
|
+
さない す 516 15
|
149
|
+
される す 513 20
|
150
|
+
される する 4097 16
|
151
|
+
しそう す 640 11
|
152
|
+
しそう する 4224 11
|
153
|
+
したい す 516 12
|
154
|
+
したい する 4100 12
|
155
|
+
したら す 640 7
|
156
|
+
したら する 4224 7
|
157
|
+
したり す 640 8
|
158
|
+
したり する 4224 8
|
159
|
+
しない する 4100 15
|
160
|
+
します す 640 13
|
161
|
+
します する 4224 13
|
162
|
+
しよう する 4224 18
|
163
|
+
すぎる い 1025 4
|
164
|
+
すぎる る 2305 4
|
165
|
+
たせる つ 513 9
|
166
|
+
たない つ 516 15
|
167
|
+
たれる つ 513 16
|
168
|
+
ちそう つ 640 11
|
169
|
+
ちたい つ 516 12
|
170
|
+
ちます つ 640 13
|
171
|
+
ちゃう る 2306 3
|
172
|
+
ったら う 640 7
|
173
|
+
ったら つ 640 7
|
174
|
+
ったら る 640 7
|
175
|
+
ったり う 640 8
|
176
|
+
ったり つ 640 8
|
177
|
+
ったり る 640 8
|
178
|
+
なさい る 2432 5
|
179
|
+
なせる ぬ 513 9
|
180
|
+
なない ぬ 516 15
|
181
|
+
なれる ぬ 513 16
|
182
|
+
にそう ぬ 640 11
|
183
|
+
にたい ぬ 516 12
|
184
|
+
にます ぬ 640 13
|
185
|
+
ばせる ぶ 513 9
|
186
|
+
ばない ぶ 516 15
|
187
|
+
ばれる ぶ 513 16
|
188
|
+
びそう ぶ 640 11
|
189
|
+
びたい ぶ 516 12
|
190
|
+
びます ぶ 640 13
|
191
|
+
ました る 2432 6
|
192
|
+
ませる む 513 9
|
193
|
+
ません る 2432 1
|
194
|
+
まない む 516 15
|
195
|
+
まれる む 513 16
|
196
|
+
みそう む 640 11
|
197
|
+
みたい む 516 12
|
198
|
+
みます む 640 13
|
199
|
+
らせる る 513 9
|
200
|
+
らない る 516 15
|
201
|
+
られる る 2817 10
|
202
|
+
りそう る 640 11
|
203
|
+
りたい る 516 12
|
204
|
+
ります る 640 13
|
205
|
+
わせる う 513 9
|
206
|
+
わない う 516 15
|
207
|
+
われる う 513 16
|
208
|
+
んだら ぬ 640 7
|
209
|
+
んだら ぶ 640 7
|
210
|
+
んだら む 640 7
|
211
|
+
んだり ぬ 640 8
|
212
|
+
んだり ぶ 640 8
|
213
|
+
んだり む 640 8
|
214
|
+
いた く 640 14
|
215
|
+
いだ ぐ 640 14
|
216
|
+
いて く 640 21
|
217
|
+
いで ぐ 640 21
|
218
|
+
えば う 640 17
|
219
|
+
える う 513 19
|
220
|
+
おう う 640 18
|
221
|
+
かず く 640 22
|
222
|
+
がず ぐ 640 22
|
223
|
+
きた くる 2176 14
|
224
|
+
きて くる 2176 21
|
225
|
+
くて い 1152 21
|
226
|
+
けば く 640 17
|
227
|
+
げば ぐ 640 17
|
228
|
+
ける く 513 19
|
229
|
+
げる ぐ 513 19
|
230
|
+
こい くる 2176 23
|
231
|
+
こう く 640 18
|
232
|
+
ごう ぐ 640 18
|
233
|
+
こず くる 2176 22
|
234
|
+
さず す 640 22
|
235
|
+
した す 640 14
|
236
|
+
した する 4224 14
|
237
|
+
して す 640 21
|
238
|
+
して する 4224 21
|
239
|
+
しろ する 4224 23
|
240
|
+
せず する 4224 22
|
241
|
+
せば す 640 17
|
242
|
+
せよ する 4224 23
|
243
|
+
せる す 513 19
|
244
|
+
そう い 1152 11
|
245
|
+
そう す 640 18
|
246
|
+
そう る 2432 11
|
247
|
+
たい る 2308 12
|
248
|
+
たず つ 640 22
|
249
|
+
たら る 2432 7
|
250
|
+
たり る 2432 8
|
251
|
+
った う 640 14
|
252
|
+
った く 640 14
|
253
|
+
った つ 640 14
|
254
|
+
った る 640 14
|
255
|
+
って う 640 21
|
256
|
+
って く 640 21
|
257
|
+
って つ 640 21
|
258
|
+
って る 640 21
|
259
|
+
てば つ 640 17
|
260
|
+
てる つ 513 19
|
261
|
+
とう つ 640 18
|
262
|
+
ない る 2308 15
|
263
|
+
なず ぬ 640 22
|
264
|
+
ねば ぬ 640 17
|
265
|
+
ねる ぬ 513 19
|
266
|
+
のう ぬ 640 18
|
267
|
+
ばず ぶ 640 22
|
268
|
+
べば ぶ 640 17
|
269
|
+
べる ぶ 513 19
|
270
|
+
ぼう ぶ 640 18
|
271
|
+
ます る 2432 13
|
272
|
+
まず む 640 22
|
273
|
+
めば む 640 17
|
274
|
+
める む 513 19
|
275
|
+
もう む 640 18
|
276
|
+
よう る 2432 18
|
277
|
+
らず る 640 22
|
278
|
+
れば る 7040 17
|
279
|
+
れる る 2817 19
|
280
|
+
ろう る 640 18
|
281
|
+
わず う 640 22
|
282
|
+
んだ ぬ 640 14
|
283
|
+
んだ ぶ 640 14
|
284
|
+
んだ む 640 14
|
285
|
+
んで ぬ 640 21
|
286
|
+
んで ぶ 640 21
|
287
|
+
んで む 640 21
|
288
|
+
い いる 384 24
|
289
|
+
い う 640 24
|
290
|
+
い る 2176 23
|
291
|
+
え う 640 23
|
292
|
+
え える 384 24
|
293
|
+
き きる 384 24
|
294
|
+
き く 640 24
|
295
|
+
ぎ ぎる 384 24
|
296
|
+
ぎ ぐ 640 24
|
297
|
+
く い 1152 25
|
298
|
+
け く 640 23
|
299
|
+
け ける 384 24
|
300
|
+
げ ぐ 640 23
|
301
|
+
げ げる 384 24
|
302
|
+
さ い 1152 26
|
303
|
+
し す 640 24
|
304
|
+
じ じる 384 24
|
305
|
+
ず る 2432 22
|
306
|
+
せ す 640 23
|
307
|
+
せ せる 384 24
|
308
|
+
ぜ ぜる 384 24
|
309
|
+
た る 2432 14
|
310
|
+
ち ちる 384 24
|
311
|
+
ち つ 640 24
|
312
|
+
て つ 640 23
|
313
|
+
て てる 384 24
|
314
|
+
て る 2432 21
|
315
|
+
で でる 384 24
|
316
|
+
な 7040 27
|
317
|
+
に にる 384 24
|
318
|
+
に ぬ 640 24
|
319
|
+
ね ぬ 640 23
|
320
|
+
ね ねる 384 24
|
321
|
+
ひ ひる 384 24
|
322
|
+
び びる 384 24
|
323
|
+
び ぶ 640 24
|
324
|
+
へ へる 384 24
|
325
|
+
べ ぶ 640 23
|
326
|
+
べ べる 384 24
|
327
|
+
み みる 384 24
|
328
|
+
み む 640 24
|
329
|
+
め む 640 23
|
330
|
+
め める 384 24
|
331
|
+
よ る 384 23
|
332
|
+
り りる 384 24
|
333
|
+
り る 640 24
|
334
|
+
れ る 640 23
|
335
|
+
れ れる 384 24
|
336
|
+
ろ る 384 23
|
@@ -0,0 +1 @@
|
|
1
|
+
{"reasons":["polite past negative","polite negative","polite volitional","-chau","-sugiru","-nasai","polite past","-tara","-tari","causative","potential or passive","-sou","-tai","polite","past","negative","passive","-ba","volitional","potential","passive or causative","-te","-zu","imperative","masu stem","adv","noun","imperative negative"],"rules":{"9":[{"from":"くありませんでした","to":"い","reason_id":0}],"7":[{"from":"いませんでした","to":"う","reason_id":0},{"from":"きませんでした","to":"く","reason_id":0},{"from":"きませんでした","to":"くる","reason_id":0},{"from":"ぎませんでした","to":"ぐ","reason_id":0},{"from":"しませんでした","to":"す","reason_id":0},{"from":"しませんでした","to":"する","reason_id":0},{"from":"ちませんでした","to":"つ","reason_id":0},{"from":"にませんでした","to":"ぬ","reason_id":0},{"from":"びませんでした","to":"ぶ","reason_id":0},{"from":"みませんでした","to":"む","reason_id":0},{"from":"りませんでした","to":"る","reason_id":0}],"6":[{"from":"くありません","to":"い","reason_id":1},{"from":"ませんでした","to":"る","reason_id":0}],"5":[{"from":"いましょう","to":"う","reason_id":2},{"from":"きましょう","to":"く","reason_id":2},{"from":"きましょう","to":"くる","reason_id":2},{"from":"ぎましょう","to":"ぐ","reason_id":2},{"from":"しましょう","to":"す","reason_id":2},{"from":"しましょう","to":"する","reason_id":2},{"from":"ちましょう","to":"つ","reason_id":2},{"from":"にましょう","to":"ぬ","reason_id":2},{"from":"びましょう","to":"ぶ","reason_id":2},{"from":"みましょう","to":"む","reason_id":2},{"from":"りましょう","to":"る","reason_id":2}],"4":[{"from":"いじゃう","to":"ぐ","reason_id":3},{"from":"いすぎる","to":"う","reason_id":4},{"from":"いちゃう","to":"く","reason_id":3},{"from":"いなさい","to":"う","reason_id":5},{"from":"いました","to":"う","reason_id":6},{"from":"いません","to":"う","reason_id":1},{"from":"かったら","to":"い","reason_id":7},{"from":"かったり","to":"い","reason_id":8},{"from":"きすぎる","to":"く","reason_id":4},{"from":"きすぎる","to":"くる","reason_id":4},{"from":"ぎすぎる","to":"ぐ","reason_id":4},{"from":"きちゃう","to":"くる","reason_id":3},{"from":"きなさい","to":"く","reason_id":5},{"from":"きなさい","to":"くる","reason_id":5},{"from":"ぎなさい","to":"ぐ","reason_id":5}
,{"from":"きました","to":"く","reason_id":6},{"from":"きました","to":"くる","reason_id":6},{"from":"ぎました","to":"ぐ","reason_id":6},{"from":"きません","to":"く","reason_id":1},{"from":"きません","to":"くる","reason_id":1},{"from":"ぎません","to":"ぐ","reason_id":1},{"from":"こさせる","to":"くる","reason_id":9},{"from":"こられる","to":"くる","reason_id":10},{"from":"しすぎる","to":"す","reason_id":4},{"from":"しすぎる","to":"する","reason_id":4},{"from":"しちゃう","to":"す","reason_id":3},{"from":"しちゃう","to":"する","reason_id":3},{"from":"しなさい","to":"す","reason_id":5},{"from":"しなさい","to":"する","reason_id":5},{"from":"しました","to":"す","reason_id":6},{"from":"しました","to":"する","reason_id":6},{"from":"しません","to":"す","reason_id":1},{"from":"しません","to":"する","reason_id":1},{"from":"ちすぎる","to":"つ","reason_id":4},{"from":"ちなさい","to":"つ","reason_id":5},{"from":"ちました","to":"つ","reason_id":6},{"from":"ちません","to":"つ","reason_id":1},{"from":"っちゃう","to":"う","reason_id":3},{"from":"っちゃう","to":"く","reason_id":3},{"from":"っちゃう","to":"つ","reason_id":3},{"from":"っちゃう","to":"る","reason_id":3},{"from":"にすぎる","to":"ぬ","reason_id":4},{"from":"になさい","to":"ぬ","reason_id":5},{"from":"にました","to":"ぬ","reason_id":6},{"from":"にません","to":"ぬ","reason_id":1},{"from":"びすぎる","to":"ぶ","reason_id":4},{"from":"びなさい","to":"ぶ","reason_id":5},{"from":"びました","to":"ぶ","reason_id":6},{"from":"びません","to":"ぶ","reason_id":1},{"from":"ましょう","to":"る","reason_id":2},{"from":"みすぎる","to":"む","reason_id":4},{"from":"みなさい","to":"む","reason_id":5},{"from":"みました","to":"む","reason_id":6},{"from":"みません","to":"む","reason_id":1},{"from":"りすぎる","to":"る","reason_id":4},{"from":"りなさい","to":"る","reason_id":5},{"from":"りました","to":"る","reason_id":6},{"from":"りません","to":"る","reason_id":1},{"from":"んじゃう","to":"ぬ","reason_id":3},{"from":"んじゃう","to":"ぶ","reason_id":3},{"from":"んじゃう","to":"む","reason_id":3}],"3":[{"from":"いそう","to":"う","reason_id":11},{"from":"いたい","to":"う","reason_id":12},{"from":"いたら","to":"く","reason_id":7},{"from":"いだら","to":"ぐ","reason_id":7},{"from":"いたり","to":"く","reason_id":
8},{"from":"いだり","to":"ぐ","reason_id":8},{"from":"います","to":"う","reason_id":13},{"from":"かせる","to":"く","reason_id":9},{"from":"がせる","to":"ぐ","reason_id":9},{"from":"かった","to":"い","reason_id":14},{"from":"かない","to":"く","reason_id":15},{"from":"がない","to":"ぐ","reason_id":15},{"from":"かれる","to":"く","reason_id":16},{"from":"がれる","to":"ぐ","reason_id":16},{"from":"きそう","to":"く","reason_id":11},{"from":"きそう","to":"くる","reason_id":11},{"from":"ぎそう","to":"ぐ","reason_id":11},{"from":"きたい","to":"く","reason_id":12},{"from":"きたい","to":"くる","reason_id":12},{"from":"ぎたい","to":"ぐ","reason_id":12},{"from":"きたら","to":"くる","reason_id":7},{"from":"きたり","to":"くる","reason_id":8},{"from":"きます","to":"く","reason_id":13},{"from":"きます","to":"くる","reason_id":13},{"from":"ぎます","to":"ぐ","reason_id":13},{"from":"くない","to":"い","reason_id":15},{"from":"ければ","to":"い","reason_id":17},{"from":"こない","to":"くる","reason_id":15},{"from":"こよう","to":"くる","reason_id":18},{"from":"これる","to":"くる","reason_id":19},{"from":"させる","to":"する","reason_id":9},{"from":"させる","to":"る","reason_id":9},{"from":"さない","to":"す","reason_id":15},{"from":"される","to":"す","reason_id":20},{"from":"される","to":"する","reason_id":16},{"from":"しそう","to":"す","reason_id":11},{"from":"しそう","to":"する","reason_id":11},{"from":"したい","to":"す","reason_id":12},{"from":"したい","to":"する","reason_id":12},{"from":"したら","to":"す","reason_id":7},{"from":"したら","to":"する","reason_id":7},{"from":"したり","to":"す","reason_id":8},{"from":"したり","to":"する","reason_id":8},{"from":"しない","to":"する","reason_id":15},{"from":"します","to":"す","reason_id":13},{"from":"します","to":"する","reason_id":13},{"from":"しよう","to":"する","reason_id":18},{"from":"すぎる","to":"い","reason_id":4},{"from":"すぎる","to":"る","reason_id":4},{"from":"たせる","to":"つ","reason_id":9},{"from":"たない","to":"つ","reason_id":15},{"from":"たれる","to":"つ","reason_id":16},{"from":"ちそう","to":"つ","reason_id":11},{"from":"ちたい","to":"つ","reason_id":12},{"from":"ちます","to":"つ","reason_id":13},{"from":"ちゃう","to":"る","reason_id":3},{"from
":"ったら","to":"う","reason_id":7},{"from":"ったら","to":"つ","reason_id":7},{"from":"ったら","to":"る","reason_id":7},{"from":"ったり","to":"う","reason_id":8},{"from":"ったり","to":"つ","reason_id":8},{"from":"ったり","to":"る","reason_id":8},{"from":"なさい","to":"る","reason_id":5},{"from":"なせる","to":"ぬ","reason_id":9},{"from":"なない","to":"ぬ","reason_id":15},{"from":"なれる","to":"ぬ","reason_id":16},{"from":"にそう","to":"ぬ","reason_id":11},{"from":"にたい","to":"ぬ","reason_id":12},{"from":"にます","to":"ぬ","reason_id":13},{"from":"ばせる","to":"ぶ","reason_id":9},{"from":"ばない","to":"ぶ","reason_id":15},{"from":"ばれる","to":"ぶ","reason_id":16},{"from":"びそう","to":"ぶ","reason_id":11},{"from":"びたい","to":"ぶ","reason_id":12},{"from":"びます","to":"ぶ","reason_id":13},{"from":"ました","to":"る","reason_id":6},{"from":"ませる","to":"む","reason_id":9},{"from":"ません","to":"る","reason_id":1},{"from":"まない","to":"む","reason_id":15},{"from":"まれる","to":"む","reason_id":16},{"from":"みそう","to":"む","reason_id":11},{"from":"みたい","to":"む","reason_id":12},{"from":"みます","to":"む","reason_id":13},{"from":"らせる","to":"る","reason_id":9},{"from":"らない","to":"る","reason_id":15},{"from":"られる","to":"る","reason_id":10},{"from":"りそう","to":"る","reason_id":11},{"from":"りたい","to":"る","reason_id":12},{"from":"ります","to":"る","reason_id":13},{"from":"わせる","to":"う","reason_id":9},{"from":"わない","to":"う","reason_id":15},{"from":"われる","to":"う","reason_id":16},{"from":"んだら","to":"ぬ","reason_id":7},{"from":"んだら","to":"ぶ","reason_id":7},{"from":"んだら","to":"む","reason_id":7},{"from":"んだり","to":"ぬ","reason_id":8},{"from":"んだり","to":"ぶ","reason_id":8},{"from":"んだり","to":"む","reason_id":8}],"2":[{"from":"いた","to":"く","reason_id":14},{"from":"いだ","to":"ぐ","reason_id":14},{"from":"いて","to":"く","reason_id":21},{"from":"いで","to":"ぐ","reason_id":21},{"from":"えば","to":"う","reason_id":17},{"from":"える","to":"う","reason_id":19},{"from":"おう","to":"う","reason_id":18},{"from":"かず","to":"く","reason_id":22},{"from":"がず","to":"ぐ","reason_id":22},{"from":"きた","to":"くる","reason_id":14},{
"from":"きて","to":"くる","reason_id":21},{"from":"くて","to":"い","reason_id":21},{"from":"けば","to":"く","reason_id":17},{"from":"げば","to":"ぐ","reason_id":17},{"from":"ける","to":"く","reason_id":19},{"from":"げる","to":"ぐ","reason_id":19},{"from":"こい","to":"くる","reason_id":23},{"from":"こう","to":"く","reason_id":18},{"from":"ごう","to":"ぐ","reason_id":18},{"from":"こず","to":"くる","reason_id":22},{"from":"さず","to":"す","reason_id":22},{"from":"した","to":"す","reason_id":14},{"from":"した","to":"する","reason_id":14},{"from":"して","to":"す","reason_id":21},{"from":"して","to":"する","reason_id":21},{"from":"しろ","to":"する","reason_id":23},{"from":"せず","to":"する","reason_id":22},{"from":"せば","to":"す","reason_id":17},{"from":"せよ","to":"する","reason_id":23},{"from":"せる","to":"す","reason_id":19},{"from":"そう","to":"い","reason_id":11},{"from":"そう","to":"す","reason_id":18},{"from":"そう","to":"る","reason_id":11},{"from":"たい","to":"る","reason_id":12},{"from":"たず","to":"つ","reason_id":22},{"from":"たら","to":"る","reason_id":7},{"from":"たり","to":"る","reason_id":8},{"from":"った","to":"う","reason_id":14},{"from":"った","to":"く","reason_id":14},{"from":"った","to":"つ","reason_id":14},{"from":"った","to":"る","reason_id":14},{"from":"って","to":"う","reason_id":21},{"from":"って","to":"く","reason_id":21},{"from":"って","to":"つ","reason_id":21},{"from":"って","to":"る","reason_id":21},{"from":"てば","to":"つ","reason_id":17},{"from":"てる","to":"つ","reason_id":19},{"from":"とう","to":"つ","reason_id":18},{"from":"ない","to":"る","reason_id":15},{"from":"なず","to":"ぬ","reason_id":22},{"from":"ねば","to":"ぬ","reason_id":17},{"from":"ねる","to":"ぬ","reason_id":19},{"from":"のう","to":"ぬ","reason_id":18},{"from":"ばず","to":"ぶ","reason_id":22},{"from":"べば","to":"ぶ","reason_id":17},{"from":"べる","to":"ぶ","reason_id":19},{"from":"ぼう","to":"ぶ","reason_id":18},{"from":"ます","to":"る","reason_id":13},{"from":"まず","to":"む","reason_id":22},{"from":"めば","to":"む","reason_id":17},{"from":"める","to":"む","reason_id":19},{"from":"もう","to":"む","reason_id":18},{"from":"よう","to":"
る","reason_id":18},{"from":"らず","to":"る","reason_id":22},{"from":"れば","to":"る","reason_id":17},{"from":"れる","to":"る","reason_id":19},{"from":"ろう","to":"る","reason_id":18},{"from":"わず","to":"う","reason_id":22},{"from":"んだ","to":"ぬ","reason_id":14},{"from":"んだ","to":"ぶ","reason_id":14},{"from":"んだ","to":"む","reason_id":14},{"from":"んで","to":"ぬ","reason_id":21},{"from":"んで","to":"ぶ","reason_id":21},{"from":"んで","to":"む","reason_id":21}],"1":[{"from":"い","to":"いる","reason_id":24},{"from":"い","to":"う","reason_id":24},{"from":"い","to":"る","reason_id":23},{"from":"え","to":"う","reason_id":23},{"from":"え","to":"える","reason_id":24},{"from":"き","to":"きる","reason_id":24},{"from":"き","to":"く","reason_id":24},{"from":"ぎ","to":"ぎる","reason_id":24},{"from":"ぎ","to":"ぐ","reason_id":24},{"from":"く","to":"い","reason_id":25},{"from":"け","to":"く","reason_id":23},{"from":"け","to":"ける","reason_id":24},{"from":"げ","to":"ぐ","reason_id":23},{"from":"げ","to":"げる","reason_id":24},{"from":"さ","to":"い","reason_id":26},{"from":"し","to":"す","reason_id":24},{"from":"じ","to":"じる","reason_id":24},{"from":"ず","to":"る","reason_id":22},{"from":"せ","to":"す","reason_id":23},{"from":"せ","to":"せる","reason_id":24},{"from":"ぜ","to":"ぜる","reason_id":24},{"from":"た","to":"る","reason_id":14},{"from":"ち","to":"ちる","reason_id":24},{"from":"ち","to":"つ","reason_id":24},{"from":"て","to":"つ","reason_id":23},{"from":"て","to":"てる","reason_id":24},{"from":"て","to":"る","reason_id":21},{"from":"で","to":"でる","reason_id":24},{"from":"な","to":"","reason_id":27},{"from":"に","to":"にる","reason_id":24},{"from":"に","to":"ぬ","reason_id":24},{"from":"ね","to":"ぬ","reason_id":23},{"from":"ね","to":"ねる","reason_id":24},{"from":"ひ","to":"ひる","reason_id":24},{"from":"び","to":"びる","reason_id":24},{"from":"び","to":"ぶ","reason_id":24},{"from":"へ","to":"へる","reason_id":24},{"from":"べ","to":"ぶ","reason_id":23},{"from":"べ","to":"べる","reason_id":24},{"from":"み","to":"みる","reason_id":24},{"from":"み","to":"む","reason_id":24},{"from":"め","to":"む","
reason_id":23},{"from":"め","to":"める","reason_id":24},{"from":"よ","to":"る","reason_id":23},{"from":"り","to":"りる","reason_id":24},{"from":"り","to":"る","reason_id":24},{"from":"れ","to":"る","reason_id":23},{"from":"れ","to":"れる","reason_id":24},{"from":"ろ","to":"る","reason_id":23}]}}
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
# Parse a tab-separated deinflection rule file into a plain Hash.
#
# The first line of the file is a header and is skipped. Each following
# non-blank line is either a reason (a single field containing no tab) or a
# rule in the format <from>\t<to>\t<type>\t<reason_index>.
#
# @param fpath [String] path of the rule file to read (read as UTF-8)
# @return [Hash] {:reasons => [String, ...], :rules => {Integer => [Hash, ...]}}
#   where rules are grouped by the character length of their :from ending and
#   each rule is {:from => String, :to => String, :reason_id => Integer}
def parse(fpath)
  reasons = []
  rules_hash = {}
  # The block form closes the file when done. The explicit UTF-8 mode keeps
  # from.size a character count even when the locale encoding is not UTF-8.
  File.open(fpath, 'r:UTF-8') do |file|
    file.each_with_index do |line, i|
      next if i == 0 # Skip header
      parts = line.strip.split(/\t/)
      # A blank line is neither a reason nor a rule
      next if parts.empty?
      # Reasons are listed at the top of the file and are not tab-separated
      if parts.size == 1
        reasons << parts[0]
      # Rules are tab-separated in the following format:
      # <from>\t<to>\t<type>\t<reason_index>
      else
        from = parts.first
        rules_hash[from.size] ||= []
        rules_hash[from.size] << {
          :from => from,
          :to => parts[1],
          :reason_id => parts[3].to_i
        }
      end
    end
  end

  {:reasons => reasons, :rules => rules_hash}
end
|
28
|
+
|
29
|
+
# Regenerate data/deinflect.json from data/deinflect.dat; both paths are
# resolved relative to the directory containing this file.
root = File.expand_path(File.dirname(__FILE__))
# Parse and serialize before opening the output file, so that a failure while
# reading the .dat file cannot leave a truncated deinflect.json behind.
rules_json = parse(File.join(root, 'data/deinflect.dat')).to_json
File.open(File.join(root, 'data/deinflect.json'), 'w') do |f|
  f.write(rules_json)
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
require 'json'
|
3
|
+
require "japanese_deinflector/version"
|
4
|
+
|
5
|
+
# Deinflects (unconjugates/undeclines) Japanese words by replacing known
# inflected endings with their dictionary-form endings, using the rules
# stored in data/deinflect.json next to this file.
class JapaneseDeinflector
  # Loads the reasons and rules from data/deinflect.json.
  #
  # Afterwards @reasons holds the Array of reason strings and @rules maps the
  # Integer length of an inflected ending to the Array of rules (Hashes with
  # :from, :to and :reason_id keys) whose :from ending has that length.
  def initialize
    json_path = File.join(File.expand_path(File.dirname(__FILE__)), 'data/deinflect.json')
    # Read explicitly as UTF-8 so parsing does not depend on the locale.
    File.open(json_path, 'r:UTF-8') do |f|
      rules_and_reasons = JSON.parse(f.read, :symbolize_names => true)
      @reasons = rules_and_reasons[:reasons]
      @rules = {}
      # Convert hash keys from something like :"9" -> 9
      rules_and_reasons[:rules].each do |size, rules|
        @rules[size.to_s.to_i] = rules
      end
    end
  end

  # Lists the possible deinflections of +word+.
  #
  # Rule groups are tried in the iteration order of @rules, and only endings
  # strictly shorter than the word are considered. Each distinct deinflected
  # word is reported once, for the first rule that produced it.
  #
  # @param word [String] the inflected word
  # @return [Array<Hash>] one Hash per candidate with the keys :weight
  #   (Float, matched ending length divided by word length, rounded to three
  #   decimals), :word (String) and :reason (String taken from @reasons)
  def deinflect(word)
    possibilities = []
    # Words already reported. The array of result Hashes cannot be searched
    # for a String with include?, so duplicates are tracked separately.
    seen = {}
    rules_less_than_size(word.size).each do |size, rules|
      ending = word[-size..-1]
      stem = word[0...-size]
      rules.each do |rule|
        next unless ending == rule[:from]
        deinflected_word = "#{stem}#{rule[:to]}"
        next if seen[deinflected_word]
        seen[deinflected_word] = true
        # Weight is between 0 and 1, 1 being a higher chance of actual deinflection
        weight = (Float(size) / word.size).round(3)
        reason = @reasons[rule[:reason_id]]
        possibilities << {:weight => weight, :word => deinflected_word, :reason => reason}
      end
    end
    possibilities
  end

  private

  # Returns the rule groups whose ending length is strictly smaller than
  # +max_size+, as a new Hash (so @rules itself is left untouched).
  def rules_less_than_size(max_size)
    @rules.select { |size, _rules| size < max_size }
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe JapaneseDeinflector do
  subject { JapaneseDeinflector.new }

  # Word of the first possibility returned for +inflected+.
  def first_word_for(inflected)
    subject.deinflect(inflected).first[:word]
  end

  it "deinflects plain positive verbs" do
    # progressive tense (expectations currently commented out)
    #first_word_for("見ている").should == "見る"
    #first_word_for("歌っている").should == "歌う"

    # past tense
    first_word_for("見た").should == "見る"
    first_word_for("歌った").should == "歌う"
  end

  it "deinflects polite positive verbs" do
    # present tense
    first_word_for("見ます").should == "見る"
    first_word_for("歌います").should == "歌う"

    # progressive tense (expectations currently commented out)
    #first_word_for("見ています").should == "見る"
    #first_word_for("歌っています").should == "歌う"

    # past tense
    first_word_for("見ました").should == "見る"
    first_word_for("歌いました").should == "歌う"
  end

  it "deinflects plain negative verbs" do
    # present tense
    first_word_for("見ない").should == "見る"
    first_word_for("歌わない").should == "歌う"

    # progressive tense (expectations currently commented out)
    #first_word_for("見ていない").should == "見る"
    #first_word_for("歌っていない").should == "歌う"

    # past tense (expectations currently commented out)
    #first_word_for("見なかった").should == "見る"
    #first_word_for("歌わなかった").should == "歌う"
  end

  it "deinflects polite negative formal verbs" do
    # present tense
    first_word_for("見ません").should == "見る"
    first_word_for("歌いません").should == "歌う"

    # progressive tense (expectations currently commented out)
    #first_word_for("見ていません").should == "見る"
    #first_word_for("歌っていません").should == "歌う"

    # past tense
    first_word_for("見ませんでした").should == "見る"
    first_word_for("歌いませんでした").should == "歌う"
  end

  it "deinflects polite negative formal adjectives" do
    # present tense
    first_word_for("嬉しくありません").should == "嬉しい"

    # past tense
    first_word_for("嬉しくありませんでした").should == "嬉しい"
  end

  it "deinflects imperative verbs" do
    first_word_for("歌って").should == "歌う"
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'japanese_deinflector'
|
2
|
+
|
3
|
+
# This file was generated by the `rspec --init` command. Conventionally, all
|
4
|
+
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
5
|
+
# Require this file using `require "spec_helper.rb"` to ensure that it is only
|
6
|
+
# loaded once.
|
7
|
+
#
|
8
|
+
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
9
|
+
# Suite-wide RSpec settings.
RSpec.configure do |rspec|
  # Option names are RSpec's own; see the Configuration docs linked above.
  rspec.treat_symbols_as_metadata_keys_with_true_values = true
  rspec.run_all_when_everything_filtered = true
  # Restrict the run to examples tagged :focus
  rspec.filter_run :focus
end
|
metadata
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: japanese_deinflector
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Peter Graham
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-08-28 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: json
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rspec
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
description: Deinflect (unconjugate/undecline) Japanese words.
|
63
|
+
email:
|
64
|
+
- pete@gigadrill.com
|
65
|
+
executables: []
|
66
|
+
extensions: []
|
67
|
+
extra_rdoc_files: []
|
68
|
+
files:
|
69
|
+
- .gitignore
|
70
|
+
- .rspec
|
71
|
+
- .travis.yml
|
72
|
+
- Gemfile
|
73
|
+
- Rakefile
|
74
|
+
- japanese_deinflector.gemspec
|
75
|
+
- lib/data/deinflect.dat
|
76
|
+
- lib/data/deinflect.json
|
77
|
+
- lib/deinflect_to_json.rb
|
78
|
+
- lib/japanese_deinflector.rb
|
79
|
+
- lib/japanese_deinflector/version.rb
|
80
|
+
- spec/japanese_deinflector_spec.rb
|
81
|
+
- spec/spec_helper.rb
|
82
|
+
homepage: http://github.com/6/japanese_deinflector
|
83
|
+
licenses: []
|
84
|
+
post_install_message:
|
85
|
+
rdoc_options: []
|
86
|
+
require_paths:
|
87
|
+
- lib
|
88
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
|
+
none: false
|
96
|
+
requirements:
|
97
|
+
- - ! '>='
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0'
|
100
|
+
requirements: []
|
101
|
+
rubyforge_project:
|
102
|
+
rubygems_version: 1.8.21
|
103
|
+
signing_key:
|
104
|
+
specification_version: 3
|
105
|
+
summary: Deinflect (unconjugate/undecline) Japanese words.
|
106
|
+
test_files:
|
107
|
+
- spec/japanese_deinflector_spec.rb
|
108
|
+
- spec/spec_helper.rb
|