linguistics 1.0.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1145 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # This file contains functions for deriving the infinitive forms of conjugated
4
+ # English words. Requiring this file adds functions and constants to the
5
+ # Linguistics::EN module.
6
+ #
7
+ # == Authors
8
+ #
9
+ # * Michael Granger <ged@FaerieMUD.org>
10
+ #
11
+ # == Acknowledgments
12
+ #
13
+ # This code was ported from the excellent 'Lingua::EN::Infinitive' Perl module
14
+ # by Ron Savage, which is distributed under the following license:
15
+ #
16
+ # Australian copyright (c) 1999-2002 Ron Savage.
17
+ #
18
+ # All Programs of mine are 'OSI Certified Open Source Software';
19
+ # you can redistribute them and/or modify them under the terms of
20
+ # The Artistic License, a copy of which is available at:
21
+ # http://www.opensource.org/licenses/index.html
22
+ #
23
+ #
24
+ # :include: LICENSE
25
+ #
26
+ #--
27
+ #
28
+ # Please see the file LICENSE in the base directory for licensing details.
29
+ #
30
+ module Linguistics::EN
31
+
32
+ # :stopdoc:
33
+
34
# Irregular words => infinitive forms.
#
# Keys are conjugated forms exactly as they must appear in the input
# (lookups are case-sensitive); values are the matching infinitives.
# This table is consulted before any of the suffix rules.
IrregularInfinitives = {
  'abided' => 'abide',
  'abode' => 'abide',
  'am' => 'be',
  'are' => 'be',
  'arisen' => 'arise',
  'arose' => 'arise',
  'ate' => 'eat',
  'awaked' => 'awake',
  'awoke' => 'awake',
  'bade' => 'bid',
  'beaten' => 'beat',
  'became' => 'become',
  'been' => 'be',
  'befallen' => 'befall',
  'befell' => 'befall',
  'began' => 'begin',
  'begat' => 'beget',
  'begot' => 'beget',
  'begotten' => 'beget',
  'begun' => 'begin',
  'beheld' => 'behold',
  'bent' => 'bend',
  'bereaved' => 'bereave',
  'bereft' => 'bereave',
  'beseeched' => 'beseech',
  'besought' => 'beseech',
  'bespoke' => 'bespeak',
  'bespoken' => 'bespeak',
  'bestrewed' => 'bestrew',
  'bestrewn' => 'bestrew',
  'bestrid' => 'bestride',
  'bestridden' => 'bestride',
  'bestrode' => 'bestride',
  'betaken' => 'betake',
  'bethought' => 'bethink',
  'betook' => 'betake',
  'betted' => 'bet',
  'bidden' => 'bid',
  'bided' => 'bide',
  'bit' => 'bite',
  'bitten' => 'bite',
  'bled' => 'bleed',
  'blended' => 'blend',
  'blent' => 'blend',
  'blessed' => 'bless',
  'blest' => 'bless',
  'blew' => 'blow',
  'blown' => 'blow',
  'bode' => 'bide',
  'bore' => 'bear',
  'born' => 'bear',
  'borne' => 'bear',
  'bought' => 'buy',
  'bound' => 'bind',
  'bred' => 'breed',
  'broadcasted' => 'broadcast',
  'broke' => 'break',
  'broken' => 'break',
  'brought' => 'bring',
  'browbeaten' => 'browbeat',
  'built' => 'build',
  'burned' => 'burn',
  'burnt' => 'burn',
  'came' => 'come',
  'caught' => 'catch',
  'chid' => 'chide',
  'chidden' => 'chide',
  'chided' => 'chide',
  'chose' => 'choose',
  'chosen' => 'choose',
  'clad' => 'clothe',
  'clave' => 'cleave',
  'cleaved' => 'cleave',
  'cleft' => 'cleave',
  'clothed' => 'clothe',
  'clove' => 'cleave',
  'cloven' => 'cleave',
  'clung' => 'cling',
  'costed' => 'cost',
  'could' => 'can',
  'crept' => 'creep',
  'crew' => 'crow',
  'crowed' => 'crow',
  'dealt' => 'deal',
  'did' => 'do',
  'done' => 'do',
  'dove' => 'dive',
  'drank' => 'drink',
  'drawn' => 'draw',
  'dreamed' => 'dream',
  'dreamt' => 'dream',
  'drew' => 'draw',
  'driven' => 'drive',
  'drove' => 'drive',
  'drunk' => 'drink',
  'dug' => 'dig',
  'dwelled' => 'dwell',
  'dwelt' => 'dwell',
  'eaten' => 'eat',
  'fallen' => 'fall',
  'fed' => 'feed',
  'fell' => 'fall',
  'felt' => 'feel',
  'fled' => 'flee',
  'flew' => 'fly',
  'flown' => 'fly',
  'flung' => 'fling',
  'forbad' => 'forbid',
  'forbade' => 'forbid',
  'forbidden' => 'forbid',
  'forbore' => 'forbear',
  'forborne' => 'forbear',
  'fordid' => 'fordo',
  'fordone' => 'fordo',
  'forecasted' => 'forecast',
  'foregone' => 'forego',
  'foreknew' => 'foreknow',
  'foreknown' => 'foreknow',
  'foreran' => 'forerun',
  'foresaw' => 'foresee',
  'foreseen' => 'foresee',
  'foreshowed' => 'foreshow',
  'foreshown' => 'foreshow',
  'foretold' => 'foretell',
  'forewent' => 'forego',
  'forgave' => 'forgive',
  'forgiven' => 'forgive',
  'forgot' => 'forget',
  'forgotten' => 'forget',
  'forsaken' => 'forsake',
  # Misspelling of 'foreseen'; retained so existing lookups keep working.
  'forseen' => 'foresee',
  'forsook' => 'forsake',
  'forswore' => 'forswear',
  'forsworn' => 'forswear',
  'fought' => 'fight',
  'found' => 'find',
  'froze' => 'freeze',
  'frozen' => 'freeze',
  'gainsaid' => 'gainsay',
  'gave' => 'give',
  'gilded' => 'gild',
  'gilt' => 'gild',
  'girded' => 'gird',
  'girt' => 'gird',
  'given' => 'give',
  'gone' => 'go',
  'got' => 'get',
  'gotten' => 'get',
  'graved' => 'grave',
  'graven' => 'grave',
  'grew' => 'grow',
  'ground' => 'grind',
  'grown' => 'grow',
  'had' => 'have',
  'hamstringed' => 'hamstring',
  'hamstrung' => 'hamstring',
  'hanged' => 'hang',
  'heard' => 'hear',
  'heaved' => 'heave',
  'held' => 'hold',
  'hewed' => 'hew',
  'hewn' => 'hew',
  'hid' => 'hide',
  'hidden' => 'hide',
  'hove' => 'heave',
  'hung' => 'hang',
  'inlaid' => 'inlay',
  'is' => 'be',
  'kept' => 'keep',
  'kneeled' => 'kneel',
  'knelt' => 'kneel',
  'knew' => 'know',
  'knitted' => 'knit',
  'known' => 'know',
  'laded' => 'lade',
  'laden' => 'lade',
  'laid' => 'lay',
  'lain' => 'lie',
  'lay' => 'lie',
  'leaned' => 'lean',
  'leant' => 'lean',
  'leaped' => 'leap',
  'leapt' => 'leap',
  'learned' => 'learn',
  'learnt' => 'learn',
  'led' => 'lead',
  'left' => 'leave',
  'lent' => 'lend',
  'lighted' => 'light',
  'lit' => 'light',
  'lost' => 'lose',
  'made' => 'make',
  'meant' => 'mean',
  'melted' => 'melt',
  'met' => 'meet',
  'might' => 'may',
  'misdealt' => 'misdeal',
  'misgave' => 'misgive',
  'misgiven' => 'misgive',
  'mislaid' => 'mislay',
  'misled' => 'mislead',
  'mistaken' => 'mistake',
  'mistook' => 'mistake',
  'misunderstood' => 'misunderstand',
  'molten' => 'melt',
  'mowed' => 'mow',
  'mown' => 'mow',
  'outate' => 'outeat',
  'outbade' => 'outbid',
  'outbidden' => 'outbid',
  'outbred' => 'outbreed',
  'outdid' => 'outdo',
  'outdone' => 'outdo',
  'outeaten' => 'outeat',
  'outfought' => 'outfight',
  'outgone' => 'outgo',
  'outgrew' => 'outgrow',
  'outgrown' => 'outgrow',
  'outlaid' => 'outlay',
  'outran' => 'outrun',
  'outridden' => 'outride',
  'outrode' => 'outride',
  'outsat' => 'outsit',
  'outshone' => 'outshine',
  'outshot' => 'outshoot',
  'outsold' => 'outsell',
  'outspent' => 'outspend',
  'outthrew' => 'outthrow',
  'outthrown' => 'outthrow',
  'outwent' => 'outgo',
  'outwore' => 'outwear',
  'outworn' => 'outwear',
  'overate' => 'overeat',
  'overbade' => 'overbid',
  'overbidden' => 'overbid',
  'overblew' => 'overblow',
  'overblown' => 'overblow',
  'overbore' => 'overbear',
  'overborn' => 'overbear',
  'overborne' => 'overbear',
  'overbought' => 'overbuy',
  'overbuilt' => 'overbuild',
  'overcame' => 'overcome',
  'overdid' => 'overdo',
  'overdone' => 'overdo',
  'overdrawn' => 'overdraw',
  'overdrew' => 'overdraw',
  'overdriven' => 'overdrive',
  'overdrove' => 'overdrive',
  'overeaten' => 'overeat',
  'overfed' => 'overfeed',
  'overflew' => 'overfly',
  'overflown' => 'overfly',
  'overgrew' => 'overgrow',
  'overgrown' => 'overgrow',
  'overhanged' => 'overhang',
  'overheard' => 'overhear',
  'overhung' => 'overhang',
  'overladed' => 'overlade',
  'overladen' => 'overlade',
  'overlaid' => 'overlay',
  'overlain' => 'overlie',
  'overlay' => 'overlie',
  'overleaped' => 'overleap',
  'overleapt' => 'overleap',
  'overpaid' => 'overpay',
  'overran' => 'overrun',
  'overridden' => 'override',
  'overrode' => 'override',
  'oversaw' => 'oversee',
  'overseen' => 'oversee',
  'oversewed' => 'oversew',
  'oversewn' => 'oversew',
  'overshot' => 'overshoot',
  'overslept' => 'oversleep',
  'overspent' => 'overspend',
  'overtaken' => 'overtake',
  'overthrew' => 'overthrow',
  'overthrown' => 'overthrow',
  'overtook' => 'overtake',
  'overwinded' => 'overwind',
  'overwound' => 'overwind',
  'overwritten' => 'overwrite',
  'overwrote' => 'overwrite',
  'paid' => 'pay',
  'partaken' => 'partake',
  'partook' => 'partake',
  'prechose' => 'prechoose',
  'prechosen' => 'prechoose',
  'proved' => 'prove',
  'proven' => 'prove',
  'quitted' => 'quit',
  'ran' => 'run',
  'rang' => 'ring',
  'reaved' => 'reave',
  'rebuilt' => 'rebuild',
  'reeved' => 'reeve',
  'reft' => 'reave',
  'relaid' => 'relay',
  'rent' => 'rend',
  'repaid' => 'repay',
  'retold' => 'retell',
  'ridded' => 'rid',
  'ridden' => 'ride',
  'risen' => 'rise',
  'rived' => 'rive',
  'riven' => 'rive',
  'rode' => 'ride',
  'rose' => 'rise',
  'rove' => 'reeve',
  'rung' => 'ring',
  'said' => 'say',
  'sang' => 'sing',
  'sank' => 'sink',
  'sat' => 'sit',
  'saw' => 'see',
  'sawed' => 'saw',
  'sawn' => 'saw',
  'seen' => 'see',
  'sent' => 'send',
  'sewed' => 'sew',
  'sewn' => 'sew',
  'shaken' => 'shake',
  'shaved' => 'shave',
  'shaven' => 'shave',
  'sheared' => 'shear',
  'shined' => 'shine',
  'shod' => 'shoe',
  'shoed' => 'shoe',
  'shone' => 'shine',
  'shook' => 'shake',
  'shorn' => 'shear',
  'shot' => 'shoot',
  'showed' => 'show',
  'shown' => 'show',
  'shrank' => 'shrink',
  'shredded' => 'shred',
  'shrived' => 'shrive',
  'shriven' => 'shrive',
  'shrove' => 'shrive',
  'shrunk' => 'shrink',
  'shrunken' => 'shrink',
  'slain' => 'slay',
  'slept' => 'sleep',
  'slew' => 'slay',
  'slid' => 'slide',
  'slidden' => 'slide',
  'slung' => 'sling',
  'slunk' => 'slink',
  'smelled' => 'smell',
  'smelt' => 'smell',
  'smitten' => 'smite',
  'smote' => 'smite',
  'snuck' => 'sneak',
  'sold' => 'sell',
  'sought' => 'seek',
  'sowed' => 'sow',
  'sown' => 'sow',
  'span' => 'spin',
  'spat' => 'spit',
  'sped' => 'speed',
  'speeded' => 'speed',
  'spelled' => 'spell',
  'spelt' => 'spell',
  'spent' => 'spend',
  'spilled' => 'spill',
  'spilt' => 'spill',
  'spoiled' => 'spoil',
  'spoilt' => 'spoil',
  'spoke' => 'speak',
  'spoken' => 'speak',
  'sprang' => 'spring',
  'sprung' => 'spring',
  'spun' => 'spin',
  'stank' => 'stink',
  'staved' => 'stave',
  'stole' => 'steal',
  'stolen' => 'steal',
  'stood' => 'stand',
  'stove' => 'stave',
  'strewed' => 'strew',
  'strewn' => 'strew',
  'stricken' => 'strike',
  'strid' => 'stride',
  'stridden' => 'stride',
  'strived' => 'strive',
  'striven' => 'strive',
  'strode' => 'stride',
  'strove' => 'strive',
  'struck' => 'strike',
  'strung' => 'string',
  'stuck' => 'stick',
  'stung' => 'sting',
  'stunk' => 'stink',
  'sung' => 'sing',
  'sunk' => 'sink',
  'sunken' => 'sink',
  'swam' => 'swim',
  'sweated' => 'sweat',
  'swelled' => 'swell',
  'swept' => 'sweep',
  'swollen' => 'swell',
  'swore' => 'swear',
  'sworn' => 'swear',
  'swum' => 'swim',
  'swung' => 'swing',
  'taken' => 'take',
  'taught' => 'teach',
  'thought' => 'think',
  'threw' => 'throw',
  'thrived' => 'thrive',
  'thriven' => 'thrive',
  'throve' => 'thrive',
  'thrown' => 'throw',
  'told' => 'tell',
  'took' => 'take',
  'tore' => 'tear',
  'torn' => 'tear',
  'trod' => 'tread',
  'trodden' => 'tread',
  'unbent' => 'unbend',
  'unbound' => 'unbind',
  'unbuilt' => 'unbuild',
  'underbought' => 'underbuy',
  'underfed' => 'underfeed',
  'undergone' => 'undergo',
  'underlaid' => 'underlay',
  'underlain' => 'underlie',
  'underlay' => 'underlie',
  'underpaid' => 'underpay',
  'underran' => 'underrun',
  'undershot' => 'undershoot',
  'undersold' => 'undersell',
  'understood' => 'understand',
  'undertaken' => 'undertake',
  'undertook' => 'undertake',
  'underwent' => 'undergo',
  'underwritten' => 'underwrite',
  'underwrote' => 'underwrite',
  'undid' => 'undo',
  'undone' => 'undo',
  'undrawn' => 'undraw',
  'undrew' => 'undraw',
  'unfroze' => 'unfreeze',
  'unfrozen' => 'unfreeze',
  'ungirded' => 'ungird',
  'ungirt' => 'ungird',
  'unhanged' => 'unhang',
  'unhung' => 'unhang',
  'unknitted' => 'unknit',
  'unladed' => 'unlade',
  'unladen' => 'unlade',
  'unlaid' => 'unlay',
  'unlearned' => 'unlearn',
  'unlearnt' => 'unlearn',
  'unmade' => 'unmake',
  'unreeved' => 'unreeve',
  'unrove' => 'unreeve',
  'unsaid' => 'unsay',
  'unslung' => 'unsling',
  'unspoke' => 'unspeak',
  'unspoken' => 'unspeak',
  'unstrung' => 'unstring',
  'unstuck' => 'unstick',
  'unswore' => 'unswear',
  'unsworn' => 'unswear',
  'untaught' => 'unteach',
  'unthought' => 'unthink',
  'untrod' => 'untread',
  'untrodden' => 'untread',
  'unwinded' => 'unwind',
  'unwound' => 'unwind',
  'unwove' => 'unweave',
  'unwoven' => 'unweave',
  'upbuilt' => 'upbuild',
  'upheld' => 'uphold',
  'uprisen' => 'uprise',
  'uprose' => 'uprise',
  'upswept' => 'upsweep',
  'upswung' => 'upswing',
  'waked' => 'wake',
  'was' => 'be',
  'waylaid' => 'waylay',
  'wedded' => 'wed',
  'went' => 'go',
  'wept' => 'weep',
  'were' => 'be',
  'wetted' => 'wet',
  'winded' => 'wind',
  'wist' => 'wit',
  'wot' => 'wit',
  'withdrawn' => 'withdraw',
  'withdrew' => 'withdraw',
  'withheld' => 'withhold',
  'withstood' => 'withstand',
  'woke' => 'wake',
  'woken' => 'wake',
  'won' => 'win',
  'wore' => 'wear',
  'worked' => 'work',
  'worn' => 'wear',
  'wound' => 'wind',
  'wove' => 'weave',
  'woven' => 'weave',
  'written' => 'write',
  'wrote' => 'write',
  'wrought' => 'work',
  'wrung' => 'wring',
}
544
+
545
# Mapping of word suffixes to infinitive rules.
#
# Each key is a word ending; each value describes the rule applied when a
# word ends with it:
#
#   :order   => sort key; the matching suffix with the lowest :order wins,
#               so a specific ending (e.g. 'ies') is tried before the
#               general one it contains (e.g. 'es').
#   :rule    => label of the rule, reported back to the caller.
#   :word1   => code selecting how the candidates are built, as interpreted
#               by the #infinitive method ("stem" below means the word with
#               the suffix removed):
#                  0 => first candidate is the word minus its last letter,
#                       second candidate is the word minus its last two.
#                 -1 => first candidate is stem + :suffix1; no second one.
#                 -2 => first candidate is stem + :suffix1; second is the
#                       bare stem.
#                 -3 => first candidate is stem + :suffix1; second is
#                       stem + :suffix2.
#                 -4 => the word is returned unchanged.
#   :suffix1 => replacement ending for the first candidate.
#   :suffix2 => replacement ending for the second candidate (only consulted
#               for :word1 == -3).
InfSuffixRules = {
  'hes' => { :order => 1011, :rule => '1', :word1 => 0, :suffix1 => '', :suffix2 => '' },
  'ses' => { :order => 1021, :rule => '2', :word1 => 0, :suffix1 => '', :suffix2 => '' },
  'xes' => { :order => 1031, :rule => '3', :word1 => 0, :suffix1 => '', :suffix2 => '' },
  'zes' => { :order => 1041, :rule => '4', :word1 => 0, :suffix1 => '', :suffix2 => '' },
  'iless' => { :order => 1051, :rule => '43a', :word1 => -1, :suffix1 => 'y', :suffix2 => '' },
  'less' => { :order => 1052, :rule => '43b', :word1 => -1, :suffix1 => '', :suffix2 => '' },
  'iness' => { :order => 1053, :rule => '44a', :word1 => -1, :suffix1 => 'y', :suffix2 => '' },
  'ness' => { :order => 1054, :rule => '44b', :word1 => -1, :suffix1 => '', :suffix2 => '' },
  "'s" => { :order => 1055, :rule => '7', :word1 => -1, :suffix1 => '', :suffix2 => '' },
  'ies' => { :order => 1056, :rule => '13a', :word1 => -1, :suffix1 => 'y', :suffix2 => '' },
  'es' => { :order => 1057, :rule => '13b', :word1 => 0, :suffix1 => '', :suffix2 => '' },
  'ss' => { :order => 1061, :rule => '6a', :word1 => -4, :suffix1 => '', :suffix2 => '' },
  's' => { :order => 1062, :rule => '6b', :word1 => -1, :suffix1 => '', :suffix2 => '' },
  'ater' => { :order => 1081, :rule => '8', :word1 => -4, :suffix1 => '', :suffix2 => '' },
  'cter' => { :order => 1091, :rule => '9', :word1 => -4, :suffix1 => '', :suffix2 => '' },
  'ier' => { :order => 1101, :rule => '10', :word1 => -1, :suffix1 => 'y', :suffix2 => '' },
  'er' => { :order => 1111, :rule => '11', :word1 => 0, :suffix1 => '', :suffix2 => '' },
  'ied' => { :order => 1121, :rule => '12a', :word1 => -1, :suffix1 => 'y', :suffix2 => '' },
  # Rule 12b gets extra doubled-consonant handling in #infinitive.
  'ed' => { :order => 1122, :rule => '12b', :word1 => 0, :suffix1 => '', :suffix2 => '' },
  'iest' => { :order => 1141, :rule => '14a', :word1 => -1, :suffix1 => 'y', :suffix2 => '' },
  'est' => { :order => 1142, :rule => '14b', :word1 => -2, :suffix1 => 'e', :suffix2 => '' },
  'blity' => { :order => 1143, :rule => '21', :word1 => -4, :suffix1 => '', :suffix2 => '' },
  'bility' => { :order => 1144, :rule => '22', :word1 => -1, :suffix1 => 'ble', :suffix2 => '' },
  'fiable' => { :order => 1145, :rule => '23', :word1 => -1, :suffix1 => 'fy', :suffix2 => '' },
  'logist' => { :order => 1146, :rule => '24', :word1 => -1, :suffix1 => 'logy', :suffix2 => '' },
  # Rule 15 gets extra doubled-consonant handling in #infinitive.
  'ing' => { :order => 1151, :rule => '15', :word1 => -2, :suffix1 => 'e', :suffix2 => '' },
  'ist' => { :order => 1161, :rule => '16', :word1 => -2, :suffix1 => 'e', :suffix2 => '' },
  'ism' => { :order => 1171, :rule => '17', :word1 => -2, :suffix1 => 'e', :suffix2 => '' },
  'ity' => { :order => 1181, :rule => '18', :word1 => -2, :suffix1 => 'e', :suffix2 => '' },
  'ize' => { :order => 1191, :rule => '19', :word1 => -2, :suffix1 => 'e', :suffix2 => '' },
  'cable' => { :order => 1201, :rule => '20a', :word1 => -4, :suffix1 => '', :suffix2 => '' },
  'gable' => { :order => 1202, :rule => '20b', :word1 => -4, :suffix1 => '', :suffix2 => '' },
  'able' => { :order => 1203, :rule => '20c', :word1 => -2, :suffix1 => 'e', :suffix2 => '' },
  'graphic' => { :order => 1251, :rule => '25', :word1 => -1, :suffix1 => 'graphy', :suffix2 => '' },
  'istic' => { :order => 1261, :rule => '26', :word1 => -1, :suffix1 => 'ist', :suffix2 => '' },
  'itic' => { :order => 1271, :rule => '27', :word1 => -1, :suffix1 => 'ite', :suffix2 => '' },
  'like' => { :order => 1281, :rule => '28', :word1 => -1, :suffix1 => '', :suffix2 => '' },
  'logic' => { :order => 1291, :rule => '29', :word1 => -1, :suffix1 => 'logy', :suffix2 => '' },
  'ment' => { :order => 1301, :rule => '30', :word1 => -1, :suffix1 => '', :suffix2 => '' },
  'mental' => { :order => 1311, :rule => '31', :word1 => -1, :suffix1 => 'ment', :suffix2 => '' },
  'metry' => { :order => 1321, :rule => '32', :word1 => -3, :suffix1 => 'meter', :suffix2 => 'metre' },
  'nce' => { :order => 1331, :rule => '33', :word1 => -1, :suffix1 => 'nt', :suffix2 => '' },
  'ncy' => { :order => 1341, :rule => '34', :word1 => -1, :suffix1 => 'nt', :suffix2 => '' },
  'ship' => { :order => 1351, :rule => '35', :word1 => -1, :suffix1 => '', :suffix2 => '' },
  'ical' => { :order => 1361, :rule => '36', :word1 => -1, :suffix1 => 'ic', :suffix2 => '' },
  'ional' => { :order => 1371, :rule => '37', :word1 => -1, :suffix1 => 'ion', :suffix2 => '' },
  'bly' => { :order => 1381, :rule => '38', :word1 => -1, :suffix1 => 'ble', :suffix2 => '' },
  'ily' => { :order => 1391, :rule => '39', :word1 => -1, :suffix1 => 'y', :suffix2 => '' },
  'ly' => { :order => 1401, :rule => '40', :word1 => -1, :suffix1 => '', :suffix2 => '' },
  'iful' => { :order => 1411, :rule => '41a', :word1 => -1, :suffix1 => 'y', :suffix2 => '' },
  'ful' => { :order => 1412, :rule => '41b', :word1 => -1, :suffix1 => '', :suffix2 => '' },
  'ihood' => { :order => 1421, :rule => '42a', :word1 => -1, :suffix1 => 'y', :suffix2 => '' },
  'hood' => { :order => 1422, :rule => '42b', :word1 => -1, :suffix1 => '', :suffix2 => '' },
  'ification' => { :order => 1451, :rule => '45', :word1 => -1, :suffix1 => 'ify', :suffix2 => '' },
  'ization' => { :order => 1461, :rule => '46', :word1 => -1, :suffix1 => 'ize', :suffix2 => '' },
  'ction' => { :order => 1471, :rule => '47', :word1 => -1, :suffix1 => 'ct', :suffix2 => '' },
  'rtion' => { :order => 1481, :rule => '48', :word1 => -1, :suffix1 => 'rt', :suffix2 => '' },
  'ation' => { :order => 1491, :rule => '49', :word1 => -1, :suffix1 => 'ate', :suffix2 => '' },
  'ator' => { :order => 1501, :rule => '50', :word1 => -1, :suffix1 => 'ate', :suffix2 => '' },
  'ctor' => { :order => 1511, :rule => '51', :word1 => -1, :suffix1 => 'ct', :suffix2 => '' },
  'ive' => { :order => 1521, :rule => '52', :word1 => -1, :suffix1 => 'ion', :suffix2 => '' },
  'onian' => { :order => 1530, :rule => '54', :word1 => -1, :suffix1 => 'on', :suffix2 => '' },
  'an' => { :order => 1531, :rule => '53', :word1 => -1, :suffix1 => 'a', :suffix2 => '' },
}

# The rule suffixes sorted by ascending :order, i.e. in the sequence in which
# they are tried against a word.
InfSuffixRuleOrder = InfSuffixRules.keys.sort_by {|suffix| InfSuffixRules[suffix][:order] }
1012
+
1013
+ # :startdoc:
1014
+
1015
### The object class for the result returned from calling
### Linguistics::EN::infinitive.
###
### An Infinitive is a String whose own value is the primary infinitive
### candidate; the remaining details of the derivation are exposed through
### read-only attributes.
class Infinitive < String

  # The fallback deconjugated form
  attr_reader :word2

  # The suffix used to identify the transform rule
  attr_reader :suffix

  # The rule used
  attr_reader :rule

  ### Create and return a new Infinitive object. +word1+ becomes the string
  ### value itself; +word2+, +suffix+ and +rule+ are stored as given and
  ### returned by the readers of the same name.
  def initialize( word1, word2, suffix, rule )
    super( word1 )
    @word2  = word2
    @suffix = suffix
    @rule   = rule
  end

end
1041
+
1042
+
1043
+ ###############
1044
+ module_function
1045
+ ###############
1046
+
1047
### Return the infinitive form of the given +word+.
###
### +word+ is converted with #to_s and looked up as-is (no case folding).
### The result is an Infinitive: a String holding the primary candidate,
### with the alternative candidate, the matched suffix and the rule label
### available via #word2, #suffix and #rule.
###
### * If +word+ is a key of IrregularInfinitives, the mapped infinitive is
###   returned with rule 'irregular', an empty #word2 and an empty #suffix.
### * Otherwise the first entry of InfSuffixRuleOrder that is a proper
###   ending of +word+ selects the rule from InfSuffixRules, and the
###   candidates are built according to that rule's :word1 code.
### * If no suffix rule applies either, the result is an empty string with
###   an empty #word2, an empty #rule and a +nil+ #suffix.
###
### Raises IndexError if the selected rule carries an unknown :word1 code.
def infinitive( word )
  word = word.to_s

  # Separate literals on purpose: a chained `a = b = ''` would make every
  # field of the result share one mutable String object.
  word1  = ''
  word2  = ''
  suffix = ''
  rule   = ''

  if IrregularInfinitives.key?( word )
    word1 = IrregularInfinitives[ word ]
    rule  = 'irregular'
  else
    # For every split of the word into a non-empty head and a tail, map the
    # tail to the list of strings obtained by re-attaching ever shorter
    # pieces of the tail to the head -- i.e. the prefixes of the word from
    # longest (word minus its last letter) to shortest (the bare head).
    prefixes = Hash.new {|hsh, key| hsh[key] = [] }

    1.upto( word.length ) do |i|
      head = word[ 0, i ]
      tail = word[ i..-1 ]

      (tail.length - 1).downto( 0 ) do |j|
        prefixes[ tail ].push( head + tail[0, j] )
      end
    end

    $stderr.puts "prefixes: %p" % prefixes if $DEBUG

    # Pick the highest-priority rule whose suffix is one of the word's
    # tails; nil when no rule covers this word.
    suffix = ( InfSuffixRuleOrder & prefixes.keys ).first

    if suffix
      spec       = InfSuffixRules[ suffix ]
      candidates = prefixes[ suffix ]
      rule       = spec[:rule]
      selector   = spec[:word1]
      $stderr.puts "Using rule %p (%p) for suffix %p" %
        [ rule, selector, suffix ] if $DEBUG

      case selector
      when 0
        # Two longest prefixes: the word minus one and minus two letters.
        word1 = candidates[ 0 ]
        word2 = candidates[ 1 ]
        $stderr.puts "For sp = 0: word1: %p, word2: %p" %
          [ word1, word2 ] if $DEBUG

      when -1
        # Stem plus the replacement ending; no alternative.
        word1 = candidates.last + spec[:suffix1]
        word2 = ''
        $stderr.puts "For sp = -1: word1: %p, word2: %p" %
          [ word1, word2 ] if $DEBUG

      when -2
        # Stem plus the replacement ending, with the bare stem as fallback.
        word1 = candidates.last + spec[:suffix1]
        word2 = candidates.last
        $stderr.puts "For sp = -2: word1: %p, word2: %p" %
          [ word1, word2 ] if $DEBUG

      when -3
        # Stem plus either of the two replacement endings.
        word1 = candidates.last + spec[:suffix1]
        word2 = candidates.last + spec[:suffix2]
        $stderr.puts "For sp = -3: word1: %p, word2: %p" %
          [ word1, word2 ] if $DEBUG

      when -4
        # The word is already in its base form.
        word1 = word
        word2 = ''
        $stderr.puts "For sp = -4: word1: %p, word2: %p" %
          [ word1, word2 ] if $DEBUG

      else
        raise IndexError,
          "Couldn't find rule for shortest prefix %p" %
          selector
      end

      # Rules 12b and 15: Strip off 'ed' or 'ing'.
      if rule == '12b' || rule == '15'
        # Do we have a monosyllable of this form:
        # o 0+ Consonants
        # o 1+ Vowel
        # o 2 Non-wx
        # Eg: tipped => tipp?
        # Then return tip and tipp.
        # Eg: swimming => swimm?
        # Then return swim and swimm.
        # \A/\z anchor the whole candidate rather than a single line of it.
        doubled = /\A([^aeiou]*[aeiou]+)([^wx])\2\z/.match( word2 )
        if doubled
          word1 = doubled[1] + doubled[2]
          word2 = doubled[1] + doubled[2] + doubled[2]
        end
      end
    end
  end

  return Infinitive::new( word1, word2, suffix, rule )
end
1143
+
1144
+ end # module EN::Linguistics
1145
+