Linguistics 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1149 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # This file contains functions for deriving the infinitive forms of conjugated
4
+ # English words. Requiring this file adds functions and constants to the
5
+ # Linguistics::EN module.
6
+ #
7
+ # == Authors
8
+ #
9
+ # * Michael Granger <ged@FaerieMUD.org>
10
+ #
11
+ # == Copyright
12
+ #
13
+ # Copyright (c) 2003-2005 The FaerieMUD Consortium. All rights reserved.
14
+ #
15
+ # This module is free software. You may use, modify, and/or redistribute this
16
+ # software under the terms of the Perl Artistic License. (See
17
+ # http://language.perl.com/misc/Artistic.html)
18
+ #
19
+ # This code was ported from the excellent 'Lingua::EN::Infinitive' Perl module
20
+ # by Ron Savage, which is distributed under the following license:
21
+ #
22
+ # Australian copyright (c) 1999-2002 Ron Savage.
23
+ #
24
+ # All Programs of mine are 'OSI Certified Open Source Software';
25
+ # you can redistribute them and/or modify them under the terms of
26
+ # The Artistic License, a copy of which is available at:
27
+ # http://www.opensource.org/licenses/index.html
28
+ #
29
+ # == Version
30
+ #
31
+ # $Id: infinitive.rb,v 1.2 2003/09/14 10:35:32 deveiant Exp $
32
+ #
33
+
34
+ module Linguistics::EN
35
+
36
+ # :stopdoc:
37
+
38
# Irregular conjugated forms => infinitive forms.
#
# Linguistics::EN::infinitive consults this table before trying any of the
# suffix rules, so every key must be spelled exactly as the conjugated form
# appears in text.
IrregularInfinitives = {
  'abided' => 'abide',
  'abode' => 'abide',
  'am' => 'be',
  'are' => 'be',
  'arisen' => 'arise',
  'arose' => 'arise',
  'ate' => 'eat',
  'awaked' => 'awake',
  'awoke' => 'awake',
  'bade' => 'bid',
  'beaten' => 'beat',
  'became' => 'become',
  'been' => 'be',
  'befallen' => 'befall',
  'befell' => 'befall',
  'began' => 'begin',
  'begat' => 'beget',
  'begot' => 'beget',
  'begotten' => 'beget',
  'begun' => 'begin',
  'beheld' => 'behold',
  'bent' => 'bend',
  'bereaved' => 'bereave',
  'bereft' => 'bereave',
  'beseeched' => 'beseech',
  'besought' => 'beseech',
  'bespoke' => 'bespeak',
  'bespoken' => 'bespeak',
  'bestrewed' => 'bestrew',
  'bestrewn' => 'bestrew',
  'bestrid' => 'bestride',
  'bestridden' => 'bestride',
  'bestrode' => 'bestride',
  'betaken' => 'betake',
  'bethought' => 'bethink',
  'betook' => 'betake',
  'betted' => 'bet',
  'bidden' => 'bid',
  'bided' => 'bide',
  'bit' => 'bite',
  'bitten' => 'bite',
  'bled' => 'bleed',
  'blended' => 'blend',
  'blent' => 'blend',
  'blessed' => 'bless',
  'blest' => 'bless',
  'blew' => 'blow',
  'blown' => 'blow',
  'bode' => 'bide',
  'bore' => 'bear',
  'born' => 'bear',
  'borne' => 'bear',
  'bought' => 'buy',
  'bound' => 'bind',
  'bred' => 'breed',
  'broadcasted' => 'broadcast',
  'broke' => 'break',
  'broken' => 'break',
  'brought' => 'bring',
  'browbeaten' => 'browbeat',
  'built' => 'build',
  'burned' => 'burn',
  'burnt' => 'burn',
  'came' => 'come',
  'caught' => 'catch',
  'chid' => 'chide',
  'chidden' => 'chide',
  'chided' => 'chide',
  'chose' => 'choose',
  'chosen' => 'choose',
  'clad' => 'clothe',
  'clave' => 'cleave',
  'cleaved' => 'cleave',
  'cleft' => 'cleave',
  'clothed' => 'clothe',
  'clove' => 'cleave',
  'cloven' => 'cleave',
  'clung' => 'cling',
  'costed' => 'cost',
  'could' => 'can',
  'crept' => 'creep',
  'crew' => 'crow',
  'crowed' => 'crow',
  'dealt' => 'deal',
  'did' => 'do',
  'done' => 'do',
  'dove' => 'dive',
  'drank' => 'drink',
  'drawn' => 'draw',
  'dreamed' => 'dream',
  'dreamt' => 'dream',
  'drew' => 'draw',
  'driven' => 'drive',
  'drove' => 'drive',
  'drunk' => 'drink',
  'dug' => 'dig',
  'dwelled' => 'dwell',
  'dwelt' => 'dwell',
  'eaten' => 'eat',
  'fallen' => 'fall',
  'fed' => 'feed',
  'fell' => 'fall',
  'felt' => 'feel',
  'fled' => 'flee',
  'flew' => 'fly',
  'flown' => 'fly',
  'flung' => 'fling',
  'forbad' => 'forbid',
  'forbade' => 'forbid',
  'forbidden' => 'forbid',
  'forbore' => 'forbear',
  'forborne' => 'forbear',
  'fordid' => 'fordo',
  'fordone' => 'fordo',
  'forecasted' => 'forecast',
  'foregone' => 'forego',
  'foreknew' => 'foreknow',
  'foreknown' => 'foreknow',
  'foreran' => 'forerun',
  'foresaw' => 'foresee',
  'foreseen' => 'foresee',
  'foreshowed' => 'foreshow',
  'foreshown' => 'foreshow',
  'foretold' => 'foretell',
  'forewent' => 'forego',
  'forgave' => 'forgive',
  'forgiven' => 'forgive',
  'forgot' => 'forget',
  'forgotten' => 'forget',
  'forsaken' => 'forsake',
  # Misspelling of 'foreseen'; kept so existing lookups of this key still work.
  'forseen' => 'foresee',
  'forsook' => 'forsake',
  'forswore' => 'forswear',
  'forsworn' => 'forswear',
  'fought' => 'fight',
  'found' => 'find',
  'froze' => 'freeze',
  'frozen' => 'freeze',
  'gainsaid' => 'gainsay',
  'gave' => 'give',
  'gilded' => 'gild',
  'gilt' => 'gild',
  'girded' => 'gird',
  'girt' => 'gird',
  'given' => 'give',
  'gone' => 'go',
  'got' => 'get',
  'gotten' => 'get',
  'graved' => 'grave',
  'graven' => 'grave',
  'grew' => 'grow',
  'ground' => 'grind',
  'grown' => 'grow',
  'had' => 'have',
  'hamstringed' => 'hamstring',
  'hamstrung' => 'hamstring',
  'hanged' => 'hang',
  'heard' => 'hear',
  'heaved' => 'heave',
  'held' => 'hold',
  'hewed' => 'hew',
  'hewn' => 'hew',
  'hid' => 'hide',
  'hidden' => 'hide',
  'hove' => 'heave',
  'hung' => 'hang',
  'inlaid' => 'inlay',
  'is' => 'be',
  'kept' => 'keep',
  'kneeled' => 'kneel',
  'knelt' => 'kneel',
  'knew' => 'know',
  'knitted' => 'knit',
  'known' => 'know',
  'laded' => 'lade',
  'laden' => 'lade',
  'laid' => 'lay',
  'lain' => 'lie',
  'lay' => 'lie',
  'leaned' => 'lean',
  'leant' => 'lean',
  'leaped' => 'leap',
  'leapt' => 'leap',
  'learned' => 'learn',
  'learnt' => 'learn',
  'led' => 'lead',
  'left' => 'leave',
  'lent' => 'lend',
  'lighted' => 'light',
  'lit' => 'light',
  'lost' => 'lose',
  'made' => 'make',
  'meant' => 'mean',
  'melted' => 'melt',
  'met' => 'meet',
  'might' => 'may',
  'misdealt' => 'misdeal',
  'misgave' => 'misgive',
  'misgiven' => 'misgive',
  'mislaid' => 'mislay',
  'misled' => 'mislead',
  'mistaken' => 'mistake',
  'mistook' => 'mistake',
  'misunderstood' => 'misunderstand',
  'molten' => 'melt',
  'mowed' => 'mow',
  'mown' => 'mow',
  'outate' => 'outeat',
  'outbade' => 'outbid',
  'outbidden' => 'outbid',
  'outbred' => 'outbreed',
  'outdid' => 'outdo',
  'outdone' => 'outdo',
  'outeaten' => 'outeat',
  'outfought' => 'outfight',
  'outgone' => 'outgo',
  'outgrew' => 'outgrow',
  'outgrown' => 'outgrow',
  'outlaid' => 'outlay',
  'outran' => 'outrun',
  'outridden' => 'outride',
  'outrode' => 'outride',
  'outsat' => 'outsit',
  'outshone' => 'outshine',
  'outshot' => 'outshoot',
  'outsold' => 'outsell',
  'outspent' => 'outspend',
  'outthrew' => 'outthrow',
  'outthrown' => 'outthrow',
  'outwent' => 'outgo',
  'outwore' => 'outwear',
  'outworn' => 'outwear',
  'overate' => 'overeat',
  'overbade' => 'overbid',
  'overbidden' => 'overbid',
  'overblew' => 'overblow',
  'overblown' => 'overblow',
  'overbore' => 'overbear',
  'overborn' => 'overbear',
  'overborne' => 'overbear',
  'overbought' => 'overbuy',
  'overbuilt' => 'overbuild',
  'overcame' => 'overcome',
  'overdid' => 'overdo',
  'overdone' => 'overdo',
  'overdrawn' => 'overdraw',
  'overdrew' => 'overdraw',
  'overdriven' => 'overdrive',
  'overdrove' => 'overdrive',
  'overeaten' => 'overeat',
  'overfed' => 'overfeed',
  'overflew' => 'overfly',
  'overflown' => 'overfly',
  'overgrew' => 'overgrow',
  'overgrown' => 'overgrow',
  'overhanged' => 'overhang',
  'overheard' => 'overhear',
  'overhung' => 'overhang',
  'overladed' => 'overlade',
  'overladen' => 'overlade',
  'overlaid' => 'overlay',
  'overlain' => 'overlie',
  'overlay' => 'overlie',
  'overleaped' => 'overleap',
  'overleapt' => 'overleap',
  'overpaid' => 'overpay',
  'overran' => 'overrun',
  'overridden' => 'override',
  'overrode' => 'override',
  'oversaw' => 'oversee',
  'overseen' => 'oversee',
  'oversewed' => 'oversew',
  'oversewn' => 'oversew',
  'overshot' => 'overshoot',
  'overslept' => 'oversleep',
  'overspent' => 'overspend',
  'overtaken' => 'overtake',
  'overthrew' => 'overthrow',
  'overthrown' => 'overthrow',
  'overtook' => 'overtake',
  'overwinded' => 'overwind',
  'overwound' => 'overwind',
  'overwritten' => 'overwrite',
  'overwrote' => 'overwrite',
  'paid' => 'pay',
  'partaken' => 'partake',
  'partook' => 'partake',
  'prechose' => 'prechoose',
  'prechosen' => 'prechoose',
  'proved' => 'prove',
  'proven' => 'prove',
  'quitted' => 'quit',
  'ran' => 'run',
  'rang' => 'ring',
  'reaved' => 'reave',
  'rebuilt' => 'rebuild',
  'reeved' => 'reeve',
  'reft' => 'reave',
  'relaid' => 'relay',
  'rent' => 'rend',
  'repaid' => 'repay',
  'retold' => 'retell',
  'ridded' => 'rid',
  'ridden' => 'ride',
  'risen' => 'rise',
  'rived' => 'rive',
  'riven' => 'rive',
  'rode' => 'ride',
  'rose' => 'rise',
  'rove' => 'reeve',
  'rung' => 'ring',
  'said' => 'say',
  'sang' => 'sing',
  'sank' => 'sink',
  'sat' => 'sit',
  'saw' => 'see',
  'sawed' => 'saw',
  'sawn' => 'saw',
  'seen' => 'see',
  'sent' => 'send',
  'sewed' => 'sew',
  'sewn' => 'sew',
  'shaken' => 'shake',
  'shaved' => 'shave',
  'shaven' => 'shave',
  'sheared' => 'shear',
  'shined' => 'shine',
  'shod' => 'shoe',
  'shoed' => 'shoe',
  'shone' => 'shine',
  'shook' => 'shake',
  'shorn' => 'shear',
  'shot' => 'shoot',
  'showed' => 'show',
  'shown' => 'show',
  'shrank' => 'shrink',
  'shredded' => 'shred',
  'shrived' => 'shrive',
  'shriven' => 'shrive',
  'shrove' => 'shrive',
  'shrunk' => 'shrink',
  'shrunken' => 'shrink',
  'slain' => 'slay',
  'slept' => 'sleep',
  'slew' => 'slay',
  'slid' => 'slide',
  'slidden' => 'slide',
  'slung' => 'sling',
  'slunk' => 'slink',
  'smelled' => 'smell',
  'smelt' => 'smell',
  'smitten' => 'smite',
  'smote' => 'smite',
  'snuck' => 'sneak',
  'sold' => 'sell',
  'sought' => 'seek',
  'sowed' => 'sow',
  'sown' => 'sow',
  'span' => 'spin',
  'spat' => 'spit',
  'sped' => 'speed',
  'speeded' => 'speed',
  'spelled' => 'spell',
  'spelt' => 'spell',
  'spent' => 'spend',
  'spilled' => 'spill',
  'spilt' => 'spill',
  'spoiled' => 'spoil',
  'spoilt' => 'spoil',
  'spoke' => 'speak',
  'spoken' => 'speak',
  'sprang' => 'spring',
  'sprung' => 'spring',
  'spun' => 'spin',
  'stank' => 'stink',
  'staved' => 'stave',
  'stole' => 'steal',
  'stolen' => 'steal',
  'stood' => 'stand',
  'stove' => 'stave',
  'strewed' => 'strew',
  'strewn' => 'strew',
  'stricken' => 'strike',
  'strid' => 'stride',
  'stridden' => 'stride',
  'strived' => 'strive',
  'striven' => 'strive',
  'strode' => 'stride',
  'strove' => 'strive',
  'struck' => 'strike',
  'strung' => 'string',
  'stuck' => 'stick',
  'stung' => 'sting',
  'stunk' => 'stink',
  'sung' => 'sing',
  'sunk' => 'sink',
  'sunken' => 'sink',
  'swam' => 'swim',
  'sweated' => 'sweat',
  'swelled' => 'swell',
  'swept' => 'sweep',
  'swollen' => 'swell',
  'swore' => 'swear',
  'sworn' => 'swear',
  'swum' => 'swim',
  'swung' => 'swing',
  'taken' => 'take',
  'taught' => 'teach',
  'thought' => 'think',
  'threw' => 'throw',
  'thrived' => 'thrive',
  'thriven' => 'thrive',
  'throve' => 'thrive',
  'thrown' => 'throw',
  'told' => 'tell',
  'took' => 'take',
  'tore' => 'tear',
  'torn' => 'tear',
  'trod' => 'tread',
  'trodden' => 'tread',
  'unbent' => 'unbend',
  'unbound' => 'unbind',
  'unbuilt' => 'unbuild',
  'underbought' => 'underbuy',
  'underfed' => 'underfeed',
  'undergone' => 'undergo',
  'underlaid' => 'underlay',
  'underlain' => 'underlie',
  'underlay' => 'underlie',
  'underpaid' => 'underpay',
  'underran' => 'underrun',
  'undershot' => 'undershoot',
  'undersold' => 'undersell',
  'understood' => 'understand',
  'undertaken' => 'undertake',
  'undertook' => 'undertake',
  'underwent' => 'undergo',
  'underwritten' => 'underwrite',
  'underwrote' => 'underwrite',
  'undid' => 'undo',
  'undone' => 'undo',
  'undrawn' => 'undraw',
  'undrew' => 'undraw',
  'unfroze' => 'unfreeze',
  'unfrozen' => 'unfreeze',
  'ungirded' => 'ungird',
  'ungirt' => 'ungird',
  'unhanged' => 'unhang',
  'unhung' => 'unhang',
  'unknitted' => 'unknit',
  'unladed' => 'unlade',
  'unladen' => 'unlade',
  'unlaid' => 'unlay',
  'unlearned' => 'unlearn',
  'unlearnt' => 'unlearn',
  'unmade' => 'unmake',
  'unreeved' => 'unreeve',
  'unrove' => 'unreeve',
  'unsaid' => 'unsay',
  'unslung' => 'unsling',
  'unspoke' => 'unspeak',
  'unspoken' => 'unspeak',
  'unstrung' => 'unstring',
  'unstuck' => 'unstick',
  'unswore' => 'unswear',
  'unsworn' => 'unswear',
  'untaught' => 'unteach',
  'unthought' => 'unthink',
  'untrod' => 'untread',
  'untrodden' => 'untread',
  'unwinded' => 'unwind',
  'unwound' => 'unwind',
  'unwove' => 'unweave',
  'unwoven' => 'unweave',
  'upbuilt' => 'upbuild',
  'upheld' => 'uphold',
  'uprisen' => 'uprise',
  'uprose' => 'uprise',
  'upswept' => 'upsweep',
  'upswung' => 'upswing',
  'waked' => 'wake',
  'was' => 'be',
  'waylaid' => 'waylay',
  'wedded' => 'wed',
  'went' => 'go',
  'wept' => 'weep',
  'were' => 'be',
  'wetted' => 'wet',
  'winded' => 'wind',
  'wist' => 'wit',
  'wot' => 'wit',
  'withdrawn' => 'withdraw',
  'withdrew' => 'withdraw',
  'withheld' => 'withhold',
  'withstood' => 'with​stand'.delete("\u200B"),
  'woke' => 'wake',
  'woken' => 'wake',
  'won' => 'win',
  'wore' => 'wear',
  'worked' => 'work',
  'worn' => 'wear',
  'wound' => 'wind',
  'wove' => 'weave',
  'woven' => 'weave',
  'written' => 'write',
  'wrote' => 'write',
  'wrought' => 'work',
  'wrung' => 'wring',
}
548
+
549
# Mapping of word suffixes to infinitive rules.
#
# Each entry is keyed by a suffix and carries:
#
#   :order   - sort key; Linguistics::EN::infinitive uses the matching suffix
#              with the lowest :order.
#   :rule    - the rule's name, reported on the returned Infinitive.
#   :word1   - how the candidate words are built ("stem" below means the word
#              with the suffix removed):
#                 0 => the word minus its last letter, with the word minus
#                      its last two letters as the fallback
#                -1 => stem + :suffix1
#                -2 => stem + :suffix1, with the bare stem as the fallback
#                -3 => stem + :suffix1, with stem + :suffix2 as the fallback
#                -4 => the original word, unchanged
#   :suffix1 - text appended to the stem for the first candidate.
#   :suffix2 - text appended to the stem for the fallback (mode -3 only).
#
# Rules '12b' and '15' get additional doubled-consonant handling in
# Linguistics::EN::infinitive.
InfSuffixRules = [
  # suffix        order  rule   word1  suffix1    suffix2
  ['hes',         1011,  '1',     0,   '',        ''],
  ['ses',         1021,  '2',     0,   '',        ''],
  ['xes',         1031,  '3',     0,   '',        ''],
  ['zes',         1041,  '4',     0,   '',        ''],
  ['iless',       1051,  '43a',  -1,   'y',       ''],
  ['less',        1052,  '43b',  -1,   '',        ''],
  ['iness',       1053,  '44a',  -1,   'y',       ''],
  ['ness',        1054,  '44b',  -1,   '',        ''],
  ["'s",          1055,  '7',    -1,   '',        ''],
  ['ies',         1056,  '13a',  -1,   'y',       ''],
  ['es',          1057,  '13b',   0,   '',        ''],
  ['ss',          1061,  '6a',   -4,   '',        ''],
  ['s',           1062,  '6b',   -1,   '',        ''],
  ['ater',        1081,  '8',    -4,   '',        ''],
  ['cter',        1091,  '9',    -4,   '',        ''],
  ['ier',         1101,  '10',   -1,   'y',       ''],
  ['er',          1111,  '11',    0,   '',        ''],
  ['ied',         1121,  '12a',  -1,   'y',       ''],
  ['ed',          1122,  '12b',   0,   '',        ''],
  ['iest',        1141,  '14a',  -1,   'y',       ''],
  ['est',         1142,  '14b',  -2,   'e',       ''],
  ['blity',       1143,  '21',   -4,   '',        ''],
  ['bility',      1144,  '22',   -1,   'ble',     ''],
  ['fiable',      1145,  '23',   -1,   'fy',      ''],
  ['logist',      1146,  '24',   -1,   'logy',    ''],
  ['ing',         1151,  '15',   -2,   'e',       ''],
  ['ist',         1161,  '16',   -2,   'e',       ''],
  ['ism',         1171,  '17',   -2,   'e',       ''],
  ['ity',         1181,  '18',   -2,   'e',       ''],
  ['ize',         1191,  '19',   -2,   'e',       ''],
  ['cable',       1201,  '20a',  -4,   '',        ''],
  ['gable',       1202,  '20b',  -4,   '',        ''],
  ['able',        1203,  '20c',  -2,   'e',       ''],
  ['graphic',     1251,  '25',   -1,   'graphy',  ''],
  ['istic',       1261,  '26',   -1,   'ist',     ''],
  ['itic',        1271,  '27',   -1,   'ite',     ''],
  ['like',        1281,  '28',   -1,   '',        ''],
  ['logic',       1291,  '29',   -1,   'logy',    ''],
  ['ment',        1301,  '30',   -1,   '',        ''],
  ['mental',      1311,  '31',   -1,   'ment',    ''],
  ['metry',       1321,  '32',   -3,   'meter',   'metre'],
  ['nce',         1331,  '33',   -1,   'nt',      ''],
  ['ncy',         1341,  '34',   -1,   'nt',      ''],
  ['ship',        1351,  '35',   -1,   '',        ''],
  ['ical',        1361,  '36',   -1,   'ic',      ''],
  ['ional',       1371,  '37',   -1,   'ion',     ''],
  ['bly',         1381,  '38',   -1,   'ble',     ''],
  ['ily',         1391,  '39',   -1,   'y',       ''],
  ['ly',          1401,  '40',   -1,   '',        ''],
  ['iful',        1411,  '41a',  -1,   'y',       ''],
  ['ful',         1412,  '41b',  -1,   '',        ''],
  ['ihood',       1421,  '42a',  -1,   'y',       ''],
  ['hood',        1422,  '42b',  -1,   '',        ''],
  ['ification',   1451,  '45',   -1,   'ify',     ''],
  ['ization',     1461,  '46',   -1,   'ize',     ''],
  ['ction',       1471,  '47',   -1,   'ct',      ''],
  ['rtion',       1481,  '48',   -1,   'rt',      ''],
  ['ation',       1491,  '49',   -1,   'ate',     ''],
  ['ator',        1501,  '50',   -1,   'ate',     ''],
  ['ctor',        1511,  '51',   -1,   'ct',      ''],
  ['ive',         1521,  '52',   -1,   'ion',     ''],
  ['onian',       1530,  '54',   -1,   'on',      ''],
  ['an',          1531,  '53',   -1,   'a',       ''],
].inject({}) do |rules, (ending, order, rule, word1, suffix1, suffix2)|
  rules[ending] = {
    :order   => order,
    :rule    => rule,
    :word1   => word1,
    :suffix1 => suffix1,
    :suffix2 => suffix2,
  }
  rules
end

# The rule suffixes, sorted by ascending :order.
InfSuffixRuleOrder = InfSuffixRules.keys.sort_by do |ending|
  InfSuffixRules[ending][:order]
end
1016
+
1017
+ # :startdoc:
1018
+
1019
### The result type of Linguistics::EN::infinitive: a String whose own value
### is the primary infinitive candidate, annotated with how it was derived.
class Infinitive < String

  # word2::  the fallback deconjugated form
  # suffix:: the suffix used to identify the transform rule
  # rule::   the rule used
  attr_reader :word2, :suffix, :rule

  ### Create a new Infinitive whose string value is +word1+, remembering
  ### +word2+, +suffix+ and +rule+ for the readers above.
  def initialize(word1, word2, suffix, rule)
    super(word1)
    @word2, @suffix, @rule = word2, suffix, rule
  end

end
1045
+
1046
+
1047
+ ###############
1048
+ module_function
1049
+ ###############
1050
+
1051
### Return the infinitive form of the given +word+ as an Infinitive, a String
### whose value is the primary candidate and whose #word2, #suffix and #rule
### describe the fallback candidate and the rule that produced it.
###
### +word+ is converted with #to_s and looked up in IrregularInfinitives
### first; a hit yields the rule 'irregular' with an empty suffix and
### fallback. Otherwise the first suffix in InfSuffixRuleOrder that ends the
### word (and is shorter than it) selects an entry of InfSuffixRules, whose
### :word1 mode decides how the candidates are built:
###
###    0:: the word minus its last letter; fallback: minus its last two
###   -1:: stem + :suffix1; no fallback
###   -2:: stem + :suffix1; fallback: the bare stem
###   -3:: stem + :suffix1; fallback: stem + :suffix2
###   -4:: the word unchanged; no fallback
###
### where the "stem" is the word with the matched suffix removed. When no
### suffix matches, the result is an empty string with an empty rule and a
### +nil+ suffix.
###
### Raises IndexError if the selected rule has an unrecognised :word1 mode.
def infinitive(word)
  word = word.to_s

  # Irregular forms are a plain table lookup. Every empty field is a separate
  # String so that mutating one attribute of the result cannot change another.
  if IrregularInfinitives.key?(word)
    return Infinitive::new(IrregularInfinitives[word], '', '', 'irregular')
  end

  # A rule applies when its suffix ends the word and leaves at least one
  # leading character; the position in InfSuffixRuleOrder decides which wins.
  suffix = InfSuffixRuleOrder.find do |ending|
    ending.length < word.length && word.end_with?(ending)
  end
  return Infinitive::new('', '', nil, '') unless suffix

  spec = InfSuffixRules[suffix]
  rule = spec[:rule]
  mode = spec[:word1]
  $stderr.puts "Using rule %p (%p) for suffix %p" %
    [rule, mode, suffix] if $DEBUG

  # Leading parts of the word, longest first: from the word minus its last
  # letter down to the word minus the whole suffix (the stem).
  leads = (1..suffix.length).map { |dropped| word[0, word.length - dropped] }
  stem = leads.last

  case mode
  when 0
    word1 = leads[0]
    word2 = leads[1]
  when -1
    word1 = stem + spec[:suffix1]
    word2 = ''
  when -2
    word1 = stem + spec[:suffix1]
    word2 = stem
  when -3
    word1 = stem + spec[:suffix1]
    word2 = stem + spec[:suffix2]
  when -4
    word1 = word
    word2 = ''
  else
    raise IndexError,
      "Couldn't find rule for shortest prefix %p" %
      mode
  end
  $stderr.puts "For sp = %d: word1: %p, word2: %p" %
    [mode, word1, word2] if $DEBUG

  # Rules 12b and 15 strip 'ed' or 'ing'. If what remains is a monosyllable
  # of the form
  #   o 0+ consonants
  #   o 1+ vowels
  #   o the same non-w/x letter twice
  # then the final letter was doubled when the suffix was added:
  #   tipped   => tipp?  Return tip and tipp.
  #   swimming => swimm? Return swim and swimm.
  # The pattern is anchored to the whole string, so only the entire remainder
  # can match, never a single line within it.
  if rule == '12b' || rule == '15'
    doubled = /\A([^aeiou]*[aeiou]+)([^wx])\2\z/.match(word2)
    if doubled
      word1 = doubled[1] + doubled[2]
      word2 = word1 + doubled[2]
    end
  end

  Infinitive::new(word1, word2, suffix, rule)
end
1147
+
1148
+ end # module EN::Linguistics
1149
+