linguistics 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1145 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # This file contains functions for deriving the infinitive forms of conjugated
4
+ # English words. Requiring this file adds functions and constants to the
5
+ # Linguistics::EN module.
6
+ #
7
+ # == Authors
8
+ #
9
+ # * Michael Granger <ged@FaerieMUD.org>
10
+ #
11
+ # == Acknowledgments
12
+ #
13
+ # This code was ported from the excellent 'Lingua::EN::Infinitive' Perl module
14
+ # by Ron Savage, which is distributed under the following license:
15
+ #
16
+ # Australian copyright (c) 1999-2002 Ron Savage.
17
+ #
18
+ # All Programs of mine are 'OSI Certified Open Source Software';
19
+ # you can redistribute them and/or modify them under the terms of
20
+ # The Artistic License, a copy of which is available at:
21
+ # http://www.opensource.org/licenses/index.html
22
+ #
23
+ #
24
+ # :include: LICENSE
25
+ #
26
+ #--
27
+ #
28
+ # Please see the file LICENSE in the base directory for licensing details.
29
+ #
30
+ module Linguistics::EN
31
+
32
+ # :stopdoc:
33
+
34
# Irregular (and a few explicitly-listed regular) conjugated forms mapped to
# their infinitives. Lookups are exact, case-sensitive String matches; a hit
# here takes precedence over the suffix rules.
#
# 'foreseen' is the correctly-spelled participle of 'foresee'; the historical
# misspelling 'forseen' is kept so existing callers keep getting a result.
IrregularInfinitives = {
  'abided' => 'abide',
  'abode' => 'abide',
  'am' => 'be',
  'are' => 'be',
  'arisen' => 'arise',
  'arose' => 'arise',
  'ate' => 'eat',
  'awaked' => 'awake',
  'awoke' => 'awake',
  'bade' => 'bid',
  'beaten' => 'beat',
  'became' => 'become',
  'been' => 'be',
  'befallen' => 'befall',
  'befell' => 'befall',
  'began' => 'begin',
  'begat' => 'beget',
  'begot' => 'beget',
  'begotten' => 'beget',
  'begun' => 'begin',
  'beheld' => 'behold',
  'bent' => 'bend',
  'bereaved' => 'bereave',
  'bereft' => 'bereave',
  'beseeched' => 'beseech',
  'besought' => 'beseech',
  'bespoke' => 'bespeak',
  'bespoken' => 'bespeak',
  'bestrewed' => 'bestrew',
  'bestrewn' => 'bestrew',
  'bestrid' => 'bestride',
  'bestridden' => 'bestride',
  'bestrode' => 'bestride',
  'betaken' => 'betake',
  'bethought' => 'bethink',
  'betook' => 'betake',
  'betted' => 'bet',
  'bidden' => 'bid',
  'bided' => 'bide',
  'bit' => 'bite',
  'bitten' => 'bite',
  'bled' => 'bleed',
  'blended' => 'blend',
  'blent' => 'blend',
  'blessed' => 'bless',
  'blest' => 'bless',
  'blew' => 'blow',
  'blown' => 'blow',
  'bode' => 'bide',
  'bore' => 'bear',
  'born' => 'bear',
  'borne' => 'bear',
  'bought' => 'buy',
  'bound' => 'bind',
  'bred' => 'breed',
  'broadcasted' => 'broadcast',
  'broke' => 'break',
  'broken' => 'break',
  'brought' => 'bring',
  'browbeaten' => 'browbeat',
  'built' => 'build',
  'burned' => 'burn',
  'burnt' => 'burn',
  'came' => 'come',
  'caught' => 'catch',
  'chid' => 'chide',
  'chidden' => 'chide',
  'chided' => 'chide',
  'chose' => 'choose',
  'chosen' => 'choose',
  'clad' => 'clothe',
  'clave' => 'cleave',
  'cleaved' => 'cleave',
  'cleft' => 'cleave',
  'clothed' => 'clothe',
  'clove' => 'cleave',
  'cloven' => 'cleave',
  'clung' => 'cling',
  'costed' => 'cost',
  'could' => 'can',
  'crept' => 'creep',
  'crew' => 'crow',
  'crowed' => 'crow',
  'dealt' => 'deal',
  'did' => 'do',
  'done' => 'do',
  'dove' => 'dive',
  'drank' => 'drink',
  'drawn' => 'draw',
  'dreamed' => 'dream',
  'dreamt' => 'dream',
  'drew' => 'draw',
  'driven' => 'drive',
  'drove' => 'drive',
  'drunk' => 'drink',
  'dug' => 'dig',
  'dwelled' => 'dwell',
  'dwelt' => 'dwell',
  'eaten' => 'eat',
  'fallen' => 'fall',
  'fed' => 'feed',
  'fell' => 'fall',
  'felt' => 'feel',
  'fled' => 'flee',
  'flew' => 'fly',
  'flown' => 'fly',
  'flung' => 'fling',
  'forbad' => 'forbid',
  'forbade' => 'forbid',
  'forbidden' => 'forbid',
  'forbore' => 'forbear',
  'forborne' => 'forbear',
  'fordid' => 'fordo',
  'fordone' => 'fordo',
  'forecasted' => 'forecast',
  'foregone' => 'forego',
  'foreknew' => 'foreknow',
  'foreknown' => 'foreknow',
  'foreran' => 'forerun',
  'foresaw' => 'foresee',
  'foreseen' => 'foresee',
  'foreshowed' => 'foreshow',
  'foreshown' => 'foreshow',
  'foretold' => 'foretell',
  'forewent' => 'forego',
  'forgave' => 'forgive',
  'forgiven' => 'forgive',
  'forgot' => 'forget',
  'forgotten' => 'forget',
  'forsaken' => 'forsake',
  'forseen' => 'foresee',   # legacy misspelling of 'foreseen', kept for compatibility
  'forsook' => 'forsake',
  'forswore' => 'forswear',
  'forsworn' => 'forswear',
  'fought' => 'fight',
  'found' => 'find',
  'froze' => 'freeze',
  'frozen' => 'freeze',
  'gainsaid' => 'gainsay',
  'gave' => 'give',
  'gilded' => 'gild',
  'gilt' => 'gild',
  'girded' => 'gird',
  'girt' => 'gird',
  'given' => 'give',
  'gone' => 'go',
  'got' => 'get',
  'gotten' => 'get',
  'graved' => 'grave',
  'graven' => 'grave',
  'grew' => 'grow',
  'ground' => 'grind',
  'grown' => 'grow',
  'had' => 'have',
  'hamstringed' => 'hamstring',
  'hamstrung' => 'hamstring',
  'hanged' => 'hang',
  'heard' => 'hear',
  'heaved' => 'heave',
  'held' => 'hold',
  'hewed' => 'hew',
  'hewn' => 'hew',
  'hid' => 'hide',
  'hidden' => 'hide',
  'hove' => 'heave',
  'hung' => 'hang',
  'inlaid' => 'inlay',
  'is' => 'be',
  'kept' => 'keep',
  'kneeled' => 'kneel',
  'knelt' => 'kneel',
  'knew' => 'know',
  'knitted' => 'knit',
  'known' => 'know',
  'laded' => 'lade',
  'laden' => 'lade',
  'laid' => 'lay',
  'lain' => 'lie',
  'lay' => 'lie',
  'leaned' => 'lean',
  'leant' => 'lean',
  'leaped' => 'leap',
  'leapt' => 'leap',
  'learned' => 'learn',
  'learnt' => 'learn',
  'led' => 'lead',
  'left' => 'leave',
  'lent' => 'lend',
  'lighted' => 'light',
  'lit' => 'light',
  'lost' => 'lose',
  'made' => 'make',
  'meant' => 'mean',
  'melted' => 'melt',
  'met' => 'meet',
  'might' => 'may',
  'misdealt' => 'misdeal',
  'misgave' => 'misgive',
  'misgiven' => 'misgive',
  'mislaid' => 'mislay',
  'misled' => 'mislead',
  'mistaken' => 'mistake',
  'mistook' => 'mistake',
  'misunderstood' => 'misunderstand',
  'molten' => 'melt',
  'mowed' => 'mow',
  'mown' => 'mow',
  'outate' => 'outeat',
  'outbade' => 'outbid',
  'outbidden' => 'outbid',
  'outbred' => 'outbreed',
  'outdid' => 'outdo',
  'outdone' => 'outdo',
  'outeaten' => 'outeat',
  'outfought' => 'outfight',
  'outgone' => 'outgo',
  'outgrew' => 'outgrow',
  'outgrown' => 'outgrow',
  'outlaid' => 'outlay',
  'outran' => 'outrun',
  'outridden' => 'outride',
  'outrode' => 'outride',
  'outsat' => 'outsit',
  'outshone' => 'outshine',
  'outshot' => 'outshoot',
  'outsold' => 'outsell',
  'outspent' => 'outspend',
  'outthrew' => 'outthrow',
  'outthrown' => 'outthrow',
  'outwent' => 'outgo',
  'outwore' => 'outwear',
  'outworn' => 'outwear',
  'overate' => 'overeat',
  'overbade' => 'overbid',
  'overbidden' => 'overbid',
  'overblew' => 'overblow',
  'overblown' => 'overblow',
  'overbore' => 'overbear',
  'overborn' => 'overbear',
  'overborne' => 'overbear',
  'overbought' => 'overbuy',
  'overbuilt' => 'overbuild',
  'overcame' => 'overcome',
  'overdid' => 'overdo',
  'overdone' => 'overdo',
  'overdrawn' => 'overdraw',
  'overdrew' => 'overdraw',
  'overdriven' => 'overdrive',
  'overdrove' => 'overdrive',
  'overeaten' => 'overeat',
  'overfed' => 'overfeed',
  'overflew' => 'overfly',
  'overflown' => 'overfly',
  'overgrew' => 'overgrow',
  'overgrown' => 'overgrow',
  'overhanged' => 'overhang',
  'overheard' => 'overhear',
  'overhung' => 'overhang',
  'overladed' => 'overlade',
  'overladen' => 'overlade',
  'overlaid' => 'overlay',
  'overlain' => 'overlie',
  'overlay' => 'overlie',
  'overleaped' => 'overleap',
  'overleapt' => 'overleap',
  'overpaid' => 'overpay',
  'overran' => 'overrun',
  'overridden' => 'override',
  'overrode' => 'override',
  'oversaw' => 'oversee',
  'overseen' => 'oversee',
  'oversewed' => 'oversew',
  'oversewn' => 'oversew',
  'overshot' => 'overshoot',
  'overslept' => 'oversleep',
  'overspent' => 'overspend',
  'overtaken' => 'overtake',
  'overthrew' => 'overthrow',
  'overthrown' => 'overthrow',
  'overtook' => 'overtake',
  'overwinded' => 'overwind',
  'overwound' => 'overwind',
  'overwritten' => 'overwrite',
  'overwrote' => 'overwrite',
  'paid' => 'pay',
  'partaken' => 'partake',
  'partook' => 'partake',
  'prechose' => 'prechoose',
  'prechosen' => 'prechoose',
  'proved' => 'prove',
  'proven' => 'prove',
  'quitted' => 'quit',
  'ran' => 'run',
  'rang' => 'ring',
  'reaved' => 'reave',
  'rebuilt' => 'rebuild',
  'reeved' => 'reeve',
  'reft' => 'reave',
  'relaid' => 'relay',
  'rent' => 'rend',
  'repaid' => 'repay',
  'retold' => 'retell',
  'ridded' => 'rid',
  'ridden' => 'ride',
  'risen' => 'rise',
  'rived' => 'rive',
  'riven' => 'rive',
  'rode' => 'ride',
  'rose' => 'rise',
  'rove' => 'reeve',
  'rung' => 'ring',
  'said' => 'say',
  'sang' => 'sing',
  'sank' => 'sink',
  'sat' => 'sit',
  'saw' => 'see',
  'sawed' => 'saw',
  'sawn' => 'saw',
  'seen' => 'see',
  'sent' => 'send',
  'sewed' => 'sew',
  'sewn' => 'sew',
  'shaken' => 'shake',
  'shaved' => 'shave',
  'shaven' => 'shave',
  'sheared' => 'shear',
  'shined' => 'shine',
  'shod' => 'shoe',
  'shoed' => 'shoe',
  'shone' => 'shine',
  'shook' => 'shake',
  'shorn' => 'shear',
  'shot' => 'shoot',
  'showed' => 'show',
  'shown' => 'show',
  'shrank' => 'shrink',
  'shredded' => 'shred',
  'shrived' => 'shrive',
  'shriven' => 'shrive',
  'shrove' => 'shrive',
  'shrunk' => 'shrink',
  'shrunken' => 'shrink',
  'slain' => 'slay',
  'slept' => 'sleep',
  'slew' => 'slay',
  'slid' => 'slide',
  'slidden' => 'slide',
  'slung' => 'sling',
  'slunk' => 'slink',
  'smelled' => 'smell',
  'smelt' => 'smell',
  'smitten' => 'smite',
  'smote' => 'smite',
  'snuck' => 'sneak',
  'sold' => 'sell',
  'sought' => 'seek',
  'sowed' => 'sow',
  'sown' => 'sow',
  'span' => 'spin',
  'spat' => 'spit',
  'sped' => 'speed',
  'speeded' => 'speed',
  'spelled' => 'spell',
  'spelt' => 'spell',
  'spent' => 'spend',
  'spilled' => 'spill',
  'spilt' => 'spill',
  'spoiled' => 'spoil',
  'spoilt' => 'spoil',
  'spoke' => 'speak',
  'spoken' => 'speak',
  'sprang' => 'spring',
  'sprung' => 'spring',
  'spun' => 'spin',
  'stank' => 'stink',
  'staved' => 'stave',
  'stole' => 'steal',
  'stolen' => 'steal',
  'stood' => 'stand',
  'stove' => 'stave',
  'strewed' => 'strew',
  'strewn' => 'strew',
  'stricken' => 'strike',
  'strid' => 'stride',
  'stridden' => 'stride',
  'strived' => 'strive',
  'striven' => 'strive',
  'strode' => 'stride',
  'strove' => 'strive',
  'struck' => 'strike',
  'strung' => 'string',
  'stuck' => 'stick',
  'stung' => 'sting',
  'stunk' => 'stink',
  'sung' => 'sing',
  'sunk' => 'sink',
  'sunken' => 'sink',
  'swam' => 'swim',
  'sweated' => 'sweat',
  'swelled' => 'swell',
  'swept' => 'sweep',
  'swollen' => 'swell',
  'swore' => 'swear',
  'sworn' => 'swear',
  'swum' => 'swim',
  'swung' => 'swing',
  'taken' => 'take',
  'taught' => 'teach',
  'thought' => 'think',
  'threw' => 'throw',
  'thrived' => 'thrive',
  'thriven' => 'thrive',
  'throve' => 'thrive',
  'thrown' => 'throw',
  'told' => 'tell',
  'took' => 'take',
  'tore' => 'tear',
  'torn' => 'tear',
  'trod' => 'tread',
  'trodden' => 'tread',
  'unbent' => 'unbend',
  'unbound' => 'unbind',
  'unbuilt' => 'unbuild',
  'underbought' => 'underbuy',
  'underfed' => 'underfeed',
  'undergone' => 'undergo',
  'underlaid' => 'underlay',
  'underlain' => 'underlie',
  'underlay' => 'underlie',
  'underpaid' => 'underpay',
  'underran' => 'underrun',
  'undershot' => 'undershoot',
  'undersold' => 'undersell',
  'understood' => 'understand',
  'undertaken' => 'undertake',
  'undertook' => 'undertake',
  'underwent' => 'undergo',
  'underwritten' => 'underwrite',
  'underwrote' => 'underwrite',
  'undid' => 'undo',
  'undone' => 'undo',
  'undrawn' => 'undraw',
  'undrew' => 'undraw',
  'unfroze' => 'unfreeze',
  'unfrozen' => 'unfreeze',
  'ungirded' => 'ungird',
  'ungirt' => 'ungird',
  'unhanged' => 'unhang',
  'unhung' => 'unhang',
  'unknitted' => 'unknit',
  'unladed' => 'unlade',
  'unladen' => 'unlade',
  'unlaid' => 'unlay',
  'unlearned' => 'unlearn',
  'unlearnt' => 'unlearn',
  'unmade' => 'unmake',
  'unreeved' => 'unreeve',
  'unrove' => 'unreeve',
  'unsaid' => 'unsay',
  'unslung' => 'unsling',
  'unspoke' => 'unspeak',
  'unspoken' => 'unspeak',
  'unstrung' => 'unstring',
  'unstuck' => 'unstick',
  'unswore' => 'unswear',
  'unsworn' => 'unswear',
  'untaught' => 'unteach',
  'unthought' => 'unthink',
  'untrod' => 'untread',
  'untrodden' => 'untread',
  'unwinded' => 'unwind',
  'unwound' => 'unwind',
  'unwove' => 'unweave',
  'unwoven' => 'unweave',
  'upbuilt' => 'upbuild',
  'upheld' => 'uphold',
  'uprisen' => 'uprise',
  'uprose' => 'uprise',
  'upswept' => 'upsweep',
  'upswung' => 'upswing',
  'waked' => 'wake',
  'was' => 'be',
  'waylaid' => 'waylay',
  'wedded' => 'wed',
  'went' => 'go',
  'wept' => 'weep',
  'were' => 'be',
  'wetted' => 'wet',
  'winded' => 'wind',
  'wist' => 'wit',
  'wot' => 'wit',
  'withdrawn' => 'withdraw',
  'withdrew' => 'withdraw',
  'withheld' => 'withhold',
  'withstood' => 'withstand',
  'woke' => 'wake',
  'woken' => 'wake',
  'won' => 'win',
  'wore' => 'wear',
  'worked' => 'work',
  'worn' => 'wear',
  'wound' => 'wind',
  'wove' => 'weave',
  'woven' => 'weave',
  'written' => 'write',
  'wrote' => 'write',
  'wrought' => 'work',
  'wrung' => 'wring',
}
544
+
545
# Mapping of word suffixes to infinitive rules, written as one row per suffix:
#
#   [ suffix, sort order, rule id, word1 selector, suffix1, suffix2 ]
#
# Each row becomes an entry of the form
#
#   '<suffix>' => { :order, :rule, :word1, :suffix1, :suffix2 }
#
# The :word1 selector tells the infinitive method how to derive its two
# results from the prefixes left after stripping (part of) the suffix:
#
#    0 => word1 is the longest prefix, word2 the second longest.
#   -1 => word1 is the shortest prefix + suffix1; word2 is empty.
#   -2 => word1 is the shortest prefix + suffix1; word2 is the shortest prefix.
#   -3 => word1 is the shortest prefix + suffix1 (e.g. 'meter');
#         word2 is the shortest prefix + suffix2 (e.g. 'metre').
#   -4 => word1 is the original string; word2 is empty.
#
# Rules '12b' ('ed') and '15' ('ing') get extra doubled-consonant handling in
# the infinitive method.
InfSuffixRules = [
  [ 'hes',       1011, '1',   0,  '',       ''      ],
  [ 'ses',       1021, '2',   0,  '',       ''      ],
  [ 'xes',       1031, '3',   0,  '',       ''      ],
  [ 'zes',       1041, '4',   0,  '',       ''      ],
  [ 'iless',     1051, '43a', -1, 'y',      ''      ],
  [ 'less',      1052, '43b', -1, '',       ''      ],
  [ 'iness',     1053, '44a', -1, 'y',      ''      ],
  [ 'ness',      1054, '44b', -1, '',       ''      ],
  [ "'s",        1055, '7',   -1, '',       ''      ],
  [ 'ies',       1056, '13a', -1, 'y',      ''      ],
  [ 'es',        1057, '13b', 0,  '',       ''      ],
  [ 'ss',        1061, '6a',  -4, '',       ''      ],
  [ 's',         1062, '6b',  -1, '',       ''      ],
  [ 'ater',      1081, '8',   -4, '',       ''      ],
  [ 'cter',      1091, '9',   -4, '',       ''      ],
  [ 'ier',       1101, '10',  -1, 'y',      ''      ],
  [ 'er',        1111, '11',  0,  '',       ''      ],
  [ 'ied',       1121, '12a', -1, 'y',      ''      ],
  [ 'ed',        1122, '12b', 0,  '',       ''      ],
  [ 'iest',      1141, '14a', -1, 'y',      ''      ],
  [ 'est',       1142, '14b', -2, 'e',      ''      ],
  [ 'blity',     1143, '21',  -4, '',       ''      ],
  [ 'bility',    1144, '22',  -1, 'ble',    ''      ],
  [ 'fiable',    1145, '23',  -1, 'fy',     ''      ],
  [ 'logist',    1146, '24',  -1, 'logy',   ''      ],
  [ 'ing',       1151, '15',  -2, 'e',      ''      ],
  [ 'ist',       1161, '16',  -2, 'e',      ''      ],
  [ 'ism',       1171, '17',  -2, 'e',      ''      ],
  [ 'ity',       1181, '18',  -2, 'e',      ''      ],
  [ 'ize',       1191, '19',  -2, 'e',      ''      ],
  [ 'cable',     1201, '20a', -4, '',       ''      ],
  [ 'gable',     1202, '20b', -4, '',       ''      ],
  [ 'able',      1203, '20c', -2, 'e',      ''      ],
  [ 'graphic',   1251, '25',  -1, 'graphy', ''      ],
  [ 'istic',     1261, '26',  -1, 'ist',    ''      ],
  [ 'itic',      1271, '27',  -1, 'ite',    ''      ],
  [ 'like',      1281, '28',  -1, '',       ''      ],
  [ 'logic',     1291, '29',  -1, 'logy',   ''      ],
  [ 'ment',      1301, '30',  -1, '',       ''      ],
  [ 'mental',    1311, '31',  -1, 'ment',   ''      ],
  [ 'metry',     1321, '32',  -3, 'meter',  'metre' ],
  [ 'nce',       1331, '33',  -1, 'nt',     ''      ],
  [ 'ncy',       1341, '34',  -1, 'nt',     ''      ],
  [ 'ship',      1351, '35',  -1, '',       ''      ],
  [ 'ical',      1361, '36',  -1, 'ic',     ''      ],
  [ 'ional',     1371, '37',  -1, 'ion',    ''      ],
  [ 'bly',       1381, '38',  -1, 'ble',    ''      ],
  [ 'ily',       1391, '39',  -1, 'y',      ''      ],
  [ 'ly',        1401, '40',  -1, '',       ''      ],
  [ 'iful',      1411, '41a', -1, 'y',      ''      ],
  [ 'ful',       1412, '41b', -1, '',       ''      ],
  [ 'ihood',     1421, '42a', -1, 'y',      ''      ],
  [ 'hood',      1422, '42b', -1, '',       ''      ],
  [ 'ification', 1451, '45',  -1, 'ify',    ''      ],
  [ 'ization',   1461, '46',  -1, 'ize',    ''      ],
  [ 'ction',     1471, '47',  -1, 'ct',     ''      ],
  [ 'rtion',     1481, '48',  -1, 'rt',     ''      ],
  [ 'ation',     1491, '49',  -1, 'ate',    ''      ],
  [ 'ator',      1501, '50',  -1, 'ate',    ''      ],
  [ 'ctor',      1511, '51',  -1, 'ct',     ''      ],
  [ 'ive',       1521, '52',  -1, 'ion',    ''      ],
  [ 'onian',     1530, '54',  -1, 'on',     ''      ],
  [ 'an',        1531, '53',  -1, 'a',      ''      ],
].inject( {} ) {|rules, (sfx, order, rule, selector, suffix1, suffix2)|
  rules[ sfx ] = {
    :order   => order,
    :rule    => rule,
    :word1   => selector,
    :suffix1 => suffix1,
    :suffix2 => suffix2,
  }
  rules
}

# Rule suffixes sorted by ascending :order, i.e. the priority in which they
# are tried against a word.
InfSuffixRuleOrder = InfSuffixRules.
  sort_by {|_sfx, spec| spec[:order] }.
  map {|sfx, _spec| sfx }
1012
+
1013
+ # :startdoc:
1014
+
1015
### Result type of Linguistics::EN::infinitive: a String holding the primary
### infinitive candidate, annotated with the alternative candidate and the
### suffix/rule that produced it.
class Infinitive < String

  # The fallback deconjugated form
  attr_reader :word2

  # The suffix used to identify the transform rule
  attr_reader :suffix

  # The rule used
  attr_reader :rule

  ### Create a new Infinitive whose String value is +word1+; +word2+,
  ### +suffix+ and +rule+ are stored as given and exposed through the
  ### readers of the same name.
  def initialize( word1, word2, suffix, rule )
    super( word1 )
    @word2, @suffix, @rule = word2, suffix, rule
  end

end
1041
+
1042
+
1043
+ ###############
1044
+ module_function
1045
+ ###############
1046
+
1047
### Return the infinitive form of the given +word+ (converted with #to_s) as
### an Infinitive, a String subclass whose value is the primary candidate and
### which also carries:
###
### word2::  an alternative candidate ('' when the rule supplies none)
### suffix:: the rule suffix that matched ('' for irregular words, +nil+ when
###          no rule matched)
### rule::   the rule identifier ('irregular' for table hits, '' when no rule
###          matched)
###
### Words listed in IrregularInfinitives are answered from that table.
### Otherwise the first suffix in InfSuffixRuleOrder that is a proper suffix
### of the word selects the rule. If nothing matches, the result is an empty
### string.
###
### Raises IndexError if the selected rule has an unknown :word1 selector.
def infinitive( word )
  word = word.to_s

  # One String per field: the returned word2/suffix/rule must not be the
  # same object, or mutating one would silently change the others.
  word1  = ''
  word2  = ''
  suffix = ''
  rule   = ''

  if IrregularInfinitives.key?( word )
    word1 = IrregularInfinitives[ word ]
    rule = 'irregular'
  else
    # Highest-priority rule whose suffix is a *proper* suffix of the word
    # (a word that consists solely of the suffix does not match).
    suffix = InfSuffixRuleOrder.find {|candidate|
      candidate.length < word.length && word.end_with?( candidate )
    }

    if suffix
      spec     = InfSuffixRules[ suffix ]
      rule     = spec[:rule]
      selector = spec[:word1]

      # The word with the whole suffix removed, and the list of prefixes
      # obtained by removing ever more of the suffix (longest first, the
      # bare stem last). Only the matched suffix is expanded, so the work
      # is proportional to the suffix length, not the square of the word.
      stem     = word[ 0, word.length - suffix.length ]
      prefixes = (suffix.length - 1).downto( 0 ).map {|len| stem + suffix[0, len] }

      $stderr.puts "prefixes: %p" % [ prefixes ] if $DEBUG
      $stderr.puts "Using rule %p (%p) for suffix %p" %
        [ rule, selector, suffix ] if $DEBUG

      case selector
      when 0    # longest and second-longest prefix
        word1 = prefixes[ 0 ]
        word2 = prefixes[ 1 ]

      when -1   # stem + replacement suffix, no alternative
        word1 = stem + spec[:suffix1]
        word2 = ''

      when -2   # stem + replacement suffix, bare stem as alternative
        word1 = stem + spec[:suffix1]
        word2 = stem

      when -3   # stem + either of two replacement suffixes
        word1 = stem + spec[:suffix1]
        word2 = stem + spec[:suffix2]

      when -4   # the word is already in its base form
        word1 = word
        word2 = ''

      else
        raise IndexError,
          "Couldn't find rule for shortest prefix %p" % selector
      end

      $stderr.puts "For sp = %d: word1: %p, word2: %p" %
        [ selector, word1, word2 ] if $DEBUG

      # Rules 12b and 15: Strip off 'ed' or 'ing'.
      # Do we have a monosyllable of this form:
      # o 0+ Consonants
      # o 1+ Vowel
      # o 2 Non-wx
      # Eg: tipped => tipp?
      # Then return tip and tipp.
      # Eg: swimming => swimm?
      # Then return swim and swimm.
      #
      # \A/\z anchor the whole string; line anchors would let a multi-line
      # input match on its last line only and drop everything before it.
      if ( rule == '12b' || rule == '15' ) &&
         /\A([^aeiou]*[aeiou]+)([^wx])\2\z/ =~ word2
        word1 = $1 + $2
        word2 = $1 + $2 + $2
      end
    end
  end

  return Infinitive.new( word1, word2, suffix, rule )
end
1143
+
1144
+ end # module Linguistics::EN
1145
+