Linguistics 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1149 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # This file contains functions for deriving the infinitive forms of conjugated
4
+ # English words. Requiring this file adds functions and constants to the
5
+ # Linguistics::EN module.
6
+ #
7
+ # == Authors
8
+ #
9
+ # * Michael Granger <ged@FaerieMUD.org>
10
+ #
11
+ # == Copyright
12
+ #
13
+ # Copyright (c) 2003-2005 The FaerieMUD Consortium. All rights reserved.
14
+ #
15
+ # This module is free software. You may use, modify, and/or redistribute this
16
+ # software under the terms of the Perl Artistic License. (See
17
+ # http://language.perl.com/misc/Artistic.html)
18
+ #
19
+ # This code was ported from the excellent 'Lingua::EN::Infinitive' Perl module
20
+ # by Ron Savage, which is distributed under the following license:
21
+ #
22
+ # Australian copyright (c) 1999-2002 Ron Savage.
23
+ #
24
+ # All Programs of mine are 'OSI Certified Open Source Software';
25
+ # you can redistribute them and/or modify them under the terms of
26
+ # The Artistic License, a copy of which is available at:
27
+ # http://www.opensource.org/licenses/index.html
28
+ #
29
+ # == Version
30
+ #
31
+ # $Id: infinitive.rb,v 1.2 2003/09/14 10:35:32 deveiant Exp $
32
+ #
33
+
34
+ module Linguistics::EN
35
+
36
+ # :stopdoc:
37
+
38
# Irregular words => infinitive forms.
#
# The table is written as whitespace-separated "<conjugated> <infinitive>"
# pairs (four pairs per line, in alphabetical order of the conjugated form)
# and turned into a Hash keyed by the conjugated form.
#
# 'foreseen' is the correct past participle of 'foresee'. The misspelt key
# 'forseen' is kept as well so that lookups which worked before keep working.
IrregularInfinitives = Hash[ *%w[
  abided abide   abode abide   am be   are be
  arisen arise   arose arise   ate eat   awaked awake
  awoke awake   bade bid   beaten beat   became become
  been be   befallen befall   befell befall   began begin
  begat beget   begot beget   begotten beget   begun begin
  beheld behold   bent bend   bereaved bereave   bereft bereave
  beseeched beseech   besought beseech   bespoke bespeak   bespoken bespeak
  bestrewed bestrew   bestrewn bestrew   bestrid bestride   bestridden bestride
  bestrode bestride   betaken betake   bethought bethink   betook betake
  betted bet   bidden bid   bided bide   bit bite
  bitten bite   bled bleed   blended blend   blent blend
  blessed bless   blest bless   blew blow   blown blow
  bode bide   bore bear   born bear   borne bear
  bought buy   bound bind   bred breed   broadcasted broadcast
  broke break   broken break   brought bring   browbeaten browbeat
  built build   burned burn   burnt burn   came come
  caught catch   chid chide   chidden chide   chided chide
  chose choose   chosen choose   clad clothe   clave cleave
  cleaved cleave   cleft cleave   clothed clothe   clove cleave
  cloven cleave   clung cling   costed cost   could can
  crept creep   crew crow   crowed crow   dealt deal
  did do   done do   dove dive   drank drink
  drawn draw   dreamed dream   dreamt dream   drew draw
  driven drive   drove drive   drunk drink   dug dig
  dwelled dwell   dwelt dwell   eaten eat   fallen fall
  fed feed   fell fall   felt feel   fled flee
  flew fly   flown fly   flung fling   forbad forbid
  forbade forbid   forbidden forbid   forbore forbear   forborne forbear
  fordid fordo   fordone fordo   forecasted forecast   foregone forego
  foreknew foreknow   foreknown foreknow   foreran forerun   foresaw foresee
  foreseen foresee   foreshowed foreshow   foreshown foreshow   foretold foretell
  forewent forego   forgave forgive   forgiven forgive   forgot forget
  forgotten forget   forsaken forsake   forseen foresee   forsook forsake
  forswore forswear   forsworn forswear   fought fight   found find
  froze freeze   frozen freeze   gainsaid gainsay   gave give
  gilded gild   gilt gild   girded gird   girt gird
  given give   gone go   got get   gotten get
  graved grave   graven grave   grew grow   ground grind
  grown grow   had have   hamstringed hamstring   hamstrung hamstring
  hanged hang   heard hear   heaved heave   held hold
  hewed hew   hewn hew   hid hide   hidden hide
  hove heave   hung hang   inlaid inlay   is be
  kept keep   kneeled kneel   knelt kneel   knew know
  knitted knit   known know   laded lade   laden lade
  laid lay   lain lie   lay lie   leaned lean
  leant lean   leaped leap   leapt leap   learned learn
  learnt learn   led lead   left leave   lent lend
  lighted light   lit light   lost lose   made make
  meant mean   melted melt   met meet   might may
  misdealt misdeal   misgave misgive   misgiven misgive   mislaid mislay
  misled mislead   mistaken mistake   mistook mistake   misunderstood misunderstand
  molten melt   mowed mow   mown mow   outate outeat
  outbade outbid   outbidden outbid   outbred outbreed   outdid outdo
  outdone outdo   outeaten outeat   outfought outfight   outgone outgo
  outgrew outgrow   outgrown outgrow   outlaid outlay   outran outrun
  outridden outride   outrode outride   outsat outsit   outshone outshine
  outshot outshoot   outsold outsell   outspent outspend   outthrew outthrow
  outthrown outthrow   outwent outgo   outwore outwear   outworn outwear
  overate overeat   overbade overbid   overbidden overbid   overblew overblow
  overblown overblow   overbore overbear   overborn overbear   overborne overbear
  overbought overbuy   overbuilt overbuild   overcame overcome   overdid overdo
  overdone overdo   overdrawn overdraw   overdrew overdraw   overdriven overdrive
  overdrove overdrive   overeaten overeat   overfed overfeed   overflew overfly
  overflown overfly   overgrew overgrow   overgrown overgrow   overhanged overhang
  overheard overhear   overhung overhang   overladed overlade   overladen overlade
  overlaid overlay   overlain overlie   overlay overlie   overleaped overleap
  overleapt overleap   overpaid overpay   overran overrun   overridden override
  overrode override   oversaw oversee   overseen oversee   oversewed oversew
  oversewn oversew   overshot overshoot   overslept oversleep   overspent overspend
  overtaken overtake   overthrew overthrow   overthrown overthrow   overtook overtake
  overwinded overwind   overwound overwind   overwritten overwrite   overwrote overwrite
  paid pay   partaken partake   partook partake   prechose prechoose
  prechosen prechoose   proved prove   proven prove   quitted quit
  ran run   rang ring   reaved reave   rebuilt rebuild
  reeved reeve   reft reave   relaid relay   rent rend
  repaid repay   retold retell   ridded rid   ridden ride
  risen rise   rived rive   riven rive   rode ride
  rose rise   rove reeve   rung ring   said say
  sang sing   sank sink   sat sit   saw see
  sawed saw   sawn saw   seen see   sent send
  sewed sew   sewn sew   shaken shake   shaved shave
  shaven shave   sheared shear   shined shine   shod shoe
  shoed shoe   shone shine   shook shake   shorn shear
  shot shoot   showed show   shown show   shrank shrink
  shredded shred   shrived shrive   shriven shrive   shrove shrive
  shrunk shrink   shrunken shrink   slain slay   slept sleep
  slew slay   slid slide   slidden slide   slung sling
  slunk slink   smelled smell   smelt smell   smitten smite
  smote smite   snuck sneak   sold sell   sought seek
  sowed sow   sown sow   span spin   spat spit
  sped speed   speeded speed   spelled spell   spelt spell
  spent spend   spilled spill   spilt spill   spoiled spoil
  spoilt spoil   spoke speak   spoken speak   sprang spring
  sprung spring   spun spin   stank stink   staved stave
  stole steal   stolen steal   stood stand   stove stave
  strewed strew   strewn strew   stricken strike   strid stride
  stridden stride   strived strive   striven strive   strode stride
  strove strive   struck strike   strung string   stuck stick
  stung sting   stunk stink   sung sing   sunk sink
  sunken sink   swam swim   sweated sweat   swelled swell
  swept sweep   swollen swell   swore swear   sworn swear
  swum swim   swung swing   taken take   taught teach
  thought think   threw throw   thrived thrive   thriven thrive
  throve thrive   thrown throw   told tell   took take
  tore tear   torn tear   trod tread   trodden tread
  unbent unbend   unbound unbind   unbuilt unbuild   underbought underbuy
  underfed underfeed   undergone undergo   underlaid underlay   underlain underlie
  underlay underlie   underpaid underpay   underran underrun   undershot undershoot
  undersold undersell   understood understand   undertaken undertake   undertook undertake
  underwent undergo   underwritten underwrite   underwrote underwrite   undid undo
  undone undo   undrawn undraw   undrew undraw   unfroze unfreeze
  unfrozen unfreeze   ungirded ungird   ungirt ungird   unhanged unhang
  unhung unhang   unknitted unknit   unladed unlade   unladen unlade
  unlaid unlay   unlearned unlearn   unlearnt unlearn   unmade unmake
  unreeved unreeve   unrove unreeve   unsaid unsay   unslung unsling
  unspoke unspeak   unspoken unspeak   unstrung unstring   unstuck unstick
  unswore unswear   unsworn unswear   untaught unteach   unthought unthink
  untrod untread   untrodden untread   unwinded unwind   unwound unwind
  unwove unweave   unwoven unweave   upbuilt upbuild   upheld uphold
  uprisen uprise   uprose uprise   upswept upsweep   upswung upswing
  waked wake   was be   waylaid waylay   wedded wed
  went go   wept weep   were be   wetted wet
  winded wind   wist wit   wot wit   withdrawn withdraw
  withdrew withdraw   withheld withhold   withstood withstand   woke wake
  woken wake   won win   wore wear   worked work
  worn wear   wound wind   wove weave   woven weave
  written write   wrote write   wrought work   wrung wring
] ]
548
+
549
# Mapping of word suffixes to infinitive rules.
#
# Each value is a Hash with these keys:
#
#   :order   => sort key; InfSuffixRuleOrder lists the suffixes by ascending
#               :order and infinitive() applies the first one that fits.
#   :rule    => rule id reported in the result ('12b' and '15' additionally
#               get the doubled-consonant treatment in infinitive()).
#   :word1   => code selecting how word1/word2 are derived, where "stem" is
#               the word with the suffix removed:
#                  0  word1 = word minus its last character,
#                     word2 = word minus its last two characters
#                 -1  word1 = stem + :suffix1, word2 = ''
#                 -2  word1 = stem + :suffix1, word2 = stem
#                 -3  word1 = stem + :suffix1, word2 = stem + :suffix2
#                 -4  word1 = the original word, word2 = ''
#               infinitive() raises IndexError for any other code.
#   :suffix1 => text appended to the stem for word1 (codes -1, -2, -3).
#   :suffix2 => text appended to the stem for word2 (code -3 only).
InfSuffixRules = [
  # suffix        order  rule    word1  suffix1    suffix2
  [ 'hes',        1011,  '1',     0,    '',        ''      ],
  [ 'ses',        1021,  '2',     0,    '',        ''      ],
  [ 'xes',        1031,  '3',     0,    '',        ''      ],
  [ 'zes',        1041,  '4',     0,    '',        ''      ],
  [ 'iless',      1051,  '43a',  -1,    'y',       ''      ],
  [ 'less',       1052,  '43b',  -1,    '',        ''      ],
  [ 'iness',      1053,  '44a',  -1,    'y',       ''      ],
  [ 'ness',       1054,  '44b',  -1,    '',        ''      ],
  [ "'s",         1055,  '7',    -1,    '',        ''      ],
  [ 'ies',        1056,  '13a',  -1,    'y',       ''      ],
  [ 'es',         1057,  '13b',   0,    '',        ''      ],
  [ 'ss',         1061,  '6a',   -4,    '',        ''      ],
  [ 's',          1062,  '6b',   -1,    '',        ''      ],
  [ 'ater',       1081,  '8',    -4,    '',        ''      ],
  [ 'cter',       1091,  '9',    -4,    '',        ''      ],
  [ 'ier',        1101,  '10',   -1,    'y',       ''      ],
  [ 'er',         1111,  '11',    0,    '',        ''      ],
  [ 'ied',        1121,  '12a',  -1,    'y',       ''      ],
  [ 'ed',         1122,  '12b',   0,    '',        ''      ],
  [ 'iest',       1141,  '14a',  -1,    'y',       ''      ],
  [ 'est',        1142,  '14b',  -2,    'e',       ''      ],
  [ 'blity',      1143,  '21',   -4,    '',        ''      ],
  [ 'bility',     1144,  '22',   -1,    'ble',     ''      ],
  [ 'fiable',     1145,  '23',   -1,    'fy',      ''      ],
  [ 'logist',     1146,  '24',   -1,    'logy',    ''      ],
  [ 'ing',        1151,  '15',   -2,    'e',       ''      ],
  [ 'ist',        1161,  '16',   -2,    'e',       ''      ],
  [ 'ism',        1171,  '17',   -2,    'e',       ''      ],
  [ 'ity',        1181,  '18',   -2,    'e',       ''      ],
  [ 'ize',        1191,  '19',   -2,    'e',       ''      ],
  [ 'cable',      1201,  '20a',  -4,    '',        ''      ],
  [ 'gable',      1202,  '20b',  -4,    '',        ''      ],
  [ 'able',       1203,  '20c',  -2,    'e',       ''      ],
  [ 'graphic',    1251,  '25',   -1,    'graphy',  ''      ],
  [ 'istic',      1261,  '26',   -1,    'ist',     ''      ],
  [ 'itic',       1271,  '27',   -1,    'ite',     ''      ],
  [ 'like',       1281,  '28',   -1,    '',        ''      ],
  [ 'logic',      1291,  '29',   -1,    'logy',    ''      ],
  [ 'ment',       1301,  '30',   -1,    '',        ''      ],
  [ 'mental',     1311,  '31',   -1,    'ment',    ''      ],
  [ 'metry',      1321,  '32',   -3,    'meter',   'metre' ],
  [ 'nce',        1331,  '33',   -1,    'nt',      ''      ],
  [ 'ncy',        1341,  '34',   -1,    'nt',      ''      ],
  [ 'ship',       1351,  '35',   -1,    '',        ''      ],
  [ 'ical',       1361,  '36',   -1,    'ic',      ''      ],
  [ 'ional',      1371,  '37',   -1,    'ion',     ''      ],
  [ 'bly',        1381,  '38',   -1,    'ble',     ''      ],
  [ 'ily',        1391,  '39',   -1,    'y',       ''      ],
  [ 'ly',         1401,  '40',   -1,    '',        ''      ],
  [ 'iful',       1411,  '41a',  -1,    'y',       ''      ],
  [ 'ful',        1412,  '41b',  -1,    '',        ''      ],
  [ 'ihood',      1421,  '42a',  -1,    'y',       ''      ],
  [ 'hood',       1422,  '42b',  -1,    '',        ''      ],
  [ 'ification',  1451,  '45',   -1,    'ify',     ''      ],
  [ 'ization',    1461,  '46',   -1,    'ize',     ''      ],
  [ 'ction',      1471,  '47',   -1,    'ct',      ''      ],
  [ 'rtion',      1481,  '48',   -1,    'rt',      ''      ],
  [ 'ation',      1491,  '49',   -1,    'ate',     ''      ],
  [ 'ator',       1501,  '50',   -1,    'ate',     ''      ],
  [ 'ctor',       1511,  '51',   -1,    'ct',      ''      ],
  [ 'ive',        1521,  '52',   -1,    'ion',     ''      ],
  [ 'onian',      1530,  '54',   -1,    'on',      ''      ],
  [ 'an',         1531,  '53',   -1,    'a',       ''      ],
].each_with_object( {} ) {|(ending, order, rule, word1, suffix1, suffix2), table|
  table[ ending ] = {
    :order   => order,
    :rule    => rule,
    :word1   => word1,
    :suffix1 => suffix1,
    :suffix2 => suffix2,
  }
}
1015
# The rule suffixes arranged by ascending :order. infinitive() uses the
# first suffix from this list that fits the word.
InfSuffixRuleOrder = InfSuffixRules.
  sort_by {|_ending, spec| spec[:order] }.
  map {|ending, _spec| ending }
1016
+
1017
+ # :startdoc:
1018
+
1019
### The result type of Linguistics::EN::infinitive: a String whose value is
### the primary infinitive candidate, carrying along the details of how it
### was derived.
class Infinitive < String

  # The fallback deconjugated form
  attr_reader :word2

  # The suffix used to identify the transform rule
  attr_reader :suffix

  # The rule used
  attr_reader :rule

  ### Create a new Infinitive whose string value is +word1+. The remaining
  ### arguments are stored as given and exposed through #word2, #suffix
  ### and #rule.
  def initialize( word1, word2, suffix, rule )
    super( word1 )
    @word2, @suffix, @rule = word2, suffix, rule
  end
end
1045
+
1046
+
1047
+ ###############
1048
+ module_function
1049
+ ###############
1050
+
1051
### Return the infinitive form of the given +word+.
###
### +word+ is converted with #to_s. The result is an Infinitive (a String
### subclass): its string value is the preferred infinitive, #word2 is an
### alternative candidate (or ''), #suffix is the rule suffix that matched
### and #rule is the id of the rule that was applied.
###
### * A word found in IrregularInfinitives yields its table entry with rule
###   'irregular', and '' for both #word2 and #suffix.
### * Otherwise the first suffix in InfSuffixRuleOrder that ends the word
###   (leaving at least one character in front of it) selects the entry of
###   InfSuffixRules to apply.
### * If no suffix fits, the result is an empty string with rule '',
###   word2 '' and a +nil+ suffix.
###
### Every String handed to the result is a separate object, so modifying
### one attribute of the result in place never changes another.
###
### With $DEBUG set, the chosen rule and the candidate prefixes of the
### matched suffix are written to $stderr.
###
### Raises IndexError if the selected rule carries an unknown :word1 code.
def infinitive( word )
  word = word.to_s

  # Irregular forms are a plain table lookup.
  if IrregularInfinitives.key?( word )
    return Infinitive.new( IrregularInfinitives[word], '', '', 'irregular' )
  end

  # Highest-priority rule whose suffix ends the word. The suffix must be
  # strictly shorter than the word so that a non-empty stem remains.
  suffix = InfSuffixRuleOrder.find {|ending|
    ending.length < word.length && word.end_with?( ending )
  }
  return Infinitive.new( '', '', nil, '' ) if suffix.nil?

  spec = InfSuffixRules[ suffix ]
  rule = spec[:rule]
  code = spec[:word1]
  $stderr.puts "Using rule %p (%p) for suffix %p" % [ rule, code, suffix ] if $DEBUG

  # Candidate prefixes for the matched suffix, longest first: the word minus
  # its last character, minus its last two, ... down to the bare stem (the
  # word with the whole suffix removed).
  stem_length = word.length - suffix.length
  candidates = ( suffix.length - 1 ).downto( 0 ).map {|extra| word[0, stem_length + extra] }
  stem = candidates.last
  $stderr.puts "prefixes: %p" % [ candidates ] if $DEBUG

  word1, word2 =
    case code
    when 0  then [ candidates[0], candidates[1] ]
    when -1 then [ stem + spec[:suffix1], '' ]
    when -2 then [ stem + spec[:suffix1], stem ]
    when -3 then [ stem + spec[:suffix1], stem + spec[:suffix2] ]
    when -4 then [ word, '' ]
    else
      raise IndexError, "Couldn't find rule for shortest prefix %p" % [ code ]
    end
  $stderr.puts "For sp = %d: word1: %p, word2: %p" % [ code, word1, word2 ] if $DEBUG

  # Rules 12b and 15 ('ed' / 'ing' stripped): if what is left is a
  # monosyllable made of 0+ consonants, 1+ vowels and a doubled consonant
  # other than w or x, prefer the form with the doubling undone.
  #   tipped   => tipp  => word1 'tip',  word2 'tipp'
  #   swimming => swimm => word1 'swim', word2 'swimm'
  # \A and \z anchor the whole string rather than a single line.
  if ( rule == '12b' || rule == '15' ) &&
     /\A([^aeiou]*[aeiou]+)([^wx])\2\z/ =~ word2
    head = Regexp.last_match( 1 )
    doubled = Regexp.last_match( 2 )
    word1 = head + doubled
    word2 = head + doubled + doubled
  end

  Infinitive.new( word1, word2, suffix, rule )
end
1147
+
1148
+ end # module EN::Linguistics
1149
+