linguistics 1.0.9 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. data.tar.gz.sig +0 -0
  2. data/.gemtest +0 -0
  3. data/ChangeLog +849 -342
  4. data/History.rdoc +11 -0
  5. data/LICENSE +9 -9
  6. data/Manifest.txt +44 -0
  7. data/README.rdoc +226 -0
  8. data/Rakefile +32 -349
  9. data/examples/endocs.rb +272 -0
  10. data/examples/generalize_sentence.rb +2 -1
  11. data/examples/klingon.rb +22 -0
  12. data/lib/linguistics.rb +130 -292
  13. data/lib/linguistics/en.rb +337 -1628
  14. data/lib/linguistics/en/articles.rb +138 -0
  15. data/lib/linguistics/en/conjugation.rb +2245 -0
  16. data/lib/linguistics/en/conjunctions.rb +202 -0
  17. data/lib/linguistics/en/{infinitive.rb → infinitives.rb} +41 -55
  18. data/lib/linguistics/en/linkparser.rb +41 -49
  19. data/lib/linguistics/en/numbers.rb +483 -0
  20. data/lib/linguistics/en/participles.rb +33 -0
  21. data/lib/linguistics/en/pluralization.rb +810 -0
  22. data/lib/linguistics/en/stemmer.rb +75 -0
  23. data/lib/linguistics/en/titlecase.rb +121 -0
  24. data/lib/linguistics/en/wordnet.rb +63 -97
  25. data/lib/linguistics/inflector.rb +89 -0
  26. data/lib/linguistics/iso639.rb +534 -448
  27. data/lib/linguistics/languagebehavior.rb +36 -0
  28. data/lib/linguistics/monkeypatches.rb +42 -0
  29. data/spec/lib/constants.rb +15 -0
  30. data/spec/lib/helpers.rb +38 -0
  31. data/spec/linguistics/en/articles_spec.rb +797 -0
  32. data/spec/linguistics/en/conjugation_spec.rb +2083 -0
  33. data/spec/linguistics/en/conjunctions_spec.rb +154 -0
  34. data/spec/linguistics/en/infinitives_spec.rb +518 -0
  35. data/spec/linguistics/en/linkparser_spec.rb +66 -0
  36. data/spec/linguistics/en/numbers_spec.rb +1295 -0
  37. data/spec/linguistics/en/participles_spec.rb +55 -0
  38. data/spec/linguistics/en/pluralization_spec.rb +4636 -0
  39. data/spec/linguistics/en/stemmer_spec.rb +72 -0
  40. data/spec/linguistics/en/titlecase_spec.rb +841 -0
  41. data/spec/linguistics/en/wordnet_spec.rb +85 -0
  42. data/spec/linguistics/en_spec.rb +45 -167
  43. data/spec/linguistics/inflector_spec.rb +40 -0
  44. data/spec/linguistics/iso639_spec.rb +49 -53
  45. data/spec/linguistics/monkeypatches_spec.rb +40 -0
  46. data/spec/linguistics_spec.rb +46 -76
  47. metadata +241 -113
  48. metadata.gz.sig +0 -0
  49. data/README +0 -166
  50. data/README.english +0 -245
  51. data/rake/191_compat.rb +0 -26
  52. data/rake/dependencies.rb +0 -76
  53. data/rake/documentation.rb +0 -123
  54. data/rake/helpers.rb +0 -502
  55. data/rake/hg.rb +0 -318
  56. data/rake/manual.rb +0 -787
  57. data/rake/packaging.rb +0 -129
  58. data/rake/publishing.rb +0 -341
  59. data/rake/style.rb +0 -62
  60. data/rake/svn.rb +0 -668
  61. data/rake/testing.rb +0 -152
  62. data/rake/verifytask.rb +0 -64
  63. data/tests/en/infinitive.tests.rb +0 -207
  64. data/tests/en/inflect.tests.rb +0 -1389
  65. data/tests/en/lafcadio.tests.rb +0 -77
  66. data/tests/en/linkparser.tests.rb +0 -42
  67. data/tests/en/lprintf.tests.rb +0 -77
  68. data/tests/en/titlecase.tests.rb +0 -73
  69. data/tests/en/wordnet.tests.rb +0 -95
@@ -0,0 +1,33 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'linguistics/en' unless defined?( Linguistics::EN )
4
+
5
+ # Methods for deriving present participles for the English-language
6
+ # Linguistics module.
7
+ module Linguistics::EN::Participles
8
+
9
+ # Register this module to the list of modules to include
10
+ Linguistics::EN.register_extension( self )
11
+
12
+
13
### Attempt to return the inflected string in its present participle
### form (e.g., talked -> talking).
###
### The receiver is first passed through #plural_verb, then the first
### matching suffix rule below is applied to that stem before "ing" is
### appended. Each String#sub! returns nil when nothing was replaced, which
### is what lets the `or` chain fall through to the next rule.
###
### Returns a new String.
def present_participle
  stem = self.to_s.en.plural_verb

  stem.sub!( /ie$/, 'y' ) or               # ...ie  -> ...y   (tie -> tying)
    stem.sub!( /ue$/, 'u' ) or             # ...ue  -> ...u   (argue -> arguing)
    # Ruby back-references in a replacement string are written \1; a
    # Perl-style '$1' would be inserted literally.
    stem.sub!( /([auy])e$/, "\\1" ) or     # ...[auy]e -> drop the "e"
    stem.sub!( /i$/, '' ) or               # drop a trailing "i"
    stem.sub!( /([^e])e$/, "\\1" ) or      # drop a silent "e" (make -> making)
    /er$/.match( stem ) or                 # ...er: never double the "r"
    # consonant + vowel + final consonant: double the final consonant
    # (run -> running)
    stem.sub!( /([^aeiou][aeiouy]([bdgmnprst]))$/, "\\1\\2" )

  return "#{stem}ing"
end
28
+ alias_method :part_pres, :present_participle
29
+ Linguistics::EN.register_lprintf_formatter :PART_PRES, :present_participle
30
+
31
+
32
+ end # module Linguistics::EN::Participles
33
+
@@ -0,0 +1,810 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'linguistics/en' unless defined?( Linguistics )
4
+
5
+ # Plural inflection methods for the English-language Linguistics module.
6
+ #
7
+ # It provides conversion of plural forms of all nouns, most verbs,
8
+ # and some adjectives. It also provides "classical" variants (for
9
+ # example: "brother" -> "brethren", "dogma" -> "dogmata", etc.) where
10
+ # appropriate.
11
+ module Linguistics::EN::Pluralization
12
+
13
+ # Register this module to the list of modules to include
14
+ Linguistics::EN.register_extension( self )
15
+
16
### Utility function for creating Regexp unions.
###
### +parts+ may be any mix of Strings, Regexps and (nested) Arrays of either;
### they are flattened into one list and combined into a single Regexp that
### matches any one of them.
###
### Note for callers: when the returned Regexp is interpolated into another
### regexp literal it is embedded together with its own option flags, so
### options on the outer literal (such as /i) do not apply to it.
def self::matchgroup( *parts )
  alternatives = parts.flatten
  Regexp.union( alternatives )
end
20
+ private_class_method :matchgroup
21
+
22
+ #
23
+ # Plurals
24
+ #
25
+
26
+ PL_sb_irregular_s = {
27
+ "ephemeris" => "ephemerides",
28
+ "iris" => "irises|irides",
29
+ "clitoris" => "clitorises|clitorides",
30
+ "corpus" => "corpuses|corpora",
31
+ "opus" => "opuses|opera",
32
+ "genus" => "genera",
33
+ "mythos" => "mythoi",
34
+ "penis" => "penises|penes",
35
+ "testis" => "testes",
36
+ }
37
+
38
+ PL_sb_irregular_h = {
39
+ "child" => "children",
40
+ "brother" => "brothers|brethren",
41
+ "loaf" => "loaves",
42
+ "hoof" => "hoofs|hooves",
43
+ "beef" => "beefs|beeves",
44
+ "money" => "monies",
45
+ "mongoose" => "mongooses",
46
+ "ox" => "oxen",
47
+ "cow" => "cows|kine",
48
+ "soliloquy" => "soliloquies",
49
+ "graffito" => "graffiti",
50
+ "prima donna" => "prima donnas|prime donne",
51
+ "octopus" => "octopuses|octopodes",
52
+ "genie" => "genies|genii",
53
+ "ganglion" => "ganglions|ganglia",
54
+ "trilby" => "trilbys",
55
+ "turf" => "turfs|turves",
56
+ }.update( PL_sb_irregular_s )
57
+ PL_sb_irregular = matchgroup PL_sb_irregular_h.keys
58
+
59
+
60
+ # Classical "..a" -> "..ata"
61
+ PL_sb_C_a_ata = matchgroup %w[
62
+ anathema bema carcinoma charisma diploma
63
+ dogma drama edema enema enigma lemma
64
+ lymphoma magma melisma miasma oedema
65
+ sarcoma schema soma stigma stoma trauma
66
+ gumma pragma
67
+ ].collect {|word| word[0...-1]}
68
+
69
+ # Unconditional "..a" -> "..ae"
70
+ PL_sb_U_a_ae = matchgroup %w[
71
+ alumna alga vertebra persona
72
+ ]
73
+
74
+ # Classical "..a" -> "..ae"
75
+ PL_sb_C_a_ae = matchgroup [/.*umbra/ ] + %w[
76
+ amoeba antenna formula hyperbola
77
+ medusa nebula parabola abscissa
78
+ hydra nova lacuna aurora
79
+ flora fauna
80
+ ]
81
+
82
+ # Classical "..en" -> "..ina"
83
+ PL_sb_C_en_ina = matchgroup %w[
84
+ stamen foramen lumen
85
+ ].collect {|word| word[0...-2] }
86
+
87
+ # Unconditional "..um" -> "..a"
88
+ PL_sb_U_um_a = matchgroup %w[
89
+ bacterium agendum desideratum erratum
90
+ stratum datum ovum extremum candelabrum
91
+ ].collect {|word| word[0...-2] }
92
+
93
# Classical "..um" -> "..a"
#
# Each word is stored without its final two letters ("um"); the noun rules
# match the stem followed by a literal "um".
PL_sb_C_um_a = matchgroup %w[
  maximum minimum momentum optimum
  quantum cranium curriculum dictum
  phylum aquarium compendium emporium
  encomium gymnasium honorarium interregnum
  lustrum memorandum millennium rostrum
  spectrum speculum stadium trapezium
  ultimatum medium vacuum velum
  consortium
].collect {|word| word[0...-2]}
104
+
105
+ # Unconditional "..us" -> "i"
106
+ PL_sb_U_us_i = matchgroup %w[
107
+ alumnus alveolus bacillus bronchus
108
+ locus nucleus stimulus meniscus
109
+ ].collect {|word| word[0...-2]}
110
+
111
+ # Classical "..us" -> "..i"
112
+ PL_sb_C_us_i = matchgroup %w[
113
+ focus radius genius
114
+ incubus succubus nimbus
115
+ fungus nucleolus stylus
116
+ torus umbilicus uterus
117
+ hippopotamus
118
+ ].collect {|word| word[0...-2]}
119
+
120
+ # Classical "..us" -> "..us" (assimilated 4th declension latin nouns)
121
+ PL_sb_C_us_us = matchgroup %w[
122
+ status apparatus prospectus sinus
123
+ hiatus impetus plexus
124
+ ]
125
+
126
+ # Unconditional "..on" -> "a"
127
+ PL_sb_U_on_a = matchgroup %w[
128
+ criterion perihelion aphelion
129
+ phenomenon prolegomenon noumenon
130
+ organon asyndeton hyperbaton
131
+ ].collect {|word| word[0...-2]}
132
+
133
+ # Classical "..on" -> "..a"
134
+ PL_sb_C_on_a = matchgroup %w[
135
+ oxymoron
136
+ ].collect {|word| word[0...-2]}
137
+
138
+ # Classical "..o" -> "..i" (but normally -> "..os")
139
+ PL_sb_C_o_i_a = %w[
140
+ solo soprano basso alto
141
+ contralto tempo piano
142
+ ]
143
+ PL_sb_C_o_i = matchgroup PL_sb_C_o_i_a.collect{|word| word[0...-1]}
144
+
145
+ # Always "..o" -> "..os"
146
+ PL_sb_U_o_os = matchgroup( %w[
147
+ albino archipelago armadillo
148
+ commando crescendo fiasco
149
+ ditto dynamo embryo
150
+ ghetto guano inferno
151
+ jumbo lumbago magneto
152
+ manifesto medico octavo
153
+ photo pro quarto
154
+ canto lingo generalissimo
155
+ stylo rhino
156
+ ] | PL_sb_C_o_i_a )
157
+
158
+
159
+ # Unconditional "..[ei]x" -> "..ices"
160
+ PL_sb_U_ex_ices = matchgroup %w[
161
+ codex murex silex
162
+ ].collect {|word| word[0...-2]}
163
+ PL_sb_U_ix_ices = matchgroup %w[
164
+ radix helix
165
+ ].collect {|word| word[0...-2]}
166
+
167
+ # Classical "..[ei]x" -> "..ices"
168
+ PL_sb_C_ex_ices = matchgroup %w[
169
+ vortex vertex cortex latex
170
+ pontifex apex index simplex
171
+ ].collect {|word| word[0...-2]}
172
+ PL_sb_C_ix_ices = matchgroup %w[
173
+ appendix
174
+ ].collect {|word| word[0...-2]}
175
+
176
+
177
+ # Arabic: ".." -> "..i"
178
+ PL_sb_C_i = matchgroup %w[
179
+ afrit afreet efreet
180
+ ]
181
+
182
+
183
+ # Hebrew: ".." -> "..im"
184
+ PL_sb_C_im = matchgroup %w[
185
+ goy seraph cherub
186
+ ]
187
+
188
+ # Unconditional "..man" -> "..mans"
189
+ PL_sb_U_man_mans = matchgroup %w[
190
+ human
191
+ Alabaman Bahaman Burman German
192
+ Hiroshiman Liman Nakayaman Oklahoman
193
+ Panaman Selman Sonaman Tacoman Yakiman
194
+ Yokohaman Yuman
195
+ ]
196
+
197
+
198
+ PL_sb_uninflected_s = [
199
+ # Pairs or groups subsumed to a singular...
200
+ "breeches", "britches", "clippers", "gallows", "hijinks",
201
+ "headquarters", "pliers", "scissors", "testes", "herpes",
202
+ "pincers", "shears", "proceedings", "trousers",
203
+
204
+ # Unassimilated Latin 4th declension
205
+ "cantus", "coitus", "nexus",
206
+
207
+ # Recent imports...
208
+ "contretemps", "corps", "debris",
209
+ /.*ois/,
210
+
211
+ # Diseases
212
+ /.*measles/, "mumps",
213
+
214
+ # Miscellaneous others...
215
+ "diabetes", "jackanapes", "series", "species", "rabies",
216
+ "chassis", "innings", "news", "mews",
217
+ ]
218
+
219
+
220
+ # Don't inflect in classical mode, otherwise normal inflection
221
+ PL_sb_uninflected_herd = matchgroup %w[
222
+ wildebeest swine eland bison buffalo
223
+ elk moose rhinoceros
224
+ ]
225
+
226
+ PL_sb_uninflected = matchgroup(
227
+
228
+ # Some fish and herd animals
229
+ /.*fish/, "tuna", "salmon", "mackerel", "trout",
230
+ "bream", /sea[- ]bass/, "carp", "cod", "flounder", "whiting",
231
+
232
+ /.*deer/, /.*sheep/,
233
+
234
+ # All nationals ending in -ese
235
+ "Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
236
+ "Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
237
+ "Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
238
+ "Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
239
+ "Shavese", "Vermontese", "Wenchowese", "Yengeese",
240
+ /.*[nrlm]ese/,
241
+
242
+ # Some words ending in ...s (often pairs taken as a whole)
243
+ PL_sb_uninflected_s,
244
+
245
+ # Diseases
246
+ /.*pox/,
247
+
248
+ # Other oddities
249
+ "graffiti", "djinn"
250
+ )
251
+
252
+
253
+ # Singular words ending in ...s (all inflect with ...es)
254
+ PL_sb_singular_s = matchgroup [ /.*ss/, /.*us/ ] +
255
+ %w[
256
+ acropolis aegis alias arthritis asbestos atlas
257
+ bathos bias bronchitis bursitis caddis cannabis
258
+ canvas chaos cosmos dais digitalis encephalitis
259
+ epidermis ethos eyas gas glottis hepatitis
260
+ hubris ibis lens mantis marquis metropolis
261
+ neuritis pathos pelvis polis rhinoceros
262
+ sassafras tonsillitis trellis
263
+ ]
264
+
265
+ PL_v_special_s = matchgroup [
266
+ PL_sb_singular_s,
267
+ PL_sb_uninflected_s,
268
+ PL_sb_irregular_s.keys,
269
+ /(.*[csx])is/,
270
+ /(.*)ceps/,
271
+ /[A-Z].*s/,
272
+ ]
273
+
274
+ PL_sb_postfix_adj = '(' + {
275
+
276
+ 'general' => '(?!major|lieutenant|brigadier|adjutant)\S+',
277
+ 'martial' => "court",
278
+
279
+ }.collect {|key,val|
280
+ "(?:#{val})(?=(?:-|\\s+)#{key})"
281
+ }.join("|") + ")(.*)"
282
+
283
+
284
+ PL_sb_military = %r'major|lieutenant|brigadier|adjutant|quartermaster'
285
+ PL_sb_general = %r'((?!#{PL_sb_military.source}).*?)((-|\s+)general)'
286
+
287
+ PL_prep = matchgroup %w[
288
+ about above across after among around at athwart before behind
289
+ below beneath beside besides between betwixt beyond but by
290
+ during except for from in into near of off on onto out over
291
+ since till to under until unto upon with
292
+ ]
293
+
294
+ PL_sb_prep_dual_compound = %r'(.*?)((?:-|\s+)(?:#{PL_prep}|d[eu])(?:-|\s+))a(?:-|\s+)(.*)'
295
+ PL_sb_prep_compound = %r'(.*?)((-|\s+)(#{PL_prep}|d[eu])((-|\s+)(.*))?)'
296
+
297
+
298
+ PL_pron_nom_h = {
299
+ # Nominative Reflexive
300
+ "i" => "we", "myself" => "ourselves",
301
+ "you" => "you", "yourself" => "yourselves",
302
+ "she" => "they", "herself" => "themselves",
303
+ "he" => "they", "himself" => "themselves",
304
+ "it" => "they", "itself" => "themselves",
305
+ "they" => "they", "themself" => "themselves",
306
+
307
+ # Possessive
308
+ "mine" => "ours",
309
+ "yours" => "yours",
310
+ "hers" => "theirs",
311
+ "his" => "theirs",
312
+ "its" => "theirs",
313
+ "theirs" => "theirs",
314
+ }
315
+ PL_pron_nom = Regexp.new( PL_pron_nom_h.keys.join('|'), Regexp::IGNORECASE )
316
+
317
+ PL_pron_acc_h = {
318
+ # Accusative Reflexive
319
+ "me" => "us", "myself" => "ourselves",
320
+ "you" => "you", "yourself" => "yourselves",
321
+ "her" => "them", "herself" => "themselves",
322
+ "him" => "them", "himself" => "themselves",
323
+ "it" => "them", "itself" => "themselves",
324
+ "them" => "them", "themself" => "themselves",
325
+ }
326
+ PL_pron_acc = matchgroup PL_pron_acc_h.keys
327
+
328
# Irregular present/past forms of "to be" and "to have":
# singular form => plural form.
#
# Each key appears exactly once: a Hash literal keeps only the last value
# for a repeated key, so listing the same form once per grammatical person
# added nothing to the table.
PL_v_irregular_pres_h = {
  "am"   => "are",  "are"  => "are",  "is"  => "are",
  "was"  => "were", "were" => "were",
  "have" => "have", "has"  => "have",
}
PL_v_irregular_pres = matchgroup PL_v_irregular_pres_h.keys

# Present-tense forms that could equally be read as nouns:
# base form => plural form, third-person singular => plural form.
# Key order is kept as before so the derived Regexp lists its alternatives
# in the same order.
PL_v_ambiguous_pres_h = {
  "act"   => "act",   "acts"    => "act",
  "blame" => "blame", "blames"  => "blame",
  "can"   => "can",
  "must"  => "must",
  "fly"   => "fly",   "flies"   => "fly",
  "copy"  => "copy",  "copies"  => "copy",
  "drink" => "drink", "drinks"  => "drink",
  "fight" => "fight", "fights"  => "fight",
  "fire"  => "fire",  "fires"   => "fire",
  "like"  => "like",  "likes"   => "like",
  "look"  => "look",  "looks"   => "look",
  "make"  => "make",  "makes"   => "make",
  "reach" => "reach", "reaches" => "reach",
  "run"   => "run",   "runs"    => "run",
  "sink"  => "sink",  "sinks"   => "sink",
  "sleep" => "sleep", "sleeps"  => "sleep",
  "view"  => "view",  "views"   => "view",
}
PL_v_ambiguous_pres = matchgroup PL_v_ambiguous_pres_h.keys
359
+
360
+ PL_v_irregular_non_pres = matchgroup %w[
361
+ did had ate made put
362
+ spent fought sank gave sought
363
+ shall could ought should
364
+ ]
365
+
366
+ PL_v_ambiguous_non_pres = matchgroup %w[
367
+ thought saw bent will might cut
368
+ ]
369
+
370
+ PL_count_zero = matchgroup %w[
371
+ 0 no zero nil
372
+ ]
373
+
374
+ PL_count_one = matchgroup %w[
375
+ 1 a an one each every this that
376
+ ]
377
+
378
+ PL_adj_special_h = {
379
+ "a" => "some", "an" => "some",
380
+ "this" => "these", "that" => "those",
381
+ }
382
+ PL_adj_special = matchgroup PL_adj_special_h.keys
383
+
384
+ PL_adj_poss_h = {
385
+ "my" => "our",
386
+ "your" => "your",
387
+ "its" => "their",
388
+ "her" => "their",
389
+ "his" => "their",
390
+ "their" => "their",
391
+ }
392
+ PL_adj_poss = matchgroup PL_adj_poss_h.keys
393
+
394
+
395
+ #################################################################
396
+ ### P U B L I C F U N C T I O N S
397
+ #################################################################
398
+
399
### Return the plural of the receiver if +count+ indicates it should be
### plural.
###
### A receiver that responds to #to_int is first converted with #numwords;
### anything else is stringified. Leading and trailing whitespace is kept
### as-is and only the text in between is inflected. The text is tried as a
### special adjective, then as a special verb, and finally as a noun.
def plural( count=2 )
  phrase = self.respond_to?( :to_int ) ? self.numwords : self.to_s

  self.log.debug "Pluralizing %p" % [ phrase ]

  # Split off surrounding whitespace; if there is nothing to split, hand the
  # phrase back untouched.
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
  return phrase unless md
  leading, text, trailing = md.captures

  inflected   = pluralize_special_adjective( text, count )
  inflected ||= pluralize_special_verb( text, count )
  inflected ||= pluralize_noun( text, count )

  return leading + postprocess( text, inflected ) + trailing
end
426
+ Linguistics::EN.register_lprintf_formatter :PL, :plural
427
+
428
+
429
### Return the plural of the receiver, treated as a noun phrase, if +count+
### indicates it should be plural.
###
### Leading and trailing whitespace is preserved around the inflected text.
### A receiver with nothing to inflect (for example the empty string) is
### returned unchanged.
def plural_noun( count=2 )
  phrase = self.to_s

  # The pattern needs at least one character on a single line; when it does
  # not match there are no captures to work with, so bail out early.
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase )
  return phrase unless md
  pre, word, post = md.captures

  plural = postprocess( word, pluralize_noun(word, count) )

  return pre + plural + post
end
442
+
443
+
444
### Return the plural of the receiver, treated as a verb phrase, if +count+
### indicates it should be plural.
###
### The special-verb rules are tried first and the general verb rules are
### the fallback. Leading and trailing whitespace is preserved. A receiver
### with nothing to inflect (for example the empty string) is returned
### unchanged.
def plural_verb( count=2 )
  phrase = self.to_s

  # The pattern needs at least one character on a single line; when it does
  # not match there are no captures to work with, so bail out early.
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase )
  return phrase unless md
  pre, word, post = md.captures

  plural = postprocess( word,
    pluralize_special_verb(word, count) ||
    pluralize_general_verb(word, count) )

  return pre + plural + post
end
459
+
460
+
461
### Return the plural of the receiver, treated as an adjectival phrase, if
### +count+ indicates it should be plural.
###
### Only the special adjectives are inflected; any other word comes back as
### it was given. Leading and trailing whitespace is preserved. A receiver
### with nothing to inflect (for example the empty string) is returned
### unchanged.
def plural_adjective( count=2 )
  phrase = self.to_s

  # The pattern needs at least one character on a single line; when it does
  # not match there are no captures to work with, so bail out early.
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase )
  return phrase unless md
  pre, word, post = md.captures

  plural = postprocess( word, pluralize_special_adjective(word, count) || word )

  return pre + plural + post
end
474
+ alias_method :plural_adj, :plural_adjective
475
+
476
+
477
+ #################################################################
478
+ ### P R I V A T E F U N C T I O N S
479
+ #################################################################
480
+
481
+ #######
482
+ private
483
+ #######
484
+
485
### Do normal/classical switching and match capitalization in +inflected+ by
### examining the +original+ input.
###
### original::  the text exactly as the caller supplied it
### inflected:: the inflected form; may contain "modern|classical" variants
###
### Returns a new String. +inflected+ itself is never modified: the
### pluralizing helpers can hand over a value taken directly from one of the
### constant lookup tables, and editing that in place would change the table
### for every later call.
def postprocess( original, inflected )
  result = inflected.dup

  # If there's a classical variant, use it instead of the modern one if
  # classical mode is on.
  result.sub!( /([^|]+)\|(.+)/ ) do
    Linguistics::EN.classical? ? $2 : $1
  end

  # Try to duplicate the case of the original string
  case original
  when "I"
    # The pronoun "I" is capitalized on its own account; its plural is not.
    return result
  when /^[A-Z]+$/
    return result.upcase
  when /^[A-Z]/
    # Can't use #capitalize, as it will downcase the rest of the string,
    # too.
    result[0,1] = result[0,1].upcase
    return result
  else
    return result
  end
end
510
+
511
+
512
### Normalize a count to either 1 or +default+ (singular or plural).
###
### count::   a number or a count word ("one", "a", "each", ...); nil means
###           "not specified"
### default:: the value returned when +count+ is not recognised as singular
###
### Returns 1 when +count+ is one of the PL_count_one words, or, in classical
### mode only, one of the PL_count_zero words. Returns +default+ otherwise.
def normalize_count( count, default=2 )
  return default if count.nil? # Default to plural

  # The count tables are Regexps, and a Regexp interpolated into another
  # pattern keeps its own (case-sensitive) options whatever flags the outer
  # literal carries. Matching a downcased copy of the count is what makes
  # "One" or "No" behave like "one" or "no".
  text = count.to_s.downcase

  if /^(#{PL_count_one})$/ =~ text ||
     ( Linguistics::EN.classical? && /^(#{PL_count_zero})$/ =~ text )
    return 1
  else
    return default
  end
end
522
+
523
+
524
### Pluralize nouns
###
### word::  the noun (or noun phrase) to inflect
### count:: passed through #normalize_count; a singular count returns +word+
###         unchanged
###
### The rules are tried strictly in the order written and the first one that
### matches decides the result: uninflected words, compound phrases,
### pronouns, irregular nouns, irregular families and unassimilated imports,
### classical-mode imports, and finally the regular suffix rules.
###
### The returned String may contain a "modern|classical" pair (taken from
### PL_sb_irregular_h); #postprocess resolves that.
###
### NOTE(review): most PL_* tables interpolated below are Regexps built by
### matchgroup (Regexp.union). An interpolated Regexp keeps its own options,
### so the /i on these literals presumably does not reach the table words
### (a capitalised "Child" would miss PL_sb_irregular) -- confirm and decide
### whether the tables should be matched case-insensitively.
def pluralize_noun( word, count=2 )
  self.log.debug "Trying to pluralize %p as a noun" % [ word ]

  value = nil
  count = normalize_count( count )

  return word if count == 1

  # Handle user-defined nouns
  #if value = ud_match( word, PL_sb_user_defined )
  # return value
  #end

  # Handle empty word, singular count and uninflected plurals
  case word
  when ''
    self.log.debug " empty string"
    return word
  when /^(#{PL_sb_uninflected})$/i
    self.log.debug " uninflected plural"
    return word
  else
    # Herd animals stay uninflected only in classical mode
    if Linguistics::EN.classical? && /^(#{PL_sb_uninflected_herd})$/i =~ word
      self.log.debug " uninflected classical herd word"
      return word
    end
  end

  # Handle compounds ("Governor General", "mother-in-law", "aide-de-camp", etc.)
  case word
  when /^(?:#{PL_sb_postfix_adj})$/i
    # $1 is the head noun, $2 everything after it; only the head is inflected
    value = $2
    noun = $1
    self.log.debug " postfixed adjectival compound noun phrase (#{value} -> #{noun})"
    return pluralize_noun( noun, 2 ) + value

  when /^(?:#{PL_sb_prep_dual_compound})$/i
    # $1 = head noun, $2 = preposition with its separators, $3 = trailing noun
    noun = $1
    value = [ $2, $3 ]
    self.log.debug " prepositional dual compound noun phrase (%s -> %s %s)" %
      [ noun, *value ]
    return pluralize_noun( noun, 2 ) + value[0] + pluralize_noun( value[1] )

  when /^(?:#{PL_sb_prep_compound})$/i
    # $1 = head noun, $2 = the rest of the phrase, appended unchanged
    noun = $1
    value = $2
    self.log.debug " prepositional singular compound noun phrase (%s -> %s)" %
      [ noun, value ]
    return pluralize_noun( noun, 2 ) + value

  # Handle pronouns
  when /^((?:#{PL_prep})\s+)(#{PL_pron_acc})$/i
    prep, pron = $1, $2
    self.log.debug " prepositional pronoun phrase (%p + %p)" % [ prep, pron ]
    return prep + PL_pron_acc_h[ pron.downcase ]

  when /^(#{PL_pron_nom})$/i
    pron = $1
    self.log.debug " nominative pronoun; using PL_pron_nom table"
    # NOTE(review): this returns the table's own String object, not a copy
    return PL_pron_nom_h[ word.downcase ]

  when /^(#{PL_pron_acc})$/i
    self.log.debug " accusative pronoun; using PL_pron_acc table"
    # NOTE(review): this returns the table's own String object, not a copy
    return PL_pron_acc_h[ word.downcase ]

  # Handle isolated irregular plurals
  when /(.*)\b(#{PL_sb_irregular})$/i
    # Anything in front of the irregular word is carried over unchanged
    prefix, word = $1, $2
    self.log.debug " isolated irregular; using PL_sb_irregular_h table"
    return prefix + PL_sb_irregular_h[ word.downcase ]

  # Unconditional ...man -> ...mans
  when /(#{PL_sb_U_man_mans})$/i
    # NOTE(review): the pattern is not anchored at the start and only $1 is
    # kept, so text preceding the matched word looks like it is dropped from
    # the result -- confirm that is intended.
    word = $1
    self.log.debug " unconditional man -> mans (%p)" % [ word ]
    return "#{word}s"

  # Handle families of irregular plurals
  when /(.*)man$/i then return "#{$1}men"
  when /(.*[ml])ouse$/i then return "#{$1}ice"
  when /(.*)goose$/i then return "#{$1}geese"
  when /(.*)tooth$/i then return "#{$1}teeth"
  when /(.*)foot$/i then return "#{$1}feet"

  # Handle unassimilated imports
  when /(.*)ceps$/i then return word
  when /(.*)zoon$/i then return "#{$1}zoa"
  when /(.*[csx])is$/i then return "#{$1}es"
  when /(#{PL_sb_U_ex_ices})ex$/i then return "#{$1}ices"
  when /(#{PL_sb_U_ix_ices})ix$/i then return "#{$1}ices"
  when /(#{PL_sb_U_um_a})um$/i then return "#{$1}a"
  when /(#{PL_sb_U_us_i})us$/i then return "#{$1}i"
  when /(#{PL_sb_U_on_a})on$/i then return "#{$1}a"
  when /(#{PL_sb_U_a_ae})$/i then return "#{$1}e"
  end


  # Handle incompletely assimilated imports in classical mode
  if Linguistics::EN.classical?
    self.log.debug " checking for classical incompletely assimilated imports"
    case word
    when /(.*)trix$/i then return "#{$1}trices"
    when /(.*)eau$/i then return "#{$1}eaux"
    when /(.*)ieu$/i then return "#{$1}ieux"
    when /(.{2,}[yia])nx$/i then return "#{$1}nges"
    when /(#{PL_sb_C_en_ina})en$/i then return "#{$1}ina"
    when /(#{PL_sb_C_ex_ices})ex$/i then return "#{$1}ices"
    when /(#{PL_sb_C_ix_ices})ix$/i then return "#{$1}ices"
    when /(#{PL_sb_C_um_a})um$/i then return "#{$1}a"
    when /(#{PL_sb_C_us_i})us$/i then return "#{$1}i"
    when /(#{PL_sb_C_us_us})$/i then return "#{$1}"
    when /(#{PL_sb_C_a_ae})$/i then return "#{$1}e"
    when /(#{PL_sb_C_a_ata})a$/i then return "#{$1}ata"
    when /(#{PL_sb_C_o_i})o$/i then return "#{$1}i"
    when /(#{PL_sb_C_on_a})on$/i then return "#{$1}a"
    when /#{PL_sb_C_im}$/i then return "#{word}im"
    when /#{PL_sb_C_i}$/i then return "#{word}i"
    end
  end


  # Handle singular nouns ending in ...s or other sibilants
  case word
  when /^(#{PL_sb_singular_s})$/i then return "#{$1}es"
  when /^([A-Z].*s)$/ then return "#{$1}es"
  when /(.*)([cs]h|[zx])$/i then return "#{$1}#{$2}es"
  # when /(.*)(us)$/i then return "#{$1}#{$2}es"

  # Handle ...f -> ...ves
  when /(.*[eao])lf$/i then return "#{$1}lves"
  when /(.*[^d])eaf$/i then return "#{$1}eaves"
  when /(.*[nlw])ife$/i then return "#{$1}ives"
  when /(.*)arf$/i then return "#{$1}arves"

  # Handle ...y
  when /(.*[aeiou])y$/i then return "#{$1}ys"
  when /([A-Z].*y)$/ then return "#{$1}s"
  when /(.*)y$/i then return "#{$1}ies"

  # Handle ...o
  when /#{PL_sb_U_o_os}$/i then return "#{word}s"
  when /[aeiou]o$/i then return "#{word}s"
  when /o$/i then return "#{word}es"

  # Otherwise just add ...s
  else
    self.log.debug " appears to be regular; adding +s"
    return "#{word}s"
  end
end # def pluralize_noun
675
+
676
+
677
+
678
### Pluralize special verbs
###
### word::  the verb (or verb phrase) to inflect
### count:: nil is treated as 1; otherwise passed through #normalize_count
###
### Returns the plural form, +word+ itself when the count is singular or the
### verb does not change in the plural, or nil when +word+ is not handled
### here (the caller then falls back to another strategy).
def pluralize_special_verb( word, count )
  self.log.debug "Trying to pluralize %p as a special verb..." % [ word ]
  count ||= 1
  count = normalize_count( count )

  if /^(#{PL_count_one})$/i =~ count.to_s
    self.log.debug " it's a single-count word, returning it unchanged."
    return word # :FIXME: should this return nil instead?
    # return nil
  end

  # Handle user-defined verbs
  #if value = ud_match( word, PL_v_user_defined )
  # return value
  #end

  # The irregular-verb tables are Regexps; interpolating one directly would
  # embed it with its own case-sensitive options and defeat the /i below, so
  # only its source text is interpolated.
  case word

  # Handle irregular present tense (simple and compound)
  when /^(#{PL_v_irregular_pres.source})((\s.*)?)$/i
    key, rest = $1.downcase, $2
    self.log.debug " yep, it's an irregular present tense verb (%p)" % [ key ]
    return PL_v_irregular_pres_h[ key ] + rest

  # Handle irregular future, preterite and perfect tenses
  when /^(#{PL_v_irregular_non_pres.source})((\s.*)?)$/i
    key = $1.downcase
    self.log.debug " yep, it's an irregular non-present tense verb (%p)" % [ key ]
    return word

  # Handle special cases
  when /^(#{PL_v_special_s})$/
    self.log.debug " it's a not special-case verb; aborting."
    return nil

  # Handle standard 3rd person (chop the ...(e)s off single words)
  when /^(.*)([cs]h|[x]|zz|ss)es$/i
    base, suffix = $1, $2
    self.log.debug " it's a standard third-person verb (%p + %p)" % [ base, suffix ]
    return base + suffix
  when /^(..+)ies$/i
    verb = $1
    self.log.debug " it's a standard third-person verb (%p + ies -> +y)" % [ verb ]
    return "#{verb}y"
  when /^(.+)oes$/i
    verb = $1
    self.log.debug " it's a standard third-person verb (%p + oes -> +o)" % [ verb ]
    return "#{verb}o"
  when /^(.*[^s])s$/i
    verb = $1
    self.log.debug " it's a standard third-person verb (%p + (^s)s -> -s)" % [ verb ]
    return verb

  # Otherwise, a regular verb (handle elsewhere)
  else
    self.log.debug " nope. Either a regular verb or not a verb."
    return nil
  end
end
737
+
738
+
739
### Pluralize regular verbs
###
### word::  the verb (or verb phrase) to inflect
### count:: passed through #normalize_count; a singular count returns +word+
###         unchanged
###
### Verbs listed in PL_v_ambiguous_pres_h (alone or followed by further
### words) are replaced by their plural form from that table; every other
### input is returned unchanged.
def pluralize_general_verb( word, count )
  count = normalize_count( count )

  return word if /^(#{PL_count_one})$/i =~ count.to_s

  # Handle ambiguous present tenses (simple and compound). Only the table's
  # source text is interpolated: a Regexp interpolated directly keeps its own
  # case-sensitive options and would defeat the /i.
  if /^(#{PL_v_ambiguous_pres.source})((\s.*)?)$/i =~ word
    return PL_v_ambiguous_pres_h[ $1.downcase ] + $2
  end

  # Ambiguous preterite/perfect tenses and plain 1st or 2nd person forms
  # are uninflected.
  return word
end
760
+
761
+
762
### Handle special adjectives
###
### word::  the adjective to inflect
### count:: nil is treated as 1; otherwise passed through #normalize_count
###
### Returns the plural form for the words in PL_adj_special_h and
### PL_adj_poss_h and for possessives ending in an apostrophe or "'s".
### Returns nil when the count is singular or +word+ is none of those.
### Table results are returned as copies so callers can modify them freely.
def pluralize_special_adjective( word, count )
  self.log.debug "Trying to pluralize %p as a special adjective..." % [ word ]
  count ||= 1
  count = normalize_count( count )

  if /^(#{PL_count_one})$/i =~ count.to_s
    self.log.debug " it's a single-count word; aborting"
    return nil
  end

  # Handle user-defined adjectives
  #if value = ud_match( word, PL_adj_user_defined )
  # return value
  #end

  # The adjective tables are Regexps; interpolating one directly would embed
  # it with its own case-sensitive options and defeat the /i below, so only
  # its source text is interpolated.
  case word

  # Handle known cases
  when /^(#{PL_adj_special.source})$/i
    key = $1.downcase
    self.log.debug " yep, it's a special plural adjective (%p)" % [ key ]
    return PL_adj_special_h[ key ].dup

  # Handle possessives
  when /^(#{PL_adj_poss.source})$/i
    key = $1.downcase
    self.log.debug " it's a special possessive adjective (%p)" % [ key ]
    return PL_adj_poss_h[ key ].dup

  when /^(.*)'s?$/
    # Pluralize the owner as a noun, then re-attach the possessive marker
    pl = $1.en.plural_noun( count )
    self.log.debug " it has an apostrophe (%p); using generic possessive rules" % [ pl ]
    if /s$/ =~ pl
      return "#{pl}'"
    else
      return "#{pl}'s"
    end

  # Otherwise, no idea
  else
    self.log.debug " nope."
    return nil
  end
end
807
+
808
+
809
+ end # module Linguistics::EN::Pluralization
810
+