linguistics 1.0.9 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. data.tar.gz.sig +0 -0
  2. data/.gemtest +0 -0
  3. data/ChangeLog +849 -342
  4. data/History.rdoc +11 -0
  5. data/LICENSE +9 -9
  6. data/Manifest.txt +44 -0
  7. data/README.rdoc +226 -0
  8. data/Rakefile +32 -349
  9. data/examples/endocs.rb +272 -0
  10. data/examples/generalize_sentence.rb +2 -1
  11. data/examples/klingon.rb +22 -0
  12. data/lib/linguistics.rb +130 -292
  13. data/lib/linguistics/en.rb +337 -1628
  14. data/lib/linguistics/en/articles.rb +138 -0
  15. data/lib/linguistics/en/conjugation.rb +2245 -0
  16. data/lib/linguistics/en/conjunctions.rb +202 -0
  17. data/lib/linguistics/en/{infinitive.rb → infinitives.rb} +41 -55
  18. data/lib/linguistics/en/linkparser.rb +41 -49
  19. data/lib/linguistics/en/numbers.rb +483 -0
  20. data/lib/linguistics/en/participles.rb +33 -0
  21. data/lib/linguistics/en/pluralization.rb +810 -0
  22. data/lib/linguistics/en/stemmer.rb +75 -0
  23. data/lib/linguistics/en/titlecase.rb +121 -0
  24. data/lib/linguistics/en/wordnet.rb +63 -97
  25. data/lib/linguistics/inflector.rb +89 -0
  26. data/lib/linguistics/iso639.rb +534 -448
  27. data/lib/linguistics/languagebehavior.rb +36 -0
  28. data/lib/linguistics/monkeypatches.rb +42 -0
  29. data/spec/lib/constants.rb +15 -0
  30. data/spec/lib/helpers.rb +38 -0
  31. data/spec/linguistics/en/articles_spec.rb +797 -0
  32. data/spec/linguistics/en/conjugation_spec.rb +2083 -0
  33. data/spec/linguistics/en/conjunctions_spec.rb +154 -0
  34. data/spec/linguistics/en/infinitives_spec.rb +518 -0
  35. data/spec/linguistics/en/linkparser_spec.rb +66 -0
  36. data/spec/linguistics/en/numbers_spec.rb +1295 -0
  37. data/spec/linguistics/en/participles_spec.rb +55 -0
  38. data/spec/linguistics/en/pluralization_spec.rb +4636 -0
  39. data/spec/linguistics/en/stemmer_spec.rb +72 -0
  40. data/spec/linguistics/en/titlecase_spec.rb +841 -0
  41. data/spec/linguistics/en/wordnet_spec.rb +85 -0
  42. data/spec/linguistics/en_spec.rb +45 -167
  43. data/spec/linguistics/inflector_spec.rb +40 -0
  44. data/spec/linguistics/iso639_spec.rb +49 -53
  45. data/spec/linguistics/monkeypatches_spec.rb +40 -0
  46. data/spec/linguistics_spec.rb +46 -76
  47. metadata +241 -113
  48. metadata.gz.sig +0 -0
  49. data/README +0 -166
  50. data/README.english +0 -245
  51. data/rake/191_compat.rb +0 -26
  52. data/rake/dependencies.rb +0 -76
  53. data/rake/documentation.rb +0 -123
  54. data/rake/helpers.rb +0 -502
  55. data/rake/hg.rb +0 -318
  56. data/rake/manual.rb +0 -787
  57. data/rake/packaging.rb +0 -129
  58. data/rake/publishing.rb +0 -341
  59. data/rake/style.rb +0 -62
  60. data/rake/svn.rb +0 -668
  61. data/rake/testing.rb +0 -152
  62. data/rake/verifytask.rb +0 -64
  63. data/tests/en/infinitive.tests.rb +0 -207
  64. data/tests/en/inflect.tests.rb +0 -1389
  65. data/tests/en/lafcadio.tests.rb +0 -77
  66. data/tests/en/linkparser.tests.rb +0 -42
  67. data/tests/en/lprintf.tests.rb +0 -77
  68. data/tests/en/titlecase.tests.rb +0 -73
  69. data/tests/en/wordnet.tests.rb +0 -95
@@ -0,0 +1,33 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'linguistics/en' unless defined?( Linguistics::EN )
4
+
5
+ # Methods for deriving present participles for the English-language
6
+ # Linguistics module.
7
+ module Linguistics::EN::Participles
8
+
9
+ # Register this module to the list of modules to include
10
+ Linguistics::EN.register_extension( self )
11
+
12
+
13
### Attempt to return the inflected string in its present participle
### form (e.g., talked -> talking).
###
### The receiver is first run through <tt>to_s.en.plural_verb</tt>; the
### resulting stem is then adjusted by the first spelling rule that applies,
### and "ing" is appended.
def present_participle
  stem = self.to_s.en.plural_verb

  # Only the first rule that matches is applied: String#sub! returns nil when
  # it changes nothing, so the chain falls through to the next rule.
  # Backreferences in the replacement must be written \1 -- a Perl-style '$1'
  # would be inserted literally.
  stem.sub!( /ie$/, 'y' ) ||            # tie   -> ty(ing)
    stem.sub!( /ue$/, 'u' ) ||          # argue -> argu(ing)
    stem.sub!( /([auy])e$/, '\1' ) ||   # dye   -> dy(ing)
    stem.sub!( /i$/, '' ) ||            # a trailing "i" is dropped
    stem.sub!( /([^e])e$/, '\1' ) ||    # make  -> mak(ing)
    /er$/.match( stem ) ||              # "...er" stems are left as they are
    stem.sub!( /([^aeiou][aeiouy]([bdgmnprst]))$/, '\1\2' )  # run -> runn(ing)

  return "#{stem}ing"
end
28
+ alias_method :part_pres, :present_participle
29
+ Linguistics::EN.register_lprintf_formatter :PART_PRES, :present_participle
30
+
31
+
32
+ end # module Linguistics::EN::Participles
33
+
@@ -0,0 +1,810 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'linguistics/en' unless defined?( Linguistics )
4
+
5
+ # Plural inflection methods for the English-language Linguistics module.
6
+ #
7
+ # It provides conversion of plural forms of all nouns, most verbs,
8
+ # and some adjectives. It also provides "classical" variants (for
9
+ # example: "brother" -> "brethren", "dogma" -> "dogmata", etc.) where
10
+ # appropriate.
11
+ module Linguistics::EN::Pluralization
12
+
13
+ # Register this module to the list of modules to include
14
+ Linguistics::EN.register_extension( self )
15
+
16
### Build one Regexp that matches any of the given +parts+. Nested Arrays
### are flattened first, so Strings, Regexps and Arrays of either can be
### mixed freely in the argument list.
def self.matchgroup( *parts )
  alternatives = parts.flatten
  Regexp.union( alternatives )
end
20
+ private_class_method :matchgroup
21
+
22
+ #
23
+ # Plurals
24
+ #
25
+
26
+ PL_sb_irregular_s = {
27
+ "ephemeris" => "ephemerides",
28
+ "iris" => "irises|irides",
29
+ "clitoris" => "clitorises|clitorides",
30
+ "corpus" => "corpuses|corpora",
31
+ "opus" => "opuses|opera",
32
+ "genus" => "genera",
33
+ "mythos" => "mythoi",
34
+ "penis" => "penises|penes",
35
+ "testis" => "testes",
36
+ }
37
+
38
+ PL_sb_irregular_h = {
39
+ "child" => "children",
40
+ "brother" => "brothers|brethren",
41
+ "loaf" => "loaves",
42
+ "hoof" => "hoofs|hooves",
43
+ "beef" => "beefs|beeves",
44
+ "money" => "monies",
45
+ "mongoose" => "mongooses",
46
+ "ox" => "oxen",
47
+ "cow" => "cows|kine",
48
+ "soliloquy" => "soliloquies",
49
+ "graffito" => "graffiti",
50
+ "prima donna" => "prima donnas|prime donne",
51
+ "octopus" => "octopuses|octopodes",
52
+ "genie" => "genies|genii",
53
+ "ganglion" => "ganglions|ganglia",
54
+ "trilby" => "trilbys",
55
+ "turf" => "turfs|turves",
56
+ }.update( PL_sb_irregular_s )
57
+ PL_sb_irregular = matchgroup PL_sb_irregular_h.keys
58
+
59
+
60
+ # Classical "..a" -> "..ata"
61
+ PL_sb_C_a_ata = matchgroup %w[
62
+ anathema bema carcinoma charisma diploma
63
+ dogma drama edema enema enigma lemma
64
+ lymphoma magma melisma miasma oedema
65
+ sarcoma schema soma stigma stoma trauma
66
+ gumma pragma
67
+ ].collect {|word| word[0...-1]}
68
+
69
+ # Unconditional "..a" -> "..ae"
70
+ PL_sb_U_a_ae = matchgroup %w[
71
+ alumna alga vertebra persona
72
+ ]
73
+
74
+ # Classical "..a" -> "..ae"
75
+ PL_sb_C_a_ae = matchgroup [/.*umbra/ ] + %w[
76
+ amoeba antenna formula hyperbola
77
+ medusa nebula parabola abscissa
78
+ hydra nova lacuna aurora
79
+ flora fauna
80
+ ]
81
+
82
+ # Classical "..en" -> "..ina"
83
+ PL_sb_C_en_ina = matchgroup %w[
84
+ stamen foramen lumen
85
+ ].collect {|word| word[0...-2] }
86
+
87
+ # Unconditional "..um" -> "..a"
88
+ PL_sb_U_um_a = matchgroup %w[
89
+ bacterium agendum desideratum erratum
90
+ stratum datum ovum extremum candelabrum
91
+ ].collect {|word| word[0...-2] }
92
+
93
+ # Classical "..um" -> "..a"
94
+ PL_sb_C_um_a = matchgroup %w[
95
+ maximum minimum momentum optimum
96
+ quantum cranium curriculum dictum
97
+ phylum aquarium compendium emporium
98
+ enconium gymnasium honorarium interregnum
99
+ lustrum memorandum millenium rostrum
100
+ spectrum speculum stadium trapezium
101
+ ultimatum medium vacuum velum
102
+ consortium
103
+ ].collect {|word| word[0...-2]}
104
+
105
+ # Unconditional "..us" -> "..i"
106
+ PL_sb_U_us_i = matchgroup %w[
107
+ alumnus alveolus bacillus bronchus
108
+ locus nucleus stimulus meniscus
109
+ ].collect {|word| word[0...-2]}
110
+
111
+ # Classical "..us" -> "..i"
112
+ PL_sb_C_us_i = matchgroup %w[
113
+ focus radius genius
114
+ incubus succubus nimbus
115
+ fungus nucleolus stylus
116
+ torus umbilicus uterus
117
+ hippopotamus
118
+ ].collect {|word| word[0...-2]}
119
+
120
+ # Classical "..us" -> "..us" (assimilated 4th declension Latin nouns)
121
+ PL_sb_C_us_us = matchgroup %w[
122
+ status apparatus prospectus sinus
123
+ hiatus impetus plexus
124
+ ]
125
+
126
+ # Unconditional "..on" -> "..a"
127
+ PL_sb_U_on_a = matchgroup %w[
128
+ criterion perihelion aphelion
129
+ phenomenon prolegomenon noumenon
130
+ organon asyndeton hyperbaton
131
+ ].collect {|word| word[0...-2]}
132
+
133
+ # Classical "..on" -> "..a"
134
+ PL_sb_C_on_a = matchgroup %w[
135
+ oxymoron
136
+ ].collect {|word| word[0...-2]}
137
+
138
+ # Classical "..o" -> "..i" (but normally -> "..os")
139
+ PL_sb_C_o_i_a = %w[
140
+ solo soprano basso alto
141
+ contralto tempo piano
142
+ ]
143
+ PL_sb_C_o_i = matchgroup PL_sb_C_o_i_a.collect{|word| word[0...-1]}
144
+
145
+ # Always "..o" -> "..os"
146
+ PL_sb_U_o_os = matchgroup( %w[
147
+ albino archipelago armadillo
148
+ commando crescendo fiasco
149
+ ditto dynamo embryo
150
+ ghetto guano inferno
151
+ jumbo lumbago magneto
152
+ manifesto medico octavo
153
+ photo pro quarto
154
+ canto lingo generalissimo
155
+ stylo rhino
156
+ ] | PL_sb_C_o_i_a )
157
+
158
+
159
+ # Unconditional "..[ei]x" -> "..ices"
160
+ PL_sb_U_ex_ices = matchgroup %w[
161
+ codex murex silex
162
+ ].collect {|word| word[0...-2]}
163
+ PL_sb_U_ix_ices = matchgroup %w[
164
+ radix helix
165
+ ].collect {|word| word[0...-2]}
166
+
167
+ # Classical "..[ei]x" -> "..ices"
168
+ PL_sb_C_ex_ices = matchgroup %w[
169
+ vortex vertex cortex latex
170
+ pontifex apex index simplex
171
+ ].collect {|word| word[0...-2]}
172
+ PL_sb_C_ix_ices = matchgroup %w[
173
+ appendix
174
+ ].collect {|word| word[0...-2]}
175
+
176
+
177
+ # Arabic: ".." -> "..i"
178
+ PL_sb_C_i = matchgroup %w[
179
+ afrit afreet efreet
180
+ ]
181
+
182
+
183
+ # Hebrew: ".." -> "..im"
184
+ PL_sb_C_im = matchgroup %w[
185
+ goy seraph cherub
186
+ ]
187
+
188
+ # Unconditional "..man" -> "..mans"
189
+ PL_sb_U_man_mans = matchgroup %w[
190
+ human
191
+ Alabaman Bahaman Burman German
192
+ Hiroshiman Liman Nakayaman Oklahoman
193
+ Panaman Selman Sonaman Tacoman Yakiman
194
+ Yokohaman Yuman
195
+ ]
196
+
197
+
198
+ PL_sb_uninflected_s = [
199
+ # Pairs or groups subsumed to a singular...
200
+ "breeches", "britches", "clippers", "gallows", "hijinks",
201
+ "headquarters", "pliers", "scissors", "testes", "herpes",
202
+ "pincers", "shears", "proceedings", "trousers",
203
+
204
+ # Unassimilated Latin 4th declension
205
+ "cantus", "coitus", "nexus",
206
+
207
+ # Recent imports...
208
+ "contretemps", "corps", "debris",
209
+ /.*ois/,
210
+
211
+ # Diseases
212
+ /.*measles/, "mumps",
213
+
214
+ # Miscellaneous others...
215
+ "diabetes", "jackanapes", "series", "species", "rabies",
216
+ "chassis", "innings", "news", "mews",
217
+ ]
218
+
219
+
220
+ # Don't inflect in classical mode, otherwise normal inflection
221
+ PL_sb_uninflected_herd = matchgroup %w[
222
+ wildebeest swine eland bison buffalo
223
+ elk moose rhinoceros
224
+ ]
225
+
226
+ PL_sb_uninflected = matchgroup(
227
+
228
+ # Some fish and herd animals
229
+ /.*fish/, "tuna", "salmon", "mackerel", "trout",
230
+ "bream", /sea[- ]bass/, "carp", "cod", "flounder", "whiting",
231
+
232
+ /.*deer/, /.*sheep/,
233
+
234
+ # All nationals ending in -ese
235
+ "Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
236
+ "Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
237
+ "Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
238
+ "Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
239
+ "Shavese", "Vermontese", "Wenchowese", "Yengeese",
240
+ /.*[nrlm]ese/,
241
+
242
+ # Some words ending in ...s (often pairs taken as a whole)
243
+ PL_sb_uninflected_s,
244
+
245
+ # Diseases
246
+ /.*pox/,
247
+
248
+ # Other oddities
249
+ "graffiti", "djinn"
250
+ )
251
+
252
+
253
+ # Singular words ending in ...s (all inflect with ...es)
254
+ PL_sb_singular_s = matchgroup [ /.*ss/, /.*us/ ] +
255
+ %w[
256
+ acropolis aegis alias arthritis asbestos atlas
257
+ bathos bias bronchitis bursitis caddis cannabis
258
+ canvas chaos cosmos dais digitalis encephalitis
259
+ epidermis ethos eyas gas glottis hepatitis
260
+ hubris ibis lens mantis marquis metropolis
261
+ neuritis pathos pelvis polis rhinoceros
262
+ sassafras tonsillitis trellis
263
+ ]
264
+
265
+ PL_v_special_s = matchgroup [
266
+ PL_sb_singular_s,
267
+ PL_sb_uninflected_s,
268
+ PL_sb_irregular_s.keys,
269
+ /(.*[csx])is/,
270
+ /(.*)ceps/,
271
+ /[A-Z].*s/,
272
+ ]
273
+
274
+ PL_sb_postfix_adj = '(' + {
275
+
276
+ 'general' => '(?!major|lieutenant|brigadier|adjutant)\S+',
277
+ 'martial' => "court",
278
+
279
+ }.collect {|key,val|
280
+ "(?:#{val})(?=(?:-|\\s+)#{key})"
281
+ }.join("|") + ")(.*)"
282
+
283
+
284
+ PL_sb_military = %r'major|lieutenant|brigadier|adjutant|quartermaster'
285
+ PL_sb_general = %r'((?!#{PL_sb_military.source}).*?)((-|\s+)general)'
286
+
287
+ PL_prep = matchgroup %w[
288
+ about above across after among around at athwart before behind
289
+ below beneath beside besides between betwixt beyond but by
290
+ during except for from in into near of off on onto out over
291
+ since till to under until unto upon with
292
+ ]
293
+
294
+ PL_sb_prep_dual_compound = %r'(.*?)((?:-|\s+)(?:#{PL_prep}|d[eu])(?:-|\s+))a(?:-|\s+)(.*)'
295
+ PL_sb_prep_compound = %r'(.*?)((-|\s+)(#{PL_prep}|d[eu])((-|\s+)(.*))?)'
296
+
297
+
298
+ PL_pron_nom_h = {
299
+ # Nominative Reflexive
300
+ "i" => "we", "myself" => "ourselves",
301
+ "you" => "you", "yourself" => "yourselves",
302
+ "she" => "they", "herself" => "themselves",
303
+ "he" => "they", "himself" => "themselves",
304
+ "it" => "they", "itself" => "themselves",
305
+ "they" => "they", "themself" => "themselves",
306
+
307
+ # Possessive
308
+ "mine" => "ours",
309
+ "yours" => "yours",
310
+ "hers" => "theirs",
311
+ "his" => "theirs",
312
+ "its" => "theirs",
313
+ "theirs" => "theirs",
314
+ }
315
+ PL_pron_nom = Regexp.new( PL_pron_nom_h.keys.join('|'), Regexp::IGNORECASE )
316
+
317
+ PL_pron_acc_h = {
318
+ # Accusative Reflexive
319
+ "me" => "us", "myself" => "ourselves",
320
+ "you" => "you", "yourself" => "yourselves",
321
+ "her" => "them", "herself" => "themselves",
322
+ "him" => "them", "himself" => "themselves",
323
+ "it" => "them", "itself" => "themselves",
324
+ "them" => "them", "themself" => "themselves",
325
+ }
326
+ PL_pron_acc = matchgroup PL_pron_acc_h.keys
327
+
328
+ PL_v_irregular_pres_h = {
329
+ # 1st pers. sing. 2nd pers. sing. 3rd pers. singular
330
+ # 3rd pers. (indet.)
331
+ "am" => "are", "are" => "are", "is" => "are",
332
+ "was" => "were", "were" => "were", "was" => "were",
333
+ "have" => "have", "have" => "have", "has" => "have",
334
+ }
335
+ PL_v_irregular_pres = matchgroup PL_v_irregular_pres_h.keys
336
+
337
+ PL_v_ambiguous_pres_h = {
338
+ # 1st pers. sing. 2nd pers. sing. 3rd pers. singular
339
+ # 3rd pers. (indet.)
340
+ "act" => "act", "act" => "act", "acts" => "act",
341
+ "blame" => "blame", "blame" => "blame", "blames" => "blame",
342
+ "can" => "can", "can" => "can", "can" => "can",
343
+ "must" => "must", "must" => "must", "must" => "must",
344
+ "fly" => "fly", "fly" => "fly", "flies" => "fly",
345
+ "copy" => "copy", "copy" => "copy", "copies" => "copy",
346
+ "drink" => "drink", "drink" => "drink", "drinks" => "drink",
347
+ "fight" => "fight", "fight" => "fight", "fights" => "fight",
348
+ "fire" => "fire", "fire" => "fire", "fires" => "fire",
349
+ "like" => "like", "like" => "like", "likes" => "like",
350
+ "look" => "look", "look" => "look", "looks" => "look",
351
+ "make" => "make", "make" => "make", "makes" => "make",
352
+ "reach" => "reach", "reach" => "reach", "reaches" => "reach",
353
+ "run" => "run", "run" => "run", "runs" => "run",
354
+ "sink" => "sink", "sink" => "sink", "sinks" => "sink",
355
+ "sleep" => "sleep", "sleep" => "sleep", "sleeps" => "sleep",
356
+ "view" => "view", "view" => "view", "views" => "view",
357
+ }
358
+ PL_v_ambiguous_pres = matchgroup PL_v_ambiguous_pres_h.keys
359
+
360
+ PL_v_irregular_non_pres = matchgroup %w[
361
+ did had ate made put
362
+ spent fought sank gave sought
363
+ shall could ought should
364
+ ]
365
+
366
+ PL_v_ambiguous_non_pres = matchgroup %w[
367
+ thought saw bent will might cut
368
+ ]
369
+
370
+ PL_count_zero = matchgroup %w[
371
+ 0 no zero nil
372
+ ]
373
+
374
+ PL_count_one = matchgroup %w[
375
+ 1 a an one each every this that
376
+ ]
377
+
378
+ PL_adj_special_h = {
379
+ "a" => "some", "an" => "some",
380
+ "this" => "these", "that" => "those",
381
+ }
382
+ PL_adj_special = matchgroup PL_adj_special_h.keys
383
+
384
+ PL_adj_poss_h = {
385
+ "my" => "our",
386
+ "your" => "your",
387
+ "its" => "their",
388
+ "her" => "their",
389
+ "his" => "their",
390
+ "their" => "their",
391
+ }
392
+ PL_adj_poss = matchgroup PL_adj_poss_h.keys
393
+
394
+
395
+ #################################################################
396
+ ### P U B L I C F U N C T I O N S
397
+ #################################################################
398
+
399
### Return the plural form of the receiver if +count+ indicates it should be
### plural. Receivers that respond to #to_int are spelled out with #numwords
### first; anything else is converted with #to_s.
###
### Leading and trailing whitespace is set aside and re-attached to the
### result. The remaining text is tried as a special adjective, then as a
### special verb, and finally as a noun.
def plural( count=2 )
  phrase = self.respond_to?( :to_int ) ? self.numwords : self.to_s
  self.log.debug "Pluralizing %p" % [ phrase ]

  # Nothing to inflect (e.g. an empty phrase): hand the phrase back as-is.
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
  return phrase unless md

  lead, core, trail = md.captures
  inflected = pluralize_special_adjective( core, count ) ||
    pluralize_special_verb( core, count ) ||
    pluralize_noun( core, count )

  return lead + postprocess( core, inflected ) + trail
end
426
+ Linguistics::EN.register_lprintf_formatter :PL, :plural
427
+
428
+
429
### Return the plural of the receiver, treated as a noun phrase, if +count+
### indicates it should be plural.
###
### Leading and trailing whitespace is preserved around the inflected word.
### A phrase with nothing to inflect (for example an empty string) is
### returned unchanged.
def plural_noun( count=2 )
  phrase = self.to_s

  # The pattern requires at least one non-newline character, so an empty
  # phrase yields no match at all; bail out before touching the captures.
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase )
  return phrase unless md

  pre, word, post = md.captures
  plural = postprocess( word, pluralize_noun(word, count) )

  return pre + plural + post
end
442
+
443
+
444
### Return the plural of the receiver, treated as a verb phrase, if +count+
### indicates it should be plural.
###
### The special-verb rules are tried first and the general verb rules are
### the fallback. Leading and trailing whitespace is preserved, and a phrase
### with nothing to inflect (for example an empty string) is returned
### unchanged.
def plural_verb( count=2 )
  phrase = self.to_s

  # The pattern requires at least one non-newline character, so an empty
  # phrase yields no match at all; bail out before touching the captures.
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase )
  return phrase unless md

  pre, word, post = md.captures
  plural = postprocess( word,
    pluralize_special_verb(word, count) ||
    pluralize_general_verb(word, count) )

  return pre + plural + post
end
459
+
460
+
461
### Return the plural of the receiver, treated as an adjectival phrase, if
### +count+ indicates it should be plural.
###
### Words the special-adjective rules do not recognise are returned as they
### are. Leading and trailing whitespace is preserved, and a phrase with
### nothing to inflect (for example an empty string) is returned unchanged.
def plural_adjective( count=2 )
  phrase = self.to_s

  # The pattern requires at least one non-newline character, so an empty
  # phrase yields no match at all; bail out before touching the captures.
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase )
  return phrase unless md

  pre, word, post = md.captures
  plural = postprocess( word, pluralize_special_adjective(word, count) || word )

  return pre + plural + post
end
474
+ alias_method :plural_adj, :plural_adjective
475
+
476
+
477
+ #################################################################
478
+ ### P R I V A T E F U N C T I O N S
479
+ #################################################################
480
+
481
+ #######
482
+ private
483
+ #######
484
+
485
### Do normal/classical switching and match capitalization in +inflected+ by
### examining the +original+ input.
###
### +inflected+ may carry two alternatives as "modern|classical"; the
### classical one is chosen when Linguistics::EN.classical? is true.
###
### Returns a new String. +inflected+ itself is never modified, so values
### taken straight from the lookup tables (or frozen strings) are safe to
### pass in.
def postprocess( original, inflected )

  # If there's a classical variant, use it instead of the modern one if
  # classical mode is on. The non-destructive #sub always returns a copy,
  # which keeps the caller's string (possibly a shared table entry) intact.
  result = inflected.sub( /([^|]+)\|(.+)/ ) do
    Linguistics::EN.classical? ? $2 : $1
  end

  # Try to duplicate the case of the original string
  case original
  when "I"
    # The pronoun "I" is capitalized regardless; don't carry that over.
    return result
  when /^[A-Z]+$/
    return result.upcase
  when /^[A-Z]/
    # Can't use #capitalize, as it will downcase the rest of the string,
    # too.
    result[0,1] = result[0,1].upcase
    return result
  else
    return result
  end
end
510
+
511
+
512
### Normalize a count to either 1 or +default+ (singular or plural).
###
### Returns 1 when +count+ is one of the PL_count_one words or, in classical
### mode, one of the PL_count_zero words; the comparison ignores case.
### Returns +default+ for +nil+ and for everything else.
def normalize_count( count, default=2 )
  return default if count.nil? # Default to plural

  # Interpolating a Regexp embeds it as "(?-mix:...)", which would switch
  # the surrounding /i flag off again for the word list. Rewriting the group
  # as a plain "(?:...)" lets the list inherit /i.
  ci = lambda {|pattern| pattern.to_s.gsub( '(?-mix:', '(?:' ) }
  text = count.to_s

  return 1 if /^(#{ci[PL_count_one]})$/i =~ text
  return 1 if Linguistics::EN.classical? && /^(#{ci[PL_count_zero]})$/i =~ text

  return default
end
522
+
523
+
524
+ ### Pluralize nouns
525
+ def pluralize_noun( word, count=2 )
526
+ self.log.debug "Trying to pluralize %p as a noun" % [ word ]
527
+
528
+ value = nil
529
+ count = normalize_count( count )
530
+
531
+ return word if count == 1
532
+
533
+ # Handle user-defined nouns
534
+ #if value = ud_match( word, PL_sb_user_defined )
535
+ # return value
536
+ #end
537
+
538
+ # Handle empty word, singular count and uninflected plurals
539
+ case word
540
+ when ''
541
+ self.log.debug " empty string"
542
+ return word
543
+ when /^(#{PL_sb_uninflected})$/i
544
+ self.log.debug " uninflected plural"
545
+ return word
546
+ else
547
+ if Linguistics::EN.classical? && /^(#{PL_sb_uninflected_herd})$/i =~ word
548
+ self.log.debug " uninflected classical herd word"
549
+ return word
550
+ end
551
+ end
552
+
553
+ # Handle compounds ("Governor General", "mother-in-law", "aide-de-camp", etc.)
554
+ case word
555
+ when /^(?:#{PL_sb_postfix_adj})$/i
556
+ value = $2
557
+ noun = $1
558
+ self.log.debug " postfixed adjectival compound noun phrase (#{value} -> #{noun})"
559
+ return pluralize_noun( noun, 2 ) + value
560
+
561
+ when /^(?:#{PL_sb_prep_dual_compound})$/i
562
+ noun = $1
563
+ value = [ $2, $3 ]
564
+ self.log.debug " prepositional dual compound noun phrase (%s -> %s %s)" %
565
+ [ noun, *value ]
566
+ return pluralize_noun( noun, 2 ) + value[0] + pluralize_noun( value[1] )
567
+
568
+ when /^(?:#{PL_sb_prep_compound})$/i
569
+ noun = $1
570
+ value = $2
571
+ self.log.debug " prepositional singular compound noun phrase (%s -> %s)" %
572
+ [ noun, value ]
573
+ return pluralize_noun( noun, 2 ) + value
574
+
575
+ # Handle pronouns
576
+ when /^((?:#{PL_prep})\s+)(#{PL_pron_acc})$/i
577
+ prep, pron = $1, $2
578
+ self.log.debug " prepositional pronoun phrase (%p + %p)" % [ prep, pron ]
579
+ return prep + PL_pron_acc_h[ pron.downcase ]
580
+
581
+ when /^(#{PL_pron_nom})$/i
582
+ pron = $1
583
+ self.log.debug " nominative pronoun; using PL_pron_nom table"
584
+ return PL_pron_nom_h[ word.downcase ]
585
+
586
+ when /^(#{PL_pron_acc})$/i
587
+ self.log.debug " accusative pronoun; using PL_pron_acc table"
588
+ return PL_pron_acc_h[ word.downcase ]
589
+
590
+ # Handle isolated irregular plurals
591
+ when /(.*)\b(#{PL_sb_irregular})$/i
592
+ prefix, word = $1, $2
593
+ self.log.debug " isolated irregular; using PL_sb_irregular_h table"
594
+ return prefix + PL_sb_irregular_h[ word.downcase ]
595
+
596
+ # Unconditional ...man -> ...mans
597
+ when /(#{PL_sb_U_man_mans})$/i
598
+ word = $1
599
+ self.log.debug " unconditional man -> mans (%p)" % [ word ]
600
+ return "#{word}s"
601
+
602
+ # Handle families of irregular plurals
603
+ when /(.*)man$/i then return "#{$1}men"
604
+ when /(.*[ml])ouse$/i then return "#{$1}ice"
605
+ when /(.*)goose$/i then return "#{$1}geese"
606
+ when /(.*)tooth$/i then return "#{$1}teeth"
607
+ when /(.*)foot$/i then return "#{$1}feet"
608
+
609
+ # Handle unassimilated imports
610
+ when /(.*)ceps$/i then return word
611
+ when /(.*)zoon$/i then return "#{$1}zoa"
612
+ when /(.*[csx])is$/i then return "#{$1}es"
613
+ when /(#{PL_sb_U_ex_ices})ex$/i then return "#{$1}ices"
614
+ when /(#{PL_sb_U_ix_ices})ix$/i then return "#{$1}ices"
615
+ when /(#{PL_sb_U_um_a})um$/i then return "#{$1}a"
616
+ when /(#{PL_sb_U_us_i})us$/i then return "#{$1}i"
617
+ when /(#{PL_sb_U_on_a})on$/i then return "#{$1}a"
618
+ when /(#{PL_sb_U_a_ae})$/i then return "#{$1}e"
619
+ end
620
+
621
+
622
+ # Handle incompletely assimilated imports in classical mode
623
+ if Linguistics::EN.classical?
624
+ self.log.debug " checking for classical incompletely assimilated imports"
625
+ case word
626
+ when /(.*)trix$/i then return "#{$1}trices"
627
+ when /(.*)eau$/i then return "#{$1}eaux"
628
+ when /(.*)ieu$/i then return "#{$1}ieux"
629
+ when /(.{2,}[yia])nx$/i then return "#{$1}nges"
630
+ when /(#{PL_sb_C_en_ina})en$/i then return "#{$1}ina"
631
+ when /(#{PL_sb_C_ex_ices})ex$/i then return "#{$1}ices"
632
+ when /(#{PL_sb_C_ix_ices})ix$/i then return "#{$1}ices"
633
+ when /(#{PL_sb_C_um_a})um$/i then return "#{$1}a"
634
+ when /(#{PL_sb_C_us_i})us$/i then return "#{$1}i"
635
+ when /(#{PL_sb_C_us_us})$/i then return "#{$1}"
636
+ when /(#{PL_sb_C_a_ae})$/i then return "#{$1}e"
637
+ when /(#{PL_sb_C_a_ata})a$/i then return "#{$1}ata"
638
+ when /(#{PL_sb_C_o_i})o$/i then return "#{$1}i"
639
+ when /(#{PL_sb_C_on_a})on$/i then return "#{$1}a"
640
+ when /#{PL_sb_C_im}$/i then return "#{word}im"
641
+ when /#{PL_sb_C_i}$/i then return "#{word}i"
642
+ end
643
+ end
644
+
645
+
646
+ # Handle singular nouns ending in ...s or other silibants
647
+ case word
648
+ when /^(#{PL_sb_singular_s})$/i then return "#{$1}es"
649
+ when /^([A-Z].*s)$/ then return "#{$1}es"
650
+ when /(.*)([cs]h|[zx])$/i then return "#{$1}#{$2}es"
651
+ # when /(.*)(us)$/i then return "#{$1}#{$2}es"
652
+
653
+ # Handle ...f -> ...ves
654
+ when /(.*[eao])lf$/i then return "#{$1}lves"
655
+ when /(.*[^d])eaf$/i then return "#{$1}eaves"
656
+ when /(.*[nlw])ife$/i then return "#{$1}ives"
657
+ when /(.*)arf$/i then return "#{$1}arves"
658
+
659
+ # Handle ...y
660
+ when /(.*[aeiou])y$/i then return "#{$1}ys"
661
+ when /([A-Z].*y)$/ then return "#{$1}s"
662
+ when /(.*)y$/i then return "#{$1}ies"
663
+
664
+ # Handle ...o
665
+ when /#{PL_sb_U_o_os}$/i then return "#{word}s"
666
+ when /[aeiou]o$/i then return "#{word}s"
667
+ when /o$/i then return "#{word}es"
668
+
669
+ # Otherwise just add ...s
670
+ else
671
+ self.log.debug " appears to be regular; adding +s"
672
+ return "#{word}s"
673
+ end
674
+ end # def pluralize_noun
675
+
676
+
677
+
678
### Pluralize special verbs
###
### Returns the plural form of +word+ when it is an irregular verb or a
### standard third-person singular form, +word+ itself when +count+ is
### singular or the verb does not inflect, and +nil+ when the word is not
### handled here (a regular verb, or not a verb at all).
###
### A +nil+ +count+ is treated as singular.
def pluralize_special_verb( word, count )
  self.log.debug "Trying to pluralize %p as a special verb..." % [ word ]
  count ||= 1
  count = normalize_count( count )

  if /^(#{PL_count_one})$/i =~ count.to_s
    self.log.debug " it's a single-count word, returning it unchanged."
    return word # :FIXME: should this return nil instead?
  end

  # Handle user-defined verbs
  #if value = ud_match( word, PL_v_user_defined )
  #	return value
  #end

  # Interpolating a Regexp embeds it as "(?-mix:...)", which would switch
  # the surrounding /i flag off again for the interpolated table (so that
  # "Is" would never match "is"). Rewriting those groups as plain "(?:...)"
  # lets the tables inherit /i.
  ci = lambda {|pattern| pattern.to_s.gsub( '(?-mix:', '(?:' ) }

  case word

  # Handle irregular present tense (simple and compound)
  when /^(#{ci[PL_v_irregular_pres]})((\s.*)?)$/i
    key, rest = $1.downcase, $2
    self.log.debug " yep, it's an irregular present tense verb (%p)" % [ key ]
    return PL_v_irregular_pres_h[ key ] + rest

  # Handle irregular future, preterite and perfect tenses
  when /^(#{ci[PL_v_irregular_non_pres]})((\s.*)?)$/i
    key = $1.downcase
    self.log.debug " yep, it's an irregular non-present tense verb (%p)" % [ key ]
    return word

  # Handle special cases. This match is deliberately case-sensitive: the
  # pattern is interpolated unmodified and the literal carries no /i flag.
  when /^(#{PL_v_special_s})$/
    self.log.debug " it's a not special-case verb; aborting."
    return nil

  # Handle standard 3rd person (chop the ...(e)s off single words)
  when /^(.*)([cs]h|[x]|zz|ss)es$/i
    base, suffix = $1, $2
    self.log.debug " it's a standard third-person verb (%p + %p)" % [ base, suffix ]
    return base + suffix
  when /^(..+)ies$/i
    verb = $1
    self.log.debug " it's a standard third-person verb (%p + ies -> +y)" % [ verb ]
    return "#{verb}y"
  when /^(.+)oes$/i
    verb = $1
    self.log.debug " it's a standard third-person verb (%p + oes -> +o)" % [ verb ]
    return "#{verb}o"
  when /^(.*[^s])s$/i
    verb = $1
    self.log.debug " it's a standard third-person verb (%p + (^s)s -> -s)" % [ verb ]
    return verb

  # Otherwise, a regular verb (handle elsewhere)
  else
    self.log.debug " nope. Either a regular verb or not a verb."
    return nil
  end
end
737
+
738
+
739
### Pluralize regular verbs
###
### Returns +word+ unchanged when +count+ is singular. Otherwise a verb from
### the ambiguous present-tense table (optionally followed by further words)
### is replaced by its plural form, and anything else is returned as it is.
def pluralize_general_verb( word, count )
  count = normalize_count( count )

  return word if /^(#{PL_count_one})$/i =~ count.to_s

  # Interpolating a Regexp embeds it as "(?-mix:...)", which would switch
  # the surrounding /i flag off again for the interpolated table (so that
  # "Acts" would never match "acts"). Rewriting the group as a plain
  # "(?:...)" lets the table inherit /i.
  ci = lambda {|pattern| pattern.to_s.gsub( '(?-mix:', '(?:' ) }

  # Handle ambiguous present tenses (simple and compound)
  if /^(#{ci[PL_v_ambiguous_pres]})((\s.*)?)$/i =~ word
    return PL_v_ambiguous_pres_h[ $1.downcase ] + $2
  end

  # Ambiguous preterite and perfect tenses, and 1st or 2nd person forms,
  # are uninflected.
  return word
end
760
+
761
+
762
### Handle special adjectives
###
### Returns the plural of +word+ when it is one of the special adjectives
### (PL_adj_special_h), a possessive adjective (PL_adj_poss_h), or ends in an
### apostrophe or "'s". Returns +nil+ when +count+ is singular or the word is
### not recognised, so that the caller can try other rules.
###
### A +nil+ +count+ is treated as singular.
def pluralize_special_adjective( word, count )
  self.log.debug "Trying to pluralize %p as a special adjective..." % [ word ]
  count ||= 1
  count = normalize_count( count )

  if /^(#{PL_count_one})$/i =~ count.to_s
    self.log.debug " it's a single-count word; aborting"
    return nil
  end

  # Handle user-defined adjectives
  #if value = ud_match( word, PL_adj_user_defined )
  #	return value
  #end

  # Interpolating a Regexp embeds it as "(?-mix:...)", which would switch
  # the surrounding /i flag off again for the interpolated table (so that
  # "This" would never match "this"). Rewriting those groups as plain
  # "(?:...)" lets the tables inherit /i.
  ci = lambda {|pattern| pattern.to_s.gsub( '(?-mix:', '(?:' ) }

  case word

  # Handle known cases
  when /^(#{ci[PL_adj_special]})$/i
    key = $1.downcase
    self.log.debug " yep, it's a special plural adjective (%p)" % [ key ]
    # Return a copy so callers can't modify the shared table entry.
    return PL_adj_special_h[ key ].dup

  # Handle possessives
  when /^(#{ci[PL_adj_poss]})$/i
    key = $1.downcase
    self.log.debug " it's a special possessive adjective (%p)" % [ key ]
    return PL_adj_poss_h[ key ].dup

  when /^(.*)'s?$/
    pl = $1.en.plural_noun( count )
    self.log.debug " it has an apostrophe (%p); using generic possessive rules" % [ pl ]
    # Plurals that already end in "s" only take the apostrophe.
    return /s$/ =~ pl ? "#{pl}'" : "#{pl}'s"

  # Otherwise, no idea
  else
    self.log.debug " nope."
    return nil
  end
end
807
+
808
+
809
+ end # module Linguistics::EN::Pluralization
810
+