linguistics 1.0.9 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. data.tar.gz.sig +0 -0
  2. data/.gemtest +0 -0
  3. data/ChangeLog +849 -342
  4. data/History.rdoc +11 -0
  5. data/LICENSE +9 -9
  6. data/Manifest.txt +44 -0
  7. data/README.rdoc +226 -0
  8. data/Rakefile +32 -349
  9. data/examples/endocs.rb +272 -0
  10. data/examples/generalize_sentence.rb +2 -1
  11. data/examples/klingon.rb +22 -0
  12. data/lib/linguistics.rb +130 -292
  13. data/lib/linguistics/en.rb +337 -1628
  14. data/lib/linguistics/en/articles.rb +138 -0
  15. data/lib/linguistics/en/conjugation.rb +2245 -0
  16. data/lib/linguistics/en/conjunctions.rb +202 -0
  17. data/lib/linguistics/en/{infinitive.rb → infinitives.rb} +41 -55
  18. data/lib/linguistics/en/linkparser.rb +41 -49
  19. data/lib/linguistics/en/numbers.rb +483 -0
  20. data/lib/linguistics/en/participles.rb +33 -0
  21. data/lib/linguistics/en/pluralization.rb +810 -0
  22. data/lib/linguistics/en/stemmer.rb +75 -0
  23. data/lib/linguistics/en/titlecase.rb +121 -0
  24. data/lib/linguistics/en/wordnet.rb +63 -97
  25. data/lib/linguistics/inflector.rb +89 -0
  26. data/lib/linguistics/iso639.rb +534 -448
  27. data/lib/linguistics/languagebehavior.rb +36 -0
  28. data/lib/linguistics/monkeypatches.rb +42 -0
  29. data/spec/lib/constants.rb +15 -0
  30. data/spec/lib/helpers.rb +38 -0
  31. data/spec/linguistics/en/articles_spec.rb +797 -0
  32. data/spec/linguistics/en/conjugation_spec.rb +2083 -0
  33. data/spec/linguistics/en/conjunctions_spec.rb +154 -0
  34. data/spec/linguistics/en/infinitives_spec.rb +518 -0
  35. data/spec/linguistics/en/linkparser_spec.rb +66 -0
  36. data/spec/linguistics/en/numbers_spec.rb +1295 -0
  37. data/spec/linguistics/en/participles_spec.rb +55 -0
  38. data/spec/linguistics/en/pluralization_spec.rb +4636 -0
  39. data/spec/linguistics/en/stemmer_spec.rb +72 -0
  40. data/spec/linguistics/en/titlecase_spec.rb +841 -0
  41. data/spec/linguistics/en/wordnet_spec.rb +85 -0
  42. data/spec/linguistics/en_spec.rb +45 -167
  43. data/spec/linguistics/inflector_spec.rb +40 -0
  44. data/spec/linguistics/iso639_spec.rb +49 -53
  45. data/spec/linguistics/monkeypatches_spec.rb +40 -0
  46. data/spec/linguistics_spec.rb +46 -76
  47. metadata +241 -113
  48. metadata.gz.sig +0 -0
  49. data/README +0 -166
  50. data/README.english +0 -245
  51. data/rake/191_compat.rb +0 -26
  52. data/rake/dependencies.rb +0 -76
  53. data/rake/documentation.rb +0 -123
  54. data/rake/helpers.rb +0 -502
  55. data/rake/hg.rb +0 -318
  56. data/rake/manual.rb +0 -787
  57. data/rake/packaging.rb +0 -129
  58. data/rake/publishing.rb +0 -341
  59. data/rake/style.rb +0 -62
  60. data/rake/svn.rb +0 -668
  61. data/rake/testing.rb +0 -152
  62. data/rake/verifytask.rb +0 -64
  63. data/tests/en/infinitive.tests.rb +0 -207
  64. data/tests/en/inflect.tests.rb +0 -1389
  65. data/tests/en/lafcadio.tests.rb +0 -77
  66. data/tests/en/linkparser.tests.rb +0 -42
  67. data/tests/en/lprintf.tests.rb +0 -77
  68. data/tests/en/titlecase.tests.rb +0 -73
  69. data/tests/en/wordnet.tests.rb +0 -95
@@ -1,1093 +1,401 @@
1
1
  #!/usr/bin/ruby
2
- #
3
- # = Linguistics::EN
2
+
3
+ require 'rubygems' # For Gem.find_files
4
+ require 'pathname'
5
+
6
+ require 'linguistics' unless defined?( Linguistics )
7
+
8
+
9
+ # This module is a container for various English-language linguistic
10
+ # functions for the Linguistics library. It can be either loaded
11
+ # directly, or by passing some variant of +:en+ or +:eng+ to the
12
+ # Linguistics.use method.
4
13
  #
5
- # This module contains English-language linguistic functions for the Linguistics
6
- # module. It can be either loaded directly, or by passing some variant of 'en'
7
- # or 'eng' to the Linguistics::use method.
14
+ # == Pluralization
8
15
  #
9
- # The functions contained by the module provide:
16
+ # "box".en.plural
17
+ # # => "boxes"
10
18
  #
11
- # == Plural Inflections
12
- #
13
- # Plural forms of all nouns, most verbs, and some adjectives are provided. Where
14
- # appropriate, "classical" variants (for example: "brother" -> "brethren",
15
- # "dogma" -> "dogmata", etc.) are also provided.
19
+ # "mouse".en.plural
20
+ # # => "mice"
21
+ #
22
+ # "ruby".en.plural
23
+ # # => "rubies"
16
24
  #
17
- # These can be accessed via the #plural, #plural_noun, #plural_verb, and
18
- # #plural_adjective methods.
19
25
  #
20
26
  # == Indefinite Articles
21
27
  #
22
- # Pronunciation-based "a"/"an" selection is provided for all English words, and
23
- # most initialisms.
28
+ # "book".en.a
29
+ # # => "a book"
30
+ #
31
+ # "article".en.a
32
+ # # => "an article"
33
+ #
34
+ #
35
+ # == Present Participles
36
+ #
37
+ # "runs".en.present_participle
38
+ # # => "running"
39
+ #
40
+ # "eats".en.present_participle
41
+ # # => "eating"
42
+ #
43
+ # "spies".en.present_participle
44
+ # # => "spying"
45
+ #
46
+ #
47
+ # == Ordinal Numbers
48
+ #
49
+ # 5.en.ordinal
50
+ # # => "5th"
51
+ #
52
+ # 2004.en.ordinal
53
+ # # => "2004th"
24
54
  #
25
- # See: #a, #an, and #no.
26
55
  #
27
56
  # == Numbers to Words
28
57
  #
29
- # Conversion from Numeric values to words are supported using the American
30
- # "thousands" system. E.g., 2561 => "two thousand, five hundred and sixty-one".
58
+ # 5.en.numwords
59
+ # # => "five"
60
+ #
61
+ # 2004.en.numwords
62
+ # # => "two thousand and four"
63
+ #
64
+ # 2385762345876.en.numwords
65
+ # # => "two trillion, three hundred and eighty-five billion, seven hundred and
66
+ # # sixty-two million, three hundred and forty-five thousand, eight hundred
67
+ # # and seventy-six"
68
+ #
31
69
  #
32
- # See the #numwords method.
70
+ # == Quantification
33
71
  #
34
- # == Ordinals
72
+ # "cow".en.quantify( 5 )
73
+ # # => "several cows"
74
+ #
75
+ # "cow".en.quantify( 1005 )
76
+ # # => "thousands of cows"
77
+ #
78
+ # "cow".en.quantify( 20_432_123_000_000 )
79
+ # # => "tens of trillions of cows"
35
80
  #
36
- # It is also possible to inflect numerals (1,2,3) and number words ("one",
37
- # "two", "three") to ordinals (1st, 2nd, 3rd) and ordinates ("first", "second",
38
- # "third").
39
81
  #
40
82
  # == Conjunctions
41
83
  #
42
- # This module also supports the creation of English conjunctions from Arrays of
43
- # Strings or objects which respond to the #to_s message. Eg.,
84
+ # animals = %w{dog cow ox chicken goose goat cow dog rooster llama pig goat
85
+ # dog cat cat dog cow goat goose goose ox alpaca}
86
+ # "The farm has: " + animals.en.conjunction
87
+ # # => "The farm has: four dogs, three cows, three geese, three goats, two
88
+ # # oxen, two cats, a chicken, a rooster, a llama, a pig, and an alpaca"
89
+ #
90
+ # Note that 'goose' and 'ox' are both correctly pluralized, and the correct
91
+ # indefinite article 'an' has been used for 'alpaca'.
92
+ #
93
+ # You can also use the generalization function of the #quantify method to give
94
+ # general descriptions of object lists instead of literal counts:
95
+ #
96
+ # allobjs = []
97
+ # ObjectSpace::each_object {|obj| allobjs << obj.class.name }
98
+ # puts "The current Ruby objectspace contains: " +
99
+ # allobjs.en.conjunction( :generalize => true )
100
+ #
101
+ # Outputs:
102
+ #
103
+ # The current Ruby objectspace contains: hundreds of thousands of Strings,
104
+ # thousands of RubyVM::InstructionSequences, thousands of Arrays, thousands
105
+ # of Hashes, hundreds of Procs, hundreds of Regexps, [...], a
106
+ # SystemStackError, a Random, an ARGF.class, a Data, a fatal, an
107
+ # OptionParser::List, a YAML::EngineManager, a URI::Parser, a Rational, and
108
+ # a Gem::Platform
44
109
  #
45
- # %w{cow pig chicken cow dog cow duck duck moose}.en.conjunction
46
- # ==> "three cows, two ducks, a pig, a chicken, a dog, and a moose"
47
110
  #
48
111
  # == Infinitives
49
112
  #
50
- # Returns the infinitive form of English verbs:
113
+ # "leaving".en.infinitive
114
+ # # => "leave"
115
+ #
116
+ # "left".en.infinitive
117
+ # # => "leave"
118
+ #
119
+ # "leaving".en.infinitive.suffix
120
+ # # => "ing"
121
+ #
122
+ #
123
+ # == Conjugation
124
+ #
125
+ # Conjugate a verb given an infinitive:
126
+ #
127
+ # "run".en.past_tense
128
+ # # => "ran"
129
+ #
130
+ # "run".en.past_participle
131
+ # # => "run"
132
+ #
133
+ # "run".en.present_tense
134
+ # # => "run"
135
+ #
136
+ # "run".en.present_participle
137
+ # # => "running"
138
+ #
139
+ # Conjugate an infinitive with an explicit tense and grammatical person:
140
+ #
141
+ # "be".en.conjugate( :present, :third_person_singular )
142
+ # # => "is"
143
+ #
144
+ # "be".en.conjugate( :present, :first_person_singular )
145
+ # # => "am"
146
+ #
147
+ # "be".en.conjugate( :past, :first_person_singular )
148
+ # # => "was"
149
+ #
150
+ # The functionality is a port of the verb conjugation portion of Morph
151
+ # Adorner (http://morphadorner.northwestern.edu/).
152
+ #
153
+ # It includes a good number of irregular verbs, but it's not going to be
154
+ # 100% correct every time.
155
+ #
156
+ #
157
+ # == WordNet® Integration
158
+ #
159
+ # If you have the 'wordnet' gem installed, you can look up WordNet synsets using
160
+ # the Linguistics interface:
161
+ #
162
+ # Test to be sure the WordNet module loaded okay.
163
+ #
164
+ # Linguistics::EN.has_wordnet?
165
+ # # => true
166
+ #
167
+ # Fetch the default synset for the word "balance"
168
+ #
169
+ # "balance".en.synset
170
+ # # => #<WordNet::Synset:0x7f9fb11012f8 {102777100} 'balance' (noun):
171
+ # # [noun.artifact] a scale for weighing; depends on pull of gravity>
172
+ #
173
+ # Fetch the synset for the first verb sense of "balance"
174
+ #
175
+ # "balance".en.synset( :verb )
176
+ # # => #<WordNet::Synset:0x7f9fb10f3fb8 {201602318} 'balance, poise' (verb):
177
+ # # [verb.contact] hold or carry in equilibrium>
178
+ #
179
+ # Fetch the second noun sense
180
+ #
181
+ # "balance".en.synset( 2, :noun )
182
+ # # => #<WordNet::Synset:0x7f9fb10ebbd8 {102777402} 'balance, balance wheel'
183
+ # # (noun): [noun.artifact] a wheel that regulates the rate of movement in a
184
+ # # machine; especially a wheel oscillating against the hairspring of a
185
+ # # timepiece to regulate its beat>
186
+ #
187
+ # Fetch the second noun sense's hypernyms (more-general words, like a
188
+ # superclass)
189
+ #
190
+ # "balance".en.synset( 2, :noun ).hypernyms
191
+ # # => [#<WordNet::Synset:0x7f9fb10dd100 {104574999} 'wheel' (noun):
192
+ # # [noun.artifact] a simple machine consisting of a circular frame with
193
+ # # spokes (or a solid disc) that can rotate on a shaft or axle (as in
194
+ # # vehicles or other machines)>]
51
195
  #
52
- # "dodging".en.infinitive
53
- # ==> "dodge"
196
+ # A simpler way of doing the same thing:
54
197
  #
198
+ # "balance".en.hypernyms( 2, :noun )
199
+ # # => [#<WordNet::Synset:0x7f9fb10d24d0 {104574999} 'wheel' (noun):
200
+ # # [noun.artifact] a simple machine consisting of a circular frame with
201
+ # # spokes (or a solid disc) that can rotate on a shaft or axle (as in
202
+ # # vehicles or other machines)>]
55
203
  #
56
- # == Authors
57
- #
58
- # * Michael Granger <ged@FaerieMUD.org>
59
- #
60
- # == Acknowledgements
204
+ # Fetch the first hypernym's hypernyms
61
205
  #
62
- # The inflection functions of this module were adapted from Damian Conway's
63
- # Lingua::EN::Inflect Perl module:
206
+ # "balance".en.synset( 2, :noun ).hypernyms.first.hypernyms
207
+ # # => [#<WordNet::Synset:0x7f9fb10c5190 {103700963} 'machine, simple machine'
208
+ # # (noun): [noun.artifact] a device for overcoming resistance at one point by
209
+ # # applying force at some other point>]
64
210
  #
65
- # Copyright (c) 1997-2000, Damian Conway. All Rights Reserved.
66
- # This module is free software. It may be used, redistributed
67
- # and/or modified under the same terms as Perl itself.
211
+ # Find the synset to which both the second noun sense of "balance" and the
212
+ # default sense of "shovel" belong.
68
213
  #
69
- # The conjunctions code was adapted from the Lingua::Conjunction Perl module
70
- # written by Robert Rothenberg and Damian Conway, which has no copyright
71
- # statement included.
214
+ # ("balance".en.synset( 2, :noun ) | "shovel".en.synset)
215
+ # # => #<WordNet::Synset:0x7f9fb1091e58 {103183080} 'device' (noun):
216
+ # # [noun.artifact] an instrumentality invented for a particular purpose>
72
217
  #
73
- # :include: LICENSE
218
+ # Fetch words for the specific kinds of (device-ish) "instruments"
74
219
  #
75
- #--
220
+ # "instrument".en.hyponyms( "device" ).collect( &:words ).flatten.join(', ')
221
+ # # => "analyser, analyzer, cauterant, cautery, drafting instrument, engine,
222
+ # # extractor, instrument of execution, instrument of punishment, measuring
223
+ # # device, measuring instrument, measuring system, medical instrument,
224
+ # # navigational instrument, optical instrument, plotter, scientific
225
+ # # instrument, sonograph, surveying instrument, surveyor's instrument,
226
+ # # tracer, arm, weapon, weapon system, whip"
227
+ #
228
+ # ...or musical instruments
229
+ #
230
+ # "instrument".en.hyponyms( "musical" ).collect( &:words ).flatten.join(', ')
231
+ # # => "barrel organ, grind organ, hand organ, hurdy-gurdy, hurdy gurdy,
232
+ # # street organ, bass, calliope, steam organ, electronic instrument,
233
+ # # electronic musical instrument, jew's harp, jews' harp, mouth bow, keyboard
234
+ # # instrument, music box, musical box, percussion instrument, percussive
235
+ # # instrument, stringed instrument, wind, wind instrument"
236
+ #
237
+ # There are many more WordNet methods supported--too many to list here. See the
238
+ # WordNet::Synset API documentation for the complete list.
239
+ #
240
+ #
241
+ # == LinkParser Integration
242
+ #
243
+ # If you have the 'linkparser' gem installed, you can create linkages
244
+ # from English sentences that let you query for parts of speech:
245
+ #
246
+ # Test to see whether or not the link parser is loaded.
247
+ #
248
+ # Linguistics::EN.has_linkparser?
249
+ # # => true
250
+ #
251
+ # Diagram the first linkage for a test sentence
252
+ #
253
+ # puts "he is a big dog".en.sentence.linkages.first.diagram
254
+ #
255
+ # Outputs:
256
+ #
257
+ # +-----Ost----+
258
+ # | +----Ds---+
259
+ # +-Ss+ | +--A--+
260
+ # | | | | |
261
+ # he is.v a big.a dog.n
262
+ #
263
+ # Find the verb in the sentence
264
+ #
265
+ # "he is a big dog".en.sentence.verb.to_s
266
+ # # => "is"
267
+ #
268
+ # Combined infinitive + LinkParser: Find the infinitive form of the verb of the
269
+ # given sentence.
270
+ #
271
+ # "he is a big dog".en.sentence.verb.en.infinitive
272
+ # # => "be"
273
+ #
274
+ # Find the direct object of the sentence
275
+ #
276
+ # "he is a big dog".en.sentence.object.to_s
277
+ # # => "dog"
278
+ #
279
+ # Combine WordNet + LinkParser to find the definition of the direct object of
280
+ # the sentence
281
+ #
282
+ # "he is a big dog".en.sentence.object.en.definition
283
+ # # => "a member of the genus Canis (probably descended from the common wolf)
284
+ # # that has been domesticated by man since prehistoric times; occurs in many
285
+ # # breeds"
76
286
  #
77
- # Please see the file LICENSE in the base directory for licensing details.
78
287
  #
79
288
  module Linguistics::EN
289
+ extend Loggability
80
290
 
81
- # Load in the secondary modules and add them to Linguistics::EN.
82
- require 'linguistics/en/infinitive'
83
- require 'linguistics/en/wordnet'
84
- require 'linguistics/en/linkparser'
85
-
86
- # Add 'english' to the list of default languages
87
- Linguistics::DefaultLanguages.push( :en )
88
-
89
-
90
- #################################################################
91
- ### U T I L I T Y F U N C T I O N S
92
- #################################################################
93
-
94
- ### Wrap one or more parts in a non-capturing alteration Regexp
95
- def self::matchgroup( *parts )
96
- re = parts.flatten.join("|")
97
- "(?:#{re})"
98
- end
99
-
100
-
101
- @lprintf_formatters = {}
102
- class << self
103
- attr_accessor :lprintf_formatters
104
- end
105
-
106
- ### Add the specified method (which can be either a Method object or a
107
- ### Symbol for looking up a method)
108
- def self::def_lprintf_formatter( name, meth )
109
- meth = self.method( meth ) unless meth.is_a?( Method )
110
- self.lprintf_formatters[ name ] = meth
111
- end
112
-
113
-
114
-
115
- #################################################################
116
- ### C O N S T A N T S
117
- #################################################################
118
-
119
- # :stopdoc:
120
-
121
- #
122
- # Plurals
123
- #
124
-
125
- PL_sb_irregular_s = {
126
- "ephemeris" => "ephemerides",
127
- "iris" => "irises|irides",
128
- "clitoris" => "clitorises|clitorides",
129
- "corpus" => "corpuses|corpora",
130
- "opus" => "opuses|opera",
131
- "genus" => "genera",
132
- "mythos" => "mythoi",
133
- "penis" => "penises|penes",
134
- "testis" => "testes",
135
- }
136
-
137
- PL_sb_irregular_h = {
138
- "child" => "children",
139
- "brother" => "brothers|brethren",
140
- "loaf" => "loaves",
141
- "hoof" => "hoofs|hooves",
142
- "beef" => "beefs|beeves",
143
- "money" => "monies",
144
- "mongoose" => "mongooses",
145
- "ox" => "oxen",
146
- "cow" => "cows|kine",
147
- "soliloquy" => "soliloquies",
148
- "graffito" => "graffiti",
149
- "prima donna" => "prima donnas|prime donne",
150
- "octopus" => "octopuses|octopodes",
151
- "genie" => "genies|genii",
152
- "ganglion" => "ganglions|ganglia",
153
- "trilby" => "trilbys",
154
- "turf" => "turfs|turves",
155
- }.update( PL_sb_irregular_s )
156
- PL_sb_irregular = matchgroup PL_sb_irregular_h.keys
157
-
158
-
159
- # Classical "..a" -> "..ata"
160
- PL_sb_C_a_ata = matchgroup %w[
161
- anathema bema carcinoma charisma diploma
162
- dogma drama edema enema enigma lemma
163
- lymphoma magma melisma miasma oedema
164
- sarcoma schema soma stigma stoma trauma
165
- gumma pragma
166
- ].collect {|word| word[0...-1]}
167
-
168
- # Unconditional "..a" -> "..ae"
169
- PL_sb_U_a_ae = matchgroup %w[
170
- alumna alga vertebra persona
171
- ]
172
-
173
- # Classical "..a" -> "..ae"
174
- PL_sb_C_a_ae = matchgroup %w[
175
- amoeba antenna formula hyperbola
176
- medusa nebula parabola abscissa
177
- hydra nova lacuna aurora .*umbra
178
- flora fauna
179
- ]
180
-
181
- # Classical "..en" -> "..ina"
182
- PL_sb_C_en_ina = matchgroup %w[
183
- stamen foramen lumen
184
- ].collect {|word| word[0...-2] }
185
-
186
- # Unconditional "..um" -> "..a"
187
- PL_sb_U_um_a = matchgroup %w[
188
- bacterium agendum desideratum erratum
189
- stratum datum ovum extremum
190
- candelabrum
191
- ].collect {|word| word[0...-2] }
192
-
193
- # Classical "..um" -> "..a"
194
- PL_sb_C_um_a = matchgroup %w[
195
- maximum minimum momentum optimum
196
- quantum cranium curriculum dictum
197
- phylum aquarium compendium emporium
198
- enconium gymnasium honorarium interregnum
199
- lustrum memorandum millenium rostrum
200
- spectrum speculum stadium trapezium
201
- ultimatum medium vacuum velum
202
- consortium
203
- ].collect {|word| word[0...-2]}
204
-
205
- # Unconditional "..us" -> "i"
206
- PL_sb_U_us_i = matchgroup %w[
207
- alumnus alveolus bacillus bronchus
208
- locus nucleus stimulus meniscus
209
- ].collect {|word| word[0...-2]}
210
-
211
- # Classical "..us" -> "..i"
212
- PL_sb_C_us_i = matchgroup %w[
213
- focus radius genius
214
- incubus succubus nimbus
215
- fungus nucleolus stylus
216
- torus umbilicus uterus
217
- hippopotamus
218
- ].collect {|word| word[0...-2]}
291
+ # Loggability API -- log to the Linguistics logger
292
+ log_to :linguistics
219
293
 
220
- # Classical "..us" -> "..us" (assimilated 4th declension latin nouns)
221
- PL_sb_C_us_us = matchgroup %w[
222
- status apparatus prospectus sinus
223
- hiatus impetus plexus
224
- ]
294
+ # The list of loaded modules
295
+ MODULES = []
225
296
 
226
- # Unconditional "..on" -> "a"
227
- PL_sb_U_on_a = matchgroup %w[
228
- criterion perihelion aphelion
229
- phenomenon prolegomenon noumenon
230
- organon asyndeton hyperbaton
231
- ].collect {|word| word[0...-2]}
297
+ # The key to set in the thread-hash to indicate it's running in 'classical' mode
298
+ THREAD_CLASSICAL_KEY = :english_classical_mode
232
299
 
233
- # Classical "..on" -> "..a"
234
- PL_sb_C_on_a = matchgroup %w[
235
- oxymoron
236
- ].collect {|word| word[0...-2]}
237
300
 
238
- # Classical "..o" -> "..i" (but normally -> "..os")
239
- PL_sb_C_o_i_a = %w[
240
- solo soprano basso alto
241
- contralto tempo piano
242
- ]
243
- PL_sb_C_o_i = matchgroup PL_sb_C_o_i_a.collect{|word| word[0...-1]}
301
+ # A Hash of 'lprintf' formatters keyed by name
302
+ @@lprintf_formatters = {}
244
303
 
245
- # Always "..o" -> "..os"
246
- PL_sb_U_o_os = matchgroup( %w[
247
- albino archipelago armadillo
248
- commando crescendo fiasco
249
- ditto dynamo embryo
250
- ghetto guano inferno
251
- jumbo lumbago magneto
252
- manifesto medico octavo
253
- photo pro quarto
254
- canto lingo generalissimo
255
- stylo rhino
256
- ] | PL_sb_C_o_i_a )
257
-
258
-
259
- # Unconditional "..[ei]x" -> "..ices"
260
- PL_sb_U_ex_ices = matchgroup %w[
261
- codex murex silex
262
- ].collect {|word| word[0...-2]}
263
- PL_sb_U_ix_ices = matchgroup %w[
264
- radix helix
265
- ].collect {|word| word[0...-2]}
266
-
267
- # Classical "..[ei]x" -> "..ices"
268
- PL_sb_C_ex_ices = matchgroup %w[
269
- vortex vertex cortex latex
270
- pontifex apex index simplex
271
- ].collect {|word| word[0...-2]}
272
- PL_sb_C_ix_ices = matchgroup %w[
273
- appendix
274
- ].collect {|word| word[0...-2]}
275
-
276
-
277
- # Arabic: ".." -> "..i"
278
- PL_sb_C_i = matchgroup %w[
279
- afrit afreet efreet
280
- ]
281
-
282
-
283
- # Hebrew: ".." -> "..im"
284
- PL_sb_C_im = matchgroup %w[
285
- goy seraph cherub
286
- ]
287
-
288
- # Unconditional "..man" -> "..mans"
289
- PL_sb_U_man_mans = matchgroup %w[
290
- human
291
- Alabaman Bahaman Burman German
292
- Hiroshiman Liman Nakayaman Oklahoman
293
- Panaman Selman Sonaman Tacoman Yakiman
294
- Yokohaman Yuman
295
- ]
296
-
297
-
298
- PL_sb_uninflected_s = [
299
- # Pairs or groups subsumed to a singular...
300
- "breeches", "britches", "clippers", "gallows", "hijinks",
301
- "headquarters", "pliers", "scissors", "testes", "herpes",
302
- "pincers", "shears", "proceedings", "trousers",
303
-
304
- # Unassimilated Latin 4th declension
305
- "cantus", "coitus", "nexus",
306
-
307
- # Recent imports...
308
- "contretemps", "corps", "debris",
309
- ".*ois",
310
-
311
- # Diseases
312
- ".*measles", "mumps",
313
-
314
- # Miscellaneous others...
315
- "diabetes", "jackanapes", "series", "species", "rabies",
316
- "chassis", "innings", "news", "mews",
317
- ]
318
-
319
-
320
- # Don't inflect in classical mode, otherwise normal inflection
321
- PL_sb_uninflected_herd = matchgroup %w[
322
- wildebeest swine eland bison buffalo
323
- elk moose rhinoceros
324
- ]
325
-
326
- PL_sb_uninflected = matchgroup [
327
-
328
- # Some fish and herd animals
329
- ".*fish", "tuna", "salmon", "mackerel", "trout",
330
- "bream", "sea[- ]bass", "carp", "cod", "flounder", "whiting",
331
-
332
- ".*deer", ".*sheep",
333
-
334
- # All nationals ending in -ese
335
- "Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
336
- "Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
337
- "Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
338
- "Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
339
- "Shavese", "Vermontese", "Wenchowese", "Yengeese",
340
- ".*[nrlm]ese",
341
-
342
- # Some words ending in ...s (often pairs taken as a whole)
343
- PL_sb_uninflected_s,
344
-
345
- # Diseases
346
- ".*pox",
347
-
348
- # Other oddities
349
- "graffiti", "djinn"
350
- ]
351
-
352
-
353
- # Singular words ending in ...s (all inflect with ...es)
354
- PL_sb_singular_s = matchgroup %w[
355
- .*ss
356
- acropolis aegis alias arthritis asbestos atlas
357
- bathos bias bronchitis bursitis caddis cannabis
358
- canvas chaos cosmos dais digitalis encephalitis
359
- epidermis ethos eyas gas glottis hepatitis
360
- hubris ibis lens mantis marquis metropolis
361
- neuritis pathos pelvis polis rhinoceros
362
- sassafras tonsillitis trellis .*us
363
- ]
364
-
365
- PL_v_special_s = matchgroup [
366
- PL_sb_singular_s,
367
- PL_sb_uninflected_s,
368
- PL_sb_irregular_s.keys,
369
- '(.*[csx])is',
370
- '(.*)ceps',
371
- '[A-Z].*s',
372
- ]
373
-
374
- PL_sb_postfix_adj = '(' + {
375
-
376
- 'general' => ['(?!major|lieutenant|brigadier|adjutant)\S+'],
377
- 'martial' => ["court"],
378
-
379
- }.collect {|key,val|
380
- matchgroup( matchgroup(val) + "(?=(?:-|\\s+)#{key})" )
381
- }.join("|") + ")(.*)"
382
-
383
-
384
- PL_sb_military = %r'major|lieutenant|brigadier|adjutant|quartermaster'
385
- PL_sb_general = %r'((?!#{PL_sb_military.source}).*?)((-|\s+)general)'
386
-
387
- PL_prep = matchgroup %w[
388
- about above across after among around at athwart before behind
389
- below beneath beside besides between betwixt beyond but by
390
- during except for from in into near of off on onto out over
391
- since till to under until unto upon with
392
- ]
393
-
394
- PL_sb_prep_dual_compound = %r'(.*?)((?:-|\s+)(?:#{PL_prep}|d[eu])(?:-|\s+))a(?:-|\s+)(.*)'
395
- PL_sb_prep_compound = %r'(.*?)((-|\s+)(#{PL_prep}|d[eu])((-|\s+)(.*))?)'
396
-
397
-
398
- PL_pron_nom_h = {
399
- # Nominative Reflexive
400
- "i" => "we", "myself" => "ourselves",
401
- "you" => "you", "yourself" => "yourselves",
402
- "she" => "they", "herself" => "themselves",
403
- "he" => "they", "himself" => "themselves",
404
- "it" => "they", "itself" => "themselves",
405
- "they" => "they", "themself" => "themselves",
406
-
407
- # Possessive
408
- "mine" => "ours",
409
- "yours" => "yours",
410
- "hers" => "theirs",
411
- "his" => "theirs",
412
- "its" => "theirs",
413
- "theirs" => "theirs",
414
- }
415
- PL_pron_nom = matchgroup PL_pron_nom_h.keys
416
-
417
- PL_pron_acc_h = {
418
- # Accusative Reflexive
419
- "me" => "us", "myself" => "ourselves",
420
- "you" => "you", "yourself" => "yourselves",
421
- "her" => "them", "herself" => "themselves",
422
- "him" => "them", "himself" => "themselves",
423
- "it" => "them", "itself" => "themselves",
424
- "them" => "them", "themself" => "themselves",
425
- }
426
- PL_pron_acc = matchgroup PL_pron_acc_h.keys
427
-
428
- PL_v_irregular_pres_h = {
429
- # 1St pers. sing. 2nd pers. sing. 3rd pers. singular
430
- # 3rd pers. (indet.)
431
- "am" => "are", "are" => "are", "is" => "are",
432
- "was" => "were", "were" => "were", "was" => "were",
433
- "have" => "have", "have" => "have", "has" => "have",
434
- }
435
- PL_v_irregular_pres = matchgroup PL_v_irregular_pres_h.keys
436
-
437
- PL_v_ambiguous_pres_h = {
438
- # 1st pers. sing. 2nd pers. sing. 3rd pers. singular
439
- # 3rd pers. (indet.)
440
- "act" => "act", "act" => "act", "acts" => "act",
441
- "blame" => "blame", "blame" => "blame", "blames" => "blame",
442
- "can" => "can", "can" => "can", "can" => "can",
443
- "must" => "must", "must" => "must", "must" => "must",
444
- "fly" => "fly", "fly" => "fly", "flies" => "fly",
445
- "copy" => "copy", "copy" => "copy", "copies" => "copy",
446
- "drink" => "drink", "drink" => "drink", "drinks" => "drink",
447
- "fight" => "fight", "fight" => "fight", "fights" => "fight",
448
- "fire" => "fire", "fire" => "fire", "fires" => "fire",
449
- "like" => "like", "like" => "like", "likes" => "like",
450
- "look" => "look", "look" => "look", "looks" => "look",
451
- "make" => "make", "make" => "make", "makes" => "make",
452
- "reach" => "reach", "reach" => "reach", "reaches" => "reach",
453
- "run" => "run", "run" => "run", "runs" => "run",
454
- "sink" => "sink", "sink" => "sink", "sinks" => "sink",
455
- "sleep" => "sleep", "sleep" => "sleep", "sleeps" => "sleep",
456
- "view" => "view", "view" => "view", "views" => "view",
457
- }
458
- PL_v_ambiguous_pres = matchgroup PL_v_ambiguous_pres_h.keys
459
-
460
- PL_v_irregular_non_pres = matchgroup %w[
461
- did had ate made put
462
- spent fought sank gave sought
463
- shall could ought should
464
- ]
465
-
466
- PL_v_ambiguous_non_pres = matchgroup %w[
467
- thought saw bent will might cut
468
- ]
469
-
470
- PL_count_zero = matchgroup %w[
471
- 0 no zero nil
472
- ]
473
-
474
- PL_count_one = matchgroup %w[
475
- 1 a an one each every this that
476
- ]
477
-
478
- PL_adj_special_h = {
479
- "a" => "some", "an" => "some",
480
- "this" => "these", "that" => "those",
481
- }
482
- PL_adj_special = matchgroup PL_adj_special_h.keys
483
-
484
- PL_adj_poss_h = {
485
- "my" => "our",
486
- "your" => "your",
487
- "its" => "their",
488
- "her" => "their",
489
- "his" => "their",
490
- "their" => "their",
491
- }
492
- PL_adj_poss = matchgroup PL_adj_poss_h.keys
493
-
494
-
495
- #
496
- # Numerals, ordinals, and numbers-to-words
497
- #
498
-
499
- # Numerical inflections
500
- Nth = {
501
- 0 => 'th',
502
- 1 => 'st',
503
- 2 => 'nd',
504
- 3 => 'rd',
505
- 4 => 'th',
506
- 5 => 'th',
507
- 6 => 'th',
508
- 7 => 'th',
509
- 8 => 'th',
510
- 9 => 'th',
511
- 11 => 'th',
512
- 12 => 'th',
513
- 13 => 'th',
514
- }
515
-
516
- # Ordinal word parts
517
- Ordinals = {
518
- 'ty' => 'tieth',
519
- 'one' => 'first',
520
- 'two' => 'second',
521
- 'three' => 'third',
522
- 'five' => 'fifth',
523
- 'eight' => 'eighth',
524
- 'nine' => 'ninth',
525
- 'twelve' => 'twelfth',
526
- }
527
- OrdinalSuffixes = Ordinals.keys.join("|") + "|"
528
- Ordinals[""] = 'th'
529
-
530
- # Numeral names
531
- Units = [''] + %w[one two three four five six seven eight nine]
532
- Teens = %w[ten eleven twelve thirteen fourteen
533
- fifteen sixteen seventeen eighteen nineteen]
534
- Tens = ['',''] + %w[twenty thirty forty fifty sixty seventy eighty ninety]
535
- Thousands = [' ', ' thousand'] + %w[
536
- m b tr quadr quint sext sept oct non dec undec duodec tredec
537
- quattuordec quindec sexdec septemdec octodec novemdec vigint
538
- ].collect {|prefix| ' ' + prefix + 'illion'}
539
-
540
- # A collection of functions for transforming digits into word
541
- # phrases. Indexed by the number of digits being transformed; e.g.,
542
- # <tt>NumberToWordsFunctions[2]</tt> is the function for transforming
543
- # double-digit numbers.
544
- NumberToWordsFunctions = [
545
- proc {|*args| raise "No digits (#{args.inspect})"},
546
-
547
- # Single-digits
548
- proc {|zero,x|
549
- (x.nonzero? ? to_units(x) : "#{zero} ")
550
- },
551
-
552
- # Double-digits
553
- proc {|zero,x,y|
554
- if x.nonzero?
555
- to_tens( x, y )
556
- elsif y.nonzero?
557
- "#{zero} " + NumberToWordsFunctions[1].call( zero, y )
558
- else
559
- ([zero] * 2).join(" ")
560
- end
561
- },
562
-
563
- # Triple-digits
564
- proc {|zero,x,y,z|
565
- NumberToWordsFunctions[1].call(zero,x) +
566
- NumberToWordsFunctions[2].call(zero,y,z)
567
- }
568
- ]
569
-
570
-
571
- #
572
- # Indefinite Articles
573
- #
574
-
575
- # This pattern matches strings of capitals starting with a "vowel-sound"
576
- # consonant followed by another consonant, and which are not likely
577
- # to be real words (oh, all right then, it's just magic!)
578
- A_abbrev = %{
579
- (?! FJO | [HLMNS]Y. | RY[EO] | SQU
580
- | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU])
581
- [FHLMNRSX][A-Z]
582
- }
583
-
584
- # This pattern codes the beginnings of all english words begining with a
585
- # 'y' followed by a consonant. Any other y-consonant prefix therefore
586
- # implies an abbreviation.
587
- A_y_cons = 'y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)'
588
-
589
- # Exceptions to exceptions
590
- A_explicit_an = matchgroup( "euler", "hour(?!i)", "heir", "honest", "hono" )
591
-
592
-
593
- #
594
- # Configuration defaults
595
- #
596
-
597
- # Default configuration arguments for the #numwords function
598
- NumwordDefaults = {
599
- :group => 0,
600
- :comma => ', ',
601
- :and => ' and ',
602
- :zero => 'zero',
603
- :decimal => 'point',
604
- :asArray => false,
605
- }
606
-
607
- # Default ranges for #quantify
608
- SeveralRange = 2..5
609
- NumberRange = 6..19
610
- NumerousRange = 20..45
611
- ManyRange = 46..99
612
-
613
- # Default configuration arguments for the #quantify function
614
- QuantifyDefaults = {
615
- :joinword => " of ",
616
- }
617
-
618
- # Default configuration arguments for the #conjunction (junction, what's
619
- # your) function.
620
- ConjunctionDefaults = {
621
- :separator => ', ',
622
- :altsep => '; ',
623
- :penultimate => true,
624
- :conjunctive => 'and',
625
- :combine => true,
626
- :casefold => true,
627
- :generalize => false,
628
- :quantsort => true,
629
- }
630
-
631
-
632
- #
633
- # Title case
634
- #
635
-
636
- # "In titles, capitalize the first word, the last word, and all words in
637
- # between except articles (a, an, and the), prepositions under five letters
638
- # (in, of, to), and coordinating conjunctions (and, but). These rules apply
639
- # to titles of long, short, and partial works as well as your own papers"
640
- # (Anson, Schwegler, and Muth. The Longman Writer's Companion 240).
641
-
642
- # Build the list of exceptions to title-capitalization
643
- Articles = %w[a and the]
644
- ShortPrepositions = ["amid", "at", "but", "by", "down", "from", "in",
645
- "into", "like", "near", "of", "off", "on", "onto", "out", "over",
646
- "past", "save", "with", "till", "to", "unto", "up", "upon", "with"]
647
- CoordConjunctions = %w[and but as]
648
- TitleCaseExceptions = Articles | ShortPrepositions | CoordConjunctions
649
-
650
-
651
- # :startdoc:
652
304
 
653
305
  #################################################################
654
- ### " B A C K E N D " F U N C T I O N S
306
+ ### U T I L I T Y F U N C T I O N S
655
307
  #################################################################
656
308
 
657
-
658
- ###############
659
- module_function
660
- ###############
661
-
662
- ### Debugging output
663
- def debug_msg( *msgs ) # :nodoc:
664
- $stderr.puts msgs.join(" ") if $DEBUG
665
- end
666
-
667
-
668
- ### Normalize a count to either 1 or 2 (singular or plural)
669
- def normalize_count( count, default=2 )
670
- return default if count.nil? # Default to plural
671
- if /^(#{PL_count_one})$/i =~ count.to_s ||
672
- Linguistics::classical? &&
673
- /^(#{PL_count_zero})$/ =~ count.to_s
674
- return 1
675
- else
676
- return default
677
- end
309
+ ### A Hash of formatters for the lprintf function.
310
+ def self::lprintf_formatters
311
+ return @@lprintf_formatters
678
312
  end
679
313
 
680
314
 
681
- ### Do normal/classical switching and match capitalization in <tt>inflected</tt> by
682
- ### examining the <tt>original</tt> input.
683
- def postprocess( original, inflected )
684
- inflected.sub!( /([^|]+)\|(.+)/ ) {
685
- Linguistics::classical? ? $2 : $1
686
- }
687
-
688
- case original
689
- when "I"
690
- return inflected
691
- when /^[A-Z]+$/
692
- return inflected.upcase
693
- when /^[A-Z]/
694
- # Can't use #capitalize, as it will downcase the rest of the string,
695
- # too.
696
- inflected[0,1] = inflected[0,1].upcase
697
- return inflected
698
- else
699
- return inflected
700
- end
701
- end
702
-
703
-
704
- ### Pluralize nouns
705
- def pluralize_noun( word, count=nil )
706
- value = nil
707
- count ||= Linguistics::num
708
- count = normalize_count( count )
709
-
710
- return word if count == 1
711
-
712
- # Handle user-defined nouns
713
- #if value = ud_match( word, PL_sb_user_defined )
714
- # return value
715
- #end
716
-
717
- # Handle empty word, singular count and uninflected plurals
718
- case word
719
- when ''
720
- return word
721
- when /^(#{PL_sb_uninflected})$/i
722
- return word
723
- else
724
- if Linguistics::classical? &&
725
- /^(#{PL_sb_uninflected_herd})$/i =~ word
726
- return word
727
- end
728
- end
729
-
730
- # Handle compounds ("Governor General", "mother-in-law", "aide-de-camp", etc.)
731
- case word
732
- when /^(?:#{PL_sb_postfix_adj})$/i
733
- value = $2
734
- return pluralize_noun( $1, 2 ) + value
735
-
736
- when /^(?:#{PL_sb_prep_dual_compound})$/i
737
- value = [ $2, $3 ]
738
- return pluralize_noun( $1, 2 ) + value[0] + pluralize_noun( value[1] )
739
-
740
- when /^(?:#{PL_sb_prep_compound})$/i
741
- value = $2
742
- return pluralize_noun( $1, 2 ) + value
743
-
744
- # Handle pronouns
745
- when /^((?:#{PL_prep})\s+)(#{PL_pron_acc})$/i
746
- return $1 + PL_pron_acc_h[ $2.downcase ]
747
-
748
- when /^(#{PL_pron_nom})$/i
749
- return PL_pron_nom_h[ word.downcase ]
750
-
751
- when /^(#{PL_pron_acc})$/i
752
- return PL_pron_acc_h[ $1.downcase ]
753
-
754
- # Handle isolated irregular plurals
755
- when /(.*)\b(#{PL_sb_irregular})$/i
756
- return $1 + PL_sb_irregular_h[ $2.downcase ]
315
+ ### Register an English-language extension.
316
+ def self::register_extension( mod )
317
+ MODULES.push( mod )
318
+ self.log.debug "Registered English extension %p" % [ mod ]
757
319
 
758
- when /(#{PL_sb_U_man_mans})$/i
759
- return "#{$1}s"
320
+ include( mod )
321
+ mod.extend( Loggability )
322
+ mod.log_to( :linguistics )
760
323
 
761
- # Handle families of irregular plurals
762
- when /(.*)man$/i ; return "#{$1}men"
763
- when /(.*[ml])ouse$/i ; return "#{$1}ice"
764
- when /(.*)goose$/i ; return "#{$1}geese"
765
- when /(.*)tooth$/i ; return "#{$1}teeth"
766
- when /(.*)foot$/i ; return "#{$1}feet"
324
+ if mod.const_defined?( :SingletonMethods )
325
+ smod = mod.const_get(:SingletonMethods)
326
+ self.log.debug " and its singleton methods %p" % [ smod ]
327
+ extend( smod )
767
328
 
768
- # Handle unassimilated imports
769
- when /(.*)ceps$/i ; return word
770
- when /(.*)zoon$/i ; return "#{$1}zoa"
771
- when /(.*[csx])is$/i ; return "#{$1}es"
772
- when /(#{PL_sb_U_ex_ices})ex$/i; return "#{$1}ices"
773
- when /(#{PL_sb_U_ix_ices})ix$/i; return "#{$1}ices"
774
- when /(#{PL_sb_U_um_a})um$/i ; return "#{$1}a"
775
- when /(#{PL_sb_U_us_i})us$/i ; return "#{$1}i"
776
- when /(#{PL_sb_U_on_a})on$/i ; return "#{$1}a"
777
- when /(#{PL_sb_U_a_ae})$/i ; return "#{$1}e"
778
- end
779
-
780
- # Handle incompletely assimilated imports
781
- if Linguistics::classical?
782
- case word
783
- when /(.*)trix$/i ; return "#{$1}trices"
784
- when /(.*)eau$/i ; return "#{$1}eaux"
785
- when /(.*)ieu$/i ; return "#{$1}ieux"
786
- when /(.{2,}[yia])nx$/i ; return "#{$1}nges"
787
- when /(#{PL_sb_C_en_ina})en$/i; return "#{$1}ina"
788
- when /(#{PL_sb_C_ex_ices})ex$/i; return "#{$1}ices"
789
- when /(#{PL_sb_C_ix_ices})ix$/i; return "#{$1}ices"
790
- when /(#{PL_sb_C_um_a})um$/i ; return "#{$1}a"
791
- when /(#{PL_sb_C_us_i})us$/i ; return "#{$1}i"
792
- when /(#{PL_sb_C_us_us})$/i ; return "#{$1}"
793
- when /(#{PL_sb_C_a_ae})$/i ; return "#{$1}e"
794
- when /(#{PL_sb_C_a_ata})a$/i ; return "#{$1}ata"
795
- when /(#{PL_sb_C_o_i})o$/i ; return "#{$1}i"
796
- when /(#{PL_sb_C_on_a})on$/i ; return "#{$1}a"
797
- when /#{PL_sb_C_im}$/i ; return "#{word}im"
798
- when /#{PL_sb_C_i}$/i ; return "#{word}i"
329
+ ivars = mod.instance_variables
330
+ self.log.debug " and instance variables %p" % [ ivars ]
331
+ ivars.each do |ivar|
332
+ instance_variable_set( ivar, mod.instance_variable_get(ivar) )
799
333
  end
800
334
  end
801
-
802
-
803
- # Handle singular nouns ending in ...s or other silibants
804
- case word
805
- when /^(#{PL_sb_singular_s})$/i; return "#{$1}es"
806
- when /^([A-Z].*s)$/; return "#{$1}es"
807
- when /(.*)([cs]h|[zx])$/i ; return "#{$1}#{$2}es"
808
- # when /(.*)(us)$/i ; return "#{$1}#{$2}es"
809
-
810
- # Handle ...f -> ...ves
811
- when /(.*[eao])lf$/i ; return "#{$1}lves";
812
- when /(.*[^d])eaf$/i ; return "#{$1}eaves"
813
- when /(.*[nlw])ife$/i ; return "#{$1}ives"
814
- when /(.*)arf$/i ; return "#{$1}arves"
815
-
816
- # Handle ...y
817
- when /(.*[aeiou])y$/i ; return "#{$1}ys"
818
- when /([A-Z].*y)$/ ; return "#{$1}s"
819
- when /(.*)y$/i ; return "#{$1}ies"
820
-
821
- # Handle ...o
822
- when /#{PL_sb_U_o_os}$/i ; return "#{word}s"
823
- when /[aeiou]o$/i ; return "#{word}s"
824
- when /o$/i ; return "#{word}es"
825
-
826
- # Otherwise just add ...s
827
- else
828
- return "#{word}s"
829
- end
830
- end # def pluralize_noun
831
-
832
-
833
-
834
- ### Pluralize special verbs
835
- def pluralize_special_verb( word, count )
836
- count ||= Linguistics::num
837
- count = normalize_count( count )
838
-
839
- return nil if /^(#{PL_count_one})$/i =~ count.to_s
840
-
841
- # Handle user-defined verbs
842
- #if value = ud_match( word, PL_v_user_defined )
843
- # return value
844
- #end
845
-
846
- case word
847
-
848
- # Handle irregular present tense (simple and compound)
849
- when /^(#{PL_v_irregular_pres})((\s.*)?)$/i
850
- return PL_v_irregular_pres_h[ $1.downcase ] + $2
851
-
852
- # Handle irregular future, preterite and perfect tenses
853
- when /^(#{PL_v_irregular_non_pres})((\s.*)?)$/i
854
- return word
855
-
856
- # Handle special cases
857
- when /^(#{PL_v_special_s})$/, /\s/
858
- return nil
859
-
860
- # Handle standard 3rd person (chop the ...(e)s off single words)
861
- when /^(.*)([cs]h|[x]|zz|ss)es$/i
862
- return $1 + $2
863
- when /^(..+)ies$/i
864
- return "#{$1}y"
865
- when /^(.+)oes$/i
866
- return "#{$1}o"
867
- when /^(.*[^s])s$/i
868
- return $1
869
-
870
- # Otherwise, a regular verb (handle elsewhere)
871
- else
872
- return nil
873
- end
874
335
  end
875
336
 
876
337
 
877
- ### Pluralize regular verbs
878
- def pluralize_general_verb( word, count )
879
- count ||= Linguistics::num
880
- count = normalize_count( count )
881
-
882
- return word if /^(#{PL_count_one})$/i =~ count.to_s
883
-
884
- case word
885
-
886
- # Handle ambiguous present tenses (simple and compound)
887
- when /^(#{PL_v_ambiguous_pres})((\s.*)?)$/i
888
- return PL_v_ambiguous_pres_h[ $1.downcase ] + $2
889
-
890
- # Handle ambiguous preterite and perfect tenses
891
- when /^(#{PL_v_ambiguous_non_pres})((\s.*)?)$/i
892
- return word
893
-
894
- # Otherwise, 1st or 2nd person is uninflected
895
- else
896
- return word
338
+ ### Returns +true+ if the English-language module with the given +name+ was
339
+ ### successfully registered.
340
+ def self::has_extension?( name )
341
+ return MODULES.any? do |mod|
342
+ mod.name.sub( /.*::/, '' ).downcase == name.to_s.downcase
897
343
  end
898
344
  end
899
345
 
900
346
 
901
- ### Handle special adjectives
902
- def pluralize_special_adjective( word, count )
903
- count ||= Linguistics::num
904
- count = normalize_count( count )
905
-
906
- return word if /^(#{PL_count_one})$/i =~ count.to_s
907
-
908
- # Handle user-defined verbs
909
- #if value = ud_match( word, PL_adj_user_defined )
910
- # return value
911
- #end
912
-
913
- case word
914
-
915
- # Handle known cases
916
- when /^(#{PL_adj_special})$/i
917
- return PL_adj_special_h[ $1.downcase ]
918
-
919
- # Handle possessives
920
- when /^(#{PL_adj_poss})$/i
921
- return PL_adj_poss_h[ $1.downcase ]
922
-
923
- when /^(.*)'s?$/
924
- pl = plural_noun( $1 )
925
- if /s$/ =~ pl
926
- return "#{pl}'"
927
- else
928
- return "#{pl}'s"
929
- end
930
-
931
- # Otherwise, no idea
932
- else
933
- return nil
934
- end
935
- end
936
-
937
-
938
- ### Returns the given word with a prepended indefinite article, unless
939
- ### +count+ is non-nil and not singular.
940
- def indef_article( word, count )
941
- count ||= Linguistics::num
942
- return "#{count} #{word}" if
943
- count && /^(#{PL_count_one})$/i !~ count.to_s
944
-
945
- # Handle user-defined variants
946
- # return value if value = ud_match( word, A_a_user_defined )
947
-
948
- case word
949
-
950
- # Handle special cases
951
- when /^(#{A_explicit_an})/i
952
- return "an #{word}"
953
-
954
- # Handle abbreviations
955
- when /^(#{A_abbrev})/x
956
- return "an #{word}"
957
- when /^[aefhilmnorsx][.-]/i
958
- return "an #{word}"
959
- when /^[a-z][.-]/i
960
- return "a #{word}"
961
-
962
- # Handle consonants
963
- when /^[^aeiouy]/i
964
- return "a #{word}"
965
-
966
- # Handle special vowel-forms
967
- when /^e[uw]/i
968
- return "a #{word}"
969
- when /^onc?e\b/i
970
- return "a #{word}"
971
- when /^uni([^nmd]|mo)/i
972
- return "a #{word}"
973
- when /^u[bcfhjkqrst][aeiou]/i
974
- return "a #{word}"
975
-
976
- # Handle vowels
977
- when /^[aeiou]/i
978
- return "an #{word}"
979
-
980
- # Handle y... (before certain consonants implies (unnaturalized) "i.." sound)
981
- when /^(#{A_y_cons})/i
982
- return "an #{word}"
983
-
984
- # Otherwise, guess "a"
985
- else
986
- return "a #{word}"
987
- end
988
- end
989
-
990
-
991
- ### Transform the specified number of units-place numerals into a
992
- ### word-phrase at the given number of +thousands+ places.
993
- def to_units( units, thousands=0 )
994
- return Units[ units ] + to_thousands( thousands )
347
+ ### Debugging output
348
+ def self::debug_msg( *msgs ) # :nodoc:
349
+ $stderr.puts msgs.join(" ") if $DEBUG
995
350
  end
996
351
 
997
352
 
998
- ### Transform the specified number of tens- and units-place numerals into a
999
- ### word-phrase at the given number of +thousands+ places.
1000
- def to_tens( tens, units, thousands=0 )
1001
- unless tens == 1
1002
- return Tens[ tens ] + ( tens.nonzero? && units.nonzero? ? '-' : '' ) +
1003
- to_units( units, thousands )
1004
- else
1005
- return Teens[ units ] + to_thousands( thousands )
1006
- end
1007
- end
1008
-
353
+ ### Add an lprintf formatter named +name+ that will use the specified +callback+ method.
354
+ ### The name of the formatter is the placeholder that will be used in the
355
+ ### format string, and the +callback+ is the method to call on the english-language
356
+ ### inflector for the lprintf argument, and can either be an object that responds to
357
+ ### #call, or the name of a method to call as a Symbol.
358
+ ###
359
+ ### Using a Symbol:
360
+ ###
361
+ ### def plural( count=2 )
362
+ ### # return the plural of the inflected object
363
+ ### end
364
+ ### Linguistics::EN.register_lprintf_formatter :PL, :plural
365
+ ###
366
+ ### Using a method:
367
+ ###
368
+ ### Linguistics::EN.register_lprintf_formatter :PL, method( :plural )
369
+ ###
370
+ ### Using a block:
371
+ ###
372
+ ### Linguistics::EN.register_lprintf_formatter :PL do |obj|
373
+ ### obj.en.plural
374
+ ### end
375
+ ###
376
+ def self::register_lprintf_formatter( name, callback=nil )
377
+ raise LocalJumpError, "no callback or block given" unless callback || block_given?
378
+ callback ||= Proc.new
1009
379
 
1010
- ### Transform the specified number of hundreds-, tens-, and units-place
1011
- ### numerals into a word phrase. If the number of thousands (+thousands+) is
1012
- ### greater than 0, it will be used to determine where the decimal point is
1013
- ### in relation to the hundreds-place number.
1014
- def to_hundreds( hundreds, tens=0, units=0, thousands=0, joinword=" and " )
1015
- joinword = ' ' if joinword.empty?
1016
- if hundreds.nonzero?
1017
- return to_units( hundreds ) + " hundred" +
1018
- (tens.nonzero? || units.nonzero? ? joinword : '') +
1019
- to_tens( tens, units ) +
1020
- to_thousands( thousands )
1021
- elsif tens.nonzero? || units.nonzero?
1022
- return to_tens( tens, units ) + to_thousands( thousands )
1023
- else
1024
- return nil
1025
- end
380
+ @@lprintf_formatters[ name ] = callback.to_proc
1026
381
  end
1027
382
 
1028
- ### Transform the specified number into one or more words like 'thousand',
1029
- ### 'million', etc. Uses the thousands (American) system.
1030
- def to_thousands( thousands=0 )
1031
- parts = []
1032
- (0..thousands).step( Thousands.length - 1 ) {|i|
1033
- if i.zero?
1034
- parts.push Thousands[ thousands % (Thousands.length - 1) ]
1035
- else
1036
- parts.push Thousands.last
1037
- end
1038
- }
1039
383
 
1040
- return parts.join(" ")
384
+ ### Return +true+ if running in a 'classical' mode.
385
+ def self::classical?
386
+ return Thread.current[ THREAD_CLASSICAL_KEY ] ? true : false
1041
387
  end
1042
388
 
1043
389
 
1044
- ### Return the specified number +num+ as an array of number phrases.
1045
- def number_to_words( num, config )
1046
- return [config[:zero]] if num.to_i.zero?
1047
- chunks = []
1048
-
1049
- # Break into word-groups if groups is set
1050
- if config[:group].nonzero?
1051
-
1052
- # Build a Regexp with <config[:group]> number of digits. Any past
1053
- # the first are optional.
1054
- re = Regexp::new( "(\\d)" + ("(\\d)?" * (config[:group] - 1)) )
1055
-
1056
- # Scan the string, and call the word-chunk function that deals with
1057
- # chunks of the found number of digits.
1058
- num.to_s.scan( re ) {|digits|
1059
- debug_msg " digits = #{digits.inspect}"
1060
- fn = NumberToWordsFunctions[ digits.nitems ]
1061
- numerals = digits.flatten.compact.collect {|i| i.to_i}
1062
- debug_msg " numerals = #{numerals.inspect}"
1063
- chunks.push fn.call( config[:zero], *numerals ).strip
1064
- }
1065
- else
1066
- phrase = num.to_s
1067
- phrase.sub!( /\A\s*0+/, '' )
1068
- mill = 0
1069
-
1070
- # Match backward from the end of the digits in the string, turning
1071
- # chunks of three, of two, and of one into words.
1072
- mill += 1 while
1073
- phrase.sub!( /(\d)(\d)(\d)(?=\D*\Z)/ ) {
1074
- words = to_hundreds( $1.to_i, $2.to_i, $3.to_i, mill,
1075
- config[:and] )
1076
- chunks.unshift words.strip.squeeze(' ') unless words.nil?
1077
- ''
1078
- }
1079
-
1080
- phrase.sub!( /(\d)(\d)(?=\D*\Z)/ ) {
1081
- chunks.unshift to_tens( $1.to_i, $2.to_i, mill ).strip.squeeze(' ')
1082
- ''
1083
- }
1084
- phrase.sub!( /(\d)(?=\D*\Z)/ ) {
1085
- chunks.unshift to_units( $1.to_i, mill ).strip.squeeze(' ')
1086
- ''
1087
- }
1088
- end
390
+ ### Set classical mode for the current thread inside the block, then
391
+ ### unset it when it returns.
392
+ def self::in_classical_mode
393
+ old_setting = Thread.current[ THREAD_CLASSICAL_KEY ]
394
+ Thread.current[ THREAD_CLASSICAL_KEY ] = true
1089
395
 
1090
- return chunks
396
+ yield
397
+ ensure
398
+ Thread.current[ THREAD_CLASSICAL_KEY ] = old_setting
1091
399
  end
1092
400
 
1093
401
 
@@ -1095,579 +403,6 @@ module Linguistics::EN
1095
403
  ### P U B L I C F U N C T I O N S
1096
404
  #################################################################
1097
405
 
1098
- ### Return the name of the language this module is for.
1099
- def language( unused=nil )
1100
- "English"
1101
- end
1102
-
1103
-
1104
- ### Return the plural of the given +phrase+ if +count+ indicates it should
1105
- ### be plural.
1106
- def plural( phrase, count=nil )
1107
- phrase = numwords( phrase ) if phrase.is_a?( Numeric )
1108
-
1109
- md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
1110
- pre, word, post = md.to_a[1,3]
1111
- return phrase if word.nil? or word.empty?
1112
-
1113
- plural = postprocess( word,
1114
- pluralize_special_adjective(word, count) ||
1115
- pluralize_special_verb(word, count) ||
1116
- pluralize_noun(word, count) )
1117
-
1118
- return pre + plural + post
1119
- end
1120
- def_lprintf_formatter :PL, :plural
1121
-
1122
-
1123
- ### Return the plural of the given noun +phrase+ if +count+ indicates it
1124
- ### should be plural.
1125
- def plural_noun( phrase, count=nil )
1126
- md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
1127
- pre, word, post = md.to_a[1,3]
1128
- return phrase if word.nil? or word.empty?
1129
-
1130
- plural = postprocess( word, pluralize_noun(word, count) )
1131
- return pre + plural + post
1132
- end
1133
- def_lprintf_formatter :PL_N, :plural_noun
1134
-
1135
-
1136
- ### Return the plural of the given verb +phrase+ if +count+ indicates it
1137
- ### should be plural.
1138
- def plural_verb( phrase, count=nil )
1139
- md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
1140
- pre, word, post = md.to_a[1,3]
1141
- return phrase if word.nil? or word.empty?
1142
-
1143
- plural = postprocess( word,
1144
- pluralize_special_verb(word, count) ||
1145
- pluralize_general_verb(word, count) )
1146
- return pre + plural + post
1147
- end
1148
- def_lprintf_formatter :PL_V, :plural_verb
1149
-
1150
-
1151
- ### Return the plural of the given adjectival +phrase+ if +count+ indicates
1152
- ### it should be plural.
1153
- def plural_adjective( phrase, count=nil )
1154
- md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
1155
- pre, word, post = md.to_a[1,3]
1156
- return phrase if word.nil? or word.empty?
1157
-
1158
- plural = postprocess( word,
1159
- pluralize_special_adjective(word, count) || word )
1160
- return pre + plural + post
1161
- end
1162
- alias_method :plural_adj, :plural_adjective
1163
- def_lprintf_formatter :PL_ADJ, :plural_adjective
1164
-
1165
-
1166
- ### Return the given phrase with the appropriate indefinite article ("a" or
1167
- ### "an") prepended.
1168
- def a( phrase, count=nil )
1169
- md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
1170
- pre, word, post = md.to_a[1,3]
1171
- return phrase if word.nil? or word.empty?
1172
-
1173
- result = indef_article( word, count )
1174
- return pre + result + post
1175
- end
1176
- alias_method :an, :a
1177
- def_lprintf_formatter :A, :a
1178
- def_lprintf_formatter :AN, :a
1179
-
1180
-
1181
- ### Translate zero-quantified +phrase+ to "no +phrase.plural+"
1182
- def no( phrase, count=nil )
1183
- md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
1184
- pre, word, post = md.to_a[1,3]
1185
- count ||= Linguistics::num || 0
1186
-
1187
- unless /^#{PL_count_zero}$/ =~ count.to_s
1188
- return "#{pre}#{count} " + plural( word, count ) + post
1189
- else
1190
- return "#{pre}no " + plural( word, 0 ) + post
1191
- end
1192
- end
1193
- def_lprintf_formatter :NO, :no
1194
-
1195
-
1196
- ### Participles
1197
- def present_participle( word )
1198
- plural = plural_verb( word.to_s, 2 )
1199
-
1200
- plural.sub!( /ie$/, 'y' ) or
1201
- plural.sub!( /ue$/, 'u' ) or
1202
- plural.sub!( /([auy])e$/, '$1' ) or
1203
- plural.sub!( /i$/, '' ) or
1204
- plural.sub!( /([^e])e$/, "\\1" ) or
1205
- /er$/.match( plural ) or
1206
- plural.sub!( /([^aeiou][aeiouy]([bdgmnprst]))$/, "\\1\\2" )
1207
-
1208
- return "#{plural}ing"
1209
- end
1210
- alias_method :part_pres, :present_participle
1211
- def_lprintf_formatter :PART_PRES, :present_participle
1212
-
1213
-
1214
-
1215
- ### Return the specified number as english words. One or more configuration
1216
- ### values may be passed to control the returned String:
1217
- ###
1218
- ### [<b>:group</b>]
1219
- ### Controls how many numbers at a time are grouped together. Valid values
1220
- ### are <code>0</code> (normal grouping), <code>1</code> (single-digit
1221
- ### grouping, e.g., "one, two, three, four"), <code>2</code>
1222
- ### (double-digit grouping, e.g., "twelve, thirty-four", or <code>3</code>
1223
- ### (triple-digit grouping, e.g., "one twenty-three, four").
1224
- ### [<b>:comma</b>]
1225
- ### Set the character/s used to separate word groups. Defaults to
1226
- ### <code>", "</code>.
1227
- ### [<b>:and</b>]
1228
- ### Set the word and/or characters used where <code>' and ' </code>(the
1229
- ### default) is normally used. Setting <code>:and</code> to
1230
- ### <code>' '</code>, for example, will cause <code>2556</code> to be
1231
- ### returned as "two-thousand, five hundred fifty-six" instead of
1232
- ### "two-thousand, five hundred and fifty-six".
1233
- ### [<b>:zero</b>]
1234
- ### Set the word used to represent the numeral <code>0</code> in the
1235
- ### result. <code>'zero'</code> is the default.
1236
- ### [<b>:decimal</b>]
1237
- ### Set the translation of any decimal points in the number; the default
1238
- ### is <code>'point'</code>.
1239
- ### [<b>:asArray</b>]
1240
- ### If set to a true value, the number will be returned as an array of
1241
- ### word groups instead of a String.
1242
- def numwords( number, hashargs={} )
1243
- num = number.to_s
1244
- config = NumwordDefaults.merge( hashargs )
1245
- raise "Bad chunking option: #{config[:group]}" unless
1246
- config[:group].between?( 0, 3 )
1247
-
1248
- # Array of number parts: first is everything to the left of the first
1249
- # decimal, followed by any groups of decimal-delimted numbers after that
1250
- parts = []
1251
-
1252
- # Wordify any sign prefix
1253
- sign = (/\A\s*\+/ =~ num) ? 'plus' : (/\A\s*\-/ =~ num) ? 'minus' : ''
1254
-
1255
- # Strip any ordinal suffixes
1256
- ord = true if num.sub!( /(st|nd|rd|th)\Z/, '' )
1257
-
1258
- # Split the number into chunks delimited by '.'
1259
- chunks = if !config[:decimal].empty? then
1260
- if config[:group].nonzero?
1261
- num.split(/\./)
1262
- else
1263
- num.split(/\./, 2)
1264
- end
1265
- else
1266
- [ num ]
1267
- end
1268
-
1269
- # Wordify each chunk, pushing arrays into the parts array
1270
- chunks.each_with_index {|chunk,section|
1271
- chunk.gsub!( /\D+/, '' )
1272
-
1273
- # If there's nothing in this chunk of the number, set it to zero
1274
- # unless it's the whole-number part, in which case just push an
1275
- # empty array.
1276
- if chunk.empty?
1277
- if section.zero?
1278
- parts.push []
1279
- next
1280
- end
1281
- end
1282
-
1283
- # Split the number section into wordified parts unless this is the
1284
- # second or succeeding part of a non-group number
1285
- unless config[:group].zero? && section.nonzero?
1286
- parts.push number_to_words( chunk, config )
1287
- else
1288
- parts.push number_to_words( chunk, config.merge(:group => 1) )
1289
- end
1290
- }
1291
-
1292
- debug_msg "Parts => #{parts.inspect}"
1293
-
1294
- # Turn the last word of the whole-number part back into an ordinal if
1295
- # the original number came in that way.
1296
- if ord && !parts[0].empty?
1297
- parts[0][-1] = ordinal( parts[0].last )
1298
- end
1299
-
1300
- # If the caller's expecting an Array return, just flatten and return the
1301
- # parts array.
1302
- if config[:asArray]
1303
- unless sign.empty?
1304
- parts[0].unshift( sign )
1305
- end
1306
- return parts.flatten
1307
- end
1308
-
1309
- # Catenate each sub-parts array into a whole number part and one or more
1310
- # post-decimal parts. If grouping is turned on, all sub-parts get joined
1311
- # with commas, otherwise just the whole-number part is.
1312
- if config[:group].zero?
1313
- if parts[0].length > 1
1314
-
1315
- # Join all but the last part together with commas
1316
- wholenum = parts[0][0...-1].join( config[:comma] )
1317
-
1318
- # If the last part is just a single word, append it to the
1319
- # wholenum part with an 'and'. This is to get things like 'three
1320
- # thousand and three' instead of 'three thousand, three'.
1321
- if /^\s*(\S+)\s*$/ =~ parts[0].last
1322
- wholenum += config[:and] + parts[0].last
1323
- else
1324
- wholenum += config[:comma] + parts[0].last
1325
- end
1326
- else
1327
- wholenum = parts[0][0]
1328
- end
1329
- decimals = parts[1..-1].collect {|part| part.join(" ")}
1330
-
1331
- debug_msg "Wholenum: #{wholenum.inspect}; decimals: #{decimals.inspect}"
1332
-
1333
- # Join with the configured decimal; if it's empty, just join with
1334
- # spaces.
1335
- unless config[:decimal].empty?
1336
- return sign + ([ wholenum ] + decimals).
1337
- join( " #{config[:decimal]} " ).strip
1338
- else
1339
- return sign + ([ wholenum ] + decimals).
1340
- join( " " ).strip
1341
- end
1342
- else
1343
- return parts.compact.
1344
- separate( config[:decimal] ).
1345
- delete_if {|el| el.empty?}.
1346
- join( config[:comma] ).
1347
- strip
1348
- end
1349
- end
1350
- def_lprintf_formatter :NUMWORDS, :numwords
1351
-
1352
-
1353
- ### Transform the given +number+ into an ordinal word. The +number+ object
1354
- ### can be either an Integer or a String.
1355
- def ordinal( number )
1356
- case number
1357
- when Integer
1358
- return number.to_s + (Nth[ number % 100 ] || Nth[ number % 10 ])
1359
-
1360
- else
1361
- return number.to_s.sub( /(#{OrdinalSuffixes})\Z/ ) { Ordinals[$1] }
1362
- end
1363
- end
1364
- def_lprintf_formatter :ORD, :ordinal
1365
-
1366
-
1367
- ### Transform the given +number+ into an ordinate word.
1368
- def ordinate( number )
1369
- return Linguistics::EN.ordinal( Linguistics::EN.numwords(number) )
1370
- end
1371
-
1372
-
1373
- ### Return a phrase describing the specified +number+ of objects in the
1374
- ### given +phrase+ in general terms. The following options can be used to
1375
- ### control the makeup of the returned quantity String:
1376
- ###
1377
- ### [<b>:joinword</b>]
1378
- ### Sets the word (and any surrounding spaces) used as the word separating the
1379
- ### quantity from the noun in the resulting string. Defaults to <tt>' of
1380
- ### '</tt>.
1381
- def quantify( phrase, number=0, args={} )
1382
- num = number.to_i
1383
- config = QuantifyDefaults.merge( args )
1384
-
1385
- case num
1386
- when 0
1387
- no( phrase )
1388
- when 1
1389
- a( phrase )
1390
- when SeveralRange
1391
- "several " + plural( phrase, num )
1392
- when NumberRange
1393
- "a number of " + plural( phrase, num )
1394
- when NumerousRange
1395
- "numerous " + plural( phrase, num )
1396
- when ManyRange
1397
- "many " + plural( phrase, num )
1398
- else
1399
-
1400
- # Anything bigger than the ManyRange gets described like
1401
- # "hundreds of thousands of..." or "millions of..."
1402
- # depending, of course, on how many there are.
1403
- thousands, subthousands = Math::log10( num ).to_i.divmod( 3 )
1404
- stword =
1405
- case subthousands
1406
- when 2
1407
- "hundreds"
1408
- when 1
1409
- "tens"
1410
- else
1411
- nil
1412
- end
1413
- thword = plural( to_thousands(thousands).strip )
1414
- thword = nil if thword.empty?
1415
-
1416
- [ # Hundreds (of)...
1417
- stword,
1418
-
1419
- # thousands (of)
1420
- thword,
1421
-
1422
- # stars.
1423
- plural(phrase, number)
1424
- ].compact.join( config[:joinword] )
1425
- end
1426
- end
1427
- def_lprintf_formatter :QUANT, :quantify
1428
-
1429
-
1430
- # :TODO: Needs refactoring
1431
-
1432
- ### Return the specified +obj+ (which must support the <tt>#collect</tt>
1433
- ### method) as a conjunction. Each item is converted to a String if it is
1434
- ### not already (using #to_s) unless a block is given, in which case it is
1435
- ### called once for each object in the array, and the stringified return
1436
- ### value from the block is used instead. Returning +nil+ causes that
1437
- ### particular element to be omitted from the resulting conjunction. The
1438
- ### following options can be used to control the makeup of the returned
1439
- ### conjunction String:
1440
- ###
1441
- ### [<b>:separator</b>]
1442
- ### Specify one or more characters to separate items in the resulting
1443
- ### list. Defaults to <tt>', '</tt>.
1444
- ### [<b>:altsep</b>]
1445
- ### An alternate separator to use if any of the resulting conjunction's
1446
- ### clauses contain the <tt>:separator</tt> character/s. Defaults to <tt>'; '</tt>.
1447
- ### [<b>:penultimate</b>]
1448
- ### Flag that indicates whether or not to join the last clause onto the
1449
- ### rest of the conjunction using a penultimate <tt>:separator</tt>. E.g.,
1450
- ### %w{duck, cow, dog}.en.conjunction
1451
- ### # => "a duck, a cow, and a dog"
1452
- ### %w{duck cow dog}.en.conjunction( :penultimate => false )
1453
- ### "a duck, a cow and a dog"
1454
- ### Default to <tt>true</tt>.
1455
- ### [<b>:conjunctive</b>]
1456
- ### Sets the word used as the conjunctive (separating word) of the
1457
- ### resulting string. Default to <tt>'and'</tt>.
1458
- ### [<b>:combine</b>]
1459
- ### If set to <tt>true</tt> (the default), items which are indentical (after
1460
- ### surrounding spaces are stripped) will be combined in the resulting
1461
- ### conjunction. E.g.,
1462
- ### %w{goose cow goose dog}.en.conjunction
1463
- ### # => "two geese, a cow, and a dog"
1464
- ### %w{goose cow goose dog}.en.conjunction( :combine => false )
1465
- ### # => "a goose, a cow, a goose, and a dog"
1466
- ### [<b>:casefold</b>]
1467
- ### If set to <tt>true</tt> (the default), then items are compared
1468
- ### case-insensitively when combining them. This has no effect if
1469
- ### <tt>:combine</tt> is <tt>false</tt>.
1470
- ### [<b>:generalize</b>]
1471
- ### If set to <tt>true</tt>, then quantities of combined items are turned into
1472
- ### general descriptions instead of exact amounts.
1473
- ### ary = %w{goose pig dog horse goose reindeer goose dog horse}
1474
- ### ary.en.conjunction
1475
- ### # => "three geese, two dogs, two horses, a pig, and a reindeer"
1476
- ### ary.en.conjunction( :generalize => true )
1477
- ### # => "several geese, several dogs, several horses, a pig, and a reindeer"
1478
- ### See the #quantify method for specifics on how quantities are
1479
- ### generalized. Generalization defaults to <tt>false</tt>, and has no effect if
1480
- ### :combine is <tt>false</tt>.
1481
- ### [<b>:quantsort</b>]
1482
- ### If set to <tt>true</tt> (the default), items which are combined in the
1483
- ### resulting conjunction will be listed in order of amount, with greater
1484
- ### quantities sorted first. If <tt>:quantsort</tt> is <tt>false</tt>, combined items
1485
- ### will appear where the first instance of them occurred in the
1486
- ### list. This sort is also the fallback for identical quantities (i.e.,
1487
- ### items of the same quantity will be listed in the order they appeared
1488
- ### in the source list).
1489
- ###
1490
- def conjunction( obj, args={} )
1491
- config = ConjunctionDefaults.merge( args )
1492
- phrases = []
1493
-
1494
- # Transform items in the obj to phrases
1495
- if block_given?
1496
- phrases = obj.collect {|item| yield(item) }.compact
1497
- else
1498
- phrases = obj.collect {|item| item.to_s }
1499
- end
1500
-
1501
- # No need for a conjunction if there's only one thing
1502
- return a(phrases[0]) if phrases.length < 2
1503
-
1504
- # Set up a Proc to derive a collector key from a phrase depending on the
1505
- # configuration
1506
- keyfunc =
1507
- if config[:casefold]
1508
- proc {|key| key.downcase.strip}
1509
- else
1510
- proc {|key| key.strip}
1511
- end
1512
-
1513
- # Count and delete phrases that hash the same when the keyfunc munges
1514
- # them into the same thing if we're combining (:combine => true).
1515
- collector = {}
1516
- if config[:combine]
1517
-
1518
- phrases.each_index do |i|
1519
- # Stop when reaching the end of a truncated list
1520
- break if phrases[i].nil?
1521
-
1522
- # Make the key using the configured key function
1523
- phrase = keyfunc[ phrases[i] ]
1524
-
1525
- # If the collector already has this key, increment its count,
1526
- # eliminate the duplicate from the phrase list, and redo the loop.
1527
- if collector.key?( phrase )
1528
- collector[ phrase ] += 1
1529
- phrases.delete_at( i )
1530
- redo
1531
- end
1532
-
1533
- collector[ phrase ] = 1
1534
- end
1535
- else
1536
- # If we're not combining, just make everything have a count of 1.
1537
- phrases.uniq.each {|key| collector[ keyfunc[key] ] = 1}
1538
- end
1539
-
1540
- # If sort-by-quantity is turned on, sort the phrases first by how many
1541
- # there are (most-first), and then by the order they were specified in.
1542
- if config[:quantsort] && config[:combine]
1543
- origorder = {}
1544
- phrases.each_with_index {|phrase,i| origorder[ keyfunc[phrase] ] ||= i }
1545
- phrases.sort! {|a,b|
1546
- (collector[ keyfunc[b] ] <=> collector[ keyfunc[a] ]).nonzero? ||
1547
- (origorder[ keyfunc[a] ] <=> origorder[ keyfunc[b] ])
1548
- }
1549
- end
1550
-
1551
- # Set up a filtering function that adds either an indefinite article, an
1552
- # indefinite quantifier, or a definite quantifier to each phrase
1553
- # depending on the configuration and the count of phrases in the
1554
- # collector.
1555
- filter =
1556
- if config[:generalize]
1557
- proc {|phrase, count| quantify(phrase, count) }
1558
- else
1559
- proc {|phrase, count|
1560
- if count > 1
1561
- "%s %s" % [
1562
- # :TODO: Make this threshold settable
1563
- count < 10 ? count.en.numwords : count.to_s,
1564
- plural(phrase, count)
1565
- ]
1566
- else
1567
- a( phrase )
1568
- end
1569
- }
1570
- end
1571
-
1572
- # Now use the configured filter to turn each phrase into its final
1573
- # form. Hmmm... square-bracket Lisp?
1574
- phrases.collect! {|phrase| filter[phrase, collector[ keyfunc[phrase] ]] }
1575
-
1576
- # Prepend the conjunctive to the last element unless it's empty or
1577
- # there's only one element
1578
- phrases[-1].insert( 0, config[:conjunctive] + " " ) unless
1579
- config[:conjunctive].strip.empty? or
1580
- phrases.length < 2
1581
-
1582
- # Concatenate the last two elements if there's no penultimate separator,
1583
- # and pick a separator based on how many phrases there are and whether
1584
- # or not there's already an instance of it in the phrases.
1585
- phrase_count = phrases.length
1586
- phrases[-2] << " " << phrases.pop unless config[:penultimate]
1587
- sep = config[:separator]
1588
- if phrase_count <= 2
1589
- sep = ' '
1590
- elsif phrases.find {|str| str.include?(config[:separator]) }
1591
- sep = config[:altsep]
1592
- end
1593
-
1594
- return phrases.join( sep )
1595
- end
1596
- def_lprintf_formatter :CONJUNCT, :conjunction
1597
-
1598
-
1599
- ### Turns a camel-case +string+ ("camelCaseToEnglish") to plain English
1600
- ### ("camel case to english"). Each word is decapitalized.
1601
- def camel_case_to_english( string )
1602
- string.to_s.
1603
- gsub( /([A-Z])([A-Z])/ ) { "#$1 #$2" }.
1604
- gsub( /([a-z])([A-Z])/ ) { "#$1 #$2" }.downcase
1605
- end
1606
-
1607
-
1608
- ### Turns an English language +string+ into a CamelCase word.
1609
- def english_to_camel_case( string )
1610
- string.to_s.gsub( /\s+([a-z])/ ) { $1.upcase }
1611
- end
1612
-
1613
-
1614
- ### This method doesn't work quite right yet. It does okay for simple cases,
1615
- ### but it misses more complex ones, e.g. 'as' used as a coordinating
1616
- ### conjunction in "A Portrait of the Artist as a Young Man". Perhaps after
1617
- ### there's a working (non-leaking) LinkParser for Ruby, this can be fixed
1618
- ### up. Until then it'll just be undocumented.
1619
-
1620
- ### Returns the given +string+ as a title-cased phrase.
1621
- def titlecase( string ) # :nodoc:
1622
-
1623
- # Split on word-boundaries
1624
- words = string.split( /\b/ )
1625
-
1626
- # Always capitalize the first and last words
1627
- words.first.capitalize!
1628
- words.last.capitalize!
1629
-
1630
- # Now scan the rest of the tokens, skipping non-words and capitalization
1631
- # exceptions.
1632
- words.each_with_index do |word, i|
1633
-
1634
- # Non-words
1635
- next unless /^\w+$/.match( word )
1636
-
1637
- # Skip exception-words
1638
- next if TitleCaseExceptions.include?( word )
1639
-
1640
- # Skip second parts of contractions
1641
- next if words[i - 1] == "'" && /\w/.match( words[i - 2] )
1642
-
1643
- # Have to do it this way instead of capitalize! because that method
1644
- # also downcases all other letters.
1645
- word.gsub!( /^(\w)(.*)/ ) { $1.upcase + $2 }
1646
- end
1647
-
1648
- return words.join
1649
- end
1650
-
1651
-
1652
- ### Returns the proper noun form of a string by capitalizing most of the
1653
- ### words.
1654
- ###
1655
- ### Examples:
1656
- ### English.proper_noun("bosnia and herzegovina") ->
1657
- ### "Bosnia and Herzegovina"
1658
- ### English.proper_noun("macedonia, the former yugoslav republic of") ->
1659
- ### "Macedonia, the Former Yugoslav Republic of"
1660
- ### English.proper_noun("virgin islands, u.s.") ->
1661
- ### "Virgin Islands, U.S."
1662
- def proper_noun( string )
1663
- return string.split(/([ .]+)/).collect {|word|
1664
- next word unless /^[a-z]/.match( word ) &&
1665
- ! (%w{and the of}.include?( word ))
1666
- word.capitalize
1667
- }.join
1668
- end
1669
-
1670
-
1671
406
  ### Format the given +fmt+ string by replacing %-escaped sequences with the
1672
407
  ### result of performing a specified operation on the corresponding
1673
408
  ### argument, ala Kernel.sprintf.
@@ -1681,48 +416,22 @@ module Linguistics::EN
1681
416
  ### Convert a number into the corresponding words.
1682
417
  ### %CONJUNCT::
1683
418
  ### Conjunction.
1684
- def lprintf( fmt, *args )
1685
- fmt.to_s.gsub( /%([A-Z_]+)/ ) do |match|
419
+ def lprintf( *args )
420
+ return self.to_s.gsub( /%([A-Z_]+)/ ) do |match|
1686
421
  op = $1.to_s.upcase.to_sym
1687
- if self.lprintf_formatters.key?( op )
422
+ if (( callback = Linguistics::EN.lprintf_formatters[op] ))
1688
423
  arg = args.shift
1689
- self.lprintf_formatters[ op ].call( arg )
424
+ callback.call( arg.en )
1690
425
  else
1691
- raise "no such formatter %p" % op
426
+ raise "no such formatter %p" % [ op ]
1692
427
  end
1693
428
  end
1694
429
  end
1695
430
 
1696
- end # module Linguistics::EN
1697
-
1698
-
1699
- ### Add the #separate and #separate! methods to Array.
1700
- class Array
1701
-
1702
- ### Returns a new Array that has had a new member inserted between all of
1703
- ### the current ones. The value used is the given +value+ argument unless a
1704
- ### block is given, in which case the block is called once for each pair of
1705
- ### the Array, and the return value is used as the separator.
1706
- def separate( value=:__no_arg__, &block )
1707
- ary = self.dup
1708
- ary.separate!( value, &block )
1709
- return ary
1710
- end
1711
431
 
1712
- ### The same as #separate, but modifies the Array in place.
1713
- def separate!( value=:__no_arg__ )
1714
- raise ArgumentError, "wrong number of arguments: (0 for 1)" if
1715
- value == :__no_arg__ && !block_given?
432
+ # Add 'english' to the list of default languages
433
+ Linguistics.register_language( :en, self )
1716
434
 
1717
- (1..( (self.length * 2) - 2 )).step(2) do |i|
1718
- if block_given?
1719
- self.insert( i, yield(self[i-1,2]) )
1720
- else
1721
- self.insert( i, value )
1722
- end
1723
- end
1724
- self
1725
- end
1726
435
 
1727
- end
436
+ end # module Linguistics::EN
1728
437