linguistics 1.0.9 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. data.tar.gz.sig +0 -0
  2. data/.gemtest +0 -0
  3. data/ChangeLog +849 -342
  4. data/History.rdoc +11 -0
  5. data/LICENSE +9 -9
  6. data/Manifest.txt +44 -0
  7. data/README.rdoc +226 -0
  8. data/Rakefile +32 -349
  9. data/examples/endocs.rb +272 -0
  10. data/examples/generalize_sentence.rb +2 -1
  11. data/examples/klingon.rb +22 -0
  12. data/lib/linguistics.rb +130 -292
  13. data/lib/linguistics/en.rb +337 -1628
  14. data/lib/linguistics/en/articles.rb +138 -0
  15. data/lib/linguistics/en/conjugation.rb +2245 -0
  16. data/lib/linguistics/en/conjunctions.rb +202 -0
  17. data/lib/linguistics/en/{infinitive.rb → infinitives.rb} +41 -55
  18. data/lib/linguistics/en/linkparser.rb +41 -49
  19. data/lib/linguistics/en/numbers.rb +483 -0
  20. data/lib/linguistics/en/participles.rb +33 -0
  21. data/lib/linguistics/en/pluralization.rb +810 -0
  22. data/lib/linguistics/en/stemmer.rb +75 -0
  23. data/lib/linguistics/en/titlecase.rb +121 -0
  24. data/lib/linguistics/en/wordnet.rb +63 -97
  25. data/lib/linguistics/inflector.rb +89 -0
  26. data/lib/linguistics/iso639.rb +534 -448
  27. data/lib/linguistics/languagebehavior.rb +36 -0
  28. data/lib/linguistics/monkeypatches.rb +42 -0
  29. data/spec/lib/constants.rb +15 -0
  30. data/spec/lib/helpers.rb +38 -0
  31. data/spec/linguistics/en/articles_spec.rb +797 -0
  32. data/spec/linguistics/en/conjugation_spec.rb +2083 -0
  33. data/spec/linguistics/en/conjunctions_spec.rb +154 -0
  34. data/spec/linguistics/en/infinitives_spec.rb +518 -0
  35. data/spec/linguistics/en/linkparser_spec.rb +66 -0
  36. data/spec/linguistics/en/numbers_spec.rb +1295 -0
  37. data/spec/linguistics/en/participles_spec.rb +55 -0
  38. data/spec/linguistics/en/pluralization_spec.rb +4636 -0
  39. data/spec/linguistics/en/stemmer_spec.rb +72 -0
  40. data/spec/linguistics/en/titlecase_spec.rb +841 -0
  41. data/spec/linguistics/en/wordnet_spec.rb +85 -0
  42. data/spec/linguistics/en_spec.rb +45 -167
  43. data/spec/linguistics/inflector_spec.rb +40 -0
  44. data/spec/linguistics/iso639_spec.rb +49 -53
  45. data/spec/linguistics/monkeypatches_spec.rb +40 -0
  46. data/spec/linguistics_spec.rb +46 -76
  47. metadata +241 -113
  48. metadata.gz.sig +0 -0
  49. data/README +0 -166
  50. data/README.english +0 -245
  51. data/rake/191_compat.rb +0 -26
  52. data/rake/dependencies.rb +0 -76
  53. data/rake/documentation.rb +0 -123
  54. data/rake/helpers.rb +0 -502
  55. data/rake/hg.rb +0 -318
  56. data/rake/manual.rb +0 -787
  57. data/rake/packaging.rb +0 -129
  58. data/rake/publishing.rb +0 -341
  59. data/rake/style.rb +0 -62
  60. data/rake/svn.rb +0 -668
  61. data/rake/testing.rb +0 -152
  62. data/rake/verifytask.rb +0 -64
  63. data/tests/en/infinitive.tests.rb +0 -207
  64. data/tests/en/inflect.tests.rb +0 -1389
  65. data/tests/en/lafcadio.tests.rb +0 -77
  66. data/tests/en/linkparser.tests.rb +0 -42
  67. data/tests/en/lprintf.tests.rb +0 -77
  68. data/tests/en/titlecase.tests.rb +0 -73
  69. data/tests/en/wordnet.tests.rb +0 -95
@@ -1,1093 +1,401 @@
1
1
  #!/usr/bin/ruby
2
- #
3
- # = Linguistics::EN
2
+
3
+ require 'rubygems' # For Gem.find_files
4
+ require 'pathname'
5
+
6
+ require 'linguistics' unless defined?( Linguistics )
7
+
8
+
9
+ # This module is a container for various English-language linguistic
10
+ # functions for the Linguistics library. It can be either loaded
11
+ # directly, or by passing some variant of +:en+ or +:eng+ to the
12
+ # Linguistics.use method.
4
13
  #
5
- # This module contains English-language linguistic functions for the Linguistics
6
- # module. It can be either loaded directly, or by passing some variant of 'en'
7
- # or 'eng' to the Linguistics::use method.
14
+ # == Pluralization
8
15
  #
9
- # The functions contained by the module provide:
16
+ # "box".en.plural
17
+ # # => "boxes"
10
18
  #
11
- # == Plural Inflections
12
- #
13
- # Plural forms of all nouns, most verbs, and some adjectives are provided. Where
14
- # appropriate, "classical" variants (for example: "brother" -> "brethren",
15
- # "dogma" -> "dogmata", etc.) are also provided.
19
+ # "mouse".en.plural
20
+ # # => "mice"
21
+ #
22
+ # "ruby".en.plural
23
+ # # => "rubies"
16
24
  #
17
- # These can be accessed via the #plural, #plural_noun, #plural_verb, and
18
- # #plural_adjective methods.
19
25
  #
20
26
  # == Indefinite Articles
21
27
  #
22
- # Pronunciation-based "a"/"an" selection is provided for all English words, and
23
- # most initialisms.
28
+ # "book".en.a
29
+ # # => "a book"
30
+ #
31
+ # "article".en.a
32
+ # # => "an article"
33
+ #
34
+ #
35
+ # == Present Participles
36
+ #
37
+ # "runs".en.present_participle
38
+ # # => "running"
39
+ #
40
+ # "eats".en.present_participle
41
+ # # => "eating"
42
+ #
43
+ # "spies".en.present_participle
44
+ # # => "spying"
45
+ #
46
+ #
47
+ # == Ordinal Numbers
48
+ #
49
+ # 5.en.ordinal
50
+ # # => "5th"
51
+ #
52
+ # 2004.en.ordinal
53
+ # # => "2004th"
24
54
  #
25
- # See: #a, #an, and #no.
26
55
  #
27
56
  # == Numbers to Words
28
57
  #
29
- # Conversion from Numeric values to words is supported using the American
30
- # "thousands" system. E.g., 2561 => "two thousand, five hundred and sixty-one".
58
+ # 5.en.numwords
59
+ # # => "five"
60
+ #
61
+ # 2004.en.numwords
62
+ # # => "two thousand and four"
63
+ #
64
+ # 2385762345876.en.numwords
65
+ # # => "two trillion, three hundred and eighty-five billion, seven hundred and
66
+ # # sixty-two million, three hundred and forty-five thousand, eight hundred
67
+ # # and seventy-six"
68
+ #
31
69
  #
32
- # See the #numwords method.
70
+ # == Quantification
33
71
  #
34
- # == Ordinals
72
+ # "cow".en.quantify( 5 )
73
+ # # => "several cows"
74
+ #
75
+ # "cow".en.quantify( 1005 )
76
+ # # => "thousands of cows"
77
+ #
78
+ # "cow".en.quantify( 20_432_123_000_000 )
79
+ # # => "tens of trillions of cows"
35
80
  #
36
- # It is also possible to inflect numerals (1,2,3) and number words ("one",
37
- # "two", "three") to ordinals (1st, 2nd, 3rd) and ordinates ("first", "second",
38
- # "third").
39
81
  #
40
82
  # == Conjunctions
41
83
  #
42
- # This module also supports the creation of English conjunctions from Arrays of
43
- # Strings or objects which respond to the #to_s message. Eg.,
84
+ # animals = %w{dog cow ox chicken goose goat cow dog rooster llama pig goat
85
+ # dog cat cat dog cow goat goose goose ox alpaca}
86
+ # "The farm has: " + animals.en.conjunction
87
+ # # => "The farm has: four dogs, three cows, three geese, three goats, two
88
+ # # oxen, two cats, a chicken, a rooster, a llama, a pig, and an alpaca"
89
+ #
90
+ # Note that 'goose' and 'ox' are both correctly pluralized, and the correct
91
+ # indefinite article 'an' has been used for 'alpaca'.
92
+ #
93
+ # You can also use the generalization function of the #quantify method to give
94
+ # general descriptions of object lists instead of literal counts:
95
+ #
96
+ # allobjs = []
97
+ # ObjectSpace::each_object {|obj| allobjs << obj.class.name }
98
+ # puts "The current Ruby objectspace contains: " +
99
+ # allobjs.en.conjunction( :generalize => true )
100
+ #
101
+ # Outputs:
102
+ #
103
+ # The current Ruby objectspace contains: hundreds of thousands of Strings,
104
+ # thousands of RubyVM::InstructionSequences, thousands of Arrays, thousands
105
+ # of Hashes, hundreds of Procs, hundreds of Regexps, [...], a
106
+ # SystemStackError, a Random, an ARGF.class, a Data, a fatal, an
107
+ # OptionParser::List, a YAML::EngineManager, a URI::Parser, a Rational, and
108
+ # a Gem::Platform
44
109
  #
45
- # %w{cow pig chicken cow dog cow duck duck moose}.en.conjunction
46
- # ==> "three cows, two ducks, a pig, a chicken, a dog, and a moose"
47
110
  #
48
111
  # == Infinitives
49
112
  #
50
- # Returns the infinitive form of English verbs:
113
+ # "leaving".en.infinitive
114
+ # # => "leave"
115
+ #
116
+ # "left".en.infinitive
117
+ # # => "leave"
118
+ #
119
+ # "leaving".en.infinitive.suffix
120
+ # # => "ing"
121
+ #
122
+ #
123
+ # == Conjugation
124
+ #
125
+ # Conjugate a verb given an infinitive:
126
+ #
127
+ # "run".en.past_tense
128
+ # # => "ran"
129
+ #
130
+ # "run".en.past_participle
131
+ # # => "run"
132
+ #
133
+ # "run".en.present_tense
134
+ # # => "run"
135
+ #
136
+ # "run".en.present_participle
137
+ # # => "running"
138
+ #
139
+ # Conjugate an infinitive with an explicit tense and grammatical person:
140
+ #
141
+ # "be".en.conjugate( :present, :third_person_singular )
142
+ # # => "is"
143
+ #
144
+ # "be".en.conjugate( :present, :first_person_singular )
145
+ # # => "am"
146
+ #
147
+ # "be".en.conjugate( :past, :first_person_singular )
148
+ # # => "was"
149
+ #
150
+ # The functionality is a port of the verb conjugation portion of Morph
151
+ # Adorner (http://morphadorner.northwestern.edu/).
152
+ #
153
+ # It includes a good number of irregular verbs, but it's not going to be
154
+ # 100% correct every time.
155
+ #
156
+ #
157
+ # == WordNet® Integration
158
+ #
159
+ # If you have the 'wordnet' gem installed, you can look up WordNet synsets using
160
+ # the Linguistics interface:
161
+ #
162
+ # Test to be sure the WordNet module loaded okay.
163
+ #
164
+ # Linguistics::EN.has_wordnet?
165
+ # # => true
166
+ #
167
+ # Fetch the default synset for the word "balance"
168
+ #
169
+ # "balance".en.synset
170
+ # # => #<WordNet::Synset:0x7f9fb11012f8 {102777100} 'balance' (noun):
171
+ # # [noun.artifact] a scale for weighing; depends on pull of gravity>
172
+ #
173
+ # Fetch the synset for the first verb sense of "balance"
174
+ #
175
+ # "balance".en.synset( :verb )
176
+ # # => #<WordNet::Synset:0x7f9fb10f3fb8 {201602318} 'balance, poise' (verb):
177
+ # # [verb.contact] hold or carry in equilibrium>
178
+ #
179
+ # Fetch the second noun sense
180
+ #
181
+ # "balance".en.synset( 2, :noun )
182
+ # # => #<WordNet::Synset:0x7f9fb10ebbd8 {102777402} 'balance, balance wheel'
183
+ # # (noun): [noun.artifact] a wheel that regulates the rate of movement in a
184
+ # # machine; especially a wheel oscillating against the hairspring of a
185
+ # # timepiece to regulate its beat>
186
+ #
187
+ # Fetch the second noun sense's hypernyms (more-general words, like a
188
+ # superclass)
189
+ #
190
+ # "balance".en.synset( 2, :noun ).hypernyms
191
+ # # => [#<WordNet::Synset:0x7f9fb10dd100 {104574999} 'wheel' (noun):
192
+ # # [noun.artifact] a simple machine consisting of a circular frame with
193
+ # # spokes (or a solid disc) that can rotate on a shaft or axle (as in
194
+ # # vehicles or other machines)>]
51
195
  #
52
- # "dodging".en.infinitive
53
- # ==> "dodge"
196
+ # A simpler way of doing the same thing:
54
197
  #
198
+ # "balance".en.hypernyms( 2, :noun )
199
+ # # => [#<WordNet::Synset:0x7f9fb10d24d0 {104574999} 'wheel' (noun):
200
+ # # [noun.artifact] a simple machine consisting of a circular frame with
201
+ # # spokes (or a solid disc) that can rotate on a shaft or axle (as in
202
+ # # vehicles or other machines)>]
55
203
  #
56
- # == Authors
57
- #
58
- # * Michael Granger <ged@FaerieMUD.org>
59
- #
60
- # == Acknowledgements
204
+ # Fetch the first hypernym's hypernyms
61
205
  #
62
- # The inflection functions of this module were adapted from Damian Conway's
63
- # Lingua::EN::Inflect Perl module:
206
+ # "balance".en.synset( 2, :noun ).hypernyms.first.hypernyms
207
+ # # => [#<WordNet::Synset:0x7f9fb10c5190 {103700963} 'machine, simple machine'
208
+ # # (noun): [noun.artifact] a device for overcoming resistance at one point by
209
+ # # applying force at some other point>]
64
210
  #
65
- # Copyright (c) 1997-2000, Damian Conway. All Rights Reserved.
66
- # This module is free software. It may be used, redistributed
67
- # and/or modified under the same terms as Perl itself.
211
+ # Find the synset to which both the second noun sense of "balance" and the
212
+ # default sense of "shovel" belong.
68
213
  #
69
- # The conjunctions code was adapted from the Lingua::Conjunction Perl module
70
- # written by Robert Rothenberg and Damian Conway, which has no copyright
71
- # statement included.
214
+ # ("balance".en.synset( 2, :noun ) | "shovel".en.synset)
215
+ # # => #<WordNet::Synset:0x7f9fb1091e58 {103183080} 'device' (noun):
216
+ # # [noun.artifact] an instrumentality invented for a particular purpose>
72
217
  #
73
- # :include: LICENSE
218
+ # Fetch words for the specific kinds of (device-ish) "instruments"
74
219
  #
75
- #--
220
+ # "instrument".en.hyponyms( "device" ).collect( &:words ).flatten.join(', ')
221
+ # # => "analyser, analyzer, cauterant, cautery, drafting instrument, engine,
222
+ # # extractor, instrument of execution, instrument of punishment, measuring
223
+ # # device, measuring instrument, measuring system, medical instrument,
224
+ # # navigational instrument, optical instrument, plotter, scientific
225
+ # # instrument, sonograph, surveying instrument, surveyor's instrument,
226
+ # # tracer, arm, weapon, weapon system, whip"
227
+ #
228
+ # ...or musical instruments
229
+ #
230
+ # "instrument".en.hyponyms( "musical" ).collect( &:words ).flatten.join(', ')
231
+ # # => "barrel organ, grind organ, hand organ, hurdy-gurdy, hurdy gurdy,
232
+ # # street organ, bass, calliope, steam organ, electronic instrument,
233
+ # # electronic musical instrument, jew's harp, jews' harp, mouth bow, keyboard
234
+ # # instrument, music box, musical box, percussion instrument, percussive
235
+ # # instrument, stringed instrument, wind, wind instrument"
236
+ #
237
+ # There are many more WordNet methods supported--too many to list here. See the
238
+ # WordNet::Synset API documentation for the complete list.
239
+ #
240
+ #
241
+ # == LinkParser Integration
242
+ #
243
+ # If you have the 'linkparser' gem installed, you can create linkages
244
+ # from English sentences that let you query for parts of speech:
245
+ #
246
+ # Test to see whether or not the link parser is loaded.
247
+ #
248
+ # Linguistics::EN.has_linkparser?
249
+ # # => true
250
+ #
251
+ # Diagram the first linkage for a test sentence
252
+ #
253
+ # puts "he is a big dog".en.sentence.linkages.first.diagram
254
+ #
255
+ # Outputs:
256
+ #
257
+ # +-----Ost----+
258
+ # | +----Ds---+
259
+ # +-Ss+ | +--A--+
260
+ # | | | | |
261
+ # he is.v a big.a dog.n
262
+ #
263
+ # Find the verb in the sentence
264
+ #
265
+ # "he is a big dog".en.sentence.verb.to_s
266
+ # # => "is"
267
+ #
268
+ # Combined infinitive + LinkParser: Find the infinitive form of the verb of the
269
+ # given sentence.
270
+ #
271
+ # "he is a big dog".en.sentence.verb.en.infinitive
272
+ # # => "be"
273
+ #
274
+ # Find the direct object of the sentence
275
+ #
276
+ # "he is a big dog".en.sentence.object.to_s
277
+ # # => "dog"
278
+ #
279
+ # Combine WordNet + LinkParser to find the definition of the direct object of
280
+ # the sentence
281
+ #
282
+ # "he is a big dog".en.sentence.object.en.definition
283
+ # # => "a member of the genus Canis (probably descended from the common wolf)
284
+ # # that has been domesticated by man since prehistoric times; occurs in many
285
+ # # breeds"
76
286
  #
77
- # Please see the file LICENSE in the base directory for licensing details.
78
287
  #
79
288
  module Linguistics::EN
289
+ extend Loggability
80
290
 
81
- # Load in the secondary modules and add them to Linguistics::EN.
82
- require 'linguistics/en/infinitive'
83
- require 'linguistics/en/wordnet'
84
- require 'linguistics/en/linkparser'
85
-
86
- # Add 'english' to the list of default languages
87
- Linguistics::DefaultLanguages.push( :en )
88
-
89
-
90
- #################################################################
91
- ### U T I L I T Y F U N C T I O N S
92
- #################################################################
93
-
94
- ### Wrap one or more parts in a non-capturing alternation Regexp
95
- def self::matchgroup( *parts )
96
- re = parts.flatten.join("|")
97
- "(?:#{re})"
98
- end
99
-
100
-
101
- @lprintf_formatters = {}
102
- class << self
103
- attr_accessor :lprintf_formatters
104
- end
105
-
106
- ### Add the specified method (which can be either a Method object or a
107
- ### Symbol for looking up a method)
108
- def self::def_lprintf_formatter( name, meth )
109
- meth = self.method( meth ) unless meth.is_a?( Method )
110
- self.lprintf_formatters[ name ] = meth
111
- end
112
-
113
-
114
-
115
- #################################################################
116
- ### C O N S T A N T S
117
- #################################################################
118
-
119
- # :stopdoc:
120
-
121
- #
122
- # Plurals
123
- #
124
-
125
- PL_sb_irregular_s = {
126
- "ephemeris" => "ephemerides",
127
- "iris" => "irises|irides",
128
- "clitoris" => "clitorises|clitorides",
129
- "corpus" => "corpuses|corpora",
130
- "opus" => "opuses|opera",
131
- "genus" => "genera",
132
- "mythos" => "mythoi",
133
- "penis" => "penises|penes",
134
- "testis" => "testes",
135
- }
136
-
137
- PL_sb_irregular_h = {
138
- "child" => "children",
139
- "brother" => "brothers|brethren",
140
- "loaf" => "loaves",
141
- "hoof" => "hoofs|hooves",
142
- "beef" => "beefs|beeves",
143
- "money" => "monies",
144
- "mongoose" => "mongooses",
145
- "ox" => "oxen",
146
- "cow" => "cows|kine",
147
- "soliloquy" => "soliloquies",
148
- "graffito" => "graffiti",
149
- "prima donna" => "prima donnas|prime donne",
150
- "octopus" => "octopuses|octopodes",
151
- "genie" => "genies|genii",
152
- "ganglion" => "ganglions|ganglia",
153
- "trilby" => "trilbys",
154
- "turf" => "turfs|turves",
155
- }.update( PL_sb_irregular_s )
156
- PL_sb_irregular = matchgroup PL_sb_irregular_h.keys
157
-
158
-
159
- # Classical "..a" -> "..ata"
160
- PL_sb_C_a_ata = matchgroup %w[
161
- anathema bema carcinoma charisma diploma
162
- dogma drama edema enema enigma lemma
163
- lymphoma magma melisma miasma oedema
164
- sarcoma schema soma stigma stoma trauma
165
- gumma pragma
166
- ].collect {|word| word[0...-1]}
167
-
168
- # Unconditional "..a" -> "..ae"
169
- PL_sb_U_a_ae = matchgroup %w[
170
- alumna alga vertebra persona
171
- ]
172
-
173
- # Classical "..a" -> "..ae"
174
- PL_sb_C_a_ae = matchgroup %w[
175
- amoeba antenna formula hyperbola
176
- medusa nebula parabola abscissa
177
- hydra nova lacuna aurora .*umbra
178
- flora fauna
179
- ]
180
-
181
- # Classical "..en" -> "..ina"
182
- PL_sb_C_en_ina = matchgroup %w[
183
- stamen foramen lumen
184
- ].collect {|word| word[0...-2] }
185
-
186
- # Unconditional "..um" -> "..a"
187
- PL_sb_U_um_a = matchgroup %w[
188
- bacterium agendum desideratum erratum
189
- stratum datum ovum extremum
190
- candelabrum
191
- ].collect {|word| word[0...-2] }
192
-
193
- # Classical "..um" -> "..a"
194
- PL_sb_C_um_a = matchgroup %w[
195
- maximum minimum momentum optimum
196
- quantum cranium curriculum dictum
197
- phylum aquarium compendium emporium
198
- enconium gymnasium honorarium interregnum
199
- lustrum memorandum millenium rostrum
200
- spectrum speculum stadium trapezium
201
- ultimatum medium vacuum velum
202
- consortium
203
- ].collect {|word| word[0...-2]}
204
-
205
- # Unconditional "..us" -> "i"
206
- PL_sb_U_us_i = matchgroup %w[
207
- alumnus alveolus bacillus bronchus
208
- locus nucleus stimulus meniscus
209
- ].collect {|word| word[0...-2]}
210
-
211
- # Classical "..us" -> "..i"
212
- PL_sb_C_us_i = matchgroup %w[
213
- focus radius genius
214
- incubus succubus nimbus
215
- fungus nucleolus stylus
216
- torus umbilicus uterus
217
- hippopotamus
218
- ].collect {|word| word[0...-2]}
291
+ # Loggability API -- log to the Linguistics logger
292
+ log_to :linguistics
219
293
 
220
- # Classical "..us" -> "..us" (assimilated 4th declension latin nouns)
221
- PL_sb_C_us_us = matchgroup %w[
222
- status apparatus prospectus sinus
223
- hiatus impetus plexus
224
- ]
294
+ # The list of loaded modules
295
+ MODULES = []
225
296
 
226
- # Unconditional "..on" -> "a"
227
- PL_sb_U_on_a = matchgroup %w[
228
- criterion perihelion aphelion
229
- phenomenon prolegomenon noumenon
230
- organon asyndeton hyperbaton
231
- ].collect {|word| word[0...-2]}
297
+ # The key to set in the thread-hash to indicate it's running in 'classical' mode
298
+ THREAD_CLASSICAL_KEY = :english_classical_mode
232
299
 
233
- # Classical "..on" -> "..a"
234
- PL_sb_C_on_a = matchgroup %w[
235
- oxymoron
236
- ].collect {|word| word[0...-2]}
237
300
 
238
- # Classical "..o" -> "..i" (but normally -> "..os")
239
- PL_sb_C_o_i_a = %w[
240
- solo soprano basso alto
241
- contralto tempo piano
242
- ]
243
- PL_sb_C_o_i = matchgroup PL_sb_C_o_i_a.collect{|word| word[0...-1]}
301
+ # A Hash of 'lprintf' formatters keyed by name
302
+ @@lprintf_formatters = {}
244
303
 
245
- # Always "..o" -> "..os"
246
- PL_sb_U_o_os = matchgroup( %w[
247
- albino archipelago armadillo
248
- commando crescendo fiasco
249
- ditto dynamo embryo
250
- ghetto guano inferno
251
- jumbo lumbago magneto
252
- manifesto medico octavo
253
- photo pro quarto
254
- canto lingo generalissimo
255
- stylo rhino
256
- ] | PL_sb_C_o_i_a )
257
-
258
-
259
- # Unconditional "..[ei]x" -> "..ices"
260
- PL_sb_U_ex_ices = matchgroup %w[
261
- codex murex silex
262
- ].collect {|word| word[0...-2]}
263
- PL_sb_U_ix_ices = matchgroup %w[
264
- radix helix
265
- ].collect {|word| word[0...-2]}
266
-
267
- # Classical "..[ei]x" -> "..ices"
268
- PL_sb_C_ex_ices = matchgroup %w[
269
- vortex vertex cortex latex
270
- pontifex apex index simplex
271
- ].collect {|word| word[0...-2]}
272
- PL_sb_C_ix_ices = matchgroup %w[
273
- appendix
274
- ].collect {|word| word[0...-2]}
275
-
276
-
277
- # Arabic: ".." -> "..i"
278
- PL_sb_C_i = matchgroup %w[
279
- afrit afreet efreet
280
- ]
281
-
282
-
283
- # Hebrew: ".." -> "..im"
284
- PL_sb_C_im = matchgroup %w[
285
- goy seraph cherub
286
- ]
287
-
288
- # Unconditional "..man" -> "..mans"
289
- PL_sb_U_man_mans = matchgroup %w[
290
- human
291
- Alabaman Bahaman Burman German
292
- Hiroshiman Liman Nakayaman Oklahoman
293
- Panaman Selman Sonaman Tacoman Yakiman
294
- Yokohaman Yuman
295
- ]
296
-
297
-
298
- PL_sb_uninflected_s = [
299
- # Pairs or groups subsumed to a singular...
300
- "breeches", "britches", "clippers", "gallows", "hijinks",
301
- "headquarters", "pliers", "scissors", "testes", "herpes",
302
- "pincers", "shears", "proceedings", "trousers",
303
-
304
- # Unassimilated Latin 4th declension
305
- "cantus", "coitus", "nexus",
306
-
307
- # Recent imports...
308
- "contretemps", "corps", "debris",
309
- ".*ois",
310
-
311
- # Diseases
312
- ".*measles", "mumps",
313
-
314
- # Miscellaneous others...
315
- "diabetes", "jackanapes", "series", "species", "rabies",
316
- "chassis", "innings", "news", "mews",
317
- ]
318
-
319
-
320
- # Don't inflect in classical mode, otherwise normal inflection
321
- PL_sb_uninflected_herd = matchgroup %w[
322
- wildebeest swine eland bison buffalo
323
- elk moose rhinoceros
324
- ]
325
-
326
- PL_sb_uninflected = matchgroup [
327
-
328
- # Some fish and herd animals
329
- ".*fish", "tuna", "salmon", "mackerel", "trout",
330
- "bream", "sea[- ]bass", "carp", "cod", "flounder", "whiting",
331
-
332
- ".*deer", ".*sheep",
333
-
334
- # All nationals ending in -ese
335
- "Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
336
- "Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
337
- "Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
338
- "Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
339
- "Shavese", "Vermontese", "Wenchowese", "Yengeese",
340
- ".*[nrlm]ese",
341
-
342
- # Some words ending in ...s (often pairs taken as a whole)
343
- PL_sb_uninflected_s,
344
-
345
- # Diseases
346
- ".*pox",
347
-
348
- # Other oddities
349
- "graffiti", "djinn"
350
- ]
351
-
352
-
353
- # Singular words ending in ...s (all inflect with ...es)
354
- PL_sb_singular_s = matchgroup %w[
355
- .*ss
356
- acropolis aegis alias arthritis asbestos atlas
357
- bathos bias bronchitis bursitis caddis cannabis
358
- canvas chaos cosmos dais digitalis encephalitis
359
- epidermis ethos eyas gas glottis hepatitis
360
- hubris ibis lens mantis marquis metropolis
361
- neuritis pathos pelvis polis rhinoceros
362
- sassafras tonsillitis trellis .*us
363
- ]
364
-
365
- PL_v_special_s = matchgroup [
366
- PL_sb_singular_s,
367
- PL_sb_uninflected_s,
368
- PL_sb_irregular_s.keys,
369
- '(.*[csx])is',
370
- '(.*)ceps',
371
- '[A-Z].*s',
372
- ]
373
-
374
- PL_sb_postfix_adj = '(' + {
375
-
376
- 'general' => ['(?!major|lieutenant|brigadier|adjutant)\S+'],
377
- 'martial' => ["court"],
378
-
379
- }.collect {|key,val|
380
- matchgroup( matchgroup(val) + "(?=(?:-|\\s+)#{key})" )
381
- }.join("|") + ")(.*)"
382
-
383
-
384
- PL_sb_military = %r'major|lieutenant|brigadier|adjutant|quartermaster'
385
- PL_sb_general = %r'((?!#{PL_sb_military.source}).*?)((-|\s+)general)'
386
-
387
- PL_prep = matchgroup %w[
388
- about above across after among around at athwart before behind
389
- below beneath beside besides between betwixt beyond but by
390
- during except for from in into near of off on onto out over
391
- since till to under until unto upon with
392
- ]
393
-
394
- PL_sb_prep_dual_compound = %r'(.*?)((?:-|\s+)(?:#{PL_prep}|d[eu])(?:-|\s+))a(?:-|\s+)(.*)'
395
- PL_sb_prep_compound = %r'(.*?)((-|\s+)(#{PL_prep}|d[eu])((-|\s+)(.*))?)'
396
-
397
-
398
- PL_pron_nom_h = {
399
- # Nominative Reflexive
400
- "i" => "we", "myself" => "ourselves",
401
- "you" => "you", "yourself" => "yourselves",
402
- "she" => "they", "herself" => "themselves",
403
- "he" => "they", "himself" => "themselves",
404
- "it" => "they", "itself" => "themselves",
405
- "they" => "they", "themself" => "themselves",
406
-
407
- # Possessive
408
- "mine" => "ours",
409
- "yours" => "yours",
410
- "hers" => "theirs",
411
- "his" => "theirs",
412
- "its" => "theirs",
413
- "theirs" => "theirs",
414
- }
415
- PL_pron_nom = matchgroup PL_pron_nom_h.keys
416
-
417
- PL_pron_acc_h = {
418
- # Accusative Reflexive
419
- "me" => "us", "myself" => "ourselves",
420
- "you" => "you", "yourself" => "yourselves",
421
- "her" => "them", "herself" => "themselves",
422
- "him" => "them", "himself" => "themselves",
423
- "it" => "them", "itself" => "themselves",
424
- "them" => "them", "themself" => "themselves",
425
- }
426
- PL_pron_acc = matchgroup PL_pron_acc_h.keys
427
-
428
- PL_v_irregular_pres_h = {
429
- # 1St pers. sing. 2nd pers. sing. 3rd pers. singular
430
- # 3rd pers. (indet.)
431
- "am" => "are", "are" => "are", "is" => "are",
432
- "was" => "were", "were" => "were", "was" => "were",
433
- "have" => "have", "have" => "have", "has" => "have",
434
- }
435
- PL_v_irregular_pres = matchgroup PL_v_irregular_pres_h.keys
436
-
437
- PL_v_ambiguous_pres_h = {
438
- # 1st pers. sing. 2nd pers. sing. 3rd pers. singular
439
- # 3rd pers. (indet.)
440
- "act" => "act", "act" => "act", "acts" => "act",
441
- "blame" => "blame", "blame" => "blame", "blames" => "blame",
442
- "can" => "can", "can" => "can", "can" => "can",
443
- "must" => "must", "must" => "must", "must" => "must",
444
- "fly" => "fly", "fly" => "fly", "flies" => "fly",
445
- "copy" => "copy", "copy" => "copy", "copies" => "copy",
446
- "drink" => "drink", "drink" => "drink", "drinks" => "drink",
447
- "fight" => "fight", "fight" => "fight", "fights" => "fight",
448
- "fire" => "fire", "fire" => "fire", "fires" => "fire",
449
- "like" => "like", "like" => "like", "likes" => "like",
450
- "look" => "look", "look" => "look", "looks" => "look",
451
- "make" => "make", "make" => "make", "makes" => "make",
452
- "reach" => "reach", "reach" => "reach", "reaches" => "reach",
453
- "run" => "run", "run" => "run", "runs" => "run",
454
- "sink" => "sink", "sink" => "sink", "sinks" => "sink",
455
- "sleep" => "sleep", "sleep" => "sleep", "sleeps" => "sleep",
456
- "view" => "view", "view" => "view", "views" => "view",
457
- }
458
- PL_v_ambiguous_pres = matchgroup PL_v_ambiguous_pres_h.keys
459
-
460
- PL_v_irregular_non_pres = matchgroup %w[
461
- did had ate made put
462
- spent fought sank gave sought
463
- shall could ought should
464
- ]
465
-
466
- PL_v_ambiguous_non_pres = matchgroup %w[
467
- thought saw bent will might cut
468
- ]
469
-
470
- PL_count_zero = matchgroup %w[
471
- 0 no zero nil
472
- ]
473
-
474
- PL_count_one = matchgroup %w[
475
- 1 a an one each every this that
476
- ]
477
-
478
- PL_adj_special_h = {
479
- "a" => "some", "an" => "some",
480
- "this" => "these", "that" => "those",
481
- }
482
- PL_adj_special = matchgroup PL_adj_special_h.keys
483
-
484
- PL_adj_poss_h = {
485
- "my" => "our",
486
- "your" => "your",
487
- "its" => "their",
488
- "her" => "their",
489
- "his" => "their",
490
- "their" => "their",
491
- }
492
- PL_adj_poss = matchgroup PL_adj_poss_h.keys
493
-
494
-
495
- #
496
- # Numerals, ordinals, and numbers-to-words
497
- #
498
-
499
- # Numerical inflections
500
- Nth = {
501
- 0 => 'th',
502
- 1 => 'st',
503
- 2 => 'nd',
504
- 3 => 'rd',
505
- 4 => 'th',
506
- 5 => 'th',
507
- 6 => 'th',
508
- 7 => 'th',
509
- 8 => 'th',
510
- 9 => 'th',
511
- 11 => 'th',
512
- 12 => 'th',
513
- 13 => 'th',
514
- }
515
-
516
- # Ordinal word parts
517
- Ordinals = {
518
- 'ty' => 'tieth',
519
- 'one' => 'first',
520
- 'two' => 'second',
521
- 'three' => 'third',
522
- 'five' => 'fifth',
523
- 'eight' => 'eighth',
524
- 'nine' => 'ninth',
525
- 'twelve' => 'twelfth',
526
- }
527
- OrdinalSuffixes = Ordinals.keys.join("|") + "|"
528
- Ordinals[""] = 'th'
529
-
530
- # Numeral names
531
- Units = [''] + %w[one two three four five six seven eight nine]
532
- Teens = %w[ten eleven twelve thirteen fourteen
533
- fifteen sixteen seventeen eighteen nineteen]
534
- Tens = ['',''] + %w[twenty thirty forty fifty sixty seventy eighty ninety]
535
- Thousands = [' ', ' thousand'] + %w[
536
- m b tr quadr quint sext sept oct non dec undec duodec tredec
537
- quattuordec quindec sexdec septemdec octodec novemdec vigint
538
- ].collect {|prefix| ' ' + prefix + 'illion'}
539
-
540
- # A collection of functions for transforming digits into word
541
- # phrases. Indexed by the number of digits being transformed; e.g.,
542
- # <tt>NumberToWordsFunctions[2]</tt> is the function for transforming
543
- # double-digit numbers.
544
- NumberToWordsFunctions = [
545
- proc {|*args| raise "No digits (#{args.inspect})"},
546
-
547
- # Single-digits
548
- proc {|zero,x|
549
- (x.nonzero? ? to_units(x) : "#{zero} ")
550
- },
551
-
552
- # Double-digits
553
- proc {|zero,x,y|
554
- if x.nonzero?
555
- to_tens( x, y )
556
- elsif y.nonzero?
557
- "#{zero} " + NumberToWordsFunctions[1].call( zero, y )
558
- else
559
- ([zero] * 2).join(" ")
560
- end
561
- },
562
-
563
- # Triple-digits
564
- proc {|zero,x,y,z|
565
- NumberToWordsFunctions[1].call(zero,x) +
566
- NumberToWordsFunctions[2].call(zero,y,z)
567
- }
568
- ]
569
-
570
-
571
- #
572
- # Indefinite Articles
573
- #
574
-
575
- # This pattern matches strings of capitals starting with a "vowel-sound"
576
- # consonant followed by another consonant, and which are not likely
577
- # to be real words (oh, all right then, it's just magic!)
578
- A_abbrev = %{
579
- (?! FJO | [HLMNS]Y. | RY[EO] | SQU
580
- | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU])
581
- [FHLMNRSX][A-Z]
582
- }
583
-
584
- # This pattern codes the beginnings of all English words beginning with a
585
- # 'y' followed by a consonant. Any other y-consonant prefix therefore
586
- # implies an abbreviation.
587
- A_y_cons = 'y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)'
588
-
589
- # Exceptions to exceptions
590
- A_explicit_an = matchgroup( "euler", "hour(?!i)", "heir", "honest", "hono" )
591
-
592
-
593
- #
594
- # Configuration defaults
595
- #
596
-
597
- # Default configuration arguments for the #numwords function
598
- NumwordDefaults = {
599
- :group => 0,
600
- :comma => ', ',
601
- :and => ' and ',
602
- :zero => 'zero',
603
- :decimal => 'point',
604
- :asArray => false,
605
- }
606
-
607
- # Default ranges for #quantify
608
- SeveralRange = 2..5
609
- NumberRange = 6..19
610
- NumerousRange = 20..45
611
- ManyRange = 46..99
612
-
613
- # Default configuration arguments for the #quantify function
614
- QuantifyDefaults = {
615
- :joinword => " of ",
616
- }
617
-
618
- # Default configuration arguments for the #conjunction (junction, what's
619
- # your) function.
620
- ConjunctionDefaults = {
621
- :separator => ', ',
622
- :altsep => '; ',
623
- :penultimate => true,
624
- :conjunctive => 'and',
625
- :combine => true,
626
- :casefold => true,
627
- :generalize => false,
628
- :quantsort => true,
629
- }
630
-
631
-
632
- #
633
- # Title case
634
- #
635
-
636
- # "In titles, capitalize the first word, the last word, and all words in
637
- # between except articles (a, an, and the), prepositions under five letters
638
- # (in, of, to), and coordinating conjunctions (and, but). These rules apply
639
- # to titles of long, short, and partial works as well as your own papers"
640
- # (Anson, Schwegler, and Muth. The Longman Writer's Companion 240).
641
-
642
- # Build the list of exceptions to title-capitalization
643
- Articles = %w[a and the]
644
- ShortPrepositions = ["amid", "at", "but", "by", "down", "from", "in",
645
- "into", "like", "near", "of", "off", "on", "onto", "out", "over",
646
- "past", "save", "with", "till", "to", "unto", "up", "upon", "with"]
647
- CoordConjunctions = %w[and but as]
648
- TitleCaseExceptions = Articles | ShortPrepositions | CoordConjunctions
649
-
650
-
651
- # :startdoc:
652
304
 
653
305
  #################################################################
654
- ### " B A C K E N D " F U N C T I O N S
306
+ ### U T I L I T Y F U N C T I O N S
655
307
  #################################################################
656
308
 
657
-
658
- ###############
659
- module_function
660
- ###############
661
-
662
- ### Debugging output
663
- def debug_msg( *msgs ) # :nodoc:
664
- $stderr.puts msgs.join(" ") if $DEBUG
665
- end
666
-
667
-
668
- ### Normalize a count to either 1 or 2 (singular or plural)
669
- def normalize_count( count, default=2 )
670
- return default if count.nil? # Default to plural
671
- if /^(#{PL_count_one})$/i =~ count.to_s ||
672
- Linguistics::classical? &&
673
- /^(#{PL_count_zero})$/ =~ count.to_s
674
- return 1
675
- else
676
- return default
677
- end
309
+ ### A Hash of formatters for the lprintf function.
310
+ def self::lprintf_formatters
311
+ return @@lprintf_formatters
678
312
  end
679
313
 
680
314
 
681
- ### Do normal/classical switching and match capitalization in <tt>inflected</tt> by
682
- ### examining the <tt>original</tt> input.
683
- def postprocess( original, inflected )
684
- inflected.sub!( /([^|]+)\|(.+)/ ) {
685
- Linguistics::classical? ? $2 : $1
686
- }
687
-
688
- case original
689
- when "I"
690
- return inflected
691
- when /^[A-Z]+$/
692
- return inflected.upcase
693
- when /^[A-Z]/
694
- # Can't use #capitalize, as it will downcase the rest of the string,
695
- # too.
696
- inflected[0,1] = inflected[0,1].upcase
697
- return inflected
698
- else
699
- return inflected
700
- end
701
- end
702
-
703
-
704
- ### Pluralize nouns
705
- def pluralize_noun( word, count=nil )
706
- value = nil
707
- count ||= Linguistics::num
708
- count = normalize_count( count )
709
-
710
- return word if count == 1
711
-
712
- # Handle user-defined nouns
713
- #if value = ud_match( word, PL_sb_user_defined )
714
- # return value
715
- #end
716
-
717
- # Handle empty word, singular count and uninflected plurals
718
- case word
719
- when ''
720
- return word
721
- when /^(#{PL_sb_uninflected})$/i
722
- return word
723
- else
724
- if Linguistics::classical? &&
725
- /^(#{PL_sb_uninflected_herd})$/i =~ word
726
- return word
727
- end
728
- end
729
-
730
- # Handle compounds ("Governor General", "mother-in-law", "aide-de-camp", etc.)
731
- case word
732
- when /^(?:#{PL_sb_postfix_adj})$/i
733
- value = $2
734
- return pluralize_noun( $1, 2 ) + value
735
-
736
- when /^(?:#{PL_sb_prep_dual_compound})$/i
737
- value = [ $2, $3 ]
738
- return pluralize_noun( $1, 2 ) + value[0] + pluralize_noun( value[1] )
739
-
740
- when /^(?:#{PL_sb_prep_compound})$/i
741
- value = $2
742
- return pluralize_noun( $1, 2 ) + value
743
-
744
- # Handle pronouns
745
- when /^((?:#{PL_prep})\s+)(#{PL_pron_acc})$/i
746
- return $1 + PL_pron_acc_h[ $2.downcase ]
747
-
748
- when /^(#{PL_pron_nom})$/i
749
- return PL_pron_nom_h[ word.downcase ]
750
-
751
- when /^(#{PL_pron_acc})$/i
752
- return PL_pron_acc_h[ $1.downcase ]
753
-
754
- # Handle isolated irregular plurals
755
- when /(.*)\b(#{PL_sb_irregular})$/i
756
- return $1 + PL_sb_irregular_h[ $2.downcase ]
315
+ ### Register an English-language extension.
316
+ def self::register_extension( mod )
317
+ MODULES.push( mod )
318
+ self.log.debug "Registered English extension %p" % [ mod ]
757
319
 
758
- when /(#{PL_sb_U_man_mans})$/i
759
- return "#{$1}s"
320
+ include( mod )
321
+ mod.extend( Loggability )
322
+ mod.log_to( :linguistics )
760
323
 
761
- # Handle families of irregular plurals
762
- when /(.*)man$/i ; return "#{$1}men"
763
- when /(.*[ml])ouse$/i ; return "#{$1}ice"
764
- when /(.*)goose$/i ; return "#{$1}geese"
765
- when /(.*)tooth$/i ; return "#{$1}teeth"
766
- when /(.*)foot$/i ; return "#{$1}feet"
324
+ if mod.const_defined?( :SingletonMethods )
325
+ smod = mod.const_get(:SingletonMethods)
326
+ self.log.debug " and its singleton methods %p" % [ smod ]
327
+ extend( smod )
767
328
 
768
- # Handle unassimilated imports
769
- when /(.*)ceps$/i ; return word
770
- when /(.*)zoon$/i ; return "#{$1}zoa"
771
- when /(.*[csx])is$/i ; return "#{$1}es"
772
- when /(#{PL_sb_U_ex_ices})ex$/i; return "#{$1}ices"
773
- when /(#{PL_sb_U_ix_ices})ix$/i; return "#{$1}ices"
774
- when /(#{PL_sb_U_um_a})um$/i ; return "#{$1}a"
775
- when /(#{PL_sb_U_us_i})us$/i ; return "#{$1}i"
776
- when /(#{PL_sb_U_on_a})on$/i ; return "#{$1}a"
777
- when /(#{PL_sb_U_a_ae})$/i ; return "#{$1}e"
778
- end
779
-
780
- # Handle incompletely assimilated imports
781
- if Linguistics::classical?
782
- case word
783
- when /(.*)trix$/i ; return "#{$1}trices"
784
- when /(.*)eau$/i ; return "#{$1}eaux"
785
- when /(.*)ieu$/i ; return "#{$1}ieux"
786
- when /(.{2,}[yia])nx$/i ; return "#{$1}nges"
787
- when /(#{PL_sb_C_en_ina})en$/i; return "#{$1}ina"
788
- when /(#{PL_sb_C_ex_ices})ex$/i; return "#{$1}ices"
789
- when /(#{PL_sb_C_ix_ices})ix$/i; return "#{$1}ices"
790
- when /(#{PL_sb_C_um_a})um$/i ; return "#{$1}a"
791
- when /(#{PL_sb_C_us_i})us$/i ; return "#{$1}i"
792
- when /(#{PL_sb_C_us_us})$/i ; return "#{$1}"
793
- when /(#{PL_sb_C_a_ae})$/i ; return "#{$1}e"
794
- when /(#{PL_sb_C_a_ata})a$/i ; return "#{$1}ata"
795
- when /(#{PL_sb_C_o_i})o$/i ; return "#{$1}i"
796
- when /(#{PL_sb_C_on_a})on$/i ; return "#{$1}a"
797
- when /#{PL_sb_C_im}$/i ; return "#{word}im"
798
- when /#{PL_sb_C_i}$/i ; return "#{word}i"
329
+ ivars = mod.instance_variables
330
+ self.log.debug " and instance variables %p" % [ ivars ]
331
+ ivars.each do |ivar|
332
+ instance_variable_set( ivar, mod.instance_variable_get(ivar) )
799
333
  end
800
334
  end
801
-
802
-
803
- # Handle singular nouns ending in ...s or other sibilants
804
- case word
805
- when /^(#{PL_sb_singular_s})$/i; return "#{$1}es"
806
- when /^([A-Z].*s)$/; return "#{$1}es"
807
- when /(.*)([cs]h|[zx])$/i ; return "#{$1}#{$2}es"
808
- # when /(.*)(us)$/i ; return "#{$1}#{$2}es"
809
-
810
- # Handle ...f -> ...ves
811
- when /(.*[eao])lf$/i ; return "#{$1}lves";
812
- when /(.*[^d])eaf$/i ; return "#{$1}eaves"
813
- when /(.*[nlw])ife$/i ; return "#{$1}ives"
814
- when /(.*)arf$/i ; return "#{$1}arves"
815
-
816
- # Handle ...y
817
- when /(.*[aeiou])y$/i ; return "#{$1}ys"
818
- when /([A-Z].*y)$/ ; return "#{$1}s"
819
- when /(.*)y$/i ; return "#{$1}ies"
820
-
821
- # Handle ...o
822
- when /#{PL_sb_U_o_os}$/i ; return "#{word}s"
823
- when /[aeiou]o$/i ; return "#{word}s"
824
- when /o$/i ; return "#{word}es"
825
-
826
- # Otherwise just add ...s
827
- else
828
- return "#{word}s"
829
- end
830
- end # def pluralize_noun
831
-
832
-
833
-
834
- ### Pluralize special verbs
835
- def pluralize_special_verb( word, count )
836
- count ||= Linguistics::num
837
- count = normalize_count( count )
838
-
839
- return nil if /^(#{PL_count_one})$/i =~ count.to_s
840
-
841
- # Handle user-defined verbs
842
- #if value = ud_match( word, PL_v_user_defined )
843
- # return value
844
- #end
845
-
846
- case word
847
-
848
- # Handle irregular present tense (simple and compound)
849
- when /^(#{PL_v_irregular_pres})((\s.*)?)$/i
850
- return PL_v_irregular_pres_h[ $1.downcase ] + $2
851
-
852
- # Handle irregular future, preterite and perfect tenses
853
- when /^(#{PL_v_irregular_non_pres})((\s.*)?)$/i
854
- return word
855
-
856
- # Handle special cases
857
- when /^(#{PL_v_special_s})$/, /\s/
858
- return nil
859
-
860
- # Handle standard 3rd person (chop the ...(e)s off single words)
861
- when /^(.*)([cs]h|[x]|zz|ss)es$/i
862
- return $1 + $2
863
- when /^(..+)ies$/i
864
- return "#{$1}y"
865
- when /^(.+)oes$/i
866
- return "#{$1}o"
867
- when /^(.*[^s])s$/i
868
- return $1
869
-
870
- # Otherwise, a regular verb (handle elsewhere)
871
- else
872
- return nil
873
- end
874
335
  end
875
336
 
876
337
 
877
- ### Pluralize regular verbs
878
- def pluralize_general_verb( word, count )
879
- count ||= Linguistics::num
880
- count = normalize_count( count )
881
-
882
- return word if /^(#{PL_count_one})$/i =~ count.to_s
883
-
884
- case word
885
-
886
- # Handle ambiguous present tenses (simple and compound)
887
- when /^(#{PL_v_ambiguous_pres})((\s.*)?)$/i
888
- return PL_v_ambiguous_pres_h[ $1.downcase ] + $2
889
-
890
- # Handle ambiguous preterite and perfect tenses
891
- when /^(#{PL_v_ambiguous_non_pres})((\s.*)?)$/i
892
- return word
893
-
894
- # Otherwise, 1st or 2nd person is uninflected
895
- else
896
- return word
338
+ ### Returns +true+ if the English-language module with the given +name+ was
339
+ ### successfully registered.
340
+ def self::has_extension?( name )
341
+ return MODULES.any? do |mod|
342
+ mod.name.sub( /.*::/, '' ).downcase == name.to_s.downcase
897
343
  end
898
344
  end
899
345
 
900
346
 
901
- ### Handle special adjectives
902
- def pluralize_special_adjective( word, count )
903
- count ||= Linguistics::num
904
- count = normalize_count( count )
905
-
906
- return word if /^(#{PL_count_one})$/i =~ count.to_s
907
-
908
- # Handle user-defined adjectives
909
- #if value = ud_match( word, PL_adj_user_defined )
910
- # return value
911
- #end
912
-
913
- case word
914
-
915
- # Handle known cases
916
- when /^(#{PL_adj_special})$/i
917
- return PL_adj_special_h[ $1.downcase ]
918
-
919
- # Handle possessives
920
- when /^(#{PL_adj_poss})$/i
921
- return PL_adj_poss_h[ $1.downcase ]
922
-
923
- when /^(.*)'s?$/
924
- pl = plural_noun( $1 )
925
- if /s$/ =~ pl
926
- return "#{pl}'"
927
- else
928
- return "#{pl}'s"
929
- end
930
-
931
- # Otherwise, no idea
932
- else
933
- return nil
934
- end
935
- end
936
-
937
-
938
- ### Returns the given word with a prepended indefinite article, unless
939
- ### +count+ is non-nil and not singular.
940
- def indef_article( word, count )
941
- count ||= Linguistics::num
942
- return "#{count} #{word}" if
943
- count && /^(#{PL_count_one})$/i !~ count.to_s
944
-
945
- # Handle user-defined variants
946
- # return value if value = ud_match( word, A_a_user_defined )
947
-
948
- case word
949
-
950
- # Handle special cases
951
- when /^(#{A_explicit_an})/i
952
- return "an #{word}"
953
-
954
- # Handle abbreviations
955
- when /^(#{A_abbrev})/x
956
- return "an #{word}"
957
- when /^[aefhilmnorsx][.-]/i
958
- return "an #{word}"
959
- when /^[a-z][.-]/i
960
- return "a #{word}"
961
-
962
- # Handle consonants
963
- when /^[^aeiouy]/i
964
- return "a #{word}"
965
-
966
- # Handle special vowel-forms
967
- when /^e[uw]/i
968
- return "a #{word}"
969
- when /^onc?e\b/i
970
- return "a #{word}"
971
- when /^uni([^nmd]|mo)/i
972
- return "a #{word}"
973
- when /^u[bcfhjkqrst][aeiou]/i
974
- return "a #{word}"
975
-
976
- # Handle vowels
977
- when /^[aeiou]/i
978
- return "an #{word}"
979
-
980
- # Handle y... (before certain consonants implies (unnaturalized) "i.." sound)
981
- when /^(#{A_y_cons})/i
982
- return "an #{word}"
983
-
984
- # Otherwise, guess "a"
985
- else
986
- return "a #{word}"
987
- end
988
- end
989
-
990
-
991
- ### Transform the specified number of units-place numerals into a
992
- ### word-phrase at the given number of +thousands+ places.
993
- def to_units( units, thousands=0 )
994
- return Units[ units ] + to_thousands( thousands )
347
+ ### Debugging output
348
+ def self::debug_msg( *msgs ) # :nodoc:
349
+ $stderr.puts msgs.join(" ") if $DEBUG
995
350
  end
996
351
 
997
352
 
998
- ### Transform the specified number of tens- and units-place numerals into a
999
- ### word-phrase at the given number of +thousands+ places.
1000
- def to_tens( tens, units, thousands=0 )
1001
- unless tens == 1
1002
- return Tens[ tens ] + ( tens.nonzero? && units.nonzero? ? '-' : '' ) +
1003
- to_units( units, thousands )
1004
- else
1005
- return Teens[ units ] + to_thousands( thousands )
1006
- end
1007
- end
1008
-
353
+ ### Add an lprintf formatter named +name+ that will use the specified +callback+ method.
354
+ ### The name of the formatter is the placeholder that will be used in the
355
+ ### format string, and the +callback+ is the method to call on the english-language
356
+ ### inflector for the lprintf argument, and can either be an object that responds to
357
+ ### #call, or the name of a method to call as a Symbol.
358
+ ###
359
+ ### Using a Symbol:
360
+ ###
361
+ ### def plural( count=2 )
362
+ ### # return the plural of the inflected object
363
+ ### end
364
+ ### Linguistics::EN.register_lprintf_formatter :PL, :plural
365
+ ###
366
+ ### Using a method:
367
+ ###
368
+ ### Linguistics::EN.register_lprintf_formatter :PL, method( :plural )
369
+ ###
370
+ ### Using a block:
371
+ ###
372
+ ### Linguistics::EN.register_lprintf_formatter :PL do |obj|
373
+ ### obj.en.plural
374
+ ### end
375
+ ###
376
+ def self::register_lprintf_formatter( name, callback=nil )
377
+ raise LocalJumpError, "no callback or block given" unless callback || block_given?
378
+ callback ||= Proc.new
1009
379
 
1010
- ### Transform the specified number of hundreds-, tens-, and units-place
1011
- ### numerals into a word phrase. If the number of thousands (+thousands+) is
1012
- ### greater than 0, it will be used to determine where the decimal point is
1013
- ### in relation to the hundreds-place number.
1014
- def to_hundreds( hundreds, tens=0, units=0, thousands=0, joinword=" and " )
1015
- joinword = ' ' if joinword.empty?
1016
- if hundreds.nonzero?
1017
- return to_units( hundreds ) + " hundred" +
1018
- (tens.nonzero? || units.nonzero? ? joinword : '') +
1019
- to_tens( tens, units ) +
1020
- to_thousands( thousands )
1021
- elsif tens.nonzero? || units.nonzero?
1022
- return to_tens( tens, units ) + to_thousands( thousands )
1023
- else
1024
- return nil
1025
- end
380
+ @@lprintf_formatters[ name ] = callback.to_proc
1026
381
  end
1027
382
 
1028
- ### Transform the specified number into one or more words like 'thousand',
1029
- ### 'million', etc. Uses the thousands (American) system.
1030
- def to_thousands( thousands=0 )
1031
- parts = []
1032
- (0..thousands).step( Thousands.length - 1 ) {|i|
1033
- if i.zero?
1034
- parts.push Thousands[ thousands % (Thousands.length - 1) ]
1035
- else
1036
- parts.push Thousands.last
1037
- end
1038
- }
1039
383
 
1040
- return parts.join(" ")
384
+ ### Return +true+ if running in a 'classical' mode.
385
+ def self::classical?
386
+ return Thread.current[ THREAD_CLASSICAL_KEY ] ? true : false
1041
387
  end
1042
388
 
1043
389
 
1044
- ### Return the specified number +num+ as an array of number phrases.
1045
- def number_to_words( num, config )
1046
- return [config[:zero]] if num.to_i.zero?
1047
- chunks = []
1048
-
1049
- # Break into word-groups if groups is set
1050
- if config[:group].nonzero?
1051
-
1052
- # Build a Regexp with <config[:group]> number of digits. Any past
1053
- # the first are optional.
1054
- re = Regexp::new( "(\\d)" + ("(\\d)?" * (config[:group] - 1)) )
1055
-
1056
- # Scan the string, and call the word-chunk function that deals with
1057
- # chunks of the found number of digits.
1058
- num.to_s.scan( re ) {|digits|
1059
- debug_msg " digits = #{digits.inspect}"
1060
- fn = NumberToWordsFunctions[ digits.nitems ]
1061
- numerals = digits.flatten.compact.collect {|i| i.to_i}
1062
- debug_msg " numerals = #{numerals.inspect}"
1063
- chunks.push fn.call( config[:zero], *numerals ).strip
1064
- }
1065
- else
1066
- phrase = num.to_s
1067
- phrase.sub!( /\A\s*0+/, '' )
1068
- mill = 0
1069
-
1070
- # Match backward from the end of the digits in the string, turning
1071
- # chunks of three, of two, and of one into words.
1072
- mill += 1 while
1073
- phrase.sub!( /(\d)(\d)(\d)(?=\D*\Z)/ ) {
1074
- words = to_hundreds( $1.to_i, $2.to_i, $3.to_i, mill,
1075
- config[:and] )
1076
- chunks.unshift words.strip.squeeze(' ') unless words.nil?
1077
- ''
1078
- }
1079
-
1080
- phrase.sub!( /(\d)(\d)(?=\D*\Z)/ ) {
1081
- chunks.unshift to_tens( $1.to_i, $2.to_i, mill ).strip.squeeze(' ')
1082
- ''
1083
- }
1084
- phrase.sub!( /(\d)(?=\D*\Z)/ ) {
1085
- chunks.unshift to_units( $1.to_i, mill ).strip.squeeze(' ')
1086
- ''
1087
- }
1088
- end
390
+ ### Set classical mode for the current thread inside the block, then
391
+ ### unset it when it returns.
392
+ def self::in_classical_mode
393
+ old_setting = Thread.current[ THREAD_CLASSICAL_KEY ]
394
+ Thread.current[ THREAD_CLASSICAL_KEY ] = true
1089
395
 
1090
- return chunks
396
+ yield
397
+ ensure
398
+ Thread.current[ THREAD_CLASSICAL_KEY ] = old_setting
1091
399
  end
1092
400
 
1093
401
 
@@ -1095,579 +403,6 @@ module Linguistics::EN
1095
403
  ### P U B L I C F U N C T I O N S
1096
404
  #################################################################
1097
405
 
1098
- ### Return the name of the language this module is for.
1099
- def language( unused=nil )
1100
- "English"
1101
- end
1102
-
1103
-
1104
- ### Return the plural of the given +phrase+ if +count+ indicates it should
1105
- ### be plural.
1106
- def plural( phrase, count=nil )
1107
- phrase = numwords( phrase ) if phrase.is_a?( Numeric )
1108
-
1109
- md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
1110
- pre, word, post = md.to_a[1,3]
1111
- return phrase if word.nil? or word.empty?
1112
-
1113
- plural = postprocess( word,
1114
- pluralize_special_adjective(word, count) ||
1115
- pluralize_special_verb(word, count) ||
1116
- pluralize_noun(word, count) )
1117
-
1118
- return pre + plural + post
1119
- end
1120
- def_lprintf_formatter :PL, :plural
1121
-
1122
-
1123
- ### Return the plural of the given noun +phrase+ if +count+ indicates it
1124
- ### should be plural.
1125
- def plural_noun( phrase, count=nil )
1126
- md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
1127
- pre, word, post = md.to_a[1,3]
1128
- return phrase if word.nil? or word.empty?
1129
-
1130
- plural = postprocess( word, pluralize_noun(word, count) )
1131
- return pre + plural + post
1132
- end
1133
- def_lprintf_formatter :PL_N, :plural_noun
1134
-
1135
-
1136
- ### Return the plural of the given verb +phrase+ if +count+ indicates it
1137
- ### should be plural.
1138
- def plural_verb( phrase, count=nil )
1139
- md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
1140
- pre, word, post = md.to_a[1,3]
1141
- return phrase if word.nil? or word.empty?
1142
-
1143
- plural = postprocess( word,
1144
- pluralize_special_verb(word, count) ||
1145
- pluralize_general_verb(word, count) )
1146
- return pre + plural + post
1147
- end
1148
- def_lprintf_formatter :PL_V, :plural_verb
1149
-
1150
-
1151
- ### Return the plural of the given adjectival +phrase+ if +count+ indicates
1152
- ### it should be plural.
1153
- def plural_adjective( phrase, count=nil )
1154
- md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
1155
- pre, word, post = md.to_a[1,3]
1156
- return phrase if word.nil? or word.empty?
1157
-
1158
- plural = postprocess( word,
1159
- pluralize_special_adjective(word, count) || word )
1160
- return pre + plural + post
1161
- end
1162
- alias_method :plural_adj, :plural_adjective
1163
- def_lprintf_formatter :PL_ADJ, :plural_adjective
1164
-
1165
-
1166
- ### Return the given phrase with the appropriate indefinite article ("a" or
1167
- ### "an") prepended.
1168
- def a( phrase, count=nil )
1169
- md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
1170
- pre, word, post = md.to_a[1,3]
1171
- return phrase if word.nil? or word.empty?
1172
-
1173
- result = indef_article( word, count )
1174
- return pre + result + post
1175
- end
1176
- alias_method :an, :a
1177
- def_lprintf_formatter :A, :a
1178
- def_lprintf_formatter :AN, :a
1179
-
1180
-
1181
- ### Translate zero-quantified +phrase+ to "no +phrase.plural+"
1182
- def no( phrase, count=nil )
1183
- md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
1184
- pre, word, post = md.to_a[1,3]
1185
- count ||= Linguistics::num || 0
1186
-
1187
- unless /^#{PL_count_zero}$/ =~ count.to_s
1188
- return "#{pre}#{count} " + plural( word, count ) + post
1189
- else
1190
- return "#{pre}no " + plural( word, 0 ) + post
1191
- end
1192
- end
1193
- def_lprintf_formatter :NO, :no
1194
-
1195
-
1196
- ### Participles
1197
- def present_participle( word )
1198
- plural = plural_verb( word.to_s, 2 )
1199
-
1200
- plural.sub!( /ie$/, 'y' ) or
1201
- plural.sub!( /ue$/, 'u' ) or
1202
- plural.sub!( /([auy])e$/, '$1' ) or
1203
- plural.sub!( /i$/, '' ) or
1204
- plural.sub!( /([^e])e$/, "\\1" ) or
1205
- /er$/.match( plural ) or
1206
- plural.sub!( /([^aeiou][aeiouy]([bdgmnprst]))$/, "\\1\\2" )
1207
-
1208
- return "#{plural}ing"
1209
- end
1210
- alias_method :part_pres, :present_participle
1211
- def_lprintf_formatter :PART_PRES, :present_participle
1212
-
1213
-
1214
-
1215
- ### Return the specified number as english words. One or more configuration
1216
- ### values may be passed to control the returned String:
1217
- ###
1218
- ### [<b>:group</b>]
1219
- ### Controls how many numbers at a time are grouped together. Valid values
1220
- ### are <code>0</code> (normal grouping), <code>1</code> (single-digit
1221
- ### grouping, e.g., "one, two, three, four"), <code>2</code>
1222
- ### (double-digit grouping, e.g., "twelve, thirty-four"), or <code>3</code>
1223
- ### (triple-digit grouping, e.g., "one twenty-three, four").
1224
- ### [<b>:comma</b>]
1225
- ### Set the character/s used to separate word groups. Defaults to
1226
- ### <code>", "</code>.
1227
- ### [<b>:and</b>]
1228
- ### Set the word and/or characters used where <code>' and ' </code>(the
1229
- ### default) is normally used. Setting <code>:and</code> to
1230
- ### <code>' '</code>, for example, will cause <code>2556</code> to be
1231
- ### returned as "two-thousand, five hundred fifty-six" instead of
1232
- ### "two-thousand, five hundred and fifty-six".
1233
- ### [<b>:zero</b>]
1234
- ### Set the word used to represent the numeral <code>0</code> in the
1235
- ### result. <code>'zero'</code> is the default.
1236
- ### [<b>:decimal</b>]
1237
- ### Set the translation of any decimal points in the number; the default
1238
- ### is <code>'point'</code>.
1239
- ### [<b>:asArray</b>]
1240
- ### If set to a true value, the number will be returned as an array of
1241
- ### word groups instead of a String.
1242
- def numwords( number, hashargs={} )
1243
- num = number.to_s
1244
- config = NumwordDefaults.merge( hashargs )
1245
- raise "Bad chunking option: #{config[:group]}" unless
1246
- config[:group].between?( 0, 3 )
1247
-
1248
- # Array of number parts: first is everything to the left of the first
1249
- # decimal, followed by any groups of decimal-delimited numbers after that
1250
- parts = []
1251
-
1252
- # Wordify any sign prefix
1253
- sign = (/\A\s*\+/ =~ num) ? 'plus' : (/\A\s*\-/ =~ num) ? 'minus' : ''
1254
-
1255
- # Strip any ordinal suffixes
1256
- ord = true if num.sub!( /(st|nd|rd|th)\Z/, '' )
1257
-
1258
- # Split the number into chunks delimited by '.'
1259
- chunks = if !config[:decimal].empty? then
1260
- if config[:group].nonzero?
1261
- num.split(/\./)
1262
- else
1263
- num.split(/\./, 2)
1264
- end
1265
- else
1266
- [ num ]
1267
- end
1268
-
1269
- # Wordify each chunk, pushing arrays into the parts array
1270
- chunks.each_with_index {|chunk,section|
1271
- chunk.gsub!( /\D+/, '' )
1272
-
1273
- # If there's nothing in this chunk of the number, set it to zero
1274
- # unless it's the whole-number part, in which case just push an
1275
- # empty array.
1276
- if chunk.empty?
1277
- if section.zero?
1278
- parts.push []
1279
- next
1280
- end
1281
- end
1282
-
1283
- # Split the number section into wordified parts unless this is the
1284
- # second or succeeding part of a non-group number
1285
- unless config[:group].zero? && section.nonzero?
1286
- parts.push number_to_words( chunk, config )
1287
- else
1288
- parts.push number_to_words( chunk, config.merge(:group => 1) )
1289
- end
1290
- }
1291
-
1292
- debug_msg "Parts => #{parts.inspect}"
1293
-
1294
- # Turn the last word of the whole-number part back into an ordinal if
1295
- # the original number came in that way.
1296
- if ord && !parts[0].empty?
1297
- parts[0][-1] = ordinal( parts[0].last )
1298
- end
1299
-
1300
- # If the caller's expecting an Array return, just flatten and return the
1301
- # parts array.
1302
- if config[:asArray]
1303
- unless sign.empty?
1304
- parts[0].unshift( sign )
1305
- end
1306
- return parts.flatten
1307
- end
1308
-
1309
- # Catenate each sub-parts array into a whole number part and one or more
1310
- # post-decimal parts. If grouping is turned on, all sub-parts get joined
1311
- # with commas, otherwise just the whole-number part is.
1312
- if config[:group].zero?
1313
- if parts[0].length > 1
1314
-
1315
- # Join all but the last part together with commas
1316
- wholenum = parts[0][0...-1].join( config[:comma] )
1317
-
1318
- # If the last part is just a single word, append it to the
1319
- # wholenum part with an 'and'. This is to get things like 'three
1320
- # thousand and three' instead of 'three thousand, three'.
1321
- if /^\s*(\S+)\s*$/ =~ parts[0].last
1322
- wholenum += config[:and] + parts[0].last
1323
- else
1324
- wholenum += config[:comma] + parts[0].last
1325
- end
1326
- else
1327
- wholenum = parts[0][0]
1328
- end
1329
- decimals = parts[1..-1].collect {|part| part.join(" ")}
1330
-
1331
- debug_msg "Wholenum: #{wholenum.inspect}; decimals: #{decimals.inspect}"
1332
-
1333
- # Join with the configured decimal; if it's empty, just join with
1334
- # spaces.
1335
- unless config[:decimal].empty?
1336
- return sign + ([ wholenum ] + decimals).
1337
- join( " #{config[:decimal]} " ).strip
1338
- else
1339
- return sign + ([ wholenum ] + decimals).
1340
- join( " " ).strip
1341
- end
1342
- else
1343
- return parts.compact.
1344
- separate( config[:decimal] ).
1345
- delete_if {|el| el.empty?}.
1346
- join( config[:comma] ).
1347
- strip
1348
- end
1349
- end
1350
- def_lprintf_formatter :NUMWORDS, :numwords
1351
-
1352
-
1353
- ### Transform the given +number+ into an ordinal word. The +number+ object
1354
- ### can be either an Integer or a String.
1355
- def ordinal( number )
1356
- case number
1357
- when Integer
1358
- return number.to_s + (Nth[ number % 100 ] || Nth[ number % 10 ])
1359
-
1360
- else
1361
- return number.to_s.sub( /(#{OrdinalSuffixes})\Z/ ) { Ordinals[$1] }
1362
- end
1363
- end
1364
- def_lprintf_formatter :ORD, :ordinal
1365
-
1366
-
1367
- ### Transform the given +number+ into an ordinate word.
1368
- def ordinate( number )
1369
- return Linguistics::EN.ordinal( Linguistics::EN.numwords(number) )
1370
- end
1371
-
1372
-
1373
- ### Return a phrase describing the specified +number+ of objects in the
1374
- ### given +phrase+ in general terms. The following options can be used to
1375
- ### control the makeup of the returned quantity String:
1376
- ###
1377
- ### [<b>:joinword</b>]
1378
- ### Sets the word (and any surrounding spaces) used as the word separating the
1379
- ### quantity from the noun in the resulting string. Defaults to <tt>' of
1380
- ### '</tt>.
1381
- def quantify( phrase, number=0, args={} )
1382
- num = number.to_i
1383
- config = QuantifyDefaults.merge( args )
1384
-
1385
- case num
1386
- when 0
1387
- no( phrase )
1388
- when 1
1389
- a( phrase )
1390
- when SeveralRange
1391
- "several " + plural( phrase, num )
1392
- when NumberRange
1393
- "a number of " + plural( phrase, num )
1394
- when NumerousRange
1395
- "numerous " + plural( phrase, num )
1396
- when ManyRange
1397
- "many " + plural( phrase, num )
1398
- else
1399
-
1400
- # Anything bigger than the ManyRange gets described like
1401
- # "hundreds of thousands of..." or "millions of..."
1402
- # depending, of course, on how many there are.
1403
- thousands, subthousands = Math::log10( num ).to_i.divmod( 3 )
1404
- stword =
1405
- case subthousands
1406
- when 2
1407
- "hundreds"
1408
- when 1
1409
- "tens"
1410
- else
1411
- nil
1412
- end
1413
- thword = plural( to_thousands(thousands).strip )
1414
- thword = nil if thword.empty?
1415
-
1416
- [ # Hundreds (of)...
1417
- stword,
1418
-
1419
- # thousands (of)
1420
- thword,
1421
-
1422
- # stars.
1423
- plural(phrase, number)
1424
- ].compact.join( config[:joinword] )
1425
- end
1426
- end
1427
- def_lprintf_formatter :QUANT, :quantify
1428
-
1429
-
1430
- # :TODO: Needs refactoring
1431
-
1432
- ### Return the specified +obj+ (which must support the <tt>#collect</tt>
1433
- ### method) as a conjunction. Each item is converted to a String if it is
1434
- ### not already (using #to_s) unless a block is given, in which case it is
1435
- ### called once for each object in the array, and the stringified return
1436
- ### value from the block is used instead. Returning +nil+ causes that
1437
- ### particular element to be omitted from the resulting conjunction. The
1438
- ### following options can be used to control the makeup of the returned
1439
- ### conjunction String:
1440
- ###
1441
- ### [<b>:separator</b>]
1442
- ### Specify one or more characters to separate items in the resulting
1443
- ### list. Defaults to <tt>', '</tt>.
1444
- ### [<b>:altsep</b>]
1445
- ### An alternate separator to use if any of the resulting conjunction's
1446
- ### clauses contain the <tt>:separator</tt> character/s. Defaults to <tt>'; '</tt>.
1447
- ### [<b>:penultimate</b>]
1448
- ### Flag that indicates whether or not to join the last clause onto the
1449
- ### rest of the conjunction using a penultimate <tt>:separator</tt>. E.g.,
1450
- ### %w{duck cow dog}.en.conjunction
1451
- ### # => "a duck, a cow, and a dog"
1452
- ### %w{duck cow dog}.en.conjunction( :penultimate => false )
1453
- ### # => "a duck, a cow and a dog"
1454
- ### Default to <tt>true</tt>.
1455
- ### [<b>:conjunctive</b>]
1456
- ### Sets the word used as the conjunctive (separating word) of the
1457
- ### resulting string. Default to <tt>'and'</tt>.
1458
- ### [<b>:combine</b>]
1459
- ### If set to <tt>true</tt> (the default), items which are identical (after
1460
- ### surrounding spaces are stripped) will be combined in the resulting
1461
- ### conjunction. E.g.,
1462
- ### %w{goose cow goose dog}.en.conjunction
1463
- ### # => "two geese, a cow, and a dog"
1464
- ### %w{goose cow goose dog}.en.conjunction( :combine => false )
1465
- ### # => "a goose, a cow, a goose, and a dog"
1466
- ### [<b>:casefold</b>]
1467
- ### If set to <tt>true</tt> (the default), then items are compared
1468
- ### case-insensitively when combining them. This has no effect if
1469
- ### <tt>:combine</tt> is <tt>false</tt>.
1470
- ### [<b>:generalize</b>]
1471
- ### If set to <tt>true</tt>, then quantities of combined items are turned into
1472
- ### general descriptions instead of exact amounts.
1473
- ### ary = %w{goose pig dog horse goose reindeer goose dog horse}
1474
- ### ary.en.conjunction
1475
- ### # => "three geese, two dogs, two horses, a pig, and a reindeer"
1476
- ### ary.en.conjunction( :generalize => true )
1477
- ### # => "several geese, several dogs, several horses, a pig, and a reindeer"
1478
- ### See the #quantify method for specifics on how quantities are
1479
- ### generalized. Generalization defaults to <tt>false</tt>, and has no effect if
1480
- ### :combine is <tt>false</tt>.
1481
- ### [<b>:quantsort</b>]
1482
- ### If set to <tt>true</tt> (the default), items which are combined in the
1483
- ### resulting conjunction will be listed in order of amount, with greater
1484
- ### quantities sorted first. If <tt>:quantsort</tt> is <tt>false</tt>, combined items
1485
- ### will appear where the first instance of them occurred in the
1486
- ### list. This sort is also the fallback for identical quantities (i.e.,
1487
- ### items of the same quantity will be listed in the order they appeared
1488
- ### in the source list).
1489
- ###
1490
- def conjunction( obj, args={} )
1491
- config = ConjunctionDefaults.merge( args )
1492
- phrases = []
1493
-
1494
- # Transform items in the obj to phrases
1495
- if block_given?
1496
- phrases = obj.collect {|item| yield(item) }.compact
1497
- else
1498
- phrases = obj.collect {|item| item.to_s }
1499
- end
1500
-
1501
- # No need for a conjunction if there's only one thing
1502
- return a(phrases[0]) if phrases.length < 2
1503
-
1504
- # Set up a Proc to derive a collector key from a phrase depending on the
1505
- # configuration
1506
- keyfunc =
1507
- if config[:casefold]
1508
- proc {|key| key.downcase.strip}
1509
- else
1510
- proc {|key| key.strip}
1511
- end
1512
-
1513
- # If we're combining (:combine => true), count and delete duplicate phrases,
1514
- # i.e. those that the keyfunc munges into the same collector key.
1515
- collector = {}
1516
- if config[:combine]
1517
-
1518
- phrases.each_index do |i|
1519
- # Stop when reaching the end of a truncated list
1520
- break if phrases[i].nil?
1521
-
1522
- # Make the key using the configured key function
1523
- phrase = keyfunc[ phrases[i] ]
1524
-
1525
- # If the collector already has this key, increment its count,
1526
- # eliminate the duplicate from the phrase list, and redo the loop.
1527
- if collector.key?( phrase )
1528
- collector[ phrase ] += 1
1529
- phrases.delete_at( i )
1530
- redo
1531
- end
1532
-
1533
- collector[ phrase ] = 1
1534
- end
1535
- else
1536
- # If we're not combining, just make everything have a count of 1.
1537
- phrases.uniq.each {|key| collector[ keyfunc[key] ] = 1}
1538
- end
1539
-
1540
- # If sort-by-quantity is turned on, sort the phrases first by how many
1541
- # there are (most-first), and then by the order they were specified in.
1542
- if config[:quantsort] && config[:combine]
1543
- origorder = {}
1544
- phrases.each_with_index {|phrase,i| origorder[ keyfunc[phrase] ] ||= i }
1545
- phrases.sort! {|a,b|
1546
- (collector[ keyfunc[b] ] <=> collector[ keyfunc[a] ]).nonzero? ||
1547
- (origorder[ keyfunc[a] ] <=> origorder[ keyfunc[b] ])
1548
- }
1549
- end
1550
-
1551
- # Set up a filtering function that adds either an indefinite article, an
1552
- # indefinite quantifier, or a definite quantifier to each phrase
1553
- # depending on the configuration and the count of phrases in the
1554
- # collector.
1555
- filter =
1556
- if config[:generalize]
1557
- proc {|phrase, count| quantify(phrase, count) }
1558
- else
1559
- proc {|phrase, count|
1560
- if count > 1
1561
- "%s %s" % [
1562
- # :TODO: Make this threshold settable
1563
- count < 10 ? count.en.numwords : count.to_s,
1564
- plural(phrase, count)
1565
- ]
1566
- else
1567
- a( phrase )
1568
- end
1569
- }
1570
- end
1571
-
1572
- # Now use the configured filter to turn each phrase into its final
1573
- # form. Hmmm... square-bracket Lisp?
1574
- phrases.collect! {|phrase| filter[phrase, collector[ keyfunc[phrase] ]] }
1575
-
1576
- # Prepend the conjunctive to the last element unless it's empty or
1577
- # there's only one element
1578
- phrases[-1].insert( 0, config[:conjunctive] + " " ) unless
1579
- config[:conjunctive].strip.empty? or
1580
- phrases.length < 2
1581
-
1582
- # Concatenate the last two elements if there's no penultimate separator,
1583
- # and pick a separator based on how many phrases there are and whether
1584
- # or not there's already an instance of it in the phrases.
1585
- phrase_count = phrases.length
1586
- phrases[-2] << " " << phrases.pop unless config[:penultimate]
1587
- sep = config[:separator]
1588
- if phrase_count <= 2
1589
- sep = ' '
1590
- elsif phrases.find {|str| str.include?(config[:separator]) }
1591
- sep = config[:altsep]
1592
- end
1593
-
1594
- return phrases.join( sep )
1595
- end
1596
- def_lprintf_formatter :CONJUNCT, :conjunction
1597
-
1598
-
1599
- ### Turns a camel-case +string+ ("camelCaseToEnglish") to plain English
1600
- ### ("camel case to english"). Each word is decapitalized.
1601
- def camel_case_to_english( string )
1602
- string.to_s.
1603
- gsub( /([A-Z])([A-Z])/ ) { "#$1 #$2" }.
1604
- gsub( /([a-z])([A-Z])/ ) { "#$1 #$2" }.downcase
1605
- end
1606
-
1607
-
1608
- ### Turns an English language +string+ into a CamelCase word.
1609
- def english_to_camel_case( string )
1610
- string.to_s.gsub( /\s+([a-z])/ ) { $1.upcase }
1611
- end
1612
-
1613
-
1614
- ### This method doesn't work quite right yet. It does okay for simple cases,
1615
- ### but it misses more complex ones, e.g. 'as' used as a coordinating
1616
- ### conjunction in "A Portrait of the Artist as a Young Man". Perhaps after
1617
- ### there's a working (non-leaking) LinkParser for Ruby, this can be fixed
1618
- ### up. Until then it'll just be undocumented.
1619
-
1620
- ### Returns the given +string+ as a title-cased phrase.
1621
- def titlecase( string ) # :nodoc:
1622
-
1623
- # Split on word-boundaries
1624
- words = string.split( /\b/ )
1625
-
1626
- # Always capitalize the first and last words
1627
- words.first.capitalize!
1628
- words.last.capitalize!
1629
-
1630
- # Now scan the rest of the tokens, skipping non-words and capitalization
1631
- # exceptions.
1632
- words.each_with_index do |word, i|
1633
-
1634
- # Non-words
1635
- next unless /^\w+$/.match( word )
1636
-
1637
- # Skip exception-words
1638
- next if TitleCaseExceptions.include?( word )
1639
-
1640
- # Skip second parts of contractions
1641
- next if words[i - 1] == "'" && /\w/.match( words[i - 2] )
1642
-
1643
- # Have to do it this way instead of capitalize! because that method
1644
- # also downcases all other letters.
1645
- word.gsub!( /^(\w)(.*)/ ) { $1.upcase + $2 }
1646
- end
1647
-
1648
- return words.join
1649
- end
1650
-
1651
-
1652
- ### Returns the proper noun form of a string by capitalizing most of the
1653
- ### words.
1654
- ###
1655
- ### Examples:
1656
- ### English.proper_noun("bosnia and herzegovina") ->
1657
- ### "Bosnia and Herzegovina"
1658
- ### English.proper_noun("macedonia, the former yugoslav republic of") ->
1659
- ### "Macedonia, the Former Yugoslav Republic of"
1660
- ### English.proper_noun("virgin islands, u.s.") ->
1661
- ### "Virgin Islands, U.S."
1662
- def proper_noun( string )
1663
- return string.split(/([ .]+)/).collect {|word|
1664
- next word unless /^[a-z]/.match( word ) &&
1665
- ! (%w{and the of}.include?( word ))
1666
- word.capitalize
1667
- }.join
1668
- end
1669
-
1670
-
1671
406
  ### Format the given +fmt+ string by replacing %-escaped sequences with the
1672
407
  ### result of performing a specified operation on the corresponding
1673
408
  ### argument, ala Kernel.sprintf.
@@ -1681,48 +416,22 @@ module Linguistics::EN
1681
416
  ### Convert a number into the corresponding words.
1682
417
  ### %CONJUNCT::
1683
418
  ### Conjunction.
1684
- def lprintf( fmt, *args )
1685
- fmt.to_s.gsub( /%([A-Z_]+)/ ) do |match|
419
+ def lprintf( *args )
420
+ return self.to_s.gsub( /%([A-Z_]+)/ ) do |match|
1686
421
  op = $1.to_s.upcase.to_sym
1687
- if self.lprintf_formatters.key?( op )
422
+ if (( callback = Linguistics::EN.lprintf_formatters[op] ))
1688
423
  arg = args.shift
1689
- self.lprintf_formatters[ op ].call( arg )
424
+ callback.call( arg.en )
1690
425
  else
1691
- raise "no such formatter %p" % op
426
+ raise "no such formatter %p" % [ op ]
1692
427
  end
1693
428
  end
1694
429
  end
1695
430
 
1696
- end # module Linguistics::EN
1697
-
1698
-
1699
- ### Add the #separate and #separate! methods to Array.
1700
- class Array
1701
-
1702
- ### Returns a new Array that has had a new member inserted between all of
1703
- ### the current ones. The value used is the given +value+ argument unless a
1704
- ### block is given, in which case the block is called once for each pair of
1705
- ### the Array, and the return value is used as the separator.
1706
- def separate( value=:__no_arg__, &block )
1707
- ary = self.dup
1708
- ary.separate!( value, &block )
1709
- return ary
1710
- end
1711
431
 
1712
- ### The same as #separate, but modifies the Array in place.
1713
- def separate!( value=:__no_arg__ )
1714
- raise ArgumentError, "wrong number of arguments: (0 for 1)" if
1715
- value == :__no_arg__ && !block_given?
432
+ # Add 'english' to the list of default languages
433
+ Linguistics.register_language( :en, self )
1716
434
 
1717
- (1..( (self.length * 2) - 2 )).step(2) do |i|
1718
- if block_given?
1719
- self.insert( i, yield(self[i-1,2]) )
1720
- else
1721
- self.insert( i, value )
1722
- end
1723
- end
1724
- self
1725
- end
1726
435
 
1727
- end
436
+ end # module Linguistics::EN
1728
437