linguistics 1.0.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/ruby
2
+
3
# Put the project's lib/ directory (a sibling of the directory holding this
# script) at the front of the load path before anything else is required.
BEGIN {
  require 'pathname'

  basedir = Pathname.new( __FILE__ ).dirname.parent.expand_path

  # $LOAD_PATH holds Strings, and a Pathname never compares equal to a
  # String, so convert before both the membership test and the insert.
  libdir = ( basedir + "lib" ).to_s
  $LOAD_PATH.unshift( libdir ) unless $LOAD_PATH.include?( libdir )
}
10
+
11
+ require 'linguistics'
12
+ require 'readline'
13
+
14
+ Linguistics.use( :en, :installProxy => true )
15
+
16
# Return a more general term for +word+ by walking its hypernyms.
#
# +word+ must respond to #synset. If it has no synset, or the synset has no
# hypernyms, +word+ itself is returned. Otherwise the synset one quarter of
# the way along the hypernym list is picked and its first word is returned.
# Progress messages are written to $stderr.
def generalized_word( word )
  # $deferr was an old alias for $stderr that no longer exists; writing to
  # it raises NoMethodError on current Rubies.
  $stderr.puts " Traversing hypernyms for #{word}"

  syn = word.synset
  return word unless syn

  nyms = syn.traverse( :hypernyms )
  return word if nyms.empty?

  index = nyms.length / 4
  general_subj = nyms[ index ]
  general_word = general_subj.words.first

  $stderr.puts " %d synsets returned. Picking %d (%s)" % [
    nyms.length,
    index,
    general_word,
  ]
  return general_word
end
30
+
31
# Interactive loop: read a sentence, replace its subject, object and verb
# (in that order, when the parse yields them) with a more general term from
# generalized_word, and print the result. Ends when Readline returns nil.
while input = Readline.readline( "Sentence to generalize: " )
  sent = input.sentence

  # Pull all three parts out before the sentence text is modified.
  parts = [ sent.subject, sent.object, sent.verb ]

  parts.each do |part|
    next unless part

    # Escape the word so regexp metacharacters in it are matched literally,
    # and use the block form so backslashes in the replacement are not
    # treated as back-references.
    pattern = /\b#{Regexp.escape( part.to_s )}\b/
    input.sub!( pattern ) { generalized_word( part ) }
  end

  puts input
end
44
+
45
+
46
+
@@ -0,0 +1,366 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'linguistics/iso639'
4
+
5
+ # A language-independent framework for adding linguistics functions to Ruby
6
+ # classes.
7
+ #
8
+ # == Synopsis
9
+ #
10
+ # require 'linguistics'
11
+ # Linguistics::use( :en )
12
+ # MyClass::extend( Linguistics )
13
+ #
14
+ # == Authors
15
+ #
16
+ # * Michael Granger <ged@FaerieMUD.org>
17
+ #
18
+ # :include: LICENSE
19
+ #
20
+ #--
21
+ #
22
+ # Please see the file LICENSE in the base directory for licensing details.
23
+ #
24
+ module Linguistics
25
+
26
+ ### Class constants
27
+
28
+ # Release version
29
+ VERSION = '1.0.8'
30
+
31
+ # Language module implementors should do something like:
32
+ # Linguistics::DefaultLanguages.push( :ja ) # or whatever
33
+ # so that direct requiring of a language module sets the default.
34
+ DefaultLanguages = []
35
+
36
+ # The list of Classes to add linguistic behaviours to.
37
+ DefaultExtClasses = [String, Numeric, Array]
38
+
39
+
40
+ #################################################################
41
+ ### I N F L E C T O R C L A S S F A C T O R Y
42
+ #################################################################
43
+
44
### Base class for the per-language proxy objects handed out by extended
### classes. Each concrete proxy class is a subclass whose +langmod+ is the
### module implementing one language; a proxy instance wraps one receiver
### and forwards linguistic method calls to that module, passing the
### receiver as the first argument.
class LanguageProxyClass

  ### Class instance variable + accessor. Contains the module which knows
  ### the specifics of the language the languageProxy class is providing
  ### methods for.
  @langmod = nil
  class << self
    attr_accessor :langmod
  end


  ### Create a new LanguageProxy for the given +receiver+.
  def initialize( receiver )
    @receiver = receiver
  end


  ######
  public
  ######

  ### Returns true if the language module answers +sym+, or if the proxy
  ### itself does. The optional +include_all+ flag is accepted so that
  ### callers using the two-argument form of #respond_to? do not raise an
  ### ArgumentError.
  def respond_to?( sym, include_all=false )
    self.class.langmod.respond_to?( sym ) || super
  end


  ### Report methods answered by the language module as available, so that
  ### Object#method can hand out Method objects for them before they have
  ### been autoloaded.
  def respond_to_missing?( sym, include_all=false )
    self.class.langmod.respond_to?( sym ) || super
  end


  ### Autoload linguistic methods defined in the module this object's
  ### class uses for inflection: the first call to such a method defines a
  ### real forwarding method on the proxy class and then invokes it.
  ### Anything the language module does not answer goes to the default
  ### #method_missing.
  def method_missing( sym, *args, &block )
    return super unless self.class.langmod.respond_to?( sym )

    self.class.module_eval %{
      def #{sym}( *args, &block )
        self.class.langmod.#{sym}( @receiver, *args, &block )
      end
    }, "{Autoloaded: " + __FILE__ + "}", __LINE__

    self.method( sym ).call( *args, &block )
  end


  ### Returns a human-readable representation of the languageProxy for
  ### debugging, logging, etc.
  def inspect
    "<%s languageProxy for %s object %s>" % [
      self.class.langmod.language,
      @receiver.class.name,
      @receiver.inspect,
    ]
  end

end
101
+
102
+
103
### Hook called when an object is extended with Linguistics. Installs the
### language proxy methods either directly in +obj+ (when it is a Class) or
### in the singleton class of +obj+ (for any other object), then continues
### with the normal extension.
def self::extend_object( obj )
  target = if Class === obj
    obj
  else
    # Not a class: the proxy methods go on the object's own metaclass
    class << obj; self; end
  end

  self::install_language_proxy( target )
  super
end
119
+
120
+
121
### Hook called when Linguistics is included in +mod+: extend +mod+ with
### this module as well, unless +mod+ is Linguistics itself.
def self::included( mod )
  return if mod == Linguistics
  mod.extend( self )
end
126
+
127
+
128
### Build and return a new anonymous subclass of LanguageProxyClass whose
### +langmod+ is the given language module +mod+.
def self::make_language_proxy( mod )
  proxy_class = Class::new( LanguageProxyClass )
  proxy_class.instance_variable_set( :@langmod, mod )
  proxy_class
end
136
+
137
+
138
### Install a language proxy accessor in +klass+ for each of the given
### +languages+ (language codes; DefaultLanguages is used when the list is
### empty). For every language the implementing module is loaded, a proxy
### class delegating to it is created and registered in a per-class Hash,
### and an instance method named after the module (e.g. #en) is added which
### returns the proxy wrapping the receiver. Returns the list of languages
### that was processed.
def self::install_language_proxy( klass, languages=DefaultLanguages )
  # Fall back to the defaults without modifying the caller's Array.
  languages = DefaultLanguages if languages.empty?

  # Create an languageProxy class for each language specified
  languages.each do |lang|

    # Load the language module, make a languageProxy class that delegates
    # to it, and figure out what the languageProxy method will be called.
    mod = load_language( lang.to_s.downcase )
    ifaceMeth = mod.name.downcase.sub( /.*:/, '' )
    languageProxyClass = make_language_proxy( mod )

    # Install a hash for languageProxy classes and an accessor for the
    # hash if it's not already present. Asking the class directly works
    # whether variable names are reported as Strings or Symbols; comparing
    # a String against a Symbol list never matches and would reset the
    # hash on every call.
    unless klass.class_variable_defined?( :@@__languageProxy_class )
      klass.module_eval %{
        @@__languageProxy_class = {}
        def self::__languageProxy_class; @@__languageProxy_class; end
      }, __FILE__, __LINE__
    end

    # Merge the current languageProxy into the hash
    klass.__languageProxy_class.merge!( ifaceMeth => languageProxyClass )

    # Set the language-code proxy method for the class unless it has one
    # already. The proxy is memoized in an instance variable, except on
    # frozen receivers, which cannot take a new instance variable and so
    # get a fresh proxy per call.
    unless klass.method_defined?( ifaceMeth )
      klass.module_eval %{
        def #{ifaceMeth}
          proxy_class = self.class.__languageProxy_class["#{ifaceMeth}"]
          return proxy_class.new( self ) if frozen?
          @__#{ifaceMeth}_languageProxy ||= proxy_class.new( self )
        end
      }, __FILE__, __LINE__
    end
  end
end
179
+
180
+
181
+
182
### Install a #method_missing in the given +klass+ that delegates calls to
### missing methods to the languageProxy returned by the method named
### +langcode+ (e.g. #en). The first time the proxy answers a message, a
### real delegator method is defined on the receiver's class and called.
### Messages the proxy does not answer go to the #method_missing +klass+
### had before the first installation, if any, and otherwise to the
### superclass. Raises ArgumentError if +langcode+ is nil.
def self::install_delegator_proxy( klass, langcode )
  raise ArgumentError, "Missing langcode" if langcode.nil?

  # Method names are reported as Strings by old Rubies and Symbols by
  # newer ones, so compare their string forms. A private #method_missing
  # counts too.
  own_methods = klass.instance_methods( false ) +
    klass.private_instance_methods( false )
  has_method_missing = own_methods.any? {|name| name.to_s == "method_missing" }

  # Alias any currently-extant #method_missing, but only on the first
  # installation: on later ones the extant method is the delegator itself,
  # and aliasing it would make the fallback call itself forever.
  installed = klass.instance_variable_defined?( :@__linguistics_delegator_installed )
  if has_method_missing && !installed
    klass.module_eval %{
      alias_method :__orig_method_missing, :method_missing
    }
  end
  klass.instance_variable_set( :@__linguistics_delegator_installed, true )

  # Add the #method_missing method that auto-installs delegator methods
  # for methods supported by the linguistic proxy objects. The aliased
  # original is looked up and invoked in a way that also works when it is
  # private.
  klass.module_eval %{
    def method_missing( sym, *args, &block )

      # If the linguistic delegator answers the message, install a
      # delegator method and call it.
      if self.send( :#{langcode} ).respond_to?( sym )
        self.class.module_eval %{
          def \#{sym}( *args, &block )
            self.#{langcode}.\#{sym}( *args, &block )
          end
        }
        self.method( sym ).call( *args, &block )

      # Otherwise either call the overridden proxy method if there is
      # one, or just let our parent deal with it.
      else
        if self.respond_to?( :__orig_method_missing, true )
          return self.__send__( :__orig_method_missing, sym, *args, &block )
        else
          super( sym, *args, &block )
        end
      end
    end
  }
end
224
+
225
+
226
+
227
+ #################################################################
228
+ ### L A N G U A G E - I N D E P E N D E N T F U N C T I O N S
229
+ #################################################################
230
+
231
+
232
### Handle auto-magic usage: treat a reference to an unknown constant as a
### language code and try to load the language module for it.
def self::const_missing( sym )
  langcode = sym.to_s.downcase
  load_language( langcode )
end
236
+
237
+
238
+ ###############
239
+ module_function
240
+ ###############
241
+
242
### Add linguistics functions for the specified languages to Ruby's core
### classes. The interface to all linguistic functions for a given language
### is through a method which has the same name as the language's
### international 2- or 3-letter code (ISO 639). A trailing Hash of
### configuration options may be given to control which classes are
### extended:
###
### [<b>:classes</b>]
###   The class (or Array of classes) to extend. If this is not specified,
###   the Class objects in Linguistics::DefaultExtClasses are extended.
### [<b>:installProxy</b>]
###   Install a proxy method in each of the extended classes which will
###   search for missing methods in the languageProxy for a language code.
###   The value may be a Symbol or String naming the code, or +true+ to use
###   the first language given (falling back to the first default language,
###   then to :en); any other true value raises an ArgumentError. This
###   allows linguistics methods to be called directly on extended objects
###   (e.g., 12.en.ordinal becomes 12.ordinal). Obviously, methods which
###   would collide with the object's builtin methods will need to be
###   invoked through the languageProxy.
def use( *languages )
  config = languages.last.is_a?( Hash ) ? languages.pop : {}

  classes = config.key?( :classes ) ? config[:classes] : DefaultExtClasses
  classes = [ classes ] unless classes.is_a?( Array )

  classes.each do |klass|

    # Create an languageProxy class for each installed language
    install_language_proxy( klass, languages )

    # The delegator proxy is optional
    proxy_option = config[:installProxy]
    next unless proxy_option

    langcode =
      case proxy_option
      when Symbol
        proxy_option
      when String
        proxy_option.intern
      when TrueClass
        languages[0] || DefaultLanguages[0] || :en
      else
        raise ArgumentError,
          "Unexpected value %p for :installProxy" %
          proxy_option
      end

    install_delegator_proxy( klass, langcode )
  end
end
293
+
294
+
295
+
296
+ ### Support Lingua::EN::Inflect-style globals in a threadsafe way by using
297
+ ### Thread-local variables.
298
+
299
### Set the default count for all unspecified plurals to +val+. The value
### is stored under :persistent_count on the calling thread
### (Thread.current).
def num=( val )
  current = Thread.current
  current[:persistent_count] = val
end
304
+ alias_method :NUM=, :num=
305
+
306
### Get the default count for all unspecified plurals, as stored under
### :persistent_count on the calling thread (Thread.current).
def num
  current = Thread.current
  current[:persistent_count]
end
311
+ alias_method :NUM, :num
312
+
313
+
314
### Set the 'classical pluralizations' flag to +val+. The value is stored
### under :classical_plurals on the calling thread (Thread.current).
def classical=( val )
  current = Thread.current
  current[:classical_plurals] = val
end
319
+
320
### Return +true+ if the 'classical pluralizations' flag stored under
### :classical_plurals on the calling thread (Thread.current) is set to a
### true value, +false+ otherwise.
def classical?
  return true if Thread.current[:classical_plurals]
  false
end
325
+
326
+
327
+ #######
328
+ private
329
+ #######
330
+
331
### Try to load the module that implements the given language, returning
### the Module object if successful. +lang+ must be a key of LanguageCodes,
### otherwise a RuntimeError is raised. Each code listed for the language is
### tried in turn (shortest first, alphabetically within a length): if the
### matching constant is not yet defined in Linguistics,
### 'linguistics/<code>' is required. Raises a LoadError describing every
### failed attempt when no code yields a module.
def self::load_language( lang )
  unless LanguageCodes.key?( lang )
    raise "Unknown language code '#{lang}'"
  end

  failures = []
  candidates = LanguageCodes[ lang ][:codes].sort_by {|code| [ code.length, code ] }

  # Stays an Array (the "nothing found" marker) unless a constant is found
  loaded = candidates

  candidates.each do |code|
    constname = code.upcase

    unless Linguistics::const_defined?( constname )
      begin
        require "linguistics/#{code}"
      rescue LoadError => err
        failures << "Tried 'linguistics/#{code}': #{err.message}\n"
        next
      end
    end

    # The require may succeed without defining the constant; in that
    # case move on to the next code.
    if Linguistics::const_defined?( constname )
      loaded = Linguistics::const_get( constname )
      break
    end
  end

  if loaded.is_a?( Array )
    raise LoadError,
      "Failed to load language extension %s:\n%s" %
      [ lang, failures.join ]
  end

  return loaded
end
364
+
365
+ end # class linguistics
366
+
@@ -0,0 +1,1728 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # = Linguistics::EN
4
+ #
5
+ # This module contains English-language linguistic functions for the Linguistics
6
+ # module. It can be either loaded directly, or by passing some variant of 'en'
7
+ # or 'eng' to the Linguistics::use method.
8
+ #
9
+ # The functions contained by the module provide:
10
+ #
11
+ # == Plural Inflections
12
+ #
13
+ # Plural forms of all nouns, most verbs, and some adjectives are provided. Where
14
+ # appropriate, "classical" variants (for example: "brother" -> "brethren",
15
+ # "dogma" -> "dogmata", etc.) are also provided.
16
+ #
17
+ # These can be accessed via the #plural, #plural_noun, #plural_verb, and
18
+ # #plural_adjective methods.
19
+ #
20
+ # == Indefinite Articles
21
+ #
22
+ # Pronunciation-based "a"/"an" selection is provided for all English words, and
23
+ # most initialisms.
24
+ #
25
+ # See: #a, #an, and #no.
26
+ #
27
+ # == Numbers to Words
28
+ #
29
+ # Conversion from Numeric values to words is supported using the American
30
+ # "thousands" system. E.g., 2561 => "two thousand, five hundred and sixty-one".
31
+ #
32
+ # See the #numwords method.
33
+ #
34
+ # == Ordinals
35
+ #
36
+ # It is also possible to inflect numerals (1,2,3) and number words ("one",
37
+ # "two", "three") to ordinals (1st, 2nd, 3rd) and ordinates ("first", "second",
38
+ # "third").
39
+ #
40
+ # == Conjunctions
41
+ #
42
+ # This module also supports the creation of English conjunctions from Arrays of
43
+ # Strings or objects which respond to the #to_s message. Eg.,
44
+ #
45
+ # %w{cow pig chicken cow dog cow duck duck moose}.en.conjunction
46
+ # ==> "three cows, two ducks, a pig, a chicken, a dog, and a moose"
47
+ #
48
+ # == Infinitives
49
+ #
50
+ # Returns the infinitive form of English verbs:
51
+ #
52
+ # "dodging".en.infinitive
53
+ # ==> "dodge"
54
+ #
55
+ #
56
+ # == Authors
57
+ #
58
+ # * Michael Granger <ged@FaerieMUD.org>
59
+ #
60
+ # == Acknowledgements
61
+ #
62
+ # The inflection functions of this module were adapted from Damian Conway's
63
+ # Lingua::EN::Inflect Perl module:
64
+ #
65
+ # Copyright (c) 1997-2000, Damian Conway. All Rights Reserved.
66
+ # This module is free software. It may be used, redistributed
67
+ # and/or modified under the same terms as Perl itself.
68
+ #
69
+ # The conjunctions code was adapted from the Lingua::Conjunction Perl module
70
+ # written by Robert Rothenberg and Damian Conway, which has no copyright
71
+ # statement included.
72
+ #
73
+ # :include: LICENSE
74
+ #
75
+ #--
76
+ #
77
+ # Please see the file LICENSE in the base directory for licensing details.
78
+ #
79
+ module Linguistics::EN
80
+
81
+ # Load in the secondary modules and add them to Linguistics::EN.
82
+ require 'linguistics/en/infinitive'
83
+ require 'linguistics/en/wordnet'
84
+ require 'linguistics/en/linkparser'
85
+
86
+ # Add 'english' to the list of default languages
87
+ Linguistics::DefaultLanguages.push( :en )
88
+
89
+
90
+ #################################################################
91
+ ### U T I L I T Y F U N C T I O N S
92
+ #################################################################
93
+
94
### Join the given +parts+ (Strings, or arbitrarily nested Arrays of them)
### into a single alternation and return it as the source String of a
### non-capturing Regexp group.
def self::matchgroup( *parts )
  alternatives = parts.flatten.join( "|" )
  "(?:" + alternatives + ")"
end
99
+
100
+
101
# Registry of lprintf formatters, keyed by name, kept in a module-level
# instance variable with a reader and a writer.
class << self
  attr_accessor :lprintf_formatters
end
self.lprintf_formatters = {}

### Register +meth+ as the lprintf formatter called +name+. +meth+ can be
### either a Method object or a Symbol, which is looked up as a method of
### this module. Returns the registered Method.
def self::def_lprintf_formatter( name, meth )
  formatter = meth.is_a?( Method ) ? meth : self.method( meth )
  self.lprintf_formatters[ name ] = formatter
end
112
+
113
+
114
+
115
+ #################################################################
116
+ ### C O N S T A N T S
117
+ #################################################################
118
+
119
+ # :stopdoc:
120
+
121
+ #
122
+ # Plurals
123
+ #
124
+
125
+ PL_sb_irregular_s = {
126
+ "ephemeris" => "ephemerides",
127
+ "iris" => "irises|irides",
128
+ "clitoris" => "clitorises|clitorides",
129
+ "corpus" => "corpuses|corpora",
130
+ "opus" => "opuses|opera",
131
+ "genus" => "genera",
132
+ "mythos" => "mythoi",
133
+ "penis" => "penises|penes",
134
+ "testis" => "testes",
135
+ }
136
+
137
+ PL_sb_irregular_h = {
138
+ "child" => "children",
139
+ "brother" => "brothers|brethren",
140
+ "loaf" => "loaves",
141
+ "hoof" => "hoofs|hooves",
142
+ "beef" => "beefs|beeves",
143
+ "money" => "monies",
144
+ "mongoose" => "mongooses",
145
+ "ox" => "oxen",
146
+ "cow" => "cows|kine",
147
+ "soliloquy" => "soliloquies",
148
+ "graffito" => "graffiti",
149
+ "prima donna" => "prima donnas|prime donne",
150
+ "octopus" => "octopuses|octopodes",
151
+ "genie" => "genies|genii",
152
+ "ganglion" => "ganglions|ganglia",
153
+ "trilby" => "trilbys",
154
+ "turf" => "turfs|turves",
155
+ }.update( PL_sb_irregular_s )
156
+ PL_sb_irregular = matchgroup PL_sb_irregular_h.keys
157
+
158
+
159
+ # Classical "..a" -> "..ata"
160
+ PL_sb_C_a_ata = matchgroup %w[
161
+ anathema bema carcinoma charisma diploma
162
+ dogma drama edema enema enigma lemma
163
+ lymphoma magma melisma miasma oedema
164
+ sarcoma schema soma stigma stoma trauma
165
+ gumma pragma
166
+ ].collect {|word| word[0...-1]}
167
+
168
+ # Unconditional "..a" -> "..ae"
169
+ PL_sb_U_a_ae = matchgroup %w[
170
+ alumna alga vertebra persona
171
+ ]
172
+
173
+ # Classical "..a" -> "..ae"
174
+ PL_sb_C_a_ae = matchgroup %w[
175
+ amoeba antenna formula hyperbola
176
+ medusa nebula parabola abscissa
177
+ hydra nova lacuna aurora .*umbra
178
+ flora fauna
179
+ ]
180
+
181
+ # Classical "..en" -> "..ina"
182
+ PL_sb_C_en_ina = matchgroup %w[
183
+ stamen foramen lumen
184
+ ].collect {|word| word[0...-2] }
185
+
186
+ # Unconditional "..um" -> "..a"
187
+ PL_sb_U_um_a = matchgroup %w[
188
+ bacterium agendum desideratum erratum
189
+ stratum datum ovum extremum
190
+ candelabrum
191
+ ].collect {|word| word[0...-2] }
192
+
193
# Classical "..um" -> "..a"
#
# The list is stored as stems (each word with its final "um" removed). The
# misspelled entries "enconium" and "millenium" are kept so input that
# matched before still matches; the correctly spelled "encomium" and
# "millennium" are added alongside them.
PL_sb_C_um_a = matchgroup %w[
  maximum minimum momentum optimum
  quantum cranium curriculum dictum
  phylum aquarium compendium emporium
  enconium encomium gymnasium honorarium interregnum
  lustrum memorandum millenium millennium rostrum
  spectrum speculum stadium trapezium
  ultimatum medium vacuum velum
  consortium
].collect {|word| word[0...-2]}
204
+
205
+ # Unconditional "..us" -> "i"
206
+ PL_sb_U_us_i = matchgroup %w[
207
+ alumnus alveolus bacillus bronchus
208
+ locus nucleus stimulus meniscus
209
+ ].collect {|word| word[0...-2]}
210
+
211
+ # Classical "..us" -> "..i"
212
+ PL_sb_C_us_i = matchgroup %w[
213
+ focus radius genius
214
+ incubus succubus nimbus
215
+ fungus nucleolus stylus
216
+ torus umbilicus uterus
217
+ hippopotamus
218
+ ].collect {|word| word[0...-2]}
219
+
220
+ # Classical "..us" -> "..us" (assimilated 4th declension latin nouns)
221
+ PL_sb_C_us_us = matchgroup %w[
222
+ status apparatus prospectus sinus
223
+ hiatus impetus plexus
224
+ ]
225
+
226
+ # Unconditional "..on" -> "a"
227
+ PL_sb_U_on_a = matchgroup %w[
228
+ criterion perihelion aphelion
229
+ phenomenon prolegomenon noumenon
230
+ organon asyndeton hyperbaton
231
+ ].collect {|word| word[0...-2]}
232
+
233
+ # Classical "..on" -> "..a"
234
+ PL_sb_C_on_a = matchgroup %w[
235
+ oxymoron
236
+ ].collect {|word| word[0...-2]}
237
+
238
+ # Classical "..o" -> "..i" (but normally -> "..os")
239
+ PL_sb_C_o_i_a = %w[
240
+ solo soprano basso alto
241
+ contralto tempo piano
242
+ ]
243
+ PL_sb_C_o_i = matchgroup PL_sb_C_o_i_a.collect{|word| word[0...-1]}
244
+
245
+ # Always "..o" -> "..os"
246
+ PL_sb_U_o_os = matchgroup( %w[
247
+ albino archipelago armadillo
248
+ commando crescendo fiasco
249
+ ditto dynamo embryo
250
+ ghetto guano inferno
251
+ jumbo lumbago magneto
252
+ manifesto medico octavo
253
+ photo pro quarto
254
+ canto lingo generalissimo
255
+ stylo rhino
256
+ ] | PL_sb_C_o_i_a )
257
+
258
+
259
+ # Unconditional "..[ei]x" -> "..ices"
260
+ PL_sb_U_ex_ices = matchgroup %w[
261
+ codex murex silex
262
+ ].collect {|word| word[0...-2]}
263
+ PL_sb_U_ix_ices = matchgroup %w[
264
+ radix helix
265
+ ].collect {|word| word[0...-2]}
266
+
267
+ # Classical "..[ei]x" -> "..ices"
268
+ PL_sb_C_ex_ices = matchgroup %w[
269
+ vortex vertex cortex latex
270
+ pontifex apex index simplex
271
+ ].collect {|word| word[0...-2]}
272
+ PL_sb_C_ix_ices = matchgroup %w[
273
+ appendix
274
+ ].collect {|word| word[0...-2]}
275
+
276
+
277
+ # Arabic: ".." -> "..i"
278
+ PL_sb_C_i = matchgroup %w[
279
+ afrit afreet efreet
280
+ ]
281
+
282
+
283
+ # Hebrew: ".." -> "..im"
284
+ PL_sb_C_im = matchgroup %w[
285
+ goy seraph cherub
286
+ ]
287
+
288
+ # Unconditional "..man" -> "..mans"
289
+ PL_sb_U_man_mans = matchgroup %w[
290
+ human
291
+ Alabaman Bahaman Burman German
292
+ Hiroshiman Liman Nakayaman Oklahoman
293
+ Panaman Selman Sonaman Tacoman Yakiman
294
+ Yokohaman Yuman
295
+ ]
296
+
297
+
298
+ PL_sb_uninflected_s = [
299
+ # Pairs or groups subsumed to a singular...
300
+ "breeches", "britches", "clippers", "gallows", "hijinks",
301
+ "headquarters", "pliers", "scissors", "testes", "herpes",
302
+ "pincers", "shears", "proceedings", "trousers",
303
+
304
+ # Unassimilated Latin 4th declension
305
+ "cantus", "coitus", "nexus",
306
+
307
+ # Recent imports...
308
+ "contretemps", "corps", "debris",
309
+ ".*ois",
310
+
311
+ # Diseases
312
+ ".*measles", "mumps",
313
+
314
+ # Miscellaneous others...
315
+ "diabetes", "jackanapes", "series", "species", "rabies",
316
+ "chassis", "innings", "news", "mews",
317
+ ]
318
+
319
+
320
+ # Don't inflect in classical mode, otherwise normal inflection
321
+ PL_sb_uninflected_herd = matchgroup %w[
322
+ wildebeest swine eland bison buffalo
323
+ elk moose rhinoceros
324
+ ]
325
+
326
+ PL_sb_uninflected = matchgroup [
327
+
328
+ # Some fish and herd animals
329
+ ".*fish", "tuna", "salmon", "mackerel", "trout",
330
+ "bream", "sea[- ]bass", "carp", "cod", "flounder", "whiting",
331
+
332
+ ".*deer", ".*sheep",
333
+
334
+ # All nationals ending in -ese
335
+ "Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
336
+ "Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
337
+ "Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
338
+ "Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
339
+ "Shavese", "Vermontese", "Wenchowese", "Yengeese",
340
+ ".*[nrlm]ese",
341
+
342
+ # Some words ending in ...s (often pairs taken as a whole)
343
+ PL_sb_uninflected_s,
344
+
345
+ # Diseases
346
+ ".*pox",
347
+
348
+ # Other oddities
349
+ "graffiti", "djinn"
350
+ ]
351
+
352
+
353
+ # Singular words ending in ...s (all inflect with ...es)
354
+ PL_sb_singular_s = matchgroup %w[
355
+ .*ss
356
+ acropolis aegis alias arthritis asbestos atlas
357
+ bathos bias bronchitis bursitis caddis cannabis
358
+ canvas chaos cosmos dais digitalis encephalitis
359
+ epidermis ethos eyas gas glottis hepatitis
360
+ hubris ibis lens mantis marquis metropolis
361
+ neuritis pathos pelvis polis rhinoceros
362
+ sassafras tonsillitis trellis .*us
363
+ ]
364
+
365
+ PL_v_special_s = matchgroup [
366
+ PL_sb_singular_s,
367
+ PL_sb_uninflected_s,
368
+ PL_sb_irregular_s.keys,
369
+ '(.*[csx])is',
370
+ '(.*)ceps',
371
+ '[A-Z].*s',
372
+ ]
373
+
374
+ PL_sb_postfix_adj = '(' + {
375
+
376
+ 'general' => ['(?!major|lieutenant|brigadier|adjutant)\S+'],
377
+ 'martial' => ["court"],
378
+
379
+ }.collect {|key,val|
380
+ matchgroup( matchgroup(val) + "(?=(?:-|\\s+)#{key})" )
381
+ }.join("|") + ")(.*)"
382
+
383
+
384
+ PL_sb_military = %r'major|lieutenant|brigadier|adjutant|quartermaster'
385
+ PL_sb_general = %r'((?!#{PL_sb_military.source}).*?)((-|\s+)general)'
386
+
387
+ PL_prep = matchgroup %w[
388
+ about above across after among around at athwart before behind
389
+ below beneath beside besides between betwixt beyond but by
390
+ during except for from in into near of off on onto out over
391
+ since till to under until unto upon with
392
+ ]
393
+
394
+ PL_sb_prep_dual_compound = %r'(.*?)((?:-|\s+)(?:#{PL_prep}|d[eu])(?:-|\s+))a(?:-|\s+)(.*)'
395
+ PL_sb_prep_compound = %r'(.*?)((-|\s+)(#{PL_prep}|d[eu])((-|\s+)(.*))?)'
396
+
397
+
398
+ PL_pron_nom_h = {
399
+ # Nominative Reflexive
400
+ "i" => "we", "myself" => "ourselves",
401
+ "you" => "you", "yourself" => "yourselves",
402
+ "she" => "they", "herself" => "themselves",
403
+ "he" => "they", "himself" => "themselves",
404
+ "it" => "they", "itself" => "themselves",
405
+ "they" => "they", "themself" => "themselves",
406
+
407
+ # Possessive
408
+ "mine" => "ours",
409
+ "yours" => "yours",
410
+ "hers" => "theirs",
411
+ "his" => "theirs",
412
+ "its" => "theirs",
413
+ "theirs" => "theirs",
414
+ }
415
+ PL_pron_nom = matchgroup PL_pron_nom_h.keys
416
+
417
+ PL_pron_acc_h = {
418
+ # Accusative Reflexive
419
+ "me" => "us", "myself" => "ourselves",
420
+ "you" => "you", "yourself" => "yourselves",
421
+ "her" => "them", "herself" => "themselves",
422
+ "him" => "them", "himself" => "themselves",
423
+ "it" => "them", "itself" => "themselves",
424
+ "them" => "them", "themself" => "themselves",
425
+ }
426
+ PL_pron_acc = matchgroup PL_pron_acc_h.keys
427
+
428
# Irregular present/past verb forms mapped to their plural form. Each key
# is listed once; repeating a key in a Hash literal only triggers a
# "duplicated key" warning and adds nothing.
PL_v_irregular_pres_h = {
  "am"   => "are",
  "are"  => "are",
  "is"   => "are",
  "was"  => "were",
  "were" => "were",
  "have" => "have",
  "has"  => "have",
}
435
+ PL_v_irregular_pres = matchgroup PL_v_irregular_pres_h.keys
436
+
437
# Present-tense verb forms that could also be read as nouns, mapped to
# their plural verb form. Each key is listed once; repeating a key in a
# Hash literal only triggers a "duplicated key" warning and adds nothing.
PL_v_ambiguous_pres_h = {
  "act"     => "act",     "acts"    => "act",
  "blame"   => "blame",   "blames"  => "blame",
  "can"     => "can",
  "must"    => "must",
  "fly"     => "fly",     "flies"   => "fly",
  "copy"    => "copy",    "copies"  => "copy",
  "drink"   => "drink",   "drinks"  => "drink",
  "fight"   => "fight",   "fights"  => "fight",
  "fire"    => "fire",    "fires"   => "fire",
  "like"    => "like",    "likes"   => "like",
  "look"    => "look",    "looks"   => "look",
  "make"    => "make",    "makes"   => "make",
  "reach"   => "reach",   "reaches" => "reach",
  "run"     => "run",     "runs"    => "run",
  "sink"    => "sink",    "sinks"   => "sink",
  "sleep"   => "sleep",   "sleeps"  => "sleep",
  "view"    => "view",    "views"   => "view",
}
458
+ PL_v_ambiguous_pres = matchgroup PL_v_ambiguous_pres_h.keys
459
+
460
+ PL_v_irregular_non_pres = matchgroup %w[
461
+ did had ate made put
462
+ spent fought sank gave sought
463
+ shall could ought should
464
+ ]
465
+
466
+ PL_v_ambiguous_non_pres = matchgroup %w[
467
+ thought saw bent will might cut
468
+ ]
469
+
470
+ PL_count_zero = matchgroup %w[
471
+ 0 no zero nil
472
+ ]
473
+
474
+ PL_count_one = matchgroup %w[
475
+ 1 a an one each every this that
476
+ ]
477
+
478
+ PL_adj_special_h = {
479
+ "a" => "some", "an" => "some",
480
+ "this" => "these", "that" => "those",
481
+ }
482
+ PL_adj_special = matchgroup PL_adj_special_h.keys
483
+
484
+ PL_adj_poss_h = {
485
+ "my" => "our",
486
+ "your" => "your",
487
+ "its" => "their",
488
+ "her" => "their",
489
+ "his" => "their",
490
+ "their" => "their",
491
+ }
492
+ PL_adj_poss = matchgroup PL_adj_poss_h.keys
493
+
494
+
495
+ #
496
+ # Numerals, ordinals, and numbers-to-words
497
+ #
498
+
499
+ # Numerical inflections
500
+ Nth = {
501
+ 0 => 'th',
502
+ 1 => 'st',
503
+ 2 => 'nd',
504
+ 3 => 'rd',
505
+ 4 => 'th',
506
+ 5 => 'th',
507
+ 6 => 'th',
508
+ 7 => 'th',
509
+ 8 => 'th',
510
+ 9 => 'th',
511
+ 11 => 'th',
512
+ 12 => 'th',
513
+ 13 => 'th',
514
+ }
515
+
516
+ # Ordinal word parts
517
+ Ordinals = {
518
+ 'ty' => 'tieth',
519
+ 'one' => 'first',
520
+ 'two' => 'second',
521
+ 'three' => 'third',
522
+ 'five' => 'fifth',
523
+ 'eight' => 'eighth',
524
+ 'nine' => 'ninth',
525
+ 'twelve' => 'twelfth',
526
+ }
527
+ OrdinalSuffixes = Ordinals.keys.join("|") + "|"
528
+ Ordinals[""] = 'th'
529
+
530
+ # Numeral names
531
+ Units = [''] + %w[one two three four five six seven eight nine]
532
+ Teens = %w[ten eleven twelve thirteen fourteen
533
+ fifteen sixteen seventeen eighteen nineteen]
534
+ Tens = ['',''] + %w[twenty thirty forty fifty sixty seventy eighty ninety]
535
+ Thousands = [' ', ' thousand'] + %w[
536
+ m b tr quadr quint sext sept oct non dec undec duodec tredec
537
+ quattuordec quindec sexdec septemdec octodec novemdec vigint
538
+ ].collect {|prefix| ' ' + prefix + 'illion'}
539
+
540
# Functions for turning a group of digits into a word phrase, indexed by
# the number of digits in the group; e.g.,
# <tt>NumberToWordsFunctions[2]</tt> handles double-digit groups. Each
# function takes the word to use for zero followed by the digits.
NumberToWordsFunctions = [
  # No digits at all: always an error
  proc {|*args| raise "No digits (#{args.inspect})"},

  # Single-digits
  proc {|zero,x|
    if x.nonzero?
      to_units( x )
    else
      "#{zero} "
    end
  },

  # Double-digits
  proc {|zero,x,y|
    next to_tens( x, y ) if x.nonzero?
    next [ zero, zero ].join( " " ) unless y.nonzero?

    single = NumberToWordsFunctions[1]
    "#{zero} " + single.call( zero, y )
  },

  # Triple-digits: the first digit on its own, then the remaining pair
  proc {|zero,x,y,z|
    single, double = NumberToWordsFunctions.values_at( 1, 2 )
    single.call( zero, x ) + double.call( zero, y, z )
  }
]
569
+
570
+
571
+ #
572
+ # Indefinite Articles
573
+ #
574
+
575
+ # This pattern matches strings of capitals starting with a "vowel-sound"
576
+ # consonant followed by another consonant, and which are not likely
577
+ # to be real words (oh, all right then, it's just magic!)
578
+ A_abbrev = %{
579
+ (?! FJO | [HLMNS]Y. | RY[EO] | SQU
580
+ | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU])
581
+ [FHLMNRSX][A-Z]
582
+ }
583
+
584
+ # This pattern codes the beginnings of all English words beginning with a
585
+ # 'y' followed by a consonant. Any other y-consonant prefix therefore
586
+ # implies an abbreviation.
587
+ A_y_cons = 'y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)'
588
+
589
+ # Exceptions to exceptions
590
+ A_explicit_an = matchgroup( "euler", "hour(?!i)", "heir", "honest", "hono" )
591
+
592
+
593
+ #
594
+ # Configuration defaults
595
+ #
596
+
597
# Default configuration arguments for the #numwords function. Each key is
# described in the documentation of #numwords.
NumwordDefaults = {
  :group   => 0,
  :comma   => ', ',
  :and     => ' and ',
  :zero    => 'zero',
  :decimal => 'point',
  :asArray => false,
}

# Default ranges for #quantify: counts that are described as "several",
# "a number of", "numerous" and "many" respectively.
SeveralRange  = (2..5)
NumberRange   = (6..19)
NumerousRange = (20..45)
ManyRange     = (46..99)

# Default configuration arguments for the #quantify function
QuantifyDefaults = { :joinword => " of " }

# Default configuration arguments for the #conjunction (junction, what's
# your) function.
ConjunctionDefaults = {
  :separator   => ', ',
  :altsep      => '; ',
  :penultimate => true,
  :conjunctive => 'and',
  :combine     => true,
  :casefold    => true,
  :generalize  => false,
  :quantsort   => true,
}
630
+
631
+
632
+ #
633
+ # Title case
634
+ #
635
+
636
+ # "In titles, capitalize the first word, the last word, and all words in
637
+ # between except articles (a, an, and the), prepositions under five letters
638
+ # (in, of, to), and coordinating conjunctions (and, but). These rules apply
639
+ # to titles of long, short, and partial works as well as your own papers"
640
+ # (Anson, Schwegler, and Muth. The Longman Writer's Companion 240).
641
+
642
# Build the list of exceptions to title-capitalization: words that stay
# lowercase in the middle of a title.

# The articles are "a", "an" and "the". ("and" is a conjunction and is
# listed under CoordConjunctions.)
Articles = %w[a an the]

# Prepositions of fewer than five letters.
ShortPrepositions = %w[
  amid at but by down from in into like near of off on onto out over
  past save with till to unto up upon
]

CoordConjunctions = %w[and but as]

# Union of the three lists above, without duplicates.
TitleCaseExceptions = Articles | ShortPrepositions | CoordConjunctions
649
+
650
+
651
+ # :startdoc:
652
+
653
+ #################################################################
654
+ ### " B A C K E N D " F U N C T I O N S
655
+ #################################################################
656
+
657
+
658
+ ###############
659
+ module_function
660
+ ###############
661
+
662
### Write the given +msgs+, joined with single spaces, to $stderr -- but
### only when $DEBUG is set.
def debug_msg( *msgs ) # :nodoc:
  return unless $DEBUG
  $stderr.puts msgs.join(" ")
end
666
+
667
+
668
### Normalize a count to either 1 (singular) or +default+ (2, i.e. plural,
### unless overridden). A +nil+ count yields +default+. Counts matching
### PL_count_one are singular; in classical mode counts matching
### PL_count_zero are singular as well.
def normalize_count( count, default=2 )
  return default if count.nil? # Default to plural

  text = count.to_s
  return 1 if /^(#{PL_count_one})$/i =~ text
  return 1 if Linguistics::classical? && /^(#{PL_count_zero})$/ =~ text

  default
end
679
+
680
+
681
### Do normal/classical switching and match capitalization in <tt>inflected</tt> by
### examining the <tt>original</tt> input.
###
### An +inflected+ value of the form "modern|classical" is reduced to one
### of the two alternatives depending on Linguistics::classical?. The
### result is then upcased entirely if +original+ is all capitals, or has
### its first character upcased if +original+ starts with a capital. The
### pronoun "I" is exempt from case matching.
###
### Returns a new String; +inflected+ itself is never modified. Callers
### hand in values taken directly from lookup tables, and editing those in
### place would change the table for every later call.
def postprocess( original, inflected )
  result = inflected.dup
  result.sub!( /([^|]+)\|(.+)/ ) {
    Linguistics::classical? ? $2 : $1
  }

  case original
  when "I"
    result
  when /^[A-Z]+$/
    result.upcase
  when /^[A-Z]/
    # Can't use #capitalize, as it will downcase the rest of the string,
    # too.
    result[0,1] = result[0,1].upcase
    result
  else
    result
  end
end
702
+
703
+
704
### Return the plural of the noun +word+. The +count+ (falling back to
### Linguistics::num) is normalized first; if it comes out singular the
### word is returned unchanged. The rules are tried in order: uninflected
### words, compounds, pronouns, irregular plurals, unassimilated imports,
### classical-only forms, and finally the regular suffix rules.
def pluralize_noun( word, count=nil )
  count = normalize_count( count || Linguistics::num )

  return word if count == 1

  # Handle user-defined nouns
  #if value = ud_match( word, PL_sb_user_defined )
  #  return value
  #end

  # Empty words and uninflected plurals come back untouched
  case word
  when '', /^(#{PL_sb_uninflected})$/i
    return word
  else
    return word if Linguistics::classical? &&
      /^(#{PL_sb_uninflected_herd})$/i =~ word
  end

  case word

  # Handle compounds ("Governor General", "mother-in-law", "aide-de-camp", etc.)
  when /^(?:#{PL_sb_postfix_adj})$/i
    head, tail = $1, $2
    return pluralize_noun( head, 2 ) + tail

  when /^(?:#{PL_sb_prep_dual_compound})$/i
    head, middle, tail = $1, $2, $3
    return pluralize_noun( head, 2 ) + middle + pluralize_noun( tail )

  when /^(?:#{PL_sb_prep_compound})$/i
    head, tail = $1, $2
    return pluralize_noun( head, 2 ) + tail

  # Handle pronouns
  when /^((?:#{PL_prep})\s+)(#{PL_pron_acc})$/i
    return $1 + PL_pron_acc_h[ $2.downcase ]

  when /^(#{PL_pron_nom})$/i
    return PL_pron_nom_h[ word.downcase ]

  when /^(#{PL_pron_acc})$/i
    return PL_pron_acc_h[ $1.downcase ]

  # Handle isolated irregular plurals
  when /(.*)\b(#{PL_sb_irregular})$/i
    return $1 + PL_sb_irregular_h[ $2.downcase ]

  when /(#{PL_sb_U_man_mans})$/i
    return "#{$1}s"

  # Handle families of irregular plurals
  when /(.*)man$/i                then return "#{$1}men"
  when /(.*[ml])ouse$/i           then return "#{$1}ice"
  when /(.*)goose$/i              then return "#{$1}geese"
  when /(.*)tooth$/i              then return "#{$1}teeth"
  when /(.*)foot$/i               then return "#{$1}feet"

  # Handle unassimilated imports
  when /(.*)ceps$/i               then return word
  when /(.*)zoon$/i               then return "#{$1}zoa"
  when /(.*[csx])is$/i            then return "#{$1}es"
  when /(#{PL_sb_U_ex_ices})ex$/i then return "#{$1}ices"
  when /(#{PL_sb_U_ix_ices})ix$/i then return "#{$1}ices"
  when /(#{PL_sb_U_um_a})um$/i    then return "#{$1}a"
  when /(#{PL_sb_U_us_i})us$/i    then return "#{$1}i"
  when /(#{PL_sb_U_on_a})on$/i    then return "#{$1}a"
  when /(#{PL_sb_U_a_ae})$/i      then return "#{$1}e"
  end

  # Handle incompletely assimilated imports
  if Linguistics::classical?
    case word
    when /(.*)trix$/i               then return "#{$1}trices"
    when /(.*)eau$/i                then return "#{$1}eaux"
    when /(.*)ieu$/i                then return "#{$1}ieux"
    when /(.{2,}[yia])nx$/i         then return "#{$1}nges"
    when /(#{PL_sb_C_en_ina})en$/i  then return "#{$1}ina"
    when /(#{PL_sb_C_ex_ices})ex$/i then return "#{$1}ices"
    when /(#{PL_sb_C_ix_ices})ix$/i then return "#{$1}ices"
    when /(#{PL_sb_C_um_a})um$/i    then return "#{$1}a"
    when /(#{PL_sb_C_us_i})us$/i    then return "#{$1}i"
    when /(#{PL_sb_C_us_us})$/i     then return "#{$1}"
    when /(#{PL_sb_C_a_ae})$/i      then return "#{$1}e"
    when /(#{PL_sb_C_a_ata})a$/i    then return "#{$1}ata"
    when /(#{PL_sb_C_o_i})o$/i      then return "#{$1}i"
    when /(#{PL_sb_C_on_a})on$/i    then return "#{$1}a"
    when /#{PL_sb_C_im}$/i          then return "#{word}im"
    when /#{PL_sb_C_i}$/i           then return "#{word}i"
    end
  end

  case word

  # Handle singular nouns ending in ...s or other sibilants
  when /^(#{PL_sb_singular_s})$/i then return "#{$1}es"
  when /^([A-Z].*s)$/             then return "#{$1}es"
  when /(.*)([cs]h|[zx])$/i       then return "#{$1}#{$2}es"
  # when /(.*)(us)$/i             then return "#{$1}#{$2}es"

  # Handle ...f -> ...ves
  when /(.*[eao])lf$/i            then return "#{$1}lves"
  when /(.*[^d])eaf$/i            then return "#{$1}eaves"
  when /(.*[nlw])ife$/i           then return "#{$1}ives"
  when /(.*)arf$/i                then return "#{$1}arves"

  # Handle ...y
  when /(.*[aeiou])y$/i           then return "#{$1}ys"
  when /([A-Z].*y)$/              then return "#{$1}s"
  when /(.*)y$/i                  then return "#{$1}ies"

  # Handle ...o
  when /#{PL_sb_U_o_os}$/i        then return "#{word}s"
  when /[aeiou]o$/i               then return "#{word}s"
  when /o$/i                      then return "#{word}es"

  # Otherwise just add ...s
  else
    return "#{word}s"
  end
end # def pluralize_noun
831
+
832
+
833
+
834
### Return the plural form of the verb +word+ if it is an irregular verb
### or a third-person singular form ending in "s". Returns +nil+ when the
### normalized +count+ is singular, or when +word+ is not one of the
### cases handled here, so that the caller can try something else.
def pluralize_special_verb( word, count )
  count = normalize_count( count || Linguistics::num )

  return nil if /^(#{PL_count_one})$/i =~ count.to_s

  # Handle user-defined verbs
  #if value = ud_match( word, PL_v_user_defined )
  #  return value
  #end

  # The value of the matching branch is the method's result; falling off
  # the end (a regular verb, handled elsewhere) yields nil.
  case word

  # Handle irregular present tense (simple and compound)
  when /^(#{PL_v_irregular_pres})((\s.*)?)$/i
    PL_v_irregular_pres_h[ $1.downcase ] + $2

  # Handle irregular future, preterite and perfect tenses
  when /^(#{PL_v_irregular_non_pres})((\s.*)?)$/i
    word

  # Handle special cases
  when /^(#{PL_v_special_s})$/, /\s/
    nil

  # Handle standard 3rd person (chop the ...(e)s off single words)
  when /^(.*)([cs]h|[x]|zz|ss)es$/i
    $1 + $2
  when /^(..+)ies$/i
    "#{$1}y"
  when /^(.+)oes$/i
    "#{$1}o"
  when /^(.*[^s])s$/i
    $1
  end
end
875
+
876
+
877
### Return the plural form of a regular verb +word+. Verbs listed in
### PL_v_ambiguous_pres are looked up in PL_v_ambiguous_pres_h (keeping
### any trailing words of a compound); everything else, including every
### word when the normalized +count+ is singular, is returned unchanged.
def pluralize_general_verb( word, count )
  count = normalize_count( count || Linguistics::num )

  return word if /^(#{PL_count_one})$/i =~ count.to_s

  case word

  # Handle ambiguous present tenses (simple and compound)
  when /^(#{PL_v_ambiguous_pres})((\s.*)?)$/i
    PL_v_ambiguous_pres_h[ $1.downcase ] + $2

  # Handle ambiguous preterite and perfect tenses
  when /^(#{PL_v_ambiguous_non_pres})((\s.*)?)$/i
    word

  # Otherwise, 1st or 2nd person is uninflected
  else
    word
  end
end
899
+
900
+
901
### Return the plural form of the adjective +word+ for the adjectives that
### have one: those in PL_adj_special, the possessives in PL_adj_poss, and
### possessive forms ending in an apostrophe or "'s". Returns +word+
### unchanged when the normalized +count+ is singular, and +nil+ when the
### word is not a special adjective.
def pluralize_special_adjective( word, count )
  count = normalize_count( count || Linguistics::num )

  return word if /^(#{PL_count_one})$/i =~ count.to_s

  # Handle user-defined adjectives
  #if value = ud_match( word, PL_adj_user_defined )
  #  return value
  #end

  # The value of the matching branch is the result; no match means nil
  # ("no idea").
  case word

  # Handle known cases
  when /^(#{PL_adj_special})$/i
    PL_adj_special_h[ $1.downcase ]

  # Handle possessives
  when /^(#{PL_adj_poss})$/i
    PL_adj_poss_h[ $1.downcase ]

  when /^(.*)'s?$/
    owner = plural_noun( $1 )
    /s$/ =~ owner ? "#{owner}'" : "#{owner}'s"
  end
end
936
+
937
+
938
### Returns the given word with a prepended indefinite article, unless
### +count+ is non-nil and not singular, in which case the word is
### returned prefixed with the count itself. A +nil+ count falls back to
### Linguistics::num.
def indef_article( word, count )
  count ||= Linguistics::num
  if count && /^(#{PL_count_one})$/i !~ count.to_s
    return "#{count} #{word}"
  end

  # Handle user-defined variants
  # return value if value = ud_match( word, A_a_user_defined )

  # The patterns are tried strictly in the order listed; consecutive
  # patterns that pick the same article share a branch.
  article =
    case word

    # Special cases, abbreviations, and "vowel-sound" single letters
    when /^(#{A_explicit_an})/i,
         /^(#{A_abbrev})/x,
         /^[aefhilmnorsx][.-]/i
      "an"

    # Other single letters, consonants, and special vowel-forms
    when /^[a-z][.-]/i,
         /^[^aeiouy]/i,
         /^e[uw]/i,
         /^onc?e\b/i,
         /^uni([^nmd]|mo)/i,
         /^u[bcfhjkqrst][aeiou]/i
      "a"

    # Remaining vowels, and y... (before certain consonants implies
    # (unnaturalized) "i.." sound)
    when /^[aeiou]/i,
         /^(#{A_y_cons})/i
      "an"

    # Otherwise, guess "a"
    else
      "a"
    end

  "#{article} #{word}"
end
989
+
990
+
991
### Transform the specified units-place numeral into a word-phrase at the
### given number of +thousands+ places: the entry for +units+ in Units
### followed by the thousands suffix.
def to_units( units, thousands=0 )
  word = Units[ units ]
  suffix = to_thousands( thousands )
  return word + suffix
end
996
+
997
+
998
### Transform the specified tens- and units-place numerals into a
### word-phrase at the given number of +thousands+ places. A tens digit of
### 1 is looked up in Teens; any other value uses the Tens word joined to
### the units word, with a hyphen when both digits are non-zero.
def to_tens( tens, units, thousands=0 )
  return Teens[ units ] + to_thousands( thousands ) if tens == 1

  hyphen = (tens.nonzero? && units.nonzero?) ? '-' : ''
  return Tens[ tens ] + hyphen + to_units( units, thousands )
end
1008
+
1009
+
1010
### Transform the specified number of hundreds-, tens-, and units-place
### numerals into a word phrase, followed by the suffix for the given
### number of +thousands+ places. The +joinword+ goes between "hundred"
### and the rest of the phrase; an empty +joinword+ is replaced by a
### single space. Returns +nil+ if all three digits are zero.
def to_hundreds( hundreds, tens=0, units=0, thousands=0, joinword=" and " )
  joinword = ' ' if joinword.empty?
  has_remainder = tens.nonzero? || units.nonzero?

  if hundreds.nonzero?
    to_units( hundreds ) + " hundred" +
      (has_remainder ? joinword : '') +
      to_tens( tens, units ) +
      to_thousands( thousands )
  elsif has_remainder
    to_tens( tens, units ) + to_thousands( thousands )
  end
end
1027
+
1028
### Transform the specified number into one or more words like 'thousand',
### 'million', etc. Uses the thousands (American) system. When +thousands+
### is beyond the end of the Thousands table, the table's last word is
### added once for each full pass through the table.
def to_thousands( thousands=0 )
  cycle = Thousands.length - 1

  parts = (0..thousands).step( cycle ).map do |offset|
    offset.zero? ? Thousands[ thousands % cycle ] : Thousands.last
  end

  return parts.join(" ")
end
1042
+
1043
+
1044
### Return the specified number +num+ as an array of number phrases.
###
### +config+ is a Hash which must contain <tt>:zero</tt> (the word for 0)
### and <tt>:group</tt>. With a non-zero <tt>:group</tt> the digits are read
### left to right in chunks of that many digits, each converted by the
### matching entry of NumberToWordsFunctions. With a <tt>:group</tt> of 0
### the digits are consumed from the right in groups of three, and
### <tt>config[:and]</tt> is passed to #to_hundreds as the joining word.
###
### +num+ is never modified, even if it is a String.
def number_to_words( num, config )
  return [config[:zero]] if num.to_i.zero?
  chunks = []

  # Break into word-groups if groups is set
  if config[:group].nonzero?

    # Build a Regexp with <config[:group]> number of digits. Any past
    # the first are optional.
    re = Regexp::new( "(\\d)" + ("(\\d)?" * (config[:group] - 1)) )

    # Scan the string, and call the word-chunk function that deals with
    # chunks of the found number of digits.
    num.to_s.scan( re ) {|digits|
      debug_msg " digits = #{digits.inspect}"

      # Optional groups that did not match are nil; only the digits
      # actually present select the function. (Array#nitems, which
      # used to do this count, no longer exists as of Ruby 1.9.)
      present = digits.flatten.compact
      fn = NumberToWordsFunctions[ present.length ]
      numerals = present.collect {|i| i.to_i}
      debug_msg " numerals = #{numerals.inspect}"
      chunks.push fn.call( config[:zero], *numerals ).strip
    }
  else
    # Digits are removed from this String as they are converted, so work
    # on a copy rather than on the caller's object.
    phrase = num.to_s.dup
    phrase.sub!( /\A\s*0+/, '' )
    mill = 0

    # Match backward from the end of the digits in the string, turning
    # chunks of three, of two, and of one into words. +mill+ counts the
    # three-digit groups consumed so far.
    mill += 1 while
      phrase.sub!( /(\d)(\d)(\d)(?=\D*\Z)/ ) {
        words = to_hundreds( $1.to_i, $2.to_i, $3.to_i, mill,
                             config[:and] )
        chunks.unshift words.strip.squeeze(' ') unless words.nil?
        ''
      }

    phrase.sub!( /(\d)(\d)(?=\D*\Z)/ ) {
      chunks.unshift to_tens( $1.to_i, $2.to_i, mill ).strip.squeeze(' ')
      ''
    }
    phrase.sub!( /(\d)(?=\D*\Z)/ ) {
      chunks.unshift to_units( $1.to_i, mill ).strip.squeeze(' ')
      ''
    }
  end

  return chunks
end
1092
+
1093
+
1094
+ #################################################################
1095
+ ### P U B L I C F U N C T I O N S
1096
+ #################################################################
1097
+
1098
### Return the name of the language this module is for. The optional
### argument is ignored.
def language( unused=nil )
  return "English"
end
1102
+
1103
+
1104
+ ### Return the plural of the given +phrase+ if +count+ indicates it should
1105
+ ### be plural.
1106
+ def plural( phrase, count=nil )
1107
+ phrase = numwords( phrase ) if phrase.is_a?( Numeric )
1108
+
1109
+ md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
1110
+ pre, word, post = md.to_a[1,3]
1111
+ return phrase if word.nil? or word.empty?
1112
+
1113
+ plural = postprocess( word,
1114
+ pluralize_special_adjective(word, count) ||
1115
+ pluralize_special_verb(word, count) ||
1116
+ pluralize_noun(word, count) )
1117
+
1118
+ return pre + plural + post
1119
+ end
1120
+ def_lprintf_formatter :PL, :plural
1121
+
1122
+
1123
### Return the plural of the given noun +phrase+ if +count+ indicates it
### should be plural. Surrounding whitespace is preserved, and a phrase
### with nothing to inflect is returned as-is.
def plural_noun( phrase, count=nil )
  match = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
  return phrase unless match
  lead, word, trail = match.captures

  return lead + postprocess( word, pluralize_noun(word, count) ) + trail
end
1133
+ def_lprintf_formatter :PL_N, :plural_noun
1134
+
1135
+
1136
### Return the plural of the given verb +phrase+ if +count+ indicates it
### should be plural. The special-verb rules are tried first, then the
### general ones. Surrounding whitespace is preserved, and a phrase with
### nothing to inflect is returned as-is.
def plural_verb( phrase, count=nil )
  match = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
  return phrase unless match
  lead, word, trail = match.captures

  inflected = pluralize_special_verb( word, count ) ||
    pluralize_general_verb( word, count )

  return lead + postprocess( word, inflected ) + trail
end
1148
+ def_lprintf_formatter :PL_V, :plural_verb
1149
+
1150
+
1151
### Return the plural of the given adjectival +phrase+ if +count+ indicates
### it should be plural. Adjectives without a special plural form come
### back unchanged apart from post-processing. Surrounding whitespace is
### preserved, and a phrase with nothing to inflect is returned as-is.
def plural_adjective( phrase, count=nil )
  match = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
  return phrase unless match
  lead, word, trail = match.captures

  inflected = pluralize_special_adjective( word, count ) || word

  return lead + postprocess( word, inflected ) + trail
end
1162
+ alias_method :plural_adj, :plural_adjective
1163
+ def_lprintf_formatter :PL_ADJ, :plural_adjective
1164
+
1165
+
1166
### Return the given phrase with the appropriate indefinite article ("a" or
### "an") prepended. Surrounding whitespace is preserved, and a phrase
### with no non-whitespace content is returned as-is. See #indef_article
### for the meaning of +count+.
def a( phrase, count=nil )
  match = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
  return phrase unless match
  lead, word, trail = match.captures

  return lead + indef_article( word, count ) + trail
end
1176
+ alias_method :an, :a
1177
+ def_lprintf_formatter :A, :a
1178
+ def_lprintf_formatter :AN, :a
1179
+
1180
+
1181
### Translate zero-quantified +phrase+ to "no +phrase.plural+". If +count+
### (falling back to Linguistics::num, then to 0) does not match
### PL_count_zero, the result is the count followed by the phrase
### pluralized for that count instead.
###
### Surrounding whitespace is preserved. A phrase with no non-whitespace
### content is returned as-is, as in the other phrase functions.
def no( phrase, count=nil )
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
  pre, word, post = md.to_a[1,3]
  return phrase if word.nil? or word.empty?

  count ||= Linguistics::num || 0

  # Group the alternatives so both anchors apply to every one of them,
  # the same way the other PL_count_* tests are written.
  if /^(?:#{PL_count_zero})$/ =~ count.to_s
    return "#{pre}no " + plural( word, 0 ) + post
  else
    return "#{pre}#{count} " + plural( word, count ) + post
  end
end
1193
+ def_lprintf_formatter :NO, :no
1194
+
1195
+
1196
### Return the present participle ("...ing" form) of the verb +word+. The
### word is first put into its plural form with #plural_verb, then the
### first applicable spelling adjustment below is made before "ing" is
### appended.
def present_participle( word )
  stem = plural_verb( word.to_s, 2 )

  # Only the first adjustment that applies is made: #sub! returns nil
  # when nothing changed, which lets the chain fall through to the next.
  stem.sub!( /ie$/, 'y' ) or                # die -> dy(ing)
    stem.sub!( /ue$/, 'u' ) or              # argue -> argu(ing)
    stem.sub!( /([auy])e$/, "\\1" ) or      # eye -> ey(ing)
    stem.sub!( /i$/, '' ) or
    stem.sub!( /([^e])e$/, "\\1" ) or       # make -> mak(ing)
    /er$/.match( stem ) or                  # ...er: no consonant doubling
    stem.sub!( /([^aeiou][aeiouy]([bdgmnprst]))$/, "\\1\\2" ) # run -> runn(ing)

  return "#{stem}ing"
end
1210
+ alias_method :part_pres, :present_participle
1211
+ def_lprintf_formatter :PART_PRES, :present_participle
1212
+
1213
+
1214
+
1215
### Return the specified number as english words. One or more configuration
### values may be passed to control the returned String:
###
### [<b>:group</b>]
###   Controls how many numbers at a time are grouped together. Valid values
###   are <code>0</code> (normal grouping), <code>1</code> (single-digit
###   grouping, e.g., "one, two, three, four"), <code>2</code>
###   (double-digit grouping, e.g., "twelve, thirty-four"), or <code>3</code>
###   (triple-digit grouping, e.g., "one twenty-three, four").
### [<b>:comma</b>]
###   Set the character/s used to separate word groups. Defaults to
###   <code>", "</code>.
### [<b>:and</b>]
###   Set the word and/or characters used where <code>' and ' </code>(the
###   default) is normally used. Setting <code>:and</code> to
###   <code>' '</code>, for example, will cause <code>2556</code> to be
###   returned as "two-thousand, five hundred fifty-six" instead of
###   "two-thousand, five hundred and fifty-six".
### [<b>:zero</b>]
###   Set the word used to represent the numeral <code>0</code> in the
###   result. <code>'zero'</code> is the default.
### [<b>:decimal</b>]
###   Set the translation of any decimal points in the number; the default
###   is <code>'point'</code>.
### [<b>:asArray</b>]
###   If set to a true value, the number will be returned as an array of
###   word groups instead of a String.
###
### A leading "+" or "-" becomes the word "plus" or "minus", and a trailing
### ordinal suffix ("st", "nd", "rd", "th") makes the last word of the
### whole-number part an ordinal. Raises a RuntimeError if <code>:group</code>
### is not between 0 and 3. The +number+ argument is never modified.
def numwords( number, hashargs={} )
  # String#to_s returns the receiver itself, so copy before the in-place
  # edits below; otherwise the caller's String would be changed.
  num = number.to_s.dup
  config = NumwordDefaults.merge( hashargs )
  raise "Bad chunking option: #{config[:group]}" unless
    config[:group].between?( 0, 3 )

  # Array of number parts: first is everything to the left of the first
  # decimal, followed by any groups of decimal-delimited numbers after that
  parts = []

  # Wordify any sign prefix
  sign = (/\A\s*\+/ =~ num) ? 'plus' : (/\A\s*\-/ =~ num) ? 'minus' : ''

  # Strip any ordinal suffixes
  ord = true if num.sub!( /(st|nd|rd|th)\Z/, '' )

  # Split the number into chunks delimited by '.'
  chunks =
    if config[:decimal].empty?
      [ num ]
    elsif config[:group].nonzero?
      num.split( /\./ )
    else
      num.split( /\./, 2 )
    end

  # Wordify each chunk, pushing arrays into the parts array
  chunks.each_with_index {|chunk,section|
    chunk.gsub!( /\D+/, '' )

    # If there's nothing in the whole-number part, just push an empty
    # array. (Empty chunks after it are wordified as zero below.)
    if chunk.empty? && section.zero?
      parts.push []
      next
    end

    # Split the number section into wordified parts. The second and
    # succeeding parts of a non-group number are read digit by digit.
    if config[:group].zero? && section.nonzero?
      parts.push number_to_words( chunk, config.merge(:group => 1) )
    else
      parts.push number_to_words( chunk, config )
    end
  }

  debug_msg "Parts => #{parts.inspect}"

  # Turn the last word of the whole-number part back into an ordinal if
  # the original number came in that way.
  if ord && !parts[0].empty?
    parts[0][-1] = ordinal( parts[0].last )
  end

  # If the caller's expecting an Array return, just flatten and return the
  # parts array.
  if config[:asArray]
    parts[0].unshift( sign ) unless sign.empty?
    return parts.flatten
  end

  # Catenate each sub-parts array into a whole number part and one or more
  # post-decimal parts. If grouping is turned on, all sub-parts get joined
  # with commas, otherwise just the whole-number part is.
  if config[:group].zero?
    if parts[0].length > 1

      # Join all but the last part together with commas
      wholenum = parts[0][0...-1].join( config[:comma] )

      # If the last part is just a single word, append it to the
      # wholenum part with an 'and'. This is to get things like 'three
      # thousand and three' instead of 'three thousand, three'.
      if /^\s*(\S+)\s*$/ =~ parts[0].last
        wholenum += config[:and] + parts[0].last
      else
        wholenum += config[:comma] + parts[0].last
      end
    else
      wholenum = parts[0][0]
    end
    decimals = parts[1..-1].collect {|part| part.join(" ")}

    debug_msg "Wholenum: #{wholenum.inspect}; decimals: #{decimals.inspect}"

    # Join with the configured decimal; if it's empty, just join with
    # spaces.
    joiner = config[:decimal].empty? ? " " : " #{config[:decimal]} "
    words = ([ wholenum ] + decimals).join( joiner ).strip

    # Keep a space between the sign word and the number ("minus five",
    # not "minusfive").
    return [ sign, words ].reject {|str| str.empty? }.join( " " )
  else
    return parts.compact.
      separate( config[:decimal] ).
      delete_if {|el| el.empty?}.
      join( config[:comma] ).
      strip
  end
end
1350
+ def_lprintf_formatter :NUMWORDS, :numwords
1351
+
1352
+
1353
### Transform the given +number+ into an ordinal word. The +number+ object
### can be either an Integer or a String. An Integer gets its numeric
### suffix appended (looked up in Nth by its last two digits, then by its
### last digit); anything else is converted to a String and has a trailing
### number word from OrdinalSuffixes replaced via the Ordinals table.
def ordinal( number )
  if number.is_a?( Integer )
    suffix = Nth[ number % 100 ] || Nth[ number % 10 ]
    return number.to_s + suffix
  end

  return number.to_s.sub( /(#{OrdinalSuffixes})\Z/ ) { Ordinals[$1] }
end
1364
+ def_lprintf_formatter :ORD, :ordinal
1365
+
1366
+
1367
### Transform the given +number+ into an ordinate word: the number is
### spelled out with #numwords and the result is turned into its ordinal
### form with #ordinal.
def ordinate( number )
  # Call this module's own #ordinal. The String returned by #numwords
  # only answers to #ordinal when the Linguistics proxy methods have
  # been installed on String.
  return ordinal( numwords(number) )
end
1371
+
1372
+
1373
### Return a phrase describing the specified +number+ of objects in the
### given +phrase+ in general terms. The following options can be used to
### control the makeup of the returned quantity String:
###
### [<b>:joinword</b>]
###   Sets the word (and any surrounding spaces) used as the word separating the
###   quantity from the noun in the resulting string. Defaults to <tt>' of
###   '</tt>.
def quantify( phrase, number=0, args={} )
  num = number.to_i
  config = QuantifyDefaults.merge( args )

  # Small quantities each have a fixed description
  case num
  when 0             then return no( phrase )
  when 1             then return a( phrase )
  when SeveralRange  then return "several " + plural( phrase, num )
  when NumberRange   then return "a number of " + plural( phrase, num )
  when NumerousRange then return "numerous " + plural( phrase, num )
  when ManyRange     then return "many " + plural( phrase, num )
  end

  # Anything bigger than the ManyRange gets described like
  # "hundreds of thousands of..." or "millions of..."
  # depending, of course, on how many there are.
  thousands, subthousands = Math::log10( num ).to_i.divmod( 3 )

  # "hundreds" or "tens" within the group of three; nil for units
  stword = { 2 => "hundreds", 1 => "tens" }[ subthousands ]

  # "thousands", "millions", ...; nil below a thousand
  thword = plural( to_thousands(thousands).strip )
  thword = nil if thword.empty?

  # Hundreds (of) thousands (of) stars.
  return [ stword, thword, plural(phrase, number) ].
    compact.join( config[:joinword] )
end
1427
+ def_lprintf_formatter :QUANT, :quantify
1428
+
1429
+
1430
+ # :TODO: Needs refactoring
1431
+
1432
+ ### Return the specified +obj+ (which must support the <tt>#collect</tt>
1433
+ ### method) as a conjunction. Each item is converted to a String if it is
1434
+ ### not already (using #to_s) unless a block is given, in which case it is
1435
+ ### called once for each object in the array, and the stringified return
1436
+ ### value from the block is used instead. Returning +nil+ causes that
1437
+ ### particular element to be omitted from the resulting conjunction. The
1438
+ ### following options can be used to control the makeup of the returned
1439
+ ### conjunction String:
1440
+ ###
1441
+ ### [<b>:separator</b>]
1442
+ ### Specify one or more characters to separate items in the resulting
1443
+ ### list. Defaults to <tt>', '</tt>.
1444
+ ### [<b>:altsep</b>]
1445
+ ### An alternate separator to use if any of the resulting conjunction's
1446
+ ### clauses contain the <tt>:separator</tt> character/s. Defaults to <tt>'; '</tt>.
1447
+ ### [<b>:penultimate</b>]
1448
+ ### Flag that indicates whether or not to join the last clause onto the
1449
+ ### rest of the conjunction using a penultimate <tt>:separator</tt>. E.g.,
1450
+ ### %w{duck cow dog}.en.conjunction
1451
+ ### # => "a duck, a cow, and a dog"
1452
+ ### %w{duck cow dog}.en.conjunction( :penultimate => false )
1453
+ ### # => "a duck, a cow and a dog"
1454
+ ### Default to <tt>true</tt>.
1455
+ ### [<b>:conjunctive</b>]
1456
+ ### Sets the word used as the conjunctive (separating word) of the
1457
+ ### resulting string. Default to <tt>'and'</tt>.
1458
+ ### [<b>:combine</b>]
1459
+ ### If set to <tt>true</tt> (the default), items which are identical (after
1460
+ ### surrounding spaces are stripped) will be combined in the resulting
1461
+ ### conjunction. E.g.,
1462
+ ### %w{goose cow goose dog}.en.conjunction
1463
+ ### # => "two geese, a cow, and a dog"
1464
+ ### %w{goose cow goose dog}.en.conjunction( :combine => false )
1465
+ ### # => "a goose, a cow, a goose, and a dog"
1466
+ ### [<b>:casefold</b>]
1467
+ ### If set to <tt>true</tt> (the default), then items are compared
1468
+ ### case-insensitively when combining them. This has no effect if
1469
+ ### <tt>:combine</tt> is <tt>false</tt>.
1470
+ ### [<b>:generalize</b>]
1471
+ ### If set to <tt>true</tt>, then quantities of combined items are turned into
1472
+ ### general descriptions instead of exact amounts.
1473
+ ### ary = %w{goose pig dog horse goose reindeer goose dog horse}
1474
+ ### ary.en.conjunction
1475
+ ### # => "three geese, two dogs, two horses, a pig, and a reindeer"
1476
+ ### ary.en.conjunction( :generalize => true )
1477
+ ### # => "several geese, several dogs, several horses, a pig, and a reindeer"
1478
+ ### See the #quantify method for specifics on how quantities are
1479
+ ### generalized. Generalization defaults to <tt>false</tt>, and has no effect if
1480
+ ### :combine is <tt>false</tt>.
1481
+ ### [<b>:quantsort</b>]
1482
+ ### If set to <tt>true</tt> (the default), items which are combined in the
1483
+ ### resulting conjunction will be listed in order of amount, with greater
1484
+ ### quantities sorted first. If <tt>:quantsort</tt> is <tt>false</tt>, combined items
1485
+ ### will appear where the first instance of them occurred in the
1486
+ ### list. This sort is also the fallback for identical quantities (i.e.,
1487
+ ### items of the same quantity will be listed in the order they appeared
1488
+ ### in the source list).
1489
+ ###
1490
def conjunction( obj, args={} )
  # Caller-supplied options take precedence over ConjunctionDefaults.
  config = ConjunctionDefaults.merge( args )

  # Transform items in the obj to phrases. With a block, each item is
  # replaced by the block's result and nil results are dropped; without
  # one, each item is simply stringified.
  phrases =
    if block_given?
      obj.collect {|item| yield(item) }.compact
    else
      obj.collect {|item| item.to_s }
    end

  # No need for a conjunction if there's only one thing
  return a( phrases[0] ) if phrases.length < 2

  # Set up a Proc to derive a collector key from a phrase depending on the
  # configuration
  keyfunc =
    if config[:casefold]
      proc {|key| key.downcase.strip }
    else
      proc {|key| key.strip }
    end

  # Count and delete phrases that hash the same when the keyfunc munges
  # them into the same thing if we're combining (:combine => true).
  collector = {}
  if config[:combine]

    phrases.each_index do |i|
      # Stop when reaching the end of a truncated list: deleting
      # duplicates below shortens the Array while it is being walked.
      break if phrases[i].nil?

      # Make the key using the configured key function
      phrase = keyfunc[ phrases[i] ]

      # If the collector already has this key, increment its count,
      # eliminate the duplicate from the phrase list, and redo the loop
      # so the element that slid into slot +i+ is examined too.
      if collector.key?( phrase )
        collector[ phrase ] += 1
        phrases.delete_at( i )
        redo
      end

      collector[ phrase ] = 1
    end
  else
    # If we're not combining, just make everything have a count of 1.
    phrases.uniq.each {|key| collector[ keyfunc[key] ] = 1 }
  end

  # If sort-by-quantity is turned on, sort the phrases first by how many
  # there are (most-first), and then by the order they were specified in.
  if config[:quantsort] && config[:combine]
    origorder = {}
    phrases.each_with_index {|phrase, i| origorder[ keyfunc[phrase] ] ||= i }
    phrases.sort! {|left, right|
      (collector[ keyfunc[right] ] <=> collector[ keyfunc[left] ]).nonzero? ||
        (origorder[ keyfunc[left] ] <=> origorder[ keyfunc[right] ])
    }
  end

  # Set up a filtering function that adds either an indefinite article, an
  # indefinite quantifier, or a definite quantifier to each phrase
  # depending on the configuration and the count of phrases in the
  # collector.
  filter =
    if config[:generalize]
      proc {|phrase, count| quantify( phrase, count ) }
    else
      proc {|phrase, count|
        if count > 1
          "%s %s" % [
            # :TODO: Make this threshold settable
            count < 10 ? count.en.numwords : count.to_s,
            plural( phrase, count )
          ]
        else
          a( phrase )
        end
      }
    end

  # Now use the configured filter to turn each phrase into its final form.
  phrases.collect! {|phrase| filter[ phrase, collector[keyfunc[phrase]] ] }

  # Prepend the conjunctive to the last element unless it's empty or
  # there's only one element
  phrases[-1].insert( 0, config[:conjunctive] + " " ) unless
    config[:conjunctive].strip.empty? || phrases.length < 2

  # Concatenate the last two elements if there's no penultimate separator.
  # Combining duplicates can leave a single phrase behind, in which case
  # there is nothing to concatenate (phrases[-2] would be nil).
  phrase_count = phrases.length
  unless config[:penultimate] || phrase_count < 2
    final_phrase = phrases.pop
    phrases[-1] << " " << final_phrase
  end

  # Pick a separator based on how many phrases there were before the
  # concatenation above and whether or not there's already an instance of
  # the primary separator inside one of the phrases.
  sep = config[:separator]
  if phrase_count <= 2
    sep = ' '
  elsif phrases.any? {|str| str.include?( config[:separator] ) }
    sep = config[:altsep]
  end

  return phrases.join( sep )
end
1596
# NOTE(review): presumably registers #conjunction as the handler for the
# %CONJUNCT escape that #lprintf looks up in lprintf_formatters -- confirm
# against the definition of def_lprintf_formatter.
+ def_lprintf_formatter :CONJUNCT, :conjunction
1597
+
1598
+
1599
+ ### Turns a camel-case +string+ ("camelCaseToEnglish") to plain English
1600
+ ### ("camel case to english"). Each word is decapitalized.
1601
def camel_case_to_english( string )
  # Put a space after every letter that is immediately followed by a
  # capital, then lowercase the lot. The following capital is matched with
  # a lookahead instead of being consumed, so every boundary inside a run
  # of capitals is found ("ABC" becomes "a b c"); consuming both letters
  # would skip every other boundary in such a run.
  string.to_s.gsub( /([A-Za-z])(?=[A-Z])/ ) { "#$1 " }.downcase
end
1606
+
1607
+
1608
+ ### Turns an English language +string+ into a CamelCase word.
1609
def english_to_camel_case( string )
  # Drop each run of whitespace and upcase the character that follows it.
  # Any non-space character is accepted after the whitespace (not just a
  # lowercase letter) so that words which are already capitalized or start
  # with a digit are joined on as well instead of keeping their space.
  string.to_s.gsub( /\s+(\S)/ ) { $1.upcase }
end
1612
+
1613
+
1614
+ ### This method doesn't work quite right yet. It does okay for simple cases,
1615
+ ### but it misses more complex ones, e.g. 'as' used as a coordinating
1616
+ ### conjunction in "A Portrait of the Artist as a Young Man". Perhaps after
1617
+ ### there's a working (non-leaking) LinkParser for Ruby, this can be fixed
1618
+ ### up. Until then it'll just be undocumented.
1619
+
1620
+ ### Returns the given +string+ as a title-cased phrase.
1621
def titlecase( string ) # :nodoc:

  # Split on word-boundaries; whitespace and punctuation end up as tokens
  # of their own, so joining the tokens restores the original spacing.
  words = string.split( /\b/ )

  # An empty string yields no tokens, so there is nothing to capitalize.
  return '' if words.empty?

  # Always capitalize the first and last tokens. Note that capitalize!
  # also downcases the remaining letters of those two tokens.
  words.first.capitalize!
  words.last.capitalize!

  # Now scan the rest of the tokens, skipping non-words and capitalization
  # exceptions.
  words.each_with_index do |word, i|

    # Non-words
    next unless /^\w+$/.match( word )

    # Skip exception-words
    next if TitleCaseExceptions.include?( word )

    # Skip second parts of contractions. A contraction needs two tokens in
    # front of this one; without the index check, negative indexes would
    # wrap around and inspect the end of the token list instead.
    next if i >= 2 && words[i - 1] == "'" && /\w/.match( words[i - 2] )

    # Have to do it this way instead of capitalize! because that method
    # also downcases all other letters.
    word.gsub!( /^(\w)(.*)/ ) { $1.upcase + $2 }
  end

  return words.join
end
1650
+
1651
+
1652
+ ### Returns the proper noun form of a string by capitalizing most of the
1653
+ ### words.
1654
+ ###
1655
+ ### Examples:
1656
+ ### English.proper_noun("bosnia and herzegovina") ->
1657
+ ### "Bosnia and Herzegovina"
1658
+ ### English.proper_noun("macedonia, the former yugoslav republic of") ->
1659
+ ### "Macedonia, the Former Yugoslav Republic of"
1660
+ ### English.proper_noun("virgin islands, u.s.") ->
1661
+ ### "Virgin Islands, U.S."
1662
def proper_noun( string )
  # Splitting with a capture group keeps the separators (runs of spaces and
  # periods) as tokens, so joining the tokens restores the punctuation.
  tokens = string.split( /([ .]+)/ )

  cased = tokens.map do |token|
    # Only tokens that start with a lowercase letter and aren't one of the
    # small connecting words get capitalized; everything else passes
    # through untouched.
    if token =~ /^[a-z]/ && !%w{and the of}.include?( token )
      token.capitalize
    else
      token
    end
  end

  return cased.join
end
1669
+
1670
+
1671
+ ### Format the given +fmt+ string by replacing %-escaped sequences with the
1672
+ ### result of performing a specified operation on the corresponding
1673
+ ### argument, ala Kernel.sprintf.
1674
+ ### %PL::
1675
+ ### Plural.
1676
+ ### %A, %AN::
1677
+ ### Prepend indefinite article.
1678
+ ### %NO::
1679
+ ### Zero-quantified phrase.
1680
+ ### %NUMWORDS::
1681
+ ### Convert a number into the corresponding words.
1682
+ ### %CONJUNCT::
1683
+ ### Conjunction.
1684
def lprintf( fmt, *args )
  # Each %ESCAPE (capital letters and underscores) is replaced by the
  # result of its formatter, which is handed the next unused argument.
  fmt.to_s.gsub( /%([A-Z_]+)/ ) do
    op = $1.to_s.upcase.to_sym

    # Unknown escapes are an error rather than being passed through.
    raise "no such formatter %p" % op unless self.lprintf_formatters.key?( op )

    # Arguments are consumed left to right, one per escape; once they run
    # out the formatter is called with nil.
    arg = args.shift
    self.lprintf_formatters[ op ].call( arg )
  end
end
1695
+
1696
+ end # module Linguistics::EN
1697
+
1698
+
1699
+ ### Add the #separate and #separate! methods to Array.
1700
class Array

  ### Build a copy of the receiver with a separator placed between each pair
  ### of neighbouring elements. The separator is +value+, unless a block is
  ### given; the block is then called with each neighbouring pair (as a
  ### two-element Array) and its result is used instead.
  def separate( value=:__no_arg__, &block )
    copy = dup
    copy.separate!( value, &block )
    copy
  end

  ### In-place version of #separate; returns the receiver.
  def separate!( value=:__no_arg__ )
    if value == :__no_arg__ && !block_given?
      raise ArgumentError, "wrong number of arguments: (0 for 1)"
    end

    # Separators end up at the odd indexes 1, 3, 5, ... of the grown Array.
    # The last slot is computed once from the starting length, so Arrays
    # with fewer than two elements get no separators at all.
    last_slot = (length * 2) - 2
    1.step( last_slot, 2 ) do |slot|
      # The neighbours of a slot are the element before it and the element
      # currently occupying it (which is pushed right by the insert).
      filler = block_given? ? yield( self[slot - 1, 2] ) : value
      insert( slot, filler )
    end

    self
  end

end
1728
+