linguistics 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/ruby
2
+
3
# Before anything else runs, put the "lib" directory found under the parent
# of this script's directory at the front of the load path.
BEGIN {
  require 'pathname'

  basedir = Pathname.new( __FILE__ ).dirname.parent.expand_path
  # $LOAD_PATH holds Strings. A Pathname never compares equal to a String,
  # so use the String form for both the membership test and the insertion.
  libdir = ( basedir + "lib" ).to_s
  $LOAD_PATH.unshift( libdir ) unless $LOAD_PATH.include?( libdir )
}
10
+
11
+ require 'linguistics'
12
+ require 'readline'
13
+
14
+ Linguistics.use( :en, :installProxy => true )
15
+
16
### Return a more general term for +word+.
###
### Traverses the hypernyms of +word+'s synset and picks the synset at index
### (length / 4) of the returned list, answering the first of its words.
### Returns +word+ itself when it has no synset or the traversal comes back
### empty. Progress messages are written to $stderr.
def generalized_word( word )
  # $deferr no longer exists after Ruby 1.8; $stderr is the replacement.
  $stderr.puts " Traversing hypernyms for #{word}"
  syn = word.synset or return word
  nyms = syn.traverse( :hypernyms )
  return word if nyms.empty?

  index = nyms.length / 4
  general_subj = nyms[ index ]
  general_word = general_subj.words.first
  $stderr.puts " %d synsets returned. Picking %d (%s)" % [
    nyms.length,
    index,
    general_word,
  ]
  return general_word
end
30
+
31
# Prompt for sentences until end-of-input. For each one, replace the first
# occurrence of its subject, object and verb (as reported by the sentence
# parse) with the result of generalized_word, then print the sentence.
while input = Readline.readline( "Sentence to generalize: " )
  sent = input.sentence

  # All three parts are read from the parse before any substitution happens.
  [ sent.subject, sent.object, sent.verb ].each do |part|
    next unless part

    # Escape the word so any regexp metacharacters in it match literally,
    # and use the block form so backslashes in the replacement are not
    # interpreted as back-references.
    input.sub!( /\b#{Regexp.escape( part.to_s )}\b/ ) { generalized_word(part) }
  end

  puts input
end
44
+
45
+
46
+
@@ -0,0 +1,366 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'linguistics/iso639'
4
+
5
+ # A language-independent framework for adding linguistics functions to Ruby
6
+ # classes.
7
+ #
8
+ # == Synopsis
9
+ #
10
+ # require 'linguistics'
11
+ # Linguistics::use( :en )
12
+ # MyClass::extend( Linguistics )
13
+ #
14
+ # == Authors
15
+ #
16
+ # * Michael Granger <ged@FaerieMUD.org>
17
+ #
18
+ # :include: LICENSE
19
+ #
20
+ #--
21
+ #
22
+ # Please see the file LICENSE in the base directory for licensing details.
23
+ #
24
+ module Linguistics
25
+
26
+ ### Class constants
27
+
28
+ # Release version
29
+ VERSION = '1.0.8'
30
+
31
+ # Language module implementors should do something like:
32
+ # Linguistics::DefaultLanguages.push( :ja ) # or whatever
33
+ # so that direct requiring of a language module sets the default.
34
+ DefaultLanguages = []
35
+
36
+ # The list of Classes to add linguistic behaviours to.
37
+ DefaultExtClasses = [String, Numeric, Array]
38
+
39
+
40
+ #################################################################
41
+ ### I N F L E C T O R C L A S S F A C T O R Y
42
+ #################################################################
43
+
44
+ ### A class which is inherited from by proxies for classes being extended
45
+ ### with one or more linguistic interfaces. It provides on-the-fly creation
46
+ ### of linguistic methods when the <tt>:installProxy</tt> option is passed
47
+ ### to the call to Linguistics#use.
48
class LanguageProxyClass

  ### Class instance variable + accessor. Holds the language module that
  ### this proxy class forwards linguistic method calls to.
  @langmod = nil
  class << self
    attr_accessor :langmod
  end


  ### Create a new LanguageProxy wrapping the given +receiver+.
  def initialize( receiver )
    @receiver = receiver
  end


  ######
  public
  ######

  ### Answer true if either the language module or the proxy itself
  ### responds to +sym+. Accepts the optional second argument of
  ### Object#respond_to? so that two-argument calls do not raise
  ### ArgumentError; it is passed on to the default implementation.
  def respond_to?( sym, include_all=false )
    self.class.langmod.respond_to?( sym ) || super
  end


  ### Autoload linguistic methods: the first time a method the language
  ### module responds to is called, define a forwarding method of the same
  ### name on this proxy class and invoke it. The forwarding method passes
  ### the wrapped receiver as the first argument to the language module's
  ### function. Anything else goes to the default #method_missing.
  def method_missing( sym, *args, &block )
    return super unless self.class.langmod.respond_to?( sym )

    self.class.module_eval %{
      def #{sym}( *args, &block )
        self.class.langmod.#{sym}( @receiver, *args, &block )
      end
    }, "{Autoloaded: " + __FILE__ + "}", __LINE__

    self.method( sym ).call( *args, &block )
  end


  ### Returns a human-readable representation of the languageProxy for
  ### debugging, logging, etc.
  def inspect
    "<%s languageProxy for %s object %s>" % [
      self.class.langmod.language,
      @receiver.class.name,
      @receiver.inspect,
    ]
  end

end
101
+
102
+
103
+ ### Extend the specified target object with one or more language proxy
104
+ ### methods, each of which provides access to one or more linguistic methods
105
+ ### for that language.
106
### Hook for Object#extend: install the language proxies on +obj+ before
### handing off to the default behaviour.
def self::extend_object( obj )
  # A Class gets the proxies installed on itself; any other object gets
  # them installed on its singleton class.
  target = Class === obj ? obj : (class << obj; self; end)
  self::install_language_proxy( target )

  super
end
119
+
120
+
121
+ ### Extend the including class with linguistics proxy methods.
122
### Hook for Module#include: extend the including module with this one,
### except when the including module is Linguistics itself.
def self::included( mod )
  return if mod == Linguistics
  mod.extend( self )
end
126
+
127
+
128
+ ### Make a languageProxy class that encapsulates all of the inflect operations
129
+ ### using the given language module.
130
### Build and return a new anonymous subclass of LanguageProxyClass whose
### class-level @langmod is the given language module +mod+.
def self::make_language_proxy( mod )
  proxy_class = Class.new( LanguageProxyClass )
  proxy_class.instance_variable_set( :@langmod, mod )
  proxy_class
end
136
+
137
+
138
+ ### Install the language proxy
139
### Install a language-proxy method in +klass+ for each of the given
### +languages+ (language codes; defaults to DefaultLanguages).
###
### For every language the implementing module is loaded, a proxy class
### delegating to it is created and recorded in a per-class Hash (reachable
### via klass.__languageProxy_class), and an instance method named after the
### module (e.g. #en) is defined that lazily creates and memoizes the proxy
### for the receiver. An empty +languages+ Array is filled in place with
### DefaultLanguages.
def self::install_language_proxy( klass, languages=DefaultLanguages )
  languages.replace( DefaultLanguages ) if languages.empty?

  # Create a languageProxy class for each language specified
  languages.each do |lang|

    # Load the language module, make a languageProxy class that delegates
    # to it, and figure out what the languageProxy method will be called
    # (the last component of the module's name, downcased).
    mod = load_language( lang.to_s.downcase )
    ifaceMeth = mod.name.downcase.sub( /.*:/, '' )
    languageProxyClass = make_language_proxy( mod )

    # Install a hash for languageProxy classes and an accessor for the
    # hash if it's not already present. Module#class_variables returns
    # Symbols on Ruby 1.9+, so testing it against a String never matched
    # and the hash was reset on every call, dropping earlier languages.
    unless klass.class_variable_defined?( :@@__languageProxy_class )
      klass.module_eval %{
        @@__languageProxy_class = {}
        def self::__languageProxy_class; @@__languageProxy_class; end
      }, __FILE__, __LINE__
    end

    # Merge the current languageProxy into the hash
    klass.__languageProxy_class.merge!( ifaceMeth => languageProxyClass )

    # Set the language-code proxy method for the class unless it has one
    # already. #method_defined? accepts a String or a Symbol, unlike a
    # membership test against #instance_methods.
    unless klass.method_defined?( ifaceMeth )
      klass.module_eval %{
        def #{ifaceMeth}
          @__#{ifaceMeth}_languageProxy ||=
            self.class.__languageProxy_class["#{ifaceMeth}"].
            new( self )
        end
      }, __FILE__, __LINE__
    end
  end
end
179
+
180
+
181
+
182
+ ### Install a regular proxy method in the given klass that will delegate
183
+ ### calls to missing methods to the languageProxy for the given +langcode+.
184
### Install a #method_missing in +klass+ that forwards otherwise-unknown
### messages to the language proxy reached through the +langcode+ method
### (e.g. :en), defining a real delegator method on first use.
###
### A #method_missing that +klass+ itself already defines is preserved as
### #__orig_method_missing and consulted for messages the proxy does not
### answer. Raises ArgumentError if +langcode+ is nil.
def self::install_delegator_proxy( klass, langcode )
  raise ArgumentError, "Missing langcode" if langcode.nil?

  # Module#instance_methods returns Symbols on Ruby 1.9+ and Strings on
  # 1.8; normalize to Strings so the test works on both.
  own_methods = klass.instance_methods( false ).collect {|meth| meth.to_s }

  # Only alias on the first installation. On a repeat call the class's own
  # #method_missing is the delegator installed below, and aliasing that
  # would make the fallback call itself forever.
  already_installed =
    klass.instance_variable_defined?( :@__linguistics_delegator_installed )

  # Alias any currently-extant #method_missing so it isn't lost
  if own_methods.include?( "method_missing" ) && !already_installed
    klass.module_eval %{
      alias_method :__orig_method_missing, :method_missing
    }
  end
  klass.instance_variable_set( :@__linguistics_delegator_installed, true )

  # Add the #method_missing method that auto-installs delegator methods
  # for methods supported by the linguistic proxy objects. If the proxy
  # answers the message, a delegator method is installed and called;
  # otherwise the preserved original #method_missing is called if there is
  # one, or the parent is left to deal with it.
  klass.module_eval %{
    def method_missing( sym, *args, &block )
      if self.send( :#{langcode} ).respond_to?( sym )
        self.class.module_eval %{
          def \#{sym}( *args, &block )
            self.#{langcode}.\#{sym}( *args, &block )
          end
        }
        self.method( sym ).call( *args, &block )
      else
        if self.respond_to?( :__orig_method_missing )
          return self.__orig_method_missing( sym, *args, &block )
        else
          super( sym, *args, &block )
        end
      end
    end
  }
end
224
+
225
+
226
+
227
+ #################################################################
228
+ ### L A N G U A G E - I N D E P E N D E N T F U N C T I O N S
229
+ #################################################################
230
+
231
+
232
+ ### Handle auto-magic usage
233
### Hook for references to undefined constants: treat the constant's name,
### downcased, as a language code and return whatever load_language gives
### back for it.
def self::const_missing( sym )
  langcode = sym.to_s.downcase
  load_language( langcode )
end
236
+
237
+
238
+ ###############
239
+ module_function
240
+ ###############
241
+
242
+ ### Add linguistics functions for the specified languages to Ruby's core
243
+ ### classes. The interface to all linguistic functions for a given language
244
+ ### is through a method which has the same name as the language's international 2- or
245
+ ### 3-letter code (ISO 639). You can also specify a Hash of configuration
246
+ ### options which control which classes are extended:
247
+ ###
248
+ ### [<b>:classes</b>]
249
+ ### Specify the classes which are to be extended. If this is not specified,
250
+ ### the Class objects in Linguistics::DefaultExtClasses (an Array) are
251
+ ### extended.
252
+ ### [<b>:installProxy</b>]
253
+ ### Install a proxy method in each of the classes which are to be extended
254
+ ### which will search for missing methods in the languageProxy for the
255
+ ### language code specified as the value. This allows linguistics methods
256
+ ### to be called directly on extended objects (e.g.,
257
+ ### 12.en.ordinal becomes 12.ordinal). Obviously, methods which would
258
+ ### collide with the object's builtin methods will need to be invoked
259
+ ### through the languageProxy. Any existing proxy methods in the extended
260
+ ### classes will be preserved.
261
# +languages+ is a list of language codes, optionally followed by a Hash of
# configuration options (:classes, :installProxy). Returns the Array of
# classes that were extended. Raises ArgumentError when :installProxy is
# given a value other than a Symbol, a String or +true+.
def use( *languages )
  config = {}
  config = languages.pop if languages.last.is_a?( Hash )

  classes = config.key?( :classes ) ? config[:classes] : DefaultExtClasses
  classes = [ classes ] unless classes.is_a?( Array )

  # Install the languageProxy in each class.
  classes.each do |klass|

    # Create a languageProxy class for each installed language
    install_language_proxy( klass, languages )

    # Install the delegator proxy if configured
    next unless config[:installProxy]

    langcode =
      case config[:installProxy]
      when Symbol
        config[:installProxy]
      when String
        config[:installProxy].intern
      when TrueClass
        languages[0] || DefaultLanguages[0] || :en
      else
        # Wrap the value in an Array so that an Array value is shown
        # whole instead of being splatted into the format's arguments.
        raise ArgumentError,
          "Unexpected value %p for :installProxy" %
          [ config[:installProxy] ]
      end

    install_delegator_proxy( klass, langcode )
  end
end
293
+
294
+
295
+
296
+ ### Support Lingua::EN::Inflect-style globals in a threadsafe way by using
297
+ ### Thread-local variables.
298
+
299
+ ### Set the default count for all unspecified plurals to +val+. Setting is
300
+ ### local to calling thread.
301
# Store +val+ under the :persistent_count key of the current thread.
def num=( val )
  thread = Thread.current
  thread[:persistent_count] = val
end
304
+ alias_method :NUM=, :num=
305
+
306
+ ### Get the default count for all unspecified plurals. Setting is local to
307
+ ### calling thread.
308
# Fetch the value stored under the :persistent_count key of the current
# thread (nil when nothing has been stored).
def num
  thread = Thread.current
  thread[:persistent_count]
end
311
+ alias_method :NUM, :num
312
+
313
+
314
+ ### Set the 'classical pluralizations' flag to +val+. Setting is local to
315
+ ### calling thread.
316
# Store +val+ under the :classical_plurals key of the current thread.
def classical=( val )
  thread = Thread.current
  thread[:classical_plurals] = val
end
319
+
320
+ ### Return the value of the 'classical pluralizations' flag. Setting is
321
+ ### local to calling thread.
322
# Answer strictly +true+ or +false+ according to the truthiness of the
# current thread's :classical_plurals value.
def classical?
  !!Thread.current[:classical_plurals]
end
325
+
326
+
327
+ #######
328
+ private
329
+ #######
330
+
331
+ ### Try to load the module that implements the given language, returning
332
+ ### the Module object if successful.
333
### Find and return the Module implementing the language +lang+.
###
### Raises a RuntimeError if +lang+ is not a key of LanguageCodes, and a
### LoadError (listing each attempt) if none of the language's codes leads
### to a constant defined under Linguistics.
def self::load_language( lang )
  unless LanguageCodes.key?( lang )
    raise "Unknown language code '#{lang}'"
  end

  # Shortest codes first, alphabetical within the same length
  candidates = LanguageCodes[ lang ][:codes].sort_by {|code| [code.length, code] }
  failures = []

  candidates.each do |code|
    constname = code.upcase

    # Only try to require the implementation if its constant isn't there
    # yet; a failed require is noted and the next code is tried.
    unless Linguistics::const_defined?( constname )
      begin
        require "linguistics/#{code}"
      rescue LoadError => err
        failures << "Tried 'linguistics/#{code}': #{err.message}\n"
        next
      end
    end

    return Linguistics::const_get( constname ) if
      Linguistics::const_defined?( constname )
  end

  raise LoadError,
    "Failed to load language extension %s:\n%s" %
    [ lang, failures.join ]
end
364
+
365
+ end # class linguistics
366
+
@@ -0,0 +1,1728 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # = Linguistics::EN
4
+ #
5
+ # This module contains English-language linguistic functions for the Linguistics
6
+ # module. It can be either loaded directly, or by passing some variant of 'en'
7
+ # or 'eng' to the Linguistics::use method.
8
+ #
9
+ # The functions contained by the module provide:
10
+ #
11
+ # == Plural Inflections
12
+ #
13
+ # Plural forms of all nouns, most verbs, and some adjectives are provided. Where
14
+ # appropriate, "classical" variants (for example: "brother" -> "brethren",
15
+ # "dogma" -> "dogmata", etc.) are also provided.
16
+ #
17
+ # These can be accessed via the #plural, #plural_noun, #plural_verb, and
18
+ # #plural_adjective methods.
19
+ #
20
+ # == Indefinite Articles
21
+ #
22
+ # Pronunciation-based "a"/"an" selection is provided for all English words, and
23
+ # most initialisms.
24
+ #
25
+ # See: #a, #an, and #no.
26
+ #
27
+ # == Numbers to Words
28
+ #
29
+ # Conversion from Numeric values to words is supported using the American
30
+ # "thousands" system. E.g., 2561 => "two thousand, five hundred and sixty-one".
31
+ #
32
+ # See the #numwords method.
33
+ #
34
+ # == Ordinals
35
+ #
36
+ # It is also possible to inflect numerals (1,2,3) and number words ("one",
37
+ # "two", "three") to ordinals (1st, 2nd, 3rd) and ordinates ("first", "second",
38
+ # "third").
39
+ #
40
+ # == Conjunctions
41
+ #
42
+ # This module also supports the creation of English conjunctions from Arrays of
43
+ # Strings or objects which respond to the #to_s message. Eg.,
44
+ #
45
+ # %w{cow pig chicken cow dog cow duck duck moose}.en.conjunction
46
+ # ==> "three cows, two ducks, a pig, a chicken, a dog, and a moose"
47
+ #
48
+ # == Infinitives
49
+ #
50
+ # Returns the infinitive form of English verbs:
51
+ #
52
+ # "dodging".en.infinitive
53
+ # ==> "dodge"
54
+ #
55
+ #
56
+ # == Authors
57
+ #
58
+ # * Michael Granger <ged@FaerieMUD.org>
59
+ #
60
+ # == Acknowledgements
61
+ #
62
+ # The inflection functions of this module were adapted from Damian Conway's
63
+ # Lingua::EN::Inflect Perl module:
64
+ #
65
+ # Copyright (c) 1997-2000, Damian Conway. All Rights Reserved.
66
+ # This module is free software. It may be used, redistributed
67
+ # and/or modified under the same terms as Perl itself.
68
+ #
69
+ # The conjunctions code was adapted from the Lingua::Conjunction Perl module
70
+ # written by Robert Rothenberg and Damian Conway, which has no copyright
71
+ # statement included.
72
+ #
73
+ # :include: LICENSE
74
+ #
75
+ #--
76
+ #
77
+ # Please see the file LICENSE in the base directory for licensing details.
78
+ #
79
+ module Linguistics::EN
80
+
81
+ # Load in the secondary modules and add them to Linguistics::EN.
82
+ require 'linguistics/en/infinitive'
83
+ require 'linguistics/en/wordnet'
84
+ require 'linguistics/en/linkparser'
85
+
86
+ # Add 'english' to the list of default languages
87
+ Linguistics::DefaultLanguages.push( :en )
88
+
89
+
90
+ #################################################################
91
+ ### U T I L I T Y F U N C T I O N S
92
+ #################################################################
93
+
94
+ ### Wrap one or more parts in a non-capturing alternation Regexp
95
### Flatten +parts+, join them with "|" and wrap the result in a
### non-capturing group, returning the pattern source as a String.
def self::matchgroup( *parts )
  alternatives = parts.flatten
  "(?:" + alternatives.join( "|" ) + ")"
end
99
+
100
+
101
+ @lprintf_formatters = {}
102
+ class << self
103
+ attr_accessor :lprintf_formatters
104
+ end
105
+
106
+ ### Register the specified method (which can be either a Method object or a
107
+ ### Symbol for looking up a method) as the lprintf formatter for +name+.
108
### Store +meth+ in the lprintf_formatters table under +name+. Anything
### that is not already a Method object is first looked up with #method.
def self::def_lprintf_formatter( name, meth )
  formatter = meth.is_a?( Method ) ? meth : self.method( meth )
  self.lprintf_formatters[ name ] = formatter
end
112
+
113
+
114
+
115
+ #################################################################
116
+ ### C O N S T A N T S
117
+ #################################################################
118
+
119
+ # :stopdoc:
120
+
121
+ #
122
+ # Plurals
123
+ #
124
+
125
+ PL_sb_irregular_s = {
126
+ "ephemeris" => "ephemerides",
127
+ "iris" => "irises|irides",
128
+ "clitoris" => "clitorises|clitorides",
129
+ "corpus" => "corpuses|corpora",
130
+ "opus" => "opuses|opera",
131
+ "genus" => "genera",
132
+ "mythos" => "mythoi",
133
+ "penis" => "penises|penes",
134
+ "testis" => "testes",
135
+ }
136
+
137
+ PL_sb_irregular_h = {
138
+ "child" => "children",
139
+ "brother" => "brothers|brethren",
140
+ "loaf" => "loaves",
141
+ "hoof" => "hoofs|hooves",
142
+ "beef" => "beefs|beeves",
143
+ "money" => "monies",
144
+ "mongoose" => "mongooses",
145
+ "ox" => "oxen",
146
+ "cow" => "cows|kine",
147
+ "soliloquy" => "soliloquies",
148
+ "graffito" => "graffiti",
149
+ "prima donna" => "prima donnas|prime donne",
150
+ "octopus" => "octopuses|octopodes",
151
+ "genie" => "genies|genii",
152
+ "ganglion" => "ganglions|ganglia",
153
+ "trilby" => "trilbys",
154
+ "turf" => "turfs|turves",
155
+ }.update( PL_sb_irregular_s )
156
+ PL_sb_irregular = matchgroup PL_sb_irregular_h.keys
157
+
158
+
159
+ # Classical "..a" -> "..ata"
160
+ PL_sb_C_a_ata = matchgroup %w[
161
+ anathema bema carcinoma charisma diploma
162
+ dogma drama edema enema enigma lemma
163
+ lymphoma magma melisma miasma oedema
164
+ sarcoma schema soma stigma stoma trauma
165
+ gumma pragma
166
+ ].collect {|word| word[0...-1]}
167
+
168
+ # Unconditional "..a" -> "..ae"
169
+ PL_sb_U_a_ae = matchgroup %w[
170
+ alumna alga vertebra persona
171
+ ]
172
+
173
+ # Classical "..a" -> "..ae"
174
+ PL_sb_C_a_ae = matchgroup %w[
175
+ amoeba antenna formula hyperbola
176
+ medusa nebula parabola abscissa
177
+ hydra nova lacuna aurora .*umbra
178
+ flora fauna
179
+ ]
180
+
181
+ # Classical "..en" -> "..ina"
182
+ PL_sb_C_en_ina = matchgroup %w[
183
+ stamen foramen lumen
184
+ ].collect {|word| word[0...-2] }
185
+
186
+ # Unconditional "..um" -> "..a"
187
+ PL_sb_U_um_a = matchgroup %w[
188
+ bacterium agendum desideratum erratum
189
+ stratum datum ovum extremum
190
+ candelabrum
191
+ ].collect {|word| word[0...-2] }
192
+
193
+ # Classical "..um" -> "..a"
194
# Stems (word minus its final two letters) of the nouns in this list,
# joined into one alternation. "enconium" and "millenium" are misspellings
# kept for backward compatibility; the correct spellings "encomium" and
# "millennium" are listed beside them so that correctly-spelled input
# matches too.
PL_sb_C_um_a = matchgroup %w[
  maximum minimum momentum optimum
  quantum cranium curriculum dictum
  phylum aquarium compendium emporium
  enconium encomium gymnasium honorarium interregnum
  lustrum memorandum millenium millennium rostrum
  spectrum speculum stadium trapezium
  ultimatum medium vacuum velum
  consortium
].collect {|word| word[0...-2]}
204
+
205
+ # Unconditional "..us" -> "i"
206
+ PL_sb_U_us_i = matchgroup %w[
207
+ alumnus alveolus bacillus bronchus
208
+ locus nucleus stimulus meniscus
209
+ ].collect {|word| word[0...-2]}
210
+
211
+ # Classical "..us" -> "..i"
212
+ PL_sb_C_us_i = matchgroup %w[
213
+ focus radius genius
214
+ incubus succubus nimbus
215
+ fungus nucleolus stylus
216
+ torus umbilicus uterus
217
+ hippopotamus
218
+ ].collect {|word| word[0...-2]}
219
+
220
+ # Classical "..us" -> "..us" (assimilated 4th declension latin nouns)
221
+ PL_sb_C_us_us = matchgroup %w[
222
+ status apparatus prospectus sinus
223
+ hiatus impetus plexus
224
+ ]
225
+
226
+ # Unconditional "..on" -> "a"
227
+ PL_sb_U_on_a = matchgroup %w[
228
+ criterion perihelion aphelion
229
+ phenomenon prolegomenon noumenon
230
+ organon asyndeton hyperbaton
231
+ ].collect {|word| word[0...-2]}
232
+
233
+ # Classical "..on" -> "..a"
234
+ PL_sb_C_on_a = matchgroup %w[
235
+ oxymoron
236
+ ].collect {|word| word[0...-2]}
237
+
238
+ # Classical "..o" -> "..i" (but normally -> "..os")
239
+ PL_sb_C_o_i_a = %w[
240
+ solo soprano basso alto
241
+ contralto tempo piano
242
+ ]
243
+ PL_sb_C_o_i = matchgroup PL_sb_C_o_i_a.collect{|word| word[0...-1]}
244
+
245
+ # Always "..o" -> "..os"
246
+ PL_sb_U_o_os = matchgroup( %w[
247
+ albino archipelago armadillo
248
+ commando crescendo fiasco
249
+ ditto dynamo embryo
250
+ ghetto guano inferno
251
+ jumbo lumbago magneto
252
+ manifesto medico octavo
253
+ photo pro quarto
254
+ canto lingo generalissimo
255
+ stylo rhino
256
+ ] | PL_sb_C_o_i_a )
257
+
258
+
259
+ # Unconditional "..[ei]x" -> "..ices"
260
+ PL_sb_U_ex_ices = matchgroup %w[
261
+ codex murex silex
262
+ ].collect {|word| word[0...-2]}
263
+ PL_sb_U_ix_ices = matchgroup %w[
264
+ radix helix
265
+ ].collect {|word| word[0...-2]}
266
+
267
+ # Classical "..[ei]x" -> "..ices"
268
+ PL_sb_C_ex_ices = matchgroup %w[
269
+ vortex vertex cortex latex
270
+ pontifex apex index simplex
271
+ ].collect {|word| word[0...-2]}
272
+ PL_sb_C_ix_ices = matchgroup %w[
273
+ appendix
274
+ ].collect {|word| word[0...-2]}
275
+
276
+
277
+ # Arabic: ".." -> "..i"
278
+ PL_sb_C_i = matchgroup %w[
279
+ afrit afreet efreet
280
+ ]
281
+
282
+
283
+ # Hebrew: ".." -> "..im"
284
+ PL_sb_C_im = matchgroup %w[
285
+ goy seraph cherub
286
+ ]
287
+
288
+ # Unconditional "..man" -> "..mans"
289
+ PL_sb_U_man_mans = matchgroup %w[
290
+ human
291
+ Alabaman Bahaman Burman German
292
+ Hiroshiman Liman Nakayaman Oklahoman
293
+ Panaman Selman Sonaman Tacoman Yakiman
294
+ Yokohaman Yuman
295
+ ]
296
+
297
+
298
+ PL_sb_uninflected_s = [
299
+ # Pairs or groups subsumed to a singular...
300
+ "breeches", "britches", "clippers", "gallows", "hijinks",
301
+ "headquarters", "pliers", "scissors", "testes", "herpes",
302
+ "pincers", "shears", "proceedings", "trousers",
303
+
304
+ # Unassimilated Latin 4th declension
305
+ "cantus", "coitus", "nexus",
306
+
307
+ # Recent imports...
308
+ "contretemps", "corps", "debris",
309
+ ".*ois",
310
+
311
+ # Diseases
312
+ ".*measles", "mumps",
313
+
314
+ # Miscellaneous others...
315
+ "diabetes", "jackanapes", "series", "species", "rabies",
316
+ "chassis", "innings", "news", "mews",
317
+ ]
318
+
319
+
320
+ # Don't inflect in classical mode, otherwise normal inflection
321
+ PL_sb_uninflected_herd = matchgroup %w[
322
+ wildebeest swine eland bison buffalo
323
+ elk moose rhinoceros
324
+ ]
325
+
326
+ PL_sb_uninflected = matchgroup [
327
+
328
+ # Some fish and herd animals
329
+ ".*fish", "tuna", "salmon", "mackerel", "trout",
330
+ "bream", "sea[- ]bass", "carp", "cod", "flounder", "whiting",
331
+
332
+ ".*deer", ".*sheep",
333
+
334
+ # All nationals ending in -ese
335
+ "Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
336
+ "Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
337
+ "Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
338
+ "Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
339
+ "Shavese", "Vermontese", "Wenchowese", "Yengeese",
340
+ ".*[nrlm]ese",
341
+
342
+ # Some words ending in ...s (often pairs taken as a whole)
343
+ PL_sb_uninflected_s,
344
+
345
+ # Diseases
346
+ ".*pox",
347
+
348
+ # Other oddities
349
+ "graffiti", "djinn"
350
+ ]
351
+
352
+
353
+ # Singular words ending in ...s (all inflect with ...es)
354
+ PL_sb_singular_s = matchgroup %w[
355
+ .*ss
356
+ acropolis aegis alias arthritis asbestos atlas
357
+ bathos bias bronchitis bursitis caddis cannabis
358
+ canvas chaos cosmos dais digitalis encephalitis
359
+ epidermis ethos eyas gas glottis hepatitis
360
+ hubris ibis lens mantis marquis metropolis
361
+ neuritis pathos pelvis polis rhinoceros
362
+ sassafras tonsillitis trellis .*us
363
+ ]
364
+
365
+ PL_v_special_s = matchgroup [
366
+ PL_sb_singular_s,
367
+ PL_sb_uninflected_s,
368
+ PL_sb_irregular_s.keys,
369
+ '(.*[csx])is',
370
+ '(.*)ceps',
371
+ '[A-Z].*s',
372
+ ]
373
+
374
+ PL_sb_postfix_adj = '(' + {
375
+
376
+ 'general' => ['(?!major|lieutenant|brigadier|adjutant)\S+'],
377
+ 'martial' => ["court"],
378
+
379
+ }.collect {|key,val|
380
+ matchgroup( matchgroup(val) + "(?=(?:-|\\s+)#{key})" )
381
+ }.join("|") + ")(.*)"
382
+
383
+
384
+ PL_sb_military = %r'major|lieutenant|brigadier|adjutant|quartermaster'
385
+ PL_sb_general = %r'((?!#{PL_sb_military.source}).*?)((-|\s+)general)'
386
+
387
+ PL_prep = matchgroup %w[
388
+ about above across after among around at athwart before behind
389
+ below beneath beside besides between betwixt beyond but by
390
+ during except for from in into near of off on onto out over
391
+ since till to under until unto upon with
392
+ ]
393
+
394
+ PL_sb_prep_dual_compound = %r'(.*?)((?:-|\s+)(?:#{PL_prep}|d[eu])(?:-|\s+))a(?:-|\s+)(.*)'
395
+ PL_sb_prep_compound = %r'(.*?)((-|\s+)(#{PL_prep}|d[eu])((-|\s+)(.*))?)'
396
+
397
+
398
+ PL_pron_nom_h = {
399
+ # Nominative Reflexive
400
+ "i" => "we", "myself" => "ourselves",
401
+ "you" => "you", "yourself" => "yourselves",
402
+ "she" => "they", "herself" => "themselves",
403
+ "he" => "they", "himself" => "themselves",
404
+ "it" => "they", "itself" => "themselves",
405
+ "they" => "they", "themself" => "themselves",
406
+
407
+ # Possessive
408
+ "mine" => "ours",
409
+ "yours" => "yours",
410
+ "hers" => "theirs",
411
+ "his" => "theirs",
412
+ "its" => "theirs",
413
+ "theirs" => "theirs",
414
+ }
415
+ PL_pron_nom = matchgroup PL_pron_nom_h.keys
416
+
417
+ PL_pron_acc_h = {
418
+ # Accusative Reflexive
419
+ "me" => "us", "myself" => "ourselves",
420
+ "you" => "you", "yourself" => "yourselves",
421
+ "her" => "them", "herself" => "themselves",
422
+ "him" => "them", "himself" => "themselves",
423
+ "it" => "them", "itself" => "themselves",
424
+ "them" => "them", "themself" => "themselves",
425
+ }
426
+ PL_pron_acc = matchgroup PL_pron_acc_h.keys
427
+
428
# Irregular present/past forms of "to be" and "to have", mapping each
# singular form to its plural. Each key appears once: the repeated "was"
# and "have" entries of the old three-column layout were duplicate Hash
# keys, which Ruby warns about and which added nothing to the table.
PL_v_irregular_pres_h = {
  "am"   => "are",
  "are"  => "are",
  "is"   => "are",
  "was"  => "were",
  "were" => "were",
  "have" => "have",
  "has"  => "have",
}
435
+ PL_v_irregular_pres = matchgroup PL_v_irregular_pres_h.keys
436
+
437
# Present-tense verb forms that could also be read as nouns, mapping each
# singular form to its plural. Each key appears once: in the old
# three-column layout the first- and second-person columns repeated the
# same key, which is a duplicate Hash key (a Ruby warning) with no effect
# on the table's contents or key order.
PL_v_ambiguous_pres_h = {
  "act"   => "act",   "acts"    => "act",
  "blame" => "blame", "blames"  => "blame",
  "can"   => "can",
  "must"  => "must",
  "fly"   => "fly",   "flies"   => "fly",
  "copy"  => "copy",  "copies"  => "copy",
  "drink" => "drink", "drinks"  => "drink",
  "fight" => "fight", "fights"  => "fight",
  "fire"  => "fire",  "fires"   => "fire",
  "like"  => "like",  "likes"   => "like",
  "look"  => "look",  "looks"   => "look",
  "make"  => "make",  "makes"   => "make",
  "reach" => "reach", "reaches" => "reach",
  "run"   => "run",   "runs"    => "run",
  "sink"  => "sink",  "sinks"   => "sink",
  "sleep" => "sleep", "sleeps"  => "sleep",
  "view"  => "view",  "views"   => "view",
}
458
+ PL_v_ambiguous_pres = matchgroup PL_v_ambiguous_pres_h.keys
459
+
460
+ PL_v_irregular_non_pres = matchgroup %w[
461
+ did had ate made put
462
+ spent fought sank gave sought
463
+ shall could ought should
464
+ ]
465
+
466
+ PL_v_ambiguous_non_pres = matchgroup %w[
467
+ thought saw bent will might cut
468
+ ]
469
+
470
+ PL_count_zero = matchgroup %w[
471
+ 0 no zero nil
472
+ ]
473
+
474
+ PL_count_one = matchgroup %w[
475
+ 1 a an one each every this that
476
+ ]
477
+
478
+ PL_adj_special_h = {
479
+ "a" => "some", "an" => "some",
480
+ "this" => "these", "that" => "those",
481
+ }
482
+ PL_adj_special = matchgroup PL_adj_special_h.keys
483
+
484
+ PL_adj_poss_h = {
485
+ "my" => "our",
486
+ "your" => "your",
487
+ "its" => "their",
488
+ "her" => "their",
489
+ "his" => "their",
490
+ "their" => "their",
491
+ }
492
+ PL_adj_poss = matchgroup PL_adj_poss_h.keys
493
+
494
+
495
+ #
496
+ # Numerals, ordinals, and numbers-to-words
497
+ #
498
+
499
+ # Numerical inflections
500
+ Nth = {
501
+ 0 => 'th',
502
+ 1 => 'st',
503
+ 2 => 'nd',
504
+ 3 => 'rd',
505
+ 4 => 'th',
506
+ 5 => 'th',
507
+ 6 => 'th',
508
+ 7 => 'th',
509
+ 8 => 'th',
510
+ 9 => 'th',
511
+ 11 => 'th',
512
+ 12 => 'th',
513
+ 13 => 'th',
514
+ }
515
+
516
+ # Ordinal word parts
517
+ Ordinals = {
518
+ 'ty' => 'tieth',
519
+ 'one' => 'first',
520
+ 'two' => 'second',
521
+ 'three' => 'third',
522
+ 'five' => 'fifth',
523
+ 'eight' => 'eighth',
524
+ 'nine' => 'ninth',
525
+ 'twelve' => 'twelfth',
526
+ }
527
+ OrdinalSuffixes = Ordinals.keys.join("|") + "|"
528
+ Ordinals[""] = 'th'
529
+
530
+ # Numeral names
531
+ Units = [''] + %w[one two three four five six seven eight nine]
532
+ Teens = %w[ten eleven twelve thirteen fourteen
533
+ fifteen sixteen seventeen eighteen nineteen]
534
+ Tens = ['',''] + %w[twenty thirty forty fifty sixty seventy eighty ninety]
535
+ Thousands = [' ', ' thousand'] + %w[
536
+ m b tr quadr quint sext sept oct non dec undec duodec tredec
537
+ quattuordec quindec sexdec septemdec octodec novemdec vigint
538
+ ].collect {|prefix| ' ' + prefix + 'illion'}
539
+
540
+ # A collection of functions for transforming digits into word
541
+ # phrases. Indexed by the number of digits being transformed; e.g.,
542
+ # <tt>NumberToWordsFunctions[2]</tt> is the function for transforming
543
+ # double-digit numbers.
544
+ NumberToWordsFunctions = [
545
+ proc {|*args| raise "No digits (#{args.inspect})"},
546
+
547
+ # Single-digits
548
+ proc {|zero,x|
549
+ (x.nonzero? ? to_units(x) : "#{zero} ")
550
+ },
551
+
552
+ # Double-digits
553
+ proc {|zero,x,y|
554
+ if x.nonzero?
555
+ to_tens( x, y )
556
+ elsif y.nonzero?
557
+ "#{zero} " + NumberToWordsFunctions[1].call( zero, y )
558
+ else
559
+ ([zero] * 2).join(" ")
560
+ end
561
+ },
562
+
563
+ # Triple-digits
564
+ proc {|zero,x,y,z|
565
+ NumberToWordsFunctions[1].call(zero,x) +
566
+ NumberToWordsFunctions[2].call(zero,y,z)
567
+ }
568
+ ]
569
+
570
+
571
+ #
572
+ # Indefinite Articles
573
+ #
574
+
575
+ # This pattern matches strings of capitals starting with a "vowel-sound"
576
+ # consonant followed by another consonant, and which are not likely
577
+ # to be real words (oh, all right then, it's just magic!)
578
+ A_abbrev = %{
579
+ (?! FJO | [HLMNS]Y. | RY[EO] | SQU
580
+ | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU])
581
+ [FHLMNRSX][A-Z]
582
+ }
583
+
584
+ # This pattern codes the beginnings of all English words beginning with a
585
+ # 'y' followed by a consonant. Any other y-consonant prefix therefore
586
+ # implies an abbreviation.
587
+ A_y_cons = 'y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)'
588
+
589
+ # Exceptions to exceptions
590
+ A_explicit_an = matchgroup( "euler", "hour(?!i)", "heir", "honest", "hono" )
591
+
592
+
593
+ #
594
+ # Configuration defaults
595
+ #
596
+
597
# Option defaults for #numwords; caller-supplied options are merged over
# these.
NumwordDefaults = {
  :group   => 0,
  :comma   => ', ',
  :and     => ' and ',
  :zero    => 'zero',
  :decimal => 'point',
  :asArray => false,
}

# Count ranges which #quantify describes as "several", "a number of",
# "numerous" and "many", respectively.
SeveralRange  = (2..5)
NumberRange   = (6..19)
NumerousRange = (20..45)
ManyRange     = (46..99)

# Option defaults for #quantify
QuantifyDefaults = {
  :joinword => " of ",
}

# Option defaults for #conjunction
ConjunctionDefaults = {
  :separator   => ', ',
  :altsep      => '; ',
  :penultimate => true,
  :conjunctive => 'and',
  :combine     => true,
  :casefold    => true,
  :generalize  => false,
  :quantsort   => true,
}
630
+
631
+
632
+ #
633
+ # Title case
634
+ #
635
+
636
+ # "In titles, capitalize the first word, the last word, and all words in
637
+ # between except articles (a, an, and the), prepositions under five letters
638
+ # (in, of, to), and coordinating conjunctions (and, but). These rules apply
639
+ # to titles of long, short, and partial works as well as your own papers"
640
+ # (Anson, Schwegler, and Muth. The Longman Writer's Companion 240).
641
+
642
# Build the list of exceptions to title-capitalization: the words that stay
# lowercase inside a title.

# The articles are "a", "an" and "the" ("and" is a conjunction and is listed
# with the coordinating conjunctions below).
Articles = %w[a an the]

# Prepositions of fewer than five letters
ShortPrepositions = %w[
  amid at but by down from in into like near of off on onto out over
  past save with till to unto up upon
]

CoordConjunctions = %w[and but as]

# Union of the three lists; Array#| drops words that appear in more than one.
TitleCaseExceptions = Articles | ShortPrepositions | CoordConjunctions
649
+
650
+
651
+ # :startdoc:
652
+
653
+ #################################################################
654
+ ### " B A C K E N D " F U N C T I O N S
655
+ #################################################################
656
+
657
+
658
+ ###############
659
+ module_function
660
+ ###############
661
+
662
+ ### Debugging output
663
def debug_msg( *msgs ) # :nodoc:
  # Write the messages, joined by single spaces, to $stderr -- but only when
  # Ruby's global $DEBUG flag is set. Returns nil either way.
  return nil unless $DEBUG
  $stderr.puts( msgs.join(" ") )
end
666
+
667
+
668
+ ### Normalize a count to either 1 or 2 (singular or plural)
669
def normalize_count( count, default=2 )
  # Returns 1 when +count+ reads as singular, otherwise +default+ (plural
  # unless the caller says differently). A nil count is never inspected.
  return default if count.nil?

  text = count.to_s

  # Singular if it matches the "one" words, or -- in classical mode only --
  # the "zero" words.
  singular = ( /^(#{PL_count_one})$/i =~ text )
  singular ||= ( Linguistics::classical? && /^(#{PL_count_zero})$/ =~ text )

  singular ? 1 : default
end
679
+
680
+
681
+ ### Do normal/classical switching and match capitalization in <tt>inflected</tt> by
682
+ ### examining the <tt>original</tt> input.
683
def postprocess( original, inflected )
  # Returns a new String; +inflected+ itself is left untouched. The
  # pluralizers hand back values straight out of their lookup tables, so
  # editing the argument in place would permanently rewrite the table entry
  # (and would raise for a frozen String).
  result = inflected.dup

  # An inflection of the form "normal|classical" is reduced to the variant
  # selected by the current classical mode.
  result.sub!( /([^|]+)\|(.+)/ ) {
    Linguistics::classical? ? $2 : $1
  }

  case original
  when "I"
    # The pronoun "I" is capitalized by itself, so it says nothing about
    # the capitalization wanted for the result.
    result
  when /^[A-Z]+$/
    result.upcase
  when /^[A-Z]/
    # Can't use #capitalize, as it would downcase the rest of the string,
    # too.
    result[0,1] = result[0,1].upcase
    result
  else
    result
  end
end
702
+
703
+
704
+ ### Pluralize nouns
705
def pluralize_noun( word, count=nil )
  # Returns the plural of the noun +word+, or +word+ itself when the count
  # (the argument, else Linguistics::num) normalizes to singular. The rules
  # below are tried strictly in order; the first one that matches wins.
  count = normalize_count( count || Linguistics::num )
  return word if count == 1

  # Handle user-defined nouns
  #if value = ud_match( word, PL_sb_user_defined )
  #  return value
  #end

  # Empty words and uninflected plurals come back unchanged
  case word
  when '', /^(#{PL_sb_uninflected})$/i
    return word
  else
    return word if Linguistics::classical? &&
      /^(#{PL_sb_uninflected_herd})$/i =~ word
  end

  case word

  # Compounds ("Governor General", "mother-in-law", "aide-de-camp", etc.):
  # the captures are saved before recursing, since the recursive call
  # performs matches of its own.
  when /^(?:#{PL_sb_postfix_adj})$/i
    head, tail = $1, $2
    return pluralize_noun( head, 2 ) + tail

  when /^(?:#{PL_sb_prep_dual_compound})$/i
    head, joiner, last = $1, $2, $3
    return pluralize_noun( head, 2 ) + joiner + pluralize_noun( last )

  when /^(?:#{PL_sb_prep_compound})$/i
    head, tail = $1, $2
    return pluralize_noun( head, 2 ) + tail

  # Pronouns
  when /^((?:#{PL_prep})\s+)(#{PL_pron_acc})$/i
    return $1 + PL_pron_acc_h[ $2.downcase ]

  when /^(#{PL_pron_nom})$/i
    return PL_pron_nom_h[ word.downcase ]

  when /^(#{PL_pron_acc})$/i
    return PL_pron_acc_h[ $1.downcase ]

  # Isolated irregular plurals
  when /(.*)\b(#{PL_sb_irregular})$/i
    return $1 + PL_sb_irregular_h[ $2.downcase ]

  when /(#{PL_sb_U_man_mans})$/i
    return "#{$1}s"

  # Families of irregular plurals
  when /(.*)man$/i                then return "#{$1}men"
  when /(.*[ml])ouse$/i           then return "#{$1}ice"
  when /(.*)goose$/i              then return "#{$1}geese"
  when /(.*)tooth$/i              then return "#{$1}teeth"
  when /(.*)foot$/i               then return "#{$1}feet"

  # Unassimilated imports
  when /(.*)ceps$/i               then return word
  when /(.*)zoon$/i               then return "#{$1}zoa"
  when /(.*[csx])is$/i            then return "#{$1}es"
  when /(#{PL_sb_U_ex_ices})ex$/i then return "#{$1}ices"
  when /(#{PL_sb_U_ix_ices})ix$/i then return "#{$1}ices"
  when /(#{PL_sb_U_um_a})um$/i    then return "#{$1}a"
  when /(#{PL_sb_U_us_i})us$/i    then return "#{$1}i"
  when /(#{PL_sb_U_on_a})on$/i    then return "#{$1}a"
  when /(#{PL_sb_U_a_ae})$/i      then return "#{$1}e"
  end

  # Incompletely assimilated imports are only inflected in classical mode
  if Linguistics::classical?
    case word
    when /(.*)trix$/i               then return "#{$1}trices"
    when /(.*)eau$/i                then return "#{$1}eaux"
    when /(.*)ieu$/i                then return "#{$1}ieux"
    when /(.{2,}[yia])nx$/i         then return "#{$1}nges"
    when /(#{PL_sb_C_en_ina})en$/i  then return "#{$1}ina"
    when /(#{PL_sb_C_ex_ices})ex$/i then return "#{$1}ices"
    when /(#{PL_sb_C_ix_ices})ix$/i then return "#{$1}ices"
    when /(#{PL_sb_C_um_a})um$/i    then return "#{$1}a"
    when /(#{PL_sb_C_us_i})us$/i    then return "#{$1}i"
    when /(#{PL_sb_C_us_us})$/i     then return "#{$1}"
    when /(#{PL_sb_C_a_ae})$/i      then return "#{$1}e"
    when /(#{PL_sb_C_a_ata})a$/i    then return "#{$1}ata"
    when /(#{PL_sb_C_o_i})o$/i      then return "#{$1}i"
    when /(#{PL_sb_C_on_a})on$/i    then return "#{$1}a"
    when /#{PL_sb_C_im}$/i          then return "#{word}im"
    when /#{PL_sb_C_i}$/i           then return "#{word}i"
    end
  end

  # The value of this last case is the method's result.
  case word

  # Singular nouns ending in ...s or other sibilants
  when /^(#{PL_sb_singular_s})$/i then "#{$1}es"
  when /^([A-Z].*s)$/             then "#{$1}es"
  when /(.*)([cs]h|[zx])$/i       then "#{$1}#{$2}es"
  # when /(.*)(us)$/i             then "#{$1}#{$2}es"

  # ...f -> ...ves
  when /(.*[eao])lf$/i            then "#{$1}lves"
  when /(.*[^d])eaf$/i            then "#{$1}eaves"
  when /(.*[nlw])ife$/i           then "#{$1}ives"
  when /(.*)arf$/i                then "#{$1}arves"

  # ...y
  when /(.*[aeiou])y$/i           then "#{$1}ys"
  when /([A-Z].*y)$/              then "#{$1}s"
  when /(.*)y$/i                  then "#{$1}ies"

  # ...o
  when /#{PL_sb_U_o_os}$/i        then "#{word}s"
  when /[aeiou]o$/i               then "#{word}s"
  when /o$/i                      then "#{word}es"

  # Otherwise just add ...s
  else
    "#{word}s"
  end
end # def pluralize_noun
831
+
832
+
833
+
834
+ ### Pluralize special verbs
835
def pluralize_special_verb( word, count )
  # Returns the plural form of an irregular or third-person-singular verb,
  # or nil when +word+ is not one this method handles (or the count is
  # singular), so that callers can fall through to another pluralizer.
  count = normalize_count( count || Linguistics::num )
  return nil if /^(#{PL_count_one})$/i =~ count.to_s

  # Handle user-defined verbs
  #if value = ud_match( word, PL_v_user_defined )
  #  return value
  #end

  case word

  # Irregular present tense (simple and compound): inflect the verb and
  # keep whatever follows it
  when /^(#{PL_v_irregular_pres})((\s.*)?)$/i
    PL_v_irregular_pres_h[ $1.downcase ] + $2

  # Irregular future, preterite and perfect tenses are unchanged
  when /^(#{PL_v_irregular_non_pres})((\s.*)?)$/i
    word

  # Special cases, and anything else containing whitespace
  when /^(#{PL_v_special_s})$/, /\s/
    nil

  # Standard 3rd person (chop the ...(e)s off single words)
  when /^(.*)([cs]h|[x]|zz|ss)es$/i
    $1 + $2
  when /^(..+)ies$/i
    "#{$1}y"
  when /^(.+)oes$/i
    "#{$1}o"
  when /^(.*[^s])s$/i
    $1

  # Otherwise, a regular verb (handled elsewhere)
  else
    nil
  end
end
875
+
876
+
877
+ ### Pluralize regular verbs
878
def pluralize_general_verb( word, count )
  # Returns the plural of a regular verb. Only the ambiguous present-tense
  # forms are actually changed; everything else comes back as given.
  count = normalize_count( count || Linguistics::num )
  return word if /^(#{PL_count_one})$/i =~ count.to_s

  case word

  # Ambiguous present tenses (simple and compound): inflect the verb and
  # keep whatever follows it
  when /^(#{PL_v_ambiguous_pres})((\s.*)?)$/i
    PL_v_ambiguous_pres_h[ $1.downcase ] + $2

  # Ambiguous preterite and perfect tenses
  when /^(#{PL_v_ambiguous_non_pres})((\s.*)?)$/i
    word

  # Otherwise, 1st or 2nd person is uninflected
  else
    word
  end
end
899
+
900
+
901
+ ### Handle special adjectives
902
def pluralize_special_adjective( word, count )
  # Returns the plural of a special adjective or possessive, +word+ itself
  # when the count is singular, or nil when +word+ is not recognized.
  count = normalize_count( count || Linguistics::num )
  return word if /^(#{PL_count_one})$/i =~ count.to_s

  # Handle user-defined adjectives
  #if value = ud_match( word, PL_adj_user_defined )
  #  return value
  #end

  case word

  # Known cases
  when /^(#{PL_adj_special})$/i
    PL_adj_special_h[ $1.downcase ]

  # Possessive adjectives
  when /^(#{PL_adj_poss})$/i
    PL_adj_poss_h[ $1.downcase ]

  # Possessive nouns: pluralize the owner, then append "'" if the plural
  # already ends in "s" and "'s" otherwise
  when /^(.*)'s?$/
    owners = plural_noun( $1 )
    ( /s$/ =~ owners ) ? "#{owners}'" : "#{owners}'s"

  # Otherwise, no idea
  else
    nil
  end
end
936
+
937
+
938
+ ### Returns the given word with a prepended indefinite article, unless
939
+ ### +count+ is non-nil and not singular.
940
def indef_article( word, count )
  # A count (the argument, else Linguistics::num) that is present and not
  # singular replaces the article altogether.
  count ||= Linguistics::num
  if count && /^(#{PL_count_one})$/i !~ count.to_s
    return "#{count} #{word}"
  end

  # Handle user-defined variants
  # return value if value = ud_match( word, A_a_user_defined )

  # Ordered rules: the first pattern that matches +word+ picks the article.
  rules = [
    # Special cases
    [ /^(#{A_explicit_an})/i,     'an' ],

    # Abbreviations
    [ /^(#{A_abbrev})/x,          'an' ],
    [ /^[aefhilmnorsx][.-]/i,     'an' ],
    [ /^[a-z][.-]/i,              'a'  ],

    # Consonants
    [ /^[^aeiouy]/i,              'a'  ],

    # Special vowel-forms
    [ /^e[uw]/i,                  'a'  ],
    [ /^onc?e\b/i,                'a'  ],
    [ /^uni([^nmd]|mo)/i,         'a'  ],
    [ /^u[bcfhjkqrst][aeiou]/i,   'a'  ],

    # Vowels
    [ /^[aeiou]/i,                'an' ],

    # y... (before certain consonants implies (unnaturalized) "i.." sound)
    [ /^(#{A_y_cons})/i,          'an' ],
  ]
  matched = rules.find {|pattern, _| pattern === word }

  # Otherwise, guess "a"
  article = matched ? matched.last : 'a'
  return "#{article} #{word}"
end
989
+
990
+
991
+ ### Transform the specified number of units-place numerals into a
992
+ ### word-phrase at the given number of +thousands+ places.
993
def to_units( units, thousands=0 )
  # The digit's name followed by the suffix for its power of one thousand.
  name   = Units[ units ]
  suffix = to_thousands( thousands )
  name + suffix
end
996
+
997
+
998
+ ### Transform the specified number of tens- and units-place numerals into a
999
+ ### word-phrase at the given number of +thousands+ places.
1000
def to_tens( tens, units, thousands=0 )
  if tens == 1
    # Ten through nineteen have names of their own
    Teens[ units ] + to_thousands( thousands )
  else
    # Hyphenate only when both places are non-zero ("twenty-one"); the
    # units helper appends the thousands suffix.
    Tens[ tens ] + ( tens.nonzero? && units.nonzero? ? '-' : '' ) +
      to_units( units, thousands )
  end
end
1008
+
1009
+
1010
+ ### Transform the specified number of hundreds-, tens-, and units-place
1011
+ ### numerals into a word phrase. If the number of thousands (+thousands+) is
1012
+ ### greater than 0, it will be used to determine where the decimal point is
1013
+ ### in relation to the hundreds-place number.
1014
def to_hundreds( hundreds, tens=0, units=0, thousands=0, joinword=" and " )
  # Returns nil when all three digits are zero, so that the caller can
  # leave the group out entirely.
  joinword = ' ' if joinword.empty?
  remainder = tens.nonzero? || units.nonzero?

  if hundreds.nonzero?
    to_units( hundreds ) + " hundred" +
      ( remainder ? joinword : '' ) +
      to_tens( tens, units ) +
      to_thousands( thousands )
  elsif remainder
    to_tens( tens, units ) + to_thousands( thousands )
  else
    nil
  end
end
1027
+
1028
+ ### Transform the specified number into one or more words like 'thousand',
1029
+ ### 'million', etc. Uses the thousands (American) system.
1030
def to_thousands( thousands=0 )
  # The table covers one "cycle" of powers. The first word is the entry for
  # +thousands+ modulo the cycle length, and the table's last entry is
  # added once for every complete cycle contained in +thousands+.
  cycle = Thousands.length - 1
  words = []

  (0..thousands).step( cycle ) do |offset|
    words << ( offset.zero? ? Thousands[ thousands % cycle ] : Thousands.last )
  end

  words.join( " " )
end
1042
+
1043
+
1044
+ ### Return the specified number +num+ as an array of number phrases.
1045
def number_to_words( num, config )
  # +num+ is a run of digits (anything responding to #to_s and #to_i) and
  # +config+ a Hash providing at least :zero, :group and :and. Returns an
  # Array of word groups.
  return [config[:zero]] if num.to_i.zero?
  chunks = []

  # Break into word-groups if groups is set
  if config[:group].nonzero?

    # Build a Regexp with <config[:group]> number of digits. Any past
    # the first are optional.
    re = Regexp::new( "(\\d)" + ("(\\d)?" * (config[:group] - 1)) )

    # Scan the string, and call the word-chunk function that deals with
    # chunks of the found number of digits.
    num.to_s.scan( re ) {|digits|
      debug_msg " digits = #{digits.inspect}"

      # Optional groups that did not take part in the match are nil, so
      # the number of digits found is the number of non-nil captures.
      # (Array#nitems, which used to count them, no longer exists in
      # Ruby 1.9 and later.)
      numerals = digits.flatten.compact.collect {|i| i.to_i}
      fn = NumberToWordsFunctions[ numerals.length ]
      debug_msg " numerals = #{numerals.inspect}"
      chunks.push fn.call( config[:zero], *numerals ).strip
    }
  else
    phrase = num.to_s
    phrase.sub!( /\A\s*0+/, '' )
    mill = 0

    # Match backward from the end of the digits in the string, turning
    # chunks of three, of two, and of one into words. Each chunk is
    # removed from the string as it is consumed, and +mill+ counts how
    # many groups of three have been taken so far.
    mill += 1 while
      phrase.sub!( /(\d)(\d)(\d)(?=\D*\Z)/ ) {
        words = to_hundreds( $1.to_i, $2.to_i, $3.to_i, mill,
                             config[:and] )
        chunks.unshift words.strip.squeeze(' ') unless words.nil?
        ''
      }

    phrase.sub!( /(\d)(\d)(?=\D*\Z)/ ) {
      chunks.unshift to_tens( $1.to_i, $2.to_i, mill ).strip.squeeze(' ')
      ''
    }
    phrase.sub!( /(\d)(?=\D*\Z)/ ) {
      chunks.unshift to_units( $1.to_i, mill ).strip.squeeze(' ')
      ''
    }
  end

  return chunks
end
1092
+
1093
+
1094
+ #################################################################
1095
+ ### P U B L I C F U N C T I O N S
1096
+ #################################################################
1097
+
1098
+ ### Return the name of the language this module is for.
1099
def language( unused=nil )
  # The argument is accepted but never consulted.
  return "English"
end
1102
+
1103
+
1104
+ ### Return the plural of the given +phrase+ if +count+ indicates it should
1105
+ ### be plural.
1106
def plural( phrase, count=nil )
  phrase = numwords( phrase ) if phrase.is_a?( Numeric )

  # Separate the word from any surrounding whitespace, which is put back
  # around the result. With nothing to inflect, the phrase is returned
  # as it was given.
  pre, word, post = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s ).to_a[1,3]
  return phrase if word.nil? or word.empty?

  # Try the pluralizers from most to least specific; the special ones
  # answer nil when the word is not theirs.
  inflected = pluralize_special_adjective( word, count ) ||
              pluralize_special_verb( word, count ) ||
              pluralize_noun( word, count )

  return pre + postprocess( word, inflected ) + post
end
1120
+ def_lprintf_formatter :PL, :plural
1121
+
1122
+
1123
+ ### Return the plural of the given noun +phrase+ if +count+ indicates it
1124
+ ### should be plural.
1125
def plural_noun( phrase, count=nil )
  # Surrounding whitespace is split off and restored around the result;
  # a phrase with nothing to inflect is returned as given.
  pre, word, post = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s ).to_a[1,3]
  return phrase if word.nil? or word.empty?

  inflected = pluralize_noun( word, count )
  return pre + postprocess( word, inflected ) + post
end
1133
+ def_lprintf_formatter :PL_N, :plural_noun
1134
+
1135
+
1136
+ ### Return the plural of the given verb +phrase+ if +count+ indicates it
1137
+ ### should be plural.
1138
def plural_verb( phrase, count=nil )
  # Surrounding whitespace is split off and restored around the result;
  # a phrase with nothing to inflect is returned as given.
  pre, word, post = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s ).to_a[1,3]
  return phrase if word.nil? or word.empty?

  # The special-verb pluralizer answers nil for verbs it doesn't handle,
  # which are then treated as regular verbs.
  inflected = pluralize_special_verb( word, count ) ||
              pluralize_general_verb( word, count )

  return pre + postprocess( word, inflected ) + post
end
1148
+ def_lprintf_formatter :PL_V, :plural_verb
1149
+
1150
+
1151
+ ### Return the plural of the given adjectival +phrase+ if +count+ indicates
1152
+ ### it should be plural.
1153
def plural_adjective( phrase, count=nil )
  # Surrounding whitespace is split off and restored around the result;
  # a phrase with nothing to inflect is returned as given.
  pre, word, post = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s ).to_a[1,3]
  return phrase if word.nil? or word.empty?

  # Adjectives the special pluralizer does not know are left unchanged.
  inflected = pluralize_special_adjective( word, count ) || word

  return pre + postprocess( word, inflected ) + post
end
1162
+ alias_method :plural_adj, :plural_adjective
1163
+ def_lprintf_formatter :PL_ADJ, :plural_adjective
1164
+
1165
+
1166
+ ### Return the given phrase with the appropriate indefinite article ("a" or
1167
+ ### "an") prepended.
1168
def a( phrase, count=nil )
  # Surrounding whitespace is split off and restored around the result;
  # a phrase with no word in it is returned as given.
  pre, word, post = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s ).to_a[1,3]
  return phrase if word.nil? or word.empty?

  return pre + indef_article( word, count ) + post
end
1176
+ alias_method :an, :a
1177
+ def_lprintf_formatter :A, :a
1178
+ def_lprintf_formatter :AN, :a
1179
+
1180
+
1181
+ ### Translate zero-quantified +phrase+ to "no +phrase.plural+"
1182
def no( phrase, count=nil )
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
  pre, word, post = md.to_a[1,3]

  # With nothing to quantify, return the phrase as given, like the other
  # inflectors do. Without this guard, +word+ and +post+ are nil and the
  # concatenation below raises a TypeError.
  return phrase if word.nil? or word.empty?

  # An explicit count wins, then the module-wide count, then zero.
  count ||= Linguistics::num || 0

  if /^#{PL_count_zero}$/ =~ count.to_s
    return "#{pre}no " + plural( word, 0 ) + post
  else
    return "#{pre}#{count} " + plural( word, count ) + post
  end
end
1193
+ def_lprintf_formatter :NO, :no
1194
+
1195
+
1196
+ ### Participles
1197
def present_participle( word )
  # Start from the plural (base) form of the verb, then apply the first
  # spelling rule that fits before appending "ing". Each #sub! answers nil
  # when it changes nothing, which lets the +or+ chain fall through to the
  # next rule.
  stem = plural_verb( word.to_s, 2 )

  stem.sub!( /ie$/, 'y' ) or
    stem.sub!( /ue$/, 'u' ) or
    # The backreference has to be spelled \1 in the replacement string: a
    # Perl-style "$1" is inserted literally.
    stem.sub!( /([auy])e$/, '\1' ) or
    stem.sub!( /i$/, '' ) or
    stem.sub!( /([^e])e$/, "\\1" ) or
    # Verbs ending in "er" take "ing" with no further change
    /er$/.match( stem ) or
    # Double the final consonant after a single vowel ("run" -> "runn")
    stem.sub!( /([^aeiou][aeiouy]([bdgmnprst]))$/, "\\1\\2" )

  return "#{stem}ing"
end
1210
+ alias_method :part_pres, :present_participle
1211
+ def_lprintf_formatter :PART_PRES, :present_participle
1212
+
1213
+
1214
+
1215
+ ### Return the specified number as english words. One or more configuration
1216
+ ### values may be passed to control the returned String:
1217
+ ###
1218
+ ### [<b>:group</b>]
1219
+ ### Controls how many numbers at a time are grouped together. Valid values
1220
+ ### are <code>0</code> (normal grouping), <code>1</code> (single-digit
1221
+ ### grouping, e.g., "one, two, three, four"), <code>2</code>
1222
+ ### (double-digit grouping, e.g., "twelve, thirty-four"), or <code>3</code>
1223
+ ### (triple-digit grouping, e.g., "one twenty-three, four").
1224
+ ### [<b>:comma</b>]
1225
+ ### Set the character/s used to separate word groups. Defaults to
1226
+ ### <code>", "</code>.
1227
+ ### [<b>:and</b>]
1228
+ ### Set the word and/or characters used where <code>' and ' </code>(the
1229
+ ### default) is normally used. Setting <code>:and</code> to
1230
+ ### <code>' '</code>, for example, will cause <code>2556</code> to be
1231
+ ### returned as "two thousand, five hundred fifty-six" instead of
1232
+ ### "two thousand, five hundred and fifty-six".
1233
+ ### [<b>:zero</b>]
1234
+ ### Set the word used to represent the numeral <code>0</code> in the
1235
+ ### result. <code>'zero'</code> is the default.
1236
+ ### [<b>:decimal</b>]
1237
+ ### Set the translation of any decimal points in the number; the default
1238
+ ### is <code>'point'</code>.
1239
+ ### [<b>:asArray</b>]
1240
+ ### If set to a true value, the number will be returned as an array of
1241
+ ### word groups instead of a String.
1242
def numwords( number, hashargs={} )
  # Work on a private copy: the suffix and non-digit stripping below edit
  # the string in place, and #to_s hands back the caller's own object when
  # +number+ is already a String (which may also be frozen).
  num = number.to_s.dup
  config = NumwordDefaults.merge( hashargs )
  raise "Bad chunking option: #{config[:group]}" unless
    config[:group].between?( 0, 3 )

  # Array of number parts: first is everything to the left of the first
  # decimal, followed by any groups of decimal-delimited numbers after that
  parts = []

  # Wordify any sign prefix
  sign = (/\A\s*\+/ =~ num) ? 'plus' : (/\A\s*\-/ =~ num) ? 'minus' : ''

  # Strip any ordinal suffixes
  ord = true if num.sub!( /(st|nd|rd|th)\Z/, '' )

  # Split the number into chunks delimited by '.'
  chunks =
    if !config[:decimal].empty?
      if config[:group].nonzero?
        num.split(/\./)
      else
        num.split(/\./, 2)
      end
    else
      [ num ]
    end

  # Wordify each chunk, pushing arrays into the parts array
  chunks.each_with_index {|chunk,section|
    chunk.gsub!( /\D+/, '' )

    # If there's nothing in this chunk of the number, set it to zero
    # unless it's the whole-number part, in which case just push an
    # empty array.
    if chunk.empty?
      if section.zero?
        parts.push []
        next
      end
    end

    # Split the number section into wordified parts unless this is the
    # second or succeeding part of a non-group number
    unless config[:group].zero? && section.nonzero?
      parts.push number_to_words( chunk, config )
    else
      parts.push number_to_words( chunk, config.merge(:group => 1) )
    end
  }

  debug_msg "Parts => #{parts.inspect}"

  # Turn the last word of the whole-number part back into an ordinal if
  # the original number came in that way.
  if ord && !parts[0].empty?
    parts[0][-1] = ordinal( parts[0].last )
  end

  # If the caller's expecting an Array return, just flatten and return the
  # parts array.
  if config[:asArray]
    unless sign.empty?
      parts[0].unshift( sign )
    end
    return parts.flatten
  end

  # Catenate each sub-parts array into a whole number part and one or more
  # post-decimal parts. If grouping is turned on, all sub-parts get joined
  # with commas, otherwise just the whole-number part is.
  if config[:group].zero?
    if parts[0].length > 1

      # Join all but the last part together with commas
      wholenum = parts[0][0...-1].join( config[:comma] )

      # If the last part is just a single word, append it to the
      # wholenum part with an 'and'. This is to get things like 'three
      # thousand and three' instead of 'three thousand, three'.
      if /^\s*(\S+)\s*$/ =~ parts[0].last
        wholenum += config[:and] + parts[0].last
      else
        wholenum += config[:comma] + parts[0].last
      end
    else
      wholenum = parts[0][0]
    end
    decimals = parts[1..-1].collect {|part| part.join(" ")}

    debug_msg "Wholenum: #{wholenum.inspect}; decimals: #{decimals.inspect}"

    # Join with the configured decimal; if it's empty, just join with
    # spaces.
    separator = config[:decimal].empty? ? " " : " #{config[:decimal]} "
    words = ([ wholenum ] + decimals).join( separator ).strip

    # The sign word needs a space of its own to keep it from running into
    # the number ("minus five", not "minusfive").
    prefix = sign.empty? ? '' : "#{sign} "
    return ( prefix + words ).strip
  else
    return parts.compact.
      separate( config[:decimal] ).
      delete_if {|el| el.empty?}.
      join( config[:comma] ).
      strip
  end
end
1350
+ def_lprintf_formatter :NUMWORDS, :numwords
1351
+
1352
+
1353
+ ### Transform the given +number+ into an ordinal word. The +number+ object
1354
+ ### can be either an Integer or a String.
1355
def ordinal( number )
  if Integer === number
    # Numerals get a suffix appended: the entry for the last two digits if
    # there is one, otherwise the entry for the last digit.
    suffix = Nth[ number % 100 ] || Nth[ number % 10 ]
    number.to_s + suffix
  else
    # Words have their trailing number-word replaced by its ordinal form.
    number.to_s.sub( /(#{OrdinalSuffixes})\Z/ ) { Ordinals[$1] }
  end
end
1364
+ def_lprintf_formatter :ORD, :ordinal
1365
+
1366
+
1367
+ ### Transform the given +number+ into an ordinate word.
1368
def ordinate( number )
  # Spell the number out, then turn its last word into an ordinal. This
  # calls the module's own #ordinal (as #numwords does) rather than sending
  # #ordinal to the String, which a plain Ruby String does not respond to.
  ordinal( numwords(number) )
end
1371
+
1372
+
1373
+ ### Return a phrase describing the specified +number+ of objects in the
1374
+ ### given +phrase+ in general terms. The following options can be used to
1375
+ ### control the makeup of the returned quantity String:
1376
+ ###
1377
+ ### [<b>:joinword</b>]
1378
+ ### Sets the word (and any surrounding spaces) used as the word separating the
1379
+ ### quantity from the noun in the resulting string. Defaults to <tt>' of
1380
+ ### '</tt>.
1381
def quantify( phrase, number=0, args={} )
  num = number.to_i
  config = QuantifyDefaults.merge( args )

  case num
  when 0             then no( phrase )
  when 1             then a( phrase )
  when SeveralRange  then "several " + plural( phrase, num )
  when NumberRange   then "a number of " + plural( phrase, num )
  when NumerousRange then "numerous " + plural( phrase, num )
  when ManyRange     then "many " + plural( phrase, num )
  else

    # Anything bigger than the ManyRange gets described like
    # "hundreds of thousands of..." or "millions of..."
    # depending, of course, on how many there are.
    thousands, subthousands = Math::log10( num ).to_i.divmod( 3 )

    # Position inside the current group of three digits: hundreds, tens,
    # or nothing for a bare power of one thousand.
    stword = { 2 => "hundreds", 1 => "tens" }[ subthousands ]

    # "thousands", "millions", ... or nothing below one thousand
    thword = plural( to_thousands(thousands).strip )
    thword = nil if thword.empty?

    # Hundreds (of) thousands (of) stars.
    [ stword, thword, plural(phrase, number) ].
      compact.
      join( config[:joinword] )
  end
end
1427
+ def_lprintf_formatter :QUANT, :quantify
1428
+
1429
+
1430
+ # :TODO: Needs refactoring
1431
+
1432
+ ### Return the specified +obj+ (which must support the <tt>#collect</tt>
1433
+ ### method) as a conjunction. Each item is converted to a String if it is
1434
+ ### not already (using #to_s) unless a block is given, in which case it is
1435
+ ### called once for each object in the array, and the stringified return
1436
+ ### value from the block is used instead. Returning +nil+ causes that
1437
+ ### particular element to be omitted from the resulting conjunction. The
1438
+ ### following options can be used to control the makeup of the returned
1439
+ ### conjunction String:
1440
+ ###
1441
+ ### [<b>:separator</b>]
1442
+ ### Specify one or more characters to separate items in the resulting
1443
+ ### list. Defaults to <tt>', '</tt>.
1444
+ ### [<b>:altsep</b>]
1445
+ ### An alternate separator to use if any of the resulting conjunction's
1446
+ ### clauses contain the <tt>:separator</tt> character/s. Defaults to <tt>'; '</tt>.
1447
+ ### [<b>:penultimate</b>]
1448
+ ### Flag that indicates whether or not to join the last clause onto the
1449
+ ### rest of the conjunction using a penultimate <tt>:separator</tt>. E.g.,
1450
+ ### %w{duck cow dog}.en.conjunction
1451
+ ### # => "a duck, a cow, and a dog"
1452
+ ### %w{duck cow dog}.en.conjunction( :penultimate => false )
1453
+ ### # => "a duck, a cow and a dog"
1454
+ ### Default to <tt>true</tt>.
1455
+ ### [<b>:conjunctive</b>]
1456
+ ### Sets the word used as the conjunctive (separating word) of the
1457
+ ### resulting string. Default to <tt>'and'</tt>.
1458
+ ### [<b>:combine</b>]
1459
+ ### If set to <tt>true</tt> (the default), items which are identical (after
1460
+ ### surrounding spaces are stripped) will be combined in the resulting
1461
+ ### conjunction. E.g.,
1462
+ ### %w{goose cow goose dog}.en.conjunction
1463
+ ### # => "two geese, a cow, and a dog"
1464
+ ### %w{goose cow goose dog}.en.conjunction( :combine => false )
1465
+ ### # => "a goose, a cow, a goose, and a dog"
1466
+ ### [<b>:casefold</b>]
1467
+ ### If set to <tt>true</tt> (the default), then items are compared
1468
+ ### case-insensitively when combining them. This has no effect if
1469
+ ### <tt>:combine</tt> is <tt>false</tt>.
1470
+ ### [<b>:generalize</b>]
1471
+ ### If set to <tt>true</tt>, then quantities of combined items are turned into
1472
+ ### general descriptions instead of exact amounts.
1473
+ ### ary = %w{goose pig dog horse goose reindeer goose dog horse}
1474
+ ### ary.en.conjunction
1475
+ ### # => "three geese, two dogs, two horses, a pig, and a reindeer"
1476
+ ### ary.en.conjunction( :generalize => true )
1477
+ ### # => "several geese, several dogs, several horses, a pig, and a reindeer"
1478
+ ### See the #quantify method for specifics on how quantities are
1479
+ ### generalized. Generalization defaults to <tt>false</tt>, and has no effect if
1480
+ ### :combine is <tt>false</tt>.
1481
+ ### [<b>:quantsort</b>]
1482
+ ### If set to <tt>true</tt> (the default), items which are combined in the
1483
+ ### resulting conjunction will be listed in order of amount, with greater
1484
+ ### quantities sorted first. If <tt>:quantsort</tt> is <tt>false</tt>, combined items
1485
+ ### will appear where the first instance of them occurred in the
1486
+ ### list. This sort is also the fallback for identical quantities (i.e.,
1487
+ ### items of the same quantity will be listed in the order they appeared
1488
+ ### in the source list).
1489
+ ###
1490
def conjunction( obj, args={} )
  config = ConjunctionDefaults.merge( args )

  # Transform the items in obj to phrases. A block may drop an item by
  # returning nil for it.
  phrases =
    if block_given?
      obj.collect {|item| yield(item) }.compact
    else
      obj.collect {|item| item.to_s }
    end

  # No need for a conjunction if there's only one thing
  return a( phrases[0] ) if phrases.length < 2

  # Derive a collector key from a phrase depending on the configuration
  keyfunc =
    if config[:casefold]
      proc {|phrase| phrase.downcase.strip }
    else
      proc {|phrase| phrase.strip }
    end

  # Count the phrases per key. When combining (:combine => true), only the
  # first phrase for each key is kept and later duplicates just bump its
  # count; otherwise every phrase is kept with a count of 1.
  collector = {}
  if config[:combine]
    phrases = phrases.reject do |phrase|
      key = keyfunc[ phrase ]
      if collector.key?( key )
        collector[ key ] += 1
        true
      else
        collector[ key ] = 1
        false
      end
    end
  else
    phrases.each {|phrase| collector[ keyfunc[phrase] ] = 1 }
  end

  # If sort-by-quantity is turned on, sort the phrases first by how many
  # there are (most-first), and then by the order they were specified in.
  # Keys are unique at this point, so the index is a total tie-breaker.
  if config[:quantsort] && config[:combine]
    phrases = phrases.each_with_index.sort_by {|phrase, i|
      [ -collector[ keyfunc[phrase] ], i ]
    }.collect {|phrase, _| phrase }
  end

  # Set up a filtering function that adds either an indefinite article, an
  # indefinite quantifier, or a definite quantifier to each phrase
  # depending on the configuration and the count of phrases in the
  # collector.
  filter =
    if config[:generalize]
      proc {|phrase, count| quantify(phrase, count) }
    else
      proc {|phrase, count|
        if count > 1
          "%s %s" % [
            # :TODO: Make this threshold settable
            count < 10 ? count.en.numwords : count.to_s,
            plural(phrase, count)
          ]
        else
          a( phrase )
        end
      }
    end

  # Now use the configured filter to turn each phrase into its final form.
  phrases = phrases.collect {|phrase| filter[ phrase, collector[ keyfunc[phrase] ] ] }

  # Prepend the conjunctive to the last element unless it's empty or
  # there's only one element. A new string is built so whatever object the
  # filter returned is never modified in place.
  unless config[:conjunctive].strip.empty? || phrases.length < 2
    phrases[-1] = "#{config[:conjunctive]} #{phrases[-1]}"
  end

  # Concatenate the last two elements if there's no penultimate separator.
  # Combining can leave a single phrase, in which case there is nothing to
  # concatenate.
  phrase_count = phrases.length
  if !config[:penultimate] && phrases.length >= 2
    last = phrases.pop
    phrases[-1] = "#{phrases[-1]} #{last}"
  end

  # Pick a separator based on how many phrases there are and whether or
  # not there's already an instance of it in the phrases.
  sep =
    if phrase_count <= 2
      ' '
    elsif phrases.any? {|str| str.include?( config[:separator] ) }
      config[:altsep]
    else
      config[:separator]
    end

  return phrases.join( sep )
end
1596
+ def_lprintf_formatter :CONJUNCT, :conjunction
1597
+
1598
+
1599
+ ### Turns a camel-case +string+ ("camelCaseToEnglish") to plain English
1600
+ ### ("camel case to english"). Each word is decapitalized.
1601
def camel_case_to_english( string )
  # Put a space after every letter that is directly followed by a capital.
  # The lookahead leaves the capital unconsumed, so each capital in a run
  # is split off ("ABC" -> "a b c") regardless of the run's length.
  string.to_s.gsub( /([A-Za-z])(?=[A-Z])/, '\1 ' ).downcase
end
1606
+
1607
+
1608
+ ### Turns an English language +string+ into a CamelCase word.
1609
def english_to_camel_case( string )
  # Join each word onto the previous one, upcasing its first character.
  # Any letter or digit may start a word, so already-capitalized words and
  # numbers are joined too instead of keeping their leading whitespace.
  string.to_s.gsub( /\s+([[:alnum:]])/ ) { $1.upcase }
end
1612
+
1613
+
1614
+ ### This method doesn't work quite right yet. It does okay for simple cases,
1615
+ ### but it misses more complex ones, e.g. 'as' used as a coordinating
1616
+ ### conjunction in "A Portrait of the Artist as a Young Man". Perhaps after
1617
+ ### there's a working (non-leaking) LinkParser for Ruby, this can be fixed
1618
+ ### up. Until then it'll just be undocumented.
1619
+
1620
+ ### Returns the given +string+ as a title-cased phrase.
1621
def titlecase( string ) # :nodoc:

  # Split on word-boundaries, so words and the runs of non-word characters
  # between them become separate tokens. An empty string yields no tokens
  # and falls straight through to an empty result.
  tokens = string.split( /\b/ )

  # Indexes of the tokens that are whole words, leaving out the second part
  # of a contraction (the "t" in "don't"). The i >= 2 guard keeps the
  # look-behind from wrapping around to the end of the token list.
  word_indexes = (0...tokens.length).select do |i|
    next false unless /\A\w+\z/ =~ tokens[i]
    !( i >= 2 && tokens[i - 1] == "'" && /\w/ =~ tokens[i - 2] )
  end

  word_indexes.each do |i|
    word = tokens[i]

    # Always capitalize the first and last words; skip exception-words
    # anywhere else.
    edge = ( i == word_indexes.first || i == word_indexes.last )
    next if !edge && TitleCaseExceptions.include?( word )

    # Upcase only the first character instead of using capitalize, because
    # that method also downcases all other letters.
    tokens[i] = word.sub( /\A\w/ ) {|ch| ch.upcase }
  end

  return tokens.join
end
1650
+
1651
+
1652
+ ### Returns the proper noun form of a string by capitalizing most of the
1653
+ ### words.
1654
+ ###
1655
+ ### Examples:
1656
+ ### English.proper_noun("bosnia and herzegovina") ->
1657
+ ### "Bosnia and Herzegovina"
1658
+ ### English.proper_noun("macedonia, the former yugoslav republic of") ->
1659
+ ### "Macedonia, the Former Yugoslav Republic of"
1660
+ ### English.proper_noun("virgin islands, u.s.") ->
1661
+ ### "Virgin Islands, U.S."
1662
def proper_noun( string )
  # Words that stay lowercase inside a proper noun.
  uncapitalized = %w{and the of}

  # The capture group keeps the separator runs (spaces and periods) as
  # tokens of their own so joining the tokens restores the original spacing.
  tokens = string.split( /([ .]+)/ )

  converted = tokens.collect do |token|
    if /^[a-z]/.match( token ) && !uncapitalized.include?( token )
      token.capitalize
    else
      token
    end
  end

  return converted.join
end
1669
+
1670
+
1671
+ ### Format the given +fmt+ string by replacing %-escaped sequences with the
1672
+ ### result of performing a specified operation on the corresponding
1673
+ ### argument, ala Kernel.sprintf.
1674
+ ### %PL::
1675
+ ### Plural.
1676
+ ### %A, %AN::
1677
+ ### Prepend indefinite article.
1678
+ ### %NO::
1679
+ ### Zero-quantified phrase.
1680
+ ### %NUMWORDS::
1681
+ ### Convert a number into the corresponding words.
1682
+ ### %CONJUNCT::
1683
+ ### Conjunction.
1684
def lprintf( fmt, *args )
  fmt.to_s.gsub( /%([A-Z_]+)/ ) do
    # The escape name (minus the leading '%') selects the formatter.
    op = $1.to_s.upcase.to_sym

    unless self.lprintf_formatters.key?( op )
      raise "no such formatter %p" % op
    end

    # Each recognized escape consumes the next argument, in order.
    arg = args.shift
    self.lprintf_formatters[ op ].call( arg )
  end
end
1695
+
1696
+ end # module Linguistics::EN
1697
+
1698
+
1699
+ ### Add the #separate and #separate! methods to Array.
1700
class Array

  ### Return a copy of the receiver with a separator placed between each
  ### pair of adjacent members. The separator is +value+, or, when a block
  ### is given, the result of calling the block with each adjacent pair.
  def separate( value=:__no_arg__, &block )
    self.dup.separate!( value, &block )
  end

  ### In-place version of #separate; returns the receiver. Raises an
  ### ArgumentError if neither a +value+ nor a block is given.
  def separate!( value=:__no_arg__ )
    if value == :__no_arg__ && !block_given?
      raise ArgumentError, "wrong number of arguments: (0 for 1)"
    end

    # Separators end up at the odd indexes 1, 3, ... of the final array.
    # The upper bound is taken from the length before anything is inserted.
    final_index = ( self.length * 2 ) - 2
    1.step( final_index, 2 ) do |slot|
      separator = block_given? ? yield( self[slot - 1, 2] ) : value
      self.insert( slot, separator )
    end

    self
  end

end
1728
+