markos_linguistics 1.0.8.3
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +640 -0
- data/LICENSE +27 -0
- data/README +166 -0
- data/README.english +245 -0
- data/Rakefile +338 -0
- data/examples/generalize_sentence.rb +46 -0
- data/lib/linguistics.rb +366 -0
- data/lib/linguistics/en.rb +1728 -0
- data/lib/linguistics/en/infinitive.rb +1145 -0
- data/lib/linguistics/en/linkparser.rb +109 -0
- data/lib/linguistics/en/wordnet.rb +257 -0
- data/lib/linguistics/iso639.rb +461 -0
- data/rake/191_compat.rb +26 -0
- data/rake/dependencies.rb +76 -0
- data/rake/helpers.rb +434 -0
- data/rake/hg.rb +261 -0
- data/rake/manual.rb +782 -0
- data/rake/packaging.rb +144 -0
- data/rake/publishing.rb +318 -0
- data/rake/rdoc.rb +30 -0
- data/rake/style.rb +62 -0
- data/rake/svn.rb +668 -0
- data/rake/testing.rb +187 -0
- data/rake/verifytask.rb +64 -0
- data/rake/win32.rb +190 -0
- data/spec/linguistics/en_spec.rb +215 -0
- data/spec/linguistics/iso639_spec.rb +72 -0
- data/spec/linguistics_spec.rb +107 -0
- data/tests/en/infinitive.tests.rb +207 -0
- data/tests/en/inflect.tests.rb +1389 -0
- data/tests/en/lafcadio.tests.rb +77 -0
- data/tests/en/linkparser.tests.rb +42 -0
- data/tests/en/lprintf.tests.rb +77 -0
- data/tests/en/titlecase.tests.rb +73 -0
- data/tests/en/wordnet.tests.rb +95 -0
- metadata +121 -0
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
BEGIN {
|
4
|
+
require 'pathname'
|
5
|
+
|
6
|
+
basedir = Pathname.new( __FILE__ ).dirname.parent.expand_path
|
7
|
+
libdir = basedir + "lib"
|
8
|
+
$LOAD_PATH.unshift( libdir ) unless $LOAD_PATH.include?( libdir )
|
9
|
+
}
|
10
|
+
|
11
|
+
require 'linguistics'
|
12
|
+
require 'readline'
|
13
|
+
|
14
|
+
Linguistics.use( :en, :installProxy => true )
|
15
|
+
|
16
|
+
### Return a more general term for +word+ by traversing the hypernyms of its
### synset and picking the entry one quarter of the way into the returned list.
### Returns +word+ itself when it has no synset or the traversal comes back
### empty. Progress messages are written to standard error.
def generalized_word( word )
	# $deferr no longer exists in Ruby 1.9+ (it is nil there); $stderr is the
	# portable spelling.
	$stderr.puts " Traversing hypernyms for #{word}"
	syn = word.synset or return word
	nyms = syn.traverse( :hypernyms )
	return word if nyms.empty?

	index = nyms.length / 4
	general_subj = nyms[ index ]
	general_word = general_subj.words.first
	$stderr.puts " %d synsets returned. Picking %d (%s)" %
		[ nyms.length, index, general_word ]

	return general_word
end
|
30
|
+
|
31
|
+
# Read sentences until end-of-input. For each one, replace the first occurrence
# of its subject, object and verb (as reported by the object returned from
# String#sentence) with the result of generalized_word, then print the
# rewritten sentence.
while input = Readline.readline( "Sentence to generalize: " )
	sent = input.sentence

	[ sent.subject, sent.object, sent.verb ].each do |part|
		next unless part

		replacement = generalized_word( part ).to_s

		# Escape the word so regexp metacharacters typed by the user are matched
		# literally, and use the block form so backslashes in the replacement
		# are not treated as back-references.
		input.sub!( /\b#{Regexp.escape( part.to_s )}\b/ ) { replacement }
	end

	puts input
end
|
44
|
+
|
45
|
+
|
46
|
+
|
data/lib/linguistics.rb
ADDED
@@ -0,0 +1,366 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'linguistics/iso639'
|
4
|
+
|
5
|
+
# A language-independent framework for adding linguistics functions to Ruby
|
6
|
+
# classes.
|
7
|
+
#
|
8
|
+
# == Synopsis
|
9
|
+
#
|
10
|
+
# require 'linguistics'
|
11
|
+
# Linguistics::use( :en )
|
12
|
+
# MyClass::extend( Linguistics )
|
13
|
+
#
|
14
|
+
# == Authors
|
15
|
+
#
|
16
|
+
# * Michael Granger <ged@FaerieMUD.org>
|
17
|
+
#
|
18
|
+
# :include: LICENSE
|
19
|
+
#
|
20
|
+
#--
|
21
|
+
#
|
22
|
+
# Please see the file LICENSE in the base directory for licensing details.
|
23
|
+
#
|
24
|
+
module Linguistics
|
25
|
+
|
26
|
+
### Class constants
|
27
|
+
|
28
|
+
# Release version
|
29
|
+
VERSION = '1.0.8'
|
30
|
+
|
31
|
+
# Language module implementors should do something like:
|
32
|
+
# Linguistics::DefaultLanguages.push( :ja ) # or whatever
|
33
|
+
# so that direct requiring of a language module sets the default.
|
34
|
+
DefaultLanguages = []
|
35
|
+
|
36
|
+
# The list of Classes to add linguistic behaviours to.
|
37
|
+
DefaultExtClasses = [String, Numeric, Array]
|
38
|
+
|
39
|
+
|
40
|
+
#################################################################
|
41
|
+
### I N F L E C T O R C L A S S F A C T O R Y
|
42
|
+
#################################################################
|
43
|
+
|
44
|
+
### Base class for the per-language proxy classes built by
### Linguistics::make_language_proxy. A proxy wraps a receiver object and
### forwards any message its language module (+langmod+) responds to, passing
### the receiver as the first argument. Forwarding methods are defined on the
### proxy class the first time they are needed.
class LanguageProxyClass

	### Class instance variable + accessor. Contains the module which knows
	### the specifics of the language the languageProxy class is providing
	### methods for.
	@langmod = nil
	class << self
		attr_accessor :langmod
	end


	### Create a new LanguageProxy for the given +receiver+.
	def initialize( receiver )
		@receiver = receiver
	end


	######
	public
	######

	### Overloaded to take into account the methods of the language module.
	### Accepts the optional +include_all+ flag so that callers using the
	### two-argument form of #respond_to? do not get an ArgumentError.
	def respond_to?( sym, include_all=false )
		self.class.langmod.respond_to?( sym ) || super
	end


	### Counterpart of #method_missing so that Object#method and friends can
	### see the methods which would be autoloaded from the language module.
	def respond_to_missing?( sym, include_all=false )
		self.class.langmod.respond_to?( sym ) || super
	end


	### Autoload linguistic methods defined in the module this object's
	### class uses for inflection: the first call to a method the language
	### module responds to defines a forwarding method of the same name on the
	### proxy class and then invokes it. Anything else goes to +super+.
	def method_missing( sym, *args, &block )
		return super unless self.class.langmod.respond_to?( sym )

		self.class.module_eval %{
			def #{sym}( *args, &block )
				self.class.langmod.#{sym}( @receiver, *args, &block )
			end
		}, "{Autoloaded: " + __FILE__ + "}", __LINE__

		self.method( sym ).call( *args, &block )
	end


	### Returns a human-readable representation of the languageProxy for
	### debugging, logging, etc.
	def inspect
		"<%s languageProxy for %s object %s>" % [
			self.class.langmod.language,
			@receiver.class.name,
			@receiver.inspect,
		]
	end

end
|
101
|
+
|
102
|
+
|
103
|
+
### Extend the specified target object with one or more language proxy
### methods, each of which provides access to one or more linguistic methods
### for that language. When +obj+ is a Class the proxies are installed in the
### class itself; for any other object they are installed in its singleton
### class. The regular extend behaviour then runs via +super+.
def self::extend_object( obj )
	target = obj.is_a?( Class ) ? obj : (class << obj; self; end)
	self::install_language_proxy( target )

	super
end
|
119
|
+
|
120
|
+
|
121
|
+
### Extend the including class with linguistics proxy methods. Nothing is done
### when the includer is the Linguistics module itself.
def self::included( mod )
	return if mod == Linguistics
	mod.extend( self )
end
|
126
|
+
|
127
|
+
|
128
|
+
### Make a languageProxy class that encapsulates all of the inflect operations
### using the given language module: an anonymous subclass of
### LanguageProxyClass whose +langmod+ is set to +mod+.
def self::make_language_proxy( mod )
	Class::new( LanguageProxyClass ) do
		@langmod = mod
	end
end
|
136
|
+
|
137
|
+
|
138
|
+
### Install the language proxy for each of the given +languages+ into +klass+.
### For every language the language module is loaded, a proxy class is made
### for it and stored in a per-class registry (reachable through
### <tt>klass.__languageProxy_class</tt>), and an instance method named after
### the language module (e.g. +en+) is added which returns a proxy wrapping
### the receiver. If +languages+ is empty, Linguistics::DefaultLanguages is
### used instead; the caller's array is never modified.
def self::install_language_proxy( klass, languages=DefaultLanguages )
	languages = DefaultLanguages if languages.empty?

	# Create a languageProxy class for each language specified
	languages.each do |lang|

		# Load the language module, make a languageProxy class that delegates
		# to it, and figure out what the languageProxy method will be called.
		mod = load_language( lang.to_s.downcase )
		ifaceMeth = mod.name.downcase.sub( /.*:/, '' )
		languageProxyClass = make_language_proxy( mod )

		# Install a hash for languageProxy classes and an accessor for the
		# hash if it's not already present. Module#class_variables returns
		# Symbols on Ruby >= 1.9, so ask Ruby directly instead of searching the
		# list for a String; otherwise the registry would be reset on every call.
		# NOTE(review): when +klass+ is a singleton class the @@-variable and
		# the self.class lookup below may not resolve as intended -- confirm.
		unless klass.class_variable_defined?( "@@__languageProxy_class" )
			klass.module_eval %{
				@@__languageProxy_class = {}
				def self::__languageProxy_class; @@__languageProxy_class; end
			}, __FILE__, __LINE__
		end

		# Merge the current languageProxy into the hash
		klass.__languageProxy_class.merge!( ifaceMeth => languageProxyClass )

		# Set the language-code proxy method for the class unless it has one
		# already. Frozen receivers cannot memoize the proxy in an instance
		# variable, so they get a fresh proxy on each call.
		unless klass.method_defined?( ifaceMeth )
			klass.module_eval %{
				def #{ifaceMeth}
					proxyclass = self.class.__languageProxy_class["#{ifaceMeth}"]
					return proxyclass.new( self ) if frozen?
					@__#{ifaceMeth}_languageProxy ||= proxyclass.new( self )
				end
			}, __FILE__, __LINE__
		end
	end
end
|
179
|
+
|
180
|
+
|
181
|
+
|
182
|
+
### Install a regular proxy method in the given klass that will delegate
### calls to missing methods to the languageProxy for the given +langcode+.
### A #method_missing already defined directly in +klass+ is kept as
### #__orig_method_missing and consulted for messages the proxy does not
### answer. Raises ArgumentError if +langcode+ is nil.
def self::install_delegator_proxy( klass, langcode )
	raise ArgumentError, "Missing langcode" if langcode.nil?

	# Remember the delegator installed into each class so that a repeated
	# install does not alias the delegator to itself (which would recurse
	# forever through __orig_method_missing).
	@delegator_methods ||= {}

	# Alias any currently-extant #method_missing. Method names are compared as
	# Strings because Module#instance_methods returns Symbols on Ruby >= 1.9.
	own_methods = klass.instance_methods( false ).collect {|name| name.to_s }
	if own_methods.include?( "method_missing" )
		current = klass.instance_method( :method_missing )
		unless @delegator_methods[ klass ] == current
			klass.module_eval %{
				alias_method :__orig_method_missing, :method_missing
			}
		end
	end

	# Add the #method_missing method that auto-installs delegator methods
	# for methods supported by the linguistic proxy objects.
	result = klass.module_eval %{
		def method_missing( sym, *args, &block )

			# If the linguistic delegator answers the message, install a
			# delegator method and call it.
			if self.send( :#{langcode} ).respond_to?( sym )
				self.class.module_eval %{
					def \#{sym}( *args, &block )
						self.#{langcode}.\#{sym}( *args, &block )
					end
				}
				self.method( sym ).call( *args, &block )

			# Otherwise either call the overridden proxy method if there is
			# one, or just let our parent deal with it.
			else
				if self.respond_to?( :__orig_method_missing )
					return self.__orig_method_missing( sym, *args, &block )
				else
					super( sym, *args, &block )
				end
			end
		end
	}

	@delegator_methods[ klass ] = klass.instance_method( :method_missing )
	result
end
|
224
|
+
|
225
|
+
|
226
|
+
|
227
|
+
#################################################################
|
228
|
+
### L A N G U A G E - I N D E P E N D E N T F U N C T I O N S
|
229
|
+
#################################################################
|
230
|
+
|
231
|
+
|
232
|
+
### Handle auto-magic usage: referencing an undefined constant under this
### module (e.g. Linguistics::EN) is treated as a request to load the language
### whose code is the downcased constant name. Returns whatever load_language
### returns and lets its exceptions propagate.
def self::const_missing( sym )
	load_language( sym.to_s.downcase )
end
|
236
|
+
|
237
|
+
|
238
|
+
###############
|
239
|
+
module_function
|
240
|
+
###############
|
241
|
+
|
242
|
+
### Add linguistics functions for the specified languages to Ruby's core
### classes. The interface to all linguistic functions for a given language
### is through a method which is the same as the language's international 2-
### or 3-letter code (ISO 639). You can also specify a Hash of configuration
### options as the last argument, which control which classes are extended:
###
### [<b>:classes</b>]
###   Specify the classes which are to be extended. If this is not specified,
###   the Class objects in Linguistics::DefaultExtClasses (an Array) are
###   extended. A single Class may be given instead of an Array.
### [<b>:installProxy</b>]
###   Install a proxy method in each of the classes which are to be extended
###   which will search for missing methods in the languageProxy for the
###   language code specified as the value (a Symbol or String). If the value
###   is +true+, the first language given is used, falling back to the first
###   default language and then to <tt>:en</tt>. This allows linguistics
###   methods to be called directly on extended objects (e.g.,
###   12.en.ordinal becomes 12.ordinal). Obviously, methods which would
###   collide with the object's builtin methods will need to be invoked
###   through the languageProxy. Any existing proxy methods in the extended
###   classes will be preserved. Any other kind of value raises ArgumentError.
def use( *languages )
	config = languages.last.is_a?( Hash ) ? languages.pop : {}

	classes = config.key?( :classes ) ? config[:classes] : DefaultExtClasses
	classes = [ classes ] unless classes.is_a?( Array )
	proxy_option = config[:installProxy]

	# Install the languageProxy in each class.
	classes.each do |klass|

		# Create a languageProxy class for each installed language
		install_language_proxy( klass, languages )

		# Install the delegator proxy if configured
		next unless proxy_option

		langcode =
			case proxy_option
			when Symbol
				proxy_option
			when String
				proxy_option.intern
			when TrueClass
				languages[0] || DefaultLanguages[0] || :en
			else
				# Wrap the value so an Array option is shown whole instead of
				# being spread across the format's arguments.
				raise ArgumentError,
					"Unexpected value %p for :installProxy" % [ proxy_option ]
			end

		install_delegator_proxy( klass, langcode )
	end
end
|
293
|
+
|
294
|
+
|
295
|
+
|
296
|
+
### Support Lingua::EN::Inflect-style globals in a threadsafe way by using
|
297
|
+
### Thread-local variables.
|
298
|
+
|
299
|
+
### Set the default count for all unspecified plurals to +val+. The value is
### stored in <tt>Thread.current[:persistent_count]</tt>, so the setting is
### local to the calling thread.
def num=( val )
	Thread.current[:persistent_count] = val
end
|
304
|
+
alias_method :NUM=, :num=
|
305
|
+
|
306
|
+
### Get the default count for all unspecified plurals, as read from
### <tt>Thread.current[:persistent_count]</tt>. Setting is local to the
### calling thread; returns +nil+ if nothing has been set.
def num
	Thread.current[:persistent_count]
end
|
311
|
+
alias_method :NUM, :num
|
312
|
+
|
313
|
+
|
314
|
+
### Set the 'classical pluralizations' flag to +val+. The value is stored in
### <tt>Thread.current[:classical_plurals]</tt>, so the setting is local to the
### calling thread.
def classical=( val )
	Thread.current[:classical_plurals] = val
end
|
319
|
+
|
320
|
+
### Return the value of the 'classical pluralizations' flag as a strict
### +true+ or +false+. The flag is read from
### <tt>Thread.current[:classical_plurals]</tt>, so the setting is local to the
### calling thread.
def classical?
	!!Thread.current[:classical_plurals]
end
|
325
|
+
|
326
|
+
|
327
|
+
#######
|
328
|
+
private
|
329
|
+
#######
|
330
|
+
|
331
|
+
### Try to load the module that implements the given language, returning
### the Module object if successful. +lang+ must be a key of LanguageCodes,
### otherwise a RuntimeError is raised. Each code listed for the language is
### tried in turn (shortest first, then alphabetically): if the constant
### named after the upcased code is not yet defined under Linguistics,
### <tt>linguistics/<code></tt> is required. The first code whose constant
### exists wins. Raises LoadError, listing what was tried, if none does.
def self::load_language( lang )
	raise "Unknown language code '#{lang}'" unless
		LanguageCodes.key?( lang )

	# Sort all the codes for the specified language, trying the 2-letter
	# versions first in alphabetical order, then the 3-letter ones
	codes = LanguageCodes[ lang ][:codes].sort_by {|code| [code.length, code] }
	msgs = []

	codes.each do |code|
		constname = code.upcase

		unless Linguistics::const_defined?( constname )
			begin
				require "linguistics/#{code}"
			rescue LoadError => err
				msgs << "Tried 'linguistics/#{code}': #{err.message}\n"
				next
			end
		end

		return Linguistics::const_get( constname ) if
			Linguistics::const_defined?( constname )

		# The file loaded, but didn't define the expected module; record that
		# so the final error explains why this code was skipped.
		msgs << "Loaded 'linguistics/#{code}', but Linguistics::#{constname} is not defined\n"
	end

	raise LoadError,
		"Failed to load language extension %s:\n%s" %
		[ lang, msgs.join ]
end
|
364
|
+
|
365
|
+
end # class linguistics
|
366
|
+
|
@@ -0,0 +1,1728 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
#
|
3
|
+
# = Linguistics::EN
|
4
|
+
#
|
5
|
+
# This module contains English-language linguistic functions for the Linguistics
|
6
|
+
# module. It can be either loaded directly, or by passing some variant of 'en'
|
7
|
+
# or 'eng' to the Linguistics::use method.
|
8
|
+
#
|
9
|
+
# The functions contained by the module provide:
|
10
|
+
#
|
11
|
+
# == Plural Inflections
|
12
|
+
#
|
13
|
+
# Plural forms of all nouns, most verbs, and some adjectives are provided. Where
|
14
|
+
# appropriate, "classical" variants (for example: "brother" -> "brethren",
|
15
|
+
# "dogma" -> "dogmata", etc.) are also provided.
|
16
|
+
#
|
17
|
+
# These can be accessed via the #plural, #plural_noun, #plural_verb, and
|
18
|
+
# #plural_adjective methods.
|
19
|
+
#
|
20
|
+
# == Indefinite Articles
|
21
|
+
#
|
22
|
+
# Pronunciation-based "a"/"an" selection is provided for all English words, and
|
23
|
+
# most initialisms.
|
24
|
+
#
|
25
|
+
# See: #a, #an, and #no.
|
26
|
+
#
|
27
|
+
# == Numbers to Words
|
28
|
+
#
|
29
|
+
# Conversion from Numeric values to words is supported using the American
|
30
|
+
# "thousands" system. E.g., 2561 => "two thousand, five hundred and sixty-one".
|
31
|
+
#
|
32
|
+
# See the #numwords method.
|
33
|
+
#
|
34
|
+
# == Ordinals
|
35
|
+
#
|
36
|
+
# It is also possible to inflect numerals (1,2,3) and number words ("one",
|
37
|
+
# "two", "three") to ordinals (1st, 2nd, 3rd) and ordinates ("first", "second",
|
38
|
+
# "third").
|
39
|
+
#
|
40
|
+
# == Conjunctions
|
41
|
+
#
|
42
|
+
# This module also supports the creation of English conjunctions from Arrays of
|
43
|
+
# Strings or objects which respond to the #to_s message. Eg.,
|
44
|
+
#
|
45
|
+
# %w{cow pig chicken cow dog cow duck duck moose}.en.conjunction
|
46
|
+
# ==> "three cows, two ducks, a pig, a chicken, a dog, and a moose"
|
47
|
+
#
|
48
|
+
# == Infinitives
|
49
|
+
#
|
50
|
+
# Returns the infinitive form of English verbs:
|
51
|
+
#
|
52
|
+
# "dodging".en.infinitive
|
53
|
+
# ==> "dodge"
|
54
|
+
#
|
55
|
+
#
|
56
|
+
# == Authors
|
57
|
+
#
|
58
|
+
# * Michael Granger <ged@FaerieMUD.org>
|
59
|
+
#
|
60
|
+
# == Acknowledgements
|
61
|
+
#
|
62
|
+
# The inflection functions of this module were adapted from Damian Conway's
|
63
|
+
# Lingua::EN::Inflect Perl module:
|
64
|
+
#
|
65
|
+
# Copyright (c) 1997-2000, Damian Conway. All Rights Reserved.
|
66
|
+
# This module is free software. It may be used, redistributed
|
67
|
+
# and/or modified under the same terms as Perl itself.
|
68
|
+
#
|
69
|
+
# The conjunctions code was adapted from the Lingua::Conjunction Perl module
|
70
|
+
# written by Robert Rothenberg and Damian Conway, which has no copyright
|
71
|
+
# statement included.
|
72
|
+
#
|
73
|
+
# :include: LICENSE
|
74
|
+
#
|
75
|
+
#--
|
76
|
+
#
|
77
|
+
# Please see the file LICENSE in the base directory for licensing details.
|
78
|
+
#
|
79
|
+
module Linguistics::EN
|
80
|
+
|
81
|
+
# Load in the secondary modules and add them to Linguistics::EN.
|
82
|
+
require 'linguistics/en/infinitive'
|
83
|
+
require 'linguistics/en/wordnet'
|
84
|
+
require 'linguistics/en/linkparser'
|
85
|
+
|
86
|
+
# Add 'english' to the list of default languages
|
87
|
+
Linguistics::DefaultLanguages.push( :en )
|
88
|
+
|
89
|
+
|
90
|
+
#################################################################
|
91
|
+
### U T I L I T Y F U N C T I O N S
|
92
|
+
#################################################################
|
93
|
+
|
94
|
+
### Wrap one or more parts in a non-capturing alternation group. The +parts+
### (which may be nested Arrays) are flattened and joined with "|", and the
### result is returned as a String of the form "(?:a|b|c)" suitable for
### interpolation into a Regexp. The parts are not escaped.
def self::matchgroup( *parts )
	alternatives = parts.flatten.join( "|" )
	"(?:#{alternatives})"
end
|
99
|
+
|
100
|
+
|
101
|
+
# Registry of lprintf formatters: maps a formatter name to the Method object
# registered for it via def_lprintf_formatter.
@lprintf_formatters = {}
class << self
	attr_accessor :lprintf_formatters
end

### Add the specified method (which can be either a Method object or a
### Symbol for looking up a method on this module) to the lprintf_formatters
### registry under +name+. Returns the registered Method.
def self::def_lprintf_formatter( name, meth )
	meth = self.method( meth ) unless meth.is_a?( Method )
	self.lprintf_formatters[ name ] = meth
end
|
112
|
+
|
113
|
+
|
114
|
+
|
115
|
+
#################################################################
|
116
|
+
### C O N S T A N T S
|
117
|
+
#################################################################
|
118
|
+
|
119
|
+
# :stopdoc:
|
120
|
+
|
121
|
+
#
|
122
|
+
# Plurals
|
123
|
+
#
|
124
|
+
|
125
|
+
PL_sb_irregular_s = {
|
126
|
+
"ephemeris" => "ephemerides",
|
127
|
+
"iris" => "irises|irides",
|
128
|
+
"clitoris" => "clitorises|clitorides",
|
129
|
+
"corpus" => "corpuses|corpora",
|
130
|
+
"opus" => "opuses|opera",
|
131
|
+
"genus" => "genera",
|
132
|
+
"mythos" => "mythoi",
|
133
|
+
"penis" => "penises|penes",
|
134
|
+
"testis" => "testes",
|
135
|
+
}
|
136
|
+
|
137
|
+
PL_sb_irregular_h = {
|
138
|
+
"child" => "children",
|
139
|
+
"brother" => "brothers|brethren",
|
140
|
+
"loaf" => "loaves",
|
141
|
+
"hoof" => "hoofs|hooves",
|
142
|
+
"beef" => "beefs|beeves",
|
143
|
+
"money" => "monies",
|
144
|
+
"mongoose" => "mongooses",
|
145
|
+
"ox" => "oxen",
|
146
|
+
"cow" => "cows|kine",
|
147
|
+
"soliloquy" => "soliloquies",
|
148
|
+
"graffito" => "graffiti",
|
149
|
+
"prima donna" => "prima donnas|prime donne",
|
150
|
+
"octopus" => "octopuses|octopodes",
|
151
|
+
"genie" => "genies|genii",
|
152
|
+
"ganglion" => "ganglions|ganglia",
|
153
|
+
"trilby" => "trilbys",
|
154
|
+
"turf" => "turfs|turves",
|
155
|
+
}.update( PL_sb_irregular_s )
|
156
|
+
PL_sb_irregular = matchgroup PL_sb_irregular_h.keys
|
157
|
+
|
158
|
+
|
159
|
+
# Classical "..a" -> "..ata"
|
160
|
+
PL_sb_C_a_ata = matchgroup %w[
|
161
|
+
anathema bema carcinoma charisma diploma
|
162
|
+
dogma drama edema enema enigma lemma
|
163
|
+
lymphoma magma melisma miasma oedema
|
164
|
+
sarcoma schema soma stigma stoma trauma
|
165
|
+
gumma pragma
|
166
|
+
].collect {|word| word[0...-1]}
|
167
|
+
|
168
|
+
# Unconditional "..a" -> "..ae"
|
169
|
+
PL_sb_U_a_ae = matchgroup %w[
|
170
|
+
alumna alga vertebra persona
|
171
|
+
]
|
172
|
+
|
173
|
+
# Classical "..a" -> "..ae"
|
174
|
+
PL_sb_C_a_ae = matchgroup %w[
|
175
|
+
amoeba antenna formula hyperbola
|
176
|
+
medusa nebula parabola abscissa
|
177
|
+
hydra nova lacuna aurora .*umbra
|
178
|
+
flora fauna
|
179
|
+
]
|
180
|
+
|
181
|
+
# Classical "..en" -> "..ina"
|
182
|
+
PL_sb_C_en_ina = matchgroup %w[
|
183
|
+
stamen foramen lumen
|
184
|
+
].collect {|word| word[0...-2] }
|
185
|
+
|
186
|
+
# Unconditional "..um" -> "..a"
|
187
|
+
PL_sb_U_um_a = matchgroup %w[
|
188
|
+
bacterium agendum desideratum erratum
|
189
|
+
stratum datum ovum extremum
|
190
|
+
candelabrum
|
191
|
+
].collect {|word| word[0...-2] }
|
192
|
+
|
193
|
+
# Classical "..um" -> "..a"
|
194
|
+
PL_sb_C_um_a = matchgroup %w[
|
195
|
+
maximum minimum momentum optimum
|
196
|
+
quantum cranium curriculum dictum
|
197
|
+
phylum aquarium compendium emporium
|
198
|
+
enconium gymnasium honorarium interregnum
|
199
|
+
lustrum memorandum millenium rostrum
|
200
|
+
spectrum speculum stadium trapezium
|
201
|
+
ultimatum medium vacuum velum
|
202
|
+
consortium
|
203
|
+
].collect {|word| word[0...-2]}
|
204
|
+
|
205
|
+
# Unconditional "..us" -> "i"
|
206
|
+
PL_sb_U_us_i = matchgroup %w[
|
207
|
+
alumnus alveolus bacillus bronchus
|
208
|
+
locus nucleus stimulus meniscus
|
209
|
+
].collect {|word| word[0...-2]}
|
210
|
+
|
211
|
+
# Classical "..us" -> "..i"
|
212
|
+
PL_sb_C_us_i = matchgroup %w[
|
213
|
+
focus radius genius
|
214
|
+
incubus succubus nimbus
|
215
|
+
fungus nucleolus stylus
|
216
|
+
torus umbilicus uterus
|
217
|
+
hippopotamus
|
218
|
+
].collect {|word| word[0...-2]}
|
219
|
+
|
220
|
+
# Classical "..us" -> "..us" (assimilated 4th declension latin nouns)
|
221
|
+
PL_sb_C_us_us = matchgroup %w[
|
222
|
+
status apparatus prospectus sinus
|
223
|
+
hiatus impetus plexus
|
224
|
+
]
|
225
|
+
|
226
|
+
# Unconditional "..on" -> "a"
|
227
|
+
PL_sb_U_on_a = matchgroup %w[
|
228
|
+
criterion perihelion aphelion
|
229
|
+
phenomenon prolegomenon noumenon
|
230
|
+
organon asyndeton hyperbaton
|
231
|
+
].collect {|word| word[0...-2]}
|
232
|
+
|
233
|
+
# Classical "..on" -> "..a"
|
234
|
+
PL_sb_C_on_a = matchgroup %w[
|
235
|
+
oxymoron
|
236
|
+
].collect {|word| word[0...-2]}
|
237
|
+
|
238
|
+
# Classical "..o" -> "..i" (but normally -> "..os")
|
239
|
+
PL_sb_C_o_i_a = %w[
|
240
|
+
solo soprano basso alto
|
241
|
+
contralto tempo piano
|
242
|
+
]
|
243
|
+
PL_sb_C_o_i = matchgroup PL_sb_C_o_i_a.collect{|word| word[0...-1]}
|
244
|
+
|
245
|
+
# Always "..o" -> "..os"
|
246
|
+
PL_sb_U_o_os = matchgroup( %w[
|
247
|
+
albino archipelago armadillo
|
248
|
+
commando crescendo fiasco
|
249
|
+
ditto dynamo embryo
|
250
|
+
ghetto guano inferno
|
251
|
+
jumbo lumbago magneto
|
252
|
+
manifesto medico octavo
|
253
|
+
photo pro quarto
|
254
|
+
canto lingo generalissimo
|
255
|
+
stylo rhino
|
256
|
+
] | PL_sb_C_o_i_a )
|
257
|
+
|
258
|
+
|
259
|
+
# Unconditional "..[ei]x" -> "..ices"
|
260
|
+
PL_sb_U_ex_ices = matchgroup %w[
|
261
|
+
codex murex silex
|
262
|
+
].collect {|word| word[0...-2]}
|
263
|
+
PL_sb_U_ix_ices = matchgroup %w[
|
264
|
+
radix helix
|
265
|
+
].collect {|word| word[0...-2]}
|
266
|
+
|
267
|
+
# Classical "..[ei]x" -> "..ices"
|
268
|
+
PL_sb_C_ex_ices = matchgroup %w[
|
269
|
+
vortex vertex cortex latex
|
270
|
+
pontifex apex index simplex
|
271
|
+
].collect {|word| word[0...-2]}
|
272
|
+
PL_sb_C_ix_ices = matchgroup %w[
|
273
|
+
appendix
|
274
|
+
].collect {|word| word[0...-2]}
|
275
|
+
|
276
|
+
|
277
|
+
# Arabic: ".." -> "..i"
|
278
|
+
PL_sb_C_i = matchgroup %w[
|
279
|
+
afrit afreet efreet
|
280
|
+
]
|
281
|
+
|
282
|
+
|
283
|
+
# Hebrew: ".." -> "..im"
|
284
|
+
PL_sb_C_im = matchgroup %w[
|
285
|
+
goy seraph cherub
|
286
|
+
]
|
287
|
+
|
288
|
+
# Unconditional "..man" -> "..mans"
|
289
|
+
PL_sb_U_man_mans = matchgroup %w[
|
290
|
+
human
|
291
|
+
Alabaman Bahaman Burman German
|
292
|
+
Hiroshiman Liman Nakayaman Oklahoman
|
293
|
+
Panaman Selman Sonaman Tacoman Yakiman
|
294
|
+
Yokohaman Yuman
|
295
|
+
]
|
296
|
+
|
297
|
+
|
298
|
+
PL_sb_uninflected_s = [
|
299
|
+
# Pairs or groups subsumed to a singular...
|
300
|
+
"breeches", "britches", "clippers", "gallows", "hijinks",
|
301
|
+
"headquarters", "pliers", "scissors", "testes", "herpes",
|
302
|
+
"pincers", "shears", "proceedings", "trousers",
|
303
|
+
|
304
|
+
# Unassimilated Latin 4th declension
|
305
|
+
"cantus", "coitus", "nexus",
|
306
|
+
|
307
|
+
# Recent imports...
|
308
|
+
"contretemps", "corps", "debris",
|
309
|
+
".*ois",
|
310
|
+
|
311
|
+
# Diseases
|
312
|
+
".*measles", "mumps",
|
313
|
+
|
314
|
+
# Miscellaneous others...
|
315
|
+
"diabetes", "jackanapes", "series", "species", "rabies",
|
316
|
+
"chassis", "innings", "news", "mews",
|
317
|
+
]
|
318
|
+
|
319
|
+
|
320
|
+
# Don't inflect in classical mode, otherwise normal inflection
|
321
|
+
PL_sb_uninflected_herd = matchgroup %w[
|
322
|
+
wildebeest swine eland bison buffalo
|
323
|
+
elk moose rhinoceros
|
324
|
+
]
|
325
|
+
|
326
|
+
PL_sb_uninflected = matchgroup [
|
327
|
+
|
328
|
+
# Some fish and herd animals
|
329
|
+
".*fish", "tuna", "salmon", "mackerel", "trout",
|
330
|
+
"bream", "sea[- ]bass", "carp", "cod", "flounder", "whiting",
|
331
|
+
|
332
|
+
".*deer", ".*sheep",
|
333
|
+
|
334
|
+
# All nationals ending in -ese
|
335
|
+
"Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
|
336
|
+
"Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
|
337
|
+
"Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
|
338
|
+
"Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
|
339
|
+
"Shavese", "Vermontese", "Wenchowese", "Yengeese",
|
340
|
+
".*[nrlm]ese",
|
341
|
+
|
342
|
+
# Some words ending in ...s (often pairs taken as a whole)
|
343
|
+
PL_sb_uninflected_s,
|
344
|
+
|
345
|
+
# Diseases
|
346
|
+
".*pox",
|
347
|
+
|
348
|
+
# Other oddities
|
349
|
+
"graffiti", "djinn"
|
350
|
+
]
|
351
|
+
|
352
|
+
|
353
|
+
# Singular words ending in ...s (all inflect with ...es)
|
354
|
+
PL_sb_singular_s = matchgroup %w[
|
355
|
+
.*ss
|
356
|
+
acropolis aegis alias arthritis asbestos atlas
|
357
|
+
bathos bias bronchitis bursitis caddis cannabis
|
358
|
+
canvas chaos cosmos dais digitalis encephalitis
|
359
|
+
epidermis ethos eyas gas glottis hepatitis
|
360
|
+
hubris ibis lens mantis marquis metropolis
|
361
|
+
neuritis pathos pelvis polis rhinoceros
|
362
|
+
sassafras tonsillitis trellis .*us
|
363
|
+
]
|
364
|
+
|
365
|
+
PL_v_special_s = matchgroup [
|
366
|
+
PL_sb_singular_s,
|
367
|
+
PL_sb_uninflected_s,
|
368
|
+
PL_sb_irregular_s.keys,
|
369
|
+
'(.*[csx])is',
|
370
|
+
'(.*)ceps',
|
371
|
+
'[A-Z].*s',
|
372
|
+
]
|
373
|
+
|
374
|
+
PL_sb_postfix_adj = '(' + {
|
375
|
+
|
376
|
+
'general' => ['(?!major|lieutenant|brigadier|adjutant)\S+'],
|
377
|
+
'martial' => ["court"],
|
378
|
+
|
379
|
+
}.collect {|key,val|
|
380
|
+
matchgroup( matchgroup(val) + "(?=(?:-|\\s+)#{key})" )
|
381
|
+
}.join("|") + ")(.*)"
|
382
|
+
|
383
|
+
|
384
|
+
PL_sb_military = %r'major|lieutenant|brigadier|adjutant|quartermaster'
|
385
|
+
PL_sb_general = %r'((?!#{PL_sb_military.source}).*?)((-|\s+)general)'
|
386
|
+
|
387
|
+
PL_prep = matchgroup %w[
|
388
|
+
about above across after among around at athwart before behind
|
389
|
+
below beneath beside besides between betwixt beyond but by
|
390
|
+
during except for from in into near of off on onto out over
|
391
|
+
since till to under until unto upon with
|
392
|
+
]
|
393
|
+
|
394
|
+
PL_sb_prep_dual_compound = %r'(.*?)((?:-|\s+)(?:#{PL_prep}|d[eu])(?:-|\s+))a(?:-|\s+)(.*)'
|
395
|
+
PL_sb_prep_compound = %r'(.*?)((-|\s+)(#{PL_prep}|d[eu])((-|\s+)(.*))?)'
|
396
|
+
|
397
|
+
|
398
|
+
PL_pron_nom_h = {
|
399
|
+
# Nominative Reflexive
|
400
|
+
"i" => "we", "myself" => "ourselves",
|
401
|
+
"you" => "you", "yourself" => "yourselves",
|
402
|
+
"she" => "they", "herself" => "themselves",
|
403
|
+
"he" => "they", "himself" => "themselves",
|
404
|
+
"it" => "they", "itself" => "themselves",
|
405
|
+
"they" => "they", "themself" => "themselves",
|
406
|
+
|
407
|
+
# Possessive
|
408
|
+
"mine" => "ours",
|
409
|
+
"yours" => "yours",
|
410
|
+
"hers" => "theirs",
|
411
|
+
"his" => "theirs",
|
412
|
+
"its" => "theirs",
|
413
|
+
"theirs" => "theirs",
|
414
|
+
}
|
415
|
+
PL_pron_nom = matchgroup PL_pron_nom_h.keys
|
416
|
+
|
417
|
+
PL_pron_acc_h = {
|
418
|
+
# Accusative Reflexive
|
419
|
+
"me" => "us", "myself" => "ourselves",
|
420
|
+
"you" => "you", "yourself" => "yourselves",
|
421
|
+
"her" => "them", "herself" => "themselves",
|
422
|
+
"him" => "them", "himself" => "themselves",
|
423
|
+
"it" => "them", "itself" => "themselves",
|
424
|
+
"them" => "them", "themself" => "themselves",
|
425
|
+
}
|
426
|
+
PL_pron_acc = matchgroup PL_pron_acc_h.keys
|
427
|
+
|
428
|
+
# Irregular present-tense verb forms mapped to their plural. Each key is
# listed once: the original table repeated "was" and "have", which Ruby
# reports as duplicated hash keys.
PL_v_irregular_pres_h = {
	# 1st pers. sing.   2nd pers. sing.     3rd pers. singular
	"am"   => "are",    "are"  => "are",    "is"  => "are",
	"was"  => "were",   "were" => "were",
	"have" => "have",                       "has" => "have",
}
|
435
|
+
PL_v_irregular_pres = matchgroup PL_v_irregular_pres_h.keys
|
436
|
+
|
437
|
+
# Present-tense forms that could be either a verb or a noun, mapped to the
# plural verb form. Each key is listed once: the original table repeated the
# first-person form for the second person, which Ruby reports as duplicated
# hash keys.
PL_v_ambiguous_pres_h = {
	# 1st/2nd pers. sing.   3rd pers. singular
	"act"   => "act",       "acts"    => "act",
	"blame" => "blame",     "blames"  => "blame",
	"can"   => "can",
	"must"  => "must",
	"fly"   => "fly",       "flies"   => "fly",
	"copy"  => "copy",      "copies"  => "copy",
	"drink" => "drink",     "drinks"  => "drink",
	"fight" => "fight",     "fights"  => "fight",
	"fire"  => "fire",      "fires"   => "fire",
	"like"  => "like",      "likes"   => "like",
	"look"  => "look",      "looks"   => "look",
	"make"  => "make",      "makes"   => "make",
	"reach" => "reach",     "reaches" => "reach",
	"run"   => "run",       "runs"    => "run",
	"sink"  => "sink",      "sinks"   => "sink",
	"sleep" => "sleep",     "sleeps"  => "sleep",
	"view"  => "view",      "views"   => "view",
}
|
458
|
+
PL_v_ambiguous_pres = matchgroup PL_v_ambiguous_pres_h.keys
|
459
|
+
|
460
|
+
# Irregular non-present verb forms; pluralize_special_verb returns a word
# starting with one of these unchanged.
PL_v_irregular_non_pres = matchgroup %w[
  did had ate made put
  spent fought sank gave sought
  shall could ought should
]

# Ambiguous non-present verb forms; pluralize_general_verb returns a word
# starting with one of these unchanged.
PL_v_ambiguous_non_pres = matchgroup %w[
  thought saw bent will might cut
]
|
469
|
+
|
470
|
+
# Counts that mean "none"
PL_count_zero = matchgroup %w[
  0 no zero nil
]

# Counts that mean "exactly one"
PL_count_one = matchgroup %w[
  1 a an one each every this that
]
|
477
|
+
|
478
|
+
# Singular => plural table for articles and demonstrative adjectives.
PL_adj_special_h = {
  "a"    => "some",   "an"   => "some",
  "this" => "these",  "that" => "those",
}
|
482
|
+
PL_adj_special = matchgroup PL_adj_special_h.keys
|
483
|
+
|
484
|
+
# Singular => plural table for possessive adjectives.
PL_adj_poss_h = {
  "my"    => "our",
  "your"  => "your",
  "its"   => "their",
  "her"   => "their",
  "his"   => "their",
  "their" => "their",
}
|
492
|
+
PL_adj_poss = matchgroup PL_adj_poss_h.keys
|
493
|
+
|
494
|
+
|
495
|
+
#
|
496
|
+
# Numerals, ordinals, and numbers-to-words
|
497
|
+
#
|
498
|
+
|
499
|
+
# Numerical inflections: ordinal suffix keyed by a number's last digit, plus
# explicit entries for 11, 12 and 13 (which take 'th' rather than the suffix
# of their last digit).
Nth = {
  0  => 'th',
  1  => 'st',
  2  => 'nd',
  3  => 'rd',
  4  => 'th',
  5  => 'th',
  6  => 'th',
  7  => 'th',
  8  => 'th',
  9  => 'th',
  11 => 'th',
  12 => 'th',
  13 => 'th',
}
|
515
|
+
|
516
|
+
# Ordinal word parts: number-word endings that have an irregular ordinal form.
Ordinals = {
  'ty'     => 'tieth',
  'one'    => 'first',
  'two'    => 'second',
  'three'  => 'third',
  'five'   => 'fifth',
  'eight'  => 'eighth',
  'nine'   => 'ninth',
  'twelve' => 'twelfth',
}

# Regexp alternation of the irregular endings above. The trailing "|" adds an
# empty alternative, so a word with no irregular ending still matches (with an
# empty capture) and picks up the default 'th' registered just below.
OrdinalSuffixes = Ordinals.keys.join("|") + "|"
Ordinals[""] = 'th'
|
529
|
+
|
530
|
+
# Numeral names, indexed by digit value. The empty leading entries keep the
# index equal to the digit (Units[0] and Tens[0..1] contribute no word).
Units = [''] + %w[one two three four five six seven eight nine]
Teens = %w[ten eleven twelve thirteen fourteen
           fifteen sixteen seventeen eighteen nineteen]
Tens  = ['',''] + %w[twenty thirty forty fifty sixty seventy eighty ninety]

# Scale words indexed by the number of three-digit groups to the right; each
# entry carries its own leading space.
Thousands = [' ', ' thousand'] + %w[
  m b tr quadr quint sext sept oct non dec undec duodec tredec
  quattuordec quindec sexdec septemdec octodec novemdec vigint
].collect {|prefix| ' ' + prefix + 'illion'}
|
539
|
+
|
540
|
+
# A collection of functions for transforming digits into word
# phrases. Indexed by the number of digits being transformed; e.g.,
# <tt>NumberToWordsFunctions[2]</tt> is the function for transforming
# double-digit numbers. Every function takes the word to use for zero,
# followed by one Integer per digit.
NumberToWordsFunctions = [
  # No digits: always an error
  proc {|*args| raise "No digits (#{args.inspect})"},

  # Single-digits
  proc {|zero,x|
    (x.nonzero? ? to_units(x) : "#{zero} ")
  },

  # Double-digits
  proc {|zero,x,y|
    if x.nonzero?
      to_tens( x, y )
    elsif y.nonzero?
      "#{zero} " + NumberToWordsFunctions[1].call( zero, y )
    else
      ([zero] * 2).join(" ")
    end
  },

  # Triple-digits: the first digit on its own, then the remaining pair
  proc {|zero,x,y,z|
    NumberToWordsFunctions[1].call(zero,x) +
      NumberToWordsFunctions[2].call(zero,y,z)
  }
]
|
569
|
+
|
570
|
+
|
571
|
+
#
|
572
|
+
# Indefinite Articles
|
573
|
+
#
|
574
|
+
|
575
|
+
# This pattern matches strings of capitals starting with a "vowel-sound"
# consonant followed by another consonant, and which are not likely
# to be real words (oh, all right then, it's just magic!)
#
# The text contains layout whitespace, so it must be interpolated into a
# Regexp that uses the /x (extended) flag.
A_abbrev = %{
  (?! FJO | [HLMNS]Y.  | RY[EO] | SQU
    | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU])
  [FHLMNRSX][A-Z]
}
|
583
|
+
|
584
|
+
# This pattern codes the beginnings of all English words beginning with a
|
585
|
+
# 'y' followed by a consonant. Any other y-consonant prefix therefore
|
586
|
+
# implies an abbreviation.
|
587
|
+
A_y_cons = 'y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)'
|
588
|
+
|
589
|
+
# Exceptions to exceptions
|
590
|
+
A_explicit_an = matchgroup( "euler", "hour(?!i)", "heir", "honest", "hono" )
|
591
|
+
|
592
|
+
|
593
|
+
#
|
594
|
+
# Configuration defaults
|
595
|
+
#
|
596
|
+
|
597
|
+
# Default configuration arguments for the #numwords function
NumwordDefaults = {
  :group   => 0,
  :comma   => ', ',
  :and     => ' and ',
  :zero    => 'zero',
  :decimal => 'point',
  :asArray => false,
}

# Default ranges for #quantify
SeveralRange  = 2..5
NumberRange   = 6..19
NumerousRange = 20..45
ManyRange     = 46..99

# Default configuration arguments for the #quantify function
QuantifyDefaults = {
  :joinword => " of ",
}

# Default configuration arguments for the #conjunction (junction, what's
# your) function.
ConjunctionDefaults = {
  :separator   => ', ',
  :altsep      => '; ',
  :penultimate => true,
  :conjunctive => 'and',
  :combine     => true,
  :casefold    => true,
  :generalize  => false,
  :quantsort   => true,
}
|
630
|
+
|
631
|
+
|
632
|
+
#
|
633
|
+
# Title case
|
634
|
+
#
|
635
|
+
|
636
|
+
# "In titles, capitalize the first word, the last word, and all words in
|
637
|
+
# between except articles (a, an, and the), prepositions under five letters
|
638
|
+
# (in, of, to), and coordinating conjunctions (and, but). These rules apply
|
639
|
+
# to titles of long, short, and partial works as well as your own papers"
|
640
|
+
# (Anson, Schwegler, and Muth. The Longman Writer's Companion 240).
|
641
|
+
|
642
|
+
# Build the list of exceptions to title-capitalization.
#
# The articles are "a", "an" and "the"; "and" is a conjunction and is
# supplied by CoordConjunctions below, so the combined list still contains it.
Articles = %w[a an the]
ShortPrepositions = ["amid", "at", "but", "by", "down", "from", "in",
  "into", "like", "near", "of", "off", "on", "onto", "out", "over",
  "past", "save", "with", "till", "to", "unto", "up", "upon"]
CoordConjunctions = %w[and but as]
TitleCaseExceptions = Articles | ShortPrepositions | CoordConjunctions
|
649
|
+
|
650
|
+
|
651
|
+
# :startdoc:
|
652
|
+
|
653
|
+
#################################################################
|
654
|
+
### " B A C K E N D " F U N C T I O N S
|
655
|
+
#################################################################
|
656
|
+
|
657
|
+
|
658
|
+
###############
|
659
|
+
module_function
|
660
|
+
###############
|
661
|
+
|
662
|
+
### Debugging output: write the given +msgs+, joined with single spaces, to
### $stderr, but only when $DEBUG is set. Always returns +nil+.
def debug_msg( *msgs ) # :nodoc:
  return unless $DEBUG
  $stderr.puts msgs.join(" ")
end
|
666
|
+
|
667
|
+
|
668
|
+
### Normalize a count to either 1 or 2 (singular or plural).
###
### Returns +default+ when +count+ is +nil+. Returns 1 when +count+ is one
### of the "one" words/numerals (matched case-insensitively), or -- in
### classical mode only -- one of the "zero" words/numerals (matched
### case-sensitively). Anything else returns +default+.
def normalize_count( count, default=2 )
  return default if count.nil? # Default to plural

  count_str = count.to_s
  singular =
    /^(#{PL_count_one})$/i =~ count_str ||
    ( Linguistics::classical? && /^(#{PL_count_zero})$/ =~ count_str )

  singular ? 1 : default
end
|
679
|
+
|
680
|
+
|
681
|
+
### Do normal/classical switching and match capitalization in <tt>inflected</tt> by
### examining the <tt>original</tt> input.
###
### An +inflected+ value of the form "modern|classical" is reduced to one of
### its halves depending on Linguistics::classical?. The result is then
### upcased entirely if +original+ is all capitals, or has its first letter
### upcased if +original+ starts with a capital. An +original+ of exactly
### "I" leaves the case alone.
###
### Returns a new String; +inflected+ itself is never modified.
def postprocess( original, inflected )
  # Work on a copy: callers pass in values taken straight from the shared
  # lookup tables (e.g. the pronoun hashes), and editing those in place would
  # permanently corrupt the table (and raise on frozen strings).
  result = inflected.dup

  result.sub!( /([^|]+)\|(.+)/ ) {
    Linguistics::classical? ? $2 : $1
  }

  case original
  when "I"
    result
  when /^[A-Z]+$/
    result.upcase
  when /^[A-Z]/
    # Can't use #capitalize, as it will downcase the rest of the string,
    # too.
    result[0,1] = result[0,1].upcase
    result
  else
    result
  end
end
|
702
|
+
|
703
|
+
|
704
|
+
### Pluralize nouns. Returns +word+ unchanged if the normalized +count+
### (which falls back to Linguistics::num when +nil+) is 1; otherwise returns
### the plural, trying each family of rules in order: uninflected nouns,
### compounds, pronouns, irregular plurals, imported words, and finally the
### regular suffix rules. The result may contain a "modern|classical" pair
### when it comes from the irregular-noun table; #postprocess resolves that.
def pluralize_noun( word, count=nil )
  count ||= Linguistics::num
  count = normalize_count( count )

  return word if count == 1

  # Handle user-defined nouns
  #if value = ud_match( word, PL_sb_user_defined )
  #  return value
  #end

  # Handle empty word and uninflected plurals
  case word
  when ''
    return word
  when /^(#{PL_sb_uninflected})$/i
    return word
  else
    if Linguistics::classical? &&
       /^(#{PL_sb_uninflected_herd})$/i =~ word
      return word
    end
  end

  # Handle compounds ("Governor General", "mother-in-law", "aide-de-camp", etc.)
  case word
  when /^(?:#{PL_sb_postfix_adj})$/i
    # Save the tail first: the recursive call clobbers the match variables
    value = $2
    return pluralize_noun( $1, 2 ) + value

  when /^(?:#{PL_sb_prep_dual_compound})$/i
    value = [ $2, $3 ]
    return pluralize_noun( $1, 2 ) + value[0] + pluralize_noun( value[1] )

  when /^(?:#{PL_sb_prep_compound})$/i
    value = $2
    return pluralize_noun( $1, 2 ) + value

  # Handle pronouns
  when /^((?:#{PL_prep})\s+)(#{PL_pron_acc})$/i
    return $1 + PL_pron_acc_h[ $2.downcase ]

  when /^(#{PL_pron_nom})$/i
    return PL_pron_nom_h[ word.downcase ]

  when /^(#{PL_pron_acc})$/i
    return PL_pron_acc_h[ $1.downcase ]

  # Handle isolated irregular plurals
  when /(.*)\b(#{PL_sb_irregular})$/i
    return $1 + PL_sb_irregular_h[ $2.downcase ]

  when /(#{PL_sb_U_man_mans})$/i
    return "#{$1}s"

  # Handle families of irregular plurals
  when /(.*)man$/i        then return "#{$1}men"
  when /(.*[ml])ouse$/i   then return "#{$1}ice"
  when /(.*)goose$/i      then return "#{$1}geese"
  when /(.*)tooth$/i      then return "#{$1}teeth"
  when /(.*)foot$/i       then return "#{$1}feet"

  # Handle unassimilated imports
  when /(.*)ceps$/i                   then return word
  when /(.*)zoon$/i                   then return "#{$1}zoa"
  when /(.*[csx])is$/i                then return "#{$1}es"
  when /(#{PL_sb_U_ex_ices})ex$/i     then return "#{$1}ices"
  when /(#{PL_sb_U_ix_ices})ix$/i     then return "#{$1}ices"
  when /(#{PL_sb_U_um_a})um$/i        then return "#{$1}a"
  when /(#{PL_sb_U_us_i})us$/i        then return "#{$1}i"
  when /(#{PL_sb_U_on_a})on$/i        then return "#{$1}a"
  when /(#{PL_sb_U_a_ae})$/i          then return "#{$1}e"
  end

  # Handle incompletely assimilated imports
  if Linguistics::classical?
    case word
    when /(.*)trix$/i                 then return "#{$1}trices"
    when /(.*)eau$/i                  then return "#{$1}eaux"
    when /(.*)ieu$/i                  then return "#{$1}ieux"
    when /(.{2,}[yia])nx$/i           then return "#{$1}nges"
    when /(#{PL_sb_C_en_ina})en$/i    then return "#{$1}ina"
    when /(#{PL_sb_C_ex_ices})ex$/i   then return "#{$1}ices"
    when /(#{PL_sb_C_ix_ices})ix$/i   then return "#{$1}ices"
    when /(#{PL_sb_C_um_a})um$/i      then return "#{$1}a"
    when /(#{PL_sb_C_us_i})us$/i      then return "#{$1}i"
    when /(#{PL_sb_C_us_us})$/i       then return "#{$1}"
    when /(#{PL_sb_C_a_ae})$/i        then return "#{$1}e"
    when /(#{PL_sb_C_a_ata})a$/i      then return "#{$1}ata"
    when /(#{PL_sb_C_o_i})o$/i        then return "#{$1}i"
    when /(#{PL_sb_C_on_a})on$/i      then return "#{$1}a"
    when /#{PL_sb_C_im}$/i            then return "#{word}im"
    when /#{PL_sb_C_i}$/i             then return "#{word}i"
    end
  end

  case word
  # Handle singular nouns ending in ...s or other sibilants
  when /^(#{PL_sb_singular_s})$/i     then return "#{$1}es"
  when /^([A-Z].*s)$/                 then return "#{$1}es"
  when /(.*)([cs]h|[zx])$/i           then return "#{$1}#{$2}es"
  # when /(.*)(us)$/i                 then return "#{$1}#{$2}es"

  # Handle ...f -> ...ves
  when /(.*[eao])lf$/i                then return "#{$1}lves"
  when /(.*[^d])eaf$/i                then return "#{$1}eaves"
  when /(.*[nlw])ife$/i               then return "#{$1}ives"
  when /(.*)arf$/i                    then return "#{$1}arves"

  # Handle ...y
  when /(.*[aeiou])y$/i               then return "#{$1}ys"
  when /([A-Z].*y)$/                  then return "#{$1}s"
  when /(.*)y$/i                      then return "#{$1}ies"

  # Handle ...o
  when /#{PL_sb_U_o_os}$/i            then return "#{word}s"
  when /[aeiou]o$/i                   then return "#{word}s"
  when /o$/i                          then return "#{word}es"

  # Otherwise just add ...s
  else
    return "#{word}s"
  end
end # def pluralize_noun
|
831
|
+
|
832
|
+
|
833
|
+
|
834
|
+
### Pluralize special verbs. Returns the plural form of the verb +word+ when
### it is an irregular verb or carries a recognizable third-person-singular
### ending, and +nil+ when the count is singular, the phrase contains
### whitespace that no irregular rule handled, or the word looks like a
### regular verb (which is handled elsewhere).
def pluralize_special_verb( word, count )
  count ||= Linguistics::num
  count = normalize_count( count )

  return nil if /^(#{PL_count_one})$/i =~ count.to_s

  # Handle user-defined verbs
  #if value = ud_match( word, PL_v_user_defined )
  #  return value
  #end

  case word

  # Handle irregular present tense (simple and compound)
  when /^(#{PL_v_irregular_pres})((\s.*)?)$/i
    PL_v_irregular_pres_h[ $1.downcase ] + $2

  # Handle irregular future, preterite and perfect tenses
  when /^(#{PL_v_irregular_non_pres})((\s.*)?)$/i
    word

  # Handle special cases
  when /^(#{PL_v_special_s})$/, /\s/
    nil

  # Handle standard 3rd person (chop the ...(e)s off single words)
  when /^(.*)([cs]h|[x]|zz|ss)es$/i
    $1 + $2
  when /^(..+)ies$/i
    "#{$1}y"
  when /^(.+)oes$/i
    "#{$1}o"
  when /^(.*[^s])s$/i
    $1

  # Otherwise, a regular verb (handle elsewhere)
  else
    nil
  end
end
|
875
|
+
|
876
|
+
|
877
|
+
### Pluralize regular verbs. Returns +word+ unchanged when the count is
### singular; maps an ambiguous present-tense form (optionally followed by
### the rest of a phrase) to its plural; and otherwise returns +word+ as-is.
def pluralize_general_verb( word, count )
  count ||= Linguistics::num
  count = normalize_count( count )

  return word if /^(#{PL_count_one})$/i =~ count.to_s

  case word

  # Handle ambiguous present tenses (simple and compound)
  when /^(#{PL_v_ambiguous_pres})((\s.*)?)$/i
    PL_v_ambiguous_pres_h[ $1.downcase ] + $2

  # Handle ambiguous preterite and perfect tenses
  when /^(#{PL_v_ambiguous_non_pres})((\s.*)?)$/i
    word

  # Otherwise, 1st or 2nd person is uninflected
  else
    word
  end
end
|
899
|
+
|
900
|
+
|
901
|
+
### Handle special adjectives. Returns +word+ unchanged when the count is
### singular; the plural of a known article/demonstrative or possessive
### adjective; the plural possessive of a word ending in an apostrophe
### (optionally followed by "s"); or +nil+ when the word is none of these.
def pluralize_special_adjective( word, count )
  count ||= Linguistics::num
  count = normalize_count( count )

  return word if /^(#{PL_count_one})$/i =~ count.to_s

  # Handle user-defined adjectives
  #if value = ud_match( word, PL_adj_user_defined )
  #  return value
  #end

  case word

  # Handle known cases
  when /^(#{PL_adj_special})$/i
    PL_adj_special_h[ $1.downcase ]

  # Handle possessives
  when /^(#{PL_adj_poss})$/i
    PL_adj_poss_h[ $1.downcase ]

  when /^(.*)'s?$/
    pl = plural_noun( $1 )
    # A plural that already ends in "s" only takes the apostrophe
    (/s$/ =~ pl) ? "#{pl}'" : "#{pl}'s"

  # Otherwise, no idea
  else
    nil
  end
end
|
936
|
+
|
937
|
+
|
938
|
+
### Returns the given word with a prepended indefinite article, unless
### +count+ is non-nil and not singular, in which case the word is returned
### prefixed with the count itself. A +nil+ count falls back to
### Linguistics::num.
def indef_article( word, count )
  count ||= Linguistics::num
  return "#{count} #{word}" if
    count && /^(#{PL_count_one})$/i !~ count.to_s

  # Handle user-defined variants
  # return value if value = ud_match( word, A_a_user_defined )

  # The rules are ordered; the first one that matches decides the article.
  article =
    case word

    # Handle special cases
    when /^(#{A_explicit_an})/i        then "an"

    # Handle abbreviations
    when /^(#{A_abbrev})/x             then "an"
    when /^[aefhilmnorsx][.-]/i        then "an"
    when /^[a-z][.-]/i                 then "a"

    # Handle consonants
    when /^[^aeiouy]/i                 then "a"

    # Handle special vowel-forms
    when /^e[uw]/i                     then "a"
    when /^onc?e\b/i                   then "a"
    when /^uni([^nmd]|mo)/i            then "a"
    when /^u[bcfhjkqrst][aeiou]/i      then "a"

    # Handle vowels
    when /^[aeiou]/i                   then "an"

    # Handle y... (before certain consonants implies (unnaturalized) "i.." sound)
    when /^(#{A_y_cons})/i             then "an"

    # Otherwise, guess "a"
    else
      "a"
    end

  return "#{article} #{word}"
end
|
989
|
+
|
990
|
+
|
991
|
+
### Transform the specified units-place numeral (+units+, an Integer from 0
### to 9) into a word, followed by the scale word for the given number of
### +thousands+ places.
def to_units( units, thousands=0 )
  Units[ units ] + to_thousands( thousands )
end
|
996
|
+
|
997
|
+
|
998
|
+
### Transform the specified tens- and units-place numerals into a
### word-phrase, followed by the scale word for the given number of
### +thousands+ places. A +tens+ digit of 1 yields a single "teen" word;
### otherwise the tens and units words are hyphenated when both are nonzero.
def to_tens( tens, units, thousands=0 )
  if tens == 1
    Teens[ units ] + to_thousands( thousands )
  else
    hyphen = ( tens.nonzero? && units.nonzero? ) ? '-' : ''
    Tens[ tens ] + hyphen + to_units( units, thousands )
  end
end
|
1008
|
+
|
1009
|
+
|
1010
|
+
### Transform the specified hundreds-, tens-, and units-place numerals into
### a word phrase, followed by the scale word for the given number of
### +thousands+ places. +joinword+ is placed between "hundred" and the
### tens/units words (an empty +joinword+ is treated as a single space).
### Returns +nil+ if all three digits are zero.
def to_hundreds( hundreds, tens=0, units=0, thousands=0, joinword=" and " )
  joinword = ' ' if joinword.empty?
  has_remainder = tens.nonzero? || units.nonzero?

  if hundreds.nonzero?
    to_units( hundreds ) + " hundred" +
      ( has_remainder ? joinword : '' ) +
      to_tens( tens, units ) +
      to_thousands( thousands )
  elsif has_remainder
    to_tens( tens, units ) + to_thousands( thousands )
  else
    nil
  end
end
|
1027
|
+
|
1028
|
+
### Transform the specified number into one or more words like 'thousand',
### 'million', etc. Uses the thousands (American) system.
###
### +thousands+ is an index into Thousands. When it runs past the end of
### that table, the largest scale word is appended once for every full trip
### around the table, and the remainder selects the leading word.
def to_thousands( thousands=0 )
  cycle = Thousands.length - 1
  parts = []

  (0..thousands).step( cycle ) do |i|
    parts.push( i.zero? ? Thousands[ thousands % cycle ] : Thousands.last )
  end

  return parts.join(" ")
end
|
1042
|
+
|
1043
|
+
|
1044
|
+
### Return the specified number +num+ (a String of digits, or anything whose
### #to_s yields one) as an array of number phrases.
###
### +config+ is the merged #numwords configuration; this method reads its
### :zero, :group and :and entries. With a nonzero :group the digits are
### converted in fixed-size groups; otherwise they are converted in groups
### of three from the right, each followed by its scale word. +num+ itself
### is never modified.
def number_to_words( num, config )
  return [config[:zero]] if num.to_i.zero?
  chunks = []

  # Break into word-groups if groups is set
  if config[:group].nonzero?

    # Build a Regexp with <config[:group]> number of digits. Any past
    # the first are optional.
    re = Regexp::new( "(\\d)" + ("(\\d)?" * (config[:group] - 1)) )

    # Scan the string, and call the word-chunk function that deals with
    # chunks of the found number of digits.
    num.to_s.scan( re ) {|digits|
      debug_msg " digits = #{digits.inspect}"

      # A short final group leaves nil captures behind. Count only the
      # digits actually present (Array#nitems is gone from modern Ruby).
      present = digits.compact
      fn = NumberToWordsFunctions[ present.length ]
      numerals = present.collect {|i| i.to_i}
      debug_msg " numerals = #{numerals.inspect}"
      chunks.push fn.call( config[:zero], *numerals ).strip
    }
  else
    # Copy, so the destructive substitutions below leave the caller's
    # string alone.
    phrase = num.to_s.dup
    phrase.sub!( /\A\s*0+/, '' )
    mill = 0

    # Match backward from the end of the digits in the string, turning
    # chunks of three, of two, and of one into words.
    loop do
      consumed = phrase.sub!( /(\d)(\d)(\d)(?=\D*\Z)/ ) {
        words = to_hundreds( $1.to_i, $2.to_i, $3.to_i, mill,
                             config[:and] )
        chunks.unshift( words.strip.squeeze(' ') ) unless words.nil?
        ''
      }
      break unless consumed
      mill += 1
    end

    phrase.sub!( /(\d)(\d)(?=\D*\Z)/ ) {
      chunks.unshift( to_tens( $1.to_i, $2.to_i, mill ).strip.squeeze(' ') )
      ''
    }
    phrase.sub!( /(\d)(?=\D*\Z)/ ) {
      chunks.unshift( to_units( $1.to_i, mill ).strip.squeeze(' ') )
      ''
    }
  end

  return chunks
end
|
1092
|
+
|
1093
|
+
|
1094
|
+
#################################################################
|
1095
|
+
### P U B L I C F U N C T I O N S
|
1096
|
+
#################################################################
|
1097
|
+
|
1098
|
+
### Return the name of the language this module is for. The optional
### argument is accepted but ignored.
def language( unused=nil )
  "English"
end
|
1102
|
+
|
1103
|
+
|
1104
|
+
### Return the plural of the given +phrase+ if +count+ indicates it should
### be plural. A Numeric +phrase+ is first converted to words. The phrase is
### tried as a special adjective, then as a special verb, and finally as a
### noun. Surrounding whitespace is preserved, and an empty phrase is
### returned unchanged.
def plural( phrase, count=nil )
  phrase = numwords( phrase ) if phrase.is_a?( Numeric )

  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
  pre, word, post = md.to_a[1,3]
  return phrase if word.nil? || word.empty?

  inflected = postprocess( word,
    pluralize_special_adjective(word, count) ||
    pluralize_special_verb(word, count) ||
    pluralize_noun(word, count) )

  return pre + inflected + post
end
|
1120
|
+
def_lprintf_formatter :PL, :plural
|
1121
|
+
|
1122
|
+
|
1123
|
+
### Return the plural of the given noun +phrase+ if +count+ indicates it
### should be plural. Surrounding whitespace is preserved, and an empty
### phrase is returned unchanged.
def plural_noun( phrase, count=nil )
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
  pre, word, post = md.to_a[1,3]
  return phrase if word.nil? || word.empty?

  inflected = postprocess( word, pluralize_noun(word, count) )
  return pre + inflected + post
end
|
1133
|
+
def_lprintf_formatter :PL_N, :plural_noun
|
1134
|
+
|
1135
|
+
|
1136
|
+
### Return the plural of the given verb +phrase+ if +count+ indicates it
### should be plural. The phrase is tried as a special verb first, then as a
### general verb. Surrounding whitespace is preserved, and an empty phrase
### is returned unchanged.
def plural_verb( phrase, count=nil )
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
  pre, word, post = md.to_a[1,3]
  return phrase if word.nil? || word.empty?

  inflected = postprocess( word,
    pluralize_special_verb(word, count) ||
    pluralize_general_verb(word, count) )
  return pre + inflected + post
end
|
1148
|
+
def_lprintf_formatter :PL_V, :plural_verb
|
1149
|
+
|
1150
|
+
|
1151
|
+
### Return the plural of the given adjectival +phrase+ if +count+ indicates
### it should be plural. A word that is not a special adjective is left as
### it is. Surrounding whitespace is preserved, and an empty phrase is
### returned unchanged.
def plural_adjective( phrase, count=nil )
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
  pre, word, post = md.to_a[1,3]
  return phrase if word.nil? || word.empty?

  inflected = postprocess( word,
    pluralize_special_adjective(word, count) || word )
  return pre + inflected + post
end
|
1162
|
+
alias_method :plural_adj, :plural_adjective
|
1163
|
+
def_lprintf_formatter :PL_ADJ, :plural_adjective
|
1164
|
+
|
1165
|
+
|
1166
|
+
### Return the given phrase with the appropriate indefinite article ("a" or
### "an") prepended. Surrounding whitespace is preserved, and an empty
### phrase is returned unchanged.
def a( phrase, count=nil )
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
  pre, word, post = md.to_a[1,3]
  return phrase if word.nil? || word.empty?

  result = indef_article( word, count )
  return pre + result + post
end
|
1176
|
+
alias_method :an, :a
|
1177
|
+
def_lprintf_formatter :A, :a
|
1178
|
+
def_lprintf_formatter :AN, :a
|
1179
|
+
|
1180
|
+
|
1181
|
+
### Translate zero-quantified +phrase+ to "no +phrase.plural+". A +nil+
### +count+ falls back to Linguistics::num, and then to 0. A count that is
### not one of the "zero" words/numerals is prepended to the pluralized
### phrase instead of "no". Surrounding whitespace is preserved, and an
### empty phrase is returned unchanged.
def no( phrase, count=nil )
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
  pre, word, post = md.to_a[1,3]

  # Same guard as the other public inflectors: without it, an empty phrase
  # leaves +pre+/+word+/+post+ nil and the concatenation below raises.
  return phrase if word.nil? || word.empty?

  count ||= Linguistics::num || 0

  if /^#{PL_count_zero}$/ =~ count.to_s
    return "#{pre}no " + plural( word, 0 ) + post
  else
    return "#{pre}#{count} " + plural( word, count ) + post
  end
end
|
1193
|
+
def_lprintf_formatter :NO, :no
|
1194
|
+
|
1195
|
+
|
1196
|
+
### Participles: return the present participle ("-ing" form) of the verb
### +word+. The verb is first put into its plural form; the first spelling
### rule below that applies then adjusts the stem before "ing" is appended.
def present_participle( word )
  stem = plural_verb( word.to_s, 2 )

  # Each #sub! returns nil when it changes nothing, so the chain stops at
  # the first rule that applies.
  stem.sub!( /ie$/, 'y' ) ||                 # die -> dying
    stem.sub!( /ue$/, 'u' ) ||               # argue -> arguing
    stem.sub!( /([auy])e$/, "\\1" ) ||       # drop the e, keep the vowel
    stem.sub!( /ski$/, 'ski' ) ||            # ski keeps its i: skiing
    stem.sub!( /i$/, '' ) ||
    stem.sub!( /([^e])e$/, "\\1" ) ||        # make -> making
    /er$/.match( stem ) ||                   # offer -> offering (no doubling)
    stem.sub!( /([^aeiou][aeiouy]([bdgmnprst]))$/, "\\1\\2" ) # run -> running

  return "#{stem}ing"
end
|
1210
|
+
alias_method :part_pres, :present_participle
|
1211
|
+
def_lprintf_formatter :PART_PRES, :present_participle
|
1212
|
+
|
1213
|
+
|
1214
|
+
|
1215
|
+
### Return the specified number as english words. One or more configuration
|
1216
|
+
### values may be passed to control the returned String:
|
1217
|
+
###
|
1218
|
+
### [<b>:group</b>]
|
1219
|
+
### Controls how many numbers at a time are grouped together. Valid values
|
1220
|
+
### are <code>0</code> (normal grouping), <code>1</code> (single-digit
|
1221
|
+
### grouping, e.g., "one, two, three, four"), <code>2</code>
|
1222
|
+
### (double-digit grouping, e.g., "twelve, thirty-four", or <code>3</code>
|
1223
|
+
### (triple-digit grouping, e.g., "one twenty-three, four").
|
1224
|
+
### [<b>:comma</b>]
|
1225
|
+
### Set the character/s used to separate word groups. Defaults to
|
1226
|
+
### <code>", "</code>.
|
1227
|
+
### [<b>:and</b>]
|
1228
|
+
### Set the word and/or characters used where <code>' and ' </code>(the
|
1229
|
+
### default) is normally used. Setting <code>:and</code> to
|
1230
|
+
### <code>' '</code>, for example, will cause <code>2556</code> to be
|
1231
|
+
### returned as "two-thousand, five hundred fifty-six" instead of
|
1232
|
+
### "two-thousand, five hundred and fifty-six".
|
1233
|
+
### [<b>:zero</b>]
|
1234
|
+
### Set the word used to represent the numeral <code>0</code> in the
|
1235
|
+
### result. <code>'zero'</code> is the default.
|
1236
|
+
### [<b>:decimal</b>]
|
1237
|
+
### Set the translation of any decimal points in the number; the default
|
1238
|
+
### is <code>'point'</code>.
|
1239
|
+
### [<b>:asArray</b>]
|
1240
|
+
### If set to a true value, the number will be returned as an array of
|
1241
|
+
### word groups instead of a String.
|
1242
|
+
def numwords( number, hashargs={} )
  # Copy the text: it is edited in place below, and the caller may have
  # passed in a String (possibly a frozen one) that must not be changed.
  num = number.to_s.dup
  config = NumwordDefaults.merge( hashargs )
  raise "Bad chunking option: #{config[:group]}" unless
    config[:group].between?( 0, 3 )

  # Array of number parts: first is everything to the left of the first
  # decimal, followed by any groups of decimal-delimited numbers after that
  parts = []

  # Wordify any sign prefix
  sign = (/\A\s*\+/ =~ num) ? 'plus' : (/\A\s*\-/ =~ num) ? 'minus' : ''

  # Strip any ordinal suffixes
  ord = true if num.sub!( /(st|nd|rd|th)\Z/, '' )

  # Split the number into chunks delimited by '.'. Without grouping, only
  # the first '.' separates the whole-number part from the decimals.
  chunks =
    if config[:decimal].empty?
      [ num ]
    elsif config[:group].nonzero?
      num.split( /\./ )
    else
      num.split( /\./, 2 )
    end

  # Wordify each chunk, pushing arrays into the parts array
  chunks.each_with_index do |chunk, section|
    chunk.gsub!( /\D+/, '' )

    # If there's nothing in the whole-number part, just push an empty
    # array for it.
    if chunk.empty? && section.zero?
      parts.push( [] )
      next
    end

    # The second and succeeding parts of a non-grouped number are read out
    # digit by digit; everything else uses the configured grouping.
    if config[:group].zero? && section.nonzero?
      parts.push number_to_words( chunk, config.merge(:group => 1) )
    else
      parts.push number_to_words( chunk, config )
    end
  end

  debug_msg "Parts => #{parts.inspect}"

  # Turn the last word of the whole-number part back into an ordinal if
  # the original number came in that way.
  if ord && !parts[0].empty?
    parts[0][-1] = ordinal( parts[0].last )
  end

  # If the caller's expecting an Array return, just flatten and return the
  # parts array.
  if config[:asArray]
    parts[0].unshift( sign ) unless sign.empty?
    return parts.flatten
  end

  # Catenate each sub-parts array into a whole number part and one or more
  # post-decimal parts. If grouping is turned on, all sub-parts get joined
  # with commas, otherwise just the whole-number part is.
  if config[:group].zero?
    if parts[0].length > 1

      # Join all but the last part together with commas
      wholenum = parts[0][0...-1].join( config[:comma] )

      # If the last part is just a single word, append it to the
      # wholenum part with an 'and'. This is to get things like 'three
      # thousand and three' instead of 'three thousand, three'.
      if /^\s*(\S+)\s*$/ =~ parts[0].last
        wholenum += config[:and] + parts[0].last
      else
        wholenum += config[:comma] + parts[0].last
      end
    else
      wholenum = parts[0][0]
    end
    decimals = parts[1..-1].collect {|part| part.join(" ")}

    debug_msg "Wholenum: #{wholenum.inspect}; decimals: #{decimals.inspect}"

    # Join with the configured decimal; if it's empty, just join with
    # spaces.
    joiner = config[:decimal].empty? ? " " : " #{config[:decimal]} "

    # The sign is a word of its own, so separate it from the number
    # ("minus five", not "minusfive").
    prefix = sign.empty? ? '' : "#{sign} "
    return prefix + ([ wholenum ] + decimals).join( joiner ).strip
  else
    # NOTE(review): the sign word is not included in grouped output;
    # confirm whether that is intended.
    return parts.compact.
      separate( config[:decimal] ).
      delete_if {|el| el.empty?}.
      join( config[:comma] ).
      strip
  end
end
|
1350
|
+
def_lprintf_formatter :NUMWORDS, :numwords
|
1351
|
+
|
1352
|
+
|
1353
|
+
### Transform the given +number+ into an ordinal word. The +number+ object
### can be either an Integer or a String.
###
### An Integer gets a numeric suffix appended ("1st", "22nd", "113th"); the
### suffix is chosen from the number's magnitude, so negative numbers
### inflect like their positive counterparts. Anything else is converted
### with #to_s and has the ending of its last number-word replaced
### ("twenty-one" -> "twenty-first", "four" -> "fourth").
def ordinal( number )
  case number
  when Integer
    magnitude = number.abs
    # The teens (11-13) are looked up by their last two digits first
    return number.to_s + (Nth[ magnitude % 100 ] || Nth[ magnitude % 10 ])

  else
    return number.to_s.sub( /(#{OrdinalSuffixes})\Z/ ) { Ordinals[$1] }
  end
end
|
1364
|
+
# NOTE(review): def_lprintf_formatter is defined outside this chunk; presumably
# it registers #ordinal as the handler #lprintf looks up for the %ORD escape
# -- confirm against its definition.
def_lprintf_formatter :ORD, :ordinal
|
1365
|
+
|
1366
|
+
|
1367
|
+
### Transform the given +number+ into an ordinate word: its ordinal form
### (obtained through <tt>number.en.ordinal</tt>) is handed to #numwords.
def ordinate( number )
  ordinal_form = number.en.ordinal
  return numwords( ordinal_form )
end
|
1371
|
+
|
1372
|
+
|
1373
|
+
### Return a phrase describing the specified +number+ of objects in the
### given +phrase+ in general terms instead of as an exact count. The
### following option can be used to control the makeup of the returned
### quantity String:
###
### [<b>:joinword</b>]
###   Sets the word (and any surrounding spaces) used to separate the
###   quantity words from the noun in the resulting string. Defaults to
###   <tt>' of '</tt>.
def quantify( phrase, number=0, args={} )
  count   = number.to_i
  options = QuantifyDefaults.merge( args )

  # Counts that match one of the fixed cases map straight onto an article
  # or a general quantifier.
  case count
  when 0             then return no( phrase )
  when 1             then return a( phrase )
  when SeveralRange  then return "several " + plural( phrase, count )
  when NumberRange   then return "a number of " + plural( phrase, count )
  when NumerousRange then return "numerous " + plural( phrase, count )
  when ManyRange     then return "many " + plural( phrase, count )
  end

  # Everything else is described by its order of magnitude, like
  # "hundreds of thousands of..." or "millions of...": the power of ten is
  # split into a count of groups of three digits and a remainder.
  thousands, subthousands = Math::log10( count ).to_i.divmod( 3 )

  # Hundreds (of)... / tens (of)...; nothing for a remainder of zero
  scale = { 2 => "hundreds", 1 => "tens" }[ subthousands ]

  # thousands (of)...; dropped when there is no word for the group
  group = plural( to_thousands(thousands).strip )
  group = nil if group.empty?

  # ...stars.
  return [ scale, group, plural(phrase, number) ].compact.join( options[:joinword] )
end
|
1427
|
+
# NOTE(review): def_lprintf_formatter is defined outside this chunk; presumably
# it registers #quantify as the handler #lprintf looks up for the %QUANT
# escape -- confirm against its definition.
def_lprintf_formatter :QUANT, :quantify
|
1428
|
+
|
1429
|
+
|
1430
|
+
# :TODO: Needs refactoring
|
1431
|
+
|
1432
|
+
### Return the specified +obj+ (which must support the <tt>#collect</tt>
|
1433
|
+
### method) as a conjunction. Each item is converted to a String if it is
|
1434
|
+
### not already (using #to_s) unless a block is given, in which case it is
|
1435
|
+
### called once for each object in the array, and the stringified return
|
1436
|
+
### value from the block is used instead. Returning +nil+ causes that
|
1437
|
+
### particular element to be omitted from the resulting conjunction. The
|
1438
|
+
### following options can be used to control the makeup of the returned
|
1439
|
+
### conjunction String:
|
1440
|
+
###
|
1441
|
+
### [<b>:separator</b>]
|
1442
|
+
### Specify one or more characters to separate items in the resulting
|
1443
|
+
### list. Defaults to <tt>', '</tt>.
|
1444
|
+
### [<b>:altsep</b>]
|
1445
|
+
### An alternate separator to use if any of the resulting conjunction's
|
1446
|
+
### clauses contain the <tt>:separator</tt> character/s. Defaults to <tt>'; '</tt>.
|
1447
|
+
### [<b>:penultimate</b>]
|
1448
|
+
### Flag that indicates whether or not to join the last clause onto the
|
1449
|
+
### rest of the conjunction using a penultimate <tt>:separator</tt>. E.g.,
|
1450
|
+
### %w{duck cow dog}.en.conjunction
|
1451
|
+
### # => "a duck, a cow, and a dog"
|
1452
|
+
### %w{duck cow dog}.en.conjunction( :penultimate => false )
|
1453
|
+
### # => "a duck, a cow and a dog"
|
1454
|
+
### Default to <tt>true</tt>.
|
1455
|
+
### [<b>:conjunctive</b>]
|
1456
|
+
### Sets the word used as the conjunctive (separating word) of the
|
1457
|
+
### resulting string. Default to <tt>'and'</tt>.
|
1458
|
+
### [<b>:combine</b>]
|
1459
|
+
### If set to <tt>true</tt> (the default), items which are identical (after
|
1460
|
+
### surrounding spaces are stripped) will be combined in the resulting
|
1461
|
+
### conjunction. E.g.,
|
1462
|
+
### %w{goose cow goose dog}.en.conjunction
|
1463
|
+
### # => "two geese, a cow, and a dog"
|
1464
|
+
### %w{goose cow goose dog}.en.conjunction( :combine => false )
|
1465
|
+
### # => "a goose, a cow, a goose, and a dog"
|
1466
|
+
### [<b>:casefold</b>]
|
1467
|
+
### If set to <tt>true</tt> (the default), then items are compared
|
1468
|
+
### case-insensitively when combining them. This has no effect if
|
1469
|
+
### <tt>:combine</tt> is <tt>false</tt>.
|
1470
|
+
### [<b>:generalize</b>]
|
1471
|
+
### If set to <tt>true</tt>, then quantities of combined items are turned into
|
1472
|
+
### general descriptions instead of exact amounts.
|
1473
|
+
### ary = %w{goose pig dog horse goose reindeer goose dog horse}
|
1474
|
+
### ary.en.conjunction
|
1475
|
+
### # => "three geese, two dogs, two horses, a pig, and a reindeer"
|
1476
|
+
### ary.en.conjunction( :generalize => true )
|
1477
|
+
### # => "several geese, several dogs, several horses, a pig, and a reindeer"
|
1478
|
+
### See the #quantify method for specifics on how quantities are
|
1479
|
+
### generalized. Generalization defaults to <tt>false</tt>, and has no effect if
|
1480
|
+
### :combine is <tt>false</tt>.
|
1481
|
+
### [<b>:quantsort</b>]
|
1482
|
+
### If set to <tt>true</tt> (the default), items which are combined in the
|
1483
|
+
### resulting conjunction will be listed in order of amount, with greater
|
1484
|
+
### quantities sorted first. If <tt>:quantsort</tt> is <tt>false</tt>, combined items
|
1485
|
+
### will appear where the first instance of them occurred in the
|
1486
|
+
### list. This sort is also the fallback for identical quantities (ie.,
|
1487
|
+
### items of the same quantity will be listed in the order they appeared
|
1488
|
+
### in the source list).
|
1489
|
+
###
|
1490
|
+
def conjunction( obj, args={} )
  config = ConjunctionDefaults.merge( args )

  # Transform items in the obj to phrases. A nil block result omits the
  # item; every other result is stringified so the key function and the
  # String operations below always work on Strings.
  phrases =
    if block_given?
      obj.collect {|item| yield(item) }.compact.collect {|phrase| phrase.to_s }
    else
      obj.collect {|item| item.to_s }
    end

  # No need for a conjunction if there's only one thing
  return a( phrases[0] ) if phrases.length < 2

  # Set up a Proc to derive a collector key from a phrase depending on the
  # configuration
  keyfunc =
    if config[:casefold]
      proc {|key| key.downcase.strip }
    else
      proc {|key| key.strip }
    end

  # Count the phrases by key. When combining (:combine => true), only the
  # first phrase for each key is kept and later duplicates just increment
  # its count; this is done in one pass instead of deleting from the list
  # while iterating over it.
  collector = {}
  if config[:combine]
    phrases = phrases.select do |phrase|
      key = keyfunc[ phrase ]
      if collector.key?( key )
        collector[ key ] += 1
        false
      else
        collector[ key ] = 1
        true
      end
    end
  else
    # If we're not combining, just make everything have a count of 1.
    phrases.uniq.each {|phrase| collector[ keyfunc[phrase] ] = 1 }
  end

  # If sort-by-quantity is turned on, sort the phrases first by how many
  # there are (most-first), and then by the order they were specified in.
  if config[:quantsort] && config[:combine]
    origorder = {}
    phrases.each_with_index {|phrase, i| origorder[ keyfunc[phrase] ] ||= i }
    phrases.sort! {|x, y|
      (collector[ keyfunc[y] ] <=> collector[ keyfunc[x] ]).nonzero? ||
        (origorder[ keyfunc[x] ] <=> origorder[ keyfunc[y] ])
    }
  end

  # Set up a filtering function that adds either an indefinite article, an
  # indefinite quantifier, or a definite quantifier to each phrase
  # depending on the configuration and the count of phrases in the
  # collector.
  filter =
    if config[:generalize]
      proc {|phrase, count| quantify( phrase, count ) }
    else
      proc {|phrase, count|
        if count > 1
          "%s %s" % [
            # :TODO: Make this threshold settable
            count < 10 ? count.en.numwords : count.to_s,
            plural( phrase, count )
          ]
        else
          a( phrase )
        end
      }
    end

  # Now use the configured filter to turn each phrase into its final form.
  phrases.collect! {|phrase| filter[ phrase, collector[keyfunc[phrase]] ] }

  # Prepend the conjunctive to the last element unless it's empty or
  # there's only one element. A new String is built rather than inserting
  # into the existing one, which may be an object owned by the caller.
  unless config[:conjunctive].strip.empty? || phrases.length < 2
    phrases[-1] = config[:conjunctive] + " " + phrases[-1]
  end

  # Concatenate the last two elements if there's no penultimate separator.
  # Combining can leave a single phrase behind, in which case there is
  # nothing to concatenate it onto.
  phrase_count = phrases.length
  if !config[:penultimate] && phrases.length > 1
    final = phrases.pop
    phrases[-1] = phrases[-1] + " " + final
  end

  # Pick a separator based on how many phrases there are and whether or not
  # there's already an instance of it in the phrases.
  sep = config[:separator]
  if phrase_count <= 2
    sep = ' '
  elsif phrases.find {|str| str.include?(config[:separator]) }
    sep = config[:altsep]
  end

  return phrases.join( sep )
end
|
1596
|
+
# NOTE(review): def_lprintf_formatter is defined outside this chunk; presumably
# it registers #conjunction as the handler #lprintf looks up for the
# %CONJUNCT escape -- confirm against its definition.
def_lprintf_formatter :CONJUNCT, :conjunction
|
1597
|
+
|
1598
|
+
|
1599
|
+
### Turns a camel-case +string+ ("camelCaseToEnglish") to plain English
### ("camel case to english"). A space is inserted before every capital
### letter that directly follows another letter, and each word is
### decapitalized.
def camel_case_to_english( string )
  # The capital is matched with a lookahead so it isn't consumed: a
  # consumed capital can't start the next match, which would leave runs of
  # capitals only partially split ("ABC" => "a bc").
  string.to_s.gsub( /([A-Za-z])(?=[A-Z])/ ) { "#$1 " }.downcase
end
|
1606
|
+
|
1607
|
+
|
1608
|
+
### Turns an English language +string+ into a CamelCase word: each run of
### whitespace that is followed by another character is removed and that
### character is upcased. The first word is left as it is.
def english_to_camel_case( string )
  # Match any non-space character after the whitespace, not just a-z, so
  # that the space before an already-capitalized word or a digit is removed
  # too and the result really is a single word.
  string.to_s.gsub( /\s+(\S)/ ) { $1.upcase }
end
|
1612
|
+
|
1613
|
+
|
1614
|
+
### This method doesn't work quite right yet. It does okay for simple cases,
|
1615
|
+
### but it misses more complex ones, e.g. 'as' used as a coordinating
|
1616
|
+
### conjunction in "A Portrait of the Artist as a Young Man". Perhaps after
|
1617
|
+
### there's a working (non-leaking) LinkParser for Ruby, this can be fixed
|
1618
|
+
### up. Until then it'll just be undocumented.
|
1619
|
+
|
1620
|
+
### Returns the given +string+ as a title-cased phrase. The first and last
### tokens are always capitalized; of the remaining tokens, words have
### their first letter upcased unless they are listed in
### TitleCaseExceptions or are the second part of a contraction. An empty
### +string+ yields an empty String.
def titlecase( string ) # :nodoc:

  # Split on word-boundaries
  words = string.split( /\b/ )

  # An empty string has no tokens to capitalize
  return '' if words.empty?

  # Always capitalize the first and last words
  words.first.capitalize!
  words.last.capitalize!

  # Now scan the rest of the tokens, skipping non-words and capitalization
  # exceptions.
  words.each_with_index do |word, i|

    # Non-words
    next unless /^\w+$/.match( word )

    # Skip exception-words
    next if TitleCaseExceptions.include?( word )

    # Skip second parts of contractions. This only applies when two
    # preceding tokens exist: for smaller indexes the negative offsets
    # would wrap around and test tokens from the end of the phrase.
    next if i >= 2 && words[i - 1] == "'" && /\w/.match( words[i - 2] )

    # Have to do it this way instead of capitalize! because that method
    # also downcases all other letters.
    word.gsub!( /^(\w)(.*)/ ) { $1.upcase + $2 }
  end

  return words.join
end
|
1650
|
+
|
1651
|
+
|
1652
|
+
### Returns the proper noun form of a +string+ by capitalizing most of its
### words. The string is split on runs of spaces and periods; every piece
### that starts with a lowercase letter is capitalized, except for "and",
### "the" and "of".
###
### Examples:
###   English.proper_noun("bosnia and herzegovina") ->
###     "Bosnia and Herzegovina"
###   English.proper_noun("macedonia, the former yugoslav republic of") ->
###     "Macedonia, the Former Yugoslav Republic of"
###   English.proper_noun("virgin islands, u.s.") ->
###     "Virgin Islands, U.S."
def proper_noun( string )
  # The capture group keeps the delimiters in the token list so that the
  # original spacing and punctuation survive the final join.
  tokens = string.split( /([ .]+)/ )

  cased = tokens.collect do |token|
    if token =~ /^[a-z]/ && !%w{and the of}.include?( token )
      token.capitalize
    else
      token
    end
  end

  return cased.join
end
|
1669
|
+
|
1670
|
+
|
1671
|
+
### Format the given +fmt+ string by replacing %-escaped sequences with the
|
1672
|
+
### result of performing a specified operation on the corresponding
|
1673
|
+
### argument, ala Kernel.sprintf.
|
1674
|
+
### %PL::
|
1675
|
+
### Plural.
|
1676
|
+
### %A, %AN::
|
1677
|
+
### Prepend indefinite article.
|
1678
|
+
### %NO::
|
1679
|
+
### Zero-quantified phrase.
|
1680
|
+
### %NUMWORDS::
|
1681
|
+
### Convert a number into the corresponding words.
|
1682
|
+
### %CONJUNCT::
|
1683
|
+
### Conjunction.
|
1684
|
+
def lprintf( fmt, *args )
|
1685
|
+
fmt.to_s.gsub( /%([A-Z_]+)/ ) do |match|
|
1686
|
+
op = $1.to_s.upcase.to_sym
|
1687
|
+
if self.lprintf_formatters.key?( op )
|
1688
|
+
arg = args.shift
|
1689
|
+
self.lprintf_formatters[ op ].call( arg )
|
1690
|
+
else
|
1691
|
+
raise "no such formatter %p" % op
|
1692
|
+
end
|
1693
|
+
end
|
1694
|
+
end
|
1695
|
+
|
1696
|
+
end # module Linguistics::EN
|
1697
|
+
|
1698
|
+
|
1699
|
+
### Add the #separate and #separate! methods to Array.
class Array

  ### Returns a copy of the Array with a new member inserted between each
  ### pair of neighboring members. The member is the given +value+ unless
  ### a block is given, in which case the block is called with each
  ### neighboring pair (as a two-element Array) and its return value is
  ### inserted instead.
  def separate( value=:__no_arg__, &block )
    return self.dup.separate!( value, &block )
  end

  ### The same as #separate, but modifies the Array in place and returns
  ### the receiver. Raises an ArgumentError if neither a +value+ nor a
  ### block is given.
  def separate!( value=:__no_arg__ )
    use_block = block_given?
    if value == :__no_arg__ && !use_block
      raise ArgumentError, "wrong number of arguments: (0 for 1)"
    end

    # Separators end up at the odd indexes 1, 3, ... of the grown Array;
    # the last of them is fixed up front from the original length.
    index      = 1
    last_index = (self.length * 2) - 2

    while index <= last_index
      filler = use_block ? yield( self[index - 1, 2] ) : value
      self.insert( index, filler )
      index += 2
    end

    self
  end

end
|
1728
|
+
|