linguistics 1.0.9 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/.gemtest +0 -0
- data/ChangeLog +849 -342
- data/History.rdoc +11 -0
- data/LICENSE +9 -9
- data/Manifest.txt +44 -0
- data/README.rdoc +226 -0
- data/Rakefile +32 -349
- data/examples/endocs.rb +272 -0
- data/examples/generalize_sentence.rb +2 -1
- data/examples/klingon.rb +22 -0
- data/lib/linguistics.rb +130 -292
- data/lib/linguistics/en.rb +337 -1628
- data/lib/linguistics/en/articles.rb +138 -0
- data/lib/linguistics/en/conjugation.rb +2245 -0
- data/lib/linguistics/en/conjunctions.rb +202 -0
- data/lib/linguistics/en/{infinitive.rb → infinitives.rb} +41 -55
- data/lib/linguistics/en/linkparser.rb +41 -49
- data/lib/linguistics/en/numbers.rb +483 -0
- data/lib/linguistics/en/participles.rb +33 -0
- data/lib/linguistics/en/pluralization.rb +810 -0
- data/lib/linguistics/en/stemmer.rb +75 -0
- data/lib/linguistics/en/titlecase.rb +121 -0
- data/lib/linguistics/en/wordnet.rb +63 -97
- data/lib/linguistics/inflector.rb +89 -0
- data/lib/linguistics/iso639.rb +534 -448
- data/lib/linguistics/languagebehavior.rb +36 -0
- data/lib/linguistics/monkeypatches.rb +42 -0
- data/spec/lib/constants.rb +15 -0
- data/spec/lib/helpers.rb +38 -0
- data/spec/linguistics/en/articles_spec.rb +797 -0
- data/spec/linguistics/en/conjugation_spec.rb +2083 -0
- data/spec/linguistics/en/conjunctions_spec.rb +154 -0
- data/spec/linguistics/en/infinitives_spec.rb +518 -0
- data/spec/linguistics/en/linkparser_spec.rb +66 -0
- data/spec/linguistics/en/numbers_spec.rb +1295 -0
- data/spec/linguistics/en/participles_spec.rb +55 -0
- data/spec/linguistics/en/pluralization_spec.rb +4636 -0
- data/spec/linguistics/en/stemmer_spec.rb +72 -0
- data/spec/linguistics/en/titlecase_spec.rb +841 -0
- data/spec/linguistics/en/wordnet_spec.rb +85 -0
- data/spec/linguistics/en_spec.rb +45 -167
- data/spec/linguistics/inflector_spec.rb +40 -0
- data/spec/linguistics/iso639_spec.rb +49 -53
- data/spec/linguistics/monkeypatches_spec.rb +40 -0
- data/spec/linguistics_spec.rb +46 -76
- metadata +241 -113
- metadata.gz.sig +0 -0
- data/README +0 -166
- data/README.english +0 -245
- data/rake/191_compat.rb +0 -26
- data/rake/dependencies.rb +0 -76
- data/rake/documentation.rb +0 -123
- data/rake/helpers.rb +0 -502
- data/rake/hg.rb +0 -318
- data/rake/manual.rb +0 -787
- data/rake/packaging.rb +0 -129
- data/rake/publishing.rb +0 -341
- data/rake/style.rb +0 -62
- data/rake/svn.rb +0 -668
- data/rake/testing.rb +0 -152
- data/rake/verifytask.rb +0 -64
- data/tests/en/infinitive.tests.rb +0 -207
- data/tests/en/inflect.tests.rb +0 -1389
- data/tests/en/lafcadio.tests.rb +0 -77
- data/tests/en/linkparser.tests.rb +0 -42
- data/tests/en/lprintf.tests.rb +0 -77
- data/tests/en/titlecase.tests.rb +0 -73
- data/tests/en/wordnet.tests.rb +0 -95
data/lib/linguistics/en.rb
CHANGED
@@ -1,1093 +1,401 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
|
-
|
3
|
-
#
|
2
|
+
|
3
|
+
require 'rubygems' # For Gem.find_files
|
4
|
+
require 'pathname'
|
5
|
+
|
6
|
+
require 'linguistics' unless defined?( Linguistics )
|
7
|
+
|
8
|
+
|
9
|
+
# This module is a container for various English-language linguistic
|
10
|
+
# functions for the Linguistics library. It can be either loaded
|
11
|
+
# directly, or by passing some variant of +:en+ or +:eng+ to the
|
12
|
+
# Linguistics.use method.
|
4
13
|
#
|
5
|
-
#
|
6
|
-
# module. It can be either loaded directly, or by passing some variant of 'en'
|
7
|
-
# or 'eng' to the Linguistics::use method.
|
14
|
+
# == Pluralization
|
8
15
|
#
|
9
|
-
#
|
16
|
+
# "box".en.plural
|
17
|
+
# # => "boxes"
|
10
18
|
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
19
|
+
# "mouse".en.plural
|
20
|
+
# # => "mice"
|
21
|
+
#
|
22
|
+
# "ruby".en.plural
|
23
|
+
# # => "rubies"
|
16
24
|
#
|
17
|
-
# These can be accessed via the #plural, #plural_noun, #plural_verb, and
|
18
|
-
# #plural_adjective methods.
|
19
25
|
#
|
20
26
|
# == Indefinite Articles
|
21
27
|
#
|
22
|
-
#
|
23
|
-
#
|
28
|
+
# "book".en.a
|
29
|
+
# # => "a book"
|
30
|
+
#
|
31
|
+
# "article".en.a
|
32
|
+
# # => "an article"
|
33
|
+
#
|
34
|
+
#
|
35
|
+
# == Present Participles
|
36
|
+
#
|
37
|
+
# "runs".en.present_participle
|
38
|
+
# # => "running"
|
39
|
+
#
|
40
|
+
# "eats".en.present_participle
|
41
|
+
# # => "eating"
|
42
|
+
#
|
43
|
+
# "spies".en.present_participle
|
44
|
+
# # => "spying"
|
45
|
+
#
|
46
|
+
#
|
47
|
+
# == Ordinal Numbers
|
48
|
+
#
|
49
|
+
# 5.en.ordinal
|
50
|
+
# # => "5th"
|
51
|
+
#
|
52
|
+
# 2004.en.ordinal
|
53
|
+
# # => "2004th"
|
24
54
|
#
|
25
|
-
# See: #a, #an, and #no.
|
26
55
|
#
|
27
56
|
# == Numbers to Words
|
28
57
|
#
|
29
|
-
#
|
30
|
-
#
|
58
|
+
# 5.en.numwords
|
59
|
+
# # => "five"
|
60
|
+
#
|
61
|
+
# 2004.en.numwords
|
62
|
+
# # => "two thousand and four"
|
63
|
+
#
|
64
|
+
# 2385762345876.en.numwords
|
65
|
+
# # => "two trillion, three hundred and eighty-five billion, seven hundred and
|
66
|
+
# # sixty-two million, three hundred and forty-five thousand, eight hundred
|
67
|
+
# # and seventy-six"
|
68
|
+
#
|
31
69
|
#
|
32
|
-
#
|
70
|
+
# == Quantification
|
33
71
|
#
|
34
|
-
#
|
72
|
+
# "cow".en.quantify( 5 )
|
73
|
+
# # => "several cows"
|
74
|
+
#
|
75
|
+
# "cow".en.quantify( 1005 )
|
76
|
+
# # => "thousands of cows"
|
77
|
+
#
|
78
|
+
# "cow".en.quantify( 20_432_123_000_000 )
|
79
|
+
# # => "tens of trillions of cows"
|
35
80
|
#
|
36
|
-
# It is also possible to inflect numerals (1,2,3) and number words ("one",
|
37
|
-
# "two", "three") to ordinals (1st, 2nd, 3rd) and ordinates ("first", "second",
|
38
|
-
# "third").
|
39
81
|
#
|
40
82
|
# == Conjunctions
|
41
83
|
#
|
42
|
-
#
|
43
|
-
#
|
84
|
+
# animals = %w{dog cow ox chicken goose goat cow dog rooster llama pig goat
|
85
|
+
# dog cat cat dog cow goat goose goose ox alpaca}
|
86
|
+
# "The farm has: " + animals.en.conjunction
|
87
|
+
# # => "The farm has: four dogs, three cows, three geese, three goats, two
|
88
|
+
# # oxen, two cats, a chicken, a rooster, a llama, a pig, and an alpaca"
|
89
|
+
#
|
90
|
+
# Note that 'goose' and 'ox' are both correctly pluralized, and the correct
|
91
|
+
# indefinite article 'an' has been used for 'alpaca'.
|
92
|
+
#
|
93
|
+
# You can also use the generalization function of the #quantify method to give
|
94
|
+
# general descriptions of object lists instead of literal counts:
|
95
|
+
#
|
96
|
+
# allobjs = []
|
97
|
+
# ObjectSpace::each_object {|obj| allobjs << obj.class.name }
|
98
|
+
# puts "The current Ruby objectspace contains: " +
|
99
|
+
# allobjs.en.conjunction( :generalize => true )
|
100
|
+
#
|
101
|
+
# Outputs:
|
102
|
+
#
|
103
|
+
# The current Ruby objectspace contains: hundreds of thousands of Strings,
|
104
|
+
# thousands of RubyVM::InstructionSequences, thousands of Arrays, thousands
|
105
|
+
# of Hashes, hundreds of Procs, hundreds of Regexps, [...], a
|
106
|
+
# SystemStackError, a Random, an ARGF.class, a Data, a fatal, an
|
107
|
+
# OptionParser::List, a YAML::EngineManager, a URI::Parser, a Rational, and
|
108
|
+
# a Gem::Platform
|
44
109
|
#
|
45
|
-
# %w{cow pig chicken cow dog cow duck duck moose}.en.conjunction
|
46
|
-
# ==> "three cows, two ducks, a pig, a chicken, a dog, and a moose"
|
47
110
|
#
|
48
111
|
# == Infinitives
|
49
112
|
#
|
50
|
-
#
|
113
|
+
# "leaving".en.infinitive
|
114
|
+
# # => "leave"
|
115
|
+
#
|
116
|
+
# "left".en.infinitive
|
117
|
+
# # => "leave"
|
118
|
+
#
|
119
|
+
# "leaving".en.infinitive.suffix
|
120
|
+
# # => "ing"
|
121
|
+
#
|
122
|
+
#
|
123
|
+
# == Conjugation
|
124
|
+
#
|
125
|
+
# Conjugate a verb given an infinitive:
|
126
|
+
#
|
127
|
+
# "run".en.past_tense
|
128
|
+
# # => "ran"
|
129
|
+
#
|
130
|
+
# "run".en.past_participle
|
131
|
+
# # => "run"
|
132
|
+
#
|
133
|
+
# "run".en.present_tense
|
134
|
+
# # => "run"
|
135
|
+
#
|
136
|
+
# "run".en.present_participle
|
137
|
+
# # => "running"
|
138
|
+
#
|
139
|
+
# Conjugate an infinitive with an explicit tense and grammatical person:
|
140
|
+
#
|
141
|
+
# "be".en.conjugate( :present, :third_person_singular )
|
142
|
+
# # => "is"
|
143
|
+
#
|
144
|
+
# "be".en.conjugate( :present, :first_person_singular )
|
145
|
+
# # => "am"
|
146
|
+
#
|
147
|
+
# "be".en.conjugate( :past, :first_person_singular )
|
148
|
+
# # => "was"
|
149
|
+
#
|
150
|
+
# The functionality is a port of the verb conjugation portion of Morph
|
151
|
+
# Adorner (http://morphadorner.northwestern.edu/).
|
152
|
+
#
|
153
|
+
# It includes a good number of irregular verbs, but it's not going to be
|
154
|
+
# 100% correct every time.
|
155
|
+
#
|
156
|
+
#
|
157
|
+
# == WordNet® Integration
|
158
|
+
#
|
159
|
+
# If you have the 'wordnet' gem installed, you can look up WordNet synsets using
|
160
|
+
# the Linguistics interface:
|
161
|
+
#
|
162
|
+
# Test to be sure the WordNet module loaded okay.
|
163
|
+
#
|
164
|
+
# Linguistics::EN.has_wordnet?
|
165
|
+
# # => true
|
166
|
+
#
|
167
|
+
# Fetch the default synset for the word "balance"
|
168
|
+
#
|
169
|
+
# "balance".en.synset
|
170
|
+
# # => #<WordNet::Synset:0x7f9fb11012f8 {102777100} 'balance' (noun):
|
171
|
+
# # [noun.artifact] a scale for weighing; depends on pull of gravity>
|
172
|
+
#
|
173
|
+
# Fetch the synset for the first verb sense of "balance"
|
174
|
+
#
|
175
|
+
# "balance".en.synset( :verb )
|
176
|
+
# # => #<WordNet::Synset:0x7f9fb10f3fb8 {201602318} 'balance, poise' (verb):
|
177
|
+
# # [verb.contact] hold or carry in equilibrium>
|
178
|
+
#
|
179
|
+
# Fetch the second noun sense
|
180
|
+
#
|
181
|
+
# "balance".en.synset( 2, :noun )
|
182
|
+
# # => #<WordNet::Synset:0x7f9fb10ebbd8 {102777402} 'balance, balance wheel'
|
183
|
+
# # (noun): [noun.artifact] a wheel that regulates the rate of movement in a
|
184
|
+
# # machine; especially a wheel oscillating against the hairspring of a
|
185
|
+
# # timepiece to regulate its beat>
|
186
|
+
#
|
187
|
+
# Fetch the second noun sense's hypernyms (more-general words, like a
|
188
|
+
# superclass)
|
189
|
+
#
|
190
|
+
# "balance".en.synset( 2, :noun ).hypernyms
|
191
|
+
# # => [#<WordNet::Synset:0x7f9fb10dd100 {104574999} 'wheel' (noun):
|
192
|
+
# # [noun.artifact] a simple machine consisting of a circular frame with
|
193
|
+
# # spokes (or a solid disc) that can rotate on a shaft or axle (as in
|
194
|
+
# # vehicles or other machines)>]
|
51
195
|
#
|
52
|
-
#
|
53
|
-
# ==> "dodge"
|
196
|
+
# A simpler way of doing the same thing:
|
54
197
|
#
|
198
|
+
# "balance".en.hypernyms( 2, :noun )
|
199
|
+
# # => [#<WordNet::Synset:0x7f9fb10d24d0 {104574999} 'wheel' (noun):
|
200
|
+
# # [noun.artifact] a simple machine consisting of a circular frame with
|
201
|
+
# # spokes (or a solid disc) that can rotate on a shaft or axle (as in
|
202
|
+
# # vehicles or other machines)>]
|
55
203
|
#
|
56
|
-
#
|
57
|
-
#
|
58
|
-
# * Michael Granger <ged@FaerieMUD.org>
|
59
|
-
#
|
60
|
-
# == Acknowledgements
|
204
|
+
# Fetch the first hypernym's hypernyms
|
61
205
|
#
|
62
|
-
#
|
63
|
-
#
|
206
|
+
# "balance".en.synset( 2, :noun ).hypernyms.first.hypernyms
|
207
|
+
# # => [#<WordNet::Synset:0x7f9fb10c5190 {103700963} 'machine, simple machine'
|
208
|
+
# # (noun): [noun.artifact] a device for overcoming resistance at one point by
|
209
|
+
# # applying force at some other point>]
|
64
210
|
#
|
65
|
-
#
|
66
|
-
#
|
67
|
-
# and/or modified under the same terms as Perl itself.
|
211
|
+
# Find the synset to which both the second noun sense of "balance" and the
|
212
|
+
# default sense of "shovel" belong.
|
68
213
|
#
|
69
|
-
#
|
70
|
-
#
|
71
|
-
#
|
214
|
+
# ("balance".en.synset( 2, :noun ) | "shovel".en.synset)
|
215
|
+
# # => #<WordNet::Synset:0x7f9fb1091e58 {103183080} 'device' (noun):
|
216
|
+
# # [noun.artifact] an instrumentality invented for a particular purpose>
|
72
217
|
#
|
73
|
-
#
|
218
|
+
# Fetch words for the specific kinds of (device-ish) "instruments"
|
74
219
|
#
|
75
|
-
|
220
|
+
# "instrument".en.hyponyms( "device" ).collect( &:words ).flatten.join(', ')
|
221
|
+
# # => "analyser, analyzer, cauterant, cautery, drafting instrument, engine,
|
222
|
+
# # extractor, instrument of execution, instrument of punishment, measuring
|
223
|
+
# # device, measuring instrument, measuring system, medical instrument,
|
224
|
+
# # navigational instrument, optical instrument, plotter, scientific
|
225
|
+
# # instrument, sonograph, surveying instrument, surveyor's instrument,
|
226
|
+
# # tracer, arm, weapon, weapon system, whip"
|
227
|
+
#
|
228
|
+
# ...or musical instruments
|
229
|
+
#
|
230
|
+
# "instrument".en.hyponyms( "musical" ).collect( &:words ).flatten.join(', ')
|
231
|
+
# # => "barrel organ, grind organ, hand organ, hurdy-gurdy, hurdy gurdy,
|
232
|
+
# # street organ, bass, calliope, steam organ, electronic instrument,
|
233
|
+
# # electronic musical instrument, jew's harp, jews' harp, mouth bow, keyboard
|
234
|
+
# # instrument, music box, musical box, percussion instrument, percussive
|
235
|
+
# # instrument, stringed instrument, wind, wind instrument"
|
236
|
+
#
|
237
|
+
# There are many more WordNet methods supported--too many to list here. See the
|
238
|
+
# WordNet::Synset API documentation for the complete list.
|
239
|
+
#
|
240
|
+
#
|
241
|
+
# == LinkParser Integration
|
242
|
+
#
|
243
|
+
# If you have the 'linkparser' gem installed, you can create linkages
|
244
|
+
# from English sentences that let you query for parts of speech:
|
245
|
+
#
|
246
|
+
# Test to see whether or not the link parser is loaded.
|
247
|
+
#
|
248
|
+
# Linguistics::EN.has_linkparser?
|
249
|
+
# # => true
|
250
|
+
#
|
251
|
+
# Diagram the first linkage for a test sentence
|
252
|
+
#
|
253
|
+
# puts "he is a big dog".en.sentence.linkages.first.diagram
|
254
|
+
#
|
255
|
+
# Outputs:
|
256
|
+
#
|
257
|
+
# +-----Ost----+
|
258
|
+
# | +----Ds---+
|
259
|
+
# +-Ss+ | +--A--+
|
260
|
+
# | | | | |
|
261
|
+
# he is.v a big.a dog.n
|
262
|
+
#
|
263
|
+
# Find the verb in the sentence
|
264
|
+
#
|
265
|
+
# "he is a big dog".en.sentence.verb.to_s
|
266
|
+
# # => "is"
|
267
|
+
#
|
268
|
+
# Combined infinitive + LinkParser: Find the infinitive form of the verb of the
|
269
|
+
# given sentence.
|
270
|
+
#
|
271
|
+
# "he is a big dog".en.sentence.verb.en.infinitive
|
272
|
+
# # => "be"
|
273
|
+
#
|
274
|
+
# Find the direct object of the sentence
|
275
|
+
#
|
276
|
+
# "he is a big dog".en.sentence.object.to_s
|
277
|
+
# # => "dog"
|
278
|
+
#
|
279
|
+
# Combine WordNet + LinkParser to find the definition of the direct object of
|
280
|
+
# the sentence
|
281
|
+
#
|
282
|
+
# "he is a big dog".en.sentence.object.en.definition
|
283
|
+
# # => "a member of the genus Canis (probably descended from the common wolf)
|
284
|
+
# # that has been domesticated by man since prehistoric times; occurs in many
|
285
|
+
# # breeds"
|
76
286
|
#
|
77
|
-
# Please see the file LICENSE in the base directory for licensing details.
|
78
287
|
#
|
79
288
|
module Linguistics::EN
|
289
|
+
extend Loggability
|
80
290
|
|
81
|
-
#
|
82
|
-
|
83
|
-
require 'linguistics/en/wordnet'
|
84
|
-
require 'linguistics/en/linkparser'
|
85
|
-
|
86
|
-
# Add 'english' to the list of default languages
|
87
|
-
Linguistics::DefaultLanguages.push( :en )
|
88
|
-
|
89
|
-
|
90
|
-
#################################################################
|
91
|
-
### U T I L I T Y F U N C T I O N S
|
92
|
-
#################################################################
|
93
|
-
|
94
|
-
### Wrap one or more parts in a non-capturing alternation Regexp
|
95
|
-
def self::matchgroup( *parts )
|
96
|
-
re = parts.flatten.join("|")
|
97
|
-
"(?:#{re})"
|
98
|
-
end
|
99
|
-
|
100
|
-
|
101
|
-
@lprintf_formatters = {}
|
102
|
-
class << self
|
103
|
-
attr_accessor :lprintf_formatters
|
104
|
-
end
|
105
|
-
|
106
|
-
### Add the specified method (which can be either a Method object or a
|
107
|
-
### Symbol for looking up a method)
|
108
|
-
def self::def_lprintf_formatter( name, meth )
|
109
|
-
meth = self.method( meth ) unless meth.is_a?( Method )
|
110
|
-
self.lprintf_formatters[ name ] = meth
|
111
|
-
end
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
#################################################################
|
116
|
-
### C O N S T A N T S
|
117
|
-
#################################################################
|
118
|
-
|
119
|
-
# :stopdoc:
|
120
|
-
|
121
|
-
#
|
122
|
-
# Plurals
|
123
|
-
#
|
124
|
-
|
125
|
-
PL_sb_irregular_s = {
|
126
|
-
"ephemeris" => "ephemerides",
|
127
|
-
"iris" => "irises|irides",
|
128
|
-
"clitoris" => "clitorises|clitorides",
|
129
|
-
"corpus" => "corpuses|corpora",
|
130
|
-
"opus" => "opuses|opera",
|
131
|
-
"genus" => "genera",
|
132
|
-
"mythos" => "mythoi",
|
133
|
-
"penis" => "penises|penes",
|
134
|
-
"testis" => "testes",
|
135
|
-
}
|
136
|
-
|
137
|
-
PL_sb_irregular_h = {
|
138
|
-
"child" => "children",
|
139
|
-
"brother" => "brothers|brethren",
|
140
|
-
"loaf" => "loaves",
|
141
|
-
"hoof" => "hoofs|hooves",
|
142
|
-
"beef" => "beefs|beeves",
|
143
|
-
"money" => "monies",
|
144
|
-
"mongoose" => "mongooses",
|
145
|
-
"ox" => "oxen",
|
146
|
-
"cow" => "cows|kine",
|
147
|
-
"soliloquy" => "soliloquies",
|
148
|
-
"graffito" => "graffiti",
|
149
|
-
"prima donna" => "prima donnas|prime donne",
|
150
|
-
"octopus" => "octopuses|octopodes",
|
151
|
-
"genie" => "genies|genii",
|
152
|
-
"ganglion" => "ganglions|ganglia",
|
153
|
-
"trilby" => "trilbys",
|
154
|
-
"turf" => "turfs|turves",
|
155
|
-
}.update( PL_sb_irregular_s )
|
156
|
-
PL_sb_irregular = matchgroup PL_sb_irregular_h.keys
|
157
|
-
|
158
|
-
|
159
|
-
# Classical "..a" -> "..ata"
|
160
|
-
PL_sb_C_a_ata = matchgroup %w[
|
161
|
-
anathema bema carcinoma charisma diploma
|
162
|
-
dogma drama edema enema enigma lemma
|
163
|
-
lymphoma magma melisma miasma oedema
|
164
|
-
sarcoma schema soma stigma stoma trauma
|
165
|
-
gumma pragma
|
166
|
-
].collect {|word| word[0...-1]}
|
167
|
-
|
168
|
-
# Unconditional "..a" -> "..ae"
|
169
|
-
PL_sb_U_a_ae = matchgroup %w[
|
170
|
-
alumna alga vertebra persona
|
171
|
-
]
|
172
|
-
|
173
|
-
# Classical "..a" -> "..ae"
|
174
|
-
PL_sb_C_a_ae = matchgroup %w[
|
175
|
-
amoeba antenna formula hyperbola
|
176
|
-
medusa nebula parabola abscissa
|
177
|
-
hydra nova lacuna aurora .*umbra
|
178
|
-
flora fauna
|
179
|
-
]
|
180
|
-
|
181
|
-
# Classical "..en" -> "..ina"
|
182
|
-
PL_sb_C_en_ina = matchgroup %w[
|
183
|
-
stamen foramen lumen
|
184
|
-
].collect {|word| word[0...-2] }
|
185
|
-
|
186
|
-
# Unconditional "..um" -> "..a"
|
187
|
-
PL_sb_U_um_a = matchgroup %w[
|
188
|
-
bacterium agendum desideratum erratum
|
189
|
-
stratum datum ovum extremum
|
190
|
-
candelabrum
|
191
|
-
].collect {|word| word[0...-2] }
|
192
|
-
|
193
|
-
# Classical "..um" -> "..a"
|
194
|
-
PL_sb_C_um_a = matchgroup %w[
|
195
|
-
maximum minimum momentum optimum
|
196
|
-
quantum cranium curriculum dictum
|
197
|
-
phylum aquarium compendium emporium
|
198
|
-
enconium gymnasium honorarium interregnum
|
199
|
-
lustrum memorandum millenium rostrum
|
200
|
-
spectrum speculum stadium trapezium
|
201
|
-
ultimatum medium vacuum velum
|
202
|
-
consortium
|
203
|
-
].collect {|word| word[0...-2]}
|
204
|
-
|
205
|
-
# Unconditional "..us" -> "i"
|
206
|
-
PL_sb_U_us_i = matchgroup %w[
|
207
|
-
alumnus alveolus bacillus bronchus
|
208
|
-
locus nucleus stimulus meniscus
|
209
|
-
].collect {|word| word[0...-2]}
|
210
|
-
|
211
|
-
# Classical "..us" -> "..i"
|
212
|
-
PL_sb_C_us_i = matchgroup %w[
|
213
|
-
focus radius genius
|
214
|
-
incubus succubus nimbus
|
215
|
-
fungus nucleolus stylus
|
216
|
-
torus umbilicus uterus
|
217
|
-
hippopotamus
|
218
|
-
].collect {|word| word[0...-2]}
|
291
|
+
# Loggability API -- log to the Linguistics logger
|
292
|
+
log_to :linguistics
|
219
293
|
|
220
|
-
#
|
221
|
-
|
222
|
-
status apparatus prospectus sinus
|
223
|
-
hiatus impetus plexus
|
224
|
-
]
|
294
|
+
# The list of loaded modules
|
295
|
+
MODULES = []
|
225
296
|
|
226
|
-
#
|
227
|
-
|
228
|
-
criterion perihelion aphelion
|
229
|
-
phenomenon prolegomenon noumenon
|
230
|
-
organon asyndeton hyperbaton
|
231
|
-
].collect {|word| word[0...-2]}
|
297
|
+
# The key to set in the thread-hash to indicate it's running in 'classical' mode
|
298
|
+
THREAD_CLASSICAL_KEY = :english_classical_mode
|
232
299
|
|
233
|
-
# Classical "..on" -> "..a"
|
234
|
-
PL_sb_C_on_a = matchgroup %w[
|
235
|
-
oxymoron
|
236
|
-
].collect {|word| word[0...-2]}
|
237
300
|
|
238
|
-
#
|
239
|
-
|
240
|
-
solo soprano basso alto
|
241
|
-
contralto tempo piano
|
242
|
-
]
|
243
|
-
PL_sb_C_o_i = matchgroup PL_sb_C_o_i_a.collect{|word| word[0...-1]}
|
301
|
+
# A Hash of 'lprintf' formatters keyed by name
|
302
|
+
@@lprintf_formatters = {}
|
244
303
|
|
245
|
-
# Always "..o" -> "..os"
|
246
|
-
PL_sb_U_o_os = matchgroup( %w[
|
247
|
-
albino archipelago armadillo
|
248
|
-
commando crescendo fiasco
|
249
|
-
ditto dynamo embryo
|
250
|
-
ghetto guano inferno
|
251
|
-
jumbo lumbago magneto
|
252
|
-
manifesto medico octavo
|
253
|
-
photo pro quarto
|
254
|
-
canto lingo generalissimo
|
255
|
-
stylo rhino
|
256
|
-
] | PL_sb_C_o_i_a )
|
257
|
-
|
258
|
-
|
259
|
-
# Unconditional "..[ei]x" -> "..ices"
|
260
|
-
PL_sb_U_ex_ices = matchgroup %w[
|
261
|
-
codex murex silex
|
262
|
-
].collect {|word| word[0...-2]}
|
263
|
-
PL_sb_U_ix_ices = matchgroup %w[
|
264
|
-
radix helix
|
265
|
-
].collect {|word| word[0...-2]}
|
266
|
-
|
267
|
-
# Classical "..[ei]x" -> "..ices"
|
268
|
-
PL_sb_C_ex_ices = matchgroup %w[
|
269
|
-
vortex vertex cortex latex
|
270
|
-
pontifex apex index simplex
|
271
|
-
].collect {|word| word[0...-2]}
|
272
|
-
PL_sb_C_ix_ices = matchgroup %w[
|
273
|
-
appendix
|
274
|
-
].collect {|word| word[0...-2]}
|
275
|
-
|
276
|
-
|
277
|
-
# Arabic: ".." -> "..i"
|
278
|
-
PL_sb_C_i = matchgroup %w[
|
279
|
-
afrit afreet efreet
|
280
|
-
]
|
281
|
-
|
282
|
-
|
283
|
-
# Hebrew: ".." -> "..im"
|
284
|
-
PL_sb_C_im = matchgroup %w[
|
285
|
-
goy seraph cherub
|
286
|
-
]
|
287
|
-
|
288
|
-
# Unconditional "..man" -> "..mans"
|
289
|
-
PL_sb_U_man_mans = matchgroup %w[
|
290
|
-
human
|
291
|
-
Alabaman Bahaman Burman German
|
292
|
-
Hiroshiman Liman Nakayaman Oklahoman
|
293
|
-
Panaman Selman Sonaman Tacoman Yakiman
|
294
|
-
Yokohaman Yuman
|
295
|
-
]
|
296
|
-
|
297
|
-
|
298
|
-
PL_sb_uninflected_s = [
|
299
|
-
# Pairs or groups subsumed to a singular...
|
300
|
-
"breeches", "britches", "clippers", "gallows", "hijinks",
|
301
|
-
"headquarters", "pliers", "scissors", "testes", "herpes",
|
302
|
-
"pincers", "shears", "proceedings", "trousers",
|
303
|
-
|
304
|
-
# Unassimilated Latin 4th declension
|
305
|
-
"cantus", "coitus", "nexus",
|
306
|
-
|
307
|
-
# Recent imports...
|
308
|
-
"contretemps", "corps", "debris",
|
309
|
-
".*ois",
|
310
|
-
|
311
|
-
# Diseases
|
312
|
-
".*measles", "mumps",
|
313
|
-
|
314
|
-
# Miscellaneous others...
|
315
|
-
"diabetes", "jackanapes", "series", "species", "rabies",
|
316
|
-
"chassis", "innings", "news", "mews",
|
317
|
-
]
|
318
|
-
|
319
|
-
|
320
|
-
# Don't inflect in classical mode, otherwise normal inflection
|
321
|
-
PL_sb_uninflected_herd = matchgroup %w[
|
322
|
-
wildebeest swine eland bison buffalo
|
323
|
-
elk moose rhinoceros
|
324
|
-
]
|
325
|
-
|
326
|
-
PL_sb_uninflected = matchgroup [
|
327
|
-
|
328
|
-
# Some fish and herd animals
|
329
|
-
".*fish", "tuna", "salmon", "mackerel", "trout",
|
330
|
-
"bream", "sea[- ]bass", "carp", "cod", "flounder", "whiting",
|
331
|
-
|
332
|
-
".*deer", ".*sheep",
|
333
|
-
|
334
|
-
# All nationals ending in -ese
|
335
|
-
"Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
|
336
|
-
"Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
|
337
|
-
"Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
|
338
|
-
"Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
|
339
|
-
"Shavese", "Vermontese", "Wenchowese", "Yengeese",
|
340
|
-
".*[nrlm]ese",
|
341
|
-
|
342
|
-
# Some words ending in ...s (often pairs taken as a whole)
|
343
|
-
PL_sb_uninflected_s,
|
344
|
-
|
345
|
-
# Diseases
|
346
|
-
".*pox",
|
347
|
-
|
348
|
-
# Other oddities
|
349
|
-
"graffiti", "djinn"
|
350
|
-
]
|
351
|
-
|
352
|
-
|
353
|
-
# Singular words ending in ...s (all inflect with ...es)
|
354
|
-
PL_sb_singular_s = matchgroup %w[
|
355
|
-
.*ss
|
356
|
-
acropolis aegis alias arthritis asbestos atlas
|
357
|
-
bathos bias bronchitis bursitis caddis cannabis
|
358
|
-
canvas chaos cosmos dais digitalis encephalitis
|
359
|
-
epidermis ethos eyas gas glottis hepatitis
|
360
|
-
hubris ibis lens mantis marquis metropolis
|
361
|
-
neuritis pathos pelvis polis rhinoceros
|
362
|
-
sassafras tonsillitis trellis .*us
|
363
|
-
]
|
364
|
-
|
365
|
-
PL_v_special_s = matchgroup [
|
366
|
-
PL_sb_singular_s,
|
367
|
-
PL_sb_uninflected_s,
|
368
|
-
PL_sb_irregular_s.keys,
|
369
|
-
'(.*[csx])is',
|
370
|
-
'(.*)ceps',
|
371
|
-
'[A-Z].*s',
|
372
|
-
]
|
373
|
-
|
374
|
-
PL_sb_postfix_adj = '(' + {
|
375
|
-
|
376
|
-
'general' => ['(?!major|lieutenant|brigadier|adjutant)\S+'],
|
377
|
-
'martial' => ["court"],
|
378
|
-
|
379
|
-
}.collect {|key,val|
|
380
|
-
matchgroup( matchgroup(val) + "(?=(?:-|\\s+)#{key})" )
|
381
|
-
}.join("|") + ")(.*)"
|
382
|
-
|
383
|
-
|
384
|
-
PL_sb_military = %r'major|lieutenant|brigadier|adjutant|quartermaster'
|
385
|
-
PL_sb_general = %r'((?!#{PL_sb_military.source}).*?)((-|\s+)general)'
|
386
|
-
|
387
|
-
PL_prep = matchgroup %w[
|
388
|
-
about above across after among around at athwart before behind
|
389
|
-
below beneath beside besides between betwixt beyond but by
|
390
|
-
during except for from in into near of off on onto out over
|
391
|
-
since till to under until unto upon with
|
392
|
-
]
|
393
|
-
|
394
|
-
PL_sb_prep_dual_compound = %r'(.*?)((?:-|\s+)(?:#{PL_prep}|d[eu])(?:-|\s+))a(?:-|\s+)(.*)'
|
395
|
-
PL_sb_prep_compound = %r'(.*?)((-|\s+)(#{PL_prep}|d[eu])((-|\s+)(.*))?)'
|
396
|
-
|
397
|
-
|
398
|
-
PL_pron_nom_h = {
|
399
|
-
# Nominative Reflexive
|
400
|
-
"i" => "we", "myself" => "ourselves",
|
401
|
-
"you" => "you", "yourself" => "yourselves",
|
402
|
-
"she" => "they", "herself" => "themselves",
|
403
|
-
"he" => "they", "himself" => "themselves",
|
404
|
-
"it" => "they", "itself" => "themselves",
|
405
|
-
"they" => "they", "themself" => "themselves",
|
406
|
-
|
407
|
-
# Possessive
|
408
|
-
"mine" => "ours",
|
409
|
-
"yours" => "yours",
|
410
|
-
"hers" => "theirs",
|
411
|
-
"his" => "theirs",
|
412
|
-
"its" => "theirs",
|
413
|
-
"theirs" => "theirs",
|
414
|
-
}
|
415
|
-
PL_pron_nom = matchgroup PL_pron_nom_h.keys
|
416
|
-
|
417
|
-
PL_pron_acc_h = {
|
418
|
-
# Accusative Reflexive
|
419
|
-
"me" => "us", "myself" => "ourselves",
|
420
|
-
"you" => "you", "yourself" => "yourselves",
|
421
|
-
"her" => "them", "herself" => "themselves",
|
422
|
-
"him" => "them", "himself" => "themselves",
|
423
|
-
"it" => "them", "itself" => "themselves",
|
424
|
-
"them" => "them", "themself" => "themselves",
|
425
|
-
}
|
426
|
-
PL_pron_acc = matchgroup PL_pron_acc_h.keys
|
427
|
-
|
428
|
-
PL_v_irregular_pres_h = {
|
429
|
-
# 1St pers. sing. 2nd pers. sing. 3rd pers. singular
|
430
|
-
# 3rd pers. (indet.)
|
431
|
-
"am" => "are", "are" => "are", "is" => "are",
|
432
|
-
"was" => "were", "were" => "were", "was" => "were",
|
433
|
-
"have" => "have", "have" => "have", "has" => "have",
|
434
|
-
}
|
435
|
-
PL_v_irregular_pres = matchgroup PL_v_irregular_pres_h.keys
|
436
|
-
|
437
|
-
PL_v_ambiguous_pres_h = {
|
438
|
-
# 1st pers. sing. 2nd pers. sing. 3rd pers. singular
|
439
|
-
# 3rd pers. (indet.)
|
440
|
-
"act" => "act", "act" => "act", "acts" => "act",
|
441
|
-
"blame" => "blame", "blame" => "blame", "blames" => "blame",
|
442
|
-
"can" => "can", "can" => "can", "can" => "can",
|
443
|
-
"must" => "must", "must" => "must", "must" => "must",
|
444
|
-
"fly" => "fly", "fly" => "fly", "flies" => "fly",
|
445
|
-
"copy" => "copy", "copy" => "copy", "copies" => "copy",
|
446
|
-
"drink" => "drink", "drink" => "drink", "drinks" => "drink",
|
447
|
-
"fight" => "fight", "fight" => "fight", "fights" => "fight",
|
448
|
-
"fire" => "fire", "fire" => "fire", "fires" => "fire",
|
449
|
-
"like" => "like", "like" => "like", "likes" => "like",
|
450
|
-
"look" => "look", "look" => "look", "looks" => "look",
|
451
|
-
"make" => "make", "make" => "make", "makes" => "make",
|
452
|
-
"reach" => "reach", "reach" => "reach", "reaches" => "reach",
|
453
|
-
"run" => "run", "run" => "run", "runs" => "run",
|
454
|
-
"sink" => "sink", "sink" => "sink", "sinks" => "sink",
|
455
|
-
"sleep" => "sleep", "sleep" => "sleep", "sleeps" => "sleep",
|
456
|
-
"view" => "view", "view" => "view", "views" => "view",
|
457
|
-
}
|
458
|
-
PL_v_ambiguous_pres = matchgroup PL_v_ambiguous_pres_h.keys
|
459
|
-
|
460
|
-
PL_v_irregular_non_pres = matchgroup %w[
|
461
|
-
did had ate made put
|
462
|
-
spent fought sank gave sought
|
463
|
-
shall could ought should
|
464
|
-
]
|
465
|
-
|
466
|
-
PL_v_ambiguous_non_pres = matchgroup %w[
|
467
|
-
thought saw bent will might cut
|
468
|
-
]
|
469
|
-
|
470
|
-
PL_count_zero = matchgroup %w[
|
471
|
-
0 no zero nil
|
472
|
-
]
|
473
|
-
|
474
|
-
PL_count_one = matchgroup %w[
|
475
|
-
1 a an one each every this that
|
476
|
-
]
|
477
|
-
|
478
|
-
PL_adj_special_h = {
|
479
|
-
"a" => "some", "an" => "some",
|
480
|
-
"this" => "these", "that" => "those",
|
481
|
-
}
|
482
|
-
PL_adj_special = matchgroup PL_adj_special_h.keys
|
483
|
-
|
484
|
-
PL_adj_poss_h = {
|
485
|
-
"my" => "our",
|
486
|
-
"your" => "your",
|
487
|
-
"its" => "their",
|
488
|
-
"her" => "their",
|
489
|
-
"his" => "their",
|
490
|
-
"their" => "their",
|
491
|
-
}
|
492
|
-
PL_adj_poss = matchgroup PL_adj_poss_h.keys
|
493
|
-
|
494
|
-
|
495
|
-
#
|
496
|
-
# Numerals, ordinals, and numbers-to-words
|
497
|
-
#
|
498
|
-
|
499
|
-
# Numerical inflections
|
500
|
-
Nth = {
|
501
|
-
0 => 'th',
|
502
|
-
1 => 'st',
|
503
|
-
2 => 'nd',
|
504
|
-
3 => 'rd',
|
505
|
-
4 => 'th',
|
506
|
-
5 => 'th',
|
507
|
-
6 => 'th',
|
508
|
-
7 => 'th',
|
509
|
-
8 => 'th',
|
510
|
-
9 => 'th',
|
511
|
-
11 => 'th',
|
512
|
-
12 => 'th',
|
513
|
-
13 => 'th',
|
514
|
-
}
|
515
|
-
|
516
|
-
# Ordinal word parts
|
517
|
-
Ordinals = {
|
518
|
-
'ty' => 'tieth',
|
519
|
-
'one' => 'first',
|
520
|
-
'two' => 'second',
|
521
|
-
'three' => 'third',
|
522
|
-
'five' => 'fifth',
|
523
|
-
'eight' => 'eighth',
|
524
|
-
'nine' => 'ninth',
|
525
|
-
'twelve' => 'twelfth',
|
526
|
-
}
|
527
|
-
OrdinalSuffixes = Ordinals.keys.join("|") + "|"
|
528
|
-
Ordinals[""] = 'th'
|
529
|
-
|
530
|
-
# Numeral names
|
531
|
-
Units = [''] + %w[one two three four five six seven eight nine]
|
532
|
-
Teens = %w[ten eleven twelve thirteen fourteen
|
533
|
-
fifteen sixteen seventeen eighteen nineteen]
|
534
|
-
Tens = ['',''] + %w[twenty thirty forty fifty sixty seventy eighty ninety]
|
535
|
-
Thousands = [' ', ' thousand'] + %w[
|
536
|
-
m b tr quadr quint sext sept oct non dec undec duodec tredec
|
537
|
-
quattuordec quindec sexdec septemdec octodec novemdec vigint
|
538
|
-
].collect {|prefix| ' ' + prefix + 'illion'}
|
539
|
-
|
540
|
-
# A collection of functions for transforming digits into word
|
541
|
-
# phrases. Indexed by the number of digits being transformed; e.g.,
|
542
|
-
# <tt>NumberToWordsFunctions[2]</tt> is the function for transforming
|
543
|
-
# double-digit numbers.
|
544
|
-
NumberToWordsFunctions = [
|
545
|
-
proc {|*args| raise "No digits (#{args.inspect})"},
|
546
|
-
|
547
|
-
# Single-digits
|
548
|
-
proc {|zero,x|
|
549
|
-
(x.nonzero? ? to_units(x) : "#{zero} ")
|
550
|
-
},
|
551
|
-
|
552
|
-
# Double-digits
|
553
|
-
proc {|zero,x,y|
|
554
|
-
if x.nonzero?
|
555
|
-
to_tens( x, y )
|
556
|
-
elsif y.nonzero?
|
557
|
-
"#{zero} " + NumberToWordsFunctions[1].call( zero, y )
|
558
|
-
else
|
559
|
-
([zero] * 2).join(" ")
|
560
|
-
end
|
561
|
-
},
|
562
|
-
|
563
|
-
# Triple-digits
|
564
|
-
proc {|zero,x,y,z|
|
565
|
-
NumberToWordsFunctions[1].call(zero,x) +
|
566
|
-
NumberToWordsFunctions[2].call(zero,y,z)
|
567
|
-
}
|
568
|
-
]
|
569
|
-
|
570
|
-
|
571
|
-
#
|
572
|
-
# Indefinite Articles
|
573
|
-
#
|
574
|
-
|
575
|
-
# This pattern matches strings of capitals starting with a "vowel-sound"
|
576
|
-
# consonant followed by another consonant, and which are not likely
|
577
|
-
# to be real words (oh, all right then, it's just magic!)
|
578
|
-
A_abbrev = %{
|
579
|
-
(?! FJO | [HLMNS]Y. | RY[EO] | SQU
|
580
|
-
| ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU])
|
581
|
-
[FHLMNRSX][A-Z]
|
582
|
-
}
|
583
|
-
|
584
|
-
# This pattern codes the beginnings of all English words beginning with a
|
585
|
-
# 'y' followed by a consonant. Any other y-consonant prefix therefore
|
586
|
-
# implies an abbreviation.
|
587
|
-
A_y_cons = 'y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)'
|
588
|
-
|
589
|
-
# Exceptions to exceptions
|
590
|
-
A_explicit_an = matchgroup( "euler", "hour(?!i)", "heir", "honest", "hono" )
|
591
|
-
|
592
|
-
|
593
|
-
#
|
594
|
-
# Configuration defaults
|
595
|
-
#
|
596
|
-
|
597
|
-
# Default configuration arguments for the #numwords function
|
598
|
-
NumwordDefaults = {
|
599
|
-
:group => 0,
|
600
|
-
:comma => ', ',
|
601
|
-
:and => ' and ',
|
602
|
-
:zero => 'zero',
|
603
|
-
:decimal => 'point',
|
604
|
-
:asArray => false,
|
605
|
-
}
|
606
|
-
|
607
|
-
# Default ranges for #quantify
|
608
|
-
SeveralRange = 2..5
|
609
|
-
NumberRange = 6..19
|
610
|
-
NumerousRange = 20..45
|
611
|
-
ManyRange = 46..99
|
612
|
-
|
613
|
-
# Default configuration arguments for the #quantify function
|
614
|
-
QuantifyDefaults = {
|
615
|
-
:joinword => " of ",
|
616
|
-
}
|
617
|
-
|
618
|
-
# Default configuration arguments for the #conjunction (junction, what's
|
619
|
-
# your) function.
|
620
|
-
ConjunctionDefaults = {
|
621
|
-
:separator => ', ',
|
622
|
-
:altsep => '; ',
|
623
|
-
:penultimate => true,
|
624
|
-
:conjunctive => 'and',
|
625
|
-
:combine => true,
|
626
|
-
:casefold => true,
|
627
|
-
:generalize => false,
|
628
|
-
:quantsort => true,
|
629
|
-
}
|
630
|
-
|
631
|
-
|
632
|
-
#
|
633
|
-
# Title case
|
634
|
-
#
|
635
|
-
|
636
|
-
# "In titles, capitalize the first word, the last word, and all words in
|
637
|
-
# between except articles (a, an, and the), prepositions under five letters
|
638
|
-
# (in, of, to), and coordinating conjunctions (and, but). These rules apply
|
639
|
-
# to titles of long, short, and partial works as well as your own papers"
|
640
|
-
# (Anson, Schwegler, and Muth. The Longman Writer's Companion 240).
|
641
|
-
|
642
|
-
# Build the list of exceptions to title-capitalization
|
643
|
-
Articles = %w[a and the]
|
644
|
-
ShortPrepositions = ["amid", "at", "but", "by", "down", "from", "in",
|
645
|
-
"into", "like", "near", "of", "off", "on", "onto", "out", "over",
|
646
|
-
"past", "save", "with", "till", "to", "unto", "up", "upon", "with"]
|
647
|
-
CoordConjunctions = %w[and but as]
|
648
|
-
TitleCaseExceptions = Articles | ShortPrepositions | CoordConjunctions
|
649
|
-
|
650
|
-
|
651
|
-
# :startdoc:
|
652
304
|
|
653
305
|
#################################################################
|
654
|
-
###
|
306
|
+
### U T I L I T Y F U N C T I O N S
|
655
307
|
#################################################################
|
656
308
|
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
###############
|
661
|
-
|
662
|
-
### Debugging output
|
663
|
-
def debug_msg( *msgs ) # :nodoc:
|
664
|
-
$stderr.puts msgs.join(" ") if $DEBUG
|
665
|
-
end
|
666
|
-
|
667
|
-
|
668
|
-
### Normalize a count to either 1 or 2 (singular or plural)
|
669
|
-
def normalize_count( count, default=2 )
|
670
|
-
return default if count.nil? # Default to plural
|
671
|
-
if /^(#{PL_count_one})$/i =~ count.to_s ||
|
672
|
-
Linguistics::classical? &&
|
673
|
-
/^(#{PL_count_zero})$/ =~ count.to_s
|
674
|
-
return 1
|
675
|
-
else
|
676
|
-
return default
|
677
|
-
end
|
309
|
+
### A Hash of formatters for the lprintf function.
|
310
|
+
def self::lprintf_formatters
|
311
|
+
return @@lprintf_formatters
|
678
312
|
end
|
679
313
|
|
680
314
|
|
681
|
-
###
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
Linguistics::classical? ? $2 : $1
|
686
|
-
}
|
687
|
-
|
688
|
-
case original
|
689
|
-
when "I"
|
690
|
-
return inflected
|
691
|
-
when /^[A-Z]+$/
|
692
|
-
return inflected.upcase
|
693
|
-
when /^[A-Z]/
|
694
|
-
# Can't use #capitalize, as it will downcase the rest of the string,
|
695
|
-
# too.
|
696
|
-
inflected[0,1] = inflected[0,1].upcase
|
697
|
-
return inflected
|
698
|
-
else
|
699
|
-
return inflected
|
700
|
-
end
|
701
|
-
end
|
702
|
-
|
703
|
-
|
704
|
-
### Pluralize nouns
|
705
|
-
def pluralize_noun( word, count=nil )
|
706
|
-
value = nil
|
707
|
-
count ||= Linguistics::num
|
708
|
-
count = normalize_count( count )
|
709
|
-
|
710
|
-
return word if count == 1
|
711
|
-
|
712
|
-
# Handle user-defined nouns
|
713
|
-
#if value = ud_match( word, PL_sb_user_defined )
|
714
|
-
# return value
|
715
|
-
#end
|
716
|
-
|
717
|
-
# Handle empty word, singular count and uninflected plurals
|
718
|
-
case word
|
719
|
-
when ''
|
720
|
-
return word
|
721
|
-
when /^(#{PL_sb_uninflected})$/i
|
722
|
-
return word
|
723
|
-
else
|
724
|
-
if Linguistics::classical? &&
|
725
|
-
/^(#{PL_sb_uninflected_herd})$/i =~ word
|
726
|
-
return word
|
727
|
-
end
|
728
|
-
end
|
729
|
-
|
730
|
-
# Handle compounds ("Governor General", "mother-in-law", "aide-de-camp", etc.)
|
731
|
-
case word
|
732
|
-
when /^(?:#{PL_sb_postfix_adj})$/i
|
733
|
-
value = $2
|
734
|
-
return pluralize_noun( $1, 2 ) + value
|
735
|
-
|
736
|
-
when /^(?:#{PL_sb_prep_dual_compound})$/i
|
737
|
-
value = [ $2, $3 ]
|
738
|
-
return pluralize_noun( $1, 2 ) + value[0] + pluralize_noun( value[1] )
|
739
|
-
|
740
|
-
when /^(?:#{PL_sb_prep_compound})$/i
|
741
|
-
value = $2
|
742
|
-
return pluralize_noun( $1, 2 ) + value
|
743
|
-
|
744
|
-
# Handle pronouns
|
745
|
-
when /^((?:#{PL_prep})\s+)(#{PL_pron_acc})$/i
|
746
|
-
return $1 + PL_pron_acc_h[ $2.downcase ]
|
747
|
-
|
748
|
-
when /^(#{PL_pron_nom})$/i
|
749
|
-
return PL_pron_nom_h[ word.downcase ]
|
750
|
-
|
751
|
-
when /^(#{PL_pron_acc})$/i
|
752
|
-
return PL_pron_acc_h[ $1.downcase ]
|
753
|
-
|
754
|
-
# Handle isolated irregular plurals
|
755
|
-
when /(.*)\b(#{PL_sb_irregular})$/i
|
756
|
-
return $1 + PL_sb_irregular_h[ $2.downcase ]
|
315
|
+
### Register an English-language extension.
|
316
|
+
def self::register_extension( mod )
|
317
|
+
MODULES.push( mod )
|
318
|
+
self.log.debug "Registered English extension %p" % [ mod ]
|
757
319
|
|
758
|
-
|
759
|
-
|
320
|
+
include( mod )
|
321
|
+
mod.extend( Loggability )
|
322
|
+
mod.log_to( :linguistics )
|
760
323
|
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
when /(.*)tooth$/i ; return "#{$1}teeth"
|
766
|
-
when /(.*)foot$/i ; return "#{$1}feet"
|
324
|
+
if mod.const_defined?( :SingletonMethods )
|
325
|
+
smod = mod.const_get(:SingletonMethods)
|
326
|
+
self.log.debug " and its singleton methods %p" % [ smod ]
|
327
|
+
extend( smod )
|
767
328
|
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
when /(#{PL_sb_U_ex_ices})ex$/i; return "#{$1}ices"
|
773
|
-
when /(#{PL_sb_U_ix_ices})ix$/i; return "#{$1}ices"
|
774
|
-
when /(#{PL_sb_U_um_a})um$/i ; return "#{$1}a"
|
775
|
-
when /(#{PL_sb_U_us_i})us$/i ; return "#{$1}i"
|
776
|
-
when /(#{PL_sb_U_on_a})on$/i ; return "#{$1}a"
|
777
|
-
when /(#{PL_sb_U_a_ae})$/i ; return "#{$1}e"
|
778
|
-
end
|
779
|
-
|
780
|
-
# Handle incompletely assimilated imports
|
781
|
-
if Linguistics::classical?
|
782
|
-
case word
|
783
|
-
when /(.*)trix$/i ; return "#{$1}trices"
|
784
|
-
when /(.*)eau$/i ; return "#{$1}eaux"
|
785
|
-
when /(.*)ieu$/i ; return "#{$1}ieux"
|
786
|
-
when /(.{2,}[yia])nx$/i ; return "#{$1}nges"
|
787
|
-
when /(#{PL_sb_C_en_ina})en$/i; return "#{$1}ina"
|
788
|
-
when /(#{PL_sb_C_ex_ices})ex$/i; return "#{$1}ices"
|
789
|
-
when /(#{PL_sb_C_ix_ices})ix$/i; return "#{$1}ices"
|
790
|
-
when /(#{PL_sb_C_um_a})um$/i ; return "#{$1}a"
|
791
|
-
when /(#{PL_sb_C_us_i})us$/i ; return "#{$1}i"
|
792
|
-
when /(#{PL_sb_C_us_us})$/i ; return "#{$1}"
|
793
|
-
when /(#{PL_sb_C_a_ae})$/i ; return "#{$1}e"
|
794
|
-
when /(#{PL_sb_C_a_ata})a$/i ; return "#{$1}ata"
|
795
|
-
when /(#{PL_sb_C_o_i})o$/i ; return "#{$1}i"
|
796
|
-
when /(#{PL_sb_C_on_a})on$/i ; return "#{$1}a"
|
797
|
-
when /#{PL_sb_C_im}$/i ; return "#{word}im"
|
798
|
-
when /#{PL_sb_C_i}$/i ; return "#{word}i"
|
329
|
+
ivars = mod.instance_variables
|
330
|
+
self.log.debug " and instance variables %p" % [ ivars ]
|
331
|
+
ivars.each do |ivar|
|
332
|
+
instance_variable_set( ivar, mod.instance_variable_get(ivar) )
|
799
333
|
end
|
800
334
|
end
|
801
|
-
|
802
|
-
|
803
|
-
# Handle singular nouns ending in ...s or other sibilants
|
804
|
-
case word
|
805
|
-
when /^(#{PL_sb_singular_s})$/i; return "#{$1}es"
|
806
|
-
when /^([A-Z].*s)$/; return "#{$1}es"
|
807
|
-
when /(.*)([cs]h|[zx])$/i ; return "#{$1}#{$2}es"
|
808
|
-
# when /(.*)(us)$/i ; return "#{$1}#{$2}es"
|
809
|
-
|
810
|
-
# Handle ...f -> ...ves
|
811
|
-
when /(.*[eao])lf$/i ; return "#{$1}lves";
|
812
|
-
when /(.*[^d])eaf$/i ; return "#{$1}eaves"
|
813
|
-
when /(.*[nlw])ife$/i ; return "#{$1}ives"
|
814
|
-
when /(.*)arf$/i ; return "#{$1}arves"
|
815
|
-
|
816
|
-
# Handle ...y
|
817
|
-
when /(.*[aeiou])y$/i ; return "#{$1}ys"
|
818
|
-
when /([A-Z].*y)$/ ; return "#{$1}s"
|
819
|
-
when /(.*)y$/i ; return "#{$1}ies"
|
820
|
-
|
821
|
-
# Handle ...o
|
822
|
-
when /#{PL_sb_U_o_os}$/i ; return "#{word}s"
|
823
|
-
when /[aeiou]o$/i ; return "#{word}s"
|
824
|
-
when /o$/i ; return "#{word}es"
|
825
|
-
|
826
|
-
# Otherwise just add ...s
|
827
|
-
else
|
828
|
-
return "#{word}s"
|
829
|
-
end
|
830
|
-
end # def pluralize_noun
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
### Pluralize special verbs
|
835
|
-
def pluralize_special_verb( word, count )
|
836
|
-
count ||= Linguistics::num
|
837
|
-
count = normalize_count( count )
|
838
|
-
|
839
|
-
return nil if /^(#{PL_count_one})$/i =~ count.to_s
|
840
|
-
|
841
|
-
# Handle user-defined verbs
|
842
|
-
#if value = ud_match( word, PL_v_user_defined )
|
843
|
-
# return value
|
844
|
-
#end
|
845
|
-
|
846
|
-
case word
|
847
|
-
|
848
|
-
# Handle irregular present tense (simple and compound)
|
849
|
-
when /^(#{PL_v_irregular_pres})((\s.*)?)$/i
|
850
|
-
return PL_v_irregular_pres_h[ $1.downcase ] + $2
|
851
|
-
|
852
|
-
# Handle irregular future, preterite and perfect tenses
|
853
|
-
when /^(#{PL_v_irregular_non_pres})((\s.*)?)$/i
|
854
|
-
return word
|
855
|
-
|
856
|
-
# Handle special cases
|
857
|
-
when /^(#{PL_v_special_s})$/, /\s/
|
858
|
-
return nil
|
859
|
-
|
860
|
-
# Handle standard 3rd person (chop the ...(e)s off single words)
|
861
|
-
when /^(.*)([cs]h|[x]|zz|ss)es$/i
|
862
|
-
return $1 + $2
|
863
|
-
when /^(..+)ies$/i
|
864
|
-
return "#{$1}y"
|
865
|
-
when /^(.+)oes$/i
|
866
|
-
return "#{$1}o"
|
867
|
-
when /^(.*[^s])s$/i
|
868
|
-
return $1
|
869
|
-
|
870
|
-
# Otherwise, a regular verb (handle elsewhere)
|
871
|
-
else
|
872
|
-
return nil
|
873
|
-
end
|
874
335
|
end
|
875
336
|
|
876
337
|
|
877
|
-
###
|
878
|
-
|
879
|
-
|
880
|
-
|
881
|
-
|
882
|
-
return word if /^(#{PL_count_one})$/i =~ count.to_s
|
883
|
-
|
884
|
-
case word
|
885
|
-
|
886
|
-
# Handle ambiguous present tenses (simple and compound)
|
887
|
-
when /^(#{PL_v_ambiguous_pres})((\s.*)?)$/i
|
888
|
-
return PL_v_ambiguous_pres_h[ $1.downcase ] + $2
|
889
|
-
|
890
|
-
# Handle ambiguous preterite and perfect tenses
|
891
|
-
when /^(#{PL_v_ambiguous_non_pres})((\s.*)?)$/i
|
892
|
-
return word
|
893
|
-
|
894
|
-
# Otherwise, 1st or 2nd person is uninflected
|
895
|
-
else
|
896
|
-
return word
|
338
|
+
### Returns +true+ if the English-language module with the given +name+ was
|
339
|
+
### successfully registered.
|
340
|
+
def self::has_extension?( name )
|
341
|
+
return MODULES.any? do |mod|
|
342
|
+
mod.name.sub( /.*::/, '' ).downcase == name.to_s.downcase
|
897
343
|
end
|
898
344
|
end
|
899
345
|
|
900
346
|
|
901
|
-
###
|
902
|
-
def
|
903
|
-
|
904
|
-
count = normalize_count( count )
|
905
|
-
|
906
|
-
return word if /^(#{PL_count_one})$/i =~ count.to_s
|
907
|
-
|
908
|
-
# Handle user-defined adjectives
|
909
|
-
#if value = ud_match( word, PL_adj_user_defined )
|
910
|
-
# return value
|
911
|
-
#end
|
912
|
-
|
913
|
-
case word
|
914
|
-
|
915
|
-
# Handle known cases
|
916
|
-
when /^(#{PL_adj_special})$/i
|
917
|
-
return PL_adj_special_h[ $1.downcase ]
|
918
|
-
|
919
|
-
# Handle possessives
|
920
|
-
when /^(#{PL_adj_poss})$/i
|
921
|
-
return PL_adj_poss_h[ $1.downcase ]
|
922
|
-
|
923
|
-
when /^(.*)'s?$/
|
924
|
-
pl = plural_noun( $1 )
|
925
|
-
if /s$/ =~ pl
|
926
|
-
return "#{pl}'"
|
927
|
-
else
|
928
|
-
return "#{pl}'s"
|
929
|
-
end
|
930
|
-
|
931
|
-
# Otherwise, no idea
|
932
|
-
else
|
933
|
-
return nil
|
934
|
-
end
|
935
|
-
end
|
936
|
-
|
937
|
-
|
938
|
-
### Returns the given word with a prepended indefinite article, unless
|
939
|
-
### +count+ is non-nil and not singular.
|
940
|
-
def indef_article( word, count )
|
941
|
-
count ||= Linguistics::num
|
942
|
-
return "#{count} #{word}" if
|
943
|
-
count && /^(#{PL_count_one})$/i !~ count.to_s
|
944
|
-
|
945
|
-
# Handle user-defined variants
|
946
|
-
# return value if value = ud_match( word, A_a_user_defined )
|
947
|
-
|
948
|
-
case word
|
949
|
-
|
950
|
-
# Handle special cases
|
951
|
-
when /^(#{A_explicit_an})/i
|
952
|
-
return "an #{word}"
|
953
|
-
|
954
|
-
# Handle abbreviations
|
955
|
-
when /^(#{A_abbrev})/x
|
956
|
-
return "an #{word}"
|
957
|
-
when /^[aefhilmnorsx][.-]/i
|
958
|
-
return "an #{word}"
|
959
|
-
when /^[a-z][.-]/i
|
960
|
-
return "a #{word}"
|
961
|
-
|
962
|
-
# Handle consonants
|
963
|
-
when /^[^aeiouy]/i
|
964
|
-
return "a #{word}"
|
965
|
-
|
966
|
-
# Handle special vowel-forms
|
967
|
-
when /^e[uw]/i
|
968
|
-
return "a #{word}"
|
969
|
-
when /^onc?e\b/i
|
970
|
-
return "a #{word}"
|
971
|
-
when /^uni([^nmd]|mo)/i
|
972
|
-
return "a #{word}"
|
973
|
-
when /^u[bcfhjkqrst][aeiou]/i
|
974
|
-
return "a #{word}"
|
975
|
-
|
976
|
-
# Handle vowels
|
977
|
-
when /^[aeiou]/i
|
978
|
-
return "an #{word}"
|
979
|
-
|
980
|
-
# Handle y... (before certain consonants implies (unnaturalized) "i.." sound)
|
981
|
-
when /^(#{A_y_cons})/i
|
982
|
-
return "an #{word}"
|
983
|
-
|
984
|
-
# Otherwise, guess "a"
|
985
|
-
else
|
986
|
-
return "a #{word}"
|
987
|
-
end
|
988
|
-
end
|
989
|
-
|
990
|
-
|
991
|
-
### Transform the specified number of units-place numerals into a
|
992
|
-
### word-phrase at the given number of +thousands+ places.
|
993
|
-
def to_units( units, thousands=0 )
|
994
|
-
return Units[ units ] + to_thousands( thousands )
|
347
|
+
### Debugging output
|
348
|
+
def self::debug_msg( *msgs ) # :nodoc:
|
349
|
+
$stderr.puts msgs.join(" ") if $DEBUG
|
995
350
|
end
|
996
351
|
|
997
352
|
|
998
|
-
###
|
999
|
-
###
|
1000
|
-
|
1001
|
-
|
1002
|
-
|
1003
|
-
|
1004
|
-
|
1005
|
-
|
1006
|
-
|
1007
|
-
|
1008
|
-
|
353
|
+
### Add an lprintf formatter named +name+ that will use the specified +callback+ method.
|
354
|
+
### The name of the formatter is the placeholder that will be used in the
|
355
|
+
### format string, and the +callback+ is the method to call on the english-language
|
356
|
+
### inflector for the lprintf argument, and can either be an object that responds to
|
357
|
+
### #call, or the name of a method to call as a Symbol.
|
358
|
+
###
|
359
|
+
### Using a Symbol:
|
360
|
+
###
|
361
|
+
### def plural( count=2 )
|
362
|
+
### # return the plural of the inflected object
|
363
|
+
### end
|
364
|
+
### Linguistics::EN.register_lprintf_formatter :PL, :plural
|
365
|
+
###
|
366
|
+
### Using a method:
|
367
|
+
###
|
368
|
+
### Linguistics::EN.register_lprintf_formatter :PL, method( :plural )
|
369
|
+
###
|
370
|
+
### Using a block:
|
371
|
+
###
|
372
|
+
### Linguistics::EN.register_lprintf_formatter :PL do |obj|
|
373
|
+
### obj.en.plural
|
374
|
+
### end
|
375
|
+
###
|
376
|
+
def self::register_lprintf_formatter( name, callback=nil )
|
377
|
+
raise LocalJumpError, "no callback or block given" unless callback || block_given?
|
378
|
+
callback ||= Proc.new
|
1009
379
|
|
1010
|
-
|
1011
|
-
### numerals into a word phrase. If the number of thousands (+thousands+) is
|
1012
|
-
### greater than 0, it will be used to determine where the decimal point is
|
1013
|
-
### in relation to the hundreds-place number.
|
1014
|
-
def to_hundreds( hundreds, tens=0, units=0, thousands=0, joinword=" and " )
|
1015
|
-
joinword = ' ' if joinword.empty?
|
1016
|
-
if hundreds.nonzero?
|
1017
|
-
return to_units( hundreds ) + " hundred" +
|
1018
|
-
(tens.nonzero? || units.nonzero? ? joinword : '') +
|
1019
|
-
to_tens( tens, units ) +
|
1020
|
-
to_thousands( thousands )
|
1021
|
-
elsif tens.nonzero? || units.nonzero?
|
1022
|
-
return to_tens( tens, units ) + to_thousands( thousands )
|
1023
|
-
else
|
1024
|
-
return nil
|
1025
|
-
end
|
380
|
+
@@lprintf_formatters[ name ] = callback.to_proc
|
1026
381
|
end
|
1027
382
|
|
1028
|
-
### Transform the specified number into one or more words like 'thousand',
|
1029
|
-
### 'million', etc. Uses the thousands (American) system.
|
1030
|
-
def to_thousands( thousands=0 )
|
1031
|
-
parts = []
|
1032
|
-
(0..thousands).step( Thousands.length - 1 ) {|i|
|
1033
|
-
if i.zero?
|
1034
|
-
parts.push Thousands[ thousands % (Thousands.length - 1) ]
|
1035
|
-
else
|
1036
|
-
parts.push Thousands.last
|
1037
|
-
end
|
1038
|
-
}
|
1039
383
|
|
1040
|
-
|
384
|
+
### Return +true+ if running in a 'classical' mode.
|
385
|
+
def self::classical?
|
386
|
+
return Thread.current[ THREAD_CLASSICAL_KEY ] ? true : false
|
1041
387
|
end
|
1042
388
|
|
1043
389
|
|
1044
|
-
###
|
1045
|
-
|
1046
|
-
|
1047
|
-
|
1048
|
-
|
1049
|
-
# Break into word-groups if groups is set
|
1050
|
-
if config[:group].nonzero?
|
1051
|
-
|
1052
|
-
# Build a Regexp with <config[:group]> number of digits. Any past
|
1053
|
-
# the first are optional.
|
1054
|
-
re = Regexp::new( "(\\d)" + ("(\\d)?" * (config[:group] - 1)) )
|
1055
|
-
|
1056
|
-
# Scan the string, and call the word-chunk function that deals with
|
1057
|
-
# chunks of the found number of digits.
|
1058
|
-
num.to_s.scan( re ) {|digits|
|
1059
|
-
debug_msg " digits = #{digits.inspect}"
|
1060
|
-
fn = NumberToWordsFunctions[ digits.nitems ]
|
1061
|
-
numerals = digits.flatten.compact.collect {|i| i.to_i}
|
1062
|
-
debug_msg " numerals = #{numerals.inspect}"
|
1063
|
-
chunks.push fn.call( config[:zero], *numerals ).strip
|
1064
|
-
}
|
1065
|
-
else
|
1066
|
-
phrase = num.to_s
|
1067
|
-
phrase.sub!( /\A\s*0+/, '' )
|
1068
|
-
mill = 0
|
1069
|
-
|
1070
|
-
# Match backward from the end of the digits in the string, turning
|
1071
|
-
# chunks of three, of two, and of one into words.
|
1072
|
-
mill += 1 while
|
1073
|
-
phrase.sub!( /(\d)(\d)(\d)(?=\D*\Z)/ ) {
|
1074
|
-
words = to_hundreds( $1.to_i, $2.to_i, $3.to_i, mill,
|
1075
|
-
config[:and] )
|
1076
|
-
chunks.unshift words.strip.squeeze(' ') unless words.nil?
|
1077
|
-
''
|
1078
|
-
}
|
1079
|
-
|
1080
|
-
phrase.sub!( /(\d)(\d)(?=\D*\Z)/ ) {
|
1081
|
-
chunks.unshift to_tens( $1.to_i, $2.to_i, mill ).strip.squeeze(' ')
|
1082
|
-
''
|
1083
|
-
}
|
1084
|
-
phrase.sub!( /(\d)(?=\D*\Z)/ ) {
|
1085
|
-
chunks.unshift to_units( $1.to_i, mill ).strip.squeeze(' ')
|
1086
|
-
''
|
1087
|
-
}
|
1088
|
-
end
|
390
|
+
### Set classical mode for the current thread inside the block, then
|
391
|
+
### unset it when it returns.
|
392
|
+
def self::in_classical_mode
|
393
|
+
old_setting = Thread.current[ THREAD_CLASSICAL_KEY ]
|
394
|
+
Thread.current[ THREAD_CLASSICAL_KEY ] = true
|
1089
395
|
|
1090
|
-
|
396
|
+
yield
|
397
|
+
ensure
|
398
|
+
Thread.current[ THREAD_CLASSICAL_KEY ] = old_setting
|
1091
399
|
end
|
1092
400
|
|
1093
401
|
|
@@ -1095,579 +403,6 @@ module Linguistics::EN
|
|
1095
403
|
### P U B L I C F U N C T I O N S
|
1096
404
|
#################################################################
|
1097
405
|
|
1098
|
-
### Return the name of the language this module is for.
|
1099
|
-
def language( unused=nil )
|
1100
|
-
"English"
|
1101
|
-
end
|
1102
|
-
|
1103
|
-
|
1104
|
-
### Return the plural of the given +phrase+ if +count+ indicates it should
|
1105
|
-
### be plural.
|
1106
|
-
def plural( phrase, count=nil )
|
1107
|
-
phrase = numwords( phrase ) if phrase.is_a?( Numeric )
|
1108
|
-
|
1109
|
-
md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
|
1110
|
-
pre, word, post = md.to_a[1,3]
|
1111
|
-
return phrase if word.nil? or word.empty?
|
1112
|
-
|
1113
|
-
plural = postprocess( word,
|
1114
|
-
pluralize_special_adjective(word, count) ||
|
1115
|
-
pluralize_special_verb(word, count) ||
|
1116
|
-
pluralize_noun(word, count) )
|
1117
|
-
|
1118
|
-
return pre + plural + post
|
1119
|
-
end
|
1120
|
-
def_lprintf_formatter :PL, :plural
|
1121
|
-
|
1122
|
-
|
1123
|
-
### Return the plural of the given noun +phrase+ if +count+ indicates it
|
1124
|
-
### should be plural.
|
1125
|
-
def plural_noun( phrase, count=nil )
|
1126
|
-
md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
|
1127
|
-
pre, word, post = md.to_a[1,3]
|
1128
|
-
return phrase if word.nil? or word.empty?
|
1129
|
-
|
1130
|
-
plural = postprocess( word, pluralize_noun(word, count) )
|
1131
|
-
return pre + plural + post
|
1132
|
-
end
|
1133
|
-
def_lprintf_formatter :PL_N, :plural_noun
|
1134
|
-
|
1135
|
-
|
1136
|
-
### Return the plural of the given verb +phrase+ if +count+ indicates it
|
1137
|
-
### should be plural.
|
1138
|
-
def plural_verb( phrase, count=nil )
|
1139
|
-
md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
|
1140
|
-
pre, word, post = md.to_a[1,3]
|
1141
|
-
return phrase if word.nil? or word.empty?
|
1142
|
-
|
1143
|
-
plural = postprocess( word,
|
1144
|
-
pluralize_special_verb(word, count) ||
|
1145
|
-
pluralize_general_verb(word, count) )
|
1146
|
-
return pre + plural + post
|
1147
|
-
end
|
1148
|
-
def_lprintf_formatter :PL_V, :plural_verb
|
1149
|
-
|
1150
|
-
|
1151
|
-
### Return the plural of the given adjectival +phrase+ if +count+ indicates
|
1152
|
-
### it should be plural.
|
1153
|
-
def plural_adjective( phrase, count=nil )
|
1154
|
-
md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
|
1155
|
-
pre, word, post = md.to_a[1,3]
|
1156
|
-
return phrase if word.nil? or word.empty?
|
1157
|
-
|
1158
|
-
plural = postprocess( word,
|
1159
|
-
pluralize_special_adjective(word, count) || word )
|
1160
|
-
return pre + plural + post
|
1161
|
-
end
|
1162
|
-
alias_method :plural_adj, :plural_adjective
|
1163
|
-
def_lprintf_formatter :PL_ADJ, :plural_adjective
|
1164
|
-
|
1165
|
-
|
1166
|
-
### Return the given phrase with the appropriate indefinite article ("a" or
|
1167
|
-
### "an") prepended.
|
1168
|
-
def a( phrase, count=nil )
|
1169
|
-
md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
|
1170
|
-
pre, word, post = md.to_a[1,3]
|
1171
|
-
return phrase if word.nil? or word.empty?
|
1172
|
-
|
1173
|
-
result = indef_article( word, count )
|
1174
|
-
return pre + result + post
|
1175
|
-
end
|
1176
|
-
alias_method :an, :a
|
1177
|
-
def_lprintf_formatter :A, :a
|
1178
|
-
def_lprintf_formatter :AN, :a
|
1179
|
-
|
1180
|
-
|
1181
|
-
### Translate zero-quantified +phrase+ to "no +phrase.plural+"
|
1182
|
-
def no( phrase, count=nil )
|
1183
|
-
md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
|
1184
|
-
pre, word, post = md.to_a[1,3]
|
1185
|
-
count ||= Linguistics::num || 0
|
1186
|
-
|
1187
|
-
unless /^#{PL_count_zero}$/ =~ count.to_s
|
1188
|
-
return "#{pre}#{count} " + plural( word, count ) + post
|
1189
|
-
else
|
1190
|
-
return "#{pre}no " + plural( word, 0 ) + post
|
1191
|
-
end
|
1192
|
-
end
|
1193
|
-
def_lprintf_formatter :NO, :no
|
1194
|
-
|
1195
|
-
|
1196
|
-
### Participles
|
1197
|
-
def present_participle( word )
|
1198
|
-
plural = plural_verb( word.to_s, 2 )
|
1199
|
-
|
1200
|
-
plural.sub!( /ie$/, 'y' ) or
|
1201
|
-
plural.sub!( /ue$/, 'u' ) or
|
1202
|
-
plural.sub!( /([auy])e$/, '$1' ) or
|
1203
|
-
plural.sub!( /i$/, '' ) or
|
1204
|
-
plural.sub!( /([^e])e$/, "\\1" ) or
|
1205
|
-
/er$/.match( plural ) or
|
1206
|
-
plural.sub!( /([^aeiou][aeiouy]([bdgmnprst]))$/, "\\1\\2" )
|
1207
|
-
|
1208
|
-
return "#{plural}ing"
|
1209
|
-
end
|
1210
|
-
alias_method :part_pres, :present_participle
|
1211
|
-
def_lprintf_formatter :PART_PRES, :present_participle
|
1212
|
-
|
1213
|
-
|
1214
|
-
|
1215
|
-
### Return the specified number as english words. One or more configuration
|
1216
|
-
### values may be passed to control the returned String:
|
1217
|
-
###
|
1218
|
-
### [<b>:group</b>]
|
1219
|
-
### Controls how many numbers at a time are grouped together. Valid values
|
1220
|
-
### are <code>0</code> (normal grouping), <code>1</code> (single-digit
|
1221
|
-
### grouping, e.g., "one, two, three, four"), <code>2</code>
|
1222
|
-
### (double-digit grouping, e.g., "twelve, thirty-four"), or <code>3</code>
|
1223
|
-
### (triple-digit grouping, e.g., "one twenty-three, four").
|
1224
|
-
### [<b>:comma</b>]
|
1225
|
-
### Set the character/s used to separate word groups. Defaults to
|
1226
|
-
### <code>", "</code>.
|
1227
|
-
### [<b>:and</b>]
|
1228
|
-
### Set the word and/or characters used where <code>' and ' </code>(the
|
1229
|
-
### default) is normally used. Setting <code>:and</code> to
|
1230
|
-
### <code>' '</code>, for example, will cause <code>2556</code> to be
|
1231
|
-
### returned as "two-thousand, five hundred fifty-six" instead of
|
1232
|
-
### "two-thousand, five hundred and fifty-six".
|
1233
|
-
### [<b>:zero</b>]
|
1234
|
-
### Set the word used to represent the numeral <code>0</code> in the
|
1235
|
-
### result. <code>'zero'</code> is the default.
|
1236
|
-
### [<b>:decimal</b>]
|
1237
|
-
### Set the translation of any decimal points in the number; the default
|
1238
|
-
### is <code>'point'</code>.
|
1239
|
-
### [<b>:asArray</b>]
|
1240
|
-
### If set to a true value, the number will be returned as an array of
|
1241
|
-
### word groups instead of a String.
|
1242
|
-
def numwords( number, hashargs={} )
|
1243
|
-
num = number.to_s
|
1244
|
-
config = NumwordDefaults.merge( hashargs )
|
1245
|
-
raise "Bad chunking option: #{config[:group]}" unless
|
1246
|
-
config[:group].between?( 0, 3 )
|
1247
|
-
|
1248
|
-
# Array of number parts: first is everything to the left of the first
|
1249
|
-
# decimal, followed by any groups of decimal-delimited numbers after that
|
1250
|
-
parts = []
|
1251
|
-
|
1252
|
-
# Wordify any sign prefix
|
1253
|
-
sign = (/\A\s*\+/ =~ num) ? 'plus' : (/\A\s*\-/ =~ num) ? 'minus' : ''
|
1254
|
-
|
1255
|
-
# Strip any ordinal suffixes
|
1256
|
-
ord = true if num.sub!( /(st|nd|rd|th)\Z/, '' )
|
1257
|
-
|
1258
|
-
# Split the number into chunks delimited by '.'
|
1259
|
-
chunks = if !config[:decimal].empty? then
|
1260
|
-
if config[:group].nonzero?
|
1261
|
-
num.split(/\./)
|
1262
|
-
else
|
1263
|
-
num.split(/\./, 2)
|
1264
|
-
end
|
1265
|
-
else
|
1266
|
-
[ num ]
|
1267
|
-
end
|
1268
|
-
|
1269
|
-
# Wordify each chunk, pushing arrays into the parts array
|
1270
|
-
chunks.each_with_index {|chunk,section|
|
1271
|
-
chunk.gsub!( /\D+/, '' )
|
1272
|
-
|
1273
|
-
# If there's nothing in this chunk of the number, set it to zero
|
1274
|
-
# unless it's the whole-number part, in which case just push an
|
1275
|
-
# empty array.
|
1276
|
-
if chunk.empty?
|
1277
|
-
if section.zero?
|
1278
|
-
parts.push []
|
1279
|
-
next
|
1280
|
-
end
|
1281
|
-
end
|
1282
|
-
|
1283
|
-
# Split the number section into wordified parts unless this is the
|
1284
|
-
# second or succeeding part of a non-group number
|
1285
|
-
unless config[:group].zero? && section.nonzero?
|
1286
|
-
parts.push number_to_words( chunk, config )
|
1287
|
-
else
|
1288
|
-
parts.push number_to_words( chunk, config.merge(:group => 1) )
|
1289
|
-
end
|
1290
|
-
}
|
1291
|
-
|
1292
|
-
debug_msg "Parts => #{parts.inspect}"
|
1293
|
-
|
1294
|
-
# Turn the last word of the whole-number part back into an ordinal if
|
1295
|
-
# the original number came in that way.
|
1296
|
-
if ord && !parts[0].empty?
|
1297
|
-
parts[0][-1] = ordinal( parts[0].last )
|
1298
|
-
end
|
1299
|
-
|
1300
|
-
# If the caller's expecting an Array return, just flatten and return the
|
1301
|
-
# parts array.
|
1302
|
-
if config[:asArray]
|
1303
|
-
unless sign.empty?
|
1304
|
-
parts[0].unshift( sign )
|
1305
|
-
end
|
1306
|
-
return parts.flatten
|
1307
|
-
end
|
1308
|
-
|
1309
|
-
# Catenate each sub-parts array into a whole number part and one or more
|
1310
|
-
# post-decimal parts. If grouping is turned on, all sub-parts get joined
|
1311
|
-
# with commas, otherwise just the whole-number part is.
|
1312
|
-
if config[:group].zero?
|
1313
|
-
if parts[0].length > 1
|
1314
|
-
|
1315
|
-
# Join all but the last part together with commas
|
1316
|
-
wholenum = parts[0][0...-1].join( config[:comma] )
|
1317
|
-
|
1318
|
-
# If the last part is just a single word, append it to the
|
1319
|
-
# wholenum part with an 'and'. This is to get things like 'three
|
1320
|
-
# thousand and three' instead of 'three thousand, three'.
|
1321
|
-
if /^\s*(\S+)\s*$/ =~ parts[0].last
|
1322
|
-
wholenum += config[:and] + parts[0].last
|
1323
|
-
else
|
1324
|
-
wholenum += config[:comma] + parts[0].last
|
1325
|
-
end
|
1326
|
-
else
|
1327
|
-
wholenum = parts[0][0]
|
1328
|
-
end
|
1329
|
-
decimals = parts[1..-1].collect {|part| part.join(" ")}
|
1330
|
-
|
1331
|
-
debug_msg "Wholenum: #{wholenum.inspect}; decimals: #{decimals.inspect}"
|
1332
|
-
|
1333
|
-
# Join with the configured decimal; if it's empty, just join with
|
1334
|
-
# spaces.
|
1335
|
-
unless config[:decimal].empty?
|
1336
|
-
return sign + ([ wholenum ] + decimals).
|
1337
|
-
join( " #{config[:decimal]} " ).strip
|
1338
|
-
else
|
1339
|
-
return sign + ([ wholenum ] + decimals).
|
1340
|
-
join( " " ).strip
|
1341
|
-
end
|
1342
|
-
else
|
1343
|
-
return parts.compact.
|
1344
|
-
separate( config[:decimal] ).
|
1345
|
-
delete_if {|el| el.empty?}.
|
1346
|
-
join( config[:comma] ).
|
1347
|
-
strip
|
1348
|
-
end
|
1349
|
-
end
|
1350
|
-
def_lprintf_formatter :NUMWORDS, :numwords
|
1351
|
-
|
1352
|
-
|
1353
|
-
### Transform the given +number+ into an ordinal word. The +number+ object
|
1354
|
-
### can be either an Integer or a String.
|
1355
|
-
def ordinal( number )
|
1356
|
-
case number
|
1357
|
-
when Integer
|
1358
|
-
return number.to_s + (Nth[ number % 100 ] || Nth[ number % 10 ])
|
1359
|
-
|
1360
|
-
else
|
1361
|
-
return number.to_s.sub( /(#{OrdinalSuffixes})\Z/ ) { Ordinals[$1] }
|
1362
|
-
end
|
1363
|
-
end
|
1364
|
-
def_lprintf_formatter :ORD, :ordinal
|
1365
|
-
|
1366
|
-
|
1367
|
-
### Transform the given +number+ into an ordinate word.
|
1368
|
-
def ordinate( number )
|
1369
|
-
return Linguistics::EN.ordinal( Linguistics::EN.numwords(number) )
|
1370
|
-
end
|
1371
|
-
|
1372
|
-
|
1373
|
-
### Return a phrase describing the specified +number+ of objects in the
|
1374
|
-
### given +phrase+ in general terms. The following options can be used to
|
1375
|
-
### control the makeup of the returned quantity String:
|
1376
|
-
###
|
1377
|
-
### [<b>:joinword</b>]
|
1378
|
-
### Sets the word (and any surrounding spaces) used as the word separating the
|
1379
|
-
### quantity from the noun in the resulting string. Defaults to <tt>' of
|
1380
|
-
### '</tt>.
|
1381
|
-
def quantify( phrase, number=0, args={} )
|
1382
|
-
num = number.to_i
|
1383
|
-
config = QuantifyDefaults.merge( args )
|
1384
|
-
|
1385
|
-
case num
|
1386
|
-
when 0
|
1387
|
-
no( phrase )
|
1388
|
-
when 1
|
1389
|
-
a( phrase )
|
1390
|
-
when SeveralRange
|
1391
|
-
"several " + plural( phrase, num )
|
1392
|
-
when NumberRange
|
1393
|
-
"a number of " + plural( phrase, num )
|
1394
|
-
when NumerousRange
|
1395
|
-
"numerous " + plural( phrase, num )
|
1396
|
-
when ManyRange
|
1397
|
-
"many " + plural( phrase, num )
|
1398
|
-
else
|
1399
|
-
|
1400
|
-
# Anything bigger than the ManyRange gets described like
|
1401
|
-
# "hundreds of thousands of..." or "millions of..."
|
1402
|
-
# depending, of course, on how many there are.
|
1403
|
-
thousands, subthousands = Math::log10( num ).to_i.divmod( 3 )
|
1404
|
-
stword =
|
1405
|
-
case subthousands
|
1406
|
-
when 2
|
1407
|
-
"hundreds"
|
1408
|
-
when 1
|
1409
|
-
"tens"
|
1410
|
-
else
|
1411
|
-
nil
|
1412
|
-
end
|
1413
|
-
thword = plural( to_thousands(thousands).strip )
|
1414
|
-
thword = nil if thword.empty?
|
1415
|
-
|
1416
|
-
[ # Hundreds (of)...
|
1417
|
-
stword,
|
1418
|
-
|
1419
|
-
# thousands (of)
|
1420
|
-
thword,
|
1421
|
-
|
1422
|
-
# stars.
|
1423
|
-
plural(phrase, number)
|
1424
|
-
].compact.join( config[:joinword] )
|
1425
|
-
end
|
1426
|
-
end
|
1427
|
-
def_lprintf_formatter :QUANT, :quantify
|
1428
|
-
|
1429
|
-
|
1430
|
-
# :TODO: Needs refactoring
|
1431
|
-
|
1432
|
-
### Return the specified +obj+ (which must support the <tt>#collect</tt>
|
1433
|
-
### method) as a conjunction. Each item is converted to a String if it is
|
1434
|
-
### not already (using #to_s) unless a block is given, in which case it is
|
1435
|
-
### called once for each object in the array, and the stringified return
|
1436
|
-
### value from the block is used instead. Returning +nil+ causes that
|
1437
|
-
### particular element to be omitted from the resulting conjunction. The
|
1438
|
-
### following options can be used to control the makeup of the returned
|
1439
|
-
### conjunction String:
|
1440
|
-
###
|
1441
|
-
### [<b>:separator</b>]
|
1442
|
-
### Specify one or more characters to separate items in the resulting
|
1443
|
-
### list. Defaults to <tt>', '</tt>.
|
1444
|
-
### [<b>:altsep</b>]
|
1445
|
-
### An alternate separator to use if any of the resulting conjunction's
|
1446
|
-
### clauses contain the <tt>:separator</tt> character/s. Defaults to <tt>'; '</tt>.
|
1447
|
-
### [<b>:penultimate</b>]
|
1448
|
-
### Flag that indicates whether or not to join the last clause onto the
|
1449
|
-
### rest of the conjunction using a penultimate <tt>:separator</tt>. E.g.,
|
1450
|
-
### %w{duck cow dog}.en.conjunction
|
1451
|
-
### # => "a duck, a cow, and a dog"
|
1452
|
-
### %w{duck cow dog}.en.conjunction( :penultimate => false )
|
1453
|
-
### # => "a duck, a cow and a dog"
|
1454
|
-
### Default to <tt>true</tt>.
|
1455
|
-
### [<b>:conjunctive</b>]
|
1456
|
-
### Sets the word used as the conjunctive (separating word) of the
|
1457
|
-
### resulting string. Default to <tt>'and'</tt>.
|
1458
|
-
### [<b>:combine</b>]
|
1459
|
-
### If set to <tt>true</tt> (the default), items which are identical (after
|
1460
|
-
### surrounding spaces are stripped) will be combined in the resulting
|
1461
|
-
### conjunction. E.g.,
|
1462
|
-
### %w{goose cow goose dog}.en.conjunction
|
1463
|
-
### # => "two geese, a cow, and a dog"
|
1464
|
-
### %w{goose cow goose dog}.en.conjunction( :combine => false )
|
1465
|
-
### # => "a goose, a cow, a goose, and a dog"
|
1466
|
-
### [<b>:casefold</b>]
|
1467
|
-
### If set to <tt>true</tt> (the default), then items are compared
|
1468
|
-
### case-insensitively when combining them. This has no effect if
|
1469
|
-
### <tt>:combine</tt> is <tt>false</tt>.
|
1470
|
-
### [<b>:generalize</b>]
|
1471
|
-
### If set to <tt>true</tt>, then quantities of combined items are turned into
|
1472
|
-
### general descriptions instead of exact amounts.
|
1473
|
-
### ary = %w{goose pig dog horse goose reindeer goose dog horse}
|
1474
|
-
### ary.en.conjunction
|
1475
|
-
### # => "three geese, two dogs, two horses, a pig, and a reindeer"
|
1476
|
-
### ary.en.conjunction( :generalize => true )
|
1477
|
-
### # => "several geese, several dogs, several horses, a pig, and a reindeer"
|
1478
|
-
### See the #quantify method for specifics on how quantities are
|
1479
|
-
### generalized. Generalization defaults to <tt>false</tt>, and has no effect if
|
1480
|
-
### :combine is <tt>false</tt>.
|
1481
|
-
### [<b>:quantsort</b>]
|
1482
|
-
### If set to <tt>true</tt> (the default), items which are combined in the
|
1483
|
-
### resulting conjunction will be listed in order of amount, with greater
|
1484
|
-
### quantities sorted first. If <tt>:quantsort</tt> is <tt>false</tt>, combined items
|
1485
|
-
### will appear where the first instance of them occurred in the
|
1486
|
-
### list. This sort is also the fallback for identical quantities (i.e.,
|
1487
|
-
### items of the same quantity will be listed in the order they appeared
|
1488
|
-
### in the source list).
|
1489
|
-
###
|
1490
|
-
def conjunction( obj, args={} )
|
1491
|
-
config = ConjunctionDefaults.merge( args )
|
1492
|
-
phrases = []
|
1493
|
-
|
1494
|
-
# Transform items in the obj to phrases
|
1495
|
-
if block_given?
|
1496
|
-
phrases = obj.collect {|item| yield(item) }.compact
|
1497
|
-
else
|
1498
|
-
phrases = obj.collect {|item| item.to_s }
|
1499
|
-
end
|
1500
|
-
|
1501
|
-
# No need for a conjunction if there's only one thing
|
1502
|
-
return a(phrases[0]) if phrases.length < 2
|
1503
|
-
|
1504
|
-
# Set up a Proc to derive a collector key from a phrase depending on the
|
1505
|
-
# configuration
|
1506
|
-
keyfunc =
|
1507
|
-
if config[:casefold]
|
1508
|
-
proc {|key| key.downcase.strip}
|
1509
|
-
else
|
1510
|
-
proc {|key| key.strip}
|
1511
|
-
end
|
1512
|
-
|
1513
|
-
# Count and delete phrases that hash the same when the keyfunc munges
|
1514
|
-
# them into the same thing if we're combining (:combine => true).
|
1515
|
-
collector = {}
|
1516
|
-
if config[:combine]
|
1517
|
-
|
1518
|
-
phrases.each_index do |i|
|
1519
|
-
# Stop when reaching the end of a truncated list
|
1520
|
-
break if phrases[i].nil?
|
1521
|
-
|
1522
|
-
# Make the key using the configured key function
|
1523
|
-
phrase = keyfunc[ phrases[i] ]
|
1524
|
-
|
1525
|
-
# If the collector already has this key, increment its count,
|
1526
|
-
# eliminate the duplicate from the phrase list, and redo the loop.
|
1527
|
-
if collector.key?( phrase )
|
1528
|
-
collector[ phrase ] += 1
|
1529
|
-
phrases.delete_at( i )
|
1530
|
-
redo
|
1531
|
-
end
|
1532
|
-
|
1533
|
-
collector[ phrase ] = 1
|
1534
|
-
end
|
1535
|
-
else
|
1536
|
-
# If we're not combining, just make everything have a count of 1.
|
1537
|
-
phrases.uniq.each {|key| collector[ keyfunc[key] ] = 1}
|
1538
|
-
end
|
1539
|
-
|
1540
|
-
# If sort-by-quantity is turned on, sort the phrases first by how many
|
1541
|
-
# there are (most-first), and then by the order they were specified in.
|
1542
|
-
if config[:quantsort] && config[:combine]
|
1543
|
-
origorder = {}
|
1544
|
-
phrases.each_with_index {|phrase,i| origorder[ keyfunc[phrase] ] ||= i }
|
1545
|
-
phrases.sort! {|a,b|
|
1546
|
-
(collector[ keyfunc[b] ] <=> collector[ keyfunc[a] ]).nonzero? ||
|
1547
|
-
(origorder[ keyfunc[a] ] <=> origorder[ keyfunc[b] ])
|
1548
|
-
}
|
1549
|
-
end
|
1550
|
-
|
1551
|
-
# Set up a filtering function that adds either an indefinite article, an
|
1552
|
-
# indefinite quantifier, or a definite quantifier to each phrase
|
1553
|
-
# depending on the configuration and the count of phrases in the
|
1554
|
-
# collector.
|
1555
|
-
filter =
|
1556
|
-
if config[:generalize]
|
1557
|
-
proc {|phrase, count| quantify(phrase, count) }
|
1558
|
-
else
|
1559
|
-
proc {|phrase, count|
|
1560
|
-
if count > 1
|
1561
|
-
"%s %s" % [
|
1562
|
-
# :TODO: Make this threshold settable
|
1563
|
-
count < 10 ? count.en.numwords : count.to_s,
|
1564
|
-
plural(phrase, count)
|
1565
|
-
]
|
1566
|
-
else
|
1567
|
-
a( phrase )
|
1568
|
-
end
|
1569
|
-
}
|
1570
|
-
end
|
1571
|
-
|
1572
|
-
# Now use the configured filter to turn each phrase into its final
|
1573
|
-
# form. Hmmm... square-bracket Lisp?
|
1574
|
-
phrases.collect! {|phrase| filter[phrase, collector[ keyfunc[phrase] ]] }
|
1575
|
-
|
1576
|
-
# Prepend the conjunctive to the last element unless it's empty or
|
1577
|
-
# there's only one element
|
1578
|
-
phrases[-1].insert( 0, config[:conjunctive] + " " ) unless
|
1579
|
-
config[:conjunctive].strip.empty? or
|
1580
|
-
phrases.length < 2
|
1581
|
-
|
1582
|
-
# Concatenate the last two elements if there's no penultimate separator,
|
1583
|
-
# and pick a separator based on how many phrases there are and whether
|
1584
|
-
# or not there's already an instance of it in the phrases.
|
1585
|
-
phrase_count = phrases.length
|
1586
|
-
phrases[-2] << " " << phrases.pop unless config[:penultimate]
|
1587
|
-
sep = config[:separator]
|
1588
|
-
if phrase_count <= 2
|
1589
|
-
sep = ' '
|
1590
|
-
elsif phrases.find {|str| str.include?(config[:separator]) }
|
1591
|
-
sep = config[:altsep]
|
1592
|
-
end
|
1593
|
-
|
1594
|
-
return phrases.join( sep )
|
1595
|
-
end
|
1596
|
-
def_lprintf_formatter :CONJUNCT, :conjunction
|
1597
|
-
|
1598
|
-
|
1599
|
-
### Turns a camel-case +string+ ("camelCaseToEnglish") to plain English
|
1600
|
-
### ("camel case to english"). Each word is decapitalized.
|
1601
|
-
def camel_case_to_english( string )
|
1602
|
-
string.to_s.
|
1603
|
-
gsub( /([A-Z])([A-Z])/ ) { "#$1 #$2" }.
|
1604
|
-
gsub( /([a-z])([A-Z])/ ) { "#$1 #$2" }.downcase
|
1605
|
-
end
|
1606
|
-
|
1607
|
-
|
1608
|
-
### Turns an English language +string+ into a CamelCase word.
|
1609
|
-
def english_to_camel_case( string )
|
1610
|
-
string.to_s.gsub( /\s+([a-z])/ ) { $1.upcase }
|
1611
|
-
end
|
1612
|
-
|
1613
|
-
|
1614
|
-
### This method doesn't work quite right yet. It does okay for simple cases,
|
1615
|
-
### but it misses more complex ones, e.g. 'as' used as a coordinating
|
1616
|
-
### conjunction in "A Portrait of the Artist as a Young Man". Perhaps after
|
1617
|
-
### there's a working (non-leaking) LinkParser for Ruby, this can be fixed
|
1618
|
-
### up. Until then it'll just be undocumented.
|
1619
|
-
|
1620
|
-
### Returns the given +string+ as a title-cased phrase.
|
1621
|
-
def titlecase( string ) # :nodoc:
|
1622
|
-
|
1623
|
-
# Split on word-boundaries
|
1624
|
-
words = string.split( /\b/ )
|
1625
|
-
|
1626
|
-
# Always capitalize the first and last words
|
1627
|
-
words.first.capitalize!
|
1628
|
-
words.last.capitalize!
|
1629
|
-
|
1630
|
-
# Now scan the rest of the tokens, skipping non-words and capitalization
|
1631
|
-
# exceptions.
|
1632
|
-
words.each_with_index do |word, i|
|
1633
|
-
|
1634
|
-
# Non-words
|
1635
|
-
next unless /^\w+$/.match( word )
|
1636
|
-
|
1637
|
-
# Skip exception-words
|
1638
|
-
next if TitleCaseExceptions.include?( word )
|
1639
|
-
|
1640
|
-
# Skip second parts of contractions
|
1641
|
-
next if words[i - 1] == "'" && /\w/.match( words[i - 2] )
|
1642
|
-
|
1643
|
-
# Have to do it this way instead of capitalize! because that method
|
1644
|
-
# also downcases all other letters.
|
1645
|
-
word.gsub!( /^(\w)(.*)/ ) { $1.upcase + $2 }
|
1646
|
-
end
|
1647
|
-
|
1648
|
-
return words.join
|
1649
|
-
end
|
1650
|
-
|
1651
|
-
|
1652
|
-
### Returns the proper noun form of a string by capitalizing most of the
|
1653
|
-
### words.
|
1654
|
-
###
|
1655
|
-
### Examples:
|
1656
|
-
### English.proper_noun("bosnia and herzegovina") ->
|
1657
|
-
### "Bosnia and Herzegovina"
|
1658
|
-
### English.proper_noun("macedonia, the former yugoslav republic of") ->
|
1659
|
-
### "Macedonia, the Former Yugoslav Republic of"
|
1660
|
-
### English.proper_noun("virgin islands, u.s.") ->
|
1661
|
-
### "Virgin Islands, U.S."
|
1662
|
-
def proper_noun( string )
|
1663
|
-
return string.split(/([ .]+)/).collect {|word|
|
1664
|
-
next word unless /^[a-z]/.match( word ) &&
|
1665
|
-
! (%w{and the of}.include?( word ))
|
1666
|
-
word.capitalize
|
1667
|
-
}.join
|
1668
|
-
end
|
1669
|
-
|
1670
|
-
|
1671
406
|
### Format the given +fmt+ string by replacing %-escaped sequences with the
|
1672
407
|
### result of performing a specified operation on the corresponding
|
1673
408
|
### argument, ala Kernel.sprintf.
|
@@ -1681,48 +416,22 @@ module Linguistics::EN
|
|
1681
416
|
### Convert a number into the corresponding words.
|
1682
417
|
### %CONJUNCT::
|
1683
418
|
### Conjunction.
|
1684
|
-
def lprintf(
|
1685
|
-
|
419
|
+
def lprintf( *args )
|
420
|
+
return self.to_s.gsub( /%([A-Z_]+)/ ) do |match|
|
1686
421
|
op = $1.to_s.upcase.to_sym
|
1687
|
-
if
|
422
|
+
if (( callback = Linguistics::EN.lprintf_formatters[op] ))
|
1688
423
|
arg = args.shift
|
1689
|
-
|
424
|
+
callback.call( arg.en )
|
1690
425
|
else
|
1691
|
-
raise "no such formatter %p" % op
|
426
|
+
raise "no such formatter %p" % [ op ]
|
1692
427
|
end
|
1693
428
|
end
|
1694
429
|
end
|
1695
430
|
|
1696
|
-
end # module Linguistics::EN
|
1697
|
-
|
1698
|
-
|
1699
|
-
### Add the #separate and #separate! methods to Array.
|
1700
|
-
class Array
|
1701
|
-
|
1702
|
-
### Returns a new Array that has had a new member inserted between all of
|
1703
|
-
### the current ones. The value used is the given +value+ argument unless a
|
1704
|
-
### block is given, in which case the block is called once for each pair of
|
1705
|
-
### the Array, and the return value is used as the separator.
|
1706
|
-
def separate( value=:__no_arg__, &block )
|
1707
|
-
ary = self.dup
|
1708
|
-
ary.separate!( value, &block )
|
1709
|
-
return ary
|
1710
|
-
end
|
1711
431
|
|
1712
|
-
|
1713
|
-
|
1714
|
-
raise ArgumentError, "wrong number of arguments: (0 for 1)" if
|
1715
|
-
value == :__no_arg__ && !block_given?
|
432
|
+
# Add 'english' to the list of default languages
|
433
|
+
Linguistics.register_language( :en, self )
|
1716
434
|
|
1717
|
-
(1..( (self.length * 2) - 2 )).step(2) do |i|
|
1718
|
-
if block_given?
|
1719
|
-
self.insert( i, yield(self[i-1,2]) )
|
1720
|
-
else
|
1721
|
-
self.insert( i, value )
|
1722
|
-
end
|
1723
|
-
end
|
1724
|
-
self
|
1725
|
-
end
|
1726
435
|
|
1727
|
-
end
|
436
|
+
end # module Linguistics::EN
|
1728
437
|
|