linguistics 1.0.9 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/.gemtest +0 -0
- data/ChangeLog +849 -342
- data/History.rdoc +11 -0
- data/LICENSE +9 -9
- data/Manifest.txt +44 -0
- data/README.rdoc +226 -0
- data/Rakefile +32 -349
- data/examples/endocs.rb +272 -0
- data/examples/generalize_sentence.rb +2 -1
- data/examples/klingon.rb +22 -0
- data/lib/linguistics.rb +130 -292
- data/lib/linguistics/en.rb +337 -1628
- data/lib/linguistics/en/articles.rb +138 -0
- data/lib/linguistics/en/conjugation.rb +2245 -0
- data/lib/linguistics/en/conjunctions.rb +202 -0
- data/lib/linguistics/en/{infinitive.rb → infinitives.rb} +41 -55
- data/lib/linguistics/en/linkparser.rb +41 -49
- data/lib/linguistics/en/numbers.rb +483 -0
- data/lib/linguistics/en/participles.rb +33 -0
- data/lib/linguistics/en/pluralization.rb +810 -0
- data/lib/linguistics/en/stemmer.rb +75 -0
- data/lib/linguistics/en/titlecase.rb +121 -0
- data/lib/linguistics/en/wordnet.rb +63 -97
- data/lib/linguistics/inflector.rb +89 -0
- data/lib/linguistics/iso639.rb +534 -448
- data/lib/linguistics/languagebehavior.rb +36 -0
- data/lib/linguistics/monkeypatches.rb +42 -0
- data/spec/lib/constants.rb +15 -0
- data/spec/lib/helpers.rb +38 -0
- data/spec/linguistics/en/articles_spec.rb +797 -0
- data/spec/linguistics/en/conjugation_spec.rb +2083 -0
- data/spec/linguistics/en/conjunctions_spec.rb +154 -0
- data/spec/linguistics/en/infinitives_spec.rb +518 -0
- data/spec/linguistics/en/linkparser_spec.rb +66 -0
- data/spec/linguistics/en/numbers_spec.rb +1295 -0
- data/spec/linguistics/en/participles_spec.rb +55 -0
- data/spec/linguistics/en/pluralization_spec.rb +4636 -0
- data/spec/linguistics/en/stemmer_spec.rb +72 -0
- data/spec/linguistics/en/titlecase_spec.rb +841 -0
- data/spec/linguistics/en/wordnet_spec.rb +85 -0
- data/spec/linguistics/en_spec.rb +45 -167
- data/spec/linguistics/inflector_spec.rb +40 -0
- data/spec/linguistics/iso639_spec.rb +49 -53
- data/spec/linguistics/monkeypatches_spec.rb +40 -0
- data/spec/linguistics_spec.rb +46 -76
- metadata +241 -113
- metadata.gz.sig +0 -0
- data/README +0 -166
- data/README.english +0 -245
- data/rake/191_compat.rb +0 -26
- data/rake/dependencies.rb +0 -76
- data/rake/documentation.rb +0 -123
- data/rake/helpers.rb +0 -502
- data/rake/hg.rb +0 -318
- data/rake/manual.rb +0 -787
- data/rake/packaging.rb +0 -129
- data/rake/publishing.rb +0 -341
- data/rake/style.rb +0 -62
- data/rake/svn.rb +0 -668
- data/rake/testing.rb +0 -152
- data/rake/verifytask.rb +0 -64
- data/tests/en/infinitive.tests.rb +0 -207
- data/tests/en/inflect.tests.rb +0 -1389
- data/tests/en/lafcadio.tests.rb +0 -77
- data/tests/en/linkparser.tests.rb +0 -42
- data/tests/en/lprintf.tests.rb +0 -77
- data/tests/en/titlecase.tests.rb +0 -73
- data/tests/en/wordnet.tests.rb +0 -95
@@ -0,0 +1,483 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'linguistics/en' unless defined?( Linguistics::EN )
|
4
|
+
|
5
|
+
# Numeric methods for the English-language Linguistics module.
|
6
|
+
module Linguistics::EN::Numbers
|
7
|
+
|
8
|
+
# Register this module to the list of modules to include
|
9
|
+
Linguistics::EN.register_extension( self )
|
10
|
+
|
11
|
+
#
|
12
|
+
# Numerals, ordinals, and numbers-to-words
|
13
|
+
#
|
14
|
+
|
15
|
+
# Default configuration arguments for the #numwords function. Options passed
# by the caller are merged over this Hash. The Array-return switch is spelled
# :as_array, which is the key #numwords looks up.
NUMWORD_DEFAULTS = {
  :group    => 0,
  :comma    => ', ',
  :and      => ' and ',
  :zero     => 'zero',
  :decimal  => 'point',
  :as_array => false,
}
|
24
|
+
|
25
|
+
# Default configuration arguments for the #quantify function
QUANTIFY_DEFAULTS = {
  :joinword => " of ",
}

# Default ranges for #quantify. A count that falls inside a range is
# described as "several", "a number of", "numerous" or "many", respectively.
SEVERAL_RANGE  = 2..5
NUMBER_RANGE   = 6..19
NUMEROUS_RANGE = 20..45
MANY_RANGE     = 46..99
|
35
|
+
|
36
|
+
# Numerical inflections: the suffix that turns a numeral into an ordinal
# ("1" -> "1st"), keyed by the numeral's final digit. 11, 12 and 13 have
# their own entries because they take 'th' even though they end in 1, 2
# and 3.
NTH = {
  0  => 'th',
  1  => 'st',
  2  => 'nd',
  3  => 'rd',
  4  => 'th',
  5  => 'th',
  6  => 'th',
  7  => 'th',
  8  => 'th',
  9  => 'th',
  11 => 'th',
  12 => 'th',
  13 => 'th',
}
|
52
|
+
|
53
|
+
# Ordinal word parts: maps the ending of a cardinal number word to the
# ending of the corresponding ordinal word.
ORDINALS = {
  'ty'     => 'tieth',
  'one'    => 'first',
  'two'    => 'second',
  'three'  => 'third',
  'five'   => 'fifth',
  'eight'  => 'eighth',
  'nine'   => 'ninth',
  'twelve' => 'twelfth',
}

# Regexp alternation made from the endings defined so far. The trailing "|"
# adds an empty alternative, so a word with none of these endings still
# matches (with an empty capture) at its end.
ORDINAL_SUFFIXES = ORDINALS.keys.join("|") + "|"

# Replacement for that empty capture: the plain 'th' suffix.
ORDINALS[""] = 'th'
|
66
|
+
|
67
|
+
# Numeral names. UNITS and TENS are indexed by digit value (the empty
# entries stand for digits that contribute no word of their own); TEENS is
# indexed by the units digit of a number between 10 and 19.
UNITS = [''] + %w[one two three four five six seven eight nine]
TEENS = %w[ten eleven twelve thirteen fourteen
           fifteen sixteen seventeen eighteen nineteen]
TENS = ['',''] + %w[twenty thirty forty fifty sixty seventy eighty ninety]

# Names of the successive powers of one thousand, indexed by the number of
# three-digit groups to the right. Every entry carries a leading space so it
# can be appended directly to a number word.
THOUSANDS = [' ', ' thousand'] + %w[
  m b tr quadr quint sext sept oct non dec undec duodec tredec
  quattuordec quindec sexdec septemdec octodec novemdec vigint
].collect {|prefix| ' ' + prefix + 'illion'}
|
76
|
+
|
77
|
+
|
78
|
+
# A collection of functions for transforming digits into word
# phrases. Indexed by the number of digits being transformed; e.g.,
# <tt>NUMBER_TO_WORDS_FUNCTIONS[2]</tt> is the function for transforming
# double-digit numbers. Each function takes the word to use for zero,
# followed by one Integer per digit.
NUMBER_TO_WORDS_FUNCTIONS = [
  # No digits at all: always an error
  proc {|*args| raise "No digits (#{args.inspect})"},

  # Single-digits
  proc {|zero,x|
    (x.nonzero? ? to_units(x) : "#{zero} ")
  },

  # Double-digits
  proc {|zero,x,y|
    if x.nonzero?
      to_tens( x, y )
    elsif y.nonzero?
      # Leading zero: say the zero word, then the remaining digit
      "#{zero} " + NUMBER_TO_WORDS_FUNCTIONS[1].call( zero, y )
    else
      ([zero] * 2).join(" ")
    end
  },

  # Triple-digits: the first digit on its own, then the remaining pair
  proc {|zero,x,y,z|
    NUMBER_TO_WORDS_FUNCTIONS[1].call(zero,x) +
      NUMBER_TO_WORDS_FUNCTIONS[2].call(zero,y,z)
  }
]
|
107
|
+
|
108
|
+
|
109
|
+
### Return the specified number as english words. A leading '+' or '-' is
### rendered as the word "plus" or "minus" in front of the number. One or
### more configuration values may be passed to control the returned String:
###
### [<b>:group</b>]
###   Controls how many numbers at a time are grouped together. Valid values
###   are <code>0</code> (normal grouping), <code>1</code> (single-digit
###   grouping, e.g., "one, two, three, four"), <code>2</code>
###   (double-digit grouping, e.g., "twelve, thirty-four"), or <code>3</code>
###   (triple-digit grouping, e.g., "one twenty-three, four").
### [<b>:comma</b>]
###   Set the character/s used to separate word groups. Defaults to
###   <code>", "</code>.
### [<b>:and</b>]
###   Set the word and/or characters used where <code>' and ' </code>(the
###   default) is normally used. Setting <code>:and</code> to
###   <code>' '</code>, for example, will cause <code>2556</code> to be
###   returned as "two thousand, five hundred fifty-six" instead of
###   "two thousand, five hundred and fifty-six".
### [<b>:zero</b>]
###   Set the word used to represent the numeral <code>0</code> in the
###   result. <code>'zero'</code> is the default.
### [<b>:decimal</b>]
###   Set the translation of any decimal points in the number; the default
###   is <code>'point'</code>.
### [<b>:as_array</b>]
###   If set to a true value, the number will be returned as an array of
###   word groups instead of a String.
###
### Raises a RuntimeError if <code>:group</code> is not between 0 and 3.
def numwords( hashargs={} )
  # Work on a private copy: the suffix- and digit-stripping below edit the
  # String in place, and #to_s may hand back a String that belongs to the
  # caller or is frozen.
  num = self.to_s.dup
  self.log.debug "Turning %p into number words..." % [ num ]
  config = NUMWORD_DEFAULTS.merge( hashargs )
  raise "Bad chunking option: #{config[:group]}" unless
    config[:group].between?( 0, 3 )

  # Array of number parts: first is everything to the left of the first
  # decimal, followed by any groups of decimal-delimited numbers after that
  parts = []

  # Wordify any sign prefix
  sign = (/\A\s*\+/ =~ num) ? 'plus' : (/\A\s*\-/ =~ num) ? 'minus' : ''

  # Strip any ordinal suffixes
  ord = true if num.sub!( /(st|nd|rd|th)\Z/, '' )

  # Split the number into chunks delimited by '.'
  chunks =
    if config[:decimal].empty?
      [ num ]
    elsif config[:group].nonzero?
      num.split( /\./ )
    else
      num.split( /\./, 2 )
    end

  # String#split yields no chunks at all for an empty (or all-dots) String;
  # keep one (empty) whole-number chunk so parts[0] always exists.
  chunks = [ num ] if chunks.empty?

  # Wordify each chunk, pushing arrays into the parts array
  chunks.each_with_index do |chunk,section|
    chunk.gsub!( /\D+/, '' )
    self.log.debug "  working on chunk %p (section %d)" % [ chunk, section ]

    # If there's nothing in this chunk of the number, set it to zero
    # unless it's the whole-number part, in which case just push an
    # empty array.
    if chunk.empty?
      self.log.debug "  chunk is empty..."
      if section.zero?
        self.log.debug "  skipping the empty whole-number part"
        parts.push []
        next
      end
    end

    # The second and succeeding parts of a non-group number (i.e., the
    # digits after the decimal point) are read out one digit at a time.
    chunk_config =
      if config[:group].zero? && section.nonzero?
        config.merge( :group => 1 )
      else
        config
      end
    parts.push number_to_words( chunk, chunk_config )
    self.log.debug "  added %p" % [ parts.last ]
  end

  self.log.debug "Parts => %p" % [ parts ]

  # Turn the last word of the whole-number part back into an ordinal if
  # the original number came in that way. #ordinal takes no arguments and
  # works on the inflected object, so inflect the word itself.
  if ord && !parts[0].empty?
    self.log.debug "  turning the last whole-number part back into an ordinal, since it " +
      "came in that way"
    parts[0][-1] = parts[0].last.en.ordinal
  end

  # If the caller's expecting an Array return, just flatten and return the
  # parts array.
  if config[:as_array]
    self.log.debug "  returning the number parts as an Array"
    parts[0].unshift( sign ) unless sign.empty?
    return parts.flatten
  end

  # Catenate each sub-parts array into a whole number part and one or more
  # post-decimal parts. If grouping is turned on, all sub-parts get joined
  # with commas, otherwise just the whole-number part is.
  if config[:group].zero?
    self.log.debug "  no custom grouping"
    if parts[0].length > 1
      self.log.debug "  whole and decimal part; working on the whole number first"

      # Join all but the last part together with commas
      wholenum = parts[0][0...-1].join( config[:comma] )

      # If the last part is just a single word, append it to the
      # wholenum part with an 'and'. This is to get things like 'three
      # thousand and three' instead of 'three thousand, three'.
      if /^\s*(\S+)\s*$/ =~ parts[0].last
        self.log.debug "last word is a single word; using the 'and' separator: %p" %
          [ config[:and] ]
        wholenum += config[:and] + parts[0].last
      else
        self.log.debug "last word has multiple words; using the comma separator: %p" %
          [ config[:comma] ]
        wholenum += config[:comma] + parts[0].last
      end
    else
      self.log.debug "  non-decimal."
      wholenum = parts[0][0]
    end

    decimals = parts[1..-1].collect {|part| part.join(" ")}
    self.log.debug "  wholenum: %p; decimals: %p" % [ wholenum, decimals ]

    # Join with the configured decimal; if it's empty, just join with
    # spaces.
    if config[:decimal].empty?
      self.log.debug "  joining with the spaces since no decimal is configured"
      separator = " "
    else
      self.log.debug "  joining with the configured decimal: %p" % [ config[:decimal] ]
      separator = " #{config[:decimal]} "
    end
    words = ([ wholenum ] + decimals).join( separator ).strip

  else
    self.log.debug "  grouping with decimal %p and comma %p" %
      config.values_at( :decimal, :comma )
    words = parts.compact.
      separate( config[:decimal] ).
      delete_if {|el| el.empty?}.
      join( config[:comma] ).
      strip
  end

  # Put the sign word (if any) in front, separated from the number by a
  # space rather than glued onto its first word.
  return [ sign, words ].reject {|str| str.empty? }.join( ' ' )
end
|
265
|
+
Linguistics::EN.register_lprintf_formatter :NUMWORDS, :numwords
|
266
|
+
|
267
|
+
|
268
|
+
### Transform the inflected object into an ordinal. An object that responds
### to #to_int is returned as a numeral with an ordinal suffix (e.g., "3rd");
### anything else is treated as a number word whose ending is replaced with
### the ordinal ending (e.g., "twenty-one" becomes "twenty-first").
def ordinal
  if self.respond_to?( :to_int )
    number = self.to_int

    # Choose the suffix from the magnitude: Ruby's % on a negative
    # number yields the complement (-1 % 10 == 9), which would pick the
    # suffix for the wrong digit.
    magnitude = number.abs
    suffix = NTH[ magnitude % 100 ] || NTH[ magnitude % 10 ]
    return "%d%s" % [ number, suffix ]

  else
    number = self.to_s
    self.log.debug "Making an ordinal out of a non-Integer (%p)" % [ number ]
    return number.sub( /(#{ORDINAL_SUFFIXES})\Z/ ) { ORDINALS[$1] }
  end
end
|
281
|
+
Linguistics::EN.register_lprintf_formatter :ORD, :ordinal
|
282
|
+
|
283
|
+
|
284
|
+
### Transform the inflected object into an ordinal word: it is first
### turned into number words with #numwords (using the default options),
### and the result is then inflected with #ordinal.
def ordinate
  return self.numwords.en.ordinal
end
|
288
|
+
|
289
|
+
|
290
|
+
### Return a phrase describing the specified +number+ of objects in the
### inflected object in general terms. Counts of 0 and 1 use the "no" and
### indefinite-article forms; counts up to 99 are described with "several",
### "a number of", "numerous" or "many"; anything larger is described by its
### order of magnitude (e.g., "hundreds of thousands of..."). A negative
### count that reaches the magnitude step is described by its absolute value.
### The following options can be used to control the makeup of the returned
### quantity String:
###
### [<b>:joinword</b>]
###   Sets the word (and any surrounding spaces) used as the word separating the
###   quantity from the noun in the resulting string. Defaults to <tt>' of
###   '</tt>.
def quantify( number=0, args={} )
  phrase = self.to_s
  self.log.debug "Quantifying %d instances of %p" % [ number, phrase ]

  num = number.to_i
  config = QUANTIFY_DEFAULTS.merge( args )

  case num
  when 0
    phrase.en.no
  when 1
    phrase.en.a
  when SEVERAL_RANGE
    "several " + phrase.en.plural( num )
  when NUMBER_RANGE
    "a number of " + phrase.en.plural( num )
  when NUMEROUS_RANGE
    "numerous " + phrase.en.plural( num )
  when MANY_RANGE
    "many " + phrase.en.plural( num )
  else

    # Anything bigger than the MANY_RANGE gets described like
    # "hundreds of thousands of..." or "millions of..."
    # depending, of course, on how many there are.
    #
    # The order of magnitude is taken from the digit count rather than
    # from a floating-point logarithm: a Float log10 can round up to the
    # next whole number for values just under a power of ten, and is
    # undefined for negative counts.
    magnitude = num.abs.to_s.length - 1
    thousands, subthousands = magnitude.divmod( 3 )
    self.log.debug "thousands = %p, subthousands = %p" % [ thousands, subthousands ]

    stword =
      case subthousands
      when 2
        "hundreds"
      when 1
        "tens"
      else
        nil
      end

    thword = nil
    unless thousands.zero?
      thword = to_thousands( thousands ).strip.en.plural
    end

    [ # Hundreds (of)...
      stword,

      # thousands (of)
      thword,

      # stars.
      phrase.en.plural( num )
    ].compact.join( config[:joinword] )
  end
end
|
351
|
+
Linguistics::EN.register_lprintf_formatter :QUANT, :quantify
|
352
|
+
|
353
|
+
|
354
|
+
###############
|
355
|
+
module_function
|
356
|
+
###############
|
357
|
+
|
358
|
+
### Transform the specified number of units-place numerals into a
### word-phrase at the given number of +thousands+ places. +units+ is the
### digit's value (0-9); a value of 0 contributes no word of its own.
def to_units( units, thousands=0 )
  return UNITS[ units ] + to_thousands( thousands )
end
|
363
|
+
|
364
|
+
|
365
|
+
### Transform the specified number of tens- and units-place numerals into a
### word-phrase at the given number of +thousands+ places. Raises an
### ArgumentError if either +tens+ or +units+ is nil.
def to_tens( tens, units, thousands=0 )
  raise ArgumentError, "tens: no implicit conversion from nil" unless tens
  raise ArgumentError, "units: no implicit conversion from nil" unless units

  # 10-19 have names of their own
  return TEENS[ units ] + to_thousands( thousands ) if tens == 1

  # Only hyphenate when there's a word on both sides ("twenty-one", but
  # "twenty" and "one")
  hyphen = ( tens.nonzero? && units.nonzero? ? '-' : '' )
  return TENS[ tens ] + hyphen + to_units( units, thousands )
end
|
378
|
+
|
379
|
+
|
380
|
+
### Transform the specified number of hundreds-, tens-, and units-place
### numerals into a word phrase. If the number of thousands (+thousands+) is
### greater than 0, it will be used to determine where the decimal point is
### in relation to the hundreds-place number. +joinword+ goes between the
### hundreds and the rest of the number; an empty +joinword+ is treated as a
### single space. Returns +nil+ if all three digits are zero.
def to_hundreds( hundreds, tens=0, units=0, thousands=0, joinword=" and " )
  joinword = ' ' if joinword.empty?
  has_remainder = tens.nonzero? || units.nonzero?

  if hundreds.nonzero?
    return to_units( hundreds ) + " hundred" +
      (has_remainder ? joinword : '') +
      to_tens( tens, units ) +
      to_thousands( thousands )
  elsif has_remainder
    return to_tens( tens, units ) + to_thousands( thousands )
  else
    return nil
  end
end
|
397
|
+
|
398
|
+
### Transform the specified number into one or more words like 'thousand',
### 'million', etc. Uses the thousands (American) system. +thousands+ is the
### count of three-digit groups to the right; counts past the end of the
### THOUSANDS table are expressed by repeating its last entry.
def to_thousands( thousands=0 )
  stride = THOUSANDS.length - 1
  parts = []

  (0..thousands).step( stride ) do |i|
    if i.zero?
      # First pass: the remainder that fits inside the table
      parts.push THOUSANDS[ thousands % stride ]
    else
      # One more copy of the largest name for every full table's worth
      parts.push THOUSANDS.last
    end
  end

  return parts.join(" ")
end
|
412
|
+
|
413
|
+
|
414
|
+
### Return the specified number +number+ as an array of number phrases.
### +config+ is a Hash of #numwords options; its :group, :zero and :and
### values are used here. A +number+ whose integer value is zero is returned
### as the single :zero word.
def number_to_words( number, config )
  return [config[:zero]] if number.to_i.zero?

  if config[:group].nonzero?
    return number_to_custom_word_groups( number, config[:group], config[:zero] )
  else
    return number_to_standard_word_groups( number, config[:and] )
  end
end
|
424
|
+
|
425
|
+
|
426
|
+
### Split the given +number+ up into groups of +groupsize+ and return
### them as an Array of words. Use +zeroword+ for any occurrences of '0'.
def number_to_custom_word_groups( number, groupsize, zeroword="zero" )
  self.log.debug "Making custom word groups of %d digits out of %p" % [ groupsize, number ]

  # Build a Regexp with <config[:group]> number of digits. Any past
  # the first are optional.
  re = Regexp.new( "(\\d)" + ("(\\d)?" * (groupsize - 1)) )
  self.log.debug "  regex for matching groups of %d digits is %p" % [ groupsize, re ]

  # Scan the string, and call the word-chunk function that deals with
  # chunks of the found number of digits.
  words = number.to_s.scan( re ).collect do |digits|
    self.log.debug "   digits = %p" % [ digits ]

    # Optional digits that didn't match come back as nil; drop them so
    # a short final group is handled by the function for its real length.
    numerals = digits.flatten.compact.collect {|i| i.to_i}
    self.log.debug "   numerals = %p" % [ numerals ]

    fn = NUMBER_TO_WORDS_FUNCTIONS[ numerals.length ]
    self.log.debug "  number to word function is #%d: %p" % [ numerals.length, fn ]
    fn.call( zeroword, *numerals ).strip
  end

  return words
end
|
448
|
+
|
449
|
+
|
450
|
+
### Split the given +number+ up into groups of three and return
### the Array of words describing each group in the standard style.
### +andword+ is placed between the hundreds and the rest of each group; it
### is used as given, so it should include any surrounding spaces.
def number_to_standard_word_groups( number, andword=" and " )
  # Copy before editing: #to_s on a String returns the String itself, and
  # the digits are consumed destructively below.
  phrase = number.to_s.dup
  phrase.sub!( /\A\s*0+/, '' )
  chunks = []
  mill = 0
  self.log.debug "Making standard word groups out of %p" % [ phrase ]

  # Match backward from the end of the digits in the string, turning
  # chunks of three, of two, and of one into words. Each full group of
  # three moves one step up the thousands scale, even when it is all zeroes
  # and contributes no words.
  loop do
    consumed = phrase.sub!( /(\d)(\d)(\d)(?=\D*\Z)/ ) do
      words = to_hundreds( $1.to_i, $2.to_i, $3.to_i, mill, andword )
      chunks.unshift words.strip.squeeze(' ') unless words.nil?
      ''
    end
    break unless consumed
    mill += 1
  end

  # At most two digits are left over at the front of the number
  phrase.sub!( /(\d)(\d)(?=\D*\Z)/ ) do
    chunks.unshift to_tens( $1.to_i, $2.to_i, mill ).strip.squeeze(' ')
    ''
  end

  phrase.sub!( /(\d)(?=\D*\Z)/ ) do
    chunks.unshift to_units( $1.to_i, mill ).strip.squeeze(' ')
    ''
  end

  return chunks
end
|
480
|
+
|
481
|
+
|
482
|
+
end # module Linguistics::EN::Numbers
|
483
|
+
|