re 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +3 -1
- data/README.rdoc +10 -9
- data/lib/re.rb +204 -123
- data/test/re_test.rb +41 -8
- metadata +2 -2
data/MIT-LICENSE
CHANGED
data/README.rdoc
CHANGED
@@ -33,12 +33,12 @@ Although it is more code, the individual pieces are smaller and
|
|
33
33
|
easier to independently verify. As an additional bonus, the capture
|
34
34
|
groups can be retrieved by name:
|
35
35
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
36
|
+
result = date.match("2009-01-23")
|
37
|
+
result.data(:year) # => "2009"
|
38
|
+
result.data(:month) # => "01"
|
39
|
+
result.data(:day) # => "23"
|
40
40
|
|
41
|
-
== Version: 0.0.
|
41
|
+
== Version: 0.0.4
|
42
42
|
|
43
43
|
== Usage:
|
44
44
|
|
@@ -91,8 +91,9 @@ Re is provided under the MIT open source license (see MIT-LICENSE)
|
|
91
91
|
|
92
92
|
== Links:
|
93
93
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
94
|
+
Documentation :: http://re-lib.rubyforge.org
|
95
|
+
Source :: http://github.com/jimweirich/re
|
96
|
+
GemCutter :: http://gemcutter.org/gems/re
|
97
|
+
Bug Tracker :: http://www.pivotaltracker.com/projects/47758
|
98
|
+
Author :: jim.weirich@gmail.com
|
98
99
|
|
data/lib/re.rb
CHANGED
@@ -34,10 +34,10 @@
|
|
34
34
|
# easier to independently verify. As an additional bonus, the capture
|
35
35
|
# groups can be retrieved by name:
|
36
36
|
#
|
37
|
-
#
|
38
|
-
#
|
39
|
-
#
|
40
|
-
#
|
37
|
+
# result = date.match("2009-01-23")
|
38
|
+
# result.data(:year) # => "2009"
|
39
|
+
# result.data(:month) # => "01"
|
40
|
+
# result.data(:day) # => "23"
|
41
41
|
#
|
42
42
|
# == Usage:
|
43
43
|
#
|
@@ -90,10 +90,11 @@
|
|
90
90
|
#
|
91
91
|
# == Links:
|
92
92
|
#
|
93
|
-
#
|
94
|
-
#
|
95
|
-
#
|
96
|
-
#
|
93
|
+
# Documentation :: http://re-lib.rubyforge.org
|
94
|
+
# Source :: http://github.com/jimweirich/re
|
95
|
+
# GemCutter :: http://gemcutter.org/gems/re
|
96
|
+
# Bug Tracker :: http://www.pivotaltracker.com/projects/47758
|
97
|
+
# Author :: jim.weirich@gmail.com
|
97
98
|
#
|
98
99
|
module Re
|
99
100
|
|
@@ -101,7 +102,7 @@ module Re
|
|
101
102
|
NUMBERS = [
|
102
103
|
MAJOR = 0,
|
103
104
|
MINOR = 0,
|
104
|
-
BUILD =
|
105
|
+
BUILD = 4,
|
105
106
|
BETA = nil,
|
106
107
|
].compact
|
107
108
|
end
|
@@ -110,19 +111,19 @@ module Re
|
|
110
111
|
# Re::Result captures the result of a match and allows lookup of the
|
111
112
|
# captured groups by name.
|
112
113
|
class Result
|
113
|
-
# Create a Re result object with the match data and the
|
114
|
+
# Create a Re result object with the match data and the original
|
114
115
|
# Re::Rexp object.
|
115
116
|
def initialize(match_data, rexp)
|
116
117
|
@match_data = match_data
|
117
118
|
@rexp = rexp
|
118
119
|
end
|
119
120
|
|
120
|
-
# Return the full match
|
121
|
+
# Return the text of the full match.
|
121
122
|
def full_match
|
122
123
|
@match_data[0]
|
123
124
|
end
|
124
125
|
|
125
|
-
# Return the named capture data.
|
126
|
+
# Return the text of the named capture data.
|
126
127
|
def [](name)
|
127
128
|
index = @rexp.capture_keys.index(name)
|
128
129
|
index ? @match_data[index+1] : nil
|
@@ -136,25 +137,28 @@ module Re
|
|
136
137
|
CONCAT = 2 # r + r, literal :nodoc:
|
137
138
|
ALT = 1 # r | r :nodoc:
|
138
139
|
|
140
|
+
# Mode Bits
|
141
|
+
|
142
|
+
MULTILINE_MODE = Regexp::MULTILINE
|
143
|
+
IGNORE_CASE_MODE = Regexp::IGNORECASE
|
139
144
|
|
140
145
|
# Constructed regular expressions.
|
141
146
|
class Rexp
|
142
|
-
attr_reader :
|
147
|
+
attr_reader :level, :options, :capture_keys
|
143
148
|
|
144
149
|
# Create a regular expression from the string. The regular
|
145
150
|
# expression will have a precedence of +level+ and will recognize
|
146
151
|
# +keys+ as a list of capture keys.
|
147
|
-
def initialize(string, level,
|
148
|
-
@
|
152
|
+
def initialize(string, level, keys, options=0)
|
153
|
+
@raw_string = string
|
149
154
|
@level = level
|
150
|
-
@options = options
|
151
155
|
@capture_keys = keys
|
156
|
+
@options = options
|
152
157
|
end
|
153
|
-
|
154
|
-
# Return a
|
155
|
-
# regular expression.
|
158
|
+
|
159
|
+
# Return a Regexp from the constructed regular expression.
|
156
160
|
def regexp
|
157
|
-
@regexp ||= Regexp.new(
|
161
|
+
@regexp ||= Regexp.new(encoding)
|
158
162
|
end
|
159
163
|
|
160
164
|
# Does it match a string? (returns Re::Result if match, nil otherwise)
|
@@ -164,140 +168,152 @@ module Re
|
|
164
168
|
end
|
165
169
|
alias =~ match
|
166
170
|
|
167
|
-
#
|
171
|
+
# New regular expression that matches the concatenation of self and
|
172
|
+
# other.
|
168
173
|
def +(other)
|
169
|
-
Rexp.new(
|
174
|
+
Rexp.new(parenthesized_encoding(CONCAT) + other.parenthesized_encoding(CONCAT),
|
170
175
|
CONCAT,
|
171
|
-
options | other.options,
|
172
176
|
capture_keys + other.capture_keys)
|
173
177
|
end
|
174
178
|
|
175
|
-
#
|
179
|
+
# New regular expression that matches either self or other.
|
176
180
|
def |(other)
|
177
|
-
Rexp.new(
|
181
|
+
Rexp.new(parenthesized_encoding(ALT) + "|" + other.parenthesized_encoding(ALT),
|
178
182
|
ALT,
|
179
|
-
options | other.options,
|
180
183
|
capture_keys + other.capture_keys)
|
181
184
|
end
|
182
185
|
|
183
|
-
# self is optional
|
186
|
+
# New regular expression where self is optional.
|
184
187
|
def optional
|
185
|
-
Rexp.new(
|
188
|
+
Rexp.new(parenthesized_encoding(POSTFIX) + "?", POSTFIX, capture_keys)
|
186
189
|
end
|
187
190
|
|
188
|
-
#
|
191
|
+
# New regular expression that matches self many (zero or more)
|
192
|
+
# times.
|
189
193
|
def many
|
190
|
-
Rexp.new(
|
194
|
+
Rexp.new(parenthesized_encoding(POSTFIX) + "*", POSTFIX, capture_keys)
|
191
195
|
end
|
192
196
|
|
193
|
-
#
|
197
|
+
# New regular expression that matches self many (zero or more)
|
198
|
+
# times (non-greedy version).
|
194
199
|
def many!
|
195
|
-
Rexp.new(
|
200
|
+
Rexp.new(parenthesized_encoding(POSTFIX) + "*?", POSTFIX, capture_keys)
|
196
201
|
end
|
197
202
|
|
198
|
-
#
|
203
|
+
# New regular expression that matches self one or more times.
|
199
204
|
def one_or_more
|
200
|
-
Rexp.new(
|
205
|
+
Rexp.new(parenthesized_encoding(POSTFIX) + "+", POSTFIX, capture_keys)
|
201
206
|
end
|
202
207
|
|
203
|
-
#
|
208
|
+
# New regular expression that matches self one or more times
|
209
|
+
# (non-greedy version).
|
204
210
|
def one_or_more!
|
205
|
-
Rexp.new(
|
211
|
+
Rexp.new(parenthesized_encoding(POSTFIX) + "+?", POSTFIX, capture_keys)
|
206
212
|
end
|
207
213
|
|
208
|
-
#
|
209
|
-
#
|
214
|
+
# New regular expression that matches self between +min+ and +max+
|
215
|
+
# times (inclusive). If +max+ is omitted, then it must match self
|
216
|
+
# exactly +min+ times.
|
210
217
|
def repeat(min, max=nil)
|
211
218
|
if min && max
|
212
|
-
Rexp.new(
|
219
|
+
Rexp.new(parenthesized_encoding(POSTFIX) + "{#{min},#{max}}", POSTFIX, capture_keys)
|
213
220
|
else
|
214
|
-
Rexp.new(
|
221
|
+
Rexp.new(parenthesized_encoding(POSTFIX) + "{#{min}}", POSTFIX, capture_keys)
|
215
222
|
end
|
216
223
|
end
|
217
224
|
|
218
|
-
#
|
225
|
+
# New regular expression that matches self at least +min+ times.
|
219
226
|
def at_least(min)
|
220
|
-
Rexp.new(
|
227
|
+
Rexp.new(parenthesized_encoding(POSTFIX) + "{#{min},}", POSTFIX, capture_keys)
|
221
228
|
end
|
222
229
|
|
223
|
-
#
|
230
|
+
# New regular expression that matches self at most +max+ times.
|
224
231
|
def at_most(max)
|
225
|
-
Rexp.new(
|
226
|
-
end
|
227
|
-
|
228
|
-
# None of the given characters will match.
|
229
|
-
def none(chars)
|
230
|
-
Rexp.new("[^" + Rexp.escape_any(chars) + "]", GROUPED, 0, [])
|
232
|
+
Rexp.new(parenthesized_encoding(POSTFIX) + "{0,#{max}}", POSTFIX, capture_keys)
|
231
233
|
end
|
232
234
|
|
233
|
-
#
|
235
|
+
# New regular expression that matches self across the complete
|
236
|
+
# string.
|
234
237
|
def all
|
235
238
|
self.begin.very_end
|
236
239
|
end
|
237
240
|
|
238
|
-
#
|
241
|
+
# New regular expression that matches self across most of the
|
242
|
+
# entire string (trailing new lines are not required to match).
|
239
243
|
def almost_all
|
240
244
|
self.begin.end
|
241
245
|
end
|
242
246
|
|
243
|
-
#
|
247
|
+
# New regular expression that matches self at the beginning of a line.
|
244
248
|
def bol
|
245
|
-
Rexp.new("^" +
|
249
|
+
Rexp.new("^" + parenthesized_encoding(CONCAT), CONCAT, capture_keys)
|
246
250
|
end
|
247
251
|
|
248
|
-
#
|
252
|
+
# New regular expression that matches self at the end of the line.
|
249
253
|
def eol
|
250
|
-
Rexp.new(
|
254
|
+
Rexp.new(parenthesized_encoding(CONCAT) + "$", CONCAT, capture_keys)
|
251
255
|
end
|
252
256
|
|
253
|
-
#
|
257
|
+
# New regular expression that matches self at the beginning of a string.
|
254
258
|
def begin
|
255
|
-
Rexp.new("\\A" +
|
259
|
+
Rexp.new("\\A" + parenthesized_encoding(CONCAT), CONCAT, capture_keys)
|
256
260
|
end
|
257
261
|
|
258
|
-
# self
|
262
|
+
# New regular expression that matches self at the end of a string
|
263
|
+
# (trailing new lines are allowed to not match).
|
259
264
|
def end
|
260
|
-
Rexp.new(
|
265
|
+
Rexp.new(parenthesized_encoding(CONCAT) + "\\Z", CONCAT, capture_keys)
|
261
266
|
end
|
262
267
|
|
263
|
-
# self
|
268
|
+
# New regular expression that matches self at the very end of a string
|
269
|
+
# (trailing new lines are required to match).
|
264
270
|
def very_end
|
265
|
-
Rexp.new(
|
271
|
+
Rexp.new(parenthesized_encoding(CONCAT) + "\\z", CONCAT, capture_keys)
|
266
272
|
end
|
267
273
|
|
268
|
-
# self
|
274
|
+
# New expression that matches self across an entire line.
|
269
275
|
def line
|
270
276
|
self.bol.eol
|
271
277
|
end
|
272
278
|
|
273
|
-
#
|
279
|
+
# New regular expression that is grouped, but does not cause the
|
280
|
+
# capture of a match. The Re library normally handles grouping
|
281
|
+
# automatically, so this method shouldn't be needed by client
|
282
|
+
# software for normal operations.
|
274
283
|
def group
|
275
|
-
Rexp.new("(?:" +
|
284
|
+
Rexp.new("(?:" + encoding + ")", GROUPED, capture_keys)
|
276
285
|
end
|
277
286
|
|
278
|
-
#
|
287
|
+
# New regular expression that captures text matching self. The
|
288
|
+
# matching text may be retrieved from the Re::Result object using
|
289
|
+
# the +name+ (a symbol) as the keyword.
|
279
290
|
def capture(name)
|
280
|
-
Rexp.new("(" +
|
291
|
+
Rexp.new("(" + encoding + ")", GROUPED, [name] + capture_keys)
|
281
292
|
end
|
282
293
|
|
283
|
-
#
|
294
|
+
# New regular expression that matches self in multiline mode.
|
284
295
|
def multiline
|
285
|
-
Rexp.new(
|
296
|
+
Rexp.new(@raw_string, GROUPED, capture_keys, options | MULTILINE_MODE)
|
286
297
|
end
|
287
298
|
|
288
|
-
# Is this a multiline regular expression?
|
299
|
+
# Is this a multiline regular expression? The multiline mode of
|
300
|
+
# interior regular expressions is not reflected in the value returned
|
301
|
+
# by this method.
|
289
302
|
def multiline?
|
290
|
-
(options &
|
303
|
+
(options & MULTILINE_MODE) != 0
|
291
304
|
end
|
292
305
|
|
293
|
-
#
|
306
|
+
# New regular expression that matches self while ignoring case.
|
294
307
|
def ignore_case
|
295
|
-
Rexp.new(
|
308
|
+
Rexp.new(@raw_string, GROUPED, capture_keys, options | IGNORE_CASE_MODE)
|
296
309
|
end
|
297
310
|
|
298
|
-
# Does this regular expression ignore case?
|
311
|
+
# Does this regular expression ignore case? Note that this only
|
312
|
+
# queries the outer most regular expression. The ignore case mode
|
313
|
+
# of interior regular expressions is not reflected in the value
|
314
|
+
# returned by this method.
|
299
315
|
def ignore_case?
|
300
|
-
(options &
|
316
|
+
(options & IGNORE_CASE_MODE) != 0
|
301
317
|
end
|
302
318
|
|
303
319
|
# String representation of the constructed regular expression.
|
@@ -310,32 +326,51 @@ module Re
|
|
310
326
|
# String representation with grouping if needed.
|
311
327
|
#
|
312
328
|
# If the precedence of the current Regexp is less than the new
|
313
|
-
# precedence level, return the
|
314
|
-
# group. Otherwise just return the
|
315
|
-
def
|
329
|
+
# precedence level, return the encoding wrapped in a non-capturing
|
330
|
+
# group. Otherwise just return the encoding.
|
331
|
+
def parenthesized_encoding(new_level)
|
316
332
|
if level >= new_level
|
317
|
-
|
333
|
+
encoding
|
318
334
|
else
|
319
|
-
group.
|
335
|
+
group.encoding
|
320
336
|
end
|
321
337
|
end
|
322
338
|
|
323
|
-
#
|
324
|
-
#
|
339
|
+
# The string encoding of current regular expression. The encoding
|
340
|
+
# will include option flags if specified.
|
341
|
+
def encoding
|
342
|
+
if options == 0
|
343
|
+
@raw_string
|
344
|
+
else
|
345
|
+
"(?#{encode_options}:" + @raw_string + ")"
|
346
|
+
end
|
347
|
+
end
|
348
|
+
|
349
|
+
# Encode the options into a string (e.g "", "m", "i", or "mi")
|
350
|
+
def encode_options # :nodoc:
|
351
|
+
(multiline? ? "m" : "") +
|
352
|
+
(ignore_case? ? "i" : "")
|
353
|
+
end
|
354
|
+
private :encode_options
|
355
|
+
|
356
|
+
# New regular expression that matches the literal characters in
|
357
|
+
# +chars+. For example, Re.literal("a(b)") will be equivalent to
|
358
|
+
# /a\(b\)/. Note that characters with special meanings in regular
|
359
|
+
# expressions will be quoted.
|
325
360
|
def self.literal(chars)
|
326
|
-
new(Regexp.escape(chars), CONCAT,
|
361
|
+
new(Regexp.escape(chars), CONCAT, [])
|
327
362
|
end
|
328
363
|
|
329
|
-
#
|
330
|
-
# regular expression. The raw string should represent a
|
331
|
-
# expression with the highest level of precedence (you
|
332
|
-
# parenthesis if it is not).
|
364
|
+
# New regular expression constructed from a string representing a
|
365
|
+
# ruby regular expression. The raw string should represent a
|
366
|
+
# regular expression with the highest level of precedence (you
|
367
|
+
# should use parentheses if it is not).
|
333
368
|
def self.raw(re_string) # :no-doc:
|
334
|
-
new(re_string, GROUPED,
|
369
|
+
new(re_string, GROUPED, [])
|
335
370
|
end
|
336
371
|
|
337
|
-
# Escape
|
338
|
-
def self.escape_any(chars)
|
372
|
+
# Escape special characters found in character classes.
|
373
|
+
def self.escape_any(chars) # :nodoc:
|
339
374
|
chars.gsub(/([\[\]\^\-])/) { "\\#{$1}" }
|
340
375
|
end
|
341
376
|
end
|
@@ -360,10 +395,12 @@ module Re
|
|
360
395
|
# re() without arguments can be used to access the methods.
|
361
396
|
module ConstructionMethods
|
362
397
|
|
398
|
+
ANY_CHAR = Rexp.raw(".")
|
399
|
+
|
363
400
|
# :call-seq:
|
364
401
|
# re.null
|
365
402
|
#
|
366
|
-
#
|
403
|
+
# Regular expression that matches the null string
|
367
404
|
def null
|
368
405
|
self
|
369
406
|
end
|
@@ -374,43 +411,77 @@ module Re
|
|
374
411
|
# re.any(range)
|
375
412
|
# re.any(chars, range, ...)
|
376
413
|
#
|
377
|
-
#
|
414
|
+
# Regular expression that matches a character from a character
|
415
|
+
# class.
|
378
416
|
#
|
379
|
-
# Any without any arguments will match any single character.
|
380
|
-
# with one or more arguments will construct a character
|
381
|
-
# the arguments. If the argument is a three character
|
382
|
-
# the middle character is "-", then the argument
|
383
|
-
# of characters. Otherwise the arguments are
|
384
|
-
# characters to be added to the character
|
417
|
+
# +Any+ without any arguments will match any single character.
|
418
|
+
# +Any+ with one or more arguments will construct a character
|
419
|
+
# class for the arguments. If the argument is a three character
|
420
|
+
# string where the middle character is "-", then the argument
|
421
|
+
# represents a range of characters. Otherwise the arguments are
|
422
|
+
# treated as a list of characters to be added to the character
|
423
|
+
# class.
|
385
424
|
#
|
386
425
|
# Examples:
|
387
426
|
#
|
388
|
-
# re.any --
|
389
|
-
# re.any("aieouy") --
|
390
|
-
# re.any("0-9") --
|
391
|
-
# re.any("A-Z", "a-z", "0-9") --
|
392
|
-
# re.any("A-Z", "a-z", "0-9", "_") --
|
427
|
+
# re.any -- matches any character
|
428
|
+
# re.any("aieouy") -- matches vowels
|
429
|
+
# re.any("0-9") -- matches digits
|
430
|
+
# re.any("A-Z", "a-z", "0-9") -- matches alphanumerics
|
431
|
+
# re.any("A-Z", "a-z", "0-9", "_") -- matches alphanumerics
|
432
|
+
# plus an underscore
|
393
433
|
#
|
394
434
|
def any(*chars)
|
395
435
|
if chars.empty?
|
396
|
-
|
436
|
+
ANY_CHAR
|
397
437
|
else
|
398
|
-
|
399
|
-
chars.each do |chs|
|
400
|
-
if /^.-.$/ =~ chs
|
401
|
-
any_chars << chs
|
402
|
-
else
|
403
|
-
any_chars << Rexp.escape_any(chs)
|
404
|
-
end
|
405
|
-
end
|
406
|
-
Rexp.new("[" + any_chars + "]", GROUPED, 0, [])
|
438
|
+
Rexp.new("[" + char_class(chars) + "]", GROUPED, [])
|
407
439
|
end
|
408
440
|
end
|
409
441
|
|
442
|
+
# :call-seq:
|
443
|
+
# re.none(chars)
|
444
|
+
# re.none(range)
|
445
|
+
# re.none(chars, range, ...)
|
446
|
+
#
|
447
|
+
# Regular expression that matches a character not in a character
|
448
|
+
# class.
|
449
|
+
#
|
450
|
+
# +None+ with one or more arguments will construct a character
|
451
|
+
# class for the given arguments. If the argument is a three
|
452
|
+
# character string where the middle character is "-", then the
|
453
|
+
# argument represents a range of characters. Otherwise the
|
454
|
+
# arguments are treated as a list of characters to be added to the
|
455
|
+
# character class.
|
456
|
+
#
|
457
|
+
# Examples:
|
458
|
+
#
|
459
|
+
# re.none("aieouy") -- matches non-vowels
|
460
|
+
# re.none("0-9") -- matches non-digits
|
461
|
+
# re.none("A-Z", "a-z", "0-9") -- matches non-alphanumerics
|
462
|
+
#
|
463
|
+
def none(*chars)
|
464
|
+
Rexp.new("[^" + char_class(chars) + "]", GROUPED, [])
|
465
|
+
end
|
466
|
+
|
467
|
+
def char_class(chars)
|
468
|
+
any_chars = ''
|
469
|
+
chars.each do |chs|
|
470
|
+
if /^.-.$/ =~ chs
|
471
|
+
any_chars << chs
|
472
|
+
else
|
473
|
+
any_chars << Rexp.escape_any(chs)
|
474
|
+
end
|
475
|
+
end
|
476
|
+
any_chars
|
477
|
+
end
|
478
|
+
private :char_class
|
479
|
+
|
410
480
|
# :call-seq:
|
411
481
|
# re.space
|
412
482
|
#
|
413
|
-
#
|
483
|
+
# Regular expression that matches any white space character.
|
484
|
+
# (equivalent to /\s/)
|
414
485
|
def space
|
415
486
|
@space ||= Rexp.raw("\\s")
|
416
487
|
end
|
@@ -418,7 +489,8 @@ module Re
|
|
418
489
|
# :call-seq:
|
419
490
|
# re.spaces
|
420
491
|
#
|
421
|
-
#
|
492
|
+
# Regular expression that matches any sequence of white space
|
493
|
+
# characters. (equivalent to /\s+/)
|
422
494
|
def spaces
|
423
495
|
@spaces ||= space.one_or_more
|
424
496
|
end
|
@@ -426,7 +498,8 @@ module Re
|
|
426
498
|
# :call-seq:
|
427
499
|
# re.nonspace
|
428
500
|
#
|
429
|
-
#
|
501
|
+
# Regular expression that matches any non-white space character.
|
502
|
+
# (equivalent to /\S/)
|
430
503
|
def nonspace
|
431
504
|
@nonspace ||= Rexp.raw("\\S")
|
432
505
|
end
|
@@ -434,7 +507,8 @@ module Re
|
|
434
507
|
# :call-seq:
|
435
508
|
# re.nonspaces
|
436
509
|
#
|
437
|
-
#
|
510
|
+
# Regular expression that matches any sequence of non-white space
|
511
|
+
# characters. (equivalent to /\S+/)
|
438
512
|
def nonspaces
|
439
513
|
@nonspaces ||= Rexp.raw("\\S").one_or_more
|
440
514
|
end
|
@@ -442,7 +516,8 @@ module Re
|
|
442
516
|
# :call-seq:
|
443
517
|
# re.word_char
|
444
518
|
#
|
445
|
-
#
|
519
|
+
# Regular expression that matches any word character. (equivalent
|
520
|
+
# to /\w/)
|
446
521
|
def word_char
|
447
522
|
@word_char ||= Rexp.raw("\\w")
|
448
523
|
end
|
@@ -450,7 +525,8 @@ module Re
|
|
450
525
|
# :call-seq:
|
451
526
|
# re.word
|
452
527
|
#
|
453
|
-
#
|
528
|
+
# Regular expression that matches any sequence of word characters.
|
529
|
+
# (equivalent to /\w+/)
|
454
530
|
def word
|
455
531
|
@word ||= word_char.one_or_more
|
456
532
|
end
|
@@ -458,7 +534,8 @@ module Re
|
|
458
534
|
# :call-seq:
|
459
535
|
# re.break
|
460
536
|
#
|
461
|
-
#
|
537
|
+
# Regular expression that matches any break between word/non-word
|
538
|
+
# characters. This is a zero length match. (equivalent to /\b/)
|
462
539
|
def break
|
463
540
|
@break ||= Rexp.raw("\\b")
|
464
541
|
end
|
@@ -466,7 +543,8 @@ module Re
|
|
466
543
|
# :call-seq:
|
467
544
|
# re.digit
|
468
545
|
#
|
469
|
-
#
|
546
|
+
# Regular expression that matches a single digit. (equivalent to
|
547
|
+
# /\d/)
|
470
548
|
def digit
|
471
549
|
@digit ||= any("0-9")
|
472
550
|
end
|
@@ -474,7 +552,8 @@ module Re
|
|
474
552
|
# :call-seq:
|
475
553
|
# re.digits
|
476
554
|
#
|
477
|
-
#
|
555
|
+
# Regular expression that matches a sequence of digits.
|
556
|
+
# (equivalent to /\d+/)
|
478
557
|
def digits
|
479
558
|
@digits ||= digit.one_or_more
|
480
559
|
end
|
@@ -482,7 +561,8 @@ module Re
|
|
482
561
|
# :call-seq:
|
483
562
|
# re.hex_digit
|
484
563
|
#
|
485
|
-
#
|
564
|
+
# Regular expression that matches a single hex digit. (equivalent
|
565
|
+
# to /[A-Fa-f0-9]/)
|
486
566
|
def hex_digit
|
487
567
|
@hex_digit ||= any("0-9", "a-f", "A-F")
|
488
568
|
end
|
@@ -490,7 +570,8 @@ module Re
|
|
490
570
|
# :call-seq:
|
491
571
|
# re.hex_digits
|
492
572
|
#
|
493
|
-
#
|
573
|
+
# Regular expression that matches a sequence of hex digits
|
574
|
+
# (equivalent to /[A-Fa-f0-9]+/)
|
494
575
|
def hex_digits
|
495
576
|
@hex_digits ||= hex_digit.one_or_more
|
496
577
|
end
|
data/test/re_test.rb
CHANGED
@@ -138,23 +138,40 @@ class ReTest < Test::Unit::TestCase
|
|
138
138
|
end
|
139
139
|
|
140
140
|
def test_no_options
|
141
|
-
r = re("a")
|
141
|
+
r = re("a") + re.any + re("b")
|
142
142
|
assert ! r.ignore_case?
|
143
143
|
assert ! r.multiline?
|
144
|
+
assert r =~ "axb"
|
145
|
+
assert r !~ "a\nb"
|
146
|
+
assert r !~ "Axb"
|
144
147
|
end
|
145
148
|
|
146
149
|
def test_any_with_multiline
|
147
|
-
r = re.any.multiline
|
150
|
+
r = re.any.all.multiline
|
148
151
|
assert r.multiline?
|
149
152
|
assert r =~ "\n"
|
150
153
|
end
|
151
154
|
|
152
155
|
def test_ignore_case
|
153
|
-
r = re("a").ignore_case
|
154
|
-
assert r.ignore_case?
|
156
|
+
r = re("a").all.ignore_case
|
155
157
|
assert r =~ "a"
|
156
158
|
assert r =~ "A"
|
157
159
|
end
|
160
|
+
|
161
|
+
def test_partial_ignore_case
|
162
|
+
r = (re("a").ignore_case + re("b")).all
|
163
|
+
assert r =~ "ab"
|
164
|
+
assert r =~ "Ab"
|
165
|
+
assert r !~ "aB"
|
166
|
+
end
|
167
|
+
|
168
|
+
def test_options_no_not_modify_existing_rexps
|
169
|
+
r = re("a")
|
170
|
+
r2 = r.ignore_case
|
171
|
+
|
172
|
+
assert r !~ "A"
|
173
|
+
assert r2 =~ "A"
|
174
|
+
end
|
158
175
|
|
159
176
|
def test_any_with_a_character_list
|
160
177
|
r = re.any("xyz").all
|
@@ -176,17 +193,17 @@ class ReTest < Test::Unit::TestCase
|
|
176
193
|
assert r =~ "]"
|
177
194
|
end
|
178
195
|
|
179
|
-
def
|
196
|
+
def test_any_with_a_range_of_chars
|
180
197
|
r = re.any("a-z").many.all
|
181
198
|
assert r =~ "abcdefghijklmnopqrstuvwxyz"
|
182
199
|
end
|
183
200
|
|
184
|
-
def
|
201
|
+
def test_any_with_a_range_and_mix_of_chars
|
185
202
|
r = re.any("0-9", ".-").many.all
|
186
203
|
assert r =~ "-12.3"
|
187
204
|
end
|
188
205
|
|
189
|
-
def
|
206
|
+
def test_none_with_a_character_list
|
190
207
|
r = re.none("xyz").all
|
191
208
|
assert r =~ "w"
|
192
209
|
assert r !~ "x"
|
@@ -206,6 +223,22 @@ class ReTest < Test::Unit::TestCase
|
|
206
223
|
assert r !~ "]"
|
207
224
|
end
|
208
225
|
|
226
|
+
def test_none_with_a_range_of_chars
|
227
|
+
r = re.none("a-z").many.all
|
228
|
+
assert r =~ "0123()$#"
|
229
|
+
assert r !~ "a"
|
230
|
+
assert r !~ "b"
|
231
|
+
assert r !~ "z"
|
232
|
+
end
|
233
|
+
|
234
|
+
def test_none_with_a_range_and_mix_of_chars
|
235
|
+
r = re.none("0-9", ".-").many.all
|
236
|
+
assert r =~ "abc%^&"
|
237
|
+
assert r !~ "-"
|
238
|
+
assert r !~ "."
|
239
|
+
assert r !~ "1"
|
240
|
+
end
|
241
|
+
|
209
242
|
def test_all
|
210
243
|
r = re("a").all
|
211
244
|
assert r =~ "a"
|
@@ -471,7 +504,7 @@ class ReTest < Test::Unit::TestCase
|
|
471
504
|
class << self
|
472
505
|
include Re
|
473
506
|
def date_re
|
474
|
-
# (19|20)
|
507
|
+
# /\A((?:19|20)[0-9]{2})[\- \/.](0[1-9]|1[012])[\- \/.](0[1-9]|[12][0-9]|3[01])\z/
|
475
508
|
@date_re ||=
|
476
509
|
begin
|
477
510
|
delim_re = re.any("- /.")
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: re
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jim Weirich
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-12-
|
12
|
+
date: 2009-12-29 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|