re 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +39 -0
- data/Rakefile +18 -0
- data/lib/re.rb +373 -0
- data/test/re_test.rb +440 -0
- metadata +63 -0
data/README.rdoc
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
|
2
|
+
= Regular Expression Construction.
|
3
|
+
|
4
|
+
Construct regular expressions using the re() method.
|
5
|
+
|
6
|
+
Usage:
|
7
|
+
|
8
|
+
include Re
|
9
|
+
|
10
|
+
number = re.any("0-9").all
|
11
|
+
if number =~ string
|
12
|
+
puts "Matches!"
|
13
|
+
else
|
14
|
+
puts "No Match"
|
15
|
+
end
|
16
|
+
|
17
|
+
Examples:
|
18
|
+
|
19
|
+
re("a") -- matches "a"
|
20
|
+
re("a") + re("b") -- matches "ab"
|
21
|
+
re("a") | re("b") -- matches "a" or "b"
|
22
|
+
re("a").many -- matches "", "a", "aaaaaa"
|
23
|
+
re("a").one_or_more -- matches "a", "aaaaaa", but not ""
|
24
|
+
re("a").optional -- matches "" or "a"
|
25
|
+
re("a").all -- matches "a", but not "xab"
|
26
|
+
|
27
|
+
See Re::Rexp for a complete list of expressions.
|
28
|
+
|
29
|
+
Using re without an argument allows access to a number of common
|
30
|
+
regular expression constants. For example:
|
31
|
+
|
32
|
+
re.space -- matches " ", "\n" or "\t"
|
33
|
+
re.spaces -- matches any number of spaces (but at least one)
|
34
|
+
re.digit / re.digits -- matches a digit / sequence of digits
|
35
|
+
|
36
|
+
See Re::NULL for a complete list of common constants.
|
37
|
+
|
38
|
+
See Re.re,
|
39
|
+
Re::Rexp, and Re::NULL for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rake/clean'
|
4
|
+
require 'rake/testtask'
|
5
|
+
require 'rake/rdoctask'
|
6
|
+
|
7
|
+
task :default => :test
|
8
|
+
|
9
|
+
Rake::TestTask.new(:test) do |t|
|
10
|
+
t.warning = true
|
11
|
+
t.verbose = false
|
12
|
+
t.test_files = FileList['test/*_test.rb']
|
13
|
+
end
|
14
|
+
|
15
|
+
Rake::RDocTask.new do |rd|
|
16
|
+
rd.main = "lib/re.rb"
|
17
|
+
rd.rdoc_files = FileList["lib/re.rb"]
|
18
|
+
end
|
data/lib/re.rb
ADDED
@@ -0,0 +1,373 @@
|
|
1
|
+
#!/usr/bin/ruby -wKU
|
2
|
+
#
|
3
|
+
# = Regular Expression Construction.
|
4
|
+
#
|
5
|
+
# Construct regular expressions using the re() method.
|
6
|
+
#
|
7
|
+
# Usage:
|
8
|
+
#
|
9
|
+
# include Re
|
10
|
+
#
|
11
|
+
# number = re.any("0-9").all
|
12
|
+
# if number =~ string
|
13
|
+
# puts "Matches!"
|
14
|
+
# else
|
15
|
+
# puts "No Match"
|
16
|
+
# end
|
17
|
+
#
|
18
|
+
# Examples:
|
19
|
+
#
|
20
|
+
# re("a") -- matches "a"
|
21
|
+
# re("a") + re("b") -- matches "ab"
|
22
|
+
# re("a") | re("b") -- matches "a" or "b"
|
23
|
+
# re("a").many -- matches "", "a", "aaaaaa"
|
24
|
+
# re("a").one_or_more -- matches "a", "aaaaaa", but not ""
|
25
|
+
# re("a").optional -- matches "" or "a"
|
26
|
+
# re("a").all -- matches "a", but not "xab"
|
27
|
+
#
|
28
|
+
# See Re::Rexp for a complete list of expressions.
|
29
|
+
#
|
30
|
+
# Using re without an argument allows access to a number of common
|
31
|
+
# regular expression constants. For example:
|
32
|
+
#
|
33
|
+
# re.space -- matches " ", "\n" or "\t"
|
34
|
+
# re.spaces -- matches any number of spaces (but at least one)
|
35
|
+
# re.digit / re.digits -- matches a digit / sequence of digits
|
36
|
+
#
|
37
|
+
# See Re::NULL for a complete list of common constants.
|
38
|
+
#
|
39
|
+
# See Re.re,
|
40
|
+
# Re::Rexp, and Re::NULL for details.
|
41
|
+
|
42
|
+
module Re
|
43
|
+
class Result
  # Remember the raw match data together with the constructed
  # expression that produced it (the expression supplies the capture
  # key names).
  def initialize(match_data, rexp)
    @match_data = match_data
    @rexp = rexp
  end

  # Return the matched text.  Without a name, the text of the entire
  # match is returned.  With a name, the text captured under that key
  # is returned, or nil if the key is not one of the expression's
  # capture keys.
  def data(name=nil)
    return @match_data[0] unless name
    position = @rexp.capture_keys.index(name)
    # Capture groups are numbered from 1; slot 0 is the whole match.
    position && @match_data[position + 1]
  end
end
|
57
|
+
|
58
|
+
# Precedence levels for regular expressions:
|
59
|
+
|
60
|
+
GROUPED = 4 # (r), [chars] :nodoc:
|
61
|
+
POSTFIX = 3 # r*, r+, r? :nodoc:
|
62
|
+
CONCAT = 2 # r + r, literal :nodoc:
|
63
|
+
ALT = 1 # r | r :nodoc:
|
64
|
+
|
65
|
+
|
66
|
+
# Constructed regular expressions.
|
67
|
+
class Rexp
|
68
|
+
attr_reader :string, :level, :flags, :capture_keys
|
69
|
+
|
70
|
+
# Create a regular expression from the string. The regular
|
71
|
+
# expression will have a precedence of +level+ and will recognized
|
72
|
+
# +keys+ as a list of capture keys.
|
73
|
+
def initialize(string, level, flags, keys)
|
74
|
+
@string = string
|
75
|
+
@level = level
|
76
|
+
@flags = flags
|
77
|
+
@capture_keys = keys
|
78
|
+
end
|
79
|
+
|
80
|
+
# Return a real regular expression from the the constructed
|
81
|
+
# regular expression.
|
82
|
+
def regexp
|
83
|
+
@regexp ||= Regexp.new(string, flags)
|
84
|
+
end
|
85
|
+
|
86
|
+
# Does it match a string? (returns Re::Result if match, nil otherwise)
|
87
|
+
def match(string)
|
88
|
+
md = regexp.match(string)
|
89
|
+
md ? Result.new(md, self) : nil
|
90
|
+
end
|
91
|
+
alias =~ match
|
92
|
+
|
93
|
+
# Concatenate two regular expressions
|
94
|
+
def +(other)
|
95
|
+
Rexp.new(parenthesize(CONCAT) + other.parenthesize(CONCAT),
|
96
|
+
CONCAT,
|
97
|
+
flags | other.flags,
|
98
|
+
capture_keys + other.capture_keys)
|
99
|
+
end
|
100
|
+
|
101
|
+
# Matches either self or other
|
102
|
+
def |(other)
|
103
|
+
Rexp.new(parenthesize(ALT) + "|" + other.parenthesize(ALT),
|
104
|
+
ALT,
|
105
|
+
flags | other.flags,
|
106
|
+
capture_keys + other.capture_keys)
|
107
|
+
end
|
108
|
+
|
109
|
+
# self is optional
|
110
|
+
def optional
|
111
|
+
Rexp.new(parenthesize(POSTFIX) + "?", POSTFIX, flags, capture_keys)
|
112
|
+
end
|
113
|
+
|
114
|
+
# self matches many times (zero or more)
|
115
|
+
def many
|
116
|
+
Rexp.new(parenthesize(POSTFIX) + "*", POSTFIX, flags, capture_keys)
|
117
|
+
end
|
118
|
+
|
119
|
+
# self matches one or more times
|
120
|
+
def one_or_more
|
121
|
+
Rexp.new(parenthesize(POSTFIX) + "+", POSTFIX, flags, capture_keys)
|
122
|
+
end
|
123
|
+
|
124
|
+
# self is repeated from min to max times. If max is omitted, then
|
125
|
+
# it is repeated exactly min times.
|
126
|
+
def repeat(min, max=nil)
|
127
|
+
if min && max
|
128
|
+
Rexp.new(parenthesize(POSTFIX) + "{#{min},#{max}}", POSTFIX, flags, capture_keys)
|
129
|
+
else
|
130
|
+
Rexp.new(parenthesize(POSTFIX) + "{#{min}}", POSTFIX, flags, capture_keys)
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
# self is repeated at least min times
|
135
|
+
def at_least(min)
|
136
|
+
Rexp.new(parenthesize(POSTFIX) + "{#{min},}", POSTFIX, flags, capture_keys)
|
137
|
+
end
|
138
|
+
|
139
|
+
# self is repeated at least max times
|
140
|
+
def at_most(max)
|
141
|
+
Rexp.new(parenthesize(POSTFIX) + "{0,#{max}}", POSTFIX, flags, capture_keys)
|
142
|
+
end
|
143
|
+
|
144
|
+
# None of the given characters will match.
|
145
|
+
def none(chars)
|
146
|
+
Rexp.new("[^" + Rexp.escape_any(chars) + "]", GROUPED, 0, [])
|
147
|
+
end
|
148
|
+
|
149
|
+
# self must match all of the string
|
150
|
+
def all
|
151
|
+
self.begin.very_end
|
152
|
+
end
|
153
|
+
|
154
|
+
# self must match almost all of the string (trailing new lines are allowed)
|
155
|
+
def almost_all
|
156
|
+
self.begin.end
|
157
|
+
end
|
158
|
+
|
159
|
+
# self must match at the beginning of a line
|
160
|
+
def bol
|
161
|
+
Rexp.new("^" + parenthesize(CONCAT), CONCAT, flags, capture_keys)
|
162
|
+
end
|
163
|
+
|
164
|
+
# self must match at the end of a line
|
165
|
+
def eol
|
166
|
+
Rexp.new(parenthesize(CONCAT) + "$", CONCAT, flags, capture_keys)
|
167
|
+
end
|
168
|
+
|
169
|
+
# self must match at the beginning of the string
|
170
|
+
def begin
|
171
|
+
Rexp.new("\\A" + parenthesize(CONCAT), CONCAT, flags, capture_keys)
|
172
|
+
end
|
173
|
+
|
174
|
+
# self must match the end of the string (with an optional new line)
|
175
|
+
def end
|
176
|
+
Rexp.new(parenthesize(CONCAT) + "\\Z", CONCAT, flags, capture_keys)
|
177
|
+
end
|
178
|
+
|
179
|
+
# self must match the very end of the string (including any new lines)
|
180
|
+
def very_end
|
181
|
+
Rexp.new(parenthesize(CONCAT) + "\\z", CONCAT, flags, capture_keys)
|
182
|
+
end
|
183
|
+
|
184
|
+
# self must match an entire line.
|
185
|
+
def line
|
186
|
+
self.bol.eol
|
187
|
+
end
|
188
|
+
|
189
|
+
# self is contained in a non-capturing group
|
190
|
+
def group
|
191
|
+
Rexp.new("(?:" + string + ")", GROUPED, flags, capture_keys)
|
192
|
+
end
|
193
|
+
|
194
|
+
# self is a capturing group with the given name.
|
195
|
+
def capture(name)
|
196
|
+
Rexp.new("(" + string + ")", GROUPED, flags, [name] + capture_keys)
|
197
|
+
end
|
198
|
+
|
199
|
+
# self will work in multiline matches
|
200
|
+
def multiline
|
201
|
+
Rexp.new(string, GROUPED, flags|Regexp::MULTILINE, capture_keys)
|
202
|
+
end
|
203
|
+
|
204
|
+
# Is this a multiline regular expression?
|
205
|
+
def multiline?
|
206
|
+
(flags & Regexp::MULTILINE) != 0
|
207
|
+
end
|
208
|
+
|
209
|
+
# self will work in multiline matches
|
210
|
+
def ignore_case
|
211
|
+
Rexp.new(string, GROUPED, flags|Regexp::IGNORECASE, capture_keys)
|
212
|
+
end
|
213
|
+
|
214
|
+
# Does this regular expression ignore case?
|
215
|
+
def ignore_case?
|
216
|
+
(flags & Regexp::IGNORECASE) != 0
|
217
|
+
end
|
218
|
+
|
219
|
+
# String representation of the constructed regular expression.
|
220
|
+
def to_s
|
221
|
+
regexp.to_s
|
222
|
+
end
|
223
|
+
|
224
|
+
protected
|
225
|
+
|
226
|
+
# String representation with grouping if needed.
|
227
|
+
#
|
228
|
+
# If the precedence of the current Regexp is less than the new
|
229
|
+
# precedence level, return the string wrapped in a non-capturing
|
230
|
+
# group. Otherwise just return the string.
|
231
|
+
def parenthesize(new_level)
|
232
|
+
if level >= new_level
|
233
|
+
string
|
234
|
+
else
|
235
|
+
group.string
|
236
|
+
end
|
237
|
+
end
|
238
|
+
|
239
|
+
# Create a literal regular expression (concatenation level
|
240
|
+
# precedence, no capture keywords).
|
241
|
+
def self.literal(chars)
|
242
|
+
new(Regexp.escape(chars), CONCAT, 0, [])
|
243
|
+
end
|
244
|
+
|
245
|
+
# Create a regular expression from a raw string representing a
|
246
|
+
# regular expression. The raw string should represent a regular
|
247
|
+
# expression with the highest level of precedence (you should use
|
248
|
+
# parenthesis if it is not).
|
249
|
+
def self.raw(re_string) # :no-doc:
|
250
|
+
new(re_string, GROUPED, 0, [])
|
251
|
+
end
|
252
|
+
|
253
|
+
# Escape any special characters.
|
254
|
+
def self.escape_any(chars)
|
255
|
+
chars.gsub(/([\[\]\^\-])/) { "\\#{$1}" }
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
|
260
|
+
# Construct a regular expression from the literal string. Special
|
261
|
+
# Regexp characters will be escaped before constructing the regular
|
262
|
+
# expression. If no literal is given, then the NULL regular
|
263
|
+
# expression is returned.
|
264
|
+
#
|
265
|
+
# See Re for example usage.
|
266
|
+
#
|
267
|
+
# Build a literal Rexp from +exp+ (special characters are escaped by
# Rexp.literal).  When called without an argument, or with nil or
# false, the shared NULL expression is returned instead.
def re(exp=nil)
  if exp
    Rexp.literal(exp)
  else
    NULL
  end
end
|
270
|
+
|
271
|
+
# Matches an empty string. Additional common regular expression
|
272
|
+
# constants are defined as methods on the NULL Rexp. See Re::NULL.
|
273
|
+
NULL = Rexp.literal("")
|
274
|
+
|
275
|
+
# Matches the null string
|
276
|
+
def NULL.null
|
277
|
+
self
|
278
|
+
end
|
279
|
+
|
280
|
+
# :call-seq:
|
281
|
+
# re.any
|
282
|
+
# re.any(chars)
|
283
|
+
# re.any(range)
|
284
|
+
# re.any(chars, range, ...)
|
285
|
+
#
|
286
|
+
# Match a character from the character class.
|
287
|
+
#
|
288
|
+
# Any without any arguments will match any single character. Any
|
289
|
+
# with one or more arguments will construct a character class for
|
290
|
+
# the arguments. If the argument is a three character string where
|
291
|
+
# the middle character is "-", then the argument represents a range
|
292
|
+
# of characters. Otherwise the arguments are treated as a list of
|
293
|
+
# characters to be added to the character class.
|
294
|
+
#
|
295
|
+
# Examples:
|
296
|
+
#
|
297
|
+
# re.any -- match any character
|
298
|
+
# re.any("aieouy") -- match vowels
|
299
|
+
# re.any("0-9") -- match digits
|
300
|
+
# re.any("A-Z", "a-z", "0-9") -- match alphanumerics
|
301
|
+
# re.any("A-Z", "a-z", "0-9", "_") -- match alphanumerics and underscore
|
302
|
+
#
|
303
|
+
# Build a character class from +chars+, or the "any character"
# expression when no arguments are given.
#
# An argument is treated as a range only when the whole argument is
# exactly three characters with "-" in the middle.  The test is
# anchored with \A and \z (not ^ and $) so that an argument containing
# a new line cannot be mistaken for a range and inserted unescaped.
# Range endpoints are escaped individually so that an endpoint such
# as "^" or "]" cannot change the meaning of the class.
def NULL.any(*chars)
  if chars.empty?
    @dot ||= Rexp.raw(".")
  else
    any_chars = ''
    chars.each do |chs|
      if chs =~ /\A(.)-(.)\z/
        any_chars << Rexp.escape_any($1) << "-" << Rexp.escape_any($2)
      else
        any_chars << Rexp.escape_any(chs)
      end
    end
    Rexp.new("[" + any_chars + "]", GROUPED, 0, [])
  end
end
|
318
|
+
|
319
|
+
# Matches any white space
|
320
|
+
def NULL.space
|
321
|
+
@space ||= Rexp.raw("\\s")
|
322
|
+
end
|
323
|
+
|
324
|
+
# Matches a sequence of one or more white space characters
|
325
|
+
def NULL.spaces
|
326
|
+
@spaces ||= space.one_or_more
|
327
|
+
end
|
328
|
+
|
329
|
+
# Matches any non-white space
|
330
|
+
def NULL.nonspace
|
331
|
+
@nonspace ||= Rexp.raw("\\S")
|
332
|
+
end
|
333
|
+
|
334
|
+
# Matches a sequence of one or more non-white space characters
|
335
|
+
def NULL.nonspaces
|
336
|
+
@nonspaces ||= Rexp.raw("\\S").one_or_more
|
337
|
+
end
|
338
|
+
|
339
|
+
# Matches a single word character
|
340
|
+
def NULL.word_char
|
341
|
+
@word_char ||= Rexp.raw("\\w")
|
342
|
+
end
|
343
|
+
|
344
|
+
# Matches any sequence of word characters
|
345
|
+
def NULL.word
|
346
|
+
@word ||= word_char.one_or_more
|
347
|
+
end
|
348
|
+
|
349
|
+
# Zero-length match at any word break (word boundary)
|
350
|
+
def NULL.break
|
351
|
+
@break ||= Rexp.raw("\\b")
|
352
|
+
end
|
353
|
+
|
354
|
+
# Matches a digit
|
355
|
+
def NULL.digit
|
356
|
+
@digit ||= any("0-9")
|
357
|
+
end
|
358
|
+
|
359
|
+
# Matches a sequence of digits
|
360
|
+
def NULL.digits
|
361
|
+
@digits ||= digit.one_or_more
|
362
|
+
end
|
363
|
+
|
364
|
+
# Matches a hex digit (upper or lower case)
|
365
|
+
def NULL.hex_digit
|
366
|
+
@hex_digit ||= any("0-9", "a-f", "A-F")
|
367
|
+
end
|
368
|
+
|
369
|
+
# Matches a sequence of hex digits
|
370
|
+
def NULL.hex_digits
|
371
|
+
@hex_digits ||= hex_digit.one_or_more
|
372
|
+
end
|
373
|
+
end
|
data/test/re_test.rb
ADDED
@@ -0,0 +1,440 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 're'
|
5
|
+
|
6
|
+
class ReTest < Test::Unit::TestCase
|
7
|
+
include Re
|
8
|
+
|
9
|
+
def test_strings_match
|
10
|
+
assert re("a") =~ "a"
|
11
|
+
assert re("a") !~ "A"
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_not_match
|
15
|
+
assert re("a") !~ "b"
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_special_characters_match
|
19
|
+
r = re("()").all
|
20
|
+
assert r =~ "()"
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_concatenation
|
24
|
+
r = re("a") + re("b")
|
25
|
+
assert r =~ "ab"
|
26
|
+
assert r !~ "xb"
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_null
|
30
|
+
r = re("a") + re.null + re("b")
|
31
|
+
assert r =~ "ab"
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_alteration
|
35
|
+
r = re("a") | re("b")
|
36
|
+
assert r =~ "a"
|
37
|
+
assert r =~ "b"
|
38
|
+
assert r !~ "x"
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_many
|
42
|
+
r = re("x").many.all
|
43
|
+
assert r !~ "z"
|
44
|
+
assert r =~ ""
|
45
|
+
assert r =~ "x"
|
46
|
+
assert r =~ "xxx"
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_one_or_more
|
50
|
+
r = re("x").one_or_more.all
|
51
|
+
assert r !~ ""
|
52
|
+
assert r =~ "x"
|
53
|
+
assert r =~ "xxx"
|
54
|
+
end
|
55
|
+
|
56
|
+
def test_repeat_fixed_number
|
57
|
+
r = re("a").repeat(3).all
|
58
|
+
assert r =~ "aaa"
|
59
|
+
assert r !~ "aa"
|
60
|
+
assert r !~ "aaaa"
|
61
|
+
end
|
62
|
+
|
63
|
+
def test_repeat_range
|
64
|
+
r = re("a").repeat(2, 4).all
|
65
|
+
assert r !~ "a"
|
66
|
+
assert r =~ "aa"
|
67
|
+
assert r =~ "aaa"
|
68
|
+
assert r =~ "aaaa"
|
69
|
+
assert r !~ "aaaaa"
|
70
|
+
end
|
71
|
+
|
72
|
+
def test_at_least
|
73
|
+
r = re("a").at_least(2).all
|
74
|
+
assert r !~ "a"
|
75
|
+
assert r =~ "aa"
|
76
|
+
assert r =~ "aaaaaaaaaaaaaaaaaaaa"
|
77
|
+
end
|
78
|
+
|
79
|
+
def test_at_most
|
80
|
+
r = re("a").at_most(4).all
|
81
|
+
assert r =~ ""
|
82
|
+
assert r =~ "a"
|
83
|
+
assert r =~ "aa"
|
84
|
+
assert r =~ "aaa"
|
85
|
+
assert r =~ "aaaa"
|
86
|
+
assert r !~ "aaaaa"
|
87
|
+
end
|
88
|
+
|
89
|
+
def test_optional
|
90
|
+
r = re("a").optional.all
|
91
|
+
assert r =~ ""
|
92
|
+
assert r =~ "a"
|
93
|
+
assert r !~ "aa"
|
94
|
+
end
|
95
|
+
|
96
|
+
def test_any_with_no_arguments
|
97
|
+
r = re.any.all
|
98
|
+
assert r =~ "a"
|
99
|
+
assert r =~ "1"
|
100
|
+
assert r =~ "#"
|
101
|
+
assert r =~ "."
|
102
|
+
assert r =~ " "
|
103
|
+
assert r !~ "ab"
|
104
|
+
assert r !~ "\n"
|
105
|
+
end
|
106
|
+
|
107
|
+
def test_no_flags
|
108
|
+
r = re("a")
|
109
|
+
assert ! r.ignore_case?
|
110
|
+
assert ! r.multiline?
|
111
|
+
end
|
112
|
+
|
113
|
+
def test_any_with_multiline
|
114
|
+
r = re.any.multiline.all
|
115
|
+
assert r.multiline?
|
116
|
+
assert r =~ "\n"
|
117
|
+
end
|
118
|
+
|
119
|
+
def test_ignore_case
|
120
|
+
r = re("a").ignore_case.all
|
121
|
+
assert r.ignore_case?
|
122
|
+
assert r =~ "a"
|
123
|
+
assert r =~ "A"
|
124
|
+
end
|
125
|
+
|
126
|
+
def test_any_with_a_character_list
|
127
|
+
r = re.any("xyz").all
|
128
|
+
assert r !~ "w"
|
129
|
+
assert r =~ "x"
|
130
|
+
assert r =~ "y"
|
131
|
+
assert r =~ "z"
|
132
|
+
end
|
133
|
+
|
134
|
+
def test_any_with_special_chars
|
135
|
+
r = re.any("^.(-)[]").all
|
136
|
+
assert r !~ "x"
|
137
|
+
assert r =~ "."
|
138
|
+
assert r =~ "^"
|
139
|
+
assert r =~ "-"
|
140
|
+
assert r =~ "("
|
141
|
+
assert r =~ ")"
|
142
|
+
assert r =~ "["
|
143
|
+
assert r =~ "]"
|
144
|
+
end
|
145
|
+
|
146
|
+
def test_range_of_chars
|
147
|
+
r = re.any("a-z").many.all
|
148
|
+
assert r =~ "abcdefghijklmnopqrstuvwxyz"
|
149
|
+
end
|
150
|
+
|
151
|
+
def test_range_and_mix_of_chars
|
152
|
+
r = re.any("0-9", ".-").many.all
|
153
|
+
assert r =~ "-12.3"
|
154
|
+
end
|
155
|
+
|
156
|
+
def test_none
|
157
|
+
r = re.none("xyz").all
|
158
|
+
assert r =~ "w"
|
159
|
+
assert r !~ "x"
|
160
|
+
assert r !~ "y"
|
161
|
+
assert r !~ "z"
|
162
|
+
end
|
163
|
+
|
164
|
+
def test_none_with_special_chars
|
165
|
+
r = re.none("^.()[]-").all
|
166
|
+
assert r =~ "x"
|
167
|
+
assert r !~ "."
|
168
|
+
assert r !~ "^"
|
169
|
+
assert r !~ "-"
|
170
|
+
assert r !~ "("
|
171
|
+
assert r !~ ")"
|
172
|
+
assert r !~ "["
|
173
|
+
assert r !~ "]"
|
174
|
+
end
|
175
|
+
|
176
|
+
def test_all
|
177
|
+
r = re("a").all
|
178
|
+
assert r =~ "a"
|
179
|
+
assert r !~ "a\n"
|
180
|
+
assert r !~ "xa"
|
181
|
+
assert r !~ "ax"
|
182
|
+
end
|
183
|
+
|
184
|
+
def test_almost_all
|
185
|
+
r = re("a").almost_all
|
186
|
+
assert r =~ "a"
|
187
|
+
assert r =~ "a\n"
|
188
|
+
assert r !~ "xa"
|
189
|
+
assert r !~ "ax"
|
190
|
+
end
|
191
|
+
|
192
|
+
def test_all_across_lines
|
193
|
+
r = re("a").many.all
|
194
|
+
assert r =~ "a"
|
195
|
+
assert r !~ "b\na"
|
196
|
+
end
|
197
|
+
|
198
|
+
def test_line
|
199
|
+
r = re("a").line
|
200
|
+
assert r =~ "a"
|
201
|
+
assert r =~ "b\na"
|
202
|
+
assert r =~ "b\na\n"
|
203
|
+
assert r =~ "b\na\nx"
|
204
|
+
end
|
205
|
+
|
206
|
+
def test_bol
|
207
|
+
r = re("a").bol
|
208
|
+
assert r =~ "a"
|
209
|
+
assert r =~ "b\na"
|
210
|
+
assert r =~ "b\na"
|
211
|
+
assert r !~ "b\nxa"
|
212
|
+
end
|
213
|
+
|
214
|
+
def test_eol
|
215
|
+
r = re("a").eol
|
216
|
+
assert r =~ "a"
|
217
|
+
assert r =~ "b\na\nx"
|
218
|
+
assert r !~ "b\nax"
|
219
|
+
assert r !~ "b\nax\n"
|
220
|
+
end
|
221
|
+
|
222
|
+
def test_begin
|
223
|
+
r = re("a").begin
|
224
|
+
assert r =~ "a"
|
225
|
+
assert r =~ "a\nb"
|
226
|
+
assert r !~ "b\na"
|
227
|
+
assert r !~ "b\na"
|
228
|
+
assert r !~ "b\nxa"
|
229
|
+
end
|
230
|
+
|
231
|
+
def test_begin2
|
232
|
+
r = re.begin + re("a")
|
233
|
+
assert r =~ "a"
|
234
|
+
assert r =~ "a\nb"
|
235
|
+
assert r !~ "b\na"
|
236
|
+
assert r !~ "b\na"
|
237
|
+
assert r !~ "b\nxa"
|
238
|
+
end
|
239
|
+
|
240
|
+
def test_end
|
241
|
+
r = re("a").end
|
242
|
+
assert r =~ "a"
|
243
|
+
assert r =~ "b\na"
|
244
|
+
assert r =~ "b\na\n"
|
245
|
+
assert r !~ "b\na\nx"
|
246
|
+
assert r !~ "b\nax"
|
247
|
+
assert r !~ "b\nax\n"
|
248
|
+
end
|
249
|
+
|
250
|
+
def test_end2
|
251
|
+
r = re("a") + re.end
|
252
|
+
assert r =~ "a"
|
253
|
+
assert r =~ "b\na"
|
254
|
+
assert r =~ "b\na\n"
|
255
|
+
assert r !~ "b\na\nx"
|
256
|
+
assert r !~ "b\nax"
|
257
|
+
assert r !~ "b\nax\n"
|
258
|
+
end
|
259
|
+
|
260
|
+
def test_very_end
|
261
|
+
r = re("a").very_end
|
262
|
+
assert r =~ "a"
|
263
|
+
assert r =~ "b\na"
|
264
|
+
assert r !~ "b\na\n"
|
265
|
+
assert r !~ "b\na\nx"
|
266
|
+
assert r !~ "b\nax"
|
267
|
+
assert r !~ "b\nax\n"
|
268
|
+
end
|
269
|
+
|
270
|
+
def test_hex_digit
|
271
|
+
r = re.hex_digit.all
|
272
|
+
assert r =~ "1"
|
273
|
+
assert r =~ "a"
|
274
|
+
assert r =~ "F"
|
275
|
+
assert r !~ "12"
|
276
|
+
assert r !~ "g"
|
277
|
+
end
|
278
|
+
|
279
|
+
def test_hex_digits
|
280
|
+
r = re.hex_digits.all
|
281
|
+
assert r =~ "1234567890abcedfABCDEF"
|
282
|
+
assert r !~ "g"
|
283
|
+
end
|
284
|
+
|
285
|
+
def test_digit
|
286
|
+
r = re.digit.all
|
287
|
+
assert r =~ "0"
|
288
|
+
assert r =~ "9"
|
289
|
+
assert r !~ "12"
|
290
|
+
assert r !~ "x"
|
291
|
+
assert r !~ "a"
|
292
|
+
end
|
293
|
+
|
294
|
+
def test_digits
|
295
|
+
r = re.digits.all
|
296
|
+
assert r =~ "0123456789"
|
297
|
+
assert r !~ "0123456789x"
|
298
|
+
end
|
299
|
+
|
300
|
+
def test_break
|
301
|
+
r = re.break + re("a") + re.break
|
302
|
+
assert r =~ "there is a home"
|
303
|
+
assert r !~ "there is an aardvark"
|
304
|
+
end
|
305
|
+
|
306
|
+
def test_nonspace
|
307
|
+
r = re.nonspace.all
|
308
|
+
assert r =~ "a"
|
309
|
+
assert r =~ "1"
|
310
|
+
assert r =~ "#"
|
311
|
+
assert r !~ "ab"
|
312
|
+
assert r !~ " "
|
313
|
+
assert r !~ "\t"
|
314
|
+
assert r !~ "\n"
|
315
|
+
end
|
316
|
+
|
317
|
+
def test_nonspaces
|
318
|
+
r = re.nonspaces.all
|
319
|
+
assert r =~ "a"
|
320
|
+
assert r =~ "asdfhjkl!@\#$%^&*()_+="
|
321
|
+
assert r !~ ""
|
322
|
+
assert r !~ "a dog"
|
323
|
+
end
|
324
|
+
|
325
|
+
def test_space
|
326
|
+
r = re.space.all
|
327
|
+
assert r =~ " "
|
328
|
+
assert r =~ "\t"
|
329
|
+
assert r =~ "\n"
|
330
|
+
assert r !~ "x"
|
331
|
+
assert r !~ ""
|
332
|
+
assert r !~ " "
|
333
|
+
assert re.space.many.all =~ " \n\t "
|
334
|
+
end
|
335
|
+
|
336
|
+
def test_spaces
|
337
|
+
r = re.spaces.all
|
338
|
+
assert r =~ " "
|
339
|
+
assert r =~ " "
|
340
|
+
assert r =~ " \t \n "
|
341
|
+
assert r !~ ""
|
342
|
+
assert r !~ "x"
|
343
|
+
end
|
344
|
+
|
345
|
+
def test_word_char
|
346
|
+
r = re.word_char.all
|
347
|
+
assert r =~ "a"
|
348
|
+
assert r =~ "1"
|
349
|
+
assert r =~ "_"
|
350
|
+
assert r !~ "!"
|
351
|
+
assert r !~ "?"
|
352
|
+
end
|
353
|
+
|
354
|
+
def test_word
|
355
|
+
r = re.word.all
|
356
|
+
assert r =~ "a"
|
357
|
+
assert r =~ "1"
|
358
|
+
assert re.word.all =~ "this_is_a_test"
|
359
|
+
assert re.word.all !~ "asdf jkl"
|
360
|
+
end
|
361
|
+
|
362
|
+
def test_single_capture
|
363
|
+
r = re.any("a-z").one_or_more.capture(:word)
|
364
|
+
result = (r =~ "012abc789")
|
365
|
+
assert result
|
366
|
+
assert_equal "abc", result.data(:word)
|
367
|
+
end
|
368
|
+
|
369
|
+
def test_multiple_capture
|
370
|
+
word = re.any("a-z").one_or_more.capture(:word)
|
371
|
+
number = re.any("0-9").one_or_more.capture(:number)
|
372
|
+
r = (word + re.spaces + number).capture(:everything)
|
373
|
+
result = (r =~ " now 123\n")
|
374
|
+
assert result
|
375
|
+
assert_equal [:everything, :word, :number], r.capture_keys
|
376
|
+
assert_equal "now", result.data(:word)
|
377
|
+
assert_equal "123", result.data(:number)
|
378
|
+
assert_equal "now 123", result.data(:everything)
|
379
|
+
assert_equal "now 123", result.data
|
380
|
+
end
|
381
|
+
|
382
|
+
def test_precedence_concatentaion_vs_alteration
|
383
|
+
r = (re("a") | re("b") + re("c")).all
|
384
|
+
assert r =~ "a"
|
385
|
+
assert r =~ "bc"
|
386
|
+
assert r !~ "ac"
|
387
|
+
end
|
388
|
+
|
389
|
+
# A line anchor has concatenation precedence, so applying a postfix
# operator to an anchored expression must wrap it in a non-capturing
# group rather than binding to the last character only.
def test_precendence_of_eol
  r = re("a").bol.many
  assert_equal "(?:^a)*", r.string
  assert r =~ "a"
end
|
392
|
+
|
393
|
+
def test_example
|
394
|
+
bracketed_delim = re("[") + re.none("]").one_or_more + re("]")
|
395
|
+
delims = bracketed_delim.one_or_more.capture(:delims)
|
396
|
+
delim_definition = re("//").bol + delims + re("\n")
|
397
|
+
|
398
|
+
result = delim_definition.match("//[a][b][xyz]\n1a2b3xyz4")
|
399
|
+
assert result
|
400
|
+
assert_equal "[a][b][xyz]", result.data(:delims)
|
401
|
+
end
|
402
|
+
|
403
|
+
def test_date_parser
|
404
|
+
# (19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])
|
405
|
+
|
406
|
+
delim = re.any("- /.")
|
407
|
+
n_19_or_20 = re("19") | re("20")
|
408
|
+
n_1_to_9 = re("0") + re.any("1-9")
|
409
|
+
n_10_to_12 = re("1") + re.any("012")
|
410
|
+
n_10_to_29 = re.any("12") + re.any("0-9")
|
411
|
+
n_30_or_31 = re("3") + re.any("01")
|
412
|
+
|
413
|
+
year = n_19_or_20 + re.digit.repeat(2)
|
414
|
+
month = n_1_to_9 | n_10_to_12
|
415
|
+
day = n_1_to_9 | n_10_to_29 | n_30_or_31
|
416
|
+
|
417
|
+
date_re = (year.capture(:year) + delim + month.capture(:month) + delim + day.capture(:day)).all
|
418
|
+
|
419
|
+
assert date_re.match("1900/01/01")
|
420
|
+
assert date_re.match("1956/01/01")
|
421
|
+
assert date_re.match("2000/01/01")
|
422
|
+
assert date_re.match("2010/01/01")
|
423
|
+
assert date_re.match("2010/12/01")
|
424
|
+
assert date_re.match("2010/03/01")
|
425
|
+
assert date_re.match("2010/03/12")
|
426
|
+
assert date_re.match("2010/03/24")
|
427
|
+
assert date_re.match("2010/03/30")
|
428
|
+
assert date_re.match("2010/03/31")
|
429
|
+
|
430
|
+
assert ! date_re.match("2100/01/01")
|
431
|
+
assert ! date_re.match("2100/01/32")
|
432
|
+
assert ! date_re.match("2010/00/01")
|
433
|
+
assert ! date_re.match("2010/13/01")
|
434
|
+
assert ! date_re.match("2010/01/00")
|
435
|
+
assert ! date_re.match("2010/1/01")
|
436
|
+
assert ! date_re.match("2010/01/1")
|
437
|
+
end
|
438
|
+
|
439
|
+
|
440
|
+
end
|
metadata
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: re
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jim Weirich
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-12-28 00:00:00 -05:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: " The re library allows the easy construction of regular expressions via an expression language.\n"
|
17
|
+
email: jim.weirich@gmail.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- Rakefile
|
26
|
+
- README.rdoc
|
27
|
+
- lib/re.rb
|
28
|
+
- test/re_test.rb
|
29
|
+
has_rdoc: true
|
30
|
+
homepage: http://re.rubyforge.org
|
31
|
+
licenses: []
|
32
|
+
|
33
|
+
post_install_message:
|
34
|
+
rdoc_options:
|
35
|
+
- --line-numbers
|
36
|
+
- --inline-source
|
37
|
+
- --main
|
38
|
+
- re.rb
|
39
|
+
- --title
|
40
|
+
- Re -- Ruby Regular Expression Construction
|
41
|
+
require_paths:
|
42
|
+
- lib
|
43
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: "0"
|
48
|
+
version:
|
49
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: "0"
|
54
|
+
version:
|
55
|
+
requirements: []
|
56
|
+
|
57
|
+
rubyforge_project: re-lib
|
58
|
+
rubygems_version: 1.3.5
|
59
|
+
signing_key:
|
60
|
+
specification_version: 3
|
61
|
+
summary: Construct Ruby Regular Expressions
|
62
|
+
test_files: []
|
63
|
+
|