re 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +39 -0
- data/Rakefile +18 -0
- data/lib/re.rb +373 -0
- data/test/re_test.rb +440 -0
- metadata +63 -0
data/README.rdoc
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
|
2
|
+
= Regular Expression Construction.
|
3
|
+
|
4
|
+
Construct regular expressions using the re() method.
|
5
|
+
|
6
|
+
Usage:
|
7
|
+
|
8
|
+
include Re
|
9
|
+
|
10
|
+
number = re.any("0-9").all
|
11
|
+
if number =~ string
|
12
|
+
puts "Matches!"
|
13
|
+
else
|
14
|
+
puts "No Match"
|
15
|
+
end
|
16
|
+
|
17
|
+
Examples:
|
18
|
+
|
19
|
+
re("a") -- matches "a"
|
20
|
+
re("a") + re("b") -- matches "ab"
|
21
|
+
re("a") | re("b") -- matches "a" or "b"
|
22
|
+
re("a").many -- matches "", "a", "aaaaaa"
|
23
|
+
re("a").one_or_more -- matches "a", "aaaaaa", but not ""
|
24
|
+
re("a").optional -- matches "" or "a"
|
25
|
+
re("a").all -- matches "a", but not "xab"
|
26
|
+
|
27
|
+
See Re::Rexp for a complete list of expressions.
|
28
|
+
|
29
|
+
Using re without an argument allows access to a number of common
|
30
|
+
regular expression constants. For example:
|
31
|
+
|
32
|
+
re.space -- matches " ", "\n" or "\t"
|
33
|
+
re.spaces -- matches any number of spaces (but at least one)
|
34
|
+
re.digit / re.digits -- matches a digit / sequence of digits
|
35
|
+
|
36
|
+
See Re::NULL for a complete list of common constants.
|
37
|
+
|
38
|
+
See Re.re,
|
39
|
+
Re::Rexp, and Re::NULL for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Build script: runs the unit tests (default task) and generates RDoc.

require 'rake/clean'
require 'rake/testtask'

# RDoc provides its own Rake task as RDoc::Task (rdoc/task).  The older
# rake/rdoctask location is no longer shipped by later Rake releases, so
# it is used only as a fallback for old installations.
begin
  require 'rdoc/task'
  rdoc_task = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task = Rake::RDocTask
end

task :default => :test

# Run every test/*_test.rb file with Ruby warnings enabled.
Rake::TestTask.new(:test) do |t|
  t.warning = true
  t.verbose = false
  t.test_files = FileList['test/*_test.rb']
end

# Generate API documentation from the single library file.
rdoc_task.new do |rd|
  rd.main = "lib/re.rb"
  rd.rdoc_files = FileList["lib/re.rb"]
end
|
data/lib/re.rb
ADDED
@@ -0,0 +1,373 @@
|
|
1
|
+
#!/usr/bin/ruby -wKU
|
2
|
+
#
|
3
|
+
# = Regular Expression Construction.
|
4
|
+
#
|
5
|
+
# Construct regular expressions using the re() method.
|
6
|
+
#
|
7
|
+
# Usage:
|
8
|
+
#
|
9
|
+
# include Re
|
10
|
+
#
|
11
|
+
# number = re.any("0-9").all
|
12
|
+
# if number =~ string
|
13
|
+
# puts "Matches!"
|
14
|
+
# else
|
15
|
+
# puts "No Match"
|
16
|
+
# end
|
17
|
+
#
|
18
|
+
# Examples:
|
19
|
+
#
|
20
|
+
# re("a") -- matches "a"
|
21
|
+
# re("a") + re("b") -- matches "ab"
|
22
|
+
# re("a") | re("b") -- matches "a" or "b"
|
23
|
+
# re("a").many -- matches "", "a", "aaaaaa"
|
24
|
+
# re("a").one_or_more -- matches "a", "aaaaaa", but not ""
|
25
|
+
# re("a").optional -- matches "" or "a"
|
26
|
+
# re("a").all -- matches "a", but not "xab"
|
27
|
+
#
|
28
|
+
# See Re::Rexp for a complete list of expressions.
|
29
|
+
#
|
30
|
+
# Using re without an argument allows access to a number of common
|
31
|
+
# regular expression constants. For example:
|
32
|
+
#
|
33
|
+
# re.space -- matches " ", "\n" or "\t"
|
34
|
+
# re.spaces -- matches any number of spaces (but at least one)
|
35
|
+
# re.digit / re.digits -- matches a digit / sequence of digits
|
36
|
+
#
|
37
|
+
# See Re::NULL for a complete list of common constants.
|
38
|
+
#
|
39
|
+
# See Re.re,
|
40
|
+
# Re::Rexp, and Re::NULL for details.
|
41
|
+
|
42
|
+
module Re
  # The outcome of a successful Rexp#match.  Wraps the underlying
  # MatchData together with the Rexp that produced it, so captured text
  # can be looked up by the names given to Rexp#capture.
  class Result
    # +match_data+ is the MatchData of the match; +rexp+ is the Rexp
    # whose capture_keys name the capturing groups.
    def initialize(match_data, rexp)
      @match_data = match_data
      @rexp = rexp
    end

    # Return the text captured under +name+, or nil when no capture has
    # that name.  Without a name, return the entire matched text.
    def data(name=nil)
      if name
        # Group 0 is the whole match, so the Nth capture key maps to
        # group N+1.
        index = @rexp.capture_keys.index(name)
        index ? @match_data[index+1] : nil
      else
        @match_data[0]
      end
    end
  end

  # Precedence levels for regular expressions (a higher number binds
  # more tightly):

  GROUPED = 4                   # (r), [chars]      :nodoc:
  POSTFIX = 3                   # r*, r+, r?        :nodoc:
  CONCAT  = 2                   # r + r, literal    :nodoc:
  ALT     = 1                   # r | r             :nodoc:


  # Constructed regular expressions.
  class Rexp
    attr_reader :string, :level, :flags, :capture_keys

    # Create a regular expression from the string.  The regular
    # expression will have a precedence of +level+, will be compiled
    # with the Regexp option bits in +flags+, and will recognize
    # +keys+ as a list of capture keys.
    def initialize(string, level, flags, keys)
      @string = string
      @level = level
      @flags = flags
      @capture_keys = keys
    end

    # Return a real regular expression from the constructed regular
    # expression.  The compiled Regexp is cached after the first call.
    def regexp
      @regexp ||= Regexp.new(string, flags)
    end

    # Does it match a string? (returns Re::Result if match, nil otherwise)
    def match(string)
      md = regexp.match(string)
      md ? Result.new(md, self) : nil
    end
    alias =~ match

    # Concatenate two regular expressions.  The flags of both operands
    # are combined and apply to the whole result.
    def +(other)
      Rexp.new(parenthesize(CONCAT) + other.parenthesize(CONCAT),
        CONCAT,
        flags | other.flags,
        capture_keys + other.capture_keys)
    end

    # Matches either self or other.  The flags of both operands are
    # combined and apply to the whole result.
    def |(other)
      Rexp.new(parenthesize(ALT) + "|" + other.parenthesize(ALT),
        ALT,
        flags | other.flags,
        capture_keys + other.capture_keys)
    end

    # self is optional
    def optional
      Rexp.new(parenthesize(POSTFIX) + "?", POSTFIX, flags, capture_keys)
    end

    # self matches many times (zero or more)
    def many
      Rexp.new(parenthesize(POSTFIX) + "*", POSTFIX, flags, capture_keys)
    end

    # self matches one or more times
    def one_or_more
      Rexp.new(parenthesize(POSTFIX) + "+", POSTFIX, flags, capture_keys)
    end

    # self is repeated from min to max times.  If max is omitted, then
    # it is repeated exactly min times.
    def repeat(min, max=nil)
      if min && max
        Rexp.new(parenthesize(POSTFIX) + "{#{min},#{max}}", POSTFIX, flags, capture_keys)
      else
        Rexp.new(parenthesize(POSTFIX) + "{#{min}}", POSTFIX, flags, capture_keys)
      end
    end

    # self is repeated at least min times
    def at_least(min)
      Rexp.new(parenthesize(POSTFIX) + "{#{min},}", POSTFIX, flags, capture_keys)
    end

    # self is repeated at most max times
    def at_most(max)
      Rexp.new(parenthesize(POSTFIX) + "{0,#{max}}", POSTFIX, flags, capture_keys)
    end

    # None of the given characters will match.  The result is a new
    # negated character class; the receiver's own pattern, flags and
    # capture keys are not carried over.
    def none(chars)
      Rexp.new("[^" + Rexp.escape_any(chars) + "]", GROUPED, 0, [])
    end

    # self must match all of the string
    def all
      self.begin.very_end
    end

    # self must match almost all of the string (trailing new lines are allowed)
    def almost_all
      self.begin.end
    end

    # self must match at the beginning of a line
    def bol
      Rexp.new("^" + parenthesize(CONCAT), CONCAT, flags, capture_keys)
    end

    # self must match at the end of a line
    def eol
      Rexp.new(parenthesize(CONCAT) + "$", CONCAT, flags, capture_keys)
    end

    # self must match at the beginning of the string
    def begin
      Rexp.new("\\A" + parenthesize(CONCAT), CONCAT, flags, capture_keys)
    end

    # self must match the end of the string (with an optional new line)
    def end
      Rexp.new(parenthesize(CONCAT) + "\\Z", CONCAT, flags, capture_keys)
    end

    # self must match the very end of the string (including any new lines)
    def very_end
      Rexp.new(parenthesize(CONCAT) + "\\z", CONCAT, flags, capture_keys)
    end

    # self must match an entire line.
    def line
      self.bol.eol
    end

    # self is contained in a non-capturing group
    def group
      Rexp.new("(?:" + string + ")", GROUPED, flags, capture_keys)
    end

    # self is a capturing group with the given name.  The new name is
    # placed before the existing capture keys because its opening
    # parenthesis precedes every group already inside self.
    def capture(name)
      Rexp.new("(" + string + ")", GROUPED, flags, [name] + capture_keys)
    end

    # self will work in multiline matches
    def multiline
      # Only the flags change; the pattern text is untouched, so its
      # precedence level must be kept for later composition to add
      # parentheses where they are needed.
      Rexp.new(string, level, flags|Regexp::MULTILINE, capture_keys)
    end

    # Is this a multiline regular expression?
    def multiline?
      (flags & Regexp::MULTILINE) != 0
    end

    # self will ignore case when matching
    def ignore_case
      # As with multiline, keep the precedence level of the unchanged
      # pattern text.
      Rexp.new(string, level, flags|Regexp::IGNORECASE, capture_keys)
    end

    # Does this regular expression ignore case?
    def ignore_case?
      (flags & Regexp::IGNORECASE) != 0
    end

    # String representation of the constructed regular expression.
    def to_s
      regexp.to_s
    end

    # Create a literal regular expression (concatenation level
    # precedence, no capture keywords).
    def self.literal(chars)
      new(Regexp.escape(chars), CONCAT, 0, [])
    end

    # Create a regular expression from a raw string representing a
    # regular expression.  The raw string should represent a regular
    # expression with the highest level of precedence (you should use
    # parenthesis if it is not).
    def self.raw(re_string)     # :no-doc:
      new(re_string, GROUPED, 0, [])
    end

    # Escape the characters that are special inside a character class
    # ("[", "]", "^" and "-").
    def self.escape_any(chars)
      chars.gsub(/([\[\]\^\-])/) { "\\#{$1}" }
    end

    protected

    # String representation with grouping if needed.
    #
    # If the precedence of the current Regexp is less than the new
    # precedence level, return the string wrapped in a non-capturing
    # group.  Otherwise just return the string.
    def parenthesize(new_level)
      if level >= new_level
        string
      else
        group.string
      end
    end
  end


  # Construct a regular expression from the literal string.  Special
  # Regexp characters will be escaped before constructing the regular
  # expression.  If no literal is given, then the NULL regular
  # expression is returned.
  #
  # See Re for example usage.
  #
  def re(exp=nil)
    exp ? Rexp.literal(exp) : NULL
  end

  # Matches an empty string.  Additional common regular expression
  # constants are defined as methods on the NULL Rexp.  See Re::NULL.
  NULL = Rexp.literal("")

  # Matches the null string
  def NULL.null
    self
  end

  # :call-seq:
  #   re.any
  #   re.any(chars)
  #   re.any(range)
  #   re.any(chars, range, ...)
  #
  # Match a character from the character class.
  #
  # Any without any arguments will match any single character.  Any
  # with one or more arguments will construct a character class for
  # the arguments.  If the argument is a three character string where
  # the middle character is "-", then the argument represents a range
  # of characters.  Otherwise the arguments are treated as a list of
  # characters to be added to the character class.
  #
  # Examples:
  #
  #   re.any                            -- match any character
  #   re.any("aieouy")                  -- match vowels
  #   re.any("0-9")                     -- match digits
  #   re.any("A-Z", "a-z", "0-9")       -- match alphanumerics
  #   re.any("A-Z", "a-z", "0-9", "_")  -- match alphanumerics and underscore
  #
  def NULL.any(*chars)
    if chars.empty?
      @dot ||= Rexp.raw(".")
    else
      any_chars = chars.map { |chs|
        # Anchor on the whole string (\A..\z) so that only a genuine
        # three character "x-y" argument is passed through as a range.
        if /\A.-.\z/ =~ chs
          chs
        else
          Rexp.escape_any(chs)
        end
      }.join
      Rexp.new("[" + any_chars + "]", GROUPED, 0, [])
    end
  end

  # Matches a single white space character
  def NULL.space
    @space ||= Rexp.raw("\\s")
  end

  # Matches a sequence of one or more white space characters
  def NULL.spaces
    @spaces ||= space.one_or_more
  end

  # Matches a single non-white space character
  def NULL.nonspace
    @nonspace ||= Rexp.raw("\\S")
  end

  # Matches a sequence of one or more non-white space characters
  def NULL.nonspaces
    @nonspaces ||= Rexp.raw("\\S").one_or_more
  end

  # Matches a single word character
  def NULL.word_char
    @word_char ||= Rexp.raw("\\w")
  end

  # Matches any sequence of word characters (at least one)
  def NULL.word
    @word ||= word_char.one_or_more
  end

  # Zero-length match at any word break
  def NULL.break
    @break ||= Rexp.raw("\\b")
  end

  # Matches a digit
  def NULL.digit
    @digit ||= any("0-9")
  end

  # Matches a sequence of digits
  def NULL.digits
    @digits ||= digit.one_or_more
  end

  # Matches a hex digit (upper or lower case)
  def NULL.hex_digit
    @hex_digit ||= any("0-9", "a-f", "A-F")
  end

  # Matches a sequence of hex digits
  def NULL.hex_digits
    @hex_digits ||= hex_digit.one_or_more
  end
end
|
data/test/re_test.rb
ADDED
@@ -0,0 +1,440 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 're'
|
5
|
+
|
6
|
+
# Unit tests for the Re regular expression construction library.
# Each test builds a Rexp with the re() DSL and checks what it does and
# does not match.
class ReTest < Test::Unit::TestCase
  include Re

  def test_strings_match
    assert re("a") =~ "a"
    assert re("a") !~ "A"
  end

  def test_not_match
    assert re("a") !~ "b"
  end

  def test_special_characters_match
    r = re("()").all
    assert r =~ "()"
  end

  def test_concatenation
    r = re("a") + re("b")
    assert r =~ "ab"
    assert r !~ "xb"
  end

  def test_null
    r = re("a") + re.null + re("b")
    assert r =~ "ab"
  end

  def test_alteration
    r = re("a") | re("b")
    assert r =~ "a"
    assert r =~ "b"
    assert r !~ "x"
  end

  def test_many
    r = re("x").many.all
    assert r !~ "z"
    assert r =~ ""
    assert r =~ "x"
    assert r =~ "xxx"
  end

  def test_one_or_more
    r = re("x").one_or_more.all
    assert r !~ ""
    assert r =~ "x"
    assert r =~ "xxx"
  end

  def test_repeat_fixed_number
    r = re("a").repeat(3).all
    assert r =~ "aaa"
    assert r !~ "aa"
    assert r !~ "aaaa"
  end

  def test_repeat_range
    r = re("a").repeat(2, 4).all
    assert r !~ "a"
    assert r =~ "aa"
    assert r =~ "aaa"
    assert r =~ "aaaa"
    assert r !~ "aaaaa"
  end

  def test_at_least
    r = re("a").at_least(2).all
    assert r !~ "a"
    assert r =~ "aa"
    assert r =~ "aaaaaaaaaaaaaaaaaaaa"
  end

  def test_at_most
    r = re("a").at_most(4).all
    assert r =~ ""
    assert r =~ "a"
    assert r =~ "aa"
    assert r =~ "aaa"
    assert r =~ "aaaa"
    assert r !~ "aaaaa"
  end

  def test_optional
    r = re("a").optional.all
    assert r =~ ""
    assert r =~ "a"
    assert r !~ "aa"
  end

  def test_any_with_no_arguments
    r = re.any.all
    assert r =~ "a"
    assert r =~ "1"
    assert r =~ "#"
    assert r =~ "."
    assert r =~ " "
    assert r !~ "ab"
    assert r !~ "\n"
  end

  def test_no_flags
    r = re("a")
    assert !r.ignore_case?
    assert !r.multiline?
  end

  def test_any_with_multiline
    r = re.any.multiline.all
    assert r.multiline?
    assert r =~ "\n"
  end

  def test_ignore_case
    r = re("a").ignore_case.all
    assert r.ignore_case?
    assert r =~ "a"
    assert r =~ "A"
  end

  def test_any_with_a_character_list
    r = re.any("xyz").all
    assert r !~ "w"
    assert r =~ "x"
    assert r =~ "y"
    assert r =~ "z"
  end

  def test_any_with_special_chars
    r = re.any("^.(-)[]").all
    assert r !~ "x"
    assert r =~ "."
    assert r =~ "^"
    assert r =~ "-"
    assert r =~ "("
    assert r =~ ")"
    assert r =~ "["
    assert r =~ "]"
  end

  def test_range_of_chars
    r = re.any("a-z").many.all
    assert r =~ "abcdefghijklmnopqrstuvwxyz"
  end

  def test_range_and_mix_of_chars
    r = re.any("0-9", ".-").many.all
    assert r =~ "-12.3"
  end

  def test_none
    r = re.none("xyz").all
    assert r =~ "w"
    assert r !~ "x"
    assert r !~ "y"
    assert r !~ "z"
  end

  def test_none_with_special_chars
    r = re.none("^.()[]-").all
    assert r =~ "x"
    assert r !~ "."
    assert r !~ "^"
    assert r !~ "-"
    assert r !~ "("
    assert r !~ ")"
    assert r !~ "["
    assert r !~ "]"
  end

  def test_all
    r = re("a").all
    assert r =~ "a"
    assert r !~ "a\n"
    assert r !~ "xa"
    assert r !~ "ax"
  end

  def test_almost_all
    r = re("a").almost_all
    assert r =~ "a"
    assert r =~ "a\n"
    assert r !~ "xa"
    assert r !~ "ax"
  end

  def test_all_across_lines
    r = re("a").many.all
    assert r =~ "a"
    assert r !~ "b\na"
  end

  def test_line
    r = re("a").line
    assert r =~ "a"
    assert r =~ "b\na"
    assert r =~ "b\na\n"
    assert r =~ "b\na\nx"
  end

  def test_bol
    r = re("a").bol
    assert r =~ "a"
    assert r =~ "b\na"
    assert r !~ "b\nxa"
  end

  def test_eol
    r = re("a").eol
    assert r =~ "a"
    assert r =~ "b\na\nx"
    assert r !~ "b\nax"
    assert r !~ "b\nax\n"
  end

  def test_begin
    r = re("a").begin
    assert r =~ "a"
    assert r =~ "a\nb"
    assert r !~ "b\na"
    assert r !~ "b\nxa"
  end

  def test_begin2
    r = re.begin + re("a")
    assert r =~ "a"
    assert r =~ "a\nb"
    assert r !~ "b\na"
    assert r !~ "b\nxa"
  end

  def test_end
    r = re("a").end
    assert r =~ "a"
    assert r =~ "b\na"
    assert r =~ "b\na\n"
    assert r !~ "b\na\nx"
    assert r !~ "b\nax"
    assert r !~ "b\nax\n"
  end

  def test_end2
    r = re("a") + re.end
    assert r =~ "a"
    assert r =~ "b\na"
    assert r =~ "b\na\n"
    assert r !~ "b\na\nx"
    assert r !~ "b\nax"
    assert r !~ "b\nax\n"
  end

  def test_very_end
    r = re("a").very_end
    assert r =~ "a"
    assert r =~ "b\na"
    assert r !~ "b\na\n"
    assert r !~ "b\na\nx"
    assert r !~ "b\nax"
    assert r !~ "b\nax\n"
  end

  def test_hex_digit
    r = re.hex_digit.all
    assert r =~ "1"
    assert r =~ "a"
    assert r =~ "F"
    assert r !~ "12"
    assert r !~ "g"
  end

  def test_hex_digits
    r = re.hex_digits.all
    assert r =~ "1234567890abcedfABCDEF"
    assert r !~ "g"
  end

  def test_digit
    r = re.digit.all
    assert r =~ "0"
    assert r =~ "9"
    assert r !~ "12"
    assert r !~ "x"
    assert r !~ "a"
  end

  def test_digits
    r = re.digits.all
    assert r =~ "0123456789"
    assert r !~ "0123456789x"
  end

  def test_break
    r = re.break + re("a") + re.break
    assert r =~ "there is a home"
    assert r !~ "there is an aardvark"
  end

  def test_nonspace
    r = re.nonspace.all
    assert r =~ "a"
    assert r =~ "1"
    assert r =~ "#"
    assert r !~ "ab"
    assert r !~ " "
    assert r !~ "\t"
    assert r !~ "\n"
  end

  def test_nonspaces
    r = re.nonspaces.all
    assert r =~ "a"
    assert r =~ "asdfhjkl!@\#$%^&*()_+="
    assert r !~ ""
    assert r !~ "a dog"
  end

  def test_space
    r = re.space.all
    assert r =~ " "
    assert r =~ "\t"
    assert r =~ "\n"
    assert r !~ "x"
    assert r !~ ""
    # Two spaces: a single-character class anchored to the whole string
    # must reject more than one character.
    assert r !~ "  "
    assert re.space.many.all =~ " \n\t "
  end

  def test_spaces
    r = re.spaces.all
    assert r =~ " "
    assert r =~ "    "
    assert r =~ " \t \n "
    assert r !~ ""
    assert r !~ "x"
  end

  def test_word_char
    r = re.word_char.all
    assert r =~ "a"
    assert r =~ "1"
    assert r =~ "_"
    assert r !~ "!"
    assert r !~ "?"
  end

  def test_word
    r = re.word.all
    assert r =~ "a"
    assert r =~ "1"
    assert re.word.all =~ "this_is_a_test"
    assert re.word.all !~ "asdf jkl"
  end

  def test_single_capture
    r = re.any("a-z").one_or_more.capture(:word)
    result = (r =~ "012abc789")
    assert result
    assert_equal "abc", result.data(:word)
  end

  def test_multiple_capture
    word = re.any("a-z").one_or_more.capture(:word)
    number = re.any("0-9").one_or_more.capture(:number)
    r = (word + re.spaces + number).capture(:everything)
    result = (r =~ " now 123\n")
    assert result
    assert_equal [:everything, :word, :number], r.capture_keys
    assert_equal "now", result.data(:word)
    assert_equal "123", result.data(:number)
    assert_equal "now 123", result.data(:everything)
    assert_equal "now 123", result.data
  end

  def test_precedence_concatentaion_vs_alteration
    r = (re("a") | re("b") + re("c")).all
    assert r =~ "a"
    assert r =~ "bc"
    assert r !~ "ac"
  end

  def test_precendence_of_eol
    # A line anchor binds at concatenation level, so applying a postfix
    # operator must wrap the anchored expression in a group.
    r = re("a").bol.many
    assert_equal "(?:^a)*", r.string
    assert r.all =~ ""
    assert r.all =~ "a"
    assert r.all !~ "aa"
  end

  def test_example
    bracketed_delim = re("[") + re.none("]").one_or_more + re("]")
    delims = bracketed_delim.one_or_more.capture(:delims)
    delim_definition = re("//").bol + delims + re("\n")

    result = delim_definition.match("//[a][b][xyz]\n1a2b3xyz4")
    assert result
    assert_equal "[a][b][xyz]", result.data(:delims)
  end

  def test_date_parser
    # (19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])

    delim = re.any("- /.")
    n_19_or_20 = re("19") | re("20")
    n_1_to_9 = re("0") + re.any("1-9")
    n_10_to_12 = re("1") + re.any("012")
    n_10_to_29 = re.any("12") + re.any("0-9")
    n_30_or_31 = re("3") + re.any("01")

    year = n_19_or_20 + re.digit.repeat(2)
    month = n_1_to_9 | n_10_to_12
    day = n_1_to_9 | n_10_to_29 | n_30_or_31

    date_re = (year.capture(:year) + delim + month.capture(:month) + delim + day.capture(:day)).all

    assert date_re.match("1900/01/01")
    assert date_re.match("1956/01/01")
    assert date_re.match("2000/01/01")
    assert date_re.match("2010/01/01")
    assert date_re.match("2010/12/01")
    assert date_re.match("2010/03/01")
    assert date_re.match("2010/03/12")
    assert date_re.match("2010/03/24")
    assert date_re.match("2010/03/30")
    assert date_re.match("2010/03/31")

    assert !date_re.match("2100/01/01")
    assert !date_re.match("2100/01/32")
    assert !date_re.match("2010/00/01")
    assert !date_re.match("2010/13/01")
    assert !date_re.match("2010/01/00")
    assert !date_re.match("2010/1/01")
    assert !date_re.match("2010/01/1")
  end

end
|
metadata
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: re
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jim Weirich
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-12-28 00:00:00 -05:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: " The re library allows the easy construction of regular expressions via an expression language.\n"
|
17
|
+
email: jim.weirich@gmail.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- Rakefile
|
26
|
+
- README.rdoc
|
27
|
+
- lib/re.rb
|
28
|
+
- test/re_test.rb
|
29
|
+
has_rdoc: true
|
30
|
+
homepage: http://re.rubyforge.org
|
31
|
+
licenses: []
|
32
|
+
|
33
|
+
post_install_message:
|
34
|
+
rdoc_options:
|
35
|
+
- --line-numbers
|
36
|
+
- --inline-source
|
37
|
+
- --main
|
38
|
+
- re.rb
|
39
|
+
- --title
|
40
|
+
- Re -- Ruby Regular Expression Construction
|
41
|
+
require_paths:
|
42
|
+
- lib
|
43
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: "0"
|
48
|
+
version:
|
49
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: "0"
|
54
|
+
version:
|
55
|
+
requirements: []
|
56
|
+
|
57
|
+
rubyforge_project: re-lib
|
58
|
+
rubygems_version: 1.3.5
|
59
|
+
signing_key:
|
60
|
+
specification_version: 3
|
61
|
+
summary: Construct Ruby Regular Expressions
|
62
|
+
test_files: []
|
63
|
+
|