re 0.0.1 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +21 -0
- data/README.rdoc +67 -8
- data/Rakefile +6 -3
- data/lib/re.rb +269 -137
- data/test/re_test.rb +74 -22
- metadata +3 -2
data/MIT-LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright (c) 2003, 2004 Jim Weirich
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
21
|
+
|
data/README.rdoc
CHANGED
@@ -1,9 +1,46 @@
|
|
1
1
|
|
2
2
|
= Regular Expression Construction.
|
3
3
|
|
4
|
-
|
4
|
+
Complex regular expressions are hard to construct and even harder to
|
5
|
+
read. The Re library allows users to construct complex regular
|
6
|
+
expressions from simpler expressions. For example, consider the
|
7
|
+
following regular expression that will parse dates:
|
5
8
|
|
6
|
-
|
9
|
+
/\A((?:19|20)[0-9]{2})[\- \/.](0[1-9]|1[012])[\- \/.](0[1-9]|[12][0-9]|3[01])\z/
|
10
|
+
|
11
|
+
Using the Re library, that regular expression can be built
|
12
|
+
incrementally from smaller, easier-to-understand expressions.
|
13
|
+
Perhaps something like this:
|
14
|
+
|
15
|
+
require 're'
|
16
|
+
|
17
|
+
include Re
|
18
|
+
|
19
|
+
delim = re.any("- /.")
|
20
|
+
century_prefix = re("19") | re("20")
|
21
|
+
under_ten = re("0") + re.any("1-9")
|
22
|
+
ten_to_twelve = re("1") + re.any("012")
|
23
|
+
ten_and_under_thirty = re.any("12") + re.any("0-9")
|
24
|
+
thirties = re("3") + re.any("01")
|
25
|
+
|
26
|
+
year = (century_prefix + re.digit.repeat(2)).capture(:year)
|
27
|
+
month = (under_ten | ten_to_twelve).capture(:month)
|
28
|
+
day = (under_ten | ten_and_under_thirty | thirties).capture(:day)
|
29
|
+
|
30
|
+
date = (year + delim + month + delim + day).all
|
31
|
+
|
32
|
+
Although it is more code, the individual pieces are smaller and
|
33
|
+
easier to independently verify. As an additional bonus, the capture
|
34
|
+
groups can be retrieved by name:
|
35
|
+
|
36
|
+
result = date.match("2009-01-23")
|
37
|
+
result[:year] # => "2009"
|
38
|
+
result[:month] # => "01"
|
39
|
+
result[:day] # => "23"
|
40
|
+
|
41
|
+
== Version: 0.0.3
|
42
|
+
|
43
|
+
== Usage:
|
7
44
|
|
8
45
|
include Re
|
9
46
|
|
@@ -14,7 +51,7 @@ Usage:
|
|
14
51
|
puts "No Match"
|
15
52
|
end
|
16
53
|
|
17
|
-
Examples:
|
54
|
+
== Examples:
|
18
55
|
|
19
56
|
re("a") -- matches "a"
|
20
57
|
re("a") + re("b") -- matches "ab"
|
@@ -29,11 +66,33 @@ See Re::Rexp for a complete list of expressions.
|
|
29
66
|
Using re without an argument allows access to a number of common
|
30
67
|
regular expression constants. For example:
|
31
68
|
|
32
|
-
re.space
|
33
|
-
re.spaces -- matches any number of spaces (but at least one)
|
69
|
+
re.space / re.spaces -- matches " ", "\n" or "\t"
|
34
70
|
re.digit / re.digits -- matches a digit / sequence of digits
|
35
71
|
|
36
|
-
|
72
|
+
In addition, re without arguments can be used to construct character
|
73
|
+
classes:
|
74
|
+
|
75
|
+
re.any -- Matches any character
|
76
|
+
re.any("abc") -- Matches "a", "b", or "c"
|
77
|
+
re.any("0-9") -- Matches the digits 0 through 9
|
78
|
+
re.any("A-Z", "a-z", "0-9", "_")
|
79
|
+
-- Matches alphanumeric or an underscore
|
80
|
+
|
81
|
+
See Re::ConstructionMethods for a complete list of common constants
|
82
|
+
and character class functions.
|
83
|
+
|
84
|
+
See Re.re, Re::Rexp, and Re::ConstructionMethods for details.
|
85
|
+
|
86
|
+
== License and Copyright:
|
87
|
+
|
88
|
+
Copyright 2009 by Jim Weirich (jim.weirich@gmail.com)
|
89
|
+
|
90
|
+
Re is provided under the MIT open source license (see MIT-LICENSE)
|
91
|
+
|
92
|
+
== Links:
|
93
|
+
|
94
|
+
* Documentation :: http://re-lib.rubyforge.org
|
95
|
+
* Source :: http://github.com/jimweirich/re
|
96
|
+
* Bug Tracker :: http://www.pivotaltracker.com/projects/47758
|
97
|
+
* Author :: jim.weirich@gmail.com
|
37
98
|
|
38
|
-
See Re.re,
|
39
|
-
Re::Rexp, and Re::NULL for details.
|
data/Rakefile
CHANGED
@@ -4,6 +4,8 @@ require 'rake/clean'
|
|
4
4
|
require 'rake/testtask'
|
5
5
|
require 'rake/rdoctask'
|
6
6
|
|
7
|
+
require 'lib/re'
|
8
|
+
|
7
9
|
task :default => :test
|
8
10
|
|
9
11
|
Rake::TestTask.new(:test) do |t|
|
@@ -12,7 +14,8 @@ Rake::TestTask.new(:test) do |t|
|
|
12
14
|
t.test_files = FileList['test/*_test.rb']
|
13
15
|
end
|
14
16
|
|
15
|
-
|
16
|
-
|
17
|
-
|
17
|
+
task :release => [:check_non_beta, :readme, :gem, "publish:rdoc"]
|
18
|
+
|
19
|
+
task :check_non_beta do
|
20
|
+
fail "Must not be a beta version! Version is #{Re::VERSION}" if Re::Version::BETA
|
18
21
|
end
|
data/lib/re.rb
CHANGED
@@ -2,9 +2,44 @@
|
|
2
2
|
#
|
3
3
|
# = Regular Expression Construction.
|
4
4
|
#
|
5
|
-
#
|
5
|
+
# Complex regular expressions are hard to construct and even harder to
|
6
|
+
# read. The Re library allows users to construct complex regular
|
7
|
+
# expressions from simpler expressions. For example, consider the
|
8
|
+
# following regular expression that will parse dates:
|
6
9
|
#
|
7
|
-
#
|
10
|
+
# /\A((?:19|20)[0-9]{2})[\- \/.](0[1-9]|1[012])[\- \/.](0[1-9]|[12][0-9]|3[01])\z/
|
11
|
+
#
|
12
|
+
# Using the Re library, that regular expression can be built
|
13
|
+
# incrementally from smaller, easier-to-understand expressions.
|
14
|
+
# Perhaps something like this:
|
15
|
+
#
|
16
|
+
# require 're'
|
17
|
+
#
|
18
|
+
# include Re
|
19
|
+
#
|
20
|
+
# delim = re.any("- /.")
|
21
|
+
# century_prefix = re("19") | re("20")
|
22
|
+
# under_ten = re("0") + re.any("1-9")
|
23
|
+
# ten_to_twelve = re("1") + re.any("012")
|
24
|
+
# ten_and_under_thirty = re.any("12") + re.any("0-9")
|
25
|
+
# thirties = re("3") + re.any("01")
|
26
|
+
#
|
27
|
+
# year = (century_prefix + re.digit.repeat(2)).capture(:year)
|
28
|
+
# month = (under_ten | ten_to_twelve).capture(:month)
|
29
|
+
# day = (under_ten | ten_and_under_thirty | thirties).capture(:day)
|
30
|
+
#
|
31
|
+
# date = (year + delim + month + delim + day).all
|
32
|
+
#
|
33
|
+
# Although it is more code, the individual pieces are smaller and
|
34
|
+
# easier to independently verify. As an additional bonus, the capture
|
35
|
+
# groups can be retrieved by name:
|
36
|
+
#
|
37
|
+
# result = date.match("2009-01-23")
|
38
|
+
# result[:year] # => "2009"
|
39
|
+
# result[:month] # => "01"
|
40
|
+
# result[:day] # => "23"
|
41
|
+
#
|
42
|
+
# == Usage:
|
8
43
|
#
|
9
44
|
# include Re
|
10
45
|
#
|
@@ -15,7 +50,7 @@
|
|
15
50
|
# puts "No Match"
|
16
51
|
# end
|
17
52
|
#
|
18
|
-
# Examples:
|
53
|
+
# == Examples:
|
19
54
|
#
|
20
55
|
# re("a") -- matches "a"
|
21
56
|
# re("a") + re("b") -- matches "ab"
|
@@ -30,31 +65,70 @@
|
|
30
65
|
# Using re without an argument allows access to a number of common
|
31
66
|
# regular expression constants. For example:
|
32
67
|
#
|
33
|
-
# re.space
|
34
|
-
# re.spaces -- matches any number of spaces (but at least one)
|
68
|
+
# re.space / re.spaces -- matches " ", "\n" or "\t"
|
35
69
|
# re.digit / re.digits -- matches a digit / sequence of digits
|
36
70
|
#
|
37
|
-
#
|
71
|
+
# In addition, re without arguments can be used to construct character
|
72
|
+
# classes:
|
73
|
+
#
|
74
|
+
# re.any -- Matches any character
|
75
|
+
# re.any("abc") -- Matches "a", "b", or "c"
|
76
|
+
# re.any("0-9") -- Matches the digits 0 through 9
|
77
|
+
# re.any("A-Z", "a-z", "0-9", "_")
|
78
|
+
# -- Matches alphanumeric or an underscore
|
79
|
+
#
|
80
|
+
# See Re::ConstructionMethods for a complete list of common constants
|
81
|
+
# and character class functions.
|
82
|
+
#
|
83
|
+
# See Re.re, Re::Rexp, and Re::ConstructionMethods for details.
|
84
|
+
#
|
85
|
+
# == License and Copyright:
|
86
|
+
#
|
87
|
+
# Copyright 2009 by Jim Weirich (jim.weirich@gmail.com)
|
88
|
+
#
|
89
|
+
# Re is provided under the MIT open source license (see MIT-LICENSE)
|
90
|
+
#
|
91
|
+
# == Links:
|
92
|
+
#
|
93
|
+
# * Documentation :: http://re-lib.rubyforge.org
|
94
|
+
# * Source :: http://github.com/jimweirich/re
|
95
|
+
# * Bug Tracker :: http://www.pivotaltracker.com/projects/47758
|
96
|
+
# * Author :: jim.weirich@gmail.com
|
38
97
|
#
|
39
|
-
# See Re.re,
|
40
|
-
# Re::Rexp, and Re::NULL for details.
|
41
|
-
|
42
98
|
module Re
|
99
|
+
|
100
|
+
module Version
|
101
|
+
NUMBERS = [
|
102
|
+
MAJOR = 0,
|
103
|
+
MINOR = 0,
|
104
|
+
BUILD = 3,
|
105
|
+
BETA = nil,
|
106
|
+
].compact
|
107
|
+
end
|
108
|
+
VERSION = Version::NUMBERS.join('.')
|
109
|
+
|
110
|
+
# Re::Result captures the result of a match and allows lookup of the
|
111
|
+
# captured groups by name.
|
43
112
|
class Result
|
113
|
+
# Create a Re result object with the match data and the original
|
114
|
+
# Re::Rexp object.
|
44
115
|
def initialize(match_data, rexp)
|
45
116
|
@match_data = match_data
|
46
117
|
@rexp = rexp
|
47
118
|
end
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
119
|
+
|
120
|
+
# Return the full match
|
121
|
+
def full_match
|
122
|
+
@match_data[0]
|
123
|
+
end
|
124
|
+
|
125
|
+
# Return the named capture data.
|
126
|
+
def [](name)
|
127
|
+
index = @rexp.capture_keys.index(name)
|
128
|
+
index ? @match_data[index+1] : nil
|
55
129
|
end
|
56
130
|
end
|
57
|
-
|
131
|
+
|
58
132
|
# Precedence levels for regular expressions:
|
59
133
|
|
60
134
|
GROUPED = 4 # (r), [chars] :nodoc:
|
@@ -65,22 +139,22 @@ module Re
|
|
65
139
|
|
66
140
|
# Constructed regular expressions.
|
67
141
|
class Rexp
|
68
|
-
attr_reader :string, :level, :
|
142
|
+
attr_reader :string, :level, :options, :capture_keys
|
69
143
|
|
70
144
|
# Create a regular expression from the string. The regular
|
71
145
|
# expression will have a precedence of +level+ and will recognize
|
72
146
|
# +keys+ as a list of capture keys.
|
73
|
-
def initialize(string, level,
|
147
|
+
def initialize(string, level, options, keys)
|
74
148
|
@string = string
|
75
149
|
@level = level
|
76
|
-
@
|
150
|
+
@options = options
|
77
151
|
@capture_keys = keys
|
78
152
|
end
|
79
153
|
|
80
154
|
# Return a real regular expression from the constructed
|
81
155
|
# regular expression.
|
82
156
|
def regexp
|
83
|
-
@regexp ||= Regexp.new(string,
|
157
|
+
@regexp ||= Regexp.new(string, options)
|
84
158
|
end
|
85
159
|
|
86
160
|
# Does it match a string? (returns Re::Result if match, nil otherwise)
|
@@ -94,7 +168,7 @@ module Re
|
|
94
168
|
def +(other)
|
95
169
|
Rexp.new(parenthesize(CONCAT) + other.parenthesize(CONCAT),
|
96
170
|
CONCAT,
|
97
|
-
|
171
|
+
options | other.options,
|
98
172
|
capture_keys + other.capture_keys)
|
99
173
|
end
|
100
174
|
|
@@ -102,43 +176,53 @@ module Re
|
|
102
176
|
def |(other)
|
103
177
|
Rexp.new(parenthesize(ALT) + "|" + other.parenthesize(ALT),
|
104
178
|
ALT,
|
105
|
-
|
179
|
+
options | other.options,
|
106
180
|
capture_keys + other.capture_keys)
|
107
181
|
end
|
108
182
|
|
109
183
|
# self is optional
|
110
184
|
def optional
|
111
|
-
Rexp.new(parenthesize(POSTFIX) + "?", POSTFIX,
|
185
|
+
Rexp.new(parenthesize(POSTFIX) + "?", POSTFIX, options, capture_keys)
|
112
186
|
end
|
113
187
|
|
114
188
|
# self matches many times (zero or more)
|
115
189
|
def many
|
116
|
-
Rexp.new(parenthesize(POSTFIX) + "*", POSTFIX,
|
190
|
+
Rexp.new(parenthesize(POSTFIX) + "*", POSTFIX, options, capture_keys)
|
191
|
+
end
|
192
|
+
|
193
|
+
# self matches many times (zero or more) (non-greedy version)
|
194
|
+
def many!
|
195
|
+
Rexp.new(parenthesize(POSTFIX) + "*?", POSTFIX, options, capture_keys)
|
117
196
|
end
|
118
197
|
|
119
198
|
# self matches one or more times
|
120
199
|
def one_or_more
|
121
|
-
Rexp.new(parenthesize(POSTFIX) + "+", POSTFIX,
|
200
|
+
Rexp.new(parenthesize(POSTFIX) + "+", POSTFIX, options, capture_keys)
|
201
|
+
end
|
202
|
+
|
203
|
+
# self matches one or more times (non-greedy version)
|
204
|
+
def one_or_more!
|
205
|
+
Rexp.new(parenthesize(POSTFIX) + "+?", POSTFIX, options, capture_keys)
|
122
206
|
end
|
123
207
|
|
124
208
|
# self is repeated from min to max times. If max is omitted, then
|
125
209
|
# it is repeated exactly min times.
|
126
210
|
def repeat(min, max=nil)
|
127
211
|
if min && max
|
128
|
-
Rexp.new(parenthesize(POSTFIX) + "{#{min},#{max}}", POSTFIX,
|
212
|
+
Rexp.new(parenthesize(POSTFIX) + "{#{min},#{max}}", POSTFIX, options, capture_keys)
|
129
213
|
else
|
130
|
-
Rexp.new(parenthesize(POSTFIX) + "{#{min}}", POSTFIX,
|
214
|
+
Rexp.new(parenthesize(POSTFIX) + "{#{min}}", POSTFIX, options, capture_keys)
|
131
215
|
end
|
132
216
|
end
|
133
217
|
|
134
218
|
# self is repeated at least min times
|
135
219
|
def at_least(min)
|
136
|
-
Rexp.new(parenthesize(POSTFIX) + "{#{min},}", POSTFIX,
|
220
|
+
Rexp.new(parenthesize(POSTFIX) + "{#{min},}", POSTFIX, options, capture_keys)
|
137
221
|
end
|
138
222
|
|
139
223
|
# self is repeated at most max times
|
140
224
|
def at_most(max)
|
141
|
-
Rexp.new(parenthesize(POSTFIX) + "{0,#{max}}", POSTFIX,
|
225
|
+
Rexp.new(parenthesize(POSTFIX) + "{0,#{max}}", POSTFIX, options, capture_keys)
|
142
226
|
end
|
143
227
|
|
144
228
|
# None of the given characters will match.
|
@@ -158,27 +242,27 @@ module Re
|
|
158
242
|
|
159
243
|
# self must match at the beginning of a line
|
160
244
|
def bol
|
161
|
-
Rexp.new("^" + parenthesize(CONCAT), CONCAT,
|
245
|
+
Rexp.new("^" + parenthesize(CONCAT), CONCAT, options, capture_keys)
|
162
246
|
end
|
163
247
|
|
164
248
|
# self must match at the end of a line
|
165
249
|
def eol
|
166
|
-
Rexp.new(parenthesize(CONCAT) + "$", CONCAT,
|
250
|
+
Rexp.new(parenthesize(CONCAT) + "$", CONCAT, options, capture_keys)
|
167
251
|
end
|
168
252
|
|
169
253
|
# self must match at the beginning of the string
|
170
254
|
def begin
|
171
|
-
Rexp.new("\\A" + parenthesize(CONCAT), CONCAT,
|
255
|
+
Rexp.new("\\A" + parenthesize(CONCAT), CONCAT, options, capture_keys)
|
172
256
|
end
|
173
257
|
|
174
258
|
# self must match the end of the string (with an optional new line)
|
175
259
|
def end
|
176
|
-
Rexp.new(parenthesize(CONCAT) + "\\Z", CONCAT,
|
260
|
+
Rexp.new(parenthesize(CONCAT) + "\\Z", CONCAT, options, capture_keys)
|
177
261
|
end
|
178
262
|
|
179
263
|
# self must match the very end of the string (including any new lines)
|
180
264
|
def very_end
|
181
|
-
Rexp.new(parenthesize(CONCAT) + "\\z", CONCAT,
|
265
|
+
Rexp.new(parenthesize(CONCAT) + "\\z", CONCAT, options, capture_keys)
|
182
266
|
end
|
183
267
|
|
184
268
|
# self must match an entire line.
|
@@ -188,32 +272,32 @@ module Re
|
|
188
272
|
|
189
273
|
# self is contained in a non-capturing group
|
190
274
|
def group
|
191
|
-
Rexp.new("(?:" + string + ")", GROUPED,
|
275
|
+
Rexp.new("(?:" + string + ")", GROUPED, options, capture_keys)
|
192
276
|
end
|
193
277
|
|
194
278
|
# self is a capturing group with the given name.
|
195
279
|
def capture(name)
|
196
|
-
Rexp.new("(" + string + ")", GROUPED,
|
280
|
+
Rexp.new("(" + string + ")", GROUPED, options, [name] + capture_keys)
|
197
281
|
end
|
198
282
|
|
199
283
|
# self will work in multiline matches
|
200
284
|
def multiline
|
201
|
-
Rexp.new(string, GROUPED,
|
285
|
+
Rexp.new(string, GROUPED, options|Regexp::MULTILINE, capture_keys)
|
202
286
|
end
|
203
287
|
|
204
288
|
# Is this a multiline regular expression?
|
205
289
|
def multiline?
|
206
|
-
(
|
290
|
+
(options & Regexp::MULTILINE) != 0
|
207
291
|
end
|
208
292
|
|
209
293
|
# self will ignore case when matching
|
210
294
|
def ignore_case
|
211
|
-
Rexp.new(string, GROUPED,
|
295
|
+
Rexp.new(string, GROUPED, options|Regexp::IGNORECASE, capture_keys)
|
212
296
|
end
|
213
297
|
|
214
298
|
# Does this regular expression ignore case?
|
215
299
|
def ignore_case?
|
216
|
-
(
|
300
|
+
(options & Regexp::IGNORECASE) != 0
|
217
301
|
end
|
218
302
|
|
219
303
|
# String representation of the constructed regular expression.
|
@@ -267,107 +351,155 @@ module Re
|
|
267
351
|
def re(exp=nil)
|
268
352
|
exp ? Rexp.literal(exp) : NULL
|
269
353
|
end
|
354
|
+
extend self
|
270
355
|
|
271
|
-
#
|
272
|
-
#
|
273
|
-
|
274
|
-
|
275
|
-
#
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
356
|
+
# This module defines a number of methods returning common
|
357
|
+
# pre-packaged regular expressions along with methods to create
|
358
|
+
# regular expressions from character classes and other objects.
|
359
|
+
# ConstructionMethods is mixed into the NULL Rexp object so that
|
360
|
+
# re() without arguments can be used to access the methods.
|
361
|
+
module ConstructionMethods
|
362
|
+
|
363
|
+
# :call-seq:
|
364
|
+
# re.null
|
365
|
+
#
|
366
|
+
# Matches the null string
|
367
|
+
def null
|
368
|
+
self
|
369
|
+
end
|
370
|
+
|
371
|
+
# :call-seq:
|
372
|
+
# re.any
|
373
|
+
# re.any(chars)
|
374
|
+
# re.any(range)
|
375
|
+
# re.any(chars, range, ...)
|
376
|
+
#
|
377
|
+
# Match a character from the character class.
|
378
|
+
#
|
379
|
+
# Any without any arguments will match any single character. Any
|
380
|
+
# with one or more arguments will construct a character class for
|
381
|
+
# the arguments. If the argument is a three character string where
|
382
|
+
# the middle character is "-", then the argument represents a range
|
383
|
+
# of characters. Otherwise the arguments are treated as a list of
|
384
|
+
# characters to be added to the character class.
|
385
|
+
#
|
386
|
+
# Examples:
|
387
|
+
#
|
388
|
+
# re.any -- match any character
|
389
|
+
# re.any("aieouy") -- match vowels
|
390
|
+
# re.any("0-9") -- match digits
|
391
|
+
# re.any("A-Z", "a-z", "0-9") -- match alphanumerics
|
392
|
+
# re.any("A-Z", "a-z", "0-9", "_") -- match alphanumerics or an underscore
|
393
|
+
#
|
394
|
+
def any(*chars)
|
395
|
+
if chars.empty?
|
396
|
+
@dot ||= Rexp.raw(".")
|
397
|
+
else
|
398
|
+
any_chars = ''
|
399
|
+
chars.each do |chs|
|
400
|
+
if /^.-.$/ =~ chs
|
401
|
+
any_chars << chs
|
402
|
+
else
|
403
|
+
any_chars << Rexp.escape_any(chs)
|
404
|
+
end
|
313
405
|
end
|
406
|
+
Rexp.new("[" + any_chars + "]", GROUPED, 0, [])
|
314
407
|
end
|
315
|
-
Rexp.new("[" + any_chars + "]", GROUPED, 0, [])
|
316
408
|
end
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
409
|
+
|
410
|
+
# :call-seq:
|
411
|
+
# re.space
|
412
|
+
#
|
413
|
+
# Matches any white space
|
414
|
+
def space
|
415
|
+
@space ||= Rexp.raw("\\s")
|
416
|
+
end
|
417
|
+
|
418
|
+
# :call-seq:
|
419
|
+
# re.spaces
|
420
|
+
#
|
324
421
|
# Matches a sequence of one or more white space characters
|
325
|
-
|
326
|
-
|
422
|
+
def spaces
|
423
|
+
@spaces ||= space.one_or_more
|
424
|
+
end
|
425
|
+
|
426
|
+
# :call-seq:
|
427
|
+
# re.nonspace
|
428
|
+
#
|
429
|
+
# Matches any non-white space
|
430
|
+
def nonspace
|
431
|
+
@nonspace ||= Rexp.raw("\\S")
|
432
|
+
end
|
433
|
+
|
434
|
+
# :call-seq:
|
435
|
+
# re.nonspaces
|
436
|
+
#
|
437
|
+
# Matches a sequence of one or more non-white space characters
|
438
|
+
def nonspaces
|
439
|
+
@nonspaces ||= Rexp.raw("\\S").one_or_more
|
440
|
+
end
|
441
|
+
|
442
|
+
# :call-seq:
|
443
|
+
# re.word_char
|
444
|
+
#
|
445
|
+
# Matches a single word character
|
446
|
+
def word_char
|
447
|
+
@word_char ||= Rexp.raw("\\w")
|
448
|
+
end
|
449
|
+
|
450
|
+
# :call-seq:
|
451
|
+
# re.word
|
452
|
+
#
|
453
|
+
# Matches any sequence of word characters
|
454
|
+
def word
|
455
|
+
@word ||= word_char.one_or_more
|
456
|
+
end
|
457
|
+
|
458
|
+
# :call-seq:
|
459
|
+
# re.break
|
460
|
+
#
|
461
|
+
# Zero-length matches any break
|
462
|
+
def break
|
463
|
+
@break ||= Rexp.raw("\\b")
|
464
|
+
end
|
465
|
+
|
466
|
+
# :call-seq:
|
467
|
+
# re.digit
|
468
|
+
#
|
469
|
+
# Matches a digit
|
470
|
+
def digit
|
471
|
+
@digit ||= any("0-9")
|
472
|
+
end
|
473
|
+
|
474
|
+
# :call-seq:
|
475
|
+
# re.digits
|
476
|
+
#
|
477
|
+
# Matches a sequence of digits
|
478
|
+
def digits
|
479
|
+
@digits ||= digit.one_or_more
|
480
|
+
end
|
481
|
+
|
482
|
+
# :call-seq:
|
483
|
+
# re.hex_digit
|
484
|
+
#
|
485
|
+
# Matches a hex digit (upper or lower case)
|
486
|
+
def hex_digit
|
487
|
+
@hex_digit ||= any("0-9", "a-f", "A-F")
|
488
|
+
end
|
489
|
+
|
490
|
+
# :call-seq:
|
491
|
+
# re.hex_digits
|
492
|
+
#
|
493
|
+
# Matches a sequence of hex digits
|
494
|
+
def hex_digits
|
495
|
+
@hex_digits ||= hex_digit.one_or_more
|
496
|
+
end
|
327
497
|
end
|
328
498
|
|
329
|
-
# Matches
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
def NULL.nonspaces
|
336
|
-
@nonspaces ||= Rexp.raw("\\S").one_or_more
|
337
|
-
end
|
338
|
-
|
339
|
-
# Matches any sequence of word characters
|
340
|
-
def NULL.word_char
|
341
|
-
@word_char ||= Rexp.raw("\\w")
|
342
|
-
end
|
343
|
-
|
344
|
-
# Matches any sequence of word characters
|
345
|
-
def NULL.word
|
346
|
-
@word ||= word_char.one_or_more
|
347
|
-
end
|
348
|
-
|
349
|
-
# Zero-length matches any break
|
350
|
-
def NULL.break
|
351
|
-
@break ||= Rexp.raw("\\b")
|
352
|
-
end
|
353
|
-
|
354
|
-
# Matches a digit
|
355
|
-
def NULL.digit
|
356
|
-
@digit ||= any("0-9")
|
357
|
-
end
|
358
|
-
|
359
|
-
# Matches a sequence of digits
|
360
|
-
def NULL.digits
|
361
|
-
@digits ||= digit.one_or_more
|
362
|
-
end
|
363
|
-
|
364
|
-
# Matches a hex digit (upper or lower case)
|
365
|
-
def NULL.hex_digit
|
366
|
-
@hex_digit ||= any("0-9", "a-f", "A-F")
|
367
|
-
end
|
368
|
-
|
369
|
-
# Matches a sequence of hex digits
|
370
|
-
def NULL.hex_digits
|
371
|
-
@hex_digits ||= hex_digit.one_or_more
|
372
|
-
end
|
499
|
+
# Matches an empty string. Additional common regular expression
|
500
|
+
# construction methods are defined on NULL. See
|
501
|
+
# Re::ConstructionMethods for details.
|
502
|
+
NULL = Rexp.literal("")
|
503
|
+
NULL.extend(ConstructionMethods)
|
504
|
+
|
373
505
|
end
|
data/test/re_test.rb
CHANGED
@@ -6,6 +6,11 @@ require 're'
|
|
6
6
|
class ReTest < Test::Unit::TestCase
|
7
7
|
include Re
|
8
8
|
|
9
|
+
def test_module_access_to_re_function
|
10
|
+
r = Re.re("a").all
|
11
|
+
assert r =~ "a"
|
12
|
+
end
|
13
|
+
|
9
14
|
def test_strings_match
|
10
15
|
assert re("a") =~ "a"
|
11
16
|
assert re("a") !~ "A"
|
@@ -46,6 +51,20 @@ class ReTest < Test::Unit::TestCase
|
|
46
51
|
assert r =~ "xxx"
|
47
52
|
end
|
48
53
|
|
54
|
+
def test_greedy_many
|
55
|
+
r = re.any.many.capture(:x) + re("b")
|
56
|
+
result = r.match("xbxb")
|
57
|
+
assert result
|
58
|
+
assert_equal "xbx", result[:x]
|
59
|
+
end
|
60
|
+
|
61
|
+
def test_non_greedy_many
|
62
|
+
r = re.any.many!.capture(:x) + re("b")
|
63
|
+
result = r.match("xbxb")
|
64
|
+
assert result
|
65
|
+
assert_equal "x", result[:x]
|
66
|
+
end
|
67
|
+
|
49
68
|
def test_one_or_more
|
50
69
|
r = re("x").one_or_more.all
|
51
70
|
assert r !~ ""
|
@@ -53,6 +72,20 @@ class ReTest < Test::Unit::TestCase
|
|
53
72
|
assert r =~ "xxx"
|
54
73
|
end
|
55
74
|
|
75
|
+
def test_greedy_one_or_more
|
76
|
+
r = re.any.one_or_more.capture(:any) + re("b")
|
77
|
+
result = r.match("xbxb")
|
78
|
+
assert result
|
79
|
+
assert_equal "xbx", result[:any]
|
80
|
+
end
|
81
|
+
|
82
|
+
def test_non_greedy_one_or_more
|
83
|
+
r = re.any.one_or_more!.capture(:any) + re("b")
|
84
|
+
result = r.match("xbxb")
|
85
|
+
assert result
|
86
|
+
assert_equal "x", result[:any]
|
87
|
+
end
|
88
|
+
|
56
89
|
def test_repeat_fixed_number
|
57
90
|
r = re("a").repeat(3).all
|
58
91
|
assert r =~ "aaa"
|
@@ -104,7 +137,7 @@ class ReTest < Test::Unit::TestCase
|
|
104
137
|
assert r !~ "\n"
|
105
138
|
end
|
106
139
|
|
107
|
-
def
|
140
|
+
def test_no_options
|
108
141
|
r = re("a")
|
109
142
|
assert ! r.ignore_case?
|
110
143
|
assert ! r.multiline?
|
@@ -363,7 +396,7 @@ class ReTest < Test::Unit::TestCase
|
|
363
396
|
r = re.any("a-z").one_or_more.capture(:word)
|
364
397
|
result = (r =~ "012abc789")
|
365
398
|
assert result
|
366
|
-
assert_equal "abc", result
|
399
|
+
assert_equal "abc", result[:word]
|
367
400
|
end
|
368
401
|
|
369
402
|
def test_multiple_capture
|
@@ -373,10 +406,10 @@ class ReTest < Test::Unit::TestCase
|
|
373
406
|
result = (r =~ " now 123\n")
|
374
407
|
assert result
|
375
408
|
assert_equal [:everything, :word, :number], r.capture_keys
|
376
|
-
assert_equal "now", result
|
377
|
-
assert_equal "123", result
|
378
|
-
assert_equal "now 123", result
|
379
|
-
assert_equal "now 123", result.
|
409
|
+
assert_equal "now", result[:word]
|
410
|
+
assert_equal "123", result[:number]
|
411
|
+
assert_equal "now 123", result[:everything]
|
412
|
+
assert_equal "now 123", result.full_match
|
380
413
|
end
|
381
414
|
|
382
415
|
def test_precedence_concatentaion_vs_alteration
|
@@ -397,25 +430,10 @@ class ReTest < Test::Unit::TestCase
|
|
397
430
|
|
398
431
|
result = delim_definition.match("//[a][b][xyz]\n1a2b3xyz4")
|
399
432
|
assert result
|
400
|
-
assert_equal "[a][b][xyz]", result
|
433
|
+
assert_equal "[a][b][xyz]", result[:delims]
|
401
434
|
end
|
402
435
|
|
403
436
|
def test_date_parser
|
404
|
-
# (19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])
|
405
|
-
|
406
|
-
delim = re.any("- /.")
|
407
|
-
n_19_or_20 = re("19") | re("20")
|
408
|
-
n_1_to_9 = re("0") + re.any("1-9")
|
409
|
-
n_10_to_12 = re("1") + re.any("012")
|
410
|
-
n_10_to_29 = re.any("12") + re.any("0-9")
|
411
|
-
n_30_or_31 = re("3") + re.any("01")
|
412
|
-
|
413
|
-
year = n_19_or_20 + re.digit.repeat(2)
|
414
|
-
month = n_1_to_9 | n_10_to_12
|
415
|
-
day = n_1_to_9 | n_10_to_29 | n_30_or_31
|
416
|
-
|
417
|
-
date_re = (year.capture(:year) + delim + month.capture(:month) + delim + day.capture(:day)).all
|
418
|
-
|
419
437
|
assert date_re.match("1900/01/01")
|
420
438
|
assert date_re.match("1956/01/01")
|
421
439
|
assert date_re.match("2000/01/01")
|
@@ -435,6 +453,40 @@ class ReTest < Test::Unit::TestCase
|
|
435
453
|
assert ! date_re.match("2010/1/01")
|
436
454
|
assert ! date_re.match("2010/01/1")
|
437
455
|
end
|
456
|
+
|
457
|
+
def test_date_capture
|
458
|
+
result = date_re.match("2010/02/14")
|
459
|
+
assert result
|
460
|
+
assert_equal "2010", result[:year]
|
461
|
+
assert_equal "02", result[:month]
|
462
|
+
assert_equal "14", result[:day]
|
463
|
+
end
|
438
464
|
|
465
|
+
private
|
439
466
|
|
467
|
+
def date_re
|
468
|
+
self.class.date_re
|
469
|
+
end
|
470
|
+
|
471
|
+
class << self
|
472
|
+
include Re
|
473
|
+
def date_re
|
474
|
+
# (19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])
|
475
|
+
@date_re ||=
|
476
|
+
begin
|
477
|
+
delim_re = re.any("- /.")
|
478
|
+
century_prefix_re = re("19") | re("20")
|
479
|
+
under_ten_re = re("0") + re.any("1-9")
|
480
|
+
ten_to_twelve_re = re("1") + re.any("012")
|
481
|
+
ten_and_under_thirty_re = re.any("12") + re.any("0-9")
|
482
|
+
thirties_re = re("3") + re.any("01")
|
483
|
+
|
484
|
+
year = century_prefix_re + re.digit.repeat(2)
|
485
|
+
month = under_ten_re | ten_to_twelve_re
|
486
|
+
day = under_ten_re | ten_and_under_thirty_re | thirties_re
|
487
|
+
|
488
|
+
(year.capture(:year) + delim_re + month.capture(:month) + delim_re + day.capture(:day)).all
|
489
|
+
end
|
490
|
+
end
|
491
|
+
end
|
440
492
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: re
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jim Weirich
|
@@ -22,12 +22,13 @@ extensions: []
|
|
22
22
|
extra_rdoc_files: []
|
23
23
|
|
24
24
|
files:
|
25
|
+
- MIT-LICENSE
|
25
26
|
- Rakefile
|
26
27
|
- README.rdoc
|
27
28
|
- lib/re.rb
|
28
29
|
- test/re_test.rb
|
29
30
|
has_rdoc: true
|
30
|
-
homepage: http://re.rubyforge.org
|
31
|
+
homepage: http://re-lib.rubyforge.org
|
31
32
|
licenses: []
|
32
33
|
|
33
34
|
post_install_message:
|