re 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +21 -0
- data/README.rdoc +67 -8
- data/Rakefile +6 -3
- data/lib/re.rb +269 -137
- data/test/re_test.rb +74 -22
- metadata +3 -2
data/MIT-LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
Copyright (c) 2003, 2004 Jim Weirich
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
4
|
+
a copy of this software and associated documentation files (the
|
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
9
|
+
the following conditions:
|
|
10
|
+
|
|
11
|
+
The above copyright notice and this permission notice shall be
|
|
12
|
+
included in all copies or substantial portions of the Software.
|
|
13
|
+
|
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
21
|
+
|
data/README.rdoc
CHANGED
|
@@ -1,9 +1,46 @@
|
|
|
1
1
|
|
|
2
2
|
= Regular Expression Construction.
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
Complex regular expressions are hard to construct and even harder to
|
|
5
|
+
read. The Re library allows users to construct complex regular
|
|
6
|
+
expressions from simpler expressions. For example, consider the
|
|
7
|
+
following regular expression that will parse dates:
|
|
5
8
|
|
|
6
|
-
|
|
9
|
+
/\A((?:19|20)[0-9]{2})[\- \/.](0[1-9]|1[012])[\- \/.](0[1-9]|[12][0-9]|3[01])\z/
|
|
10
|
+
|
|
11
|
+
Using the Re library, that regular expression can be built
|
|
12
|
+
incrementally from smaller, easier to understand expressions.
|
|
13
|
+
Perhaps something like this:
|
|
14
|
+
|
|
15
|
+
require 're'
|
|
16
|
+
|
|
17
|
+
include Re
|
|
18
|
+
|
|
19
|
+
delim = re.any("- /.")
|
|
20
|
+
century_prefix = re("19") | re("20")
|
|
21
|
+
under_ten = re("0") + re.any("1-9")
|
|
22
|
+
ten_to_twelve = re("1") + re.any("012")
|
|
23
|
+
ten_and_under_thirty = re.any("12") + re.any("0-9")
|
|
24
|
+
thirties = re("3") + re.any("01")
|
|
25
|
+
|
|
26
|
+
year = (century_prefix + re.digit.repeat(2)).capture(:year)
|
|
27
|
+
month = (under_ten | ten_to_twelve).capture(:month)
|
|
28
|
+
day = (under_ten | ten_and_under_thirty | thirties).capture(:day)
|
|
29
|
+
|
|
30
|
+
date = (year + delim + month + delim + day).all
|
|
31
|
+
|
|
32
|
+
Although it is more code, the individual pieces are smaller and
|
|
33
|
+
easier to independently verify. As an additional bonus, the capture
|
|
34
|
+
groups can be retrieved by name:
|
|
35
|
+
|
|
36
|
+
result = date.match("2009-01-23")
|
|
37
|
+
result.data(:year) # => "2009"
|
|
38
|
+
result.data(:month) # => "01"
|
|
39
|
+
result.data(:day) # => "23"
|
|
40
|
+
|
|
41
|
+
== Version: 0.0.3
|
|
42
|
+
|
|
43
|
+
== Usage:
|
|
7
44
|
|
|
8
45
|
include Re
|
|
9
46
|
|
|
@@ -14,7 +51,7 @@ Usage:
|
|
|
14
51
|
puts "No Match"
|
|
15
52
|
end
|
|
16
53
|
|
|
17
|
-
Examples:
|
|
54
|
+
== Examples:
|
|
18
55
|
|
|
19
56
|
re("a") -- matches "a"
|
|
20
57
|
re("a") + re("b") -- matches "ab"
|
|
@@ -29,11 +66,33 @@ See Re::Rexp for a complete list of expressions.
|
|
|
29
66
|
Using re without an argument allows access to a number of common
|
|
30
67
|
regular expression constants. For example:
|
|
31
68
|
|
|
32
|
-
re.space
|
|
33
|
-
re.spaces -- matches any number of spaces (but at least one)
|
|
69
|
+
re.space / re.spaces -- matches a white space character (e.g. " ", "\n" or "\t") / sequence of white space
|
|
34
70
|
re.digit / re.digits -- matches a digit / sequence of digits
|
|
35
71
|
|
|
36
|
-
|
|
72
|
+
In addition, re without arguments can be used to construct character
|
|
73
|
+
classes:
|
|
74
|
+
|
|
75
|
+
re.any -- Matches any character
|
|
76
|
+
re.any("abc") -- Matches "a", "b", or "c"
|
|
77
|
+
re.any("0-9") -- Matches the digits 0 through 9
|
|
78
|
+
re.any("A-Z", "a-z", "0-9", "_")
|
|
79
|
+
-- Matches alphanumeric or an underscore
|
|
80
|
+
|
|
81
|
+
See Re::ConstructionMethods for a complete list of common constants
|
|
82
|
+
and character class functions.
|
|
83
|
+
|
|
84
|
+
See Re.re, Re::Rexp, and Re::ConstructionMethods for details.
|
|
85
|
+
|
|
86
|
+
== License and Copyright:
|
|
87
|
+
|
|
88
|
+
Copyright 2009 by Jim Weirich (jim.weirich@gmail.com)
|
|
89
|
+
|
|
90
|
+
Re is provided under the MIT open source license (see MIT-LICENSE)
|
|
91
|
+
|
|
92
|
+
== Links:
|
|
93
|
+
|
|
94
|
+
* Documentation :: http://re-lib.rubyforge.org
|
|
95
|
+
* Source :: http://github.com/jimweirich/re
|
|
96
|
+
* Bug Tracker :: http://www.pivotaltracker.com/projects/47758
|
|
97
|
+
* Author :: jim.weirich@gmail.com
|
|
37
98
|
|
|
38
|
-
See Re.re,
|
|
39
|
-
Re::Rexp, and Re::NULL for details.
|
data/Rakefile
CHANGED
|
@@ -4,6 +4,8 @@ require 'rake/clean'
|
|
|
4
4
|
require 'rake/testtask'
|
|
5
5
|
require 'rake/rdoctask'
|
|
6
6
|
|
|
7
|
+
require 'lib/re'
|
|
8
|
+
|
|
7
9
|
task :default => :test
|
|
8
10
|
|
|
9
11
|
Rake::TestTask.new(:test) do |t|
|
|
@@ -12,7 +14,8 @@ Rake::TestTask.new(:test) do |t|
|
|
|
12
14
|
t.test_files = FileList['test/*_test.rb']
|
|
13
15
|
end
|
|
14
16
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
17
|
+
task :release => [:check_non_beta, :readme, :gem, "publish:rdoc"]
|
|
18
|
+
|
|
19
|
+
task :check_non_beta do
|
|
20
|
+
fail "Must not be a beta version! Version is #{Re::VERSION}" if Re::Version::BETA
|
|
18
21
|
end
|
data/lib/re.rb
CHANGED
|
@@ -2,9 +2,44 @@
|
|
|
2
2
|
#
|
|
3
3
|
# = Regular Expression Construction.
|
|
4
4
|
#
|
|
5
|
-
#
|
|
5
|
+
# Complex regular expressions are hard to construct and even harder to
|
|
6
|
+
# read. The Re library allows users to construct complex regular
|
|
7
|
+
# expressions from simpler expressions. For example, consider the
|
|
8
|
+
# following regular expression that will parse dates:
|
|
6
9
|
#
|
|
7
|
-
#
|
|
10
|
+
# /\A((?:19|20)[0-9]{2})[\- \/.](0[1-9]|1[012])[\- \/.](0[1-9]|[12][0-9]|3[01])\z/
|
|
11
|
+
#
|
|
12
|
+
# Using the Re library, that regular expression can be built
|
|
13
|
+
# incrementally from smaller, easier to understand expressions.
|
|
14
|
+
# Perhaps something like this:
|
|
15
|
+
#
|
|
16
|
+
# require 're'
|
|
17
|
+
#
|
|
18
|
+
# include Re
|
|
19
|
+
#
|
|
20
|
+
# delim = re.any("- /.")
|
|
21
|
+
# century_prefix = re("19") | re("20")
|
|
22
|
+
# under_ten = re("0") + re.any("1-9")
|
|
23
|
+
# ten_to_twelve = re("1") + re.any("012")
|
|
24
|
+
# ten_and_under_thirty = re.any("12") + re.any("0-9")
|
|
25
|
+
# thirties = re("3") + re.any("01")
|
|
26
|
+
#
|
|
27
|
+
# year = (century_prefix + re.digit.repeat(2)).capture(:year)
|
|
28
|
+
# month = (under_ten | ten_to_twelve).capture(:month)
|
|
29
|
+
# day = (under_ten | ten_and_under_thirty | thirties).capture(:day)
|
|
30
|
+
#
|
|
31
|
+
# date = (year + delim + month + delim + day).all
|
|
32
|
+
#
|
|
33
|
+
# Although it is more code, the individual pieces are smaller and
|
|
34
|
+
# easier to independently verify. As an additional bonus, the capture
|
|
35
|
+
# groups can be retrieved by name:
|
|
36
|
+
#
|
|
37
|
+
# result = date.match("2009-01-23")
|
|
38
|
+
# result.data(:year) # => "2009"
|
|
39
|
+
# result.data(:month) # => "01"
|
|
40
|
+
# result.data(:day) # => "23"
|
|
41
|
+
#
|
|
42
|
+
# == Usage:
|
|
8
43
|
#
|
|
9
44
|
# include Re
|
|
10
45
|
#
|
|
@@ -15,7 +50,7 @@
|
|
|
15
50
|
# puts "No Match"
|
|
16
51
|
# end
|
|
17
52
|
#
|
|
18
|
-
# Examples:
|
|
53
|
+
# == Examples:
|
|
19
54
|
#
|
|
20
55
|
# re("a") -- matches "a"
|
|
21
56
|
# re("a") + re("b") -- matches "ab"
|
|
@@ -30,31 +65,70 @@
|
|
|
30
65
|
# Using re without an argument allows access to a number of common
|
|
31
66
|
# regular expression constants. For example:
|
|
32
67
|
#
|
|
33
|
-
# re.space
|
|
34
|
-
# re.spaces -- matches any number of spaces (but at least one)
|
|
68
|
+
# re.space / re.spaces -- matches a white space character (e.g. " ", "\n" or "\t") / sequence of white space
|
|
35
69
|
# re.digit / re.digits -- matches a digit / sequence of digits
|
|
36
70
|
#
|
|
37
|
-
#
|
|
71
|
+
# In addition, re without arguments can be used to construct character
|
|
72
|
+
# classes:
|
|
73
|
+
#
|
|
74
|
+
# re.any -- Matches any character
|
|
75
|
+
# re.any("abc") -- Matches "a", "b", or "c"
|
|
76
|
+
# re.any("0-9") -- Matches the digits 0 through 9
|
|
77
|
+
# re.any("A-Z", "a-z", "0-9", "_")
|
|
78
|
+
# -- Matches alphanumeric or an underscore
|
|
79
|
+
#
|
|
80
|
+
# See Re::ConstructionMethods for a complete list of common constants
|
|
81
|
+
# and character class functions.
|
|
82
|
+
#
|
|
83
|
+
# See Re.re, Re::Rexp, and Re::ConstructionMethods for details.
|
|
84
|
+
#
|
|
85
|
+
# == License and Copyright:
|
|
86
|
+
#
|
|
87
|
+
# Copyright 2009 by Jim Weirich (jim.weirich@gmail.com)
|
|
88
|
+
#
|
|
89
|
+
# Re is provided under the MIT open source license (see MIT-LICENSE)
|
|
90
|
+
#
|
|
91
|
+
# == Links:
|
|
92
|
+
#
|
|
93
|
+
# * Documentation :: http://re-lib.rubyforge.org
|
|
94
|
+
# * Source :: http://github.com/jimweirich/re
|
|
95
|
+
# * Bug Tracker :: http://www.pivotaltracker.com/projects/47758
|
|
96
|
+
# * Author :: jim.weirich@gmail.com
|
|
38
97
|
#
|
|
39
|
-
# See Re.re,
|
|
40
|
-
# Re::Rexp, and Re::NULL for details.
|
|
41
|
-
|
|
42
98
|
module Re
|
|
99
|
+
|
|
100
|
+
module Version
|
|
101
|
+
NUMBERS = [
|
|
102
|
+
MAJOR = 0,
|
|
103
|
+
MINOR = 0,
|
|
104
|
+
BUILD = 3,
|
|
105
|
+
BETA = nil,
|
|
106
|
+
].compact
|
|
107
|
+
end
|
|
108
|
+
VERSION = Version::NUMBERS.join('.')
|
|
109
|
+
|
|
110
|
+
# Re::Result captures the result of a match and allows lookup of the
|
|
111
|
+
# captured groups by name.
|
|
43
112
|
class Result
|
|
113
|
+
# Create a Re result object with the match data and the original
|
|
114
|
+
# Re::Rexp object.
|
|
44
115
|
def initialize(match_data, rexp)
|
|
45
116
|
@match_data = match_data
|
|
46
117
|
@rexp = rexp
|
|
47
118
|
end
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
119
|
+
|
|
120
|
+
# Return the full match
|
|
121
|
+
def full_match
|
|
122
|
+
@match_data[0]
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
# Return the named capture data.
|
|
126
|
+
def [](name)
|
|
127
|
+
index = @rexp.capture_keys.index(name)
|
|
128
|
+
index ? @match_data[index+1] : nil
|
|
55
129
|
end
|
|
56
130
|
end
|
|
57
|
-
|
|
131
|
+
|
|
58
132
|
# Precedence levels for regular expressions:
|
|
59
133
|
|
|
60
134
|
GROUPED = 4 # (r), [chars] :nodoc:
|
|
@@ -65,22 +139,22 @@ module Re
|
|
|
65
139
|
|
|
66
140
|
# Constructed regular expressions.
|
|
67
141
|
class Rexp
|
|
68
|
-
attr_reader :string, :level, :
|
|
142
|
+
attr_reader :string, :level, :options, :capture_keys
|
|
69
143
|
|
|
70
144
|
# Create a regular expression from the string. The regular
|
|
71
145
|
# expression will have a precedence of +level+ and will recognize
|
|
72
146
|
# +keys+ as a list of capture keys.
|
|
73
|
-
def initialize(string, level,
|
|
147
|
+
def initialize(string, level, options, keys)
|
|
74
148
|
@string = string
|
|
75
149
|
@level = level
|
|
76
|
-
@
|
|
150
|
+
@options = options
|
|
77
151
|
@capture_keys = keys
|
|
78
152
|
end
|
|
79
153
|
|
|
80
154
|
# Return a real regular expression from the constructed
|
|
81
155
|
# regular expression.
|
|
82
156
|
def regexp
|
|
83
|
-
@regexp ||= Regexp.new(string,
|
|
157
|
+
@regexp ||= Regexp.new(string, options)
|
|
84
158
|
end
|
|
85
159
|
|
|
86
160
|
# Does it match a string? (returns Re::Result if match, nil otherwise)
|
|
@@ -94,7 +168,7 @@ module Re
|
|
|
94
168
|
def +(other)
|
|
95
169
|
Rexp.new(parenthesize(CONCAT) + other.parenthesize(CONCAT),
|
|
96
170
|
CONCAT,
|
|
97
|
-
|
|
171
|
+
options | other.options,
|
|
98
172
|
capture_keys + other.capture_keys)
|
|
99
173
|
end
|
|
100
174
|
|
|
@@ -102,43 +176,53 @@ module Re
|
|
|
102
176
|
def |(other)
|
|
103
177
|
Rexp.new(parenthesize(ALT) + "|" + other.parenthesize(ALT),
|
|
104
178
|
ALT,
|
|
105
|
-
|
|
179
|
+
options | other.options,
|
|
106
180
|
capture_keys + other.capture_keys)
|
|
107
181
|
end
|
|
108
182
|
|
|
109
183
|
# self is optional
|
|
110
184
|
def optional
|
|
111
|
-
Rexp.new(parenthesize(POSTFIX) + "?", POSTFIX,
|
|
185
|
+
Rexp.new(parenthesize(POSTFIX) + "?", POSTFIX, options, capture_keys)
|
|
112
186
|
end
|
|
113
187
|
|
|
114
188
|
# self matches many times (zero or more)
|
|
115
189
|
def many
|
|
116
|
-
Rexp.new(parenthesize(POSTFIX) + "*", POSTFIX,
|
|
190
|
+
Rexp.new(parenthesize(POSTFIX) + "*", POSTFIX, options, capture_keys)
|
|
191
|
+
end
|
|
192
|
+
|
|
193
|
+
# self matches many times (zero or more) (non-greedy version)
|
|
194
|
+
def many!
|
|
195
|
+
Rexp.new(parenthesize(POSTFIX) + "*?", POSTFIX, options, capture_keys)
|
|
117
196
|
end
|
|
118
197
|
|
|
119
198
|
# self matches one or more times
|
|
120
199
|
def one_or_more
|
|
121
|
-
Rexp.new(parenthesize(POSTFIX) + "+", POSTFIX,
|
|
200
|
+
Rexp.new(parenthesize(POSTFIX) + "+", POSTFIX, options, capture_keys)
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
# self matches one or more times (non-greedy version)
|
|
204
|
+
def one_or_more!
|
|
205
|
+
Rexp.new(parenthesize(POSTFIX) + "+?", POSTFIX, options, capture_keys)
|
|
122
206
|
end
|
|
123
207
|
|
|
124
208
|
# self is repeated from min to max times. If max is omitted, then
|
|
125
209
|
# it is repeated exactly min times.
|
|
126
210
|
def repeat(min, max=nil)
|
|
127
211
|
if min && max
|
|
128
|
-
Rexp.new(parenthesize(POSTFIX) + "{#{min},#{max}}", POSTFIX,
|
|
212
|
+
Rexp.new(parenthesize(POSTFIX) + "{#{min},#{max}}", POSTFIX, options, capture_keys)
|
|
129
213
|
else
|
|
130
|
-
Rexp.new(parenthesize(POSTFIX) + "{#{min}}", POSTFIX,
|
|
214
|
+
Rexp.new(parenthesize(POSTFIX) + "{#{min}}", POSTFIX, options, capture_keys)
|
|
131
215
|
end
|
|
132
216
|
end
|
|
133
217
|
|
|
134
218
|
# self is repeated at least min times
|
|
135
219
|
def at_least(min)
|
|
136
|
-
Rexp.new(parenthesize(POSTFIX) + "{#{min},}", POSTFIX,
|
|
220
|
+
Rexp.new(parenthesize(POSTFIX) + "{#{min},}", POSTFIX, options, capture_keys)
|
|
137
221
|
end
|
|
138
222
|
|
|
139
223
|
# self is repeated at most max times
|
|
140
224
|
def at_most(max)
|
|
141
|
-
Rexp.new(parenthesize(POSTFIX) + "{0,#{max}}", POSTFIX,
|
|
225
|
+
Rexp.new(parenthesize(POSTFIX) + "{0,#{max}}", POSTFIX, options, capture_keys)
|
|
142
226
|
end
|
|
143
227
|
|
|
144
228
|
# None of the given characters will match.
|
|
@@ -158,27 +242,27 @@ module Re
|
|
|
158
242
|
|
|
159
243
|
# self must match at the beginning of a line
|
|
160
244
|
def bol
|
|
161
|
-
Rexp.new("^" + parenthesize(CONCAT), CONCAT,
|
|
245
|
+
Rexp.new("^" + parenthesize(CONCAT), CONCAT, options, capture_keys)
|
|
162
246
|
end
|
|
163
247
|
|
|
164
248
|
# self must match at the end of a line
|
|
165
249
|
def eol
|
|
166
|
-
Rexp.new(parenthesize(CONCAT) + "$", CONCAT,
|
|
250
|
+
Rexp.new(parenthesize(CONCAT) + "$", CONCAT, options, capture_keys)
|
|
167
251
|
end
|
|
168
252
|
|
|
169
253
|
# self must match at the beginning of the string
|
|
170
254
|
def begin
|
|
171
|
-
Rexp.new("\\A" + parenthesize(CONCAT), CONCAT,
|
|
255
|
+
Rexp.new("\\A" + parenthesize(CONCAT), CONCAT, options, capture_keys)
|
|
172
256
|
end
|
|
173
257
|
|
|
174
258
|
# self must match the end of the string (with an optional new line)
|
|
175
259
|
def end
|
|
176
|
-
Rexp.new(parenthesize(CONCAT) + "\\Z", CONCAT,
|
|
260
|
+
Rexp.new(parenthesize(CONCAT) + "\\Z", CONCAT, options, capture_keys)
|
|
177
261
|
end
|
|
178
262
|
|
|
179
263
|
# self must match the very end of the string (including any new lines)
|
|
180
264
|
def very_end
|
|
181
|
-
Rexp.new(parenthesize(CONCAT) + "\\z", CONCAT,
|
|
265
|
+
Rexp.new(parenthesize(CONCAT) + "\\z", CONCAT, options, capture_keys)
|
|
182
266
|
end
|
|
183
267
|
|
|
184
268
|
# self must match an entire line.
|
|
@@ -188,32 +272,32 @@ module Re
|
|
|
188
272
|
|
|
189
273
|
# self is contained in a non-capturing group
|
|
190
274
|
def group
|
|
191
|
-
Rexp.new("(?:" + string + ")", GROUPED,
|
|
275
|
+
Rexp.new("(?:" + string + ")", GROUPED, options, capture_keys)
|
|
192
276
|
end
|
|
193
277
|
|
|
194
278
|
# self is a capturing group with the given name.
|
|
195
279
|
def capture(name)
|
|
196
|
-
Rexp.new("(" + string + ")", GROUPED,
|
|
280
|
+
Rexp.new("(" + string + ")", GROUPED, options, [name] + capture_keys)
|
|
197
281
|
end
|
|
198
282
|
|
|
199
283
|
# self will work in multiline matches
|
|
200
284
|
def multiline
|
|
201
|
-
Rexp.new(string, GROUPED,
|
|
285
|
+
Rexp.new(string, GROUPED, options|Regexp::MULTILINE, capture_keys)
|
|
202
286
|
end
|
|
203
287
|
|
|
204
288
|
# Is this a multiline regular expression?
|
|
205
289
|
def multiline?
|
|
206
|
-
(
|
|
290
|
+
(options & Regexp::MULTILINE) != 0
|
|
207
291
|
end
|
|
208
292
|
|
|
209
293
|
# self will ignore case in matches
|
|
210
294
|
def ignore_case
|
|
211
|
-
Rexp.new(string, GROUPED,
|
|
295
|
+
Rexp.new(string, GROUPED, options|Regexp::IGNORECASE, capture_keys)
|
|
212
296
|
end
|
|
213
297
|
|
|
214
298
|
# Does this regular expression ignore case?
|
|
215
299
|
def ignore_case?
|
|
216
|
-
(
|
|
300
|
+
(options & Regexp::IGNORECASE) != 0
|
|
217
301
|
end
|
|
218
302
|
|
|
219
303
|
# String representation of the constructed regular expression.
|
|
@@ -267,107 +351,155 @@ module Re
|
|
|
267
351
|
def re(exp=nil)
|
|
268
352
|
exp ? Rexp.literal(exp) : NULL
|
|
269
353
|
end
|
|
354
|
+
extend self
|
|
270
355
|
|
|
271
|
-
#
|
|
272
|
-
#
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
#
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
356
|
+
# This module defines a number of methods returning common
|
|
357
|
+
# pre-packaged regular expressions along with methods to create
|
|
358
|
+
# regular expressions from character classes and other objects.
|
|
359
|
+
# ConstructionMethods is mixed into the NULL Rexp object so that
|
|
360
|
+
# re() without arguments can be used to access the methods.
|
|
361
|
+
module ConstructionMethods
|
|
362
|
+
|
|
363
|
+
# :call-seq:
|
|
364
|
+
# re.null
|
|
365
|
+
#
|
|
366
|
+
# Matches the null string
|
|
367
|
+
def null
|
|
368
|
+
self
|
|
369
|
+
end
|
|
370
|
+
|
|
371
|
+
# :call-seq:
|
|
372
|
+
# re.any
|
|
373
|
+
# re.any(chars)
|
|
374
|
+
# re.any(range)
|
|
375
|
+
# re.any(chars, range, ...)
|
|
376
|
+
#
|
|
377
|
+
# Match a character from the character class.
|
|
378
|
+
#
|
|
379
|
+
# Any without any arguments will match any single character. Any
|
|
380
|
+
# with one or more arguments will construct a character class for
|
|
381
|
+
# the arguments. If the argument is a three character string where
|
|
382
|
+
# the middle character is "-", then the argument represents a range
|
|
383
|
+
# of characters. Otherwise the arguments are treated as a list of
|
|
384
|
+
# characters to be added to the character class.
|
|
385
|
+
#
|
|
386
|
+
# Examples:
|
|
387
|
+
#
|
|
388
|
+
# re.any -- match any character
|
|
389
|
+
# re.any("aieouy") -- match vowels
|
|
390
|
+
# re.any("0-9") -- match digits
|
|
391
|
+
# re.any("A-Z", "a-z", "0-9") -- match alphanumerics
|
|
392
|
+
# re.any("A-Z", "a-z", "0-9", "_") -- match alphanumerics or an underscore
|
|
393
|
+
#
|
|
394
|
+
def any(*chars)
|
|
395
|
+
if chars.empty?
|
|
396
|
+
@dot ||= Rexp.raw(".")
|
|
397
|
+
else
|
|
398
|
+
any_chars = ''
|
|
399
|
+
chars.each do |chs|
|
|
400
|
+
if /^.-.$/ =~ chs
|
|
401
|
+
any_chars << chs
|
|
402
|
+
else
|
|
403
|
+
any_chars << Rexp.escape_any(chs)
|
|
404
|
+
end
|
|
313
405
|
end
|
|
406
|
+
Rexp.new("[" + any_chars + "]", GROUPED, 0, [])
|
|
314
407
|
end
|
|
315
|
-
Rexp.new("[" + any_chars + "]", GROUPED, 0, [])
|
|
316
408
|
end
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
409
|
+
|
|
410
|
+
# :call-seq:
|
|
411
|
+
# re.space
|
|
412
|
+
#
|
|
413
|
+
# Matches any white space
|
|
414
|
+
def space
|
|
415
|
+
@space ||= Rexp.raw("\\s")
|
|
416
|
+
end
|
|
417
|
+
|
|
418
|
+
# :call-seq:
|
|
419
|
+
# re.spaces
|
|
420
|
+
#
|
|
324
421
|
# Matches a sequence of white space characters (one or more)
|
|
325
|
-
|
|
326
|
-
|
|
422
|
+
def spaces
|
|
423
|
+
@spaces ||= space.one_or_more
|
|
424
|
+
end
|
|
425
|
+
|
|
426
|
+
# :call-seq:
|
|
427
|
+
# re.nonspace
|
|
428
|
+
#
|
|
429
|
+
# Matches any non-white space
|
|
430
|
+
def nonspace
|
|
431
|
+
@nonspace ||= Rexp.raw("\\S")
|
|
432
|
+
end
|
|
433
|
+
|
|
434
|
+
# :call-seq:
|
|
435
|
+
# re.nonspaces
|
|
436
|
+
#
|
|
437
|
+
# Matches a sequence of non-white space characters (one or more)
|
|
438
|
+
def nonspaces
|
|
439
|
+
@nonspaces ||= Rexp.raw("\\S").one_or_more
|
|
440
|
+
end
|
|
441
|
+
|
|
442
|
+
# :call-seq:
|
|
443
|
+
# re.word_char
|
|
444
|
+
#
|
|
445
|
+
# Matches a single word character
|
|
446
|
+
def word_char
|
|
447
|
+
@word_char ||= Rexp.raw("\\w")
|
|
448
|
+
end
|
|
449
|
+
|
|
450
|
+
# :call-seq:
|
|
451
|
+
# re.word
|
|
452
|
+
#
|
|
453
|
+
# Matches any sequence of word characters
|
|
454
|
+
def word
|
|
455
|
+
@word ||= word_char.one_or_more
|
|
456
|
+
end
|
|
457
|
+
|
|
458
|
+
# :call-seq:
|
|
459
|
+
# re.break
|
|
460
|
+
#
|
|
461
|
+
# Zero-length match at any word boundary
|
|
462
|
+
def break
|
|
463
|
+
@break ||= Rexp.raw("\\b")
|
|
464
|
+
end
|
|
465
|
+
|
|
466
|
+
# :call-seq:
|
|
467
|
+
# re.digit
|
|
468
|
+
#
|
|
469
|
+
# Matches a digit
|
|
470
|
+
def digit
|
|
471
|
+
@digit ||= any("0-9")
|
|
472
|
+
end
|
|
473
|
+
|
|
474
|
+
# :call-seq:
|
|
475
|
+
# re.digits
|
|
476
|
+
#
|
|
477
|
+
# Matches a sequence of digits
|
|
478
|
+
def digits
|
|
479
|
+
@digits ||= digit.one_or_more
|
|
480
|
+
end
|
|
481
|
+
|
|
482
|
+
# :call-seq:
|
|
483
|
+
# re.hex_digit
|
|
484
|
+
#
|
|
485
|
+
# Matches a hex digit (upper or lower case)
|
|
486
|
+
def hex_digit
|
|
487
|
+
@hex_digit ||= any("0-9", "a-f", "A-F")
|
|
488
|
+
end
|
|
489
|
+
|
|
490
|
+
# :call-seq:
|
|
491
|
+
# re.hex_digits
|
|
492
|
+
#
|
|
493
|
+
# Matches a sequence of hex digits
|
|
494
|
+
def hex_digits
|
|
495
|
+
@hex_digits ||= hex_digit.one_or_more
|
|
496
|
+
end
|
|
327
497
|
end
|
|
328
498
|
|
|
329
|
-
# Matches
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
def NULL.nonspaces
|
|
336
|
-
@nonspaces ||= Rexp.raw("\\S").one_or_more
|
|
337
|
-
end
|
|
338
|
-
|
|
339
|
-
# Matches any sequence of word characters
|
|
340
|
-
def NULL.word_char
|
|
341
|
-
@word_char ||= Rexp.raw("\\w")
|
|
342
|
-
end
|
|
343
|
-
|
|
344
|
-
# Matches any sequence of word characters
|
|
345
|
-
def NULL.word
|
|
346
|
-
@word ||= word_char.one_or_more
|
|
347
|
-
end
|
|
348
|
-
|
|
349
|
-
# Zero-length matches any break
|
|
350
|
-
def NULL.break
|
|
351
|
-
@break ||= Rexp.raw("\\b")
|
|
352
|
-
end
|
|
353
|
-
|
|
354
|
-
# Matches a digit
|
|
355
|
-
def NULL.digit
|
|
356
|
-
@digit ||= any("0-9")
|
|
357
|
-
end
|
|
358
|
-
|
|
359
|
-
# Matches a sequence of digits
|
|
360
|
-
def NULL.digits
|
|
361
|
-
@digits ||= digit.one_or_more
|
|
362
|
-
end
|
|
363
|
-
|
|
364
|
-
# Matches a hex digit (upper or lower case)
|
|
365
|
-
def NULL.hex_digit
|
|
366
|
-
@hex_digit ||= any("0-9", "a-f", "A-F")
|
|
367
|
-
end
|
|
368
|
-
|
|
369
|
-
# Matches a sequence of hex digits
|
|
370
|
-
def NULL.hex_digits
|
|
371
|
-
@hex_digits ||= hex_digit.one_or_more
|
|
372
|
-
end
|
|
499
|
+
# Matches an empty string. Additional common regular expression
|
|
500
|
+
# construction methods are defined on NULL. See
|
|
501
|
+
# Re::ConstructionMethods for details.
|
|
502
|
+
NULL = Rexp.literal("")
|
|
503
|
+
NULL.extend(ConstructionMethods)
|
|
504
|
+
|
|
373
505
|
end
|
data/test/re_test.rb
CHANGED
|
@@ -6,6 +6,11 @@ require 're'
|
|
|
6
6
|
class ReTest < Test::Unit::TestCase
|
|
7
7
|
include Re
|
|
8
8
|
|
|
9
|
+
def test_module_access_to_re_function
|
|
10
|
+
r = Re.re("a").all
|
|
11
|
+
assert r =~ "a"
|
|
12
|
+
end
|
|
13
|
+
|
|
9
14
|
def test_strings_match
|
|
10
15
|
assert re("a") =~ "a"
|
|
11
16
|
assert re("a") !~ "A"
|
|
@@ -46,6 +51,20 @@ class ReTest < Test::Unit::TestCase
|
|
|
46
51
|
assert r =~ "xxx"
|
|
47
52
|
end
|
|
48
53
|
|
|
54
|
+
def test_greedy_many
|
|
55
|
+
r = re.any.many.capture(:x) + re("b")
|
|
56
|
+
result = r.match("xbxb")
|
|
57
|
+
assert result
|
|
58
|
+
assert_equal "xbx", result[:x]
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def test_non_greedy_many
|
|
62
|
+
r = re.any.many!.capture(:x) + re("b")
|
|
63
|
+
result = r.match("xbxb")
|
|
64
|
+
assert result
|
|
65
|
+
assert_equal "x", result[:x]
|
|
66
|
+
end
|
|
67
|
+
|
|
49
68
|
def test_one_or_more
|
|
50
69
|
r = re("x").one_or_more.all
|
|
51
70
|
assert r !~ ""
|
|
@@ -53,6 +72,20 @@ class ReTest < Test::Unit::TestCase
|
|
|
53
72
|
assert r =~ "xxx"
|
|
54
73
|
end
|
|
55
74
|
|
|
75
|
+
def test_greedy_one_or_more
|
|
76
|
+
r = re.any.one_or_more.capture(:any) + re("b")
|
|
77
|
+
result = r.match("xbxb")
|
|
78
|
+
assert result
|
|
79
|
+
assert_equal "xbx", result[:any]
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def test_non_greedy_one_or_more
|
|
83
|
+
r = re.any.one_or_more!.capture(:any) + re("b")
|
|
84
|
+
result = r.match("xbxb")
|
|
85
|
+
assert result
|
|
86
|
+
assert_equal "x", result[:any]
|
|
87
|
+
end
|
|
88
|
+
|
|
56
89
|
def test_repeat_fixed_number
|
|
57
90
|
r = re("a").repeat(3).all
|
|
58
91
|
assert r =~ "aaa"
|
|
@@ -104,7 +137,7 @@ class ReTest < Test::Unit::TestCase
|
|
|
104
137
|
assert r !~ "\n"
|
|
105
138
|
end
|
|
106
139
|
|
|
107
|
-
def
|
|
140
|
+
def test_no_options
|
|
108
141
|
r = re("a")
|
|
109
142
|
assert ! r.ignore_case?
|
|
110
143
|
assert ! r.multiline?
|
|
@@ -363,7 +396,7 @@ class ReTest < Test::Unit::TestCase
|
|
|
363
396
|
r = re.any("a-z").one_or_more.capture(:word)
|
|
364
397
|
result = (r =~ "012abc789")
|
|
365
398
|
assert result
|
|
366
|
-
assert_equal "abc", result
|
|
399
|
+
assert_equal "abc", result[:word]
|
|
367
400
|
end
|
|
368
401
|
|
|
369
402
|
def test_multiple_capture
|
|
@@ -373,10 +406,10 @@ class ReTest < Test::Unit::TestCase
|
|
|
373
406
|
result = (r =~ " now 123\n")
|
|
374
407
|
assert result
|
|
375
408
|
assert_equal [:everything, :word, :number], r.capture_keys
|
|
376
|
-
assert_equal "now", result
|
|
377
|
-
assert_equal "123", result
|
|
378
|
-
assert_equal "now 123", result
|
|
379
|
-
assert_equal "now 123", result.
|
|
409
|
+
assert_equal "now", result[:word]
|
|
410
|
+
assert_equal "123", result[:number]
|
|
411
|
+
assert_equal "now 123", result[:everything]
|
|
412
|
+
assert_equal "now 123", result.full_match
|
|
380
413
|
end
|
|
381
414
|
|
|
382
415
|
def test_precedence_concatentaion_vs_alteration
|
|
@@ -397,25 +430,10 @@ class ReTest < Test::Unit::TestCase
|
|
|
397
430
|
|
|
398
431
|
result = delim_definition.match("//[a][b][xyz]\n1a2b3xyz4")
|
|
399
432
|
assert result
|
|
400
|
-
assert_equal "[a][b][xyz]", result
|
|
433
|
+
assert_equal "[a][b][xyz]", result[:delims]
|
|
401
434
|
end
|
|
402
435
|
|
|
403
436
|
def test_date_parser
|
|
404
|
-
# (19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])
|
|
405
|
-
|
|
406
|
-
delim = re.any("- /.")
|
|
407
|
-
n_19_or_20 = re("19") | re("20")
|
|
408
|
-
n_1_to_9 = re("0") + re.any("1-9")
|
|
409
|
-
n_10_to_12 = re("1") + re.any("012")
|
|
410
|
-
n_10_to_29 = re.any("12") + re.any("0-9")
|
|
411
|
-
n_30_or_31 = re("3") + re.any("01")
|
|
412
|
-
|
|
413
|
-
year = n_19_or_20 + re.digit.repeat(2)
|
|
414
|
-
month = n_1_to_9 | n_10_to_12
|
|
415
|
-
day = n_1_to_9 | n_10_to_29 | n_30_or_31
|
|
416
|
-
|
|
417
|
-
date_re = (year.capture(:year) + delim + month.capture(:month) + delim + day.capture(:day)).all
|
|
418
|
-
|
|
419
437
|
assert date_re.match("1900/01/01")
|
|
420
438
|
assert date_re.match("1956/01/01")
|
|
421
439
|
assert date_re.match("2000/01/01")
|
|
@@ -435,6 +453,40 @@ class ReTest < Test::Unit::TestCase
|
|
|
435
453
|
assert ! date_re.match("2010/1/01")
|
|
436
454
|
assert ! date_re.match("2010/01/1")
|
|
437
455
|
end
|
|
456
|
+
|
|
457
|
+
def test_date_capture
|
|
458
|
+
result = date_re.match("2010/02/14")
|
|
459
|
+
assert result
|
|
460
|
+
assert_equal "2010", result[:year]
|
|
461
|
+
assert_equal "02", result[:month]
|
|
462
|
+
assert_equal "14", result[:day]
|
|
463
|
+
end
|
|
438
464
|
|
|
465
|
+
private
|
|
439
466
|
|
|
467
|
+
def date_re
|
|
468
|
+
self.class.date_re
|
|
469
|
+
end
|
|
470
|
+
|
|
471
|
+
class << self
|
|
472
|
+
include Re
|
|
473
|
+
def date_re
|
|
474
|
+
# (19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])
|
|
475
|
+
@date_re ||=
|
|
476
|
+
begin
|
|
477
|
+
delim_re = re.any("- /.")
|
|
478
|
+
century_prefix_re = re("19") | re("20")
|
|
479
|
+
under_ten_re = re("0") + re.any("1-9")
|
|
480
|
+
ten_to_twelve_re = re("1") + re.any("012")
|
|
481
|
+
ten_and_under_thirty_re = re.any("12") + re.any("0-9")
|
|
482
|
+
thirties_re = re("3") + re.any("01")
|
|
483
|
+
|
|
484
|
+
year = century_prefix_re + re.digit.repeat(2)
|
|
485
|
+
month = under_ten_re | ten_to_twelve_re
|
|
486
|
+
day = under_ten_re | ten_and_under_thirty_re | thirties_re
|
|
487
|
+
|
|
488
|
+
(year.capture(:year) + delim_re + month.capture(:month) + delim_re + day.capture(:day)).all
|
|
489
|
+
end
|
|
490
|
+
end
|
|
491
|
+
end
|
|
440
492
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: re
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.
|
|
4
|
+
version: 0.0.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Jim Weirich
|
|
@@ -22,12 +22,13 @@ extensions: []
|
|
|
22
22
|
extra_rdoc_files: []
|
|
23
23
|
|
|
24
24
|
files:
|
|
25
|
+
- MIT-LICENSE
|
|
25
26
|
- Rakefile
|
|
26
27
|
- README.rdoc
|
|
27
28
|
- lib/re.rb
|
|
28
29
|
- test/re_test.rb
|
|
29
30
|
has_rdoc: true
|
|
30
|
-
homepage: http://re.rubyforge.org
|
|
31
|
+
homepage: http://re-lib.rubyforge.org
|
|
31
32
|
licenses: []
|
|
32
33
|
|
|
33
34
|
post_install_message:
|