re 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (6) hide show
  1. data/MIT-LICENSE +21 -0
  2. data/README.rdoc +67 -8
  3. data/Rakefile +6 -3
  4. data/lib/re.rb +269 -137
  5. data/test/re_test.rb +74 -22
  6. metadata +3 -2
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2003, 2004 Jim Weirich
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
@@ -1,9 +1,46 @@
1
1
 
2
2
  = Regular Expression Construction.
3
3
 
4
- Construct regular expressions using the re() method.
4
+ Complex regular expressions are hard to construct and even harder to
5
+ read. The Re library allows users to construct complex regular
6
+ expressions from simpler expressions. For example, consider the
7
+ following regular expression that will parse dates:
5
8
 
6
- Usage:
9
+ /\A((?:19|20)[0-9]{2})[\- \/.](0[1-9]|1[012])[\- \/.](0[1-9]|[12][0-9]|3[01])\z/
10
+
11
+ Using the Re library, that regular expression can be built
12
+ incrementally from smaller, easier to understand expressions.
13
+ Perhaps something like this:
14
+
15
+ require 're'
16
+
17
+ include Re
18
+
19
+ delim = re.any("- /.")
20
+ century_prefix = re("19") | re("20")
21
+ under_ten = re("0") + re.any("1-9")
22
+ ten_to_twelve = re("1") + re.any("012")
23
+ ten_and_under_thirty = re.any("12") + re.any("0-9")
24
+ thirties = re("3") + re.any("01")
25
+
26
+ year = (century_prefix + re.digit.repeat(2)).capture(:year)
27
+ month = (under_ten | ten_to_twelve).capture(:month)
28
+ day = (under_ten | ten_and_under_thirty | thirties).capture(:day)
29
+
30
+ date = (year + delim + month + delim + day).all
31
+
32
+ Although it is more code, the individual pieces are smaller and
33
+ easier to independently verify. As an additional bonus, the capture
34
+ groups can be retrieved by name:
35
+
36
+ result = date.match("2009-01-23")
37
+ result.data(:year) # => "2009"
38
+ result.data(:month) # => "01"
39
+ result.data(:day) # => "23"
40
+
41
+ == Version: 0.0.3
42
+
43
+ == Usage:
7
44
 
8
45
  include Re
9
46
 
@@ -14,7 +51,7 @@ Usage:
14
51
  puts "No Match"
15
52
  end
16
53
 
17
- Examples:
54
+ == Examples:
18
55
 
19
56
  re("a") -- matches "a"
20
57
  re("a") + re("b") -- matches "ab"
@@ -29,11 +66,33 @@ See Re::Rexp for a complete list of expressions.
29
66
  Using re without an argument allows access to a number of common
30
67
  regular expression constants. For example:
31
68
 
32
- re.space -- matches " ", "\n" or "\t"
33
- re.spaces -- matches any number of spaces (but at least one)
69
+ re.space / re.spaces -- matches " ", "\n" or "\t"
34
70
  re.digit / re.digits -- matches a digit / sequence of digits
35
71
 
36
- See Re::NULL for a complete list of common constants.
72
+ In addition, re without arguments can be used to construct character
73
+ classes:
74
+
75
+ re.any -- Matches any character
76
+ re.any("abc") -- Matches "a", "b", or "c"
77
+ re.any("0-9") -- Matches the digits 0 through 9
78
+ re.any("A-Z", "a-z", "0-9", "_")
79
+ -- Matches alphanumeric or an underscore
80
+
81
+ See Re::ConstructionMethods for a complete list of common constants
82
+ and character class functions.
83
+
84
+ See Re.re, Re::Rexp, and Re::ConstructionMethods for details.
85
+
86
+ == License and Copyright:
87
+
88
+ Copyright 2009 by Jim Weirich (jim.weirich@gmail.com)
89
+
90
+ Re is provided under the MIT open source license (see MIT-LICENSE)
91
+
92
+ == Links:
93
+
94
+ * Documentation :: http://re-lib.rubyforge.org
95
+ * Source :: http://github.com/jimweirich/re
96
+ * Bug Tracker :: http://www.pivotaltracker.com/projects/47758
97
+ * Author :: jim.weirich@gmail.com
37
98
 
38
- See Re.re,
39
- Re::Rexp, and Re::NULL for details.
data/Rakefile CHANGED
@@ -4,6 +4,8 @@ require 'rake/clean'
4
4
  require 'rake/testtask'
5
5
  require 'rake/rdoctask'
6
6
 
7
+ require 'lib/re'
8
+
7
9
  task :default => :test
8
10
 
9
11
  Rake::TestTask.new(:test) do |t|
@@ -12,7 +14,8 @@ Rake::TestTask.new(:test) do |t|
12
14
  t.test_files = FileList['test/*_test.rb']
13
15
  end
14
16
 
15
- Rake::RDocTask.new do |rd|
16
- rd.main = "lib/re.rb"
17
- rd.rdoc_files = FileList["lib/re.rb"]
17
+ task :release => [:check_non_beta, :readme, :gem, "publish:rdoc"]
18
+
19
+ task :check_non_beta do
20
+ fail "Must not be a beta version! Version is #{Re::VERSION}" if Re::Version::BETA
18
21
  end
data/lib/re.rb CHANGED
@@ -2,9 +2,44 @@
2
2
  #
3
3
  # = Regular Expression Construction.
4
4
  #
5
- # Construct regular expressions using the re() method.
5
+ # Complex regular expressions are hard to construct and even harder to
6
+ # read. The Re library allows users to construct complex regular
7
+ # expressions from simpler expressions. For example, consider the
8
+ # following regular expression that will parse dates:
6
9
  #
7
- # Usage:
10
+ # /\A((?:19|20)[0-9]{2})[\- \/.](0[1-9]|1[012])[\- \/.](0[1-9]|[12][0-9]|3[01])\z/
11
+ #
12
+ # Using the Re library, that regular expression can be built
13
+ # incrementally from smaller, easier to understand expressions.
14
+ # Perhaps something like this:
15
+ #
16
+ # require 're'
17
+ #
18
+ # include Re
19
+ #
20
+ # delim = re.any("- /.")
21
+ # century_prefix = re("19") | re("20")
22
+ # under_ten = re("0") + re.any("1-9")
23
+ # ten_to_twelve = re("1") + re.any("012")
24
+ # ten_and_under_thirty = re.any("12") + re.any("0-9")
25
+ # thirties = re("3") + re.any("01")
26
+ #
27
+ # year = (century_prefix + re.digit.repeat(2)).capture(:year)
28
+ # month = (under_ten | ten_to_twelve).capture(:month)
29
+ # day = (under_ten | ten_and_under_thirty | thirties).capture(:day)
30
+ #
31
+ # date = (year + delim + month + delim + day).all
32
+ #
33
+ # Although it is more code, the individual pieces are smaller and
34
+ # easier to independently verify. As an additional bonus, the capture
35
+ # groups can be retrieved by name:
36
+ #
37
+ # result = date.match("2009-01-23")
38
+ # result.data(:year) # => "2009"
39
+ # result.data(:month) # => "01"
40
+ # result.data(:day) # => "23"
41
+ #
42
+ # == Usage:
8
43
  #
9
44
  # include Re
10
45
  #
@@ -15,7 +50,7 @@
15
50
  # puts "No Match"
16
51
  # end
17
52
  #
18
- # Examples:
53
+ # == Examples:
19
54
  #
20
55
  # re("a") -- matches "a"
21
56
  # re("a") + re("b") -- matches "ab"
@@ -30,31 +65,70 @@
30
65
  # Using re without an argument allows access to a number of common
31
66
  # regular expression constants. For example:
32
67
  #
33
- # re.space -- matches " ", "\n" or "\t"
34
- # re.spaces -- matches any number of spaces (but at least one)
68
+ # re.space / re.spaces -- matches " ", "\n" or "\t"
35
69
  # re.digit / re.digits -- matches a digit / sequence of digits
36
70
  #
37
- # See Re::NULL for a complete list of common constants.
71
+ # In addition, re without arguments can be used to construct character
72
+ # classes:
73
+ #
74
+ # re.any -- Matches any character
75
+ # re.any("abc") -- Matches "a", "b", or "c"
76
+ # re.any("0-9") -- Matches the digits 0 through 9
77
+ # re.any("A-Z", "a-z", "0-9", "_")
78
+ # -- Matches alphanumeric or an underscore
79
+ #
80
+ # See Re::ConstructionMethods for a complete list of common constants
81
+ # and character class functions.
82
+ #
83
+ # See Re.re, Re::Rexp, and Re::ConstructionMethods for details.
84
+ #
85
+ # == License and Copyright:
86
+ #
87
+ # Copyright 2009 by Jim Weirich (jim.weirich@gmail.com)
88
+ #
89
+ # Re is provided under the MIT open source license (see MIT-LICENSE)
90
+ #
91
+ # == Links:
92
+ #
93
+ # * Documentation :: http://re-lib.rubyforge.org
94
+ # * Source :: http://github.com/jimweirich/re
95
+ # * Bug Tracker :: http://www.pivotaltracker.com/projects/47758
96
+ # * Author :: jim.weirich@gmail.com
38
97
  #
39
- # See Re.re,
40
- # Re::Rexp, and Re::NULL for details.
41
-
42
98
  module Re
99
+
100
+ module Version
101
+ NUMBERS = [
102
+ MAJOR = 0,
103
+ MINOR = 0,
104
+ BUILD = 3,
105
+ BETA = nil,
106
+ ].compact
107
+ end
108
+ VERSION = Version::NUMBERS.join('.')
109
+
110
+ # Re::Result captures the result of a match and allows lookup of the
111
+ # captured groups by name.
43
112
  class Result
113
+ # Create a Re result object with the match data and the original
114
+ # Re::Rexp object.
44
115
  def initialize(match_data, rexp)
45
116
  @match_data = match_data
46
117
  @rexp = rexp
47
118
  end
48
- def data(name=nil)
49
- if name
50
- index = @rexp.capture_keys.index(name)
51
- index ? @match_data[index+1] : nil
52
- else
53
- @match_data[0]
54
- end
119
+
120
+ # Return the full match
121
+ def full_match
122
+ @match_data[0]
123
+ end
124
+
125
+ # Return the named capture data.
126
+ def [](name)
127
+ index = @rexp.capture_keys.index(name)
128
+ index ? @match_data[index+1] : nil
55
129
  end
56
130
  end
57
-
131
+
58
132
  # Precedence levels for regular expressions:
59
133
 
60
134
  GROUPED = 4 # (r), [chars] :nodoc:
@@ -65,22 +139,22 @@ module Re
65
139
 
66
140
  # Constructed regular expressions.
67
141
  class Rexp
68
- attr_reader :string, :level, :flags, :capture_keys
142
+ attr_reader :string, :level, :options, :capture_keys
69
143
 
70
144
  # Create a regular expression from the string. The regular
71
145
  # expression will have a precedence of +level+ and will recognized
72
146
  # +keys+ as a list of capture keys.
73
- def initialize(string, level, flags, keys)
147
+ def initialize(string, level, options, keys)
74
148
  @string = string
75
149
  @level = level
76
- @flags = flags
150
+ @options = options
77
151
  @capture_keys = keys
78
152
  end
79
153
 
80
154
  # Return a real regular expression from the constructed
81
155
  # regular expression.
82
156
  def regexp
83
- @regexp ||= Regexp.new(string, flags)
157
+ @regexp ||= Regexp.new(string, options)
84
158
  end
85
159
 
86
160
  # Does it match a string? (returns Re::Result if match, nil otherwise)
@@ -94,7 +168,7 @@ module Re
94
168
  def +(other)
95
169
  Rexp.new(parenthesize(CONCAT) + other.parenthesize(CONCAT),
96
170
  CONCAT,
97
- flags | other.flags,
171
+ options | other.options,
98
172
  capture_keys + other.capture_keys)
99
173
  end
100
174
 
@@ -102,43 +176,53 @@ module Re
102
176
  def |(other)
103
177
  Rexp.new(parenthesize(ALT) + "|" + other.parenthesize(ALT),
104
178
  ALT,
105
- flags | other.flags,
179
+ options | other.options,
106
180
  capture_keys + other.capture_keys)
107
181
  end
108
182
 
109
183
  # self is optional
110
184
  def optional
111
- Rexp.new(parenthesize(POSTFIX) + "?", POSTFIX, flags, capture_keys)
185
+ Rexp.new(parenthesize(POSTFIX) + "?", POSTFIX, options, capture_keys)
112
186
  end
113
187
 
114
188
  # self matches many times (zero or more)
115
189
  def many
116
- Rexp.new(parenthesize(POSTFIX) + "*", POSTFIX, flags, capture_keys)
190
+ Rexp.new(parenthesize(POSTFIX) + "*", POSTFIX, options, capture_keys)
191
+ end
192
+
193
+ # self matches many times (zero or more) (non-greedy version)
194
+ def many!
195
+ Rexp.new(parenthesize(POSTFIX) + "*?", POSTFIX, options, capture_keys)
117
196
  end
118
197
 
119
198
  # self matches one or more times
120
199
  def one_or_more
121
- Rexp.new(parenthesize(POSTFIX) + "+", POSTFIX, flags, capture_keys)
200
+ Rexp.new(parenthesize(POSTFIX) + "+", POSTFIX, options, capture_keys)
201
+ end
202
+
203
+ # self matches one or more times (non-greedy version)
204
+ def one_or_more!
205
+ Rexp.new(parenthesize(POSTFIX) + "+?", POSTFIX, options, capture_keys)
122
206
  end
123
207
 
124
208
  # self is repeated from min to max times. If max is omitted, then
125
209
  # it is repeated exactly min times.
126
210
  def repeat(min, max=nil)
127
211
  if min && max
128
- Rexp.new(parenthesize(POSTFIX) + "{#{min},#{max}}", POSTFIX, flags, capture_keys)
212
+ Rexp.new(parenthesize(POSTFIX) + "{#{min},#{max}}", POSTFIX, options, capture_keys)
129
213
  else
130
- Rexp.new(parenthesize(POSTFIX) + "{#{min}}", POSTFIX, flags, capture_keys)
214
+ Rexp.new(parenthesize(POSTFIX) + "{#{min}}", POSTFIX, options, capture_keys)
131
215
  end
132
216
  end
133
217
 
134
218
  # self is repeated at least min times
135
219
  def at_least(min)
136
- Rexp.new(parenthesize(POSTFIX) + "{#{min},}", POSTFIX, flags, capture_keys)
220
+ Rexp.new(parenthesize(POSTFIX) + "{#{min},}", POSTFIX, options, capture_keys)
137
221
  end
138
222
 
139
223
  # self is repeated at most max times
140
224
  def at_most(max)
141
- Rexp.new(parenthesize(POSTFIX) + "{0,#{max}}", POSTFIX, flags, capture_keys)
225
+ Rexp.new(parenthesize(POSTFIX) + "{0,#{max}}", POSTFIX, options, capture_keys)
142
226
  end
143
227
 
144
228
  # None of the given characters will match.
@@ -158,27 +242,27 @@ module Re
158
242
 
159
243
  # self must match at the beginning of a line
160
244
  def bol
161
- Rexp.new("^" + parenthesize(CONCAT), CONCAT, flags, capture_keys)
245
+ Rexp.new("^" + parenthesize(CONCAT), CONCAT, options, capture_keys)
162
246
  end
163
247
 
164
248
  # self must match at the end of a line
165
249
  def eol
166
- Rexp.new(parenthesize(CONCAT) + "$", CONCAT, flags, capture_keys)
250
+ Rexp.new(parenthesize(CONCAT) + "$", CONCAT, options, capture_keys)
167
251
  end
168
252
 
169
253
  # self must match at the beginning of the string
170
254
  def begin
171
- Rexp.new("\\A" + parenthesize(CONCAT), CONCAT, flags, capture_keys)
255
+ Rexp.new("\\A" + parenthesize(CONCAT), CONCAT, options, capture_keys)
172
256
  end
173
257
 
174
258
  # self must match the end of the string (with an optional new line)
175
259
  def end
176
- Rexp.new(parenthesize(CONCAT) + "\\Z", CONCAT, flags, capture_keys)
260
+ Rexp.new(parenthesize(CONCAT) + "\\Z", CONCAT, options, capture_keys)
177
261
  end
178
262
 
179
263
  # self must match the very end of the string (including any new lines)
180
264
  def very_end
181
- Rexp.new(parenthesize(CONCAT) + "\\z", CONCAT, flags, capture_keys)
265
+ Rexp.new(parenthesize(CONCAT) + "\\z", CONCAT, options, capture_keys)
182
266
  end
183
267
 
184
268
  # self must match an entire line.
@@ -188,32 +272,32 @@ module Re
188
272
 
189
273
  # self is contained in a non-capturing group
190
274
  def group
191
- Rexp.new("(?:" + string + ")", GROUPED, flags, capture_keys)
275
+ Rexp.new("(?:" + string + ")", GROUPED, options, capture_keys)
192
276
  end
193
277
 
194
278
  # self is a capturing group with the given name.
195
279
  def capture(name)
196
- Rexp.new("(" + string + ")", GROUPED, flags, [name] + capture_keys)
280
+ Rexp.new("(" + string + ")", GROUPED, options, [name] + capture_keys)
197
281
  end
198
282
 
199
283
  # self will work in multiline matches
200
284
  def multiline
201
- Rexp.new(string, GROUPED, flags|Regexp::MULTILINE, capture_keys)
285
+ Rexp.new(string, GROUPED, options|Regexp::MULTILINE, capture_keys)
202
286
  end
203
287
 
204
288
  # Is this a multiline regular expression?
205
289
  def multiline?
206
- (flags & Regexp::MULTILINE) != 0
290
+ (options & Regexp::MULTILINE) != 0
207
291
  end
208
292
 
209
293
  # self will ignore case when matching
210
294
  def ignore_case
211
- Rexp.new(string, GROUPED, flags|Regexp::IGNORECASE, capture_keys)
295
+ Rexp.new(string, GROUPED, options|Regexp::IGNORECASE, capture_keys)
212
296
  end
213
297
 
214
298
  # Does this regular expression ignore case?
215
299
  def ignore_case?
216
- (flags & Regexp::IGNORECASE) != 0
300
+ (options & Regexp::IGNORECASE) != 0
217
301
  end
218
302
 
219
303
  # String representation of the constructed regular expression.
@@ -267,107 +351,155 @@ module Re
267
351
  def re(exp=nil)
268
352
  exp ? Rexp.literal(exp) : NULL
269
353
  end
354
+ extend self
270
355
 
271
- # Matches an empty string. Additional common regular expression
272
- # constants are defined as methods on the NULL Rexp. See Re::NULL.
273
- NULL = Rexp.literal("")
274
-
275
- # Matches the null string
276
- def NULL.null
277
- self
278
- end
279
-
280
- # :call-seq:
281
- # re.any
282
- # re.any(chars)
283
- # re.any(range)
284
- # re.any(chars, range, ...)
285
- #
286
- # Match a character from the character class.
287
- #
288
- # Any without any arguments will match any single character. Any
289
- # with one or more arguments will construct a character class for
290
- # the arguments. If the argument is a three character string where
291
- # the middle character is "-", then the argument represents a range
292
- # of characters. Otherwise the arguments are treated as a list of
293
- # characters to be added to the character class.
294
- #
295
- # Examples:
296
- #
297
- # re.any -- match any character
298
- # re.any("aieouy") -- match vowels
299
- # re.any("0-9") -- match digits
300
- # re.any("A-Z", "a-z", "0-9") -- match alphanumerics
301
- # re.any("A-Z", "a-z", "0-9", "_") -- match alphanumerics
302
- #
303
- def NULL.any(*chars)
304
- if chars.empty?
305
- @dot ||= Rexp.raw(".")
306
- else
307
- any_chars = ''
308
- chars.each do |chs|
309
- if /^.-.$/ =~ chs
310
- any_chars << chs
311
- else
312
- any_chars << Rexp.escape_any(chs)
356
+ # This module defines a number of methods returning common
357
+ # pre-packaged regular expressions along with methods to create
358
+ # regular expressions from character classes and other objects.
359
+ # ConstructionMethods is mixed into the NULL Rexp object so that
360
+ # re() without arguments can be used to access the methods.
361
+ module ConstructionMethods
362
+
363
+ # :call-seq:
364
+ # re.null
365
+ #
366
+ # Matches the null string
367
+ def null
368
+ self
369
+ end
370
+
371
+ # :call-seq:
372
+ # re.any
373
+ # re.any(chars)
374
+ # re.any(range)
375
+ # re.any(chars, range, ...)
376
+ #
377
+ # Match a character from the character class.
378
+ #
379
+ # Any without any arguments will match any single character. Any
380
+ # with one or more arguments will construct a character class for
381
+ # the arguments. If the argument is a three character string where
382
+ # the middle character is "-", then the argument represents a range
383
+ # of characters. Otherwise the arguments are treated as a list of
384
+ # characters to be added to the character class.
385
+ #
386
+ # Examples:
387
+ #
388
+ # re.any -- match any character
389
+ # re.any("aieouy") -- match vowels
390
+ # re.any("0-9") -- match digits
391
+ # re.any("A-Z", "a-z", "0-9") -- match alphanumerics
392
+ # re.any("A-Z", "a-z", "0-9", "_") -- match alphanumerics or an underscore
393
+ #
394
+ def any(*chars)
395
+ if chars.empty?
396
+ @dot ||= Rexp.raw(".")
397
+ else
398
+ any_chars = ''
399
+ chars.each do |chs|
400
+ if /^.-.$/ =~ chs
401
+ any_chars << chs
402
+ else
403
+ any_chars << Rexp.escape_any(chs)
404
+ end
313
405
  end
406
+ Rexp.new("[" + any_chars + "]", GROUPED, 0, [])
314
407
  end
315
- Rexp.new("[" + any_chars + "]", GROUPED, 0, [])
316
408
  end
317
- end
318
-
319
- # Matches any white space
320
- def NULL.space
321
- @space ||= Rexp.raw("\\s")
322
- end
323
-
409
+
410
+ # :call-seq:
411
+ # re.space
412
+ #
413
+ # Matches any white space
414
+ def space
415
+ @space ||= Rexp.raw("\\s")
416
+ end
417
+
418
+ # :call-seq:
419
+ # re.spaces
420
+ #
324
421
  # Matches a sequence of white space characters (one or more)
325
- def NULL.spaces
326
- @spaces ||= space.one_or_more
422
+ def spaces
423
+ @spaces ||= space.one_or_more
424
+ end
425
+
426
+ # :call-seq:
427
+ # re.nonspace
428
+ #
429
+ # Matches any non-white space
430
+ def nonspace
431
+ @nonspace ||= Rexp.raw("\\S")
432
+ end
433
+
434
+ # :call-seq:
435
+ # re.nonspaces
436
+ #
437
+ # Matches a sequence of non-white space characters (one or more)
438
+ def nonspaces
439
+ @nonspaces ||= Rexp.raw("\\S").one_or_more
440
+ end
441
+
442
+ # :call-seq:
443
+ # re.word_char
444
+ #
445
+ # Matches a single word character
446
+ def word_char
447
+ @word_char ||= Rexp.raw("\\w")
448
+ end
449
+
450
+ # :call-seq:
451
+ # re.word
452
+ #
453
+ # Matches any sequence of word characters
454
+ def word
455
+ @word ||= word_char.one_or_more
456
+ end
457
+
458
+ # :call-seq:
459
+ # re.break
460
+ #
461
+ # Zero-length matches any break
462
+ def break
463
+ @break ||= Rexp.raw("\\b")
464
+ end
465
+
466
+ # :call-seq:
467
+ # re.digit
468
+ #
469
+ # Matches a digit
470
+ def digit
471
+ @digit ||= any("0-9")
472
+ end
473
+
474
+ # :call-seq:
475
+ # re.digits
476
+ #
477
+ # Matches a sequence of digits
478
+ def digits
479
+ @digits ||= digit.one_or_more
480
+ end
481
+
482
+ # :call-seq:
483
+ # re.hex_digit
484
+ #
485
+ # Matches a hex digit (upper or lower case)
486
+ def hex_digit
487
+ @hex_digit ||= any("0-9", "a-f", "A-F")
488
+ end
489
+
490
+ # :call-seq:
491
+ # re.hex_digits
492
+ #
493
+ # Matches a sequence of hex digits
494
+ def hex_digits
495
+ @hex_digits ||= hex_digit.one_or_more
496
+ end
327
497
  end
328
498
 
329
- # Matches any non-white space
330
- def NULL.nonspace
331
- @nonspace ||= Rexp.raw("\\S")
332
- end
333
-
334
- # Matches any non-white space
335
- def NULL.nonspaces
336
- @nonspaces ||= Rexp.raw("\\S").one_or_more
337
- end
338
-
339
- # Matches any sequence of word characters
340
- def NULL.word_char
341
- @word_char ||= Rexp.raw("\\w")
342
- end
343
-
344
- # Matches any sequence of word characters
345
- def NULL.word
346
- @word ||= word_char.one_or_more
347
- end
348
-
349
- # Zero-length matches any break
350
- def NULL.break
351
- @break ||= Rexp.raw("\\b")
352
- end
353
-
354
- # Matches a digit
355
- def NULL.digit
356
- @digit ||= any("0-9")
357
- end
358
-
359
- # Matches a sequence of digits
360
- def NULL.digits
361
- @digits ||= digit.one_or_more
362
- end
363
-
364
- # Matches a hex digit (upper or lower case)
365
- def NULL.hex_digit
366
- @hex_digit ||= any("0-9", "a-f", "A-F")
367
- end
368
-
369
- # Matches a sequence of hex digits
370
- def NULL.hex_digits
371
- @hex_digits ||= hex_digit.one_or_more
372
- end
499
+ # Matches an empty string. Additional common regular expression
500
+ # construction methods are defined on NULL. See
501
+ # Re::ConstructionMethods for details.
502
+ NULL = Rexp.literal("")
503
+ NULL.extend(ConstructionMethods)
504
+
373
505
  end
@@ -6,6 +6,11 @@ require 're'
6
6
  class ReTest < Test::Unit::TestCase
7
7
  include Re
8
8
 
9
+ def test_module_access_to_re_function
10
+ r = Re.re("a").all
11
+ assert r =~ "a"
12
+ end
13
+
9
14
  def test_strings_match
10
15
  assert re("a") =~ "a"
11
16
  assert re("a") !~ "A"
@@ -46,6 +51,20 @@ class ReTest < Test::Unit::TestCase
46
51
  assert r =~ "xxx"
47
52
  end
48
53
 
54
+ def test_greedy_many
55
+ r = re.any.many.capture(:x) + re("b")
56
+ result = r.match("xbxb")
57
+ assert result
58
+ assert_equal "xbx", result[:x]
59
+ end
60
+
61
+ def test_non_greedy_many
62
+ r = re.any.many!.capture(:x) + re("b")
63
+ result = r.match("xbxb")
64
+ assert result
65
+ assert_equal "x", result[:x]
66
+ end
67
+
49
68
  def test_one_or_more
50
69
  r = re("x").one_or_more.all
51
70
  assert r !~ ""
@@ -53,6 +72,20 @@ class ReTest < Test::Unit::TestCase
53
72
  assert r =~ "xxx"
54
73
  end
55
74
 
75
+ def test_greedy_one_or_more
76
+ r = re.any.one_or_more.capture(:any) + re("b")
77
+ result = r.match("xbxb")
78
+ assert result
79
+ assert_equal "xbx", result[:any]
80
+ end
81
+
82
+ def test_non_greedy_one_or_more
83
+ r = re.any.one_or_more!.capture(:any) + re("b")
84
+ result = r.match("xbxb")
85
+ assert result
86
+ assert_equal "x", result[:any]
87
+ end
88
+
56
89
  def test_repeat_fixed_number
57
90
  r = re("a").repeat(3).all
58
91
  assert r =~ "aaa"
@@ -104,7 +137,7 @@ class ReTest < Test::Unit::TestCase
104
137
  assert r !~ "\n"
105
138
  end
106
139
 
107
- def test_no_flags
140
+ def test_no_options
108
141
  r = re("a")
109
142
  assert ! r.ignore_case?
110
143
  assert ! r.multiline?
@@ -363,7 +396,7 @@ class ReTest < Test::Unit::TestCase
363
396
  r = re.any("a-z").one_or_more.capture(:word)
364
397
  result = (r =~ "012abc789")
365
398
  assert result
366
- assert_equal "abc", result.data(:word)
399
+ assert_equal "abc", result[:word]
367
400
  end
368
401
 
369
402
  def test_multiple_capture
@@ -373,10 +406,10 @@ class ReTest < Test::Unit::TestCase
373
406
  result = (r =~ " now 123\n")
374
407
  assert result
375
408
  assert_equal [:everything, :word, :number], r.capture_keys
376
- assert_equal "now", result.data(:word)
377
- assert_equal "123", result.data(:number)
378
- assert_equal "now 123", result.data(:everything)
379
- assert_equal "now 123", result.data
409
+ assert_equal "now", result[:word]
410
+ assert_equal "123", result[:number]
411
+ assert_equal "now 123", result[:everything]
412
+ assert_equal "now 123", result.full_match
380
413
  end
381
414
 
382
415
  def test_precedence_concatentaion_vs_alteration
@@ -397,25 +430,10 @@ class ReTest < Test::Unit::TestCase
397
430
 
398
431
  result = delim_definition.match("//[a][b][xyz]\n1a2b3xyz4")
399
432
  assert result
400
- assert_equal "[a][b][xyz]", result.data(:delims)
433
+ assert_equal "[a][b][xyz]", result[:delims]
401
434
  end
402
435
 
403
436
  def test_date_parser
404
- # (19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])
405
-
406
- delim = re.any("- /.")
407
- n_19_or_20 = re("19") | re("20")
408
- n_1_to_9 = re("0") + re.any("1-9")
409
- n_10_to_12 = re("1") + re.any("012")
410
- n_10_to_29 = re.any("12") + re.any("0-9")
411
- n_30_or_31 = re("3") + re.any("01")
412
-
413
- year = n_19_or_20 + re.digit.repeat(2)
414
- month = n_1_to_9 | n_10_to_12
415
- day = n_1_to_9 | n_10_to_29 | n_30_or_31
416
-
417
- date_re = (year.capture(:year) + delim + month.capture(:month) + delim + day.capture(:day)).all
418
-
419
437
  assert date_re.match("1900/01/01")
420
438
  assert date_re.match("1956/01/01")
421
439
  assert date_re.match("2000/01/01")
@@ -435,6 +453,40 @@ class ReTest < Test::Unit::TestCase
435
453
  assert ! date_re.match("2010/1/01")
436
454
  assert ! date_re.match("2010/01/1")
437
455
  end
456
+
457
+ def test_date_capture
458
+ result = date_re.match("2010/02/14")
459
+ assert result
460
+ assert_equal "2010", result[:year]
461
+ assert_equal "02", result[:month]
462
+ assert_equal "14", result[:day]
463
+ end
438
464
 
465
+ private
439
466
 
467
+ def date_re
468
+ self.class.date_re
469
+ end
470
+
471
+ class << self
472
+ include Re
473
+ def date_re
474
+ # (19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])
475
+ @date_re ||=
476
+ begin
477
+ delim_re = re.any("- /.")
478
+ century_prefix_re = re("19") | re("20")
479
+ under_ten_re = re("0") + re.any("1-9")
480
+ ten_to_twelve_re = re("1") + re.any("012")
481
+ ten_and_under_thirty_re = re.any("12") + re.any("0-9")
482
+ thirties_re = re("3") + re.any("01")
483
+
484
+ year = century_prefix_re + re.digit.repeat(2)
485
+ month = under_ten_re | ten_to_twelve_re
486
+ day = under_ten_re | ten_and_under_thirty_re | thirties_re
487
+
488
+ (year.capture(:year) + delim_re + month.capture(:month) + delim_re + day.capture(:day)).all
489
+ end
490
+ end
491
+ end
440
492
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: re
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jim Weirich
@@ -22,12 +22,13 @@ extensions: []
22
22
  extra_rdoc_files: []
23
23
 
24
24
  files:
25
+ - MIT-LICENSE
25
26
  - Rakefile
26
27
  - README.rdoc
27
28
  - lib/re.rb
28
29
  - test/re_test.rb
29
30
  has_rdoc: true
30
- homepage: http://re.rubyforge.org
31
+ homepage: http://re-lib.rubyforge.org
31
32
  licenses: []
32
33
 
33
34
  post_install_message: