cnuregexp 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.txt +43 -0
- data/Rakefile +62 -0
- data/lib/cnuregexp.rb +345 -0
- data/lib/cnuregexp/version.rb +9 -0
- data/lib/cnuregexp_config.yml +33 -0
- data/test/test_cnuregexp.rb +51 -0
- data/test/test_helper.rb +2 -0
- metadata +59 -0
data/README.txt
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
cnuregexp
|
2
|
+
====================
|
3
|
+
|
4
|
+
-What is cnuregexp?
|
5
|
+
cnuregexp allows tags to be placed inside a regex which function as labels
|
6
|
+
for the matches. The matches within the MatchData object can
|
7
|
+
then be accessed like a hash with the tag name as the key. cnuregexp also
|
8
|
+
provides a greedy match which will return an array of all matches rather than
|
9
|
+
just the first match.
|
10
|
+
|
11
|
+
cnuregexp can also extract various data from an xml tag with the Regexp.xml_tag
|
12
|
+
method. It uses Regexps to get the tag name, the attributes and their values,
|
13
|
+
the tag content, and any other relevant data from an xml string.
|
14
|
+
|
15
|
+
Lastly, cnuregexp allows commonly used regular expressions to be stored in a
|
16
|
+
config file(lib/cnuregexp_config.yml) and accessed with
|
17
|
+
Regexp.regular_expression_name notation eg. Regexp.ssn, Regexp.email_address.
|
18
|
+
cnuregexp comes preloaded with a few common regular expressions which are
|
19
|
+
located in lib/cnuregexp_config.yml.
|
20
|
+
|
21
|
+
-Installation
|
22
|
+
sudo gem install cnuregexp
|
23
|
+
|
24
|
+
-Examples
|
25
|
+
re=/((?# areacode)[0-9]{3})-((?# number)[0-9]{3}-[0-9]{4})/.tag!
|
26
|
+
m=re.match("my phone number is 312-243-0979")
|
27
|
+
m["areacode"] # => "312"
|
28
|
+
m["number"] # => "243-0979"
|
29
|
+
|
30
|
+
r = Regexp.xml_tag :a, {:href=>true, :attr2=>true, :attr_not_exist=>false}
|
31
|
+
m = r.match('<a href="#test" attr2="_test">test</a>')
|
32
|
+
m[:href_value] # => "#test"
|
33
|
+
m[:attr2_value] # => "_test"
|
34
|
+
m[:content] # => "test"
|
35
|
+
|
36
|
+
Regexp.email_address # => / \b ((?# user) (?!\.+)(?!-+)[\w\d._%\-+]+ ) @ ((?# subdomain) ((?!-+)[\w\d\-]+\.)* ((?!-+)[\w\d\-]+ ) ) \. ((?# tld) \w{2,6} ) \b /x
|
37
|
+
|
38
|
+
-License
|
39
|
+
cnuregexp is distributed under the Ruby license
|
40
|
+
|
41
|
+
-Contact
|
42
|
+
website: www.opensource.cashnetusa.com, www.usergenic.com
|
43
|
+
email: OpenSource@cashnetusa.com, brendan@usergenic.com
|
data/Rakefile
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'rake/clean'
|
4
|
+
require 'rake/testtask'
|
5
|
+
require 'rake/packagetask'
|
6
|
+
require 'rake/gempackagetask'
|
7
|
+
require 'rake/rdoctask'
|
8
|
+
require 'rake/contrib/rubyforgepublisher'
|
9
|
+
require 'fileutils'
|
10
|
+
require 'hoe'
|
11
|
+
include FileUtils
|
12
|
+
require File.join(File.dirname(__FILE__), 'lib', 'cnuregexp', 'version')
|
13
|
+
|
14
|
+
AUTHOR = ['CashnetUSA', 'Brendan Baldwin']
|
15
|
+
DESCRIPTION =
|
16
|
+
'cnuregexp allows tags to be placed inside a regex which function as labels
|
17
|
+
for the matches. The matches within the MatchData object can
|
18
|
+
then be accessed like a hash with the tag name as the key. cnuregexp also
|
19
|
+
provides a greedy match which will return an array of all matches rather than
|
20
|
+
just the first match.
|
21
|
+
|
22
|
+
cnuregexp can also extract various data from an xml tag with the Regexp.xml_tag
|
23
|
+
method. It uses Regexps to get the tag name, the attributes and their values,
|
24
|
+
the tag content, and any other relevant data from an xml string.
|
25
|
+
|
26
|
+
Lastly, cnuregexp allows commonly used regular expressions to be stored in a
|
27
|
+
config file(lib/cnuregexp_config.yml) and accessed with
|
28
|
+
Regexp.regular_expression_name notation eg. Regexp.ssn, Regexp.email_address.
|
29
|
+
cnuregexp comes preloaded with a few common regular expressions which are
|
30
|
+
located in lib/cnuregexp_config.yml.'
|
31
|
+
|
32
|
+
GEM_NAME = 'cnuregexp'
|
33
|
+
RUBYFORGE_PROJECT = 'cnuregexp'
|
34
|
+
HOMEPATH = "http://opensource.cashnetusa.com"
|
35
|
+
DOWNLOAD_PATH = "http://rubyforge.org/projects/#{RUBYFORGE_PROJECT}"
|
36
|
+
|
37
|
+
NAME = "cnuregexp"
|
38
|
+
REV = nil
|
39
|
+
VERS = Cnuregexp::VERSION::STRING + (REV ? ".#{REV}" : "")
|
40
|
+
CLEAN.include ['**/.*.sw?', '*.gem', '.config', '**/.DS_Store']
|
41
|
+
|
42
|
+
# Generate all the Rake tasks
|
43
|
+
hoe = Hoe.new(GEM_NAME, VERS) do |p|
|
44
|
+
p.author = AUTHOR
|
45
|
+
p.description = DESCRIPTION
|
46
|
+
p.url = HOMEPATH
|
47
|
+
p.rubyforge_name = RUBYFORGE_PROJECT
|
48
|
+
p.test_globs = ["test/**/test_*.rb"]
|
49
|
+
p.clean_globs = CLEAN
|
50
|
+
end
|
51
|
+
|
52
|
+
# Release guard. Stops rake unless a VERSION=x.y.z environment variable was
# supplied and it equals VERS (the version string assembled above from
# version.rb and REV).
#
# abort is used instead of puts + exit so a failed check writes its message
# to stderr and ends the process with a non-zero status; a bare exit reports
# success to the calling shell even though the check failed.
task :check_version do
  requested = ENV['VERSION']
  abort 'Must pass a VERSION=x.y.z release version' unless requested
  unless requested == VERS
    abort "Please update your version.rb to match the release version, currently #{VERS}"
  end
end
|
62
|
+
|
data/lib/cnuregexp.rb
ADDED
@@ -0,0 +1,345 @@
|
|
1
|
+
class Regexp
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
# The following regexp defines any character sequence which is *not*
|
6
|
+
# a backreference-generating open-parentheses. This is used to strip
|
7
|
+
# out non-backreference-generating open-parentheses, prior to counting
|
8
|
+
# open parentheses to determine number of backreferences generated up
|
9
|
+
# to a point in an expression.
|
10
|
+
NO_BACKREF_REGEXP = /
|
11
|
+
\\{2}+ | # slashes in multiples of 2
|
12
|
+
\\\( | # slash escaped open paren
|
13
|
+
\(\? | # open paren followed by question is ruby regexp extension
|
14
|
+
[^\(] # any character that is not an open paren
|
15
|
+
/x
|
16
|
+
|
17
|
+
# Defines a backreference inside a Regexp, like \1 or \52. This
|
18
|
+
# successfully ignores escaped backreference like \\1 or \\\\\\1 etc.
|
19
|
+
BACKREF_REGEXP = /
|
20
|
+
((?: # ruby needs lookbehind but doesnt have, so...
|
21
|
+
(?:[^\\]|\A)\\{2}+ | # slashes in multiples of two
|
22
|
+
[^\\] | # not a slash
|
23
|
+
\A # beginning of string
|
24
|
+
) \\ ) # slash to start backreference
|
25
|
+
(\d+) # the backreference number
|
26
|
+
/x
|
27
|
+
|
28
|
+
# Define what a 'Tag/Comment' is in regexp-ease: (?# tag name)
|
29
|
+
# Note: Ruby regexps ignore escaping of close-parentheses inside
|
30
|
+
# comments. For example /(?#\)/ is a complete comment and the
|
31
|
+
# following fails to compile: /(?# test(ing))/ since ruby only
|
32
|
+
# cares about the first close paren. The following *will* "work":
|
33
|
+
# /(?# test(ing)/ <= Though we're *technically* missing a close ')'
|
34
|
+
# This is *not* a hack.
|
35
|
+
TAG_REGEXP = /
|
36
|
+
( \\{2}+ | [^\\] | \A ) # make sure the comment start is not escaped
|
37
|
+
\(\?\# ([^)]*) \) # the actual ruby regexp extension coment
|
38
|
+
/x
|
39
|
+
|
40
|
+
# This internal module contains the methods to extend the Regexp
|
41
|
+
# instance of, when the tag! method is called.
|
42
|
+
module Tagged

  # Replacement for Regexp#match on a tagged Regexp. Delegates to the
  # Method object saved in @original_match_method and, on a successful
  # match, stores this Regexp on the MatchData so that tag names can be
  # resolved through tag_index later on.
  def match(string)
    result = @original_match_method.call(string)
    result.regexp = self if result
    result
  end

  # Returns (and memoizes) a Hash mapping each comment/tag text to the
  # number of the capture group it sits in.
  def tag_index
    return @tag_index unless @tag_index.nil?
    @tag_index = {}
    remaining = to_s
    groups_seen = 0
    loop do
      found = TAG_REGEXP.match(remaining)
      break unless found
      label = found.captures.last.strip
      # Everything up to the comment, minus all text that cannot open a
      # capture group, leaves exactly one '(' per group opened so far.
      lead_in = found.pre_match + found.captures.first
      groups_seen += lead_in.gsub(NO_BACKREF_REGEXP, '').length
      remaining = found.post_match
      @tag_index[label] = groups_seen
    end
    @tag_index
  end

  # Already tagged, so a copy of self is all that is needed.
  def tag
    clone
  end

  # Already tagged, so there is nothing to do.
  def tag!
    self
  end

  # A Regexp extended with this module is tagged by definition.
  def tagged?
    true
  end

end
|
88
|
+
|
89
|
+
# Concatenate this Regexp with another Regexp or String and return the
# combined Regexp. Why use this instead of joining the sources by hand?
# Numbered backreferences in the second operand are shifted by the number
# of capture groups in self, so they keep pointing at the second
# operand's own groups. For example: /(a)\1/ + /(b)\1/ will return
# /(?-mix:(a)\1)(?-mix:(b)\2)/ # <= notice the \2 in the second.
#
# The result is tagged when either operand is tagged.
def +(arg)
  raise 'String or Regexp required' unless arg.respond_to? :to_s
  self_s, arg_s = to_s, arg.to_s
  # After stripping everything that cannot open a capture group, one '('
  # remains per group in self.
  backref_count = self_s.gsub(NO_BACKREF_REGEXP, '').size
  shifted = arg_s.gsub(BACKREF_REGEXP) do
    "#{$1}#{$2.to_i + backref_count}"
  end
  # Regexp.new rather than a /#{...}/ literal: tag! has to mutate the
  # result, and regexp literals are frozen on Ruby 3.0 and later.
  result = Regexp.new(self_s + shifted)
  result.tag! if tagged? || (arg.is_a?(Regexp) && arg.tagged?)
  result
end
|
104
|
+
|
105
|
+
# Returns a MatchDataCollection covering ALL successive matches of this
# Regexp in the given string. Similar in spirit to String#scan(regexp),
# except that the collection is handed only the first MatchData and
# produces the following ones while it is being iterated.
def matches(string)
  first_match = match(string)
  # A tagged regexp already stamps itself onto the MatchData inside its
  # own #match; a plain one has to be attached here so the collection
  # knows which Regexp to keep matching with.
  untagged = !respond_to?(:tag_index)
  first_match.regexp = self if first_match && untagged
  MatchDataCollection.new(first_match)
end
|
117
|
+
|
118
|
+
# Non-destructive counterpart of tag!: clones self, tags the clone and
# returns it.
def tag
  copy = clone
  copy.tag!
end
|
122
|
+
|
123
|
+
# Convert self into a 'tagged' Regexp. This extends self with the
|
124
|
+
# methods in the Regexp#Tagged module.
|
125
|
+
#
|
126
|
+
# The MatchData object produced by way of the /regexp/.tag!.match
|
127
|
+
# method provides a new method for identifying/accessing elements
|
128
|
+
# in the MatchData#captures array: Using the ruby Regexp 'comment'
|
129
|
+
# extension in your Regexps, you create a symbolic reference to the
|
130
|
+
# group/backreference containing the comment. For example:
|
131
|
+
#
|
132
|
+
# re=/((?# areacode)[0-9]{3})-((?# number)[0-9]{3}-[0-9]{4})/.tag!
|
133
|
+
# m=re.match("my phone number is 312-243-0979")
|
134
|
+
# m["areacode"]# => "312"
|
135
|
+
# m["number"] # => "243-0979"
|
136
|
+
#
|
137
|
+
# You can alternately use symbols to retrieve captured values:
|
138
|
+
#
|
139
|
+
# m[:areacode] # => "312"
|
140
|
+
# m[:number] # => "243-0979"
|
141
|
+
#
|
142
|
+
# And of course the [] method also still functions as originally
|
143
|
+
# designed to allow access via Fixnums, Ranges, and Fixnum Pairs:
|
144
|
+
#
|
145
|
+
# m[1] # => "312"
|
146
|
+
# m[1..2] # => ["312","243-0979"]
|
147
|
+
# m[1,2] # => ["312","243-0979"]
|
148
|
+
# m[0,3] # => ["312-243-0979","312","243-0979"]
|
149
|
+
#
|
150
|
+
# Note, due to an implementation issue, you are advised to place your
|
151
|
+
# comments at the head of the group you're trying to index. For
|
152
|
+
# example:
|
153
|
+
#
|
154
|
+
# r = /((?# here) string to (match))/x
|
155
|
+
# r.match("string to match")[:here] # <= "string to match"
|
156
|
+
# r = /(string to (match)(?# not here))/x
|
157
|
+
# r.match("string to match")[:not_here] # <= "match"
|
158
|
+
#
|
159
|
+
# The problem here is that we'd think [:not_here] should return
|
160
|
+
# "string to match", but since the comment appears after another
|
161
|
+
# set of backreference-generating open-parentheses, the index
|
162
|
+
# counter is bumped up and we get something other than we may
|
163
|
+
# actually want.
|
164
|
+
def tag!
  # Capture the stock matcher before mixing Tagged in: afterwards :match
  # resolves to Tagged#match, which delegates to this saved Method.
  @original_match_method = method(:match)
  extend(Tagged)
  # Build the tag => group-number table right away so that later lookups
  # reuse the memoized Hash.
  tag_index
  self
end
|
170
|
+
|
171
|
+
# Is this Regexp tagged? (see tag! method)
|
172
|
+
def tagged?
|
173
|
+
false
|
174
|
+
end
|
175
|
+
|
176
|
+
class << self
|
177
|
+
|
178
|
+
# Reads Regexps from cnuregexp_config.yml (located next to this file) and
# returns the tagged version of the entry named after the missing method,
# e.g. Regexp.ssn. The file is re-read on every call. Names that are not
# in the config fall through to the default method_missing.
#
# Extra arguments and a block are accepted and forwarded to super so that
# a call such as Regexp.bogus(1) raises the usual NoMethodError instead of
# an ArgumentError caused by this hook's own arity.
def method_missing(sym, *args, &block)
  config_path = File.join(File.dirname(__FILE__), 'cnuregexp_config.yml')
  # The config stores !ruby/regexp values. Newer Psych releases refuse to
  # build those through load_file, so use unsafe_load_file where it
  # exists. NOTE(review): this assumes the config is the trusted file
  # shipped with the gem -- never point it at user-supplied YAML.
  config = if YAML.respond_to?(:unsafe_load_file)
             YAML.unsafe_load_file(config_path)
           else
             YAML.load_file(config_path)
           end
  regexp = config[sym.to_s]
  return regexp.tag! if regexp
  super
end
|
184
|
+
|
185
|
+
# Creates a Regexp to capture an XML tag matching the
|
186
|
+
# specified tag_name. Optional attributes hash provided,
|
187
|
+
# allows filtering of the tag based on tag attribute values.
|
188
|
+
# Attributes hash keys are the attribute names and values
|
189
|
+
# may be objects of any of the following classes:
|
190
|
+
# * Array => attribute value must match at least one value
|
191
|
+
# in the Array. Elements in array may be any of the
|
192
|
+
# following classes in this list.
|
193
|
+
# * FalseClass => attribute must not appear in tag.
|
194
|
+
# * Regexp => attribute value must match the Regexp.
|
195
|
+
# * String => attribute value must match the string.
|
196
|
+
# * Symbol => attribute value must match the symbol.to_s.
|
197
|
+
# * TrueClass => any value is okay, but must exist.
|
198
|
+
#
|
199
|
+
# When you get a match out via .match etc, you'll have the
|
200
|
+
# following comment captures to access via [] on the matchdata
|
201
|
+
# object:
|
202
|
+
# * :tag_name => the name of the tag
|
203
|
+
# * :attributes => all tag attributes as a single string
|
204
|
+
# * :content => if the tag has a closing tag, the inner content
|
205
|
+
# And for each attribute name in the attributes hash argument
|
206
|
+
# you get a corresponding :name_value. For example:
|
207
|
+
#
|
208
|
+
# r=Regexp.xml_tag :a, :href=>true
|
209
|
+
# r.match('<a href="#test">test</a>')[:href_value] # <= #test
|
210
|
+
def xml_tag(tag_name, attributes = {})
  # Build one lookahead fragment per requested attribute. The accumulator
  # and the per-value block variables get their own names so that they do
  # not shadow (or, on Ruby 1.8, overwrite) the method's locals.
  attribute_checks = attributes.inject("") do |pattern, pair|
    name, value = pair
    candidates   = value.is_a?(Array) ? value : [value]
    has_false    = candidates.any? { |v| v.is_a?(FalseClass) }
    has_nonfalse = candidates.any? { |v| !v.is_a?(FalseClass) }
    both         = has_false && has_nonfalse
    name_pattern = Regexp.escape(name.to_s)

    # Create a separate non-backreference-generating group to wrap the
    # negative and positive lookahead assertions when the attribute may
    # 'either' be absent (false) or carry one of the given values.
    pattern += "(?:" if both

    # Negative assertion: the attribute must not appear in the tag.
    if has_false
      pattern += "(?!(?:\\s+(?:[^>]*\\s+)?)" + name_pattern + "(?:\\s|=|>))"
    end

    # OR the 'false' and 'nonfalse' lookahead expressions.
    pattern += "|" if both

    # Positive assertion that also captures the attribute value under
    # the tag "<name>_value".
    if has_nonfalse
      alternatives = candidates.reject { |v| v.is_a?(FalseClass) }.collect do |candidate|
        case candidate
        when Regexp then candidate.to_s # TODO: FIX INTERNAL BACKREFS BEFORE CONCAT!
        when String then Regexp.escape(candidate)
        when Symbol then Regexp.escape(candidate.to_s)
        else "[^\"]*" # true (or anything else): any value is acceptable
        end
      end
      pattern += "(?=(?:\\s+(?:[^>]*\\s+)?)" + name_pattern +
                 "\\s*=\\s*\"((?##{name}_value)" + alternatives.join("|") + ")\")"
    end

    # Close the group if we're checking for 'false' *and* 'nonfalse.'
    pattern += ")" if both

    pattern
  end

  source = "<((?#tag_name)#{tag_name})((?#attributes)" +
           attribute_checks +
           "(?:(?!/>)[^>])*" +
           ")" + # end of attributes
           "(?:/>|>((?#content)(?:(?!</#{tag_name}>).)*)</#{tag_name}>)"

  # Regexp.new rather than a /#{...}/m literal: tag! mutates the regexp,
  # and regexp literals are frozen on Ruby 3.0 and later.
  Regexp.new(source, Regexp::MULTILINE).tag!
end
|
271
|
+
|
272
|
+
end
|
273
|
+
|
274
|
+
end
|
275
|
+
|
276
|
+
# MatchData updated to allow additional use of the [] method to
# retrieve a captured subexpression containing a ruby regexp
# comment that matches the [] argument. For usage examples see
# the comments for Regexp#tag!
class MatchData
  # Keep the built-in [] reachable; the guard stops a second load of this
  # file from aliasing the already-overridden method onto itself.
  alias_method :original_brackets, :[] unless method_defined?(:original_brackets)

  # The Regexp whose tag_index is consulted for tag lookups. It is
  # assigned by Regexp::Tagged#match and by Regexp#matches.
  attr_accessor :regexp

  # Implementation behind the overridden [].
  #
  # * When the first argument is a String or Symbol, every argument is
  #   treated as a tag name (whitespace-stripped) and looked up in
  #   regexp.tag_index. One name returns the captured text (or nil for an
  #   unknown tag); several names return an Array. Returns nil when no
  #   tagged regexp is attached to this MatchData.
  # * Otherwise one or two arguments are passed to the original []
  #   (index, range, or start/length); any other argument count returns
  #   nil.
  def tag_match_brackets(*args)
    key = args.first
    unless key.is_a?(String) || key.is_a?(Symbol)
      # "when N then" instead of the colon form "when N:", which only
      # Ruby 1.8 accepts.
      case args.size
      when 1 then return original_brackets(key)
      when 2 then return original_brackets(key, args.last)
      end
      return nil
    end

    return nil if regexp.nil? || !regexp.respond_to?(:tag_index)

    backrefs = args.collect do |comment|
      index = regexp.tag_index[comment.to_s.strip]
      index.nil? ? nil : to_a[index]
    end
    args.size == 1 ? backrefs.first : backrefs
  end

  alias_method :[], :tag_match_brackets
end
|
309
|
+
|
310
|
+
# MatchDataCollection is a NEW class that is returned via the
# Regexp#matches method. It includes Enumerable to allow iterating
# over ALL regexp matches in a string. Only the first MatchData is
# stored; the following ones are produced during iteration by matching
# first.regexp against the text after the previous match.
class MatchDataCollection
  include Enumerable

  # The first MatchData (nil when the string did not match at all).
  # Note: as a plain reader this takes no arguments.
  attr_reader :first

  def initialize(first)
    @first = first
  end

  # Iterates over all *successive* matches in a string:
  #   /\d{2}/.matches("123456").collect {|m| m[0]} # <= ["12","34","56"]
  # Even though there are technically 5 occurrences of the pattern
  # /\d{2}/ in the string "123456", the iteration follows only the
  # *remainder* of the string following each match.
  #
  # Returns self when a block is given, or an Enumerator over the
  # matches when called without one (instead of raising LocalJumpError).
  def each
    return enum_for(:each) unless block_given?
    current = first
    while current
      yield current
      remainder = current.post_match
      # An empty match consumes nothing, so step over one character to
      # guarantee progress. On an empty remainder the slice yields nil,
      # which ends the iteration.
      remainder = remainder[1..-1] if current[0].empty?
      current = remainder && first.regexp.match(remainder)
    end
    self
  end

  # Returns the size of the MatchDataCollection (memoized; 0 when there
  # was no first match).
  def size
    @size ||= first && to_a.size || 0
  end

end
|
345
|
+
|
@@ -0,0 +1,33 @@
|
|
1
|
+
proper_name: !ruby/regexp /^[a-z .'\d\-]+$/i
|
2
|
+
|
3
|
+
email_address:
|
4
|
+
!ruby/regexp '/ \b
|
5
|
+
((?# user) (?!\.+)(?!-+)[\w\d._%\-+]+ )
|
6
|
+
@
|
7
|
+
((?# subdomain)
|
8
|
+
((?!-+)[\w\d\-]+\.)*
|
9
|
+
((?!-+)[\w\d\-]+ )
|
10
|
+
)
|
11
|
+
\.
|
12
|
+
((?# tld) \w{2,6} )
|
13
|
+
\b
|
14
|
+
/x'
|
15
|
+
|
16
|
+
zipcode_us:
|
17
|
+
!ruby/regexp '/^\d{5}(-?\d{4})?$/'
|
18
|
+
|
19
|
+
phone_number_us:
|
20
|
+
!ruby/regexp '/ (?:\(\s*)?\b
|
21
|
+
((?# area_code)[2-9][0-9][0-9])
|
22
|
+
(?:\)\s*|[\s\-\.\/])?
|
23
|
+
((?# prefix)[2-9][0-9][0-9])
|
24
|
+
[\s\-\.\/]?
|
25
|
+
((?# suffix)\d{4})
|
26
|
+
\b
|
27
|
+
/x'
|
28
|
+
|
29
|
+
ssn: !ruby/regexp '/\b((?# area_number)(?!000)(?:[0-6]\d{2}|7(?:[0-6]\d|7[012])))([-]?)((?# group_number)(?!00)\d{2})\2((?# serial_number)(?!0000)\d{4})\b/'
|
30
|
+
|
31
|
+
social_security_number: !ruby/regexp '/\b((?# area_number)(?!000)(?:[0-6]\d{2}|7(?:[0-6]\d|7[012])))([-]?)((?# group_number)(?!00)\d{2})\2((?# serial_number)(?!0000)\d{4})\b/'
|
32
|
+
|
33
|
+
social_security_number_us: !ruby/regexp '/\b((?# area_number)(?!000)(?:[0-6]\d{2}|7(?:[0-6]\d|7[012])))([-]?)((?# group_number)(?!00)\d{2})\2((?# serial_number)(?!0000)\d{4})\b/'
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper.rb'
|
2
|
+
|
3
|
+
# Smoke tests for cnuregexp: tagging and tag lookup, Regexp#+,
# MatchDataCollection iteration, Regexp.xml_tag and the config-file
# backed class methods.
class TestCnuregexp < Test::Unit::TestCase

  def setup
  end

  def test_basics
    r = /((?# areacode)[0-9]{3})-((?# number)[0-9]{3}-[0-9]{4})/.tag!

    # respond_to? instead of methods.include?('tag_index'): Object#methods
    # returns Symbols on Ruby 1.9+, so the String lookup can never succeed
    # there.
    assert r.respond_to?(:tag_index), 'Regexp class was not extended'
    assert r.tagged?, 'tagged? failed'
    assert_equal 1, r.tag_index['areacode'], 'tag_index failed'

    m = r.match('312-555-1212')
    assert_equal '312', m['areacode'], 'access by tag name(tag_match_brackets) failed from MatchData'
    assert_equal '555-1212', m['number'], 'access by tag name(tag_match_brackets) failed from MatchData'

    assert_equal(/(?-mix:(a)\1)(?-mix:(b)\2)/, /(a)\1/ + /(b)\1/, '+ operator failed')

    # test MatchDataCollection
    m = r.matches('312-555-1212 and 123-666-9898')
    a = []
    assert_nothing_thrown do
      m.each { |match| a.push match['areacode']; a.push match['number'] }
    end
    assert_equal '312', a[0], 'MatchDataCollection failed'
    assert_equal '555-1212', a[1], 'MatchDataCollection failed'
    assert_equal '123', a[2], 'MatchDataCollection failed'
    assert_equal '666-9898', a[3], 'MatchDataCollection failed'
    assert_equal 2, m.size
  end

  def test_xml
    r = Regexp.xml_tag :a, {:href=>true, :attr2=>true, :attr_not_exist=>false}

    m = r.match('<a href="#test" attr2="_test">test</a>')
    assert_equal '#test', m[:href_value], 'xml_tag failed'
    assert_equal '_test', m[:attr2_value], 'xml_tag failed'
    assert_equal 'test', m[:content], 'xml_tag failed'
  end

  def test_config_file
    assert_nothing_thrown do Regexp.proper_name end
    assert_nothing_thrown do Regexp.ssn end
  end

end
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.8.10
|
3
|
+
specification_version: 1
|
4
|
+
name: cnuregexp
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 1.0.0
|
7
|
+
date: 2007-05-22
|
8
|
+
summary: The author was too lazy to write a summary
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: ryand-ruby@zenspider.com
|
12
|
+
homepage: http://opensource.cashnetusa.com
|
13
|
+
rubyforge_project: cnuregexp
|
14
|
+
description: cnuregexp allows tags to be placed inside a regex which function as labels for the matches. The matches within the MatchData object can then be accessed like a hash with the tag name as the key. cnuregexp also provides a greedy match which will return an array of all matches rather than just the first match. cnuregexp can also extract various data from an xml tag with the Regexp.xml_tag method. It uses Regexps to get the tag name, the attributes and their values, the tag content, and any other relevant data from an xml string. Lastly, cnuregexp allows commonly used regular expressions to be stored in a config file(lib/cnuregexp_config.yml) and accessed with Regexp.regular_expression_name notation eg. Regexp.ssn, Regexp.email_address. cnuregexp comes preloaded with a few common regular expressions which are located in lib/cnuregexp_config.yml.
|
15
|
+
autorequire:
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
authors:
|
27
|
+
- CashnetUSA
|
28
|
+
- Brendan Baldwin
|
29
|
+
files:
|
30
|
+
- Rakefile
|
31
|
+
- README.txt
|
32
|
+
- lib/cnuregexp.rb
|
33
|
+
- lib/cnuregexp_config.yml
|
34
|
+
- lib/cnuregexp/version.rb
|
35
|
+
- test/test_cnuregexp.rb
|
36
|
+
- test/test_helper.rb
|
37
|
+
test_files:
|
38
|
+
- test/test_helper.rb
|
39
|
+
- test/test_cnuregexp.rb
|
40
|
+
rdoc_options: []
|
41
|
+
|
42
|
+
extra_rdoc_files: []
|
43
|
+
|
44
|
+
executables: []
|
45
|
+
|
46
|
+
extensions: []
|
47
|
+
|
48
|
+
requirements: []
|
49
|
+
|
50
|
+
dependencies:
|
51
|
+
- !ruby/object:Gem::Dependency
|
52
|
+
name: hoe
|
53
|
+
version_requirement:
|
54
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
55
|
+
requirements:
|
56
|
+
- - ">="
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: 1.2.0
|
59
|
+
version:
|