regex 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY +18 -1
- data/LICENSE +202 -21
- data/PACKAGE +7 -0
- data/PROFILE +20 -0
- data/README +76 -21
- data/bin/regex +1 -1
- data/lib/regex.rb +19 -228
- data/lib/regex/command.rb +16 -97
- data/lib/regex/extractor.rb +481 -0
- data/lib/regex/package.yml +7 -0
- data/lib/regex/replacer.rb +221 -0
- data/lib/regex/string.rb +1 -1
- data/lib/regex/templates.rb +85 -0
- data/{test/demos → qed}/regex.rdoc +5 -5
- data/qed/replacer.rdoc +57 -0
- metadata +54 -29
- data/MANIFEST +0 -25
- data/lib/regex/templates/common.rb +0 -13
- data/meta/authors +0 -2
- data/meta/created +0 -1
- data/meta/description +0 -1
- data/meta/download +0 -1
- data/meta/homepage +0 -1
- data/meta/mailinglist +0 -1
- data/meta/name +0 -1
- data/meta/repository +0 -1
- data/meta/summary +0 -1
- data/meta/title +0 -1
- data/meta/version +0 -1
@@ -0,0 +1,221 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
|
3
|
+
module Regex
|
4
|
+
|
5
|
+
#
|
6
|
+
# Applies an ordered list of search-and-replace rules to Strings, StringIO
# objects and Files. Each rule is a [Matcher, replacement] pair; rules run
# in the order they were added, each one operating on the output of the
# previous rule.
class Replacer

  # Array of [search, replace] rules.
  attr_reader :rules

  # Make all patterns exact string matchers.
  attr_accessor :escape

  # Make all patterns global matchers.
  attr_accessor :global

  # Make all patterns case-insensitive matchers.
  attr_accessor :insensitive

  # Make all patterns multi-line matchers.
  attr_accessor :multiline

  # Make backups of files when they change.
  attr_accessor :backup

  # Create a new replacer.
  #
  # options - Hash of setting name => value. Every key is dispatched to the
  #           writer of the same name, so an unknown key raises NoMethodError.
  def initialize(options={})
    @rules = []
    options.each do |k,v|
      __send__("#{k}=", v)
    end
  end

  # Add a search/replace rule.
  #
  # The pattern is compiled into a Matcher right away, using the replacer-wide
  # settings (escape, global, ...) as they are at the time of the call.
  def rule(pattern, replacement)
    @rules << [re(pattern), replacement]
  end

  # Run every rule, in order, against each of the given targets.
  #
  # IO and StringIO targets are read in full; anything else is converted
  # with #to_s. A target is only written back when the text actually changed.
  def apply(*ios)
    ios.each do |io|
      original = (IO === io || StringIO === io ? io.read : io.to_s)
      generate = rules.inject(original) do |text, (pattern, replacement)|
        if pattern.global
          text.gsub(pattern.to_re, replacement)
        else
          text.sub(pattern.to_re, replacement)
        end
      end
      write(io, generate) if original != generate
    end
  end

  # Command line interface.
  #
  # Parses +argv+ (destructively) for search/replace pairs and option flags;
  # the remaining arguments are taken as files to modify. Raises if a file
  # does not exist or if the number of searches and replacements differ.
  def self.cli(argv=ARGV)
    # Only the command line entry point needs OptionParser, so load it here.
    require 'optparse'

    searches = []
    replaces = []
    options  = {}

    parser = OptionParser.new do |opt|
      # '--subtitute' is the original (misspelled) option name; it is kept
      # as an alias so that existing invocations continue to work.
      opt.on('--substitute', '--subtitute', '-s PATTERN', 'search portion of substitution') do |search|
        searches << search
      end
      opt.on('--template', '-t NAME', 'search for built-in regular expression') do |name|
        searches << "$#{name}"
      end
      opt.on('--replace', '-r STRING', 'replacement string of substitution') do |replace|
        replaces << replace
      end
      opt.on('--escape', '-e', 'make all patterns exact string matchers') do
        options[:escape] = true
      end
      opt.on('--insensitive', '-i', 'make all patterns case-insensitive matchers') do
        options[:insensitive] = true
      end
      #opt.on('--unxml', '-x', 'ignore XML/HTML tags') do
      #  options[:unxml] = true
      #end
      opt.on('--global', '-g', 'make all patterns global matchers') do
        options[:global] = true
      end
      opt.on('--multiline', '-m', 'make all patterns multi-line matchers') do
        options[:multiline] = true
      end
      opt.on('-b', '--backup', 'backup any files that are changed') do
        options[:backup] = true
      end
      opt.on_tail('--debug', 'run in debug mode') do
        $DEBUG = true
      end
      opt.on_tail('--help', '-h', 'display this lovely help message') do
        puts opt
        exit 0
      end
    end
    parser.parse!(argv)

    files = argv
    files.each do |file|
      raise "file does not exist -- #{file}" unless File.exist?(file)
    end

    unless searches.size == replaces.size
      raise "search replace mismatch -- #{searches.size} to #{replaces.size}"
    end

    replacer = new(options)
    searches.zip(replaces).each do |search, replace|
      replacer.rule(search, replace)
    end

    # Files are opened last, after all validation, and are always closed
    # again so that no handle leaks even when a substitution raises.
    targets = files.empty? ? [ARGF] : files.map{ |f| File.new(f) }
    begin
      replacer.apply(*targets)
    ensure
      targets.each{ |t| t.close if File === t && !t.closed? }
    end
  end

  private

  # Build a Matcher for +pattern+, seeded with the replacer-wide settings.
  def re(pattern)
    Matcher.new(
      pattern,
      :global=>global,
      :escape=>escape,
      :multiline=>multiline,
      :insensitive=>insensitive
    )
  end

  # Store the new +text+ back into the target it came from.
  #
  # File     - rewritten on disk by path (after an optional '.bak' copy).
  # StringIO - underlying string is swapped out.
  # IO       - text is written to the stream.
  # other    - expected to respond to #replace (e.g. String).
  def write(io, text)
    case io
    when File
      if backup
        backup_file = io.path + '.bak'
        File.open(backup_file, 'w'){ |f| f << File.read(io.path) }
      end
      File.open(io.path, 'w'){ |w| w << text }
    when StringIO
      io.string = text
    when IO
      # TODO: How to handle general IO object?
      io.write(text)
    else
      io.replace(text)
    end
  end

  # Basically a Regex but handles a couple extra options.
  class Matcher

    # Replace every occurrence rather than only the first.
    attr_accessor :global

    # Treat the pattern as a literal string.
    attr_accessor :escape

    # Compile the pattern with Regexp::MULTILINE.
    attr_accessor :multiline

    # Compile the pattern with Regexp::IGNORECASE.
    attr_accessor :insensitive

    # pattern - Regexp, "$name" template reference, "/source/flags" string,
    #           or a plain pattern string.
    # options - Hash of settings; keys without a matching writer are ignored.
    def initialize(pattern, options={})
      options.each do |k,v|
        __send__("#{k}=", v) if respond_to?("#{k}=")
      end
      @regexp = parse(pattern)
    end

    # Same as Regexp#=~ on the compiled expression.
    def =~(string)
      @regexp =~ string
    end

    # Same as Regexp#match on the compiled expression.
    def match(string)
      @regexp.match(string)
    end

    # The compiled Regexp.
    def to_re
      @regexp
    end

    # Parse pattern matcher.
    #
    # A Regexp is used as is. A string starting with '$' is looked up as a
    # constant in Templates. A string of the form "/source/flags" understands
    # the flags g (global), m (multi-line), i (case-insensitive) and
    # e (escape); at least one flag character is required for this form.
    # Anything else is compiled as a regular expression source using the
    # matcher's own settings.
    def parse(pattern)
      case pattern
      when Regexp
        pattern
      when /^\$/
        Templates.const_get($'.upcase)
      when /^\/(.*?)\/(\w+)$/
        source, mods = $1, $2
        @global = true if mods.index('g')
        compile(
          source,
          mods.index('e') || escape,
          mods.index('m') || multiline,
          mods.index('i') || insensitive
        )
      else
        compile(pattern, escape, multiline, insensitive)
      end
    end

    private

    # Compile +source+ into a Regexp.
    #
    # The option flags are OR-ed into a single integer. Regexp.new accepts
    # only one options argument, so passing several flags positionally would
    # drop all but the first (or raise on current Ruby versions).
    def compile(source, literal, multi, nocase)
      flags = 0
      flags |= Regexp::MULTILINE  if multi
      flags |= Regexp::IGNORECASE if nocase
      Regexp.new(literal ? Regexp.escape(source) : source, flags)
    end

  end

end
|
220
|
+
|
221
|
+
end
|
data/lib/regex/string.rb
CHANGED
@@ -0,0 +1,85 @@
|
|
1
|
+
module Regex
|
2
|
+
|
3
|
+
# = Templates
|
4
|
+
#
|
5
|
+
# TODO: What about regular expressions with variable content?
|
6
|
+
# Should these be methods rather than constants? But then how
|
7
|
+
# would we handle named substitutions?
|
8
|
+
# Collection of ready-made regular expressions, each stored as a constant.
# A template can be fetched by case-insensitive name via Templates[name].
module Templates

  # Empty line.
  EMPTY = /^$/

  # Blank line.
  BLANK = /^\s*$/

  # Integer or decimal number with optional sign.
  NUMBER = /[-+]?[0-9]*\.?[0-9]+/

  # Markup language tag, e.g \<a>stuff</a>.
  MLTAG = /<([A-Z][A-Z0-9]*)\b[^>]*>(.*?)<\/\1>/i

  # IPv4 Address
  IPV4 = /\b(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b/

  # Username
  USERNAME = /^[a-zA-Z0-9_]{3,16}$/

  # Email Address
  EMAIL = /([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)/i

  # United States phone number.
  USPHONE = /(\d\d\d[-]|\(\d\d\d\))?(\d\d\d)[-](\d\d\d\d)/

  # United States zip code.
  USZIP = /^[0-9]{5}(-[0-9]{4})?$/

  # United States social security number (NNN-NN-NNNN).
  # The braces are repetition counts and must not be backslash-escaped,
  # otherwise they only match literal '{' and '}' characters.
  SSN = /[0-9]{3}-[0-9]{2}-[0-9]{4}/

  # United States dollar amount.
  # The decimal point is escaped so that only a literal '.' is accepted.
  DOLLARS = /\$[0-9]*\.[0-9][0-9]/

  # Bank Identification Code
  BIC = /([a-zA-Z]{4}[a-zA-Z]{2}[a-zA-Z0-9]{2}([a-zA-Z0-9]{3})?)/

  # International Bank Account Number
  IBAN = /[a-zA-Z]{2}[0-9]{2}[a-zA-Z0-9]{4}[0-9]{7}([a-zA-Z0-9]?){0,16}/

  # Hexadecimal value prefixed with '#', three or six digits long.
  HEX = /(#([0-9A-Fa-f]{3}|[0-9A-Fa-f]{6})\b)/

  # HTTP URL Address
  HTTP = /^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \?=.-]*)*\/?$/

  # Ruby comment block.
  RUBYBLOCK = /^=begin\s*(.*?)\n(.*?)\n=end/m

  # Ruby method definition.
  # TODO: Not quite right.
  RUBYMETHOD_WITH_COMMENT = /(^\ *\#.*?)^\s*def\s*(.*?)$/m

  # Ruby method definition line (without the preceding comment).
  RUBYMETHOD = /^\ *def\s*(.*?)$/

  # By the legendary abigail. Fails to match if and only if it is matched against
  # a prime number of 1's. That is, '11' fails, but '1111' does not.
  # I once heard him talk why this works, but I forgot most of it.
  PRIMEONES = /^1?$|^(11+?)\1+$/

  # Name of all constants.
  def self.list
    constants.map{ |c| c.downcase }
  end

  # Lookup a template by name.
  #
  # name - String or Symbol, in any case; it is upcased before the lookup.
  #
  # Raises NameError when no template of that name exists.
  def self.[](name)
    Templates.const_get(name.upcase)
  end

end
|
80
|
+
|
81
|
+
# Add templates to Regex module.
|
82
|
+
include Templates
|
83
|
+
|
84
|
+
end
|
85
|
+
|
@@ -19,26 +19,26 @@ We will also supply a matching pattern, as none of
|
|
19
19
|
the matching functions will work without providing
|
20
20
|
a pattern or the name of built-in pattern template.
|
21
21
|
|
22
|
-
|
22
|
+
rx = Regex.new(text, :pattern=>'\w+')
|
23
23
|
|
24
24
|
We can see that the Regex object has converted the pattern
|
25
25
|
into the expected regular expression via the #regex method.
|
26
26
|
|
27
|
-
|
27
|
+
rx.regex.assert == /\w+/
|
28
28
|
|
29
29
|
Under the hood, Regex has split the process of matching,
|
30
30
|
organizing and formatting the results into separate methods.
|
31
31
|
We can use the #structure method to see the match results
|
32
32
|
organized into uniform arrays.
|
33
33
|
|
34
|
-
|
34
|
+
rx.structure.assert == %w{We}
|
35
35
|
|
36
36
|
Whereas the last use only returns a single match, if we turn
|
37
37
|
on repeat mode we can see every word.
|
38
38
|
|
39
|
-
|
39
|
+
rx.repeat = true
|
40
40
|
|
41
|
-
|
41
|
+
rx.structure.assert == %w{We will match against this string}.map{ |e| [e] }
|
42
42
|
|
43
43
|
Notice that repeat mode creates an array in an array.
|
44
44
|
|
data/qed/replacer.rdoc
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
= Search and Replace
|
2
|
+
|
3
|
+
Regex can also be used to do search and replace across multiple
|
4
|
+
strings or IO objects, including files.
|
5
|
+
|
6
|
+
require 'regex'
|
7
|
+
|
8
|
+
To perform search and replace procedure we create a Regex::Replacer object.
|
9
|
+
The constructor method takes a Hash of options which set universal parameters
|
10
|
+
to apply to all search and replace rules. Usually, each individual rule
|
11
|
+
will specify its own options, so for this example we provide none.
|
12
|
+
|
13
|
+
replacer = Regex::Replacer.new
|
14
|
+
|
15
|
+
Rules are added via the #rule method.
|
16
|
+
|
17
|
+
replacer.rule('World', 'Planet Earth')
|
18
|
+
replacer.rule('!', '!!!')
|
19
|
+
|
20
|
+
Rules are applied in the order they were defined. If the rules overlap
|
21
|
+
in their effects this can be significant.
|
22
|
+
|
23
|
+
Now, let's say we have that famous String,
|
24
|
+
|
25
|
+
string = "Hello, World!"
|
26
|
+
|
27
|
+
We use the #apply method to actually perform the substitutions.
|
28
|
+
|
29
|
+
replacer.apply(string)
|
30
|
+
|
31
|
+
The replacements occur in place. Since in this case we are performing
|
32
|
+
the search and replace on a String object, we can see the change
|
33
|
+
has taken place.
|
34
|
+
|
35
|
+
string.assert == "Hello, Planet Earth!!!"
|
36
|
+
|
37
|
+
As we mentioned at the beginning, substitutions can be applied to IO
|
38
|
+
objects in general, so long as they can be reopened for writing.
|
39
|
+
|
40
|
+
require 'stringio'
|
41
|
+
|
42
|
+
io = StringIO.new("Hello, World!")
|
43
|
+
|
44
|
+
replacer.apply(io)
|
45
|
+
|
46
|
+
io.read.assert == "Hello, Planet Earth!!!"
|
47
|
+
|
48
|
+
If +io+ were a File object, rather than a StringIO, the file would
|
49
|
+
be changed on disk. As a precaution a backup file can be written
|
50
|
+
with the name of the file plus a '.bak' extension in the same directory as
|
51
|
+
the file. To turn on the backup option, either supply it as an option
|
52
|
+
to the constructor, or set it via the writer method.
|
53
|
+
|
54
|
+
replacer.backup = true
|
55
|
+
|
56
|
+
(TODO: Example of a file search and replace.)
|
57
|
+
|
metadata
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
+
hash: 19
|
4
5
|
prerelease: false
|
5
6
|
segments:
|
6
7
|
- 1
|
8
|
+
- 1
|
7
9
|
- 0
|
8
|
-
|
9
|
-
version: 1.0.0
|
10
|
+
version: 1.1.0
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
13
|
- Thomas Sawyer
|
@@ -15,12 +16,39 @@ autorequire:
|
|
15
16
|
bindir: bin
|
16
17
|
cert_chain: []
|
17
18
|
|
18
|
-
date: 2010-
|
19
|
+
date: 2010-10-12 00:00:00 -04:00
|
19
20
|
default_executable:
|
20
|
-
dependencies:
|
21
|
-
|
22
|
-
|
23
|
-
|
21
|
+
dependencies:
|
22
|
+
- !ruby/object:Gem::Dependency
|
23
|
+
name: syckle
|
24
|
+
prerelease: false
|
25
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
26
|
+
none: false
|
27
|
+
requirements:
|
28
|
+
- - ">="
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
hash: 3
|
31
|
+
segments:
|
32
|
+
- 0
|
33
|
+
version: "0"
|
34
|
+
type: :development
|
35
|
+
version_requirements: *id001
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: qed
|
38
|
+
prerelease: false
|
39
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
hash: 3
|
45
|
+
segments:
|
46
|
+
- 0
|
47
|
+
version: "0"
|
48
|
+
type: :development
|
49
|
+
version_requirements: *id002
|
50
|
+
description: Regex is a simple commandline Regular Expression tool, that makes it easy to search documents for content matches.
|
51
|
+
email: ""
|
24
52
|
executables:
|
25
53
|
- regex
|
26
54
|
extensions: []
|
@@ -28,32 +56,25 @@ extensions: []
|
|
28
56
|
extra_rdoc_files:
|
29
57
|
- README
|
30
58
|
files:
|
31
|
-
- HISTORY
|
32
|
-
- LICENSE
|
33
|
-
- MANIFEST
|
34
|
-
- README
|
35
59
|
- bin/regex
|
36
|
-
- lib/regex.rb
|
37
60
|
- lib/regex/command.rb
|
38
61
|
- lib/regex/extractor.rb
|
62
|
+
- lib/regex/package.yml
|
63
|
+
- lib/regex/replacer.rb
|
39
64
|
- lib/regex/string.rb
|
40
|
-
- lib/regex/templates
|
41
|
-
-
|
42
|
-
-
|
43
|
-
-
|
44
|
-
-
|
45
|
-
-
|
46
|
-
-
|
47
|
-
-
|
48
|
-
-
|
49
|
-
- meta/summary
|
50
|
-
- meta/title
|
51
|
-
- meta/version
|
52
|
-
- test/demos/regex.rdoc
|
65
|
+
- lib/regex/templates.rb
|
66
|
+
- lib/regex.rb
|
67
|
+
- qed/regex.rdoc
|
68
|
+
- qed/replacer.rdoc
|
69
|
+
- PROFILE
|
70
|
+
- PACKAGE
|
71
|
+
- LICENSE
|
72
|
+
- README
|
73
|
+
- HISTORY
|
53
74
|
has_rdoc: true
|
54
75
|
homepage: http://proutils.github.com/regex
|
55
|
-
licenses:
|
56
|
-
|
76
|
+
licenses:
|
77
|
+
- ""
|
57
78
|
post_install_message:
|
58
79
|
rdoc_options:
|
59
80
|
- --title
|
@@ -63,25 +84,29 @@ rdoc_options:
|
|
63
84
|
require_paths:
|
64
85
|
- lib
|
65
86
|
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
+
none: false
|
66
88
|
requirements:
|
67
89
|
- - ">="
|
68
90
|
- !ruby/object:Gem::Version
|
91
|
+
hash: 3
|
69
92
|
segments:
|
70
93
|
- 0
|
71
94
|
version: "0"
|
72
95
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
|
+
none: false
|
73
97
|
requirements:
|
74
98
|
- - ">="
|
75
99
|
- !ruby/object:Gem::Version
|
100
|
+
hash: 3
|
76
101
|
segments:
|
77
102
|
- 0
|
78
103
|
version: "0"
|
79
104
|
requirements: []
|
80
105
|
|
81
106
|
rubyforge_project: regex
|
82
|
-
rubygems_version: 1.3.
|
107
|
+
rubygems_version: 1.3.7
|
83
108
|
signing_key:
|
84
109
|
specification_version: 3
|
85
|
-
summary: Regex is simple commmandline Regular Expression tool.
|
110
|
+
summary: Regex is a simple commandline Regular Expression tool.
|
86
111
|
test_files: []
|
87
112
|
|