regex 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY +18 -1
- data/LICENSE +202 -21
- data/PACKAGE +7 -0
- data/PROFILE +20 -0
- data/README +76 -21
- data/bin/regex +1 -1
- data/lib/regex.rb +19 -228
- data/lib/regex/command.rb +16 -97
- data/lib/regex/extractor.rb +481 -0
- data/lib/regex/package.yml +7 -0
- data/lib/regex/replacer.rb +221 -0
- data/lib/regex/string.rb +1 -1
- data/lib/regex/templates.rb +85 -0
- data/{test/demos → qed}/regex.rdoc +5 -5
- data/qed/replacer.rdoc +57 -0
- metadata +54 -29
- data/MANIFEST +0 -25
- data/lib/regex/templates/common.rb +0 -13
- data/meta/authors +0 -2
- data/meta/created +0 -1
- data/meta/description +0 -1
- data/meta/download +0 -1
- data/meta/homepage +0 -1
- data/meta/mailinglist +0 -1
- data/meta/name +0 -1
- data/meta/repository +0 -1
- data/meta/summary +0 -1
- data/meta/title +0 -1
- data/meta/version +0 -1
@@ -0,0 +1,221 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
|
3
|
+
module Regex
|
4
|
+
|
5
|
+
#
|
6
|
+
class Replacer

  # Array of [search, replace] rules. Each search entry is a Matcher
  # built from the pattern given to #rule.
  attr_reader :rules

  # Make all patterns exact string matchers.
  attr_accessor :escape

  # Make all patterns global matchers.
  attr_accessor :global

  # Make all patterns case-insensitive matchers.
  attr_accessor :insensitive

  # Make all patterns multi-line matchers.
  attr_accessor :multiline

  # Make backups of files when they change.
  attr_accessor :backup

  # Create a new Replacer.
  #
  # options - Hash of universal settings. Every key is assigned through
  #           the writer method of the same name (escape, global,
  #           insensitive, multiline, backup); a key without a writer
  #           raises NoMethodError.
  def initialize(options={})
    @rules = []
    options.each do |k,v|
      __send__("#{k}=", v)
    end
  end

  # Add a search and replace rule.
  #
  # pattern     - Regexp or String; see Matcher#parse for the accepted
  #               String forms.
  # replacement - replacement passed to String#sub / String#gsub.
  #
  # The universal options are captured at the time the rule is added,
  # so changing them afterwards only affects rules added later.
  #
  # Returns the Array of rules.
  def rule(pattern, replacement)
    @rules << [re(pattern), replacement]
  end

  # Apply every rule, in the order defined, to each given target.
  #
  # ios - any mix of String, StringIO, File, other IO objects, or ARGF.
  #       Streams are read in full; anything else is converted with #to_s.
  #
  # A target is only written back when the rules actually changed its text.
  def apply(*ios)
    ios.each do |io|
      # ARGF is neither an IO nor a StringIO, so it must be named
      # explicitly; otherwise ARGF.to_s (the literal "ARGF") would be
      # processed instead of the input it represents.
      stream   = IO === io || StringIO === io || io.equal?(ARGF)
      original = stream ? io.read : io.to_s
      generate = original
      rules.each do |(pattern, replacement)|
        if pattern.global
          generate = generate.gsub(pattern.to_re, replacement)
        else
          generate = generate.sub(pattern.to_re, replacement)
        end
      end
      write(io, generate) if original != generate
    end
  end

  private

  # Build a Matcher for the pattern using the universal options.
  def re(pattern)
    Matcher.new(
      pattern,
      :global=>global,
      :escape=>escape,
      :multiline=>multiline,
      :insensitive=>insensitive
    )
  end

  # Store the substituted text back into the target it came from.
  #
  # io   - the object given to #apply.
  # text - the text after all rules have been applied.
  def write(io, text)
    # ARGF cannot be written to (outside in-place mode) and has no
    # #replace, so filtered input goes to standard output.
    # NOTE: #apply only calls this when the text changed, so unchanged
    # input read from ARGF is not echoed.
    return $stdout.write(text) if io.equal?(ARGF)

    case io
    when File
      if backup
        backup_file = io.path + '.bak'
        File.open(backup_file, 'w'){ |f| f << File.read(io.path) }
      end
      # Reopen by path; the handle we were given may be read-only.
      File.open(io.path, 'w'){ |w| w << text }
    when StringIO
      io.string = text
    when IO
      # TODO: How to handle general IO object?
      io.write(text)
    else
      io.replace(text)
    end
  end

  # Command line interface for search and replace.
  #
  # argv - Array of command line arguments (defaults to ARGV). Options
  #        are removed from it; what remains is taken as file names.
  #        With no file names, input is read from ARGF.
  #
  # Raises RuntimeError if a named file does not exist or if the number
  # of search patterns differs from the number of replacements.
  def self.cli(argv=ARGV)
    # Only the command line entry point needs the option parser.
    require 'optparse'

    searches = []
    replaces = []
    options  = {}
    parser = OptionParser.new do |opt|
      # '--subtitute' is the original (misspelled) name, kept as an alias.
      opt.on('--substitute', '--subtitute', '-s PATTERN', 'search portion of substitution') do |search|
        searches << search
      end
      opt.on('--template', '-t NAME', 'search for built-in regular expression') do |name|
        searches << "$#{name}"
      end
      opt.on('--replace', '-r STRING', 'replacement string of substitution') do |replace|
        replaces << replace
      end
      opt.on('--escape', '-e', 'make all patterns exact string matchers') do
        options[:escape] = true
      end
      opt.on('--insensitive', '-i', 'make all patterns case-insensitive matchers') do
        options[:insensitive] = true
      end
      #opt.on('--unxml', '-x', 'ignore XML/HTML tags') do
      #  options[:unxml] = true
      #end
      opt.on('--global', '-g', 'make all patterns global matchers') do
        options[:global] = true
      end
      opt.on('--multiline', '-m', 'make all patterns multi-line matchers') do
        options[:multiline] = true
      end
      opt.on('-b', '--backup', 'backup any files that are changed') do
        options[:backup] = true
      end
      opt.on_tail('--debug', 'run in debug mode') do
        $DEBUG = true
      end
      opt.on_tail('--help', '-h', 'display this lovely help message') do
        puts opt
        exit 0
      end
    end
    parser.parse!(argv)

    files = argv
    files.each do |file|
      raise "file does not exist -- #{file}" unless File.exist?(file)
    end

    unless searches.size == replaces.size
      raise "search replace mismatch -- #{searches.size} to #{replaces.size}"
    end

    replacer = new(options)
    searches.zip(replaces).each do |search, replace|
      replacer.rule(search, replace)
    end

    # Open the files only after all validation has passed, and make
    # sure every handle opened here is closed again.
    targets = files.empty? ? [ARGF] : files.map{ |f| File.new(f) }
    begin
      replacer.apply(*targets)
    ensure
      targets.each{ |t| t.close if File === t && !t.closed? }
    end
  end

  # Basically a Regexp but handles a couple extra options.
  class Matcher

    # Replace every occurrence rather than only the first.
    attr_accessor :global

    # Treat the pattern as an exact string rather than a regular expression.
    attr_accessor :escape

    # Compile the pattern with Regexp::MULTILINE.
    attr_accessor :multiline

    # Compile the pattern with Regexp::IGNORECASE.
    attr_accessor :insensitive

    # Create a new Matcher.
    #
    # pattern - Regexp or String (see #parse).
    # options - Hash of settings; keys without a matching writer method
    #           are ignored.
    def initialize(pattern, options={})
      options.each do |k,v|
        __send__("#{k}=", v) if respond_to?("#{k}=")
      end
      @regexp = parse(pattern)
    end

    # Match the underlying regular expression against a string.
    def =~(string)
      @regexp =~ string
    end

    # Match the underlying regular expression, returning MatchData or nil.
    def match(string)
      @regexp.match(string)
    end

    # The underlying Regexp.
    def to_re
      @regexp
    end

    # Parse pattern matcher.
    #
    # pattern - one of:
    #           * a Regexp, used as is (the options are not applied);
    #           * a String starting with "$", naming a constant in
    #             Templates (looked up in upper case);
    #           * a String of the form "/source/flags" with at least one
    #             flag letter, where g, m, i and e turn on global,
    #             multi-line, case-insensitive and escape respectively,
    #             in addition to the options already set;
    #           * any other String, compiled per the options.
    #
    # Returns a Regexp.
    def parse(pattern)
      case pattern
      when Regexp
        pattern
      when /^\$/
        Templates.const_get($'.upcase)
      when /^\/(.*?)\/(\w+)$/
        source, letters = $1, $2
        @global = true if letters.index('g')
        compile(
          source,
          letters.index('e') || escape,
          letters.index('m') || multiline,
          letters.index('i') || insensitive
        )
      else
        compile(pattern, escape, multiline, insensitive)
      end
    end

    private

    # Compile source into a Regexp.
    #
    # The flags are OR-ed into a single integer: Regexp.new takes its
    # options as ONE argument, so passing several flags as separate
    # arguments would not combine them.
    def compile(source, exact, multi, icase)
      flags = 0
      flags |= Regexp::MULTILINE  if multi
      flags |= Regexp::IGNORECASE if icase
      source = Regexp.escape(source) if exact
      Regexp.new(source, flags)
    end

  end

end
|
220
|
+
|
221
|
+
end
|
data/lib/regex/string.rb
CHANGED
@@ -0,0 +1,85 @@
|
|
1
|
+
module Regex
|
2
|
+
|
3
|
+
# = Templates
|
4
|
+
#
|
5
|
+
# TODO: What about regular expressions with variable content?
|
6
|
+
# Should these be methods rather than constants? But then how
|
7
|
+
# would we handle named substitutions?
|
8
|
+
module Templates

  # Empty line.
  EMPTY = /^$/

  # Blank line.
  BLANK = /^\s*$/

  # Optionally signed integer or decimal number.
  NUMBER = /[-+]?[0-9]*\.?[0-9]+/

  # Markup language tag, e.g \<a>stuff</a>.
  MLTAG = /<([A-Z][A-Z0-9]*)\b[^>]*>(.*?)<\/\1>/i

  # IPv4 Address
  IPV4 = /\b(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b/

  # Username
  USERNAME = /^[a-zA-Z0-9_]{3,16}$/

  # Email Address
  EMAIL = /([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)/i

  # United States phone number.
  USPHONE = /(\d\d\d[-]|\(\d\d\d\))?(\d\d\d)[-](\d\d\d\d)/

  # United States zip code.
  USZIP = /^[0-9]{5}(-[0-9]{4})?$/

  # United States social security number, e.g. 123-45-6789.
  # The braces are repetition counts and must not be escaped, otherwise
  # they only match literal "{3}" text.
  SSN = /[0-9]{3}-[0-9]{2}-[0-9]{4}/

  # United States dollar amount, e.g. $12.50.
  # The decimal point is escaped so that it only matches a period.
  DOLLARS = /\$[0-9]*\.[0-9][0-9]/

  # Bank Identification Code
  BIC = /([a-zA-Z]{4}[a-zA-Z]{2}[a-zA-Z0-9]{2}([a-zA-Z0-9]{3})?)/

  # International Bank Account Number
  IBAN = /[a-zA-Z]{2}[0-9]{2}[a-zA-Z0-9]{4}[0-9]{7}([a-zA-Z0-9]?){0,16}/

  # Hexadecimal value prefixed with '#', three or six digits long.
  HEX = /(#([0-9A-Fa-f]{3}|[0-9A-Fa-f]{6})\b)/

  # HTTP URL Address
  HTTP = /^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \?=.-]*)*\/?$/

  # Ruby comment block.
  RUBYBLOCK = /^=begin\s*(.*?)\n(.*?)\n=end/m

  # Ruby method definition preceded by a comment.
  # TODO: Not quite right.
  RUBYMETHOD_WITH_COMMENT = /(^\ *\#.*?)^\s*def\s*(.*?)$/m

  # Ruby method definition.
  RUBYMETHOD = /^\ *def\s*(.*?)$/

  # By the legendary abigail. Fails to match if and only if it is matched against
  # a prime number of 1's. That is, '11' fails, but '1111' does not.
  # I once heard him talk why this works, but I forgot most of it.
  PRIMEONES = /^1?$|^(11+?)\1+$/

  # Names of all constants, in lower case.
  def self.list
    constants.map{ |c| c.downcase }
  end

  # Lookup a template by name. The name is upper-cased before lookup,
  # so Templates['ssn'] returns Templates::SSN.
  def self.[](name)
    Templates.const_get(name.upcase)
  end

end
|
80
|
+
|
81
|
+
# Add templates to Regex module.
|
82
|
+
include Templates
|
83
|
+
|
84
|
+
end
|
85
|
+
|
@@ -19,26 +19,26 @@ We will also suppoly a matching pattern, as none of
|
|
19
19
|
the matching functions will work without providing
|
20
20
|
a pattern or the name of built-in pattern template.
|
21
21
|
|
22
|
-
|
22
|
+
rx = Regex.new(text, :pattern=>'\w+')
|
23
23
|
|
24
24
|
We can see that the Regex object has converted the pattern
|
25
25
|
into the expected regular expression via the #regex method.
|
26
26
|
|
27
|
-
|
27
|
+
rx.regex.assert == /\w+/
|
28
28
|
|
29
29
|
Under the hood, Regex has split the process of matching,
|
30
30
|
organizing and formatting the results into separate methods.
|
31
31
|
We can use the #structure method to see the match results
|
32
32
|
organized into uniform arrays.
|
33
33
|
|
34
|
-
|
34
|
+
rx.structure.assert == %w{We}
|
35
35
|
|
36
36
|
Whereas the last use only returns a single match, if we turn
|
37
37
|
on repeat mode we can see every word.
|
38
38
|
|
39
|
-
|
39
|
+
rx.repeat = true
|
40
40
|
|
41
|
-
|
41
|
+
rx.structure.assert == %w{We will match against this string}.map{ |e| [e] }
|
42
42
|
|
43
43
|
Notice that repeat mode creates an array in an array.
|
44
44
|
|
data/qed/replacer.rdoc
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
= Search and Replace
|
2
|
+
|
3
|
+
Regex can also be used to do search and replace across multiple
|
4
|
+
strings or IO objects, including files.
|
5
|
+
|
6
|
+
require 'regex'
|
7
|
+
|
8
|
+
To perform a search and replace procedure we create a Regex::Replacer object.
|
9
|
+
The constructor method takes a Hash of options which set universal parameters
|
10
|
+
to apply to all search and replace rules. Usually, each individual rule
|
11
|
+
will specify its own options, so for this example we provide none.
|
12
|
+
|
13
|
+
replacer = Regex::Replacer.new
|
14
|
+
|
15
|
+
Rules are added via the #rule method.
|
16
|
+
|
17
|
+
replacer.rule('World', 'Planet Earth')
|
18
|
+
replacer.rule('!', '!!!')
|
19
|
+
|
20
|
+
Rules are applied in the order they were defined. If the rules overlap
|
21
|
+
in their effects this can be significant.
|
22
|
+
|
23
|
+
Now, let's say we have that famous String,
|
24
|
+
|
25
|
+
string = "Hello, World!"
|
26
|
+
|
27
|
+
We use the #apply method to actually perform the substitutions.
|
28
|
+
|
29
|
+
replacer.apply(string)
|
30
|
+
|
31
|
+
The replacements occur in place. Since in this case we are performing
|
32
|
+
the search and replace on a String object, we can see the change
|
33
|
+
has taken place.
|
34
|
+
|
35
|
+
string.assert == "Hello, Planet Earth!!!"
|
36
|
+
|
37
|
+
As we mentioned at the beginning, substitutions can be applied to IO
|
38
|
+
objects in general, so long as they can be reopened for writing.
|
39
|
+
|
40
|
+
require 'stringio'
|
41
|
+
|
42
|
+
io = StringIO.new("Hello, World!")
|
43
|
+
|
44
|
+
replacer.apply(io)
|
45
|
+
|
46
|
+
io.read.assert == "Hello, Planet Earth!!!"
|
47
|
+
|
48
|
+
If +io+ were a File object, rather than a StringIO, the file would
|
49
|
+
be changed on disk. As a precaution a backup file can be written
|
50
|
+
with the name of the file plus a '.bak' extension in the same directory as
|
51
|
+
the file. To turn on the backup option, either supply it as an option
|
52
|
+
to the constructor, or set it via the writer method.
|
53
|
+
|
54
|
+
replacer.backup = true
|
55
|
+
|
56
|
+
(TODO: Example of a file search and replace.)
|
57
|
+
|
metadata
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
+
hash: 19
|
4
5
|
prerelease: false
|
5
6
|
segments:
|
6
7
|
- 1
|
8
|
+
- 1
|
7
9
|
- 0
|
8
|
-
|
9
|
-
version: 1.0.0
|
10
|
+
version: 1.1.0
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
13
|
- Thomas Sawyer
|
@@ -15,12 +16,39 @@ autorequire:
|
|
15
16
|
bindir: bin
|
16
17
|
cert_chain: []
|
17
18
|
|
18
|
-
date: 2010-
|
19
|
+
date: 2010-10-12 00:00:00 -04:00
|
19
20
|
default_executable:
|
20
|
-
dependencies:
|
21
|
-
|
22
|
-
|
23
|
-
|
21
|
+
dependencies:
|
22
|
+
- !ruby/object:Gem::Dependency
|
23
|
+
name: syckle
|
24
|
+
prerelease: false
|
25
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
26
|
+
none: false
|
27
|
+
requirements:
|
28
|
+
- - ">="
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
hash: 3
|
31
|
+
segments:
|
32
|
+
- 0
|
33
|
+
version: "0"
|
34
|
+
type: :development
|
35
|
+
version_requirements: *id001
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: qed
|
38
|
+
prerelease: false
|
39
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
hash: 3
|
45
|
+
segments:
|
46
|
+
- 0
|
47
|
+
version: "0"
|
48
|
+
type: :development
|
49
|
+
version_requirements: *id002
|
50
|
+
description: Regex is a simple commmandline Regular Expression tool, that makes easy to search documents for content matches.
|
51
|
+
email: ""
|
24
52
|
executables:
|
25
53
|
- regex
|
26
54
|
extensions: []
|
@@ -28,32 +56,25 @@ extensions: []
|
|
28
56
|
extra_rdoc_files:
|
29
57
|
- README
|
30
58
|
files:
|
31
|
-
- HISTORY
|
32
|
-
- LICENSE
|
33
|
-
- MANIFEST
|
34
|
-
- README
|
35
59
|
- bin/regex
|
36
|
-
- lib/regex.rb
|
37
60
|
- lib/regex/command.rb
|
38
61
|
- lib/regex/extractor.rb
|
62
|
+
- lib/regex/package.yml
|
63
|
+
- lib/regex/replacer.rb
|
39
64
|
- lib/regex/string.rb
|
40
|
-
- lib/regex/templates
|
41
|
-
-
|
42
|
-
-
|
43
|
-
-
|
44
|
-
-
|
45
|
-
-
|
46
|
-
-
|
47
|
-
-
|
48
|
-
-
|
49
|
-
- meta/summary
|
50
|
-
- meta/title
|
51
|
-
- meta/version
|
52
|
-
- test/demos/regex.rdoc
|
65
|
+
- lib/regex/templates.rb
|
66
|
+
- lib/regex.rb
|
67
|
+
- qed/regex.rdoc
|
68
|
+
- qed/replacer.rdoc
|
69
|
+
- PROFILE
|
70
|
+
- PACKAGE
|
71
|
+
- LICENSE
|
72
|
+
- README
|
73
|
+
- HISTORY
|
53
74
|
has_rdoc: true
|
54
75
|
homepage: http://proutils.github.com/regex
|
55
|
-
licenses:
|
56
|
-
|
76
|
+
licenses:
|
77
|
+
- ""
|
57
78
|
post_install_message:
|
58
79
|
rdoc_options:
|
59
80
|
- --title
|
@@ -63,25 +84,29 @@ rdoc_options:
|
|
63
84
|
require_paths:
|
64
85
|
- lib
|
65
86
|
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
+
none: false
|
66
88
|
requirements:
|
67
89
|
- - ">="
|
68
90
|
- !ruby/object:Gem::Version
|
91
|
+
hash: 3
|
69
92
|
segments:
|
70
93
|
- 0
|
71
94
|
version: "0"
|
72
95
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
|
+
none: false
|
73
97
|
requirements:
|
74
98
|
- - ">="
|
75
99
|
- !ruby/object:Gem::Version
|
100
|
+
hash: 3
|
76
101
|
segments:
|
77
102
|
- 0
|
78
103
|
version: "0"
|
79
104
|
requirements: []
|
80
105
|
|
81
106
|
rubyforge_project: regex
|
82
|
-
rubygems_version: 1.3.
|
107
|
+
rubygems_version: 1.3.7
|
83
108
|
signing_key:
|
84
109
|
specification_version: 3
|
85
|
-
summary: Regex is simple commmandline Regular Expression tool.
|
110
|
+
summary: Regex is a simple commmandline Regular Expression tool.
|
86
111
|
test_files: []
|
87
112
|
|