regex 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY +18 -1
- data/LICENSE +202 -21
- data/PACKAGE +7 -0
- data/PROFILE +20 -0
- data/README +76 -21
- data/bin/regex +1 -1
- data/lib/regex.rb +19 -228
- data/lib/regex/command.rb +16 -97
- data/lib/regex/extractor.rb +481 -0
- data/lib/regex/package.yml +7 -0
- data/lib/regex/replacer.rb +221 -0
- data/lib/regex/string.rb +1 -1
- data/lib/regex/templates.rb +85 -0
- data/{test/demos → qed}/regex.rdoc +5 -5
- data/qed/replacer.rdoc +57 -0
- metadata +54 -29
- data/MANIFEST +0 -25
- data/lib/regex/templates/common.rb +0 -13
- data/meta/authors +0 -2
- data/meta/created +0 -1
- data/meta/description +0 -1
- data/meta/download +0 -1
- data/meta/homepage +0 -1
- data/meta/mailinglist +0 -1
- data/meta/name +0 -1
- data/meta/repository +0 -1
- data/meta/summary +0 -1
- data/meta/title +0 -1
- data/meta/version +0 -1
data/lib/regex/command.rb
CHANGED
@@ -1,107 +1,26 @@
|
|
1
|
-
require 'regex'
|
1
|
+
require 'regex/extractor'
|
2
|
+
require 'regex/replacer'
|
2
3
|
|
3
|
-
|
4
|
+
module Regex
|
4
5
|
|
5
6
|
# Commandline interface.
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
#
|
13
|
-
attr :format
|
14
|
-
|
15
|
-
#
|
16
|
-
attr :options
|
17
|
-
|
18
|
-
#
|
19
|
-
def self.main(*argv)
|
20
|
-
new(*argv).main
|
21
|
-
end
|
22
|
-
|
23
|
-
# New Command.
|
24
|
-
def initialize(*argv)
|
25
|
-
@file = nil
|
26
|
-
@format = nil
|
27
|
-
@options = {}
|
28
|
-
parse(*argv)
|
7
|
+
def self.cli(*argv)
|
8
|
+
if argv.include?('-r') or argv.include?('--replace')
|
9
|
+
controller = Replacer
|
10
|
+
else
|
11
|
+
controller = Extractor
|
29
12
|
end
|
30
13
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
unless File.file?(@file)
|
40
|
-
puts "No such file -- '#{file}'."
|
41
|
-
exit 1
|
42
|
-
end
|
14
|
+
begin
|
15
|
+
controller.cli(argv)
|
16
|
+
rescue => error
|
17
|
+
if $DEBUG
|
18
|
+
raise error
|
19
|
+
#puts error.backtrace.join("\n ")
|
20
|
+
else
|
21
|
+
abort error.to_s
|
43
22
|
end
|
44
23
|
end
|
45
|
-
|
46
|
-
# OptionParser instance.
|
47
|
-
def parser
|
48
|
-
require 'optparse'
|
49
|
-
@options = {}
|
50
|
-
OptionParser.new do |opt|
|
51
|
-
opt.on('--template', '-t NAME', "select a built-in regular expression") do |name|
|
52
|
-
@options[:template] = name
|
53
|
-
end
|
54
|
-
|
55
|
-
opt.on('--index', '-n INT', "return a specific match index") do |int|
|
56
|
-
@options[:index] = int.to_i
|
57
|
-
end
|
58
|
-
|
59
|
-
opt.on('--insensitive', '-i', "case insensitive matching") do
|
60
|
-
@options[:insensitive] = true
|
61
|
-
end
|
62
|
-
|
63
|
-
opt.on('--unxml', '-x', "ignore XML/HTML tags") do
|
64
|
-
@options[:unxml] = true
|
65
|
-
end
|
66
|
-
|
67
|
-
opt.on('--repeat', '-r', "find all matching occurances") do
|
68
|
-
@options[:repeat] = true
|
69
|
-
end
|
70
|
-
|
71
|
-
opt.on('--yaml', '-y', "output in YAML format") do
|
72
|
-
@format = :yaml
|
73
|
-
end
|
74
|
-
|
75
|
-
opt.on('--json', '-j', "output in JSON format") do
|
76
|
-
@format = :json
|
77
|
-
end
|
78
|
-
|
79
|
-
opt.on_tail('--help', '-h', "display this lovely help message") do
|
80
|
-
puts opt
|
81
|
-
exit 0
|
82
|
-
end
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
#
|
87
|
-
def extraction
|
88
|
-
target = file ? File.new(file) : ARGF
|
89
|
-
Regex.new(target, options)
|
90
|
-
end
|
91
|
-
|
92
|
-
# Extract and display.
|
93
|
-
def main
|
94
|
-
begin
|
95
|
-
puts extraction.to_s(@format)
|
96
|
-
rescue => error
|
97
|
-
if $DEBUG
|
98
|
-
raise error
|
99
|
-
else
|
100
|
-
abort error.to_s
|
101
|
-
end
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
24
|
end
|
106
25
|
|
107
26
|
end
|
data/lib/regex/extractor.rb
CHANGED
@@ -1 +1,482 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'regex/string'
|
4
|
+
|
5
|
+
module Regex
|
6
|
+
|
7
|
+
# Supports [:name:] notation for substitution of built-in templates.
|
8
|
+
# Extracts regular expression matches from one or more inputs (Strings or
# objects responding to #read) and renders them as plain text, YAML or JSON.
#
# String patterns may embed built-in templates using [:name:] notation;
# prefix the token with a backslash to keep it literal.
class Extractor

  # When the regular expression returns multiple groups,
  # each is divided by the group deliminator.
  # This is the default value (ASCII 29 followed by a newline).
  DELIMINATOR_GROUP = (29.chr + "\n").freeze

  # When using repeat mode, each match is divided by
  # the record deliminator. This is the default value
  # (ASCII 30 followed by a newline).
  DELIMINATOR_RECORD = (30.chr + "\n").freeze

  # Return the text of +input+, reading it at most once.
  #
  # Strings are returned as-is; anything else is sent #read and the
  # result is memoized in a class-level cache keyed by the input object,
  # so an IO is never read twice.
  def self.input_cache(input)
    @input_cache ||= {}
    @input_cache[input] ||= (String === input ? input : input.read)
  end

  # List of IO objects or Strings to search.
  attr_accessor :io

  # Remove XML tags from search. (NOT CURRENTLY SUPPORTED)
  attr_accessor :unxml

  # Regular expression (Regexp or String).
  attr_accessor :pattern

  # Select built-in regular expression by name.
  attr_accessor :template

  # Index of expression return.
  attr_accessor :index

  # Multiline match.
  attr_accessor :multiline

  # Ignore case.
  attr_accessor :insensitive

  # Escape expression.
  attr_accessor :escape

  # Repeat Match.
  attr_accessor :repeat

  # Output format (:yaml, :json or nil for text). Used by #to_s when
  # no explicit format argument is given.
  attr_accessor :format

  # Provide detailed output.
  attr_accessor :detail

  # Use ANSI codes in output?
  attr_accessor :ansi

  # Use ANSI codes in output?
  def ansi? ; @ansi ; end

  # New extractor.
  #
  # io - any number of Strings or readable objects to search. A trailing
  #      Hash is taken as options; each key is assigned through the
  #      writer method of the same name.
  def initialize(*io)
    options = Hash === io.last ? io.pop : {}

    @io   = io
    @ansi = true

    options.each do |key, value|
      __send__("#{key}=", value)
    end
  end

  # Short inspection string (just the class name).
  def inspect
    "#{self.class.name}"
  end

  #--
  # TODO: unxml is too primitive, use real xml parser like nokogiri
  #++
  #def text
  #  @text ||= (
  #    if unxml
  #      raw.gsub!(/\<(.*?)\>/, '')
  #    else
  #      @raw
  #    end
  #  )
  #end

  # The Regexp used for searching (memoized).
  #
  # A template name takes precedence over +pattern+. A String pattern is
  # compiled with the multiline/insensitive flags, and is either escaped
  # verbatim or has its [:name:] tokens expanded.
  #
  # Raises ArgumentError if neither a template nor a usable pattern is set.
  def regex
    @regex ||= (
      if template
        Templates.const_get(template.upcase)
      else
        case pattern
        when Regexp
          pattern
        when String
          flags = 0
          # Flags must be accumulated; a bare `flags + X` discards the result.
          flags |= Regexp::MULTILINE  if multiline
          flags |= Regexp::IGNORECASE if insensitive
          source = escape ? Regexp.escape(pattern) : substitute_templates(pattern)
          Regexp.new(source, flags)
        else
          raise ArgumentError, "no pattern or template given"
        end
      end
    )
  end

  # Expand every [:name:] token in +pattern+ with the source of the
  # built-in template of that name. Tokens preceded by a backslash are
  # left untouched.
  #
  # Returns the expanded pattern String.
  def substitute_templates(pattern)
    Templates.list.inject(pattern) do |pat, name|
      token = "[:#{name}:]"
      next pat unless pat.include?(token)
      # Block form keeps backslash sequences in the template source
      # (e.g. \1) from being interpreted as gsub back-references.
      pat.gsub(/(?<!\\)#{Regexp.escape(token)}/) { Templates[name].to_s }
    end
  end

  # Render the results.
  #
  # format - :yaml, :json or nil for text. Falls back to the +format+
  #          attribute when not given.
  #
  # Returns a String.
  def to_s(format=nil)
    selected = format || @format
    selected = selected.to_sym if selected.respond_to?(:to_sym)
    case selected
    when :yaml
      to_s_yaml
    when :json
      to_s_json
    else
      detail ? output_detailed_text : output_text
    end
  end

  # Results as YAML; per-path breakdown when +detail+ is set.
  def to_s_yaml
    require 'yaml'
    (detail ? matches_by_path : structure).to_yaml
  end

  # Results as JSON; per-path breakdown when +detail+ is set.
  def to_s_json
    begin
      require 'json'
    rescue LoadError
      require 'json_pure'
    end
    (detail ? matches_by_path : structure).to_json
  end

  # Plain text output: groups joined by the group deliminator and, in
  # repeat mode, matches joined by the record deliminator.
  #
  # Returns an empty String when nothing matched.
  def output_text
    out = structure
    if repeat
      out.map{ |m| m.join(deliminator_group) }.join(deliminator_record)
    else
      # structure is nil when a single-match search finds nothing.
      Array(out).join(deliminator_group)
    end
  end

  # Detailed text output: for each input a (bold) path heading followed
  # by one entry per match, giving line, character offset and text.
  #
  # Returns a String (empty when a single-match search finds nothing).
  def output_detailed_text
    lines = []
    if repeat
      count = 0
      mapping.each do |input, matches|
        lines << "" << bold(path_label(input))
        matches.each do |match|
          lines << formatted_match(input, match)
          count += 1
        end
      end
      lines << "\n(#{count} matches)"
      lines.join("\n")
    else
      match = scan.first
      return "" unless match
      input = match.input
      lines << "" << bold(path_label(input)) << formatted_match(input, match)
      # Trailing blank line retained from the original layout.
      lines.join("\n") + "\n\n"
    end
  end

  # Format one match as "line char text". With +index+ set only that
  # group is shown; otherwise the whole match (bold) is followed by one
  # numbered line per capture group.
  #
  # input - the source the match came from (kept for interface
  #         compatibility; the match carries its own input).
  # match - an Extractor::Match.
  def formatted_match(input, match)
    if index
      part, char, line = match.info(index)
      return "%s %s %s" % [line, char, part.inspect]
    end

    part, char, line = match.info(0)
    lines = [bold("%s %s %s" % [line, char, part.inspect])]
    (1...match.size).each do |i|
      part, char, line = match.info(i)
      lines << "#{i}. %s %s %s" % [line, char, part.inspect]
    end
    lines.join("\n")
  end

  # Hash of path label => list of match breakdowns (or, when +index+ is
  # set, only the breakdown entry at that index for each match).
  def matches_by_path
    result = Hash.new{ |h,k| h[k] = [] }
    mapping.each do |input, matches|
      path = path_label(input)
      matches.each do |match|
        result[path] << (index ? match.breakdown[index] : match.breakdown)
      end
    end
    result
  end

  # Structure the matchdata according to specified options.
  def structure
    repeat ? structure_repeat : structure_single
  end

  # Structure the matchdata for single match (nil if nothing matched).
  def structure_single
    structure_repeat.first
  end

  # Structure the matchdata for repeat matches: one Array per match
  # holding the indexed group, the capture groups, or the whole match
  # when the expression has no groups.
  def structure_repeat
    if index
      scan.map{ |match| [match[index]] }
    else
      scan.map{ |match| match.size > 1 ? match[1..-1] : [match[0]] }
    end
  end

  # Scan inputs for matches.
  #
  # Returns an Array of Match objects, in input then occurrence order.
  def scan
    list = []
    io.each do |input|
      read(input).scan(regex) do
        list << Match.new(input, Regexp.last_match)
      end
    end
    list
  end

  # Group the scanned matches by the input they came from.
  #
  # Returns a Hash of input => Array of Match.
  def mapping
    hash = Hash.new{ |h,k| h[k]=[] }
    scan.each do |match|
      hash[match.input] << match
    end
    hash
  end

  # Text of +input+ (cached).
  #
  # TODO: unxml won't give correct char counts.
  def read(input)
    Extractor.input_cache(input)
    #  if unxml
    #    txt.gsub(/\<(.*?)\>/, '')
    #  else
    #    txt
    #  end
  end

  # Return the (1-based) line number of the +char+ position within the
  # text of +io+, or nil when +char+ is nil.
  def line_at(io, char)
    return nil unless char
    # Exclusive range: only newlines *before* the position count.
    read(io)[0...char].count("\n") + 1
  end

  # Separator between groups of one match.
  def deliminator_group
    DELIMINATOR_GROUP
  end

  # Separator between matches in repeat mode.
  def deliminator_record
    DELIMINATOR_RECORD
  end

  # Commandline Interface to Extractor.
  #
  # argv - argument list (consumed). Unless --search or --template is
  #        given, the first non-option argument is the expression: either
  #        /pattern/flags (flags: e=escape, g=global, i=insensitive,
  #        m=multiline) or a template name. Remaining arguments are files;
  #        with none, ARGF is searched.
  #
  # Prints the result to stdout. Exits with status 1 if a file is missing.
  def self.cli(argv=ARGV)
    require 'optparse'
    format  = nil
    options = {}
    parser = OptionParser.new do |opt|
      opt.on('--template', '-t NAME', "select a built-in regular expression") do |name|
        options[:template] = name
      end
      opt.on('--search', '-s PATTERN', "search for regular expression") do |re|
        options[:pattern] = re
      end
      opt.on('--index', '-n INT', "return a specific match index") do |int|
        options[:index] = int.to_i
      end
      opt.on('--insensitive', '-i', "case insensitive matching") do
        options[:insensitive] = true
      end
      opt.on('--multiline', '-m', "multiline matching") do
        options[:multiline] = true
      end
      #opt.on('--unxml', '-x', "ignore XML/HTML tags") do
      #  options[:unxml] = true
      #end
      opt.on('--global', '-g', "find all matching occurances") do
        options[:repeat] = true
      end
      opt.on('--yaml', '-y', "output in YAML format") do
        format = :yaml
      end
      opt.on('--json', '-j', "output in JSON format") do
        format = :json
      end
      opt.on('--detail', '-d', "provide match details") do
        options[:detail] = true
      end
      opt.on('--[no-]ansi', "toggle ansi color") do |val|
        options[:ansi] = val
      end
      opt.on_tail('--debug', 'run in debug mode') do
        $DEBUG = true
      end
      opt.on_tail('--help', '-h', "display this lovely help message") do
        puts opt
        exit 0
      end
    end
    parser.parse!(argv)

    unless options[:pattern] || options[:template]
      re = argv.shift
      case re
      when /^\/(.*?)\/(\w*?)$/
        source, modifiers = $1, $2
        options[:pattern] = source
        modifiers.split(//).each do |c|
          case c
          when 'e' then options[:escape]      = true
          when 'g' then options[:repeat]      = true
          when 'i' then options[:insensitive] = true
          when 'm' then options[:multiline]   = true
          end
        end
      else
        options[:template] = re
      end
    end

    files = argv

    files.each do |file|
      if !File.file?(file)
        $stderr.puts "No such file -- '#{file}'."
        exit 1
      end
    end

    # File.open rather than Kernel#open: the arguments are plain paths and
    # must never be interpreted as pipes or URLs.
    opened = files.map{ |f| File.open(f) }
    inputs = opened.empty? ? [ARGF] : opened

    begin
      extract = new(*(inputs + [options]))
      puts extract.to_s(format)
    ensure
      opened.each{ |f| f.close unless f.closed? }
    end
  end

  # Wrap +str+ in ANSI bold codes when ANSI output is enabled;
  # otherwise return it unchanged.
  def bold(str)
    if ansi?
      "\e[1m" + str + "\e[0m"
    else
      str
    end
  end

  # Label used for an input in detailed output: the file path for Files,
  # otherwise a placeholder built from the object id.
  def path_label(input)
    File === input ? input.path : "(io #{input.object_id})"
  end
  private :path_label

  # A single match together with the input it was found in.
  class Match
    # The input (String or IO) that was searched.
    attr_reader :input

    # The underlying MatchData.
    attr_reader :match

    # input - the String or IO the match came from
    # match - Instance of MatchData
    def initialize(input, match)
      @input = input
      @match = match
    end

    # Text of group +i+ (0 is the whole match).
    def [](i)
      @match[i]
    end

    # Number of groups, including the whole match.
    def size
      @match.size
    end

    # Array with one Hash per group giving its 'index', 'line',
    # 'char' offset and 'text'.
    def breakdown
      (0...match.size).map do |i|
        part, char, line = info(i)
        {'index'=>i, 'line'=>line, 'char'=>char, 'text'=>part}
      end
    end

    # Returns [text, char offset, line number] for group +index+.
    # Offset and line are nil for a group that did not participate.
    def info(index)
      char = match.offset(index)[0]
      return match[index], char, line_at(char)
    end

    # Return the (1-based) line number of the +char+ position within
    # the input text, or nil when +char+ is nil.
    def line_at(char)
      return nil unless char
      # Exclusive range: a match that starts on a newline character
      # still belongs to the line that newline terminates.
      text[0...char].count("\n") + 1
    end

    # Full text of the input (cached).
    def text
      Extractor.input_cache(input)
    end

  end

end
|
480
|
+
|
481
|
+
end
|
1
482
|
|