regex 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,107 +1,26 @@
1
- require 'regex'
1
+ require 'regex/extractor'
2
+ require 'regex/replacer'
2
3
 
3
- class Regex
4
module Regex

  # Command-line entry point.
  #
  # Chooses a controller class from the raw arguments and hands the
  # arguments over to that controller's own +cli+ method: Replacer when
  # '-r' or '--replace' appears among the arguments, Extractor otherwise.
  #
  # Any StandardError raised by the controller is re-raised when $DEBUG
  # is set; otherwise the process is aborted with the error's message.
  #
  # argv - the command-line arguments (collected from the splat).
  def self.cli(*argv)
    replacing  = argv.include?('-r') || argv.include?('--replace')
    controller = replacing ? Replacer : Extractor

    # Only the delegated call is guarded; resolving the controller
    # constant above happens outside the rescue.
    begin
      controller.cli(argv)
    rescue => error
      raise error if $DEBUG
      abort error.to_s
    end
  end

end
@@ -1 +1,482 @@
1
+ require 'fileutils'
2
+ require 'open-uri'
3
+ require 'regex/string'
4
+
5
+ module Regex
6
+
7
+ # Supports [:name:] notation for substitution of built-in templates.
8
class Extractor

  # When the regular expression returns multiple groups, each group is
  # separated by the group deliminator. This is the default value
  # (ASCII 29, "group separator", followed by a newline).
  DELIMINATOR_GROUP = (29.chr + "\n").freeze

  # When using repeat mode, each match is separated by the record
  # deliminator. This is the default value (ASCII 30, "record
  # separator", followed by a newline).
  DELIMINATOR_RECORD = (30.chr + "\n").freeze

  # Return the text of +input+, reading it at most once.
  #
  # The cache lives on the class so that Match objects can look up the
  # text of the input they came from.
  #
  # input - a String (used as-is) or any object responding to #read.
  #
  # Returns the String content of the input.
  def self.input_cache(input)
    @input_cache ||= {}
    @input_cache[input] ||= (String === input ? input : input.read)
  end

  # List of IO objects or Strings to search.
  attr_accessor :io

  # Remove XML tags from search. (NOT CURRENTLY SUPPORTED)
  attr_accessor :unxml

  # Regular expression (Regexp or String).
  attr_accessor :pattern

  # Select built-in regular expression by name.
  attr_accessor :template

  # Index of the match group to return.
  attr_accessor :index

  # Multiline match.
  attr_accessor :multiline

  # Ignore case.
  attr_accessor :insensitive

  # Escape expression, i.e. treat a String pattern literally.
  attr_accessor :escape

  # Repeat match (find every occurrence rather than the first).
  attr_accessor :repeat

  # Output format.
  attr_accessor :format

  # Provide detailed output.
  attr_accessor :detail

  # Use ANSI codes in output?
  attr_accessor :ansi

  # Use ANSI codes in output?
  def ansi? ; @ansi ; end

  # New extractor.
  #
  # io - any number of inputs (IO objects or Strings); a trailing Hash
  #      is taken as options, each key being assigned through the
  #      writer of the same name.
  #
  # ANSI output is on by default and can be switched off via options.
  def initialize(*io)
    options = Hash === io.last ? io.pop : {}

    @io   = io
    @ansi = true

    options.each do |k,v|
      __send__("#{k}=", v)
    end
  end

  # Short inspection string: just the class name.
  def inspect
    self.class.name.to_s
  end

  #--
  # TODO: unxml is too primitive (a plain gsub of /<.*?>/); use a real
  # XML parser such as nokogiri before re-enabling it.
  #++

  # The Regexp used for searching, built once and memoized.
  #
  # A template name takes precedence over a pattern. A String pattern
  # is compiled with the multiline / insensitive flags applied and is
  # either escaped (literal search) or has its [:name:] template
  # references expanded first.
  #
  # Raises ArgumentError when neither a template nor a usable pattern
  # has been given.
  def regex
    @regex ||= (
      if template
        Templates.const_get(template.upcase)
      else
        case pattern
        when Regexp
          pattern
        when String
          # Flags must be OR-ed into the accumulator; a bare `flags + X`
          # would throw the result away and leave the flags at zero.
          flags = 0
          flags |= Regexp::MULTILINE  if multiline
          flags |= Regexp::IGNORECASE if insensitive
          source = escape ? Regexp.escape(pattern) : substitute_templates(pattern)
          Regexp.new(source, flags)
        else
          raise ArgumentError, "no usable pattern or template given -- #{pattern.inspect}"
        end
      end
    )
  end

  # Expand every [:name:] reference in +pattern+ with the source of the
  # built-in template of that name. A reference preceded by a backslash
  # is left untouched.
  #
  # Returns the expanded pattern String.
  def substitute_templates(pattern)
    Templates.list.inject(pattern) do |pat, name|
      token = "[:#{name}:]"
      next pat unless pat.include?(token)
      # Block form so backslashes in the template source are inserted
      # verbatim rather than being read as gsub replacement escapes.
      pat.gsub(/(?<!\\)#{Regexp.escape(token)}/){ Templates[name].to_s }
    end
  end

  # Render the search result.
  #
  # format - :yaml, :json, or anything else for plain text.
  #
  # Returns a String.
  def to_s(format=nil)
    case format
    when :yaml
      to_s_yaml
    when :json
      to_s_json
    else
      detail ? output_detailed_text : output_text
    end
  end

  # Render the result as YAML (detailed breakdown when +detail+ is set).
  def to_s_yaml
    require 'yaml'
    (detail ? matches_by_path : structure).to_yaml
  end

  # Render the result as JSON (detailed breakdown when +detail+ is set).
  # Falls back to json_pure when the json library cannot be loaded.
  def to_s_json
    begin
      require 'json'
    rescue LoadError
      require 'json_pure'
    end
    (detail ? matches_by_path : structure).to_json
  end

  # Plain text output: groups joined by the group deliminator and, in
  # repeat mode, matches joined by the record deliminator.
  #
  # Returns an empty String when a single-match search finds nothing.
  def output_text
    out = structure
    if repeat
      out.map{ |m| m.join(deliminator_group) }.join(deliminator_record)
    else
      # structure is nil when there was no match at all.
      Array(out).join(deliminator_group)
    end
  end

  # Detailed text output: for each input a heading followed by one
  # entry per match (line, character offset, matched text).
  #
  # Returns a String. In single-match mode an empty String is returned
  # when nothing matched.
  def output_detailed_text
    lines = []
    if repeat
      count = 0
      mapping.each do |input, matches|
        lines << ""
        lines << bold(input_label(input))
        matches.each do |match|
          lines << formatted_match(input, match)
          count += 1
        end
      end
      lines << "\n(#{count} matches)"
      lines.join("\n")
    else
      match = scan.first
      return "" unless match
      input = match.input
      lines << ""
      lines << bold(input_label(input))
      lines << formatted_match(input, match)
      # Trailing blank line kept so that printing with puts gives the
      # same output as before.
      lines.join("\n") + "\n\n"
    end
  end

  # Format a single match for detailed text output.
  #
  # With +index+ set only that group is shown; otherwise the whole
  # match is shown in bold followed by each numbered group.
  #
  # Returns a String (one line per entry).
  def formatted_match(input, match)
    lines = []
    if index
      part, char, line = match.info(index)
      lines << "%s %s %s" % [line, char, part.inspect]
    else
      part, char, line = match.info(0)
      lines << bold("%s %s %s" % [line, char, part.inspect])
      (1...match.size).each do |i|
        part, char, line = match.info(i)
        lines << "#{i}. %s %s %s" % [line, char, part.inspect]
      end
    end
    lines.join("\n")
  end

  # Detailed match data grouped by input label.
  #
  # Returns a Hash mapping each input's label to an Array holding, per
  # match, either the full breakdown or (with +index+ set) only the
  # breakdown entry of that group.
  def matches_by_path
    result = Hash.new{ |h,k| h[k] = [] }
    mapping.each do |input, matches|
      path = input_label(input)
      matches.each do |match|
        result[path] << (index ? match.breakdown[index] : match.breakdown)
      end
    end
    result
  end

  # Structure the matchdata according to specified options.
  def structure
    repeat ? structure_repeat : structure_single
  end

  # Structure the matchdata for single match: the groups of the first
  # match, or nil when there is none.
  def structure_single
    structure_repeat.first
  end

  # Structure the matchdata for repeat matches: one Array of Strings
  # per match. With +index+ only that group is returned; otherwise the
  # captured groups, or the whole match when there are no groups.
  def structure_repeat
    if index
      scan.map{ |match| [match[index]] }
    else
      scan.map{ |match| match.size > 1 ? match[1..-1] : [match[0]] }
    end
  end

  # Scan inputs for matches.
  #
  # Returns an Array of Match objects, in input order.
  def scan
    list = []
    io.each do |input|
      read(input).scan(regex) do
        list << Match.new(input, Regexp.last_match)
      end
    end
    list
  end

  # Group the matches by the input they were found in.
  #
  # Returns a Hash of input => Array of Match.
  def mapping
    hash = Hash.new{ |h,k| h[k]=[] }
    scan.each do |match|
      hash[match.input] << match
    end
    hash
  end

  # Text of the given input (read once, then cached).
  #
  # TODO: unxml support would not give correct char counts.
  def read(input)
    Extractor.input_cache(input)
  end

  # Return the line number of the +char+ position within the text of +io+.
  def line_at(io, char)
    # Exclusive range: only newlines before the position count.
    read(io)[0...char].count("\n") + 1
  end

  # Separator placed between groups in text output.
  def deliminator_group
    DELIMINATOR_GROUP
  end

  # Separator placed between matches in text output.
  def deliminator_record
    DELIMINATOR_RECORD
  end

  # Commandline Interface to Extractor.
  #
  # Parses the options, takes the first remaining argument as the
  # search expression unless one was given by option (/.../flags is a
  # pattern, anything else a template name), treats the rest as files
  # (standard input / ARGF when there are none) and prints the result.
  #
  # argv - Array of command-line arguments; consumed destructively.
  def self.cli(argv=ARGV)
    require 'optparse'
    format  = nil
    options = {}
    parser = OptionParser.new do |opt|
      opt.on('--template', '-t NAME', "select a built-in regular expression") do |name|
        options[:template] = name
      end
      opt.on('--search', '-s PATTERN', "search for regular expression") do |re|
        options[:pattern] = re
      end
      opt.on('--index', '-n INT', "return a specific match index") do |int|
        options[:index] = int.to_i
      end
      opt.on('--insensitive', '-i', "case insensitive matching") do
        options[:insensitive] = true
      end
      opt.on('--multiline', '-m', "multiline matching") do
        options[:multiline] = true
      end
      #opt.on('--unxml', '-x', "ignore XML/HTML tags") do
      #  options[:unxml] = true
      #end
      opt.on('--global', '-g', "find all matching occurances") do
        options[:repeat] = true
      end
      opt.on('--yaml', '-y', "output in YAML format") do
        format = :yaml
      end
      opt.on('--json', '-j', "output in JSON format") do
        format = :json
      end
      opt.on('--detail', '-d', "provide match details") do
        options[:detail] = true
      end
      opt.on('--[no-]ansi', "toggle ansi color") do |val|
        options[:ansi] = val
      end
      opt.on_tail('--debug', 'run in debug mode') do
        $DEBUG = true
      end
      opt.on_tail('--help', '-h', "display this lovely help message") do
        puts opt
        exit 0
      end
    end
    parser.parse!(argv)

    unless options[:pattern] || options[:template]
      re = argv.shift
      case re
      when /^\/(.*?)\/(\w*?)$/
        options[:pattern] = $1
        $2.each_char do |c|
          case c
          when 'e' then options[:escape]      = true
          when 'g' then options[:repeat]      = true
          when 'i' then options[:insensitive] = true
          when 'm' then options[:multiline]   = true
          end
        end
      else
        options[:template] = re
      end
    end

    files = argv

    files.each do |file|
      unless File.file?(file)
        $stderr.puts "No such file -- '#{file}'."
        exit 1
      end
    end

    # Every name is a verified regular file by now, so open it as one.
    inputs = files.empty? ? [ARGF] : files.map{ |f| File.new(f) }

    begin
      extract = new(*(inputs + [options]))
      puts extract.to_s(format)
    ensure
      # Close only the files opened here; ARGF is left alone.
      inputs.each{ |i| i.close if File === i && !i.closed? }
    end
  end

  # Wrap +str+ in ANSI bold codes, or return it unchanged when ANSI
  # output is disabled.
  def bold(str)
    if ansi?
      "\e[1m" + str + "\e[0m"
    else
      str
    end
  end

  private

  # Label identifying an input in detailed output: the path for a File,
  # otherwise a placeholder built from the object id.
  def input_label(input)
    File === input ? input.path : "(io #{input.object_id})"
  end

  # A single match together with the input it was found in.
  class Match
    # The input (IO or String) the match was found in.
    attr_reader :input

    # The underlying MatchData.
    attr_reader :match

    # input - the IO or String that was searched
    # match - Instance of MatchData
    def initialize(input, match)
      @input = input
      @match = match
    end

    # Group access, delegated to the MatchData.
    def [](i)
      @match[i]
    end

    # Number of entries in the MatchData (whole match plus groups).
    def size
      @match.size
    end

    # Describe the whole match and every group.
    #
    # Returns an Array of Hashes with the keys 'index', 'line', 'char'
    # and 'text'.
    def breakdown
      (0...match.size).map do |i|
        char = match.offset(i)[0]
        {'index'=>i, 'line'=>line_at(char), 'char'=>char, 'text'=>match[i]}
      end
    end

    # Details of one group.
    #
    # Returns the group's text, its starting character offset and the
    # line number of that offset.
    def info(index)
      text = match[index]
      char = match.offset(index)[0]
      line = line_at(char)
      return text, char, line
    end

    # Return the line number of the +char+ position within +text+,
    # or nil when there is no position (the group did not take part).
    def line_at(char)
      return nil unless char
      # Exclusive range: a match that itself begins with a newline still
      # belongs to the line it starts on.
      text[0...char].count("\n") + 1
    end

    # Full text of the input the match was found in.
    def text
      Extractor.input_cache(input)
    end

  end

end
480
+
481
+ end
1
482