regex 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,107 +1,26 @@
1
- require 'regex'
1
+ require 'regex/extractor'
2
+ require 'regex/replacer'
2
3
 
3
- class Regex
4
+ module Regex
4
5
 
5
6
  # Commandline interface.
6
- #
7
- class Command
8
-
9
- #
10
- attr :file
11
-
12
- #
13
- attr :format
14
-
15
- #
16
- attr :options
17
-
18
- #
19
- def self.main(*argv)
20
- new(*argv).main
21
- end
22
-
23
- # New Command.
24
- def initialize(*argv)
25
- @file = nil
26
- @format = nil
27
- @options = {}
28
- parse(*argv)
7
+ def self.cli(*argv)
8
+ if argv.include?('-r') or argv.include?('--replace')
9
+ controller = Replacer
10
+ else
11
+ controller = Extractor
29
12
  end
30
13
 
31
- #
32
- def parse(*argv)
33
- parser.parse!(argv)
34
- unless @options[:template]
35
- @options[:pattern] = argv.shift
36
- end
37
- @file = argv.shift
38
- if @file
39
- unless File.file?(@file)
40
- puts "No such file -- '#{file}'."
41
- exit 1
42
- end
14
+ begin
15
+ controller.cli(argv)
16
+ rescue => error
17
+ if $DEBUG
18
+ raise error
19
+ #puts error.backtrace.join("\n ")
20
+ else
21
+ abort error.to_s
43
22
  end
44
23
  end
45
-
46
- # OptionParser instance.
47
- def parser
48
- require 'optparse'
49
- @options = {}
50
- OptionParser.new do |opt|
51
- opt.on('--template', '-t NAME', "select a built-in regular expression") do |name|
52
- @options[:template] = name
53
- end
54
-
55
- opt.on('--index', '-n INT', "return a specific match index") do |int|
56
- @options[:index] = int.to_i
57
- end
58
-
59
- opt.on('--insensitive', '-i', "case insensitive matching") do
60
- @options[:insensitive] = true
61
- end
62
-
63
- opt.on('--unxml', '-x', "ignore XML/HTML tags") do
64
- @options[:unxml] = true
65
- end
66
-
67
- opt.on('--repeat', '-r', "find all matching occurances") do
68
- @options[:repeat] = true
69
- end
70
-
71
- opt.on('--yaml', '-y', "output in YAML format") do
72
- @format = :yaml
73
- end
74
-
75
- opt.on('--json', '-j', "output in JSON format") do
76
- @format = :json
77
- end
78
-
79
- opt.on_tail('--help', '-h', "display this lovely help message") do
80
- puts opt
81
- exit 0
82
- end
83
- end
84
- end
85
-
86
- #
87
- def extraction
88
- target = file ? File.new(file) : ARGF
89
- Regex.new(target, options)
90
- end
91
-
92
- # Extract and display.
93
- def main
94
- begin
95
- puts extraction.to_s(@format)
96
- rescue => error
97
- if $DEBUG
98
- raise error
99
- else
100
- abort error.to_s
101
- end
102
- end
103
- end
104
-
105
24
  end
106
25
 
107
26
  end
@@ -1 +1,482 @@
1
+ require 'fileutils'
2
+ require 'open-uri'
3
+ require 'regex/string'
4
+
5
+ module Regex
6
+
7
+ # Supports [:name:] notation for substitution of built-in templates.
8
+ class Extractor
9
+
10
+ # When the regular expression returns multiple groups,
11
+ # each is divided by the group deliminator.
12
+ # This is the default value.
13
+ DELIMINATOR_GROUP = 29.chr + "\n"
14
+
15
+ # When using repeat mode, each match is divided by
16
+ # the record deliminator. This is the default value.
17
+ DELIMINATOR_RECORD = 30.chr + "\n"
18
+
19
+ #
20
# Return the text of +input+, reading it at most once.
#
# Results are memoized in a class-level Hash keyed by +input+. A String
# is stored as-is; any other object is sent #read and the result stored.
def self.input_cache(input)
  @input_cache ||= {}
  cached = @input_cache[input]
  return cached if cached

  @input_cache[input] = String === input ? input : input.read
end
31
+
32
+ # List of IO objects or Strings to search.
33
+ attr_accessor :io
34
+
35
+ # Remove XML tags from search. (NOT CURRENTLY SUPPORTED)
36
+ attr_accessor :unxml
37
+
38
+ # Regular expression.
39
+ attr_accessor :pattern
40
+
41
+ # Select built-in regular expression by name.
42
+ attr_accessor :template
43
+
44
+ # Index of expression return.
45
+ attr_accessor :index
46
+
47
+ # Multiline match.
48
+ attr_accessor :multiline
49
+
50
+ # Ignore case.
51
+ attr_accessor :insensitive
52
+
53
+ # Escape expression.
54
+ attr_accessor :escape
55
+
56
+ # Repeat Match.
57
+ attr_accessor :repeat
58
+
59
+ # Output format.
60
+ attr_accessor :format
61
+
62
+ # Provide detailed output.
63
+ attr_accessor :detail
64
+
65
+ # Use ANSI codes in output?
66
+ attr_accessor :ansi
67
+
68
# Use ANSI codes in output? Returns the current value of @ansi.
def ansi?
  @ansi
end
70
+
71
# New extractor.
#
# io - any number of inputs to search. If the last argument is a Hash it
#      is removed from the list and treated as options: every key is
#      assigned through the writer method of the same name ("key=").
#
# ANSI output is switched on before the options are applied, so an
# :ansi option overrides that default.
def initialize(*io)
  options = io.last.is_a?(Hash) ? io.pop : {}

  @io   = io
  @ansi = true

  options.each_pair { |key, value| __send__("#{key}=", value) }
end
82
+
83
+ #
84
# Short inspection string: only the name of the receiver's class.
def inspect
  klass = self.class
  "#{klass.name}"
end
87
+
88
+ #--
89
+ # TODO: unxml is too primitive, use real xml parser like nokogiri
90
+ #++
91
+ #def text
92
+ # @text ||= (
93
+ # if unxml
94
+ # raw.gsub!(/\<(.*?)\>/, '')
95
+ # else
96
+ # @raw
97
+ # end
98
+ # )
99
+ #end
100
+
101
+ #
102
# Build (once) and return the Regexp used for searching.
#
# * If +template+ is set, the constant of that (upcased) name is looked
#   up in Templates.
# * Otherwise +pattern+ is used: a Regexp is returned unchanged, a String
#   is compiled with the MULTILINE / IGNORECASE flags selected by
#   +multiline+ and +insensitive+. With +escape+ the string is matched
#   literally; without it [:name:] template markers are expanded first.
#
# Returns nil when +pattern+ is neither a Regexp nor a String.
def regex
  @regex ||= (
    if template
      Templates.const_get(template.upcase)
    else
      case pattern
      when Regexp
        pattern
      when String
        flags = 0
        # The flags must be OR-ed *into* the accumulator; a bare
        # `flags + X` computes a value and throws it away.
        flags |= Regexp::MULTILINE  if multiline
        flags |= Regexp::IGNORECASE if insensitive
        source = escape ? Regexp.escape(pattern) : substitute_templates(pattern)
        Regexp.new(source, flags)
      end
    end
  )
end
124
+
125
+ #
126
# Expand every [:name:] marker in +pattern+ with the source of the
# built-in template of that name and return the resulting String.
# +pattern+ itself is not modified.
#
# A marker preceded by a backslash (\[:name:]) is left untouched so a
# literal "[:name:]" can still be written.
# NOTE(review): an escaped backslash directly before a marker
# (\\[:name:]) is also left untouched - confirm that is acceptable.
def substitute_templates(pattern)
  Templates.list.inject(pattern) do |pat, name|
    marker = "[:#{name}:]"
    next pat unless pat.include?(marker)

    # Block form of gsub: the template source is inserted verbatim. A
    # replacement *string* would have its backslash sequences (\\, \1 ...)
    # re-interpreted and corrupt the template.
    pat.gsub(/(?<!\\)#{Regexp.escape(marker)}/) { Templates[name].to_s }
  end
end
135
+
136
+ #
137
# Render the extraction result as a String.
#
# format - :yaml or :json select the structured serializers; any other
#          value produces text, detailed when +detail+ is set.
def to_s(format=nil)
  return to_s_yaml if :yaml == format
  return to_s_json if :json == format

  detail ? output_detailed_text : output_text
end
151
+
152
+ #
153
# Serialize the result as YAML: the per-path breakdown when +detail+ is
# set, otherwise the plain match structure. Loads 'yaml' on first use.
def to_s_yaml
  require 'yaml'
  data = detail ? matches_by_path : structure
  data.to_yaml
end
161
+
162
+ #
163
# Serialize the result as JSON: the per-path breakdown when +detail+ is
# set, otherwise the plain match structure. Tries 'json' first and falls
# back to 'json_pure' if that cannot be loaded.
def to_s_json
  begin
    require 'json'
  rescue LoadError
    require 'json_pure'
  end
  (detail ? matches_by_path : structure).to_json
end
175
+
176
+ #
177
# Plain text output.
#
# In repeat mode each match's groups are joined with the group
# deliminator and the matches are joined with the record deliminator.
# In single mode the groups of the first match are joined with the group
# deliminator.
#
# Returns an empty String when single mode finds no match (+structure+
# is nil in that case), the same result repeat mode gives for no matches.
def output_text
  out = structure
  return '' if out.nil?

  if repeat
    out.map { |groups| groups.join(deliminator_group) }.join(deliminator_record)
  else
    out.join(deliminator_group)
  end
end
187
+
188
# Detailed text output.
#
# Repeat mode: for every input a blank line, the bold input label and
# one formatted entry per match, followed by a "(N matches)" footer.
# Single mode: the same header and the first match only.
#
# Always returns a String (previously the line Array leaked out and the
# joined text was discarded). The text is laid out so that +puts+ prints
# exactly what it printed for that Array. Returns an empty String when
# single mode finds no match.
def output_detailed_text
  if repeat
    count = 0
    lines = []
    mapping.each do |input, matches|
      path = (File === input ? input.path : "(io #{input.object_id})")
      lines << ""
      lines << bold(path)
      matches.each do |match|
        lines << formatted_match(input, match)
        count += 1
      end
    end
    lines << "\n(#{count} matches)"
    lines.join("\n")
  else
    match = scan.first
    return '' unless match

    input = match.input
    path  = (File === input ? input.path : "(io #{input.object_id})")
    # The two trailing empty entries reproduce the blank line that used
    # to follow the match when the Array was handed to puts.
    ["", bold(path), formatted_match(input, match), "", ""].join("\n")
  end
end
216
+
217
+ #
218
# Format one match for detailed output.
#
# Each entry reads "line char text" (text shown via #inspect). With
# +index+ set only that group is reported; otherwise the whole match is
# shown in bold followed by one numbered line per capture group.
#
# input - the searched input (not used for the formatting itself).
# match - object responding to #info(i) and #size.
def formatted_match(input, match)
  entry = lambda do |i|
    part, char, line = match.info(i)
    "%s %s %s" % [line, char, part.inspect]
  end
  return entry.call(index) if index

  rows = [bold(entry.call(0))]
  (1...match.size).each { |i| rows << ("#{i}. " + entry.call(i)) }
  rows.join("\n")
end
236
+
237
+ #
238
# Group the breakdown of every match by a label for its input.
#
# The label is the path for File inputs and "(io <object_id>)" for
# anything else. With +index+ set only that entry of each breakdown is
# kept. Returns a Hash of label => Array, in scan order.
def matches_by_path
  grouped = Hash.new { |hash, key| hash[key] = [] }
  scan.each { |match| grouped[match.input] << match }

  result = Hash.new { |hash, key| hash[key] = [] }
  grouped.each do |input, matches|
    path = File === input ? input.path : "(io #{input.object_id})"
    matches.each do |match|
      result[path] << (index ? match.breakdown[index] : match.breakdown)
    end
  end
  result
end
258
+
259
# Structure the matchdata according to the +repeat+ option: all matches
# in repeat mode, only the first one otherwise.
def structure
  return structure_repeat if repeat

  structure_single
end
263
+
264
# Structure the matchdata for a single match: the first entry of the
# repeat structure, or nil when nothing matched.
def structure_single
  structure_repeat[0]
end
268
+
269
# Structure the matchdata for repeat matches.
#
# Returns one Array per match: the single group selected by +index+ when
# it is set; otherwise the capture groups, or the whole match when the
# expression has no capture groups.
def structure_repeat
  scan.map do |match|
    if index
      [match[index]]
    elsif match.size > 1
      match[1..-1]
    else
      [match[0]]
    end
  end
end
277
+
278
# Scan inputs for matches.
#
# Every input in +io+ is read and searched with +regex+. Returns a flat
# Array of Match objects (input plus MatchData), in input order and then
# match order.
def scan
  io.each_with_object([]) do |input, found|
    read(input).scan(regex) { found << Match.new(input, Regexp.last_match) }
  end
end
291
+
292
+ #
293
# Group all matches by the input they were found in.
#
# Returns a Hash of input => Array of Match; looking up an unknown key
# creates and stores an empty Array for it.
def mapping
  scan.each_with_object(Hash.new { |hash, key| hash[key] = [] }) do |match, grouped|
    grouped[match.input] << match
  end
end
300
+
301
# Return the text of +input+ through the shared Extractor input cache.
#
# TODO: unxml won't give correct char counts.
def read(input)
  return Extractor.input_cache(input)
  # if unxml
  #   txt.gsub(/\<(.*?)\>/, '')
  # else
  #   txt
  # end
end
310
+
311
# Return the 1-based line number of the +char+ position within the text
# of +io+, or nil when +char+ is nil.
#
# Only newlines strictly *before* +char+ are counted, so a position that
# is itself a newline is reported on the line it terminates rather than
# on the following one.
def line_at(io, char)
  return nil unless char

  read(io)[0...char].count("\n") + 1
end
315
+
316
# Separator placed between the groups of one match (DELIMINATOR_GROUP).
def deliminator_group; DELIMINATOR_GROUP; end
319
+
320
# Separator placed between matches in repeat mode (DELIMINATOR_RECORD).
def deliminator_record; DELIMINATOR_RECORD; end
323
+
324
# Commandline interface to Extractor.
#
# Parses +argv+ (destructively), builds an Extractor over the named files
# (or ARGF when no file is given) and prints the result to standard
# output.
#
# When neither --search nor --template is given, the first remaining
# argument is the expression. "/pattern/flags" is taken as a pattern
# (flags: e = escape, g = global, i = insensitive); anything else is
# taken as a template name.
#
# Exits with status 1 when a named file does not exist, and with status 0
# after printing --help. Raises ArgumentError when no expression was
# supplied at all.
def self.cli(argv=ARGV)
  require 'optparse'
  format  = nil
  options = {}
  parser = OptionParser.new do |opt|
    opt.on('--template', '-t NAME', "select a built-in regular expression") do |name|
      options[:template] = name
    end
    opt.on('--search', '-s PATTERN', "search for regular expression") do |re|
      options[:pattern] = re
    end
    opt.on('--index', '-n INT', "return a specific match index") do |int|
      options[:index] = int.to_i
    end
    opt.on('--insensitive', '-i', "case insensitive matching") do
      options[:insensitive] = true
    end
    opt.on('--multiline', '-m', "multiline matching") do
      options[:multiline] = true
    end
    #opt.on('--unxml', '-x', "ignore XML/HTML tags") do
    #  options[:unxml] = true
    #end
    opt.on('--global', '-g', "find all matching occurances") do
      options[:repeat] = true
    end
    opt.on('--yaml', '-y', "output in YAML format") do
      format = :yaml
    end
    opt.on('--json', '-j', "output in JSON format") do
      format = :json
    end
    opt.on('--detail', '-d', "provide match details") do
      # A plain boolean switch; it is only ever tested for truthiness.
      options[:detail] = true
    end
    opt.on('--[no-]ansi', "toggle ansi color") do |val|
      options[:ansi] = val
    end
    opt.on_tail('--debug', 'run in debug mode') do
      $DEBUG = true
    end
    opt.on_tail('--help', '-h', "display this lovely help message") do
      puts opt
      exit 0
    end
  end
  parser.parse!(argv)

  unless options[:pattern] or options[:template]
    re = argv.shift
    # Fail with a readable message instead of the TypeError that a nil
    # expression would otherwise cause deep inside the scan.
    raise ArgumentError, "no pattern or template given" unless re

    case re
    when /^\/(.*?)\/(\w*?)$/
      options[:pattern] = $1
      $2.split(//).each do |c|
        case c
        when 'e' then options[:escape] = true
        when 'g' then options[:repeat] = true
        when 'i' then options[:insensitive] = true
        end
      end
    else
      options[:template] = re
    end
  end

  files = argv

  files.each do |file|
    if !File.file?(file)
      $stderr.puts "No such file -- '#{file}'."
      exit 1
    end
  end

  # File.open (not Kernel#open) so an argument can never be interpreted
  # as anything but a local path.
  inputs = files.empty? ? [ARGF] : files.map { |f| File.open(f) }

  begin
    extract = new(*inputs, options)
    puts extract.to_s(format)
  ensure
    # Release the handles opened above; ARGF is left alone.
    inputs.each { |input| input.close if File === input && !input.closed? }
  end
end
411
+
412
+ #
413
# Wrap +str+ in the ANSI bold / reset escape codes when ANSI output is
# enabled; otherwise return +str+ unchanged.
#
# (The non-ANSI branch used to reference an undefined local +string+ and
# raised NameError whenever --no-ansi was combined with detailed output.)
def bold(str)
  ansi? ? "\e[1m#{str}\e[0m" : str
end
420
+
421
+
422
+ #
423
# A single match: the MatchData together with the input it was found in,
# plus helpers that report character offsets and line numbers.
class Match
  # The input that was searched (the key used with Extractor.input_cache).
  attr_reader :input

  # The underlying MatchData.
  attr_reader :match

  # input - the searched input
  # match - instance of MatchData
  def initialize(input, match)
    @input = input
    @match = match
  end

  # Delegate group access to the MatchData.
  def [](i)
    @match[i]
  end

  # Number of elements in the MatchData (whole match plus capture groups).
  def size
    @match.size
  end

  # Describe the whole match and every capture group.
  #
  # Returns an Array of Hashes with the keys 'index', 'line', 'char' and
  # 'text'. 'char' and 'line' are nil for a group that did not take part
  # in the match.
  def breakdown
    (0...match.size).map do |i|
      char = match.begin(i)
      {'index'=>i, 'line'=>line_at(char), 'char'=>char, 'text'=>match[i]}
    end
  end

  # Return [text, char, line] for the group at +index+.
  def info(index)
    char = match.begin(index)
    return match[index], char, line_at(char)
  end

  # Return the 1-based line number of the +char+ position within +text+,
  # or nil when +char+ is nil.
  #
  # Only newlines strictly before +char+ are counted, so a match that
  # starts on a newline is reported on the line that newline terminates
  # instead of the following one.
  def line_at(char)
    return nil unless char

    text[0...char].count("\n") + 1
  end

  # Full text of the input, fetched through the shared Extractor cache.
  def text
    Extractor.input_cache(input)
  end

end
477
+
478
+
479
+ end
480
+
481
+ end
1
482