getopt-declare 1.09.7
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/Getopt/Declare.rb +1638 -0
- data/lib/Getopt/DelimScanner.rb +1278 -0
- data/samples/cmdline_array.rb +25 -0
- data/samples/cmdline_basic.rb +31 -0
- data/samples/cmdline_code.rb +31 -0
- data/samples/cmdline_defer.rb +23 -0
- data/samples/cmdline_file.rb +38 -0
- data/samples/cmdline_inlines.rb +24 -0
- data/samples/cmdline_mid.rb +39 -0
- data/samples/cmdline_noargv.rb +29 -0
- data/samples/cmdline_parameters.rb +23 -0
- data/samples/cmdline_pvtype.rb +20 -0
- data/samples/cmdline_pvtype2.rb +20 -0
- data/samples/cmdline_regex.rb +27 -0
- data/samples/cmdline_singles.rb +28 -0
- data/samples/demo_cmdline.rb +70 -0
- data/samples/demo_csv.rb +49 -0
- data/samples/demo_interp.rb +44 -0
- data/samples/demo_shell.rb +37 -0
- metadata +55 -0
@@ -0,0 +1,1278 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
#
|
3
|
+
# A derivative of StringScanner that can scan for delimited constructs in
|
4
|
+
# addition to regular expressions. It is a loose port of the Text::Balanced
|
5
|
+
# module for Perl by Damian Conway <damian@cs.monash.edu.au>.
|
6
|
+
#
|
7
|
+
# == Synopsis
|
8
|
+
#
|
9
|
+
# se = DelimScanner::new( myString )
|
10
|
+
#
|
11
|
+
# == Authors
|
12
|
+
#
|
13
|
+
# * Michael Granger <ged@FaerieMUD.org>
|
14
|
+
# * Gonzalo Garramuno <GGarramuno@aol.com>
|
15
|
+
#
|
16
|
+
# Copyright (c) 2002, 2003 The FaerieMUD Consortium. Most rights reserved.
|
17
|
+
#
|
18
|
+
# This work is licensed under the Creative Commons Attribution License. To view
|
19
|
+
# a copy of this license, visit http://creativecommons.org/licenses/by/1.0 or
|
20
|
+
# send a letter to Creative Commons, 559 Nathan Abbott Way, Stanford, California
|
21
|
+
# 94305, USA.
|
22
|
+
#
|
23
|
+
# == Version
|
24
|
+
#
|
25
|
+
# $Id: DelimScanner.rb,v 1.1.1.1 2004/01/25 07:02:48 gga Exp $
|
26
|
+
#
|
27
|
+
# == History
|
28
|
+
#
|
29
|
+
# - Added :suffix hash key for returning rest (right) of matches, like Perl's
|
30
|
+
# Text::Balanced, on several methods.
|
31
|
+
# - Added one or two \ for backquoting brackets, as new ruby1.8 complains
|
32
|
+
#
|
33
|
+
|
34
|
+
require 'strscan'
|
35
|
+
require 'forwardable'
|
36
|
+
|
37
|
+
### Add some stuff to the String class to allow easy transformation to Regexp
|
38
|
+
### and in-place interpolation.
|
39
|
+
class String

  ### Compile the receiver into a Regexp and return it. The receiver is
  ### duplicated first, so the pattern does not share the original String.
  ###
  ### [<tt>casefold</tt>]
  ###   If true, the pattern is compiled with Regexp::IGNORECASE.
  ### [<tt>extended</tt>]
  ###   If true, the pattern is compiled with Regexp::EXTENDED.
  ###
  ### With both flags left at their defaults the result is the same as
  ### <tt>Regexp::new( self )</tt>.
  def to_re( casefold=false, extended=false )
    options = 0
    options |= Regexp::IGNORECASE if casefold
    options |= Regexp::EXTENDED if extended

    return Regexp::new( self.dup, options )
  end

  ### Ideas for String-interpolation stuff courtesy of Hal E. Fulton
  ### <hal9000@hypermetrics.com> via ruby-talk

  ### Evaluate the receiver as the body of a double-quoted String literal in
  ### the context of the Binding <tt>scope</tt>, so that any <tt>#{...}</tt>
  ### sequences in it are expanded there. Returns the resulting String.
  ### Raises a TypeError if <tt>scope</tt> is not a Binding.
  def interpolate( scope )
    unless scope.is_a?( Binding )
      raise TypeError, "Argument to interpolate must be a Binding, not "\
        "a #{scope.class.name}"
    end

    # $stderr.puts ">>> Interpolating '#{self}'..."

    # Double quotes are backslash-escaped so they cannot terminate the
    # literal that is built around the text below.
    # NOTE(review): the receiver is executed through eval; never call this on
    # text that comes from an untrusted source.
    copy = self.gsub( /"/, %q:\": )
    eval( '"' + copy + '"', scope )
  end

end
|
60
|
+
|
61
|
+
|
62
|
+
### A derivative of StringScanner that can scan for delimited constructs in
|
63
|
+
### addition to regular expressions.
|
64
|
+
class DelimScanner
|
65
|
+
|
66
|
+
### Scanner exception classes
|
67
|
+
# Raised by the match* helpers when the text at the scan pointer does not
# match; scanBracketed rescues it and records its message as the match error.
class MatchFailure < RuntimeError
end

# Raised by scanBracketed when the delimiter argument contains no usable
# bracket character.
class DelimiterError < RuntimeError
end
|
69
|
+
|
70
|
+
|
71
|
+
extend Forwardable
|
72
|
+
StringScanner.must_C_version
|
73
|
+
|
74
|
+
|
75
|
+
### Class constants
|
76
|
+
# Library version number, pulled out of the RCS revision keyword.
Version = /([\d\.]+)/.match( %q{$Revision: 1.1.1.1 $} )[1]

# RCS id keyword for this file (the <tt>$</tt> characters are the %q
# delimiters, so they are not part of the value).
Rcsid = %q$Id: DelimScanner.rb,v 1.1.1.1 2004/01/25 07:02:48 gga Exp $

# Pattern to match a valid XML name. The underscore is accepted in every
# position, not just the first: it is a legal name character throughout.
XmlName = '[a-zA-Z_:][a-zA-Z0-9_:.-]*'
|
81
|
+
|
82
|
+
|
83
|
+
### Namespace module for DelimString constants
|
84
|
+
module Default

  # Opening => closing delimiter pairs that scanCodeblock uses when the
  # caller does not supply its own.
  CodeblockDelimiters = { '{' => '}', 'begin' => 'end', 'do' => 'end' }

  # Default scanMultiple operations and their arguments: a one-element
  # Array holding a Hash of scan method name => argument list.
  MultipleFunctions = [
    { :scanVariable => [], :scanQuotelike => [], :scanCodeblock => [] },
  ]

end
|
102
|
+
include Default
|
103
|
+
|
104
|
+
|
105
|
+
### Define delegating methods that cast their argument to a Regexp from a
|
106
|
+
### String. This allows the scanner's scanning methods to be called with
|
107
|
+
### Strings in addition to Regexps. This was mostly stolen from
|
108
|
+
### forwardable.rb.
|
109
|
+
def self.def_casting_delegators( *methods )
  # For every name given, generate an instance method that turns a
  # non-Regexp argument into a Regexp (via to_s and String#to_re) and then
  # forwards the call to the same-named method of @scanner.
  methods.each do |name|
    source = <<-EOF
      def #{name}( pattern )
        pattern = pattern.to_s.to_re unless pattern.is_a?( Regexp )
        @scanner.#{name}( pattern )
      end
    EOF

    class_eval( source, "(--def_casting_delegators--)", 1 )
  end
end
|
119
|
+
|
120
|
+
|
121
|
+
### Create a new DelimScanner object for the specified <tt>string</tt>. If
|
122
|
+
### <tt>dup</tt> is <tt>true</tt>, a duplicate of the target string will be
|
123
|
+
### used instead of the one given. The target string will be frozen after
|
124
|
+
### the scanner is created.
|
125
|
+
def initialize( string, dup=true )
  # Start with no recorded match error and debugging switched off, then
  # wrap the target string in the StringScanner all scanning is delegated to.
  @matchError = nil
  @debugLevel = 0
  @scanner = StringScanner::new( string, dup )
end
|
130
|
+
|
131
|
+
|
132
|
+
|
133
|
+
######
|
134
|
+
public
|
135
|
+
######
|
136
|
+
|
137
|
+
# Here, some delegation trickery is done to make a DelimScanner behave like
|
138
|
+
# a StringScanner. Some methods are directly delegated, while some are
|
139
|
+
# delegated via a method which casts its argument to a Regexp first so some
|
140
|
+
# scanner methods can be called with Strings as well as Regexps.
|
141
|
+
|
142
|
+
# A list of delegated methods that need casting.
|
143
|
+
# A list of delegated methods that need casting.
NeedCastingDelegators = :scan, :skip, :match?, :check,
  :scan_until, :skip_until, :exist?, :check_until

# Delegate all StringScanner instance methods to the associated scanner
# object, except those that need a casting delegator, which uses an indirect
# delegation method. The method names are normalized to Symbols before the
# subtraction: instance_methods returns Strings on Ruby 1.8 but Symbols on
# later versions, where subtracting String names would exclude nothing.
def_delegators :@scanner,
  *( StringScanner.instance_methods(false).collect {|meth| meth.to_sym} -
     NeedCastingDelegators )

def_casting_delegators( *NeedCastingDelegators )
|
154
|
+
|
155
|
+
|
156
|
+
|
157
|
+
# The last match error encountered by the scanner
|
158
|
+
attr_accessor :matchError
|
159
|
+
protected :matchError= ; # ; is to work around a ruby-mode indent bug
|
160
|
+
|
161
|
+
# Debugging level
|
162
|
+
attr_accessor :debugLevel
|
163
|
+
|
164
|
+
|
165
|
+
|
166
|
+
### Returns <tt>true</tt> if the scanner has encountered a match error.
|
167
|
+
def matchError?
  # A match error is present whenever @matchError holds anything but nil.
  !@matchError.nil?
end
|
170
|
+
|
171
|
+
|
172
|
+
### Starting at the scan pointer, try to match a substring delimited by the
|
173
|
+
### specified <tt>delimiters</tt>, skipping the specified <tt>prefix</tt>
|
174
|
+
### and any character escaped by the specified <tt>escape</tt>
|
175
|
+
### character/s. If matched, advances the scan pointer and returns a Hash
|
176
|
+
### with the following key/value pairs on success:
|
177
|
+
###
|
178
|
+
### [<tt>:match</tt>]
|
179
|
+
### The text of the match, including delimiters.
|
180
|
+
### [<tt>:prefix</tt>]
|
181
|
+
### The matched prefix, if any.
|
182
|
+
###
|
183
|
+
### If the match fails, returns nil.
|
184
|
+
def scanDelimited( delimiters="'\"`", prefix='\\s*', escape='\\' )
  # Match a delimited substring at the scan pointer. Returns a Hash with the
  # keys :match (the delimited text) and :prefix (the skipped prefix, ""
  # when nothing was skipped), or nil after storing a message in matchError.
  # A nil argument selects the corresponding default.
  delimiters ||= "'\"`"
  prefix     ||= '\\s*'
  escape     ||= '\\'

  debugMsg( 1, "Scanning for delimited text: delim = (%s), prefix=(%s), escape=(%s)",
    delimiters, prefix, escape )
  self.matchError = nil

  # Try to match the prefix first to get the length
  unless (( prefixLength = self.match?(prefix.to_re) ))
    self.matchError = "Failed to match prefix '%s' at offset %d" %
      [ prefix, self.pointer ]
    return nil
  end

  # Now build a delimited pattern with the specified parameters.
  delimPattern = makeDelimPattern( delimiters, escape, prefix )
  debugMsg( 2, "Delimiter pattern is %s" % delimPattern.inspect )

  # Fail if no match
  unless (( matchedString = self.scan(delimPattern) ))
    self.matchError = "No delimited string found."
    return nil
  end

  # Slice the prefix by start/length: a 0..(prefixLength-1) range turns
  # into 0..-1 for an empty prefix and would return the whole match.
  return {
    :match  => matchedString[ prefixLength .. -1 ],
    :prefix => matchedString[ 0, prefixLength ],
  }
end
|
215
|
+
|
216
|
+
|
217
|
+
### Match using the #scanDelimited method, but only return the match or nil.
|
218
|
+
def extractDelimited( *args )
  # Run scanDelimited with the same arguments and keep only the matched text.
  found = scanDelimited( *args )
  return nil unless found
  found[:match]
end
|
222
|
+
|
223
|
+
|
224
|
+
### Starting at the scan pointer, try to match a substring delimited by the
|
225
|
+
### specified <tt>delimiters</tt>, skipping the specified <tt>prefix</tt>
|
226
|
+
### and any character escaped by the specified <tt>escape</tt>
|
227
|
+
### character/s. If matched, advances the scan pointer and returns the
|
228
|
+
### length of the matched string; if it fails the match, returns nil.
|
229
|
+
def skipDelimited( delimiters="'\"`", prefix='\\s*', escape='\\' )
  # A nil argument falls back to the same default as an omitted one.
  delimiters = "'\"`" unless delimiters
  prefix = '\\s*' unless prefix
  escape = '\\' unless escape

  self.matchError = nil

  # Skip over prefix plus delimited text in one go; the result is whatever
  # #skip returns for the generated pattern.
  pattern = makeDelimPattern( delimiters, escape, prefix )
  self.skip( pattern )
end
|
237
|
+
|
238
|
+
|
239
|
+
### Starting at the scan pointer, try to match a substring delimited by
|
240
|
+
### balanced <tt>delimiters</tt> of the type specified, after skipping the
|
241
|
+
### specified <tt>prefix</tt>. On a successful match, this method advances
|
242
|
+
### the scan pointer and returns a Hash with the following key/value pairs:
|
243
|
+
###
|
244
|
+
### [<tt>:match</tt>]
|
245
|
+
### The text of the match, including the delimiting brackets.
|
246
|
+
### [<tt>:prefix</tt>]
|
247
|
+
### The matched prefix, if any.
|
248
|
+
###
|
249
|
+
### On failure, returns nil.
|
250
|
+
def scanBracketed( delimiters="{([<", prefix='\s*' )
  # A nil argument falls back to the same default as an omitted one.
  delimiters = "{([<" unless delimiters
  prefix = '\s*' unless prefix
  prefix = prefix.to_re unless prefix.kind_of?( Regexp )

  debugMsg( 1, "Scanning for bracketed text: delimiters = (%s), prefix = (%s)",
    delimiters, prefix )

  self.matchError = nil

  # Quote characters named in the delimiter spec become an alternation
  # (nil when there are none); a 'q' in the spec turns on quotelike handling.
  quoteChars = delimiters.squeeze.split( // ).select {|char| char =~ /["'`]/ }
  qdel = quoteChars.empty? ? nil : quoteChars.join( '|' )
  quotelike = true if delimiters =~ /q/

  # Fold every bracket to its opening form and drop whatever is left over.
  bracketClass = Regexp.quote( '[\\](){}<>' )
  openers = delimiters.tr( '[](){}<>', '[[(({{<<' ).
    gsub( /[^#{bracketClass}]+/, '' ).squeeze

  # Derive the closing brackets; tr! returns nil when there was nothing to
  # translate, which means the spec held no bracket at all.
  closers = openers.dup
  unless closers.tr!( '[({<', '])}>' )
    raise DelimiterError, "Did not find a suitable bracket in delimiter: '#{delimiters}'"
  end

  # Build regexp source strings from both bracketing delimiter strings
  ldel = openers.split( // ).map {|ch| Regexp.quote(ch) }.join( '|' )
  rdel = closers.split( // ).map {|ch| Regexp.quote(ch) }.join( '|' )

  level = self.scanDepth
  origin = self.pointer

  begin
    return matchBracketed( prefix, ldel, qdel, quotelike, rdel )
  rescue MatchFailure => failure
    # An ordinary failed match: remember why, rewind, and report nil.
    debugMsg( level + 1, "Match error: %s" % failure.message )
    self.matchError = failure.message
    self.pointer = origin
    return nil
  rescue
    # Anything else is rewound and passed on to the caller unchanged.
    self.pointer = origin
    Kernel::raise
  end
end
|
299
|
+
|
300
|
+
|
301
|
+
### Match using the #scanBracketed method, but only return the match or nil.
|
302
|
+
def extractBracketed( *args )
  # Delegate to scanBracketed and hand back just the :match entry.
  hit = scanBracketed( *args )
  hit ? hit[:match] : nil
end
|
306
|
+
|
307
|
+
|
308
|
+
### Starting at the scan pointer, try to match a substring with
|
309
|
+
### #scanBracketed. On a successful match, this method advances the scan
|
310
|
+
### pointer and returns the length of the match, including the delimiters
|
311
|
+
### and any prefix that was skipped. On failure, returns nil.
|
312
|
+
def skipBracketed( *args )
  # Skip a bracketed construct with #scanBracketed (same arguments). On
  # success the scan pointer is left where #scanBracketed put it and the
  # combined length of the skipped prefix and the matched text is returned;
  # on failure the result is nil.
  #
  # The pointer is not rewound here, and the lengths come from the result
  # Hash; the method has no prefix of its own to measure.
  match = scanBracketed( *args ) or return nil
  return match[:match].to_s.length + match[:prefix].to_s.length
end
|
323
|
+
|
324
|
+
|
325
|
+
### Extracts and segments text from the scan pointer forward that occurs
|
326
|
+
### between (balanced) specified tags, after skipping the specified
|
327
|
+
### <tt>prefix</tt>. If the opentag argument is <tt>nil</tt>, a pattern which
|
328
|
+
### will match any standard HTML/XML tag will be used. If the
|
329
|
+
### <tt>closetag</tt> argument is <tt>nil</tt>, a pattern is created which
|
330
|
+
### prepends a <tt>/</tt> character to the matched opening tag, after any
|
331
|
+
### bracketing characters. The <tt>options</tt> argument is a Hash of one or
|
332
|
+
### more options which govern the matching operation. They are described in
|
333
|
+
### more detail in the Description section of 'lib/DelimScanner.rb'. On a
|
334
|
+
### successful match, this method advances the scan pointer and returns a Hash with the following key/value pairs:
|
335
|
+
###
|
336
|
+
### [<tt>:match</tt>]
|
337
|
+
### The text of the match, including the delimiting tags.
|
338
|
+
### [<tt>:prefix</tt>]
|
339
|
+
### The matched prefix, if any.
|
340
|
+
###
|
341
|
+
### On failure, returns nil.
|
342
|
+
def scanTagged( opentag=nil, closetag=nil, prefix='\s*', options={} )
  prefix = '\s*' unless prefix

  # Without an explicit opening tag, fall back to a generic tag pattern.
  ldel = opentag || %Q,<\\w+(?:#{ makeDelimPattern(%q:'":) }|[^>])*>,
  rdel = closetag
  unless options.kind_of?( Hash )
    raise ArgumentError, "Options argument must be a hash"
  end

  failmode = options[:fail]

  # :reject and :ignore may each be an Array of alternatives (joined with
  # '|'), a single value, or absent (treated as the empty string).
  bad, ignore = [ :reject, :ignore ].map do |key|
    value = options[key]
    value.is_a?( Array ) ? value.join( "|" ) : ( value || '' )
  end

  self.matchError = nil
  origin = self.pointer
  level = self.scanDepth

  begin
    return matchTagged( prefix, ldel, rdel, failmode, bad, ignore )
  rescue MatchFailure => failure
    # An ordinary failed match: remember why, rewind, and report nil.
    debugMsg( level + 1, "Match error: %s" % failure.message )
    self.matchError = failure.message
    self.pointer = origin
    return nil
  rescue
    # Anything else is rewound and passed on to the caller unchanged.
    self.pointer = origin
    Kernel::raise
  end
end
|
381
|
+
|
382
|
+
|
383
|
+
### Match using the #scanTagged method, but only return the match or nil.
|
384
|
+
def extractTagged( *args )
  # Thin wrapper over scanTagged that returns only the matched text.
  outcome = scanTagged( *args )
  if outcome
    outcome[:match]
  else
    nil
  end
end
|
388
|
+
|
389
|
+
|
390
|
+
### Starting at the scan pointer, try to match a substring with
|
391
|
+
### #scanTagged. On a successful match, this method advances the scan
|
392
|
+
### pointer and returns the length of the match, including any delimiters
|
393
|
+
### and any prefix that was skipped. On failure, returns nil.
|
394
|
+
def skipTagged( *args )
  # Skip a tagged construct with #scanTagged (same arguments). On success
  # the scan pointer is left where #scanTagged put it and the combined
  # length of the skipped prefix and the matched text is returned; on
  # failure the result is nil.
  #
  # The pointer is not rewound here, and the lengths come from the result
  # Hash; the method has no prefix of its own to measure.
  match = scanTagged( *args ) or return nil
  return match[:match].to_s.length + match[:prefix].to_s.length
end
|
405
|
+
|
406
|
+
|
407
|
+
# :NOTE:
|
408
|
+
# Since the extract_quotelike function isn't documented at all in
|
409
|
+
# Text::Balanced, I'm only guessing this is correct...
|
410
|
+
|
411
|
+
### Starting from the scan pointer, try to match any one of the various Ruby
|
412
|
+
### quotes and quotelike operators after skipping the specified
|
413
|
+
### <tt>prefix</tt>. Nested backslashed delimiters, embedded balanced
|
414
|
+
### bracket delimiters (for the quotelike operators), and trailing modifiers
|
415
|
+
### are all caught. If <tt>matchRawRegex</tt> is <tt>true</tt>, inline
|
416
|
+
### regexen (eg., <tt>/pattern/</tt>) are matched as well. Advances the scan
|
417
|
+
### pointer and returns a Hash with the following key/value pairs on
|
418
|
+
### success:
|
419
|
+
###
|
420
|
+
### [<tt>:match</tt>]
|
421
|
+
### The entire text of the match.
|
422
|
+
### [<tt>:prefix</tt>]
|
423
|
+
### The matched prefix, if any.
|
424
|
+
### [<tt>:quoteOp</tt>]
|
425
|
+
### The name of the quotelike operator (if any) (eg., '%Q', '%r', etc).
|
426
|
+
### [<tt>:leftDelim</tt>]
|
427
|
+
### The left delimiter of the first block of the operation.
|
428
|
+
### [<tt>:delimText</tt>]
|
429
|
+
### The text of the first block of the operation.
|
430
|
+
### [<tt>:rightDelim</tt>]
|
431
|
+
### The right delimiter of the first block of the operation.
|
432
|
+
### [<tt>:modifiers</tt>]
|
433
|
+
### The trailing modifiers on the operation (if any).
|
434
|
+
###
|
435
|
+
### On failure, returns nil.
|
436
|
+
def scanQuotelike( prefix='\s*', matchRawRegex=true )
  self.matchError = nil
  origin = self.pointer
  level = self.scanDepth

  begin
    return matchQuotelike( prefix, matchRawRegex )
  rescue MatchFailure => failure
    # An ordinary failed match: remember why, rewind, and report nil.
    debugMsg( level + 1, "Match error: %s" % failure.message )
    self.matchError = failure.message
    self.pointer = origin
    return nil
  rescue
    # Anything else is rewound and passed on to the caller unchanged.
    self.pointer = origin
    Kernel::raise
  end
end
|
458
|
+
|
459
|
+
|
460
|
+
### Match using the #scanQuotelike method, but only return the match or nil.
|
461
|
+
def extractQuotelike( *args )
  # Same arguments as scanQuotelike; only the :match text is returned.
  found = scanQuotelike( *args )
  return nil unless found
  found[:match]
end
|
465
|
+
|
466
|
+
|
467
|
+
### Starting at the scan pointer, try to match a substring with
|
468
|
+
### #scanQuotelike. On a successful match, this method advances the scan
|
469
|
+
### pointer and returns the length of the match, including any delimiters
|
470
|
+
### and any prefix that was skipped. On failure, returns nil.
|
471
|
+
def skipQuotelike( *args )
  # Skip a quote or quotelike construct with #scanQuotelike (same
  # arguments). On success the scan pointer is left where #scanQuotelike
  # put it and the combined length of the skipped prefix and the matched
  # text is returned; on failure the result is nil.
  #
  # The pointer is not rewound here, and the lengths come from the result
  # Hash; the method has no prefix of its own to measure.
  match = scanQuotelike( *args ) or return nil
  return match[:match].to_s.length + match[:prefix].to_s.length
end
|
482
|
+
|
483
|
+
|
484
|
+
### Starting from the scan pointer, try to match a Ruby variable after
|
485
|
+
### skipping the specified prefix.
|
486
|
+
def scanVariable( prefix='\s*' )
  self.matchError = nil
  origin = self.pointer
  level = self.scanDepth

  begin
    return matchVariable( prefix )
  rescue MatchFailure => failure
    # An ordinary failed match: remember why, rewind, and report nil.
    debugMsg( level + 1, "Match error: %s" % failure.message )
    self.matchError = failure.message
    self.pointer = origin
    return nil
  rescue
    # Anything else is rewound and passed on to the caller unchanged.
    self.pointer = origin
    Kernel::raise
  end
end
|
507
|
+
|
508
|
+
|
509
|
+
### Match using the #scanVariable method, but only return the match or nil.
|
510
|
+
def extractVariable( *args )
  # Delegate to scanVariable and hand back just the :match entry.
  hit = scanVariable( *args )
  hit ? hit[:match] : nil
end
|
514
|
+
|
515
|
+
|
516
|
+
### Starting at the scan pointer, try to match a substring with
|
517
|
+
### #scanVariable. On a successful match, this method advances the scan
|
518
|
+
### pointer and returns the length of the match, including any delimiters
|
519
|
+
### and any prefix that was skipped. On failure, returns nil.
|
520
|
+
def skipVariable( *args )
  # Skip a variable with #scanVariable (same arguments). On success the
  # scan pointer is left where #scanVariable put it and the combined length
  # of the skipped prefix and the matched text is returned; on failure the
  # result is nil.
  #
  # The pointer is not rewound here, and the lengths come from the result
  # Hash; the method has no prefix of its own to measure.
  match = scanVariable( *args ) or return nil
  return match[:match].to_s.length + match[:prefix].to_s.length
end
|
531
|
+
|
532
|
+
|
533
|
+
### Starting from the scan pointer, and skipping the specified
|
534
|
+
### <tt>prefix</tt>, try to recognize and match a balanced bracket-,
|
535
|
+
### do/end-, or begin/end-delimited substring that may contain unbalanced
|
536
|
+
### delimiters inside quotes or quotelike operations.
|
537
|
+
def scanCodeblock( innerDelim=CodeblockDelimiters, prefix='\s*', outerDelim=innerDelim )
  self.matchError = nil
  origin = self.pointer

  # A nil argument falls back to the same default as an omitted one.
  prefix = '\s*' unless prefix
  innerDelim = CodeblockDelimiters unless innerDelim
  outerDelim = innerDelim unless outerDelim

  # Count the scan* frames already on the call stack; the count is only
  # used as the debug level of the failure message.
  level = caller(1).grep(
    /in `scan(Variable|Tagged|Codeblock|Bracketed|Quotelike)'/
  ).length

  begin
    debugMsg 3, "------------------------------------"
    debugMsg 3, "Calling matchCodeBlock( %s, %s, %s )",
      prefix.inspect, innerDelim.inspect, outerDelim.inspect
    debugMsg 3, "------------------------------------"
    return matchCodeblock( prefix, innerDelim, outerDelim )
  rescue MatchFailure => failure
    # An ordinary failed match: remember why, rewind, and report nil.
    debugMsg( level + 1, "Match error: %s" % failure.message )
    self.matchError = failure.message
    self.pointer = origin
    return nil
  rescue
    # Anything else is rewound and passed on to the caller unchanged.
    self.pointer = origin
    Kernel::raise
  end
end
|
568
|
+
|
569
|
+
|
570
|
+
### Match using the #scanCodeblock method, but only return the match or nil.
|
571
|
+
def extractCodeblock( *args )
  # Thin wrapper over scanCodeblock that returns only the matched text.
  outcome = scanCodeblock( *args )
  if outcome
    outcome[:match]
  else
    nil
  end
end
|
575
|
+
|
576
|
+
|
577
|
+
### Starting at the scan pointer, try to match a substring with
|
578
|
+
### #scanCodeblock. On a successful match, this method advances the scan
|
579
|
+
### pointer and returns the length of the match, including any delimiters
|
580
|
+
### and any prefix that was skipped. On failure, returns nil.
|
581
|
+
def skipCodeblock( *args )
  # Skip a code block with #scanCodeblock (same arguments). On success the
  # scan pointer is left where #scanCodeblock put it and the combined
  # length of the skipped prefix and the matched text is returned; on
  # failure the result is nil.
  #
  # The pointer is not rewound here, and the lengths come from the result
  # Hash; the method has no prefix of its own to measure.
  match = scanCodeblock( *args ) or return nil
  return match[:match].to_s.length + match[:prefix].to_s.length
end
|
592
|
+
|
593
|
+
|
594
|
+
|
595
|
+
|
596
|
+
#########
|
597
|
+
protected
|
598
|
+
#########
|
599
|
+
|
600
|
+
### Scan the string from the scan pointer forward, skipping the specified
|
601
|
+
### <tt>prefix</tt> and trying to match a string delimited by bracketing
|
602
|
+
### delimiters <tt>ldel</tt> and <tt>rdel</tt> (Regexp objects), and quoting
|
603
|
+
### delimiters <tt>qdel</tt> (Regexp). If <tt>quotelike</tt> is
|
604
|
+
### <tt>true</tt>, Ruby quotelike constructs will also be honored.
|
605
|
+
def matchBracketed( prefix, ldel, qdel, quotelike, rdel )
  origin = self.pointer
  debugMsg( 2, "matchBracketed starting at pos = %d: prefix = %s, "\
    "ldel = %s, qdel = %s, quotelike = %s, rdel = %s",
    origin, prefix.inspect, ldel.inspect, qdel.inspect, quotelike.inspect,
    rdel.inspect )

  # The prefix has to match before anything else is attempted.
  unless self.skip( prefix )
    raise MatchFailure, "Did not find prefix: #{prefix.inspect}"
  end

  # Everything from here on belongs to the match proper.
  ldelpos = self.pointer
  debugMsg( 3, "Found prefix. Left delim pointer at %d", ldelpos )

  # An opening bracket must come next.
  opener = self.scan( ldel )
  unless opener
    raise MatchFailure, "Did not find opening bracket after prefix: '%s' (%d)" %
      [ self.string[origin..ldelpos].chomp, ldelpos ]
  end

  # Stack of the opening brackets that are still waiting for their partner.
  nesting = [ opener ]
  debugMsg( 3, "Found opening bracket. Nesting = %s", nesting.inspect )

  while self.rest?

    debugMsg( 5, "Starting scan loop. Nesting = %s", nesting.inspect )

    # A backslash hides the character after it from all the checks below.
    if self.skip( /\\./ )
      debugMsg( 4, "Skipping backslashed literal at offset %d: '%s'",
        self.pointer - 2, self.string[ self.pointer - 2, 2 ].chomp )
      next
    end

    if self.scan( ldel )
      # One level deeper.
      bracket = self.matched
      debugMsg( 4, "Found opening delim %s at offset %d",
        bracket.inspect, self.pointer - 1 )
      nesting.push bracket

    elsif self.scan( rdel )
      # One level up, provided the bracket is the right kind.
      bracket = self.matched

      debugMsg( 4, "Found closing delim %s at offset %d",
        bracket.inspect, self.pointer - 1 )

      # :TODO: When is this code reached?
      if nesting.empty?
        raise MatchFailure, "Unmatched closing bracket '%s' at offset %d" %
          [ bracket, self.pointer - 1 ]
      end

      # The innermost open bracket determines which closer is acceptable
      # (its complement).
      expected = nesting.pop.tr( '({[<', ')}]>' )
      debugMsg( 4, "Got a '%s' bracket off nesting stack", expected )

      unless expected == bracket
        mismatch = "Mismatched closing bracket at offset %d: "\
          "Expected '%s', but found '%s' instead."
        raise MatchFailure, mismatch % [ self.pointer - 1, expected, bracket ]
      end

      # The outermost bracket has just been closed: the match is complete.
      if nesting.empty?
        debugMsg( 4, "Finished with scan: nesting stack empty." )
        break
      end

    elsif qdel && self.scan( qdel )
      # A quoted chunk: jump to its closing quote so that brackets inside
      # it are not counted.
      quote = self.matched

      if self. scan( /[^\\#{quote}]*(?:\\.[^\\#{quote}]*)*(#{Regexp::quote(quote)})/ )
        debugMsg( 4, "Skipping quoted chunk. Scan pointer now at offset %d", self.pointer )
        next
      end

      raise MatchFailure, "Unmatched embedded quote (%s) at offset %d" %
        [ quote, self.pointer - 1 ]

    elsif quotelike && self.scanQuotelike
      # An embedded quotelike construct was consumed as a whole.
      debugMsg( 4, "Matched a quotelike. Scan pointer now at offset %d", self.pointer )
      next

    else
      # Nothing special here: step over a run of alphanumerics or one
      # character of any other kind.
      self.skip( /(?:[a-zA-Z0-9]+|.)/m )
      debugMsg 5, "Skipping '%s' at offset %d." %
        [ self.matched, self.pointer ]
    end

  end

  # Running out of text with brackets still open means a closer is missing.
  unless nesting.empty?
    raise MatchFailure, "Unmatched opening bracket(s): %s.. at offset %d" %
      [ nesting.join('..'), self.pointer ]
  end

  rval = {
    :match  => self.string[ ldelpos .. (self.pointer - 1) ],
    :prefix => self.string[ origin, (ldelpos - origin) ],
    :suffix => self.string[ self.pointer..-1 ],
  }
  debugMsg 1, "matchBracketed succeeded: %s" % rval.inspect
  return rval
end
|
720
|
+
|
721
|
+
|
722
|
+
### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and
|
723
|
+
### try to match text bracketed by the given left and right tag-delimiters
|
724
|
+
### (<tt>ldel</tt> and <tt>rdel</tt>).
|
725
|
+
def matchTagged( prefix, ldel, rdel, failmode, bad, ignore )
|
726
|
+
failmode = failmode.to_s.intern if failmode
|
727
|
+
startPos = self.pointer
|
728
|
+
debugMsg 2, "matchTagged starting at pos = %d: prefix = %s, "\
|
729
|
+
"ldel = %s, rdel = %s, failmode = %s, bad = %s, ignore = %s",
|
730
|
+
startPos, prefix.inspect, ldel.inspect, rdel.inspect,
|
731
|
+
failmode.inspect, bad.inspect, ignore.inspect
|
732
|
+
|
733
|
+
rdelspec = ''
|
734
|
+
openTagPos, textPos, paraPos, closeTagPos, endPos = ([nil] * 5)
|
735
|
+
match = nil
|
736
|
+
|
737
|
+
# Look for the prefix
|
738
|
+
raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/" unless
|
739
|
+
self.skip( prefix )
|
740
|
+
|
741
|
+
openTagPos = self.pointer
|
742
|
+
debugMsg 3, "Found prefix. Pointer now at offset %d" % self.pointer
|
743
|
+
|
744
|
+
# Look for the opening delimiter
|
745
|
+
unless (( match = self.scan(ldel) ))
|
746
|
+
raise MatchFailure, "Did not find opening tag %s at offset %d" %
|
747
|
+
[ ldel.inspect, self.pointer ]
|
748
|
+
end
|
749
|
+
|
750
|
+
textPos = self.pointer
|
751
|
+
debugMsg 3, "Found left delimiter '%s': offset now %d" % [ match, textPos ]
|
752
|
+
|
753
|
+
# Make a right delim out of the tag we found if none was specified
|
754
|
+
if rdel.nil?
|
755
|
+
rdelspec = makeClosingTag( match )
|
756
|
+
debugMsg 3, "Generated right-delimiting tag: %s" % rdelspec.inspect
|
757
|
+
else
|
758
|
+
# Make the regexp-related globals from the match
|
759
|
+
rdelspec = rdel.gsub( /(\A|[^\\])\$([1-9])/, '\1self[\2]' ).interpolate( binding )
|
760
|
+
debugMsg 3, "Right delimiter (after interpolation) is: %s" % rdelspec.inspect
|
761
|
+
end
|
762
|
+
|
763
|
+
# Process until we reach the end of the string or find a closing tag
|
764
|
+
while self.rest? && closeTagPos.nil?
|
765
|
+
|
766
|
+
# Skip backslashed characters
|
767
|
+
if (( self.skip( /^\\./ ) ))
|
768
|
+
debugMsg 4, "Skipping backslashed literal at offset %d" % self.pointer
|
769
|
+
next
|
770
|
+
|
771
|
+
# Match paragraphs-break for fail == :para
|
772
|
+
elsif (( matchlength = self.skip( /^(\n[ \t]*\n)/ ) ))
|
773
|
+
paraPos ||= self.pointer - matchlength
|
774
|
+
debugMsg 4, "Found paragraph position at offset %d" % paraPos
|
775
|
+
|
776
|
+
# Match closing tag
|
777
|
+
elsif (( matchlength = self.skip( rdelspec ) ))
|
778
|
+
closeTagPos = self.pointer - matchlength
|
779
|
+
debugMsg 3, "Found closing tag at offset %d" % closeTagPos
|
780
|
+
|
781
|
+
# If we're ignoring anything, try to match and move beyond it
|
782
|
+
elsif ignore && !ignore.empty? && self.skip(ignore)
|
783
|
+
debugMsg 3, "Skipping ignored text '%s' at offset %d" %
|
784
|
+
[ self.matched, self.pointer - self.matched_size ]
|
785
|
+
next
|
786
|
+
|
787
|
+
# If there's a "bad" pattern, try to match it, shorting the
|
788
|
+
# outer loop if it matches in para or max mode, or failing with
|
789
|
+
# a match error if not.
|
790
|
+
elsif bad && !bad.empty? && self.match?( bad )
|
791
|
+
if failmode == :para || failmode == :max
|
792
|
+
break
|
793
|
+
else
|
794
|
+
raise MatchFailure, "Found invalid nested tag '%s' at offset %d" %
|
795
|
+
[ match, self.pointer ]
|
796
|
+
end
|
797
|
+
|
798
|
+
# If there's another opening tag, make a recursive call to
|
799
|
+
# ourselves to move the cursor beyond it
|
800
|
+
elsif (( match = self.scan( ldel ) ))
|
801
|
+
tag = match
|
802
|
+
self.unscan
|
803
|
+
|
804
|
+
unless self.matchTagged( prefix, ldel, rdel, failmode, bad, ignore )
|
805
|
+
break if failmode == :para || failmode == :max
|
806
|
+
|
807
|
+
raise MatchFailure, "Found unbalanced nested tag '%s' at offset %d" %
|
808
|
+
[ tag, self.pointer ]
|
809
|
+
end
|
810
|
+
|
811
|
+
else
|
812
|
+
self.pointer += 1
|
813
|
+
debugMsg 5, "Advanced scan pointer to offset %d" % self.pointer
|
814
|
+
end
|
815
|
+
end
|
816
|
+
|
817
|
+
# If the closing hasn't been found, then it's a "short" match, which is
|
818
|
+
# okay if the failmode indicates we don't care. Otherwise, it's an error.
|
819
|
+
unless closeTagPos
|
820
|
+
debugMsg 3, "No close tag position found. "
|
821
|
+
|
822
|
+
if failmode == :max || failmode == :para
|
823
|
+
closeTagPos = self.pointer - 1
|
824
|
+
debugMsg 4, "Failmode %s tolerates no closing tag. Close tag position set to %d" %
|
825
|
+
[ failmode.inspect, closeTagPos ]
|
826
|
+
|
827
|
+
# Sync the scan pointer and the paragraph marker if it's set.
|
828
|
+
if failmode == :para && paraPos
|
829
|
+
self.pointer = paraPos + 1
|
830
|
+
end
|
831
|
+
else
|
832
|
+
raise MatchFailure, "No closing tag found."
|
833
|
+
end
|
834
|
+
end
|
835
|
+
|
836
|
+
rval = {
|
837
|
+
:match => self.string[ openTagPos .. (self.pointer - 1) ],
|
838
|
+
:prefix => self.string[ startPos, (openTagPos-startPos) ],
|
839
|
+
:suffix => self.string[ self.pointer..-1 ],
|
840
|
+
}
|
841
|
+
debugMsg 1, "matchTagged succeeded: %s" % rval.inspect
|
842
|
+
return rval
|
843
|
+
end
|
844
|
+
|
845
|
+
|
846
|
+
### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and
|
847
|
+
### try to match text inside a Ruby quotelike construct. If
|
848
|
+
### <tt>matchRawRegex</tt> is <tt>true</tt>, the regex construct
|
849
|
+
### <tt>/pattern/</tt> is also matched.
|
850
|
+
def matchQuotelike( prefix, matchRawRegex )
  # Skip +prefix+ at the scan pointer, then match one quotelike construct:
  #
  # * a simple quoted string delimited by ", ` or ' (and, when
  #   +matchRawRegex+ is true, a raw /regex/ with trailing option letters);
  # * a %-operator (%r, %w, %q, %Q, %x or a bare %) followed by either a
  #   bracketing delimiter or any other non-space delimiter;
  # * a here-document (<<LABEL, <<-LABEL, or with a quoted label).
  #
  # Returns a Hash with :prefix, :match, :leftDelim, :delimText, :rightDelim
  # and :suffix; the first two forms add :modifiers, and the last two add
  # :quoteOp. Raises MatchFailure when the prefix or a construct is missing
  # or unterminated.
  startPos = self.pointer
  debugMsg 2, "matchQuotelike starting at pos = %d: prefix = %s, matchRawRegex = %s",
    startPos, prefix.inspect, matchRawRegex.inspect

  # Look for the prefix
  raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/" unless
    self.skip( prefix )
  oppos = self.pointer
  rval = nil

  # Option letters that may trail a regex literal
  modifierPattern = if RUBY_VERSION >= "1.7.3" then /[imoxs]*/ else /[imox]*/ end

  # If the initial quote is a simple quote, our job is easy
  if self.check( /^["`']/ ) || ( matchRawRegex && self.check(%r:/:) )
    initial = self.matched
    delim = Regexp.quote( initial )

    # Build the pattern for matching the simple string. The backslashes are
    # doubled twice over: once for the Ruby string literal and once for the
    # regex, so the result reads  D [^\\D]* (?:\\.[^\\D]*)* D  and a
    # backslash-escaped delimiter does not terminate the string.
    pattern = "%s [^\\\\%s]* (?:\\\\.[^\\\\%s]*)* %s" %
      [ delim, delim, delim, delim ]
    debugMsg 2, "Matching simple quote at offset %d with /%s/" %
      [ self.pointer, pattern ]

    # Search for it, raising an exception if it's not found. (No /s flag:
    # in Ruby that selects an encoding; /m is what makes '.' match newlines.)
    unless self.scan( /#{pattern}/xim )
      raise MatchFailure,
        "Did not find closing delimiter to match '%s' at '%s...' (offset %d)" %
        [ initial, self.string[ oppos, 20 ].chomp, self.pointer ]
    end

    modpos = self.pointer
    rdpos = modpos - 1

    # If we're matching a regex, look for any trailing modifiers
    self.scan( modifierPattern ) if initial == '/'

    rval = {
      :prefix     => self.string[ startPos, (oppos-startPos) ],
      :match      => self.string[ oppos .. (self.pointer - 1) ],
      :leftDelim  => self.string[ oppos, 1 ],
      :delimText  => self.string[ (oppos+1) .. (rdpos-1) ],
      :rightDelim => self.string[ rdpos, 1 ],
      :modifiers  => self.string[ modpos, (self.pointer-modpos) ],
      :suffix     => self.string[ self.pointer.. -1 ],
    }

  # If it's one of the fancy quotelike operators, our job is somewhat
  # complicated (though nothing like Perl's, thank the Goddess)
  elsif self.scan( %r:%[rwqQx]?(?=\S): )
    op = self.matched
    debugMsg 2, "Matching a real quotelike ('%s') at offset %d" %
      [ op, self.pointer ]
    modifiers = nil

    ldpos = self.pointer
    strpos = ldpos + 1

    # Peek ahead to see what the delimiter is
    ldel = self.check( /\S/ )

    # If it's a bracketing character, just use matchBracketed
    if ldel =~ /[\[(<{]/
      rdel = ldel.tr( '[({<', '])}>' )
      debugMsg 4, "Left delim is a bracket: %s; looking for compliment: %s" %
        [ ldel, rdel ]
      self.matchBracketed( '', Regexp::quote(ldel), nil, nil, Regexp::quote(rdel) )
    else
      debugMsg 4, "Left delim isn't a bracket: '#{ldel}'; looking for closing instance"
      # The delimiter can be a regex metacharacter (|, *, ., ...), so it
      # must be escaped before being interpolated into the pattern.
      qdel = Regexp.quote( ldel )
      unless self.scan( /#{qdel}[^\\#{qdel}]*(?:\\.[^\\#{qdel}]*)*#{qdel}/ )
        raise MatchFailure,
          "Can't find a closing delimiter '%s' at '%s...' (offset %d)" %
          [ ldel, self.rest[0,20].chomp, self.pointer ]
      end
    end
    rdelpos = self.pointer - 1

    # Match modifiers for Regexp quote
    modifiers = self.scan( modifierPattern ) || '' if op == '%r'

    rval = {
      :prefix     => self.string[ startPos, (oppos-startPos) ],
      :match      => self.string[ oppos .. (self.pointer - 1) ],
      :quoteOp    => op,
      :leftDelim  => self.string[ ldpos, 1 ],
      :delimText  => self.string[ strpos, (rdelpos-strpos) ],
      :rightDelim => self.string[ rdelpos, 1 ],
      :modifiers  => modifiers,
      :suffix     => self.string[ self.pointer.. -1 ],
    }

  # If it's a here-doc, things get even hairier.
  elsif self.scan( %r:<<(-)?: )
    debugMsg 2, "Matching a here-document at offset %d" % self.pointer
    op = self.matched

    # If there was a dash, start with optional whitespace
    indent = self[1] ? '\s*' : ''
    ldpos = self.pointer
    label = ''

    # Plain identifier
    if self.scan( /[A-Za-z_]\w*/ )
      label = self.matched
      debugMsg 3, "Setting heredoc terminator to bare identifier '%s'" % label

    # Quoted string
    elsif self.scan( / ' ([^'\\]* (?:\\.[^'\\]*)*) ' /mx ) ||
          self.scan( / " ([^"\\]* (?:\\.[^"\\]*)*) " /mx ) ||
          self.scan( / ` ([^`\\]* (?:\\.[^`\\]*)*) ` /mx )
      label = self[1]
      debugMsg 3, "Setting heredoc terminator to quoted identifier '%s'" % label

    # Ruby, unlike Perl, requires a terminal, even if it's only an empty
    # string
    else
      raise MatchFailure,
        "Missing heredoc terminator before end of line at '%s...' (offset %d)" %
        [ self.rest[0,20].chomp, self.pointer ]
    end
    extrapos = self.pointer

    # Advance to the beginning of the string
    self.skip( /.*\n/ )
    strpos = self.pointer
    debugMsg 3, "Scanning until /\\n#{indent}#{label}\\n/m"

    # Match to the label. A quoted label may hold regex metacharacters, so
    # it is escaped rather than interpolated raw.
    unless self.scan_until( /\n#{indent}#{Regexp.quote(label)}\n/m )
      raise MatchFailure,
        "Couldn't find heredoc terminator '%s' after '%s...' (offset %d)" %
        [ label, self.rest[0,20].chomp, self.pointer ]
    end

    rdpos = self.pointer - self.matched_size

    rval = {
      :prefix     => self.string[ startPos, (oppos-startPos) ],
      :match      => self.string[ oppos .. (self.pointer - 1) ],
      :quoteOp    => op,
      :leftDelim  => self.string[ ldpos, (extrapos-ldpos) ],
      :delimText  => self.string[ strpos, (rdpos-strpos) ],
      :rightDelim => self.string[ rdpos, (self.pointer-rdpos) ],
      :suffix     => self.string[ self.pointer.. -1 ],
    }

  else
    raise MatchFailure,
      "No quotelike operator found after prefix at '%s...'" %
      self.rest[0,20].chomp
  end

  debugMsg 1, "matchQuotelike succeeded: %s" % rval.inspect
  return rval
end
|
1017
|
+
|
1018
|
+
|
1019
|
+
### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and
|
1020
|
+
### try to match a valid Ruby variable or identifier, plus any chained method calls, element references or blocks.
|
1021
|
+
def matchVariable( prefix )
  # Skip +prefix+ at the scan pointer, then match a predefined global
  # ($1, $!, $-w, ...) or an identifier (optionally starting with *, $, @,
  # @@ or ::, and optionally namespaced with ::), followed by any chain of
  # method calls, element references and trailing blocks.
  #
  # Returns a Hash with :match, :prefix and :suffix. Raises MatchFailure
  # when the prefix or the identifier is not found.
  startPos = self.pointer
  debugMsg 2, "matchVariable starting at pos = %d: prefix = %s",
    startPos, prefix.inspect

  # Look for the prefix
  raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/" unless
    self.skip( prefix )

  varPos = self.pointer

  # If the variable matched is a predefined global, no need to look for an
  # identifier
  unless self.scan( %r~\$(?:[!@/\\,;.<>$?:_\~&`'+]|-\w|\d+)~ )

    debugMsg 2, "Not a predefined global at '%s...' (offset %d)" %
      [ self.rest[0,20].chomp, self.pointer ]

    # Look for a valid identifier. '@@' is accepted so class variables
    # match. (No /s flag: in Ruby it selects an encoding, not dot-all.)
    unless self.scan( /\*?(?:\$|@@?|::)?(?:[a-z_]\w*(?:::\s*))*[_a-z]\w*/i )
      raise MatchFailure, "No variable found: Bad identifier (offset %d)" % self.pointer
    end
  end

  debugMsg 2, "Matched '%s' at offset %d" % [ self.matched, self.pointer ]

  # Match methodchain with trailing codeblock
  while self.rest?
    # Match a regular chained method. \w* (not \w+) so that one-letter
    # method names are accepted.
    next if scanCodeblock( {"("=>")", "do"=>"end", "begin"=>"end", "{"=>"}"},
                           /\s*(?:\.|::)\s*[a-zA-Z_]\w*\s*/ )

    # Match a trailing block or an element ref
    next if scanCodeblock( nil, /\s*/, {'{' => '}', '[' => ']'} )

    # Match a method call without parens. This is a Regexp, not a String,
    # because StringScanner matches String patterns literally. The extra
    # \w in the lookahead stops the name from being shortened by
    # backtracking when it is followed by a bracket.
    next if self.scan( /\s*(\.|::)\s*\w+(?![\w{(\[])/ )

    break
  end

  rval = {
    :match  => self.string[ varPos .. (self.pointer - 1) ],
    :prefix => self.string[ startPos, (varPos-startPos) ],
    :suffix => self.string[ self.pointer..-1 ],
  }
  debugMsg 1, "matchVariable succeeded: %s" % rval.inspect
  return rval
end
|
1074
|
+
|
1075
|
+
|
1076
|
+
### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and
|
1077
|
+
### try to match text inside a Ruby code block construct which must be
|
1078
|
+
### delimited by the specified <tt>outerDelimPairs</tt>. It may optionally
|
1079
|
+
### contain sub-blocks delimited with the given <tt>innerDelimPairs</tt>.
|
1080
|
+
def matchCodeblock( prefix, innerDelimPairs, outerDelimPairs )
  # Skip +prefix+ at the scan pointer, then match a code block that opens
  # with one of the keys of +outerDelimPairs+ and closes with the
  # corresponding value. Inside the block, comments, variables, quotelikes,
  # a handful of operators and nested blocks (delimited by
  # +innerDelimPairs+) are skipped as units.
  #
  # Returns a Hash with :match, :prefix and :suffix. Raises MatchFailure
  # when the prefix or opening delimiter is missing, when a mismatched or
  # stray delimiter is found, or when the text ends before the closing
  # delimiter; raises DelimiterError when the opening delimiter has no
  # closing counterpart in +outerDelimPairs+.
  startPos = self.pointer
  debugMsg 2, "Starting matchCodeblock at offset %d (%s)", startPos, self.rest.inspect

  # Look for the prefix
  raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/" unless
    self.skip( prefix )
  codePos = self.pointer
  debugMsg 3, "Skipped prefix '%s' to offset %d" %
    [ self.matched, codePos ]

  # Build regexps for the outer delimiters. They are compiled here because
  # StringScanner matches String patterns literally.
  ldelimOuter = "(" + outerDelimPairs.keys.uniq.collect {|delim| Regexp::quote(delim)}.join('|') + ")"
  rdelimOuter = "(" + outerDelimPairs.values.uniq.collect {|delim| Regexp::quote(delim)}.join('|') + ")"
  openingPattern = Regexp.new( ldelimOuter )
  closingPattern = /\s*(#{rdelimOuter})/
  debugMsg 4, "Using /%s/ as the outer delim regex" % ldelimOuter

  unless self.scan( openingPattern )
    raise MatchFailure, "Did not find opening bracket at \"%s...\" offset %d" %
      [ self.rest[0,20].chomp, codePos ]
  end

  # Look up the corresponding outer delimiter
  closingDelim = outerDelimPairs[ self.matched ]
  unless closingDelim
    raise DelimiterError, "Could not find closing delimiter for '%s'" %
      self.matched
  end

  debugMsg 3, "Scanning for closing delim '#{closingDelim}'"

  # nil until the closing delimiter is found. (An empty String would be
  # truthy and hide an unterminated block from the check after the loop.)
  matched = nil
  patvalid = true

  # Scan until the end of the text or until an explicit break
  while self.rest?
    debugMsg 5, "Scanning from offset %d (%s)", self.pointer, self.rest.inspect

    # Skip comments
    debugMsg 5, "Trying to match a comment"
    if self.scan( /\s*#.*/ )
      debugMsg 4, "Skipping comment '%s' to offset %d" %
        [ self.matched, self.pointer ]
      next
    end

    # Look for (any) closing delimiter
    debugMsg 5, "Trying to match a closing outer delimiter with /\\s*(#{rdelimOuter})/"
    if self.scan( closingPattern )
      debugMsg 4, "Found a right delimiter '#{self.matched}'"

      # Anything but the delimiter we're looking for is an error, as we've
      # apparently seen a closing delimiter without a corresponding
      # opening one.
      unless self.matched.strip == closingDelim
        raise MatchFailure,
          "Mismatched closing bracket at \"%s...\" (offset %s). Expected '%s'" %
          [ self.rest[0,20], self.pointer, closingDelim ]
      end

      matched = self.matched
      debugMsg 3, "Found the closing delimiter we've been looking for (#{matched.inspect})."
      break
    end

    # Try to match a variable or a quoted phrase
    debugMsg 5, "Trying to match either a variable or quotelike"
    if self.scanVariable( '\s*' ) || self.scanQuotelike( '\s*', patvalid )
      debugMsg 3, "Matched either a variable or quotelike. Offset now %d" % self.pointer
      patvalid = false
      next
    end

    # Match some operators
    # :TODO: This hasn't really been ruby-ified
    debugMsg 5, "Trying to match an operator"
    if self.scan( %r:\s*([-+*x/%^&|.]=?
                      | [!=]~
                      | =(?!>)
                      | (\*\*|&&|\|\||<<|>>)=?
                      | split|grep|map|return
                      ):x )
      debugMsg 3, "Skipped miscellaneous operator '%s' to offset %d." %
        [ self.matched, self.pointer ]
      patvalid = true
      next
    end

    # Try to match an embedded codeblock
    debugMsg 5, "Trying to match an embedded codeblock with delim pairs: %s",
      innerDelimPairs.inspect
    if self.scanCodeblock( innerDelimPairs )
      debugMsg 3, "Skipped inner codeblock to offset %d." % self.pointer
      patvalid = true
      next
    end

    # Try to match a stray outer-left delimiter
    debugMsg 5, "Trying to match a stray outer-left delimiter (#{ldelimOuter})"
    if self.match?( openingPattern )
      raise MatchFailure, "Improperly nested codeblock at offset %d: %s... " %
        [ self.pointer, self.rest[0,20] ]
    end

    # Nothing recognised: skip one word, arrow or character and carry on
    patvalid = false
    self.scan( /\s*(\w+|[-=>]>|.|\Z)/m )
    debugMsg 3, "Skipped '%s' to offset %d" %
      [ self.matched, self.pointer ]
  end

  unless matched
    raise MatchFailure, "No match found for opening bracket"
  end

  rval = {
    :match  => self.string[ codePos .. (self.pointer - 1) ],
    :prefix => self.string[ startPos, (codePos-startPos) ],
    :suffix => self.string[ self.pointer..-1 ],
  }
  debugMsg 1, "matchCodeblock succeeded: %s" % rval.inspect
  return rval
end
|
1201
|
+
|
1202
|
+
|
1203
|
+
### Attempt to derive and return the number of scan methods traversed up to
|
1204
|
+
### this point by examining the call stack.
|
1205
|
+
def scanDepth
  # Count the scanVariable/scanTagged/scanCodeblock/scanBracketed/
  # scanQuotelike frames on the call stack, skipping the immediate caller.
  #
  # Backtrace lines read "in `scanTagged'" up to Ruby 3.3 and
  # "in 'SomeClass#scanTagged'" from Ruby 3.4 on, so the pattern accepts
  # either opening quote and an optional "Class#" / "Class." qualifier.
  # Block frames ("block in scanTagged") are deliberately not counted.
  frames = caller( 2 ) || []
  return frames.find_all {|frame|
    frame =~ /in [`'](?:\S+[#.])?scan(?:Variable|Tagged|Codeblock|Bracketed|Quotelike)'/
  }.length
end
|
1210
|
+
|
1211
|
+
|
1212
|
+
#######
|
1213
|
+
private
|
1214
|
+
#######
|
1215
|
+
|
1216
|
+
### Print the specified <tt>message</tt> to STDERR if the scanner's
|
1217
|
+
### debugging level is greater than or equal to <tt>level</tt>.
|
1218
|
+
def debugMsg( level, msgFormat, *args )
  # Write a debugging message to $stderr when +level+ is positive and does
  # not exceed self.debugLevel. +msgFormat+ is used verbatim when no +args+
  # are given (so pre-formatted messages containing '%' are safe);
  # otherwise it is expanded with Kernel#format. The message is indented
  # two spaces for each level above 1.
  #
  # Zero or negative levels are treated as "never print"; a negative level
  # would otherwise ask for a negative-width indent and raise.
  return unless level > 0 && self.debugLevel >= level

  msg = args.empty? ? msgFormat : format( msgFormat, *args )
  $stderr.puts( ("  " * (level - 1)) + msg )
end
|
1223
|
+
|
1224
|
+
|
1225
|
+
### Given a series of one or more bracket characters (eg., '<', '[', '{',
|
1226
|
+
### etc.), return the brackets reversed in order and direction.
|
1227
|
+
def revbracket( bracket )
  # Convert +bracket+ to a String, reverse it, and swap each opening
  # bracket (<, [, {, () for its closing counterpart, so that "<[" becomes
  # "]>". Other characters are left as they are.
  return bracket.to_s.reverse.tr( '<[{(', '>]})' )
end
|
1230
|
+
|
1231
|
+
|
1232
|
+
### Given an opening <tt>tag</tt> of the sort matched by #scanTagged,
|
1233
|
+
### construct and return a closing tag.
|
1234
|
+
def makeClosingTag( tag )
  # Build the closing counterpart of an opening +tag+: the leading run of
  # bracket characters, a '/', the tag name (as matched by XmlName), and
  # the brackets reversed by revbracket. The result is passed through
  # Regexp.quote, so it can be embedded in a pattern as literal text.
  #
  # Raises MatchFailure when +tag+ does not start with brackets followed by
  # a name.
  debugMsg 3, "Making a closing tag for '%s'" % tag

  # The '[' is escaped inside the character class: unescaped, Ruby's regex
  # engine reads it as the start of a nested class and rejects the pattern.
  opening = /\A([\[(<{]+)(#{XmlName})/.match( tag )
  raise MatchFailure, "Unable to construct closing tag to match: #{tag}" unless opening

  brackets = opening[1]
  name = opening[2]
  return Regexp.quote( "#{brackets}/#{name}" + revbracket(brackets) )
end
|
1244
|
+
|
1245
|
+
|
1246
|
+
### Make and return a new Regexp which matches substrings bounded by the
|
1247
|
+
### specified +delimiters+, not counting those which have been escaped with
|
1248
|
+
### the escape characters in +escapes+.
|
1249
|
+
def makeDelimPattern( delimiters, escapes='\\', prefix='\\s*' )
  # Build a Regexp that matches +prefix+ followed by a substring bounded by
  # any one character of +delimiters+. The i-th delimiter is escaped by the
  # i-th character of +escapes+; when +escapes+ is shorter its last
  # character is repeated, when it is longer the surplus is ignored, and
  # when it is empty a backslash is used. A delimiter that is its own
  # escape character is escaped by doubling it.
  #
  # Neither argument is modified. Raises DelimiterError when +delimiters+
  # contains no non-whitespace character.
  delimiters = delimiters.to_s
  escapes = escapes.to_s

  raise DelimiterError, "Illegal delimiter '#{delimiters}'" unless delimiters =~ /\S/

  # Pad the escapes string to the same length as the delimiters, building
  # a new String so the caller's object is left untouched
  escapes = '\\' if escapes.empty?
  shortfall = delimiters.length - escapes.length
  escapes = escapes + ( escapes[-1, 1] * shortfall ) if shortfall > 0

  # Escape each delimiter and a corresponding escape character, and then
  # build a pattern part from them
  patParts = ( 0...delimiters.length ).collect do |i|
    del = Regexp.escape( delimiters[i, 1] )
    esc = Regexp.escape( escapes[i, 1] )

    if del == esc
      "#{del}(?:[^#{del}]*(?:(?:#{del}#{del})[^#{del}]*)*)#{del}"
    else
      "#{del}(?:[^#{esc}#{del}]*(?:#{esc}.[^#{esc}#{del}]*)*)#{del}"
    end
  end

  # Join all the parts together and return one big pattern
  return Regexp::new( "#{prefix}(?:#{patParts.join("|")})" )
end
|
1275
|
+
|
1276
|
+
end # class StringExtractor
|
1277
|
+
|
1278
|
+
|