getopt-declare 1.09.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/Getopt/Declare.rb +1638 -0
- data/lib/Getopt/DelimScanner.rb +1278 -0
- data/samples/cmdline_array.rb +25 -0
- data/samples/cmdline_basic.rb +31 -0
- data/samples/cmdline_code.rb +31 -0
- data/samples/cmdline_defer.rb +23 -0
- data/samples/cmdline_file.rb +38 -0
- data/samples/cmdline_inlines.rb +24 -0
- data/samples/cmdline_mid.rb +39 -0
- data/samples/cmdline_noargv.rb +29 -0
- data/samples/cmdline_parameters.rb +23 -0
- data/samples/cmdline_pvtype.rb +20 -0
- data/samples/cmdline_pvtype2.rb +20 -0
- data/samples/cmdline_regex.rb +27 -0
- data/samples/cmdline_singles.rb +28 -0
- data/samples/demo_cmdline.rb +70 -0
- data/samples/demo_csv.rb +49 -0
- data/samples/demo_interp.rb +44 -0
- data/samples/demo_shell.rb +37 -0
- metadata +55 -0
@@ -0,0 +1,1278 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
#
|
3
|
+
# A derivative of StringScanner that can scan for delimited constructs in
|
4
|
+
# addition to regular expressions. It is a loose port of the Text::Balanced
|
5
|
+
# module for Perl by Damian Conway <damian@cs.monash.edu.au>.
|
6
|
+
#
|
7
|
+
# == Synopsis
|
8
|
+
#
|
9
|
+
# se = DelimScanner::new( myString )
|
10
|
+
#
|
11
|
+
# == Authors
|
12
|
+
#
|
13
|
+
# * Michael Granger <ged@FaerieMUD.org>
|
14
|
+
# * Gonzalo Garramuno <GGarramuno@aol.com>
|
15
|
+
#
|
16
|
+
# Copyright (c) 2002, 2003 The FaerieMUD Consortium. Most rights reserved.
|
17
|
+
#
|
18
|
+
# This work is licensed under the Creative Commons Attribution License. To view
|
19
|
+
# a copy of this license, visit http://creativecommons.org/licenses/by/1.0 or
|
20
|
+
# send a letter to Creative Commons, 559 Nathan Abbott Way, Stanford, California
|
21
|
+
# 94305, USA.
|
22
|
+
#
|
23
|
+
# == Version
|
24
|
+
#
|
25
|
+
# $Id: DelimScanner.rb,v 1.1.1.1 2004/01/25 07:02:48 gga Exp $
|
26
|
+
#
|
27
|
+
# == History
|
28
|
+
#
|
29
|
+
# - Added :suffix hash key for returning rest (right) of matches, like Perl's
|
30
|
+
# Text::Balanced, on several methods.
|
31
|
+
# - Added one or two \ for backquoting brackets, as new ruby1.8 complains
|
32
|
+
#
|
33
|
+
|
34
|
+
require 'strscan'
|
35
|
+
require 'forwardable'
|
36
|
+
|
37
|
+
### Add some stuff to the String class to allow easy transformation to Regexp
|
38
|
+
### and in-place interpolation.
|
39
|
+
class String

  ### Build a Regexp from the receiver's text.
  ###
  ### [<tt>casefold</tt>]
  ###   When true, the pattern is compiled case-insensitively.
  ### [<tt>extended</tt>]
  ###   When true, the pattern is compiled in extended (whitespace-ignoring)
  ###   mode.
  ###
  ### With both flags left at their defaults the result is the same as
  ### <tt>Regexp::new( string )</tt>.
  def to_re( casefold=false, extended=false )
    flags = 0
    flags |= Regexp::IGNORECASE if casefold
    flags |= Regexp::EXTENDED if extended

    return Regexp::new( self.dup, flags )
  end

  ### Ideas for String-interpolation stuff courtesy of Hal E. Fulton
  ### <hal9000@hypermetrics.com> via ruby-talk

  ### Evaluate the receiver as if it were the body of a double-quoted string
  ### literal, resolving any <tt>#{...}</tt> expressions against the given
  ### Binding <tt>scope</tt>, and return the resulting String. Raises a
  ### TypeError if <tt>scope</tt> is not a Binding.
  ###
  ### NOTE(review): the text is passed to Kernel#eval, so this must never be
  ### called on a string that comes from an untrusted source.
  def interpolate( scope )
    unless scope.is_a?( Binding )
      raise TypeError, "Argument to interpolate must be a Binding, not "\
        "a #{scope.class.name}"
    end

    # $stderr.puts ">>> Interpolating '#{self}'..."

    # Escape double quotes so the text can be wrapped in a "..." literal
    copy = self.gsub( /"/, %q:\": )
    eval( '"' + copy + '"', scope )
  end

end
|
60
|
+
|
61
|
+
|
62
|
+
### A derivative of StringScanner that can scan for delimited constructs in
|
63
|
+
### addition to regular expressions.
|
64
|
+
class DelimScanner
|
65
|
+
|
66
|
+
### Scanner exception classes
|
67
|
+
class MatchFailure < RuntimeError ; end
|
68
|
+
class DelimiterError < RuntimeError ; end
|
69
|
+
|
70
|
+
|
71
|
+
extend Forwardable
|
72
|
+
StringScanner.must_C_version
|
73
|
+
|
74
|
+
|
75
|
+
### Class constants
|
76
|
+
Version = /([\d\.]+)/.match( %q{$Revision: 1.1.1.1 $} )[1]
|
77
|
+
Rcsid = %q$Id: DelimScanner.rb,v 1.1.1.1 2004/01/25 07:02:48 gga Exp $
|
78
|
+
|
79
|
+
# Pattern to match a valid XML name
|
80
|
+
XmlName = '[a-zA-Z_:][a-zA-Z0-9:.-]*'
|
81
|
+
|
82
|
+
|
83
|
+
### Namespace module for DelimString constants
|
84
|
+
module Default
|
85
|
+
|
86
|
+
# The list of default opening => closing codeblock delimiters to use for
|
87
|
+
# scanCodeblock.
|
88
|
+
CodeblockDelimiters = {
|
89
|
+
'{' => '}',
|
90
|
+
'begin' => 'end',
|
91
|
+
'do' => 'end',
|
92
|
+
}
|
93
|
+
|
94
|
+
# Default scanMultiple operations and their arguments
|
95
|
+
MultipleFunctions = [
|
96
|
+
:scanVariable => [],
|
97
|
+
:scanQuotelike => [],
|
98
|
+
:scanCodeblock => [],
|
99
|
+
]
|
100
|
+
|
101
|
+
end
|
102
|
+
include Default
|
103
|
+
|
104
|
+
|
105
|
+
### Define delegating methods that cast their argument to a Regexp from a
|
106
|
+
### String. This allows the scanner's scanning methods to be called with
|
107
|
+
### Strings in addition to Regexps. This was mostly stolen from
|
108
|
+
### forwardable.rb.
|
109
|
+
def self.def_casting_delegators( *methods )
  # For every name given, generate an instance method that turns a
  # non-Regexp argument into a Regexp (via to_s.to_re) and then hands it to
  # the same-named method on @scanner.
  methods.each do |methodName|
    source = <<-EOF
      def #{methodName}( pattern )
        pattern = pattern.to_s.to_re unless pattern.is_a?( Regexp )
        @scanner.#{methodName}( pattern )
      end
    EOF

    class_eval( source, "(--def_casting_delegators--)", 1 )
  end
end
|
119
|
+
|
120
|
+
|
121
|
+
### Create a new DelimScanner object for the specified <tt>string</tt>. If
|
122
|
+
### <tt>dup</tt> is <tt>true</tt>, a duplicate of the target string will be
|
123
|
+
### used instead of the one given. The target string will be frozen after
|
124
|
+
### the scanner is created.
|
125
|
+
def initialize( string, dup=true )
  # No match error yet, and debugging output switched off
  @debugLevel = 0
  @matchError = nil

  # The wrapped scanner that does the actual work
  @scanner = StringScanner::new( string, dup )
end
|
130
|
+
|
131
|
+
|
132
|
+
|
133
|
+
######
|
134
|
+
public
|
135
|
+
######
|
136
|
+
|
137
|
+
# Here, some delegation trickery is done to make a DelimScanner behave like
|
138
|
+
# a StringScanner. Some methods are directly delegated, while some are
|
139
|
+
# delegated via a method which casts its argument to a Regexp first so some
|
140
|
+
# scanner methods can be called with Strings as well as Regexps.
|
141
|
+
|
142
|
+
# A list of delegated methods that need casting.
|
143
|
+
NeedCastingDelegators = :scan, :skip, :match?, :check,
|
144
|
+
:scan_until, :skip_until, :exist?, :check_until
|
145
|
+
|
146
|
+
# Delegate all StringScanner instance methods to the associated scanner
|
147
|
+
# object, except those that need a casting delegator, which uses an indirect
|
148
|
+
# delegation method.
|
149
|
+
# The names are compared as Symbols so the exclusion works whether
# instance_methods returns Strings or Symbols; otherwise the casting methods
# would be delegated here and then immediately redefined below.
def_delegators :@scanner,
  *( StringScanner.instance_methods(false).reject {|meth|
       NeedCastingDelegators.include?( meth.to_sym )
     } )
|
152
|
+
|
153
|
+
def_casting_delegators( *NeedCastingDelegators )
|
154
|
+
|
155
|
+
|
156
|
+
|
157
|
+
# The last match error encountered by the scanner
|
158
|
+
attr_accessor :matchError
|
159
|
+
protected :matchError= ; # ; is to work around a ruby-mode indent bug
|
160
|
+
|
161
|
+
# Debugging level
|
162
|
+
attr_accessor :debugLevel
|
163
|
+
|
164
|
+
|
165
|
+
|
166
|
+
### Returns <tt>true</tt> if the scanner has encountered a match error.
|
167
|
+
def matchError?
  # A nil @matchError means no error has been recorded
  @matchError.nil? ? false : true
end
|
170
|
+
|
171
|
+
|
172
|
+
### Starting at the scan pointer, try to match a substring delimited by the
|
173
|
+
### specified <tt>delimiters</tt>, skipping the specified <tt>prefix</tt>
|
174
|
+
### and any character escaped by the specified <tt>escape</tt>
|
175
|
+
### character/s. If matched, advances the scan pointer and returns a Hash
|
176
|
+
### with the following key/value pairs on success:
|
177
|
+
###
|
178
|
+
### [<tt>:match</tt>]
|
179
|
+
### The text of the match, including delimiters.
|
180
|
+
### [<tt>:prefix</tt>]
|
181
|
+
### The matched prefix, if any.
|
182
|
+
###
|
183
|
+
### If the match fails, returns nil.
|
184
|
+
def scanDelimited( delimiters="'\"`", prefix='\\s*', escape='\\' )
  # An explicit nil for any argument falls back to its default
  delimiters ||= "'\"`"
  prefix ||= '\\s*'
  escape ||= '\\'

  debugMsg( 1, "Scanning for delimited text: delim = (%s), prefix=(%s), escape=(%s)",
            delimiters, prefix, escape )
  self.matchError = nil

  # Try to match the prefix first to get the length
  unless (( prefixLength = self.match?(prefix.to_re) ))
    self.matchError = "Failed to match prefix '%s' at offset %d" %
      [ prefix, self.pointer ]
    return nil
  end

  # Now build a delimited pattern with the specified parameters.
  delimPattern = makeDelimPattern( delimiters, escape, prefix )
  debugMsg( 2, "Delimiter pattern is %s" % delimPattern.inspect )

  # Fail if no match
  unless (( matchedString = self.scan(delimPattern) ))
    self.matchError = "No delimited string found."
    return nil
  end

  # Split the scanned text into prefix and match. A start/length slice is
  # used for the prefix because the range 0..prefixLength-1 turns into 0..-1
  # (the whole string) when the prefix is empty.
  return {
    :match  => matchedString[ prefixLength .. -1 ],
    :prefix => matchedString[ 0, prefixLength ],
  }
end
|
215
|
+
|
216
|
+
|
217
|
+
### Match using the #scanDelimited method, but only return the match or nil.
|
218
|
+
def extractDelimited( *args )
  # Only the matched text is of interest; a failed scan yields nil
  found = scanDelimited( *args )
  found ? found[:match] : nil
end
|
222
|
+
|
223
|
+
|
224
|
+
### Starting at the scan pointer, try to match a substring delimited by the
|
225
|
+
### specified <tt>delimiters</tt>, skipping the specified <tt>prefix</tt>
|
226
|
+
### and any character escaped by the specified <tt>escape</tt>
|
227
|
+
### character/s. If matched, advances the scan pointer and returns the
|
228
|
+
### length of the matched string; if it fails the match, returns nil.
|
229
|
+
def skipDelimited( delimiters="'\"`", prefix='\\s*', escape='\\' )
  # Substitute the defaults for any argument passed as nil
  delimiters = "'\"`" unless delimiters
  prefix = '\\s*' unless prefix
  escape = '\\' unless escape

  self.matchError = nil

  # Skip over the prefix and delimited text in one go
  pattern = makeDelimPattern( delimiters, escape, prefix )
  return self.skip( pattern )
end
|
237
|
+
|
238
|
+
|
239
|
+
### Starting at the scan pointer, try to match a substring delimited by
|
240
|
+
### balanced <tt>delimiters</tt> of the type specified, after skipping the
|
241
|
+
### specified <tt>prefix</tt>. On a successful match, this method advances
|
242
|
+
### the scan pointer and returns a Hash with the following key/value pairs:
|
243
|
+
###
|
244
|
+
### [<tt>:match</tt>]
|
245
|
+
### The text of the match, including the delimiting brackets.
|
246
|
+
### [<tt>:prefix</tt>]
|
247
|
+
### The matched prefix, if any.
|
248
|
+
###
|
249
|
+
### On failure, returns nil.
|
250
|
+
def scanBracketed( delimiters="{([<", prefix='\s*' )
  # Fall back to the defaults for nil arguments
  delimiters = "{([<" unless delimiters
  prefix = '\s*' unless prefix

  prefix = prefix.to_re unless prefix.kind_of?( Regexp )

  debugMsg( 1, "Scanning for bracketed text: delimiters = (%s), prefix = (%s)",
            delimiters, prefix )

  self.matchError = nil

  # Quote characters in the delimiter string become the quote-delimiter
  # alternation (or nil if there are none); a 'q' switches on quotelike
  # handling.
  quoteChars = delimiters.squeeze.split(//).find_all {|char| char =~ /["'`]/ }
  qdel = quoteChars.empty? ? nil : quoteChars.join('|')
  quotelike = ( delimiters =~ /q/ ) ? true : nil

  # Normalise every bracket to its opening form and throw away everything
  # that is not a bracket.
  openers = delimiters.tr( '[](){}<>', '[[(({{<<' )
  openers = openers.gsub(/[^#{Regexp.quote('[\\](){}<>')}]+/, '').squeeze

  # Derive the closing brackets; tr! returns nil when nothing was replaced,
  # which means the delimiter string held no bracket at all.
  closers = openers.dup
  if closers.tr!( '[({<', '])}>' ).nil?
    raise DelimiterError, "Did not find a suitable bracket in delimiter: '#{delimiters}'"
  end

  # Turn both bracket lists into regexp alternations
  ldel = openers.split(//).collect {|ch| Regexp.quote(ch)}.join('|')
  rdel = closers.split(//).collect {|ch| Regexp.quote(ch)}.join('|')

  depth = self.scanDepth
  origin = self.pointer

  begin
    return matchBracketed( prefix, ldel, qdel, quotelike, rdel )
  rescue MatchFailure => failure
    # An ordinary failed match: record why, rewind, and report nil
    debugMsg( depth + 1, "Match error: %s" % failure.message )
    self.matchError = failure.message
    self.pointer = origin
    return nil
  rescue
    # Anything else is passed on to the caller after rewinding
    self.pointer = origin
    Kernel::raise
  end
end
|
299
|
+
|
300
|
+
|
301
|
+
### Match using the #scanBracketed method, but only return the match or nil.
|
302
|
+
def extractBracketed( *args )
  # Only the matched text is of interest; a failed scan yields nil
  found = scanBracketed( *args )
  found ? found[:match] : nil
end
|
306
|
+
|
307
|
+
|
308
|
+
### Starting at the scan pointer, try to match a substring with
|
309
|
+
### #scanBracketed. On a successful match, this method advances the scan
|
310
|
+
### pointer and returns the length of the match, including the delimiters
|
311
|
+
### and any prefix that was skipped. On failure, returns nil.
|
312
|
+
def skipBracketed( *args )
  startPos = self.pointer

  # scanBracketed rewinds the scan pointer itself when the match fails, so
  # there is nothing to undo here.
  return nil unless scanBracketed( *args )

  # On success the pointer is left just past the match, so the distance
  # travelled covers both the skipped prefix and the bracketed text.
  return self.pointer - startPos
end
|
323
|
+
|
324
|
+
|
325
|
+
### Extracts and segments text from the scan pointer forward that occurs
|
326
|
+
### between (balanced) specified tags, after skipping the specified
|
327
|
+
### <tt>prefix</tt>. If the opentag argument is <tt>nil</tt>, a pattern which
|
328
|
+
### will match any standard HTML/XML tag will be used. If the
|
329
|
+
### <tt>closetag</tt> argument is <tt>nil</tt>, a pattern is created which
|
330
|
+
### prepends a <tt>/</tt> character to the matched opening tag, after any
|
331
|
+
### bracketing characters. The <tt>options</tt> argument is a Hash of one or
|
332
|
+
### more options which govern the matching operation. They are described in
|
333
|
+
### more detail in the Description section of 'lib/DelimScanner.rb'. On a
|
334
|
+
### successful match, this method advances the scan pointer and returns a Hash with the following key/value pairs:
|
335
|
+
###
|
336
|
+
### [<tt>:match</tt>]
|
337
|
+
### The text of the match, including the delimiting tags.
|
338
|
+
### [<tt>:prefix</tt>]
|
339
|
+
### The matched prefix, if any.
|
340
|
+
###
|
341
|
+
### On failure, returns nil.
|
342
|
+
def scanTagged( opentag=nil, closetag=nil, prefix='\s*', options={} )
  prefix = '\s*' unless prefix

  # Without an explicit opening tag, build a pattern for a generic
  # <tag ...> that may carry quoted attribute values.
  ldel = opentag
  unless ldel
    ldel = %Q,<\\w+(?:#{ makeDelimPattern(%q:'":) }|[^>])*>,
  end
  rdel = closetag

  unless options.kind_of?( Hash )
    raise ArgumentError, "Options argument must be a hash"
  end

  # The :reject and :ignore options may each be a single pattern or an Array
  # of alternatives; either way they end up as one pattern string.
  asPattern = lambda {|spec| spec.is_a?( Array ) ? spec.join("|") : (spec || '') }
  failmode = options[:fail]
  bad = asPattern.call( options[:reject] )
  ignore = asPattern.call( options[:ignore] )

  self.matchError = nil
  origin = self.pointer
  depth = self.scanDepth

  begin
    return matchTagged( prefix, ldel, rdel, failmode, bad, ignore )
  rescue MatchFailure => failure
    # An ordinary failed match: record why, rewind, and report nil
    debugMsg( depth + 1, "Match error: %s" % failure.message )
    self.matchError = failure.message
    self.pointer = origin
    return nil
  rescue
    # Anything else is passed on to the caller after rewinding
    self.pointer = origin
    Kernel::raise
  end
end
|
381
|
+
|
382
|
+
|
383
|
+
### Match using the #scanTagged method, but only return the match or nil.
|
384
|
+
def extractTagged( *args )
  # Only the matched text is of interest; a failed scan yields nil
  found = scanTagged( *args )
  found ? found[:match] : nil
end
|
388
|
+
|
389
|
+
|
390
|
+
### Starting at the scan pointer, try to match a substring with
|
391
|
+
### #scanTagged. On a successful match, this method advances the scan
|
392
|
+
### pointer and returns the length of the match, including any delimiters
|
393
|
+
### and any prefix that was skipped. On failure, returns nil.
|
394
|
+
def skipTagged( *args )
  startPos = self.pointer

  # scanTagged rewinds the scan pointer itself when the match fails, so
  # there is nothing to undo here.
  return nil unless scanTagged( *args )

  # On success the pointer is left just past the match, so the distance
  # travelled covers both the skipped prefix and the tagged text.
  return self.pointer - startPos
end
|
405
|
+
|
406
|
+
|
407
|
+
# :NOTE:
|
408
|
+
# Since the extract_quotelike function isn't documented at all in
|
409
|
+
# Text::Balanced, I'm only guessing this is correct...
|
410
|
+
|
411
|
+
### Starting from the scan pointer, try to match any one of the various Ruby
|
412
|
+
### quotes and quotelike operators after skipping the specified
|
413
|
+
### <tt>prefix</tt>. Nested backslashed delimiters, embedded balanced
|
414
|
+
### bracket delimiters (for the quotelike operators), and trailing modifiers
|
415
|
+
### are all caught. If <tt>matchRawRegex</tt> is <tt>true</tt>, inline
|
416
|
+
### regexen (eg., <tt>/pattern/</tt>) are matched as well. Advances the scan
|
417
|
+
### pointer and returns a Hash with the following key/value pairs on
|
418
|
+
### success:
|
419
|
+
###
|
420
|
+
### [<tt>:match</tt>]
|
421
|
+
### The entire text of the match.
|
422
|
+
### [<tt>:prefix</tt>]
|
423
|
+
### The matched prefix, if any.
|
424
|
+
### [<tt>:quoteOp</tt>]
|
425
|
+
### The name of the quotelike operator (if any) (eg., '%Q', '%r', etc).
|
426
|
+
### [<tt>:leftDelim</tt>]
|
427
|
+
### The left delimiter of the first block of the operation.
|
428
|
+
### [<tt>:delimText</tt>]
|
429
|
+
### The text of the first block of the operation.
|
430
|
+
### [<tt>:rightDelim</tt>]
|
431
|
+
### The right delimiter of the first block of the operation.
|
432
|
+
### [<tt>:modifiers</tt>]
|
433
|
+
### The trailing modifiers on the operation (if any).
|
434
|
+
###
|
435
|
+
### On failure, returns nil.
|
436
|
+
def scanQuotelike( prefix='\s*', matchRawRegex=true )
  # Forget any failure left over from an earlier scan
  self.matchError = nil
  origin = self.pointer
  depth = self.scanDepth

  begin
    return matchQuotelike( prefix, matchRawRegex )
  rescue MatchFailure => failure
    # An ordinary failed match: record why, rewind, and report nil
    debugMsg( depth + 1, "Match error: %s" % failure.message )
    self.matchError = failure.message
    self.pointer = origin
    return nil
  rescue
    # Anything else is passed on to the caller after rewinding
    self.pointer = origin
    Kernel::raise
  end
end
|
458
|
+
|
459
|
+
|
460
|
+
### Match using the #scanQuotelike method, but only return the match or nil.
|
461
|
+
def extractQuotelike( *args )
  # Only the matched text is of interest; a failed scan yields nil
  found = scanQuotelike( *args )
  found ? found[:match] : nil
end
|
465
|
+
|
466
|
+
|
467
|
+
### Starting at the scan pointer, try to match a substring with
|
468
|
+
### #scanQuotelike. On a successful match, this method advances the scan
|
469
|
+
### pointer and returns the length of the match, including any delimiters
|
470
|
+
### and any prefix that was skipped. On failure, returns nil.
|
471
|
+
def skipQuotelike( *args )
  startPos = self.pointer

  # scanQuotelike rewinds the scan pointer itself when the match fails, so
  # there is nothing to undo here.
  return nil unless scanQuotelike( *args )

  # On success the pointer is left just past the match, so the distance
  # travelled covers both the skipped prefix and the quotelike text.
  return self.pointer - startPos
end
|
482
|
+
|
483
|
+
|
484
|
+
### Starting from the scan pointer, try to match a Ruby variable after
|
485
|
+
### skipping the specified prefix.
|
486
|
+
def scanVariable( prefix='\s*' )
  # Forget any failure left over from an earlier scan
  self.matchError = nil
  origin = self.pointer
  depth = self.scanDepth

  begin
    return matchVariable( prefix )
  rescue MatchFailure => failure
    # An ordinary failed match: record why, rewind, and report nil
    debugMsg( depth + 1, "Match error: %s" % failure.message )
    self.matchError = failure.message
    self.pointer = origin
    return nil
  rescue
    # Anything else is passed on to the caller after rewinding
    self.pointer = origin
    Kernel::raise
  end
end
|
507
|
+
|
508
|
+
|
509
|
+
### Match using the #scanVariable method, but only return the match or nil.
|
510
|
+
def extractVariable( *args )
  # Only the matched text is of interest; a failed scan yields nil
  found = scanVariable( *args )
  found ? found[:match] : nil
end
|
514
|
+
|
515
|
+
|
516
|
+
### Starting at the scan pointer, try to match a substring with
|
517
|
+
### #scanVariable. On a successful match, this method advances the scan
|
518
|
+
### pointer and returns the length of the match, including any delimiters
|
519
|
+
### and any prefix that was skipped. On failure, returns nil.
|
520
|
+
def skipVariable( *args )
  startPos = self.pointer

  # scanVariable rewinds the scan pointer itself when the match fails, so
  # there is nothing to undo here.
  return nil unless scanVariable( *args )

  # On success the pointer is left just past the match, so the distance
  # travelled covers both the skipped prefix and the variable text.
  return self.pointer - startPos
end
|
531
|
+
|
532
|
+
|
533
|
+
### Starting from the scan pointer, and skipping the specified
|
534
|
+
### <tt>prefix</tt>, try to recognize and match a balanced bracket-,
|
535
|
+
### do/end-, or begin/end-delimited substring that may contain unbalanced
|
536
|
+
### delimiters inside quotes or quotelike operations.
|
537
|
+
def scanCodeblock( innerDelim=CodeblockDelimiters, prefix='\s*', outerDelim=innerDelim )
  self.matchError = nil
  origin = self.pointer

  # Fall back to the defaults for nil arguments
  prefix = '\s*' unless prefix
  innerDelim = CodeblockDelimiters unless innerDelim
  outerDelim = innerDelim unless outerDelim

  # Count how many scan* methods are already on the call stack
  scanFrames = caller(1).select {|frame|
    frame =~ /in `scan(Variable|Tagged|Codeblock|Bracketed|Quotelike)'/
  }
  depth = scanFrames.length

  begin
    debugMsg( 3, "------------------------------------" )
    debugMsg( 3, "Calling matchCodeBlock( %s, %s, %s )",
              prefix.inspect, innerDelim.inspect, outerDelim.inspect )
    debugMsg( 3, "------------------------------------" )
    return matchCodeblock( prefix, innerDelim, outerDelim )
  rescue MatchFailure => failure
    # An ordinary failed match: record why, rewind, and report nil
    debugMsg( depth + 1, "Match error: %s" % failure.message )
    self.matchError = failure.message
    self.pointer = origin
    return nil
  rescue
    # Anything else is passed on to the caller after rewinding
    self.pointer = origin
    Kernel::raise
  end
end
|
568
|
+
|
569
|
+
|
570
|
+
### Match using the #scanCodeblock method, but only return the match or nil.
|
571
|
+
def extractCodeblock( *args )
  # Only the matched text is of interest; a failed scan yields nil
  found = scanCodeblock( *args )
  found ? found[:match] : nil
end
|
575
|
+
|
576
|
+
|
577
|
+
### Starting at the scan pointer, try to match a substring with
|
578
|
+
### #scanCodeblock. On a successful match, this method advances the scan
|
579
|
+
### pointer and returns the length of the match, including any delimiters
|
580
|
+
### and any prefix that was skipped. On failure, returns nil.
|
581
|
+
def skipCodeblock( *args )
  startPos = self.pointer

  # scanCodeblock rewinds the scan pointer itself when the match fails, so
  # there is nothing to undo here.
  return nil unless scanCodeblock( *args )

  # On success the pointer is left just past the match, so the distance
  # travelled covers both the skipped prefix and the code block.
  return self.pointer - startPos
end
|
592
|
+
|
593
|
+
|
594
|
+
|
595
|
+
|
596
|
+
#########
|
597
|
+
protected
|
598
|
+
#########
|
599
|
+
|
600
|
+
### Scan the string from the scan pointer forward, skipping the specified
|
601
|
+
### <tt>prefix</tt> and trying to match a string delimited by bracketing
|
602
|
+
### delimiters <tt>ldel</tt> and <tt>rdel</tt> (Regexp objects), and quoting
|
603
|
+
### delimiters <tt>qdel</tt> (Regexp). If <tt>quotelike</tt> is
|
604
|
+
### <tt>true</tt>, Ruby quotelike constructs will also be honored.
|
605
|
+
def matchBracketed( prefix, ldel, qdel, quotelike, rdel )
  origin = self.pointer
  debugMsg( 2, "matchBracketed starting at pos = %d: prefix = %s, "\
            "ldel = %s, qdel = %s, quotelike = %s, rdel = %s",
            origin, prefix.inspect, ldel.inspect, qdel.inspect, quotelike.inspect,
            rdel.inspect )

  # Give up straight away unless the prefix matches at the scan pointer
  unless self.skip( prefix )
    raise MatchFailure, "Did not find prefix: #{prefix.inspect}"
  end

  # Everything from here on belongs to the match proper
  openPos = self.pointer
  debugMsg( 3, "Found prefix. Left delim pointer at %d", openPos )

  # The very next thing has to be an opening bracket
  bracket = self.scan( ldel )
  unless bracket
    raise MatchFailure, "Did not find opening bracket after prefix: '%s' (%d)" %
      [ self.string[origin..openPos].chomp, openPos ]
  end

  # Stack of the brackets that are currently open
  stack = [ bracket ]
  debugMsg( 3, "Found opening bracket. Nesting = %s", stack.inspect )

  while self.rest?

    debugMsg( 5, "Starting scan loop. Nesting = %s", stack.inspect )

    # A backslash protects whatever character follows it
    if self.skip( /\\./ )
      debugMsg( 4, "Skipping backslashed literal at offset %d: '%s'",
                self.pointer - 2, self.string[ self.pointer - 2, 2 ].chomp )

    # Another opening bracket: one level deeper
    elsif self.scan( ldel )
      bracket = self.matched
      debugMsg( 4, "Found opening delim %s at offset %d",
                bracket.inspect, self.pointer - 1 )
      stack.push( bracket )

    # A closing bracket: it must pair with the most recent opener
    elsif self.scan( rdel )
      bracket = self.matched

      debugMsg( 4, "Found closing delim %s at offset %d",
                bracket.inspect, self.pointer - 1 )

      # :TODO: When is this code reached?
      if stack.empty?
        raise MatchFailure, "Unmatched closing bracket '%s' at offset %d" %
          [ bracket, self.pointer - 1 ]
      end

      # Work out which closing bracket complements the innermost opener
      wanted = stack.pop.tr( '({[<', ')}]>' )
      debugMsg( 4, "Got a '%s' bracket off nesting stack", wanted )

      unless wanted == bracket
        raise MatchFailure, "Mismatched closing bracket at offset %d: "\
          "Expected '%s', but found '%s' instead." %
          [ self.pointer - 1, wanted, bracket ]
      end

      # Once the outermost bracket is closed the match is complete
      if stack.empty?
        debugMsg( 4, "Finished with scan: nesting stack empty." )
        break
      end

    # A quote character: jump to its (unescaped) partner
    elsif qdel && self.scan( qdel )
      quote = self.matched

      unless self.scan( /[^\\#{quote}]*(?:\\.[^\\#{quote}]*)*(#{Regexp::quote(quote)})/ )
        raise MatchFailure, "Unmatched embedded quote (%s) at offset %d" %
          [ quote, self.pointer - 1 ]
      end

      debugMsg( 4, "Skipping quoted chunk. Scan pointer now at offset %d", self.pointer )

    # An embedded quotelike construct
    elsif quotelike && self.scanQuotelike
      debugMsg( 4, "Matched a quotelike. Scan pointer now at offset %d", self.pointer )

    # Anything else: step over a run of alphanumerics or one other character
    else
      self.skip( /(?:[a-zA-Z0-9]+|.)/m )
      debugMsg 5, "Skipping '%s' at offset %d." %
        [ self.matched, self.pointer ]
    end

  end

  # Running out of input with brackets still open is a failure
  unless stack.empty?
    raise MatchFailure, "Unmatched opening bracket(s): %s.. at offset %d" %
      [ stack.join('..'), self.pointer ]
  end

  found = {
    :match  => self.string[ openPos .. (self.pointer - 1) ],
    :prefix => self.string[ origin, (openPos-origin) ],
    :suffix => self.string[ self.pointer..-1 ],
  }
  debugMsg 1, "matchBracketed succeeded: %s" % found.inspect
  return found
end
|
720
|
+
|
721
|
+
|
722
|
+
### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and
|
723
|
+
### try to match text bracketed by the given left and right tag-delimiters
|
724
|
+
### (<tt>ldel</tt> and <tt>rdel</tt>).
|
725
|
+
def matchTagged( prefix, ldel, rdel, failmode, bad, ignore )
  # NOTE: the local variable names in this method are visible to the
  # closing-tag string, which is evaluated against this method's binding
  # further down, so they are best left alone.
  failmode = failmode.to_s.intern if failmode
  startPos = self.pointer
  debugMsg( 2, "matchTagged starting at pos = %d: prefix = %s, "\
            "ldel = %s, rdel = %s, failmode = %s, bad = %s, ignore = %s",
            startPos, prefix.inspect, ldel.inspect, rdel.inspect,
            failmode.inspect, bad.inspect, ignore.inspect )

  rdelspec = ''
  openTagPos = textPos = paraPos = closeTagPos = endPos = nil
  match = nil

  # Give up straight away unless the prefix matches at the scan pointer
  unless self.skip( prefix )
    raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/"
  end

  openTagPos = self.pointer
  debugMsg( 3, "Found prefix. Pointer now at offset %d" % self.pointer )

  # The opening tag has to come next
  match = self.scan( ldel )
  unless match
    raise MatchFailure, "Did not find opening tag %s at offset %d" %
      [ ldel.inspect, self.pointer ]
  end

  textPos = self.pointer
  debugMsg( 3, "Found left delimiter '%s': offset now %d" % [ match, textPos ] )

  # Work out what the closing tag looks like: derived from the opening tag
  # when none was given, otherwise taken from the caller's specification.
  if rdel.nil?
    rdelspec = makeClosingTag( match )
    debugMsg( 3, "Generated right-delimiting tag: %s" % rdelspec.inspect )
  else
    # Rewrite $1..$9 references, then interpolate in the current scope
    rdelspec = rdel.gsub( /(\A|[^\\])\$([1-9])/, '\1self[\2]' ).interpolate( binding )
    debugMsg( 3, "Right delimiter (after interpolation) is: %s" % rdelspec.inspect )
  end

  # Walk forward until the closing tag turns up or the input runs out
  while closeTagPos.nil? && self.rest?

    # A backslashed character is stepped over
    if (( self.skip( /^\\./ ) ))
      debugMsg( 4, "Skipping backslashed literal at offset %d" % self.pointer )

    # Remember where the first paragraph break was, for fail == :para
    elsif (( matchlength = self.skip( /^(\n[ \t]*\n)/ ) ))
      paraPos ||= self.pointer - matchlength
      debugMsg( 4, "Found paragraph position at offset %d" % paraPos )

    # The closing tag ends the loop
    elsif (( matchlength = self.skip( rdelspec ) ))
      closeTagPos = self.pointer - matchlength
      debugMsg( 3, "Found closing tag at offset %d" % closeTagPos )

    # Text matching the ignore pattern is stepped over
    elsif ignore && !ignore.empty? && self.skip(ignore)
      debugMsg( 3, "Skipping ignored text '%s' at offset %d" %
                [ self.matched, self.pointer - self.matched_size ] )

    # Text matching the reject pattern stops the scan: quietly in :para and
    # :max mode, with a match failure otherwise
    elsif bad && !bad.empty? && self.match?( bad )
      break if failmode == :para || failmode == :max

      raise MatchFailure, "Found invalid nested tag '%s' at offset %d" %
        [ match, self.pointer ]

    # A nested opening tag: back up and recurse to get past the nested element
    elsif (( match = self.scan( ldel ) ))
      tag = match
      self.unscan

      unless self.matchTagged( prefix, ldel, rdel, failmode, bad, ignore )
        break if failmode == :para || failmode == :max

        raise MatchFailure, "Found unbalanced nested tag '%s' at offset %d" %
          [ tag, self.pointer ]
      end

    # Nothing of interest here: move on by one
    else
      self.pointer += 1
      debugMsg( 5, "Advanced scan pointer to offset %d" % self.pointer )
    end
  end

  # Without a closing tag this is a "short" match, which only the :max and
  # :para fail modes put up with.
  if closeTagPos.nil?
    debugMsg( 3, "No close tag position found. " )

    unless failmode == :max || failmode == :para
      raise MatchFailure, "No closing tag found."
    end

    closeTagPos = self.pointer - 1
    debugMsg( 4, "Failmode %s tolerates no closing tag. Close tag position set to %d" %
              [ failmode.inspect, closeTagPos ] )

    # In :para mode the scan pointer goes back to the paragraph break, if
    # one was seen.
    self.pointer = paraPos + 1 if failmode == :para && paraPos
  end

  rval = {
    :match  => self.string[ openTagPos .. (self.pointer - 1) ],
    :prefix => self.string[ startPos, (openTagPos-startPos) ],
    :suffix => self.string[ self.pointer..-1 ],
  }
  debugMsg( 1, "matchTagged succeeded: %s" % rval.inspect )
  return rval
end
|
844
|
+
|
845
|
+
|
846
|
+
### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and
### try to match text inside a Ruby quotelike construct: a simple quoted
### string (<tt>"..."</tt>, <tt>'...'</tt> or <tt>`...`</tt>), a
### <tt>%</tt>-style quote operator (<tt>%</tt>, <tt>%q</tt>, <tt>%Q</tt>,
### <tt>%w</tt>, <tt>%r</tt>, <tt>%x</tt>), or a here-document. If
### <tt>matchRawRegex</tt> is <tt>true</tt>, the regex construct
### <tt>/pattern/</tt> is also matched.
###
### Returns a Hash with the keys <tt>:prefix</tt>, <tt>:match</tt>,
### <tt>:leftDelim</tt>, <tt>:delimText</tt>, <tt>:rightDelim</tt> and
### <tt>:suffix</tt>. Simple quotes and <tt>%</tt>-operators also carry
### <tt>:modifiers</tt>; <tt>%</tt>-operators and here-documents also carry
### <tt>:quoteOp</tt>.
###
### Raises a MatchFailure if the prefix, the opening construct, or the
### closing delimiter/terminator cannot be found.
def matchQuotelike( prefix, matchRawRegex )
  startPos = self.pointer
  debugMsg 2, "matchQuotelike starting at pos = %d: prefix = %s, "\
    "matchRawRegex = %s",
    startPos, prefix.inspect, matchRawRegex.inspect

  # Init position markers
  rval = oppos = ldpos = strpos = rdpos = modpos = nil

  # Look for the prefix
  raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/" unless
    self.skip( prefix )
  oppos = self.pointer

  # Peek at the next character
  # If the initial quote is a simple quote, our job is easy
  if self.check( /["`']/ ) || ( matchRawRegex && self.check(%r:/:) )

    initial = self.matched
    quoted = Regexp.quote( initial )

    # Build the pattern for matching the simple string. The backslashes are
    # doubled twice over (once for the Ruby string, once for the regex) so
    # the resulting regex excludes a literal backslash from the plain runs
    # and treats backslash + any character as an escape pair.
    pattern = "%s [^\\\\%s]* (?:\\\\.[^\\\\%s]*)* %s" %
      [ quoted, quoted, quoted, quoted ]
    debugMsg 2, "Matching simple quote at offset %d with /%s/" %
      [ self.pointer, pattern ]

    # Search for it, raising an exception if it's not found. Only /x and /m
    # are used: in Ruby a trailing 's' selects the Windows-31J encoding
    # rather than Perl's "dot matches newline" (which is /m in Ruby).
    unless self.scan( /#{pattern}/xm )
      raise MatchFailure,
        "Did not find closing delimiter to match '%s' at '%s...' (offset %d)" %
        [ initial, self.string[ oppos, 20 ].chomp, self.pointer ]
    end

    modpos = self.pointer
    rdpos = modpos - 1

    # If we're matching a regex, look for any trailing modifiers
    if initial == '/'
      pattern = if RUBY_VERSION >= "1.7.3" then /[imoxs]*/ else /[imox]*/ end
      self.scan( pattern )
    end

    rval = {
      :prefix     => self.string[ startPos, (oppos-startPos) ],
      :match      => self.string[ oppos .. (self.pointer - 1) ],
      :leftDelim  => self.string[ oppos, 1 ],
      :delimText  => self.string[ (oppos+1) .. (rdpos-1) ],
      :rightDelim => self.string[ rdpos, 1 ],
      :modifiers  => self.string[ modpos, (self.pointer-modpos) ],
      :suffix     => self.string[ self.pointer.. -1 ],
    }

  # If it's one of the fancy quotelike operators, our job is somewhat
  # complicated (though nothing like Perl's, thank the Goddess)
  elsif self.scan( %r:%[rwqQx]?(?=\S): )
    op = self.matched
    debugMsg 2, "Matching a real quotelike ('%s') at offset %d" %
      [ op, self.pointer ]
    modifiers = nil

    ldpos = self.pointer
    strpos = ldpos + 1

    # Peek ahead to see what the delimiter is
    ldel = self.check( /\S/ )

    # If it's a bracketing character, just use matchBracketed
    if ldel =~ /[\[(<{]/
      rdel = ldel.tr( '[({<', '])}>' )
      debugMsg 4, "Left delim is a bracket: %s; looking for compliment: %s" %
        [ ldel, rdel ]
      self.matchBracketed( '', Regexp::quote(ldel), nil, nil, Regexp::quote(rdel) )
    else
      debugMsg 4, "Left delim isn't a bracket: '#{ldel}'; looking for closing instance"

      # Quote the delimiter so that characters such as '|', '.', '*' or '^'
      # are matched literally instead of being read as regex operators.
      qdel = Regexp::quote( ldel )
      self.scan( /#{qdel}[^\\#{qdel}]*(?:\\.[^\\#{qdel}]*)*#{qdel}/ ) or
        raise MatchFailure,
          "Can't find a closing delimiter '%s' at '%s...' (offset %d)" %
          [ ldel, self.rest[0,20].chomp, self.pointer ]
    end
    rdelpos = self.pointer - 1

    # Match modifiers for Regexp quote
    if op == '%r'
      pattern = if RUBY_VERSION >= "1.7.3" then /[imoxs]*/ else /[imox]*/ end
      modifiers = self.scan( pattern ) || ''
    end

    rval = {
      :prefix     => self.string[ startPos, (oppos-startPos) ],
      :match      => self.string[ oppos .. (self.pointer - 1) ],
      :quoteOp    => op,
      :leftDelim  => self.string[ ldpos, 1 ],
      :delimText  => self.string[ strpos, (rdelpos-strpos) ],
      :rightDelim => self.string[ rdelpos, 1 ],
      :modifiers  => modifiers,
      :suffix     => self.string[ self.pointer.. -1 ],
    }

  # If it's a here-doc, things get even hairier.
  elsif self.scan( %r:<<(-)?: )
    debugMsg 2, "Matching a here-document at offset %d" % self.pointer
    op = self.matched

    # If there was a dash, start with optional whitespace
    indent = self[1] ? '\s*' : ''
    ldpos = self.pointer
    label = ''

    # Plain identifier
    if self.scan( /[A-Za-z_]\w*/ )
      label = self.matched
      debugMsg 3, "Setting heredoc terminator to bare identifier '%s'" % label

    # Quoted string (/m so an escaped newline is accepted; see the note on
    # the 's' flag above)
    elsif self.scan( / ' ([^'\\]* (?:\\.[^'\\]*)*) ' /mx ) ||
        self.scan( / " ([^"\\]* (?:\\.[^"\\]*)*) " /mx ) ||
        self.scan( / ` ([^`\\]* (?:\\.[^`\\]*)*) ` /mx )
      label = self[1]
      debugMsg 3, "Setting heredoc terminator to quoted identifier '%s'" % label

    # Ruby, unlike Perl, requires a terminal, even if it's only an empty
    # string
    else
      raise MatchFailure,
        "Missing heredoc terminator before end of line at "\
        "'%s...' (offset %d)" %
        [ self.rest[0,20].chomp, self.pointer ]
    end
    extrapos = self.pointer

    # Advance to the beginning of the string
    self.skip( /.*\n/ )
    strpos = self.pointer

    # A quoted label may contain regex metacharacters, so match it literally
    terminator = Regexp::quote( label )
    debugMsg 3, "Scanning until /\\n#{indent}#{terminator}\\n/m"

    # Match to the label
    unless self.scan_until( /\n#{indent}#{terminator}\n/m )
      raise MatchFailure,
        "Couldn't find heredoc terminator '%s' after '%s...' (offset %d)" %
        [ label, self.rest[0,20].chomp, self.pointer ]
    end

    # The terminator match includes the newline that precedes the label
    rdpos = self.pointer - self.matched_size

    rval = {
      :prefix     => self.string[ startPos, (oppos-startPos) ],
      :match      => self.string[ oppos .. (self.pointer - 1) ],
      :quoteOp    => op,
      :leftDelim  => self.string[ ldpos, (extrapos-ldpos) ],
      :delimText  => self.string[ strpos, (rdpos-strpos) ],
      :rightDelim => self.string[ rdpos, (self.pointer-rdpos) ],
      :suffix     => self.string[ self.pointer.. -1 ],
    }

  else
    raise MatchFailure,
      "No quotelike operator found after prefix at '%s...'" %
      self.rest[0,20].chomp
  end


  debugMsg 1, "matchQuotelike succeeded: %s" % rval.inspect
  return rval
end
|
1017
|
+
|
1018
|
+
|
1019
|
+
### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and
### try to match text that is a Ruby variable or identifier: either a
### predefined global (<tt>$!</tt>, <tt>$1</tt>, <tt>$-w</tt>, ...) or a
### possibly-scoped identifier with an optional <tt>*</tt>, <tt>$</tt>,
### <tt>@</tt>, <tt>@@</tt> or <tt>::</tt> sigil. Any chained method calls,
### trailing blocks and element references that follow are consumed too.
###
### Returns a Hash with the keys <tt>:match</tt>, <tt>:prefix</tt> and
### <tt>:suffix</tt>. Raises a MatchFailure if the prefix or an identifier
### cannot be found.
def matchVariable( prefix )
  startPos = self.pointer
  debugMsg 2, "matchVariable starting at pos = %d: prefix = %s",
    startPos, prefix.inspect

  # Look for the prefix
  raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/" unless
    self.skip( prefix )

  varPos = self.pointer

  # If the variable matched is a predefined global, no need to look for an
  # identifier
  unless self.scan( %r~\$(?:[!@/\\,;.<>$?:_\~&`'+]|-\w|\d+)~ )

    debugMsg 2, "Not a predefined global at '%s...' (offset %d)" %
      [ self.rest[0,20].chomp, self.pointer ]

    # Look for a valid identifier. Only /i is used: a trailing 's' in Ruby
    # selects the Windows-31J encoding, it is not Perl's /s. '@@?' lets
    # class variables through as well as instance variables.
    unless self.scan( /\*?(?:\$|@@?|::)?(?:[a-z_]\w*(?:::\s*))*[_a-z]\w*/i )
      raise MatchFailure, "No variable found: Bad identifier (offset %d)" % self.pointer
    end
  end

  debugMsg 2, "Matched '%s' at offset %d" % [ self.matched, self.pointer ]

  # Match methodchain with trailing codeblock
  while self.rest?
    # Match a regular chained method ('\w*' so that one-letter method names
    # are accepted too)
    next if scanCodeblock( {"("=>")", "do"=>"end", "begin"=>"end", "{"=>"}"},
                           /\s*(?:\.|::)\s*[a-zA-Z_]\w*\s*/ )

    # Match a trailing block or an element ref
    next if scanCodeblock( nil, /\s*/, {'{' => '}', '[' => ']'} )

    # This matched a dereferencer in Perl, which doesn't have any
    # equivalent in Ruby.
    #next if scanVariable( '\s*(\.|::)\s*' )

    # Match a method call without parens (?)
    next if self.scan( '\s*(\.|::)\s*\w+(?![{(\[])' )

    break
  end

  rval = {
    :match  => self.string[ varPos .. (self.pointer - 1) ],
    :prefix => self.string[ startPos, (varPos-startPos) ],
    :suffix => self.string[ self.pointer..-1 ],
  }
  debugMsg 1, "matchVariable succeeded: %s" % rval.inspect
  return rval
end
|
1074
|
+
|
1075
|
+
|
1076
|
+
### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and
### try to match text inside a Ruby code block construct which must be
### delimited by the specified <tt>outerDelimPairs</tt> (a Hash of opening
### delimiter => closing delimiter). It may optionally contain sub-blocks
### delimited with the given <tt>innerDelimPairs</tt>.
###
### Returns a Hash with the keys <tt>:match</tt>, <tt>:prefix</tt> and
### <tt>:suffix</tt>. Raises a MatchFailure if the prefix or opening
### delimiter is missing, if a mismatched or stray delimiter is seen, or if
### the text ends before the closing delimiter is found. Raises a
### DelimiterError if the opening delimiter has no entry in
### <tt>outerDelimPairs</tt>.
def matchCodeblock( prefix, innerDelimPairs, outerDelimPairs )
  startPos = self.pointer
  debugMsg 2, "Starting matchCodeblock at offset %d (%s)", startPos, self.rest.inspect

  # Look for the prefix
  raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/" unless
    self.skip( prefix )
  codePos = self.pointer
  debugMsg 3, "Skipped prefix '%s' to offset %d" %
    [ self.matched, codePos ]

  # Build a regexp for the outer delimiters
  ldelimOuter = "(" + outerDelimPairs.keys  .uniq.collect {|delim| Regexp::quote(delim)}.join('|') + ")"
  rdelimOuter = "(" + outerDelimPairs.values.uniq.collect {|delim| Regexp::quote(delim)}.join('|') + ")"
  debugMsg 4, "Using /%s/ as the outer delim regex" % ldelimOuter

  unless self.scan( ldelimOuter )
    raise MatchFailure, %q:Did not find opening bracket at "%s..." offset %d: %
      [ self.rest[0,20].chomp, codePos ]
  end

  # Look up the corresponding outer delimiter
  closingDelim = outerDelimPairs[self.matched] or
    raise DelimiterError, "Could not find closing delimiter for '%s'" %
      self.matched

  debugMsg 3, "Scanning for closing delim '#{closingDelim}'"

  # Stays nil until the closing delimiter is actually found. (An empty
  # string would be truthy in Ruby, which would make the failure check
  # after the loop unreachable.)
  matched = nil
  patvalid = true

  # Scan until the end of the text or until an explicit break
  while self.rest?
    debugMsg 5, "Scanning from offset %d (%s)", self.pointer, self.rest.inspect

    # Skip comments
    debugMsg 5, "Trying to match a comment"
    if self.scan( /\s*#.*/ )
      debugMsg 4, "Skipping comment '%s' to offset %d" %
        [ self.matched, self.pointer ]
      next
    end

    # Look for (any) closing delimiter
    debugMsg 5, "Trying to match a closing outer delimiter with /\\s*(#{rdelimOuter})/"
    if self.scan( /\s*(#{rdelimOuter})/ )
      debugMsg 4, "Found a right delimiter '#{self.matched}'"

      # If it's the delimiter we're looking for, stop the scan
      if self.matched.strip == closingDelim
        matched = self.matched
        debugMsg 3, "Found the closing delimiter we've been looking for (#{matched.inspect})."
        break

      # Otherwise, it's an error, as we've apparently seen a closing
      # delimiter without a corresponding opening one.
      else
        raise MatchFailure,
          %q:Mismatched closing bracket at "%s..." (offset %s). Expected '%s': %
          [ self.rest[0,20], self.pointer, closingDelim ]
      end
    end

    # Try to match a variable or a quoted phrase. patvalid tracks whether a
    # bare /regex/ is acceptable at this point (i.e. not right after a term).
    debugMsg 5, "Trying to match either a variable or quotelike"
    if self.scanVariable( '\s*' ) || self.scanQuotelike( '\s*', patvalid )
      debugMsg 3, "Matched either a variable or quotelike. Offset now %d" % self.pointer
      patvalid = false
      next
    end

    # Match some operators
    # :TODO: This hasn't really been ruby-ified
    debugMsg 5, "Trying to match an operator"
    if self.scan( %r:\s*([-+*x/%^&|.]=?
                       | [!=]~
                       | =(?!>)
                       | (\*\*|&&|\|\||<<|>>)=?
                       | split|grep|map|return
                       ):x )
      debugMsg 3, "Skipped miscellaneous operator '%s' to offset %d." %
        [ self.matched, self.pointer ]
      patvalid = true
      next
    end

    # Try to match an embedded codeblock
    debugMsg 5, "Trying to match an embedded codeblock with delim pairs: %s",
      innerDelimPairs.inspect
    if self.scanCodeblock( innerDelimPairs )
      debugMsg 3, "Skipped inner codeblock to offset %d." % self.pointer
      patvalid = true
      next
    end

    # Try to match a stray outer-left delimiter
    debugMsg 5, "Trying to match a stray outer-left delimiter (#{ldelimOuter})"
    if self.match?( ldelimOuter )
      raise MatchFailure, "Improperly nested codeblock at offset %d: %s... " %
        [ self.pointer, self.rest[0,20] ]
    end

    # Nothing special here: skip one word/operator/character and go on
    patvalid = false
    self.scan( /\s*(\w+|[-=>]>|.|\Z)/m )
    debugMsg 3, "Skipped '%s' to offset %d" %
      [ self.matched, self.pointer ]
  end


  # Ran out of text without ever seeing the closing delimiter
  unless matched
    raise MatchFailure, "No match found for opening bracket"
  end

  rval = {
    :match  => self.string[codePos .. (self.pointer - 1)],
    :prefix => self.string[startPos, (codePos-startPos)],
    :suffix => self.string[ self.pointer..-1 ],
  }
  debugMsg 1, "matchCodeblock succeeded: %s" % rval.inspect
  return rval
end
|
1201
|
+
|
1202
|
+
|
1203
|
+
### Attempt to derive and return the number of scan methods traversed up to
### this point by examining the call stack. Frames are counted when they
### belong to one of the <tt>scanVariable</tt>, <tt>scanTagged</tt>,
### <tt>scanCodeblock</tt>, <tt>scanBracketed</tt> or <tt>scanQuotelike</tt>
### methods; the immediate caller's own frame is not inspected.
def scanDepth
  # caller returns nil when the stack is shallower than the requested start
  frames = caller( 2 ) || []

  # Backtrace labels look like "in `scanVariable'" on older Rubies and
  # "in 'DelimScanner#scanVariable'" on Ruby 3.4+, so accept either quote
  # and an optional "Owner#" / "Owner." qualifier.
  scanFrame = /in [`'](?:\S+[#.])?scan(?:Variable|Tagged|Codeblock|Bracketed|Quotelike)'/

  return frames.grep( scanFrame ).length
end
|
1210
|
+
|
1211
|
+
|
1212
|
+
#######
|
1213
|
+
private
|
1214
|
+
#######
|
1215
|
+
|
1216
|
+
### Write a debugging message to $stderr, provided <tt>level</tt> is
### non-zero and does not exceed the scanner's current debugging level.
### When extra <tt>args</tt> are given, <tt>msgFormat</tt> is treated as a
### format string for them; otherwise it is printed verbatim. The message
### is indented according to <tt>level</tt>.
def debugMsg( level, msgFormat, *args )
  return if level.zero? || self.debugLevel < level

  text = args.empty? ? msgFormat : format( msgFormat, *args )
  padding = " " * (level-1) * 2
  $stderr.puts( padding + text )
end
|
1223
|
+
|
1224
|
+
|
1225
|
+
### Mirror a run of bracket characters: each opening bracket ('<', '[',
### '{', '(') is swapped for its closing counterpart and the order of the
### characters is reversed. The argument is converted with #to_s first;
### characters that are not opening brackets are left as they are.
def revbracket( bracket )
  flipped = bracket.to_s.tr( '<[{(', '>]})' )
  return flipped.reverse
end
|
1230
|
+
|
1231
|
+
|
1232
|
+
### Given an opening <tt>tag</tt> of the sort matched by #scanTagged,
### construct and return a closing tag: the tag's leading bracket(s), a
### slash, the tag name, and the mirrored bracket(s). The result is passed
### through Regexp.quote, so it can be interpolated into a pattern.
###
### Raises a MatchFailure if <tt>tag</tt> does not start with one or more
### opening brackets followed by a name matching XmlName.
def makeClosingTag( tag )
  debugMsg 3, "Making a closing tag for '%s'" % tag

  # The '[' inside the character class has to be escaped: on Ruby 1.9 and
  # later an unescaped '[' there opens a nested class.
  opening = /\A([\[(<{]+)(#{XmlName})/.match( tag )

  # String#gsub never returns nil, so the parts are extracted with an
  # explicit match instead; a tag we can't parse is an error rather than
  # something to hand back unchanged.
  raise MatchFailure, "Unable to construct closing tag to match: #{tag}" unless opening

  brackets, name = opening[1], opening[2]
  return Regexp.quote( "#{brackets}/#{name}" + revbracket(brackets) )
end
|
1244
|
+
|
1245
|
+
|
1246
|
+
### Make and return a new Regexp which matches substrings bounded by the
### specified +delimiters+, not counting those which have been escaped with
### the escape characters in +escapes+.
###
### Each character of +delimiters+ is paired with the character at the same
### position in +escapes+; if +escapes+ is shorter, its last character is
### reused, and if it is empty a backslash is used. When a delimiter is its
### own escape character, a doubled delimiter is treated as the escaped
### form. The +prefix+ pattern source is prepended to the result.
###
### Neither argument is modified. Raises a DelimiterError if +delimiters+
### contains no non-whitespace character.
def makeDelimPattern( delimiters, escapes='\\', prefix='\\s*' )
  delimiters = delimiters.to_s

  # Work on a copy so the caller's string is never padded in place
  escapes = escapes.to_s.dup
  escapes = '\\' if escapes.empty?

  raise DelimiterError, "Illegal delimiter '#{delimiters}'" unless delimiters =~ /\S/

  # Pad the escapes string to the same length as the delimiters
  shortfall = delimiters.length - escapes.length
  escapes << ( escapes[-1,1] * shortfall ) if shortfall > 0
  patParts = []

  # Escape each delimiter and a corresponding escape character, and then
  # build a pattern part from them
  delimiters.length.times do |i|
    del = Regexp.escape( delimiters[i, 1] )
    esc = Regexp.escape( escapes[i, 1] )

    if del == esc then
      # Delimiter escapes itself: a doubled delimiter stays inside the match
      patParts.push "#{del}(?:[^#{del}]*(?:(?:#{del}#{del})[^#{del}]*)*)#{del}"
    else
      # Escape character followed by any character stays inside the match
      patParts.push "#{del}(?:[^#{esc}#{del}]*(?:#{esc}.[^#{esc}#{del}]*)*)#{del}"
    end
  end

  # Join all the parts together and return one big pattern
  return Regexp::new( "#{prefix}(?:#{patParts.join("|")})" )
end
|
1275
|
+
|
1276
|
+
end # class StringExtractor
|
1277
|
+
|
1278
|
+
|