getopt-declare 1.09.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1278 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # A derivative of StringScanner that can scan for delimited constructs in
4
+ # addition to regular expressions. It is a loose port of the Text::Balanced
5
+ # module for Perl by Damian Conway <damian@cs.monash.edu.au>.
6
+ #
7
+ # == Synopsis
8
+ #
9
+ # se = DelimScanner::new( myString )
10
+ #
11
+ # == Authors
12
+ #
13
+ # * Michael Granger <ged@FaerieMUD.org>
14
+ # * Gonzalo Garramuno <GGarramuno@aol.com>
15
+ #
16
+ # Copyright (c) 2002, 2003 The FaerieMUD Consortium. Most rights reserved.
17
+ #
18
+ # This work is licensed under the Creative Commons Attribution License. To view
19
+ # a copy of this license, visit http://creativecommons.org/licenses/by/1.0 or
20
+ # send a letter to Creative Commons, 559 Nathan Abbott Way, Stanford, California
21
+ # 94305, USA.
22
+ #
23
+ # == Version
24
+ #
25
+ # $Id: DelimScanner.rb,v 1.1.1.1 2004/01/25 07:02:48 gga Exp $
26
+ #
27
+ # == History
28
+ #
29
+ # - Added :suffix hash key for returning rest (right) of matches, like Perl's
30
+ # Text::Balanced, on several methods.
31
+ # - Added one or two \ for backquoting brackets, as new ruby1.8 complains
32
+ #
33
+
34
+ require 'strscan'
35
+ require 'forwardable'
36
+
37
+ ### Add some stuff to the String class to allow easy transformation to Regexp
38
+ ### and in-place interpolation.
39
class String

  ### Build a Regexp from the text of the receiver.
  ###
  ### [<tt>casefold</tt>]
  ###   If true, the pattern is compiled with Regexp::IGNORECASE.
  ### [<tt>extended</tt>]
  ###   If true, the pattern is compiled with Regexp::EXTENDED.
  ###
  ### With both flags left at their defaults the result is the same as
  ### <tt>Regexp::new( self.dup )</tt>.
  def to_re( casefold=false, extended=false )
    options = 0
    options |= Regexp::IGNORECASE if casefold
    options |= Regexp::EXTENDED if extended
    return Regexp::new( self.dup, options )
  end

  ### Ideas for String-interpolation stuff courtesy of Hal E. Fulton
  ### <hal9000@hypermetrics.com> via ruby-talk

  ### Evaluate the receiver as the body of a double-quoted String literal in
  ### the given <tt>scope</tt> (a Binding) and return the resulting String.
  ### Raises a TypeError if <tt>scope</tt> is not a Binding.
  def interpolate( scope )
    unless scope.is_a?( Binding )
      raise TypeError, "Argument to interpolate must be a Binding, not "\
        "a #{scope.class.name}"
    end

    # Escape embedded double quotes so they do not end the literal built below.
    copy = self.gsub( /"/, %q:\": )

    # NOTE(review): this evals the receiver's text. Any #{...} it contains is
    # executed in +scope+, so it must never be given untrusted input.
    eval( '"' + copy + '"', scope )
  end

end
60
+
61
+
62
+ ### A derivative of StringScanner that can scan for delimited constructs in
63
+ ### addition to regular expressions.
64
+ class DelimScanner
65
+
66
+ ### Scanner exception classes
67
# Raised by the match* methods when a scan cannot be completed.
class MatchFailure < RuntimeError
end

# Raised when a delimiter specification contains no usable bracket.
class DelimiterError < RuntimeError
end


StringScanner.must_C_version
extend Forwardable
73
+
74
+
75
+ ### Class constants
76
# Dotted revision number pulled out of the RCS keyword string.
Version = %q{$Revision: 1.1.1.1 $}[ /([\d\.]+)/, 1 ]

# RCS id string (the literal is delimited by the '$' characters).
Rcsid = %q$Id: DelimScanner.rb,v 1.1.1.1 2004/01/25 07:02:48 gga Exp $

# Pattern source (a String, not a Regexp) that matches a valid XML name
XmlName = '[a-zA-Z_:][a-zA-Z0-9:.-]*'
81
+
82
+
83
+ ### Namespace module for DelimString constants
84
module Default

  # Opening => closing codeblock delimiters that scanCodeblock falls back on
  # when it is not given any.
  CodeblockDelimiters = { '{' => '}', 'begin' => 'end', 'do' => 'end' }

  # Default scanMultiple operations and their arguments: a one-element Array
  # holding a Hash of method name => argument list.
  MultipleFunctions = [
    { :scanVariable => [], :scanQuotelike => [], :scanCodeblock => [] },
  ]

end
102
+ include Default
103
+
104
+
105
+ ### Define delegating methods that cast their argument to a Regexp from a
106
+ ### String. This allows the scanner's scanning methods to be called with
107
+ ### Strings in addition to Regexps. This was mostly stolen from
108
+ ### forwardable.rb.
109
def self.def_casting_delegators( *methods )
  # For every requested name, generate an instance method that turns a
  # non-Regexp argument into a Regexp (via to_s.to_re) and then forwards the
  # call to the wrapped @scanner.
  methods.each do |delegate|
    definition = <<-EOF
      def #{delegate}( pattern )
        pattern = pattern.to_s.to_re unless pattern.is_a?( Regexp )
        @scanner.#{delegate}( pattern )
      end
    EOF

    class_eval( definition, "(--def_casting_delegators--)", 1 )
  end
end
119
+
120
+
121
+ ### Create a new DelimScanner object for the specified <tt>string</tt>. If
122
+ ### <tt>dup</tt> is <tt>true</tt>, a duplicate of the target string will be
123
+ ### used instead of the one given. The target string will be frozen after
124
+ ### the scanner is created.
125
def initialize( string, dup=true )
  # Start with no recorded match error and with debugging switched off, then
  # wrap the target string in the StringScanner that does the real work.
  @debugLevel = 0
  @matchError = nil
  @scanner    = StringScanner.new( string, dup )
end
130
+
131
+
132
+
133
+ ######
134
+ public
135
+ ######
136
+
137
+ # Here, some delegation trickery is done to make a DelimScanner behave like
138
+ # a StringScanner. Some methods are directly delegated, while some are
139
+ # delegated via a method which casts its argument to a Regexp first so some
140
+ # scanner methods can be called with Strings as well as Regexps.
141
+
142
+ # A list of delegated methods that need casting.
143
# A list of delegated methods that need casting.
NeedCastingDelegators = :scan, :skip, :match?, :check,
  :scan_until, :skip_until, :exist?, :check_until

# Delegate all StringScanner instance methods to the associated scanner
# object, except those that need a casting delegator, which uses an indirect
# delegation method.
#
# Module#instance_methods returns Strings on Ruby 1.8 and Symbols on later
# versions, so the names are normalised to Symbols before the casting
# methods are subtracted. Comparing against id2name Strings removed nothing
# on newer Rubies.
def_delegators :@scanner,
  *( StringScanner.instance_methods(false).collect {|meth| meth.to_sym} -
     NeedCastingDelegators )

def_casting_delegators( *NeedCastingDelegators )
154
+
155
+
156
+
157
+ # The last match error encountered by the scanner
158
# The last match error encountered by the scanner. The reader is left with
# the current visibility; the writer is made protected.
attr_reader :matchError
attr_writer :matchError
protected :matchError=

# Debugging level
attr_reader :debugLevel
attr_writer :debugLevel
163
+
164
+
165
+
166
+ ### Returns <tt>true</tt> if the scanner has encountered a match error.
167
def matchError?
  # A recorded error is anything other than nil in @matchError.
  @matchError.nil? ? false : true
end
170
+
171
+
172
+ ### Starting at the scan pointer, try to match a substring delimited by the
173
+ ### specified <tt>delimiters</tt>, skipping the specified <tt>prefix</tt>
174
+ ### and any character escaped by the specified <tt>escape</tt>
175
+ ### character/s. If matched, advances the scan pointer and returns a Hash
176
+ ### with the following key/value pairs on success:
177
+ ###
178
+ ### [<tt>:match</tt>]
179
+ ### The text of the match, including delimiters.
180
+ ### [<tt>:prefix</tt>]
181
+ ### The matched prefix, if any.
182
+ ###
183
+ ### If the match fails, returns nil.
184
def scanDelimited( delimiters="'\"`", prefix='\\s*', escape='\\' )
  # Scan for a quote-delimited substring at the scan pointer.
  #
  # Returns a Hash with :match (the delimited text) and :prefix (the text
  # skipped before it), or nil after recording the reason in matchError.
  # Explicit nil arguments fall back to the defaults.
  delimiters ||= "'\"`"
  prefix     ||= '\\s*'
  escape     ||= '\\'

  debugMsg( 1, "Scanning for delimited text: delim = (%s), prefix=(%s), escape=(%s)",
    delimiters, prefix, escape )
  self.matchError = nil

  # Try to match the prefix first to get the length. match? is called with
  # the prefix as given; the call site used to invoke String#to_re on it,
  # which fails for a Regexp prefix.
  unless (( prefixLength = self.match?(prefix) ))
    self.matchError = "Failed to match prefix '%s' at offset %d" %
      [ prefix, self.pointer ]
    return nil
  end

  # Now build a delimited pattern with the specified parameters.
  delimPattern = makeDelimPattern( delimiters, escape, prefix )
  debugMsg( 2, "Delimiter pattern is %s" % delimPattern.inspect )

  # Fail if no match
  unless (( matchedString = self.scan(delimPattern) ))
    self.matchError = "No delimited string found."
    return nil
  end

  # Split the scanned text at the prefix boundary. A start/length slice is
  # used for the prefix because the range 0..(prefixLength-1) becomes 0..-1
  # for an empty prefix and would return the whole match.
  return {
    :match  => matchedString[prefixLength .. -1],
    :prefix => matchedString[0, prefixLength],
  }
end
215
+
216
+
217
+ ### Match using the #scanDelimited method, but only return the match or nil.
218
def extractDelimited( *args )
  # Delegate to scanDelimited and keep only the matched text.
  found = scanDelimited( *args )
  return nil unless found
  found[:match]
end
222
+
223
+
224
+ ### Starting at the scan pointer, try to match a substring delimited by the
225
+ ### specified <tt>delimiters</tt>, skipping the specified <tt>prefix</tt>
226
+ ### and any character escaped by the specified <tt>escape</tt>
227
+ ### character/s. If matched, advances the scan pointer and returns the
228
+ ### length of the matched string; if it fails the match, returns nil.
229
def skipDelimited( delimiters="'\"`", prefix='\\s*', escape='\\' )
  # Explicit nil arguments fall back to the same defaults as the signature.
  delimiters = "'\"`" unless delimiters
  prefix     = '\\s*' unless prefix
  escape     = '\\'   unless escape

  # Clear any earlier error, then let skip report the length consumed by the
  # delimited pattern (or nil if it does not match).
  self.matchError = nil
  pattern = makeDelimPattern( delimiters, escape, prefix )
  return self.skip( pattern )
end
237
+
238
+
239
+ ### Starting at the scan pointer, try to match a substring delimited by
240
+ ### balanced <tt>delimiters</tt> of the type specified, after skipping the
241
+ ### specified <tt>prefix</tt>. On a successful match, this method advances
242
+ ### the scan pointer and returns a Hash with the following key/value pairs:
243
+ ###
244
+ ### [<tt>:match</tt>]
245
+ ### The text of the match, including the delimiting brackets.
246
+ ### [<tt>:prefix</tt>]
247
+ ### The matched prefix, if any.
248
+ ###
249
+ ### On failure, returns nil.
250
def scanBracketed( delimiters="{([<", prefix='\s*' )
  # Explicit nil arguments fall back to the defaults; a String prefix is
  # compiled to a Regexp.
  delimiters = "{([<" unless delimiters
  prefix     = '\s*'  unless prefix
  prefix = prefix.to_re unless prefix.kind_of?( Regexp )

  debugMsg( 1, "Scanning for bracketed text: delimiters = (%s), prefix = (%s)",
    delimiters, prefix )

  self.matchError = nil

  # Quote characters named in the delimiter spec become an alternation (or
  # nil when there are none); a 'q' in the spec turns on quotelike handling.
  quotes = delimiters.squeeze.split(//).select {|char| char =~ /["'`]/ }
  qdel = quotes.empty? ? nil : quotes.join('|')
  quotelike = (delimiters =~ /q/) ? true : nil

  # Map every bracket to its left-hand form, strip everything that is not a
  # bracketing character and collapse repeats.
  openers = delimiters.tr( '[](){}<>', '[[(({{<<' ).
    gsub(/[^#{Regexp.quote('[\\](){}<>')}]+/, '').squeeze

  # Derive the right-hand brackets; tr! yields nil when it changed nothing,
  # i.e. when no bracket was found.
  closers = openers.dup
  if closers.tr!( '[({<', '])}>' ).nil?
    raise DelimiterError, "Did not find a suitable bracket in delimiter: '#{delimiters}'"
  end

  # Regexp sources alternating over each set of brackets
  ldel = openers.split(//).map {|ch| Regexp.quote(ch) }.join('|')
  rdel = closers.split(//).map {|ch| Regexp.quote(ch) }.join('|')

  depth = self.scanDepth
  startPos = self.pointer

  begin
    return matchBracketed( prefix, ldel, qdel, quotelike, rdel )
  rescue MatchFailure => failure
    # A failed match is reported through matchError and a nil return, with
    # the scan pointer put back where it started.
    debugMsg( depth + 1, "Match error: %s" % failure.message )
    self.matchError = failure.message
    self.pointer = startPos
    return nil
  rescue
    # Anything else still propagates, but not before rewinding the pointer.
    self.pointer = startPos
    raise
  end
end
299
+
300
+
301
+ ### Match using the #scanBracketed method, but only return the match or nil.
302
def extractBracketed( *args )
  # Delegate to scanBracketed and keep only the matched text.
  found = scanBracketed( *args )
  return nil unless found
  found[:match]
end
306
+
307
+
308
+ ### Starting at the scan pointer, try to match a substring with
309
+ ### #scanBracketed. On a successful match, this method advances the scan
310
+ ### pointer and returns the length of the match, including the delimiters
311
+ ### and any prefix that was skipped. On failure, returns nil.
312
def skipBracketed( *args )
  # Skip a bracketed construct at the scan pointer.
  #
  # Returns the number of characters consumed (prefix plus match), or nil if
  # nothing matched. The arguments are passed straight to scanBracketed.
  #
  # The length is taken from the distance the scan pointer moved. The
  # previous code called #length on the result Hash and read an undefined
  # local named +prefix+, and an ensure clause rewound the pointer even
  # after a successful match.
  startPos = self.pointer

  # scanBracketed itself restores the pointer when the match fails, so
  # there is nothing to undo here.
  return nil unless scanBracketed( *args )
  return self.pointer - startPos
end
323
+
324
+
325
+ ### Extracts and segments text from the scan pointer forward that occurs
326
+ ### between (balanced) specified tags, after skipping the specified
327
+ ### <tt>prefix</tt>. If the opentag argument is <tt>nil</tt>, a pattern which
328
+ ### will match any standard HTML/XML tag will be used. If the
329
+ ### <tt>closetag</tt> argument is <tt>nil</tt>, a pattern is created which
330
+ ### prepends a <tt>/</tt> character to the matched opening tag, after any
331
+ ### bracketing characters. The <tt>options</tt> argument is a Hash of one or
332
+ ### more options which govern the matching operation. They are described in
333
+ ### more detail in the Description section of 'lib/DelimScanner.rb'. On a
334
+ ### successful match, this method advances the scan pointer and returns a Hash with the following key/value pairs:
335
+ ###
336
+ ### [<tt>:match</tt>]
337
+ ### The text of the match, including the delimiting tags.
338
+ ### [<tt>:prefix</tt>]
339
+ ### The matched prefix, if any.
340
+ ###
341
+ ### On failure, returns nil.
342
def scanTagged( opentag=nil, closetag=nil, prefix='\s*', options={} )
  prefix = '\s*' unless prefix

  # Without an explicit opening tag, fall back to a pattern source built
  # around the quote-delimited pattern from makeDelimPattern.
  ldel = opentag || %Q,<\\w+(?:#{ makeDelimPattern(%q:'":) }|[^>])*>,
  rdel = closetag
  raise ArgumentError, "Options argument must be a hash" unless options.kind_of?( Hash )

  # :reject and :ignore may each be an Array of alternatives (joined with
  # '|'), a single value, or absent (treated as the empty String).
  alternation = lambda {|opt| opt.is_a?( Array ) ? opt.join("|") : (opt || '') }
  failmode = options[:fail]
  bad      = alternation.call( options[:reject] )
  ignore   = alternation.call( options[:ignore] )

  self.matchError = nil
  startPos = self.pointer
  depth = self.scanDepth

  begin
    return matchTagged( prefix, ldel, rdel, failmode, bad, ignore )
  rescue MatchFailure => failure
    # A failed match is reported through matchError and a nil return, with
    # the scan pointer put back where it started.
    debugMsg( depth + 1, "Match error: %s" % failure.message )
    self.matchError = failure.message
    self.pointer = startPos
    return nil
  rescue
    # Anything else still propagates, but not before rewinding the pointer.
    self.pointer = startPos
    raise
  end
end
381
+
382
+
383
+ ### Match using the #scanTagged method, but only return the match or nil.
384
def extractTagged( *args )
  # Delegate to scanTagged and keep only the matched text.
  found = scanTagged( *args )
  return nil unless found
  found[:match]
end
388
+
389
+
390
+ ### Starting at the scan pointer, try to match a substring with
391
+ ### #scanTagged. On a successful match, this method advances the scan
392
+ ### pointer and returns the length of the match, including any delimiters
393
+ ### and any prefix that was skipped. On failure, returns nil.
394
def skipTagged( *args )
  # Skip a tagged construct at the scan pointer.
  #
  # Returns the number of characters consumed (prefix plus match), or nil if
  # nothing matched. The arguments are passed straight to scanTagged.
  #
  # The length is taken from the distance the scan pointer moved. The
  # previous code called #length on the result Hash and read an undefined
  # local named +prefix+, and an ensure clause rewound the pointer even
  # after a successful match.
  startPos = self.pointer

  # scanTagged itself restores the pointer when the match fails, so there
  # is nothing to undo here.
  return nil unless scanTagged( *args )
  return self.pointer - startPos
end
405
+
406
+
407
+ # :NOTE:
408
+ # Since the extract_quotelike function isn't documented at all in
409
+ # Text::Balanced, I'm only guessing this is correct...
410
+
411
+ ### Starting from the scan pointer, try to match any one of the various Ruby
412
+ ### quotes and quotelike operators after skipping the specified
413
+ ### <tt>prefix</tt>. Nested backslashed delimiters, embedded balanced
414
+ ### bracket delimiters (for the quotelike operators), and trailing modifiers
415
+ ### are all caught. If <tt>matchRawRegex</tt> is <tt>true</tt>, inline
416
+ ### regexen (eg., <tt>/pattern/</tt>) are matched as well. Advances the scan
417
+ ### pointer and returns a Hash with the following key/value pairs on
418
+ ### success:
419
+ ###
420
+ ### [<tt>:match</tt>]
421
+ ### The entire text of the match.
422
+ ### [<tt>:prefix</tt>]
423
+ ### The matched prefix, if any.
424
+ ### [<tt>:quoteOp</tt>]
425
+ ### The name of the quotelike operator (if any) (eg., '%Q', '%r', etc).
426
+ ### [<tt>:leftDelim</tt>]
427
+ ### The left delimiter of the first block of the operation.
428
+ ### [<tt>:delimText</tt>]
429
+ ### The text of the first block of the operation.
430
+ ### [<tt>:rightDelim</tt>]
431
+ ### The right delimiter of the first block of the operation.
432
+ ### [<tt>:modifiers</tt>]
433
+ ### The trailing modifiers on the operation (if any).
434
+ ###
435
+ ### On failure, returns nil.
436
def scanQuotelike( prefix='\s*', matchRawRegex=true )
  self.matchError = nil
  startPos = self.pointer
  depth = self.scanDepth

  begin
    return matchQuotelike( prefix, matchRawRegex )
  rescue MatchFailure => failure
    # A failed match is reported through matchError and a nil return, with
    # the scan pointer put back where it started.
    debugMsg( depth + 1, "Match error: %s" % failure.message )
    self.matchError = failure.message
    self.pointer = startPos
    return nil
  rescue
    # Anything else still propagates, but not before rewinding the pointer.
    self.pointer = startPos
    raise
  end
end
458
+
459
+
460
+ ### Match using the #scanQuotelike method, but only return the match or nil.
461
def extractQuotelike( *args )
  # Delegate to scanQuotelike and keep only the matched text.
  found = scanQuotelike( *args )
  return nil unless found
  found[:match]
end
465
+
466
+
467
+ ### Starting at the scan pointer, try to match a substring with
468
+ ### #scanQuotelike. On a successful match, this method advances the scan
469
+ ### pointer and returns the length of the match, including any delimiters
470
+ ### and any prefix that was skipped. On failure, returns nil.
471
def skipQuotelike( *args )
  # Skip a quote or quotelike construct at the scan pointer.
  #
  # Returns the number of characters consumed (prefix plus match), or nil if
  # nothing matched. The arguments are passed straight to scanQuotelike.
  #
  # The length is taken from the distance the scan pointer moved. The
  # previous code called #length on the result Hash and read an undefined
  # local named +prefix+, and an ensure clause rewound the pointer even
  # after a successful match.
  startPos = self.pointer

  # scanQuotelike itself restores the pointer when the match fails, so
  # there is nothing to undo here.
  return nil unless scanQuotelike( *args )
  return self.pointer - startPos
end
482
+
483
+
484
+ ### Starting from the scan pointer, try to match a Ruby variable after
485
+ ### skipping the specified prefix.
486
def scanVariable( prefix='\s*' )
  self.matchError = nil
  startPos = self.pointer
  depth = self.scanDepth

  begin
    return matchVariable( prefix )
  rescue MatchFailure => failure
    # A failed match is reported through matchError and a nil return, with
    # the scan pointer put back where it started.
    debugMsg( depth + 1, "Match error: %s" % failure.message )
    self.matchError = failure.message
    self.pointer = startPos
    return nil
  rescue
    # Anything else still propagates, but not before rewinding the pointer.
    self.pointer = startPos
    raise
  end
end
507
+
508
+
509
+ ### Match using the #scanVariable method, but only return the match or nil.
510
def extractVariable( *args )
  # Delegate to scanVariable and keep only the matched text.
  found = scanVariable( *args )
  return nil unless found
  found[:match]
end
514
+
515
+
516
+ ### Starting at the scan pointer, try to match a substring with
517
+ ### #scanVariable. On a successful match, this method advances the scan
518
+ ### pointer and returns the length of the match, including any delimiters
519
+ ### and any prefix that was skipped. On failure, returns nil.
520
def skipVariable( *args )
  # Skip a variable at the scan pointer.
  #
  # Returns the number of characters consumed (prefix plus match), or nil if
  # nothing matched. The arguments are passed straight to scanVariable.
  #
  # The length is taken from the distance the scan pointer moved. The
  # previous code called #length on the result Hash and read an undefined
  # local named +prefix+, and an ensure clause rewound the pointer even
  # after a successful match.
  startPos = self.pointer

  # scanVariable itself restores the pointer when the match fails, so
  # there is nothing to undo here.
  return nil unless scanVariable( *args )
  return self.pointer - startPos
end
531
+
532
+
533
+ ### Starting from the scan pointer, and skipping the specified
534
+ ### <tt>prefix</tt>, try to recognize and match a balanced bracket-,
535
+ ### do/end-, or begin/end-delimited substring that may contain unbalanced
536
+ ### delimiters inside quotes or quotelike operations.
537
def scanCodeblock( innerDelim=CodeblockDelimiters, prefix='\s*', outerDelim=innerDelim )
  self.matchError = nil
  startPos = self.pointer

  # Explicit nil arguments fall back to the defaults; the outer delimiters
  # default to whatever the inner ones ended up being.
  prefix     = '\s*' unless prefix
  innerDelim = CodeblockDelimiters unless innerDelim
  outerDelim = innerDelim unless outerDelim

  # Count the scan* methods already on the call stack; the result is only
  # used as the level of the debug message emitted on failure.
  depth = caller(1).select {|frame|
    frame =~ /in `scan(Variable|Tagged|Codeblock|Bracketed|Quotelike)'/
  }.size

  begin
    debugMsg 3, "------------------------------------"
    debugMsg 3, "Calling matchCodeBlock( %s, %s, %s )",
      prefix.inspect, innerDelim.inspect, outerDelim.inspect
    debugMsg 3, "------------------------------------"
    return matchCodeblock( prefix, innerDelim, outerDelim )
  rescue MatchFailure => failure
    # A failed match is reported through matchError and a nil return, with
    # the scan pointer put back where it started.
    debugMsg( depth + 1, "Match error: %s" % failure.message )
    self.matchError = failure.message
    self.pointer = startPos
    return nil
  rescue
    # Anything else still propagates, but not before rewinding the pointer.
    self.pointer = startPos
    raise
  end
end
568
+
569
+
570
+ ### Match using the #scanCodeblock method, but only return the match or nil.
571
def extractCodeblock( *args )
  # Delegate to scanCodeblock and keep only the matched text.
  found = scanCodeblock( *args )
  return nil unless found
  found[:match]
end
575
+
576
+
577
+ ### Starting at the scan pointer, try to match a substring with
578
+ ### #scanCodeblock. On a successful match, this method advances the scan
579
+ ### pointer and returns the length of the match, including any delimiters
580
+ ### and any prefix that was skipped. On failure, returns nil.
581
def skipCodeblock( *args )
  # Skip a code block at the scan pointer.
  #
  # Returns the number of characters consumed (prefix plus match), or nil if
  # nothing matched. The arguments are passed straight to scanCodeblock.
  #
  # The length is taken from the distance the scan pointer moved. The
  # previous code called #length on the result Hash and read an undefined
  # local named +prefix+, and an ensure clause rewound the pointer even
  # after a successful match.
  startPos = self.pointer

  # scanCodeblock itself restores the pointer when the match fails, so
  # there is nothing to undo here.
  return nil unless scanCodeblock( *args )
  return self.pointer - startPos
end
592
+
593
+
594
+
595
+
596
+ #########
597
+ protected
598
+ #########
599
+
600
+ ### Scan the string from the scan pointer forward, skipping the specified
601
+ ### <tt>prefix</tt> and trying to match a string delimited by bracketing
602
+ ### delimiters <tt>ldel</tt> and <tt>rdel</tt> (Regexp objects), and quoting
603
+ ### delimiters <tt>qdel</tt> (Regexp). If <tt>quotelike</tt> is
604
+ ### <tt>true</tt>, Ruby quotelike constructs will also be honored.
605
def matchBracketed( prefix, ldel, qdel, quotelike, rdel )
  # Match a balanced, bracket-delimited substring at the scan pointer.
  #
  # [prefix]    pattern that must be skipped before the opening bracket
  # [ldel/rdel] patterns matching the opening / closing brackets
  # [qdel]      pattern matching quote characters whose quoted contents are
  #             skipped as a unit, or nil
  # [quotelike] if true, embedded quotelike constructs are skipped via
  #             scanQuotelike
  #
  # Returns a Hash with :match (brackets included), :prefix and :suffix (the
  # rest of the string). Raises MatchFailure if the prefix or the opening
  # bracket is missing, if a closing bracket does not pair with the most
  # recent opening one, if an embedded quote is unterminated, or if the
  # string ends with brackets still open.
  startPos = self.pointer
  debugMsg( 2, "matchBracketed starting at pos = %d: prefix = %s, "\
    "ldel = %s, qdel = %s, quotelike = %s, rdel = %s",
    startPos, prefix.inspect, ldel.inspect, qdel.inspect, quotelike.inspect,
    rdel.inspect )

  # Test for the prefix, failing if not found
  raise MatchFailure, "Did not find prefix: #{prefix.inspect}" unless
    self.skip( prefix )

  # Mark this position as the left-delimiter pointer
  ldelpos = self.pointer
  debugMsg( 3, "Found prefix. Left delim pointer at %d", ldelpos )

  # Match opening delimiter or fail
  unless (( delim = self.scan(ldel) ))
    raise MatchFailure, "Did not find opening bracket after prefix: '%s' (%d)" %
      [ self.string[startPos..ldelpos].chomp, ldelpos ]
  end

  # A stack to keep track of nested delimiters
  nesting = [ delim ]
  debugMsg( 3, "Found opening bracket. Nesting = %s", nesting.inspect )

  while self.rest?

    debugMsg( 5, "Starting scan loop. Nesting = %s", nesting.inspect )

    # Skip anything that's backslashed
    if self.skip( /\\./ )
      debugMsg( 4, "Skipping backslashed literal at offset %d: '%s'",
        self.pointer - 2, self.string[ self.pointer - 2, 2 ].chomp )
      next
    end

    # Opening bracket (left delimiter)
    if self.scan(ldel)
      delim = self.matched
      debugMsg( 4, "Found opening delim %s at offset %d",
        delim.inspect, self.pointer - 1 )
      nesting.push delim

    # Closing bracket (right delimiter)
    elsif self.scan(rdel)
      delim = self.matched

      debugMsg( 4, "Found closing delim %s at offset %d",
        delim.inspect, self.pointer - 1 )

      # The stack always holds at least one bracket here: it starts with the
      # opening delimiter and the loop is left as soon as it empties, so no
      # separate "unmatched closing bracket" check is needed.

      # Figure out what the complement of the bracket next off the
      # stack should be.
      expected = nesting.pop.tr( '({[<', ')}]>' )
      debugMsg( 4, "Got a '%s' bracket off nesting stack", expected )

      # Check for mismatched brackets
      if expected != delim
        raise MatchFailure, "Mismatched closing bracket at offset %d: "\
          "Expected '%s', but found '%s' instead." %
          [ self.pointer - 1, expected, delim ]
      end

      # If we've found the closing delimiter, stop scanning
      if nesting.empty?
        debugMsg( 4, "Finished with scan: nesting stack empty." )
        break
      end

    # Quoted chunk (quoted delimiter)
    elsif qdel && self.scan(qdel)
      match = self.matched

      # Consume everything up to and including the matching close quote,
      # stepping over backslash-escaped characters.
      if self.scan( /[^\\#{match}]*(?:\\.[^\\#{match}]*)*(#{Regexp::quote(match)})/ )
        debugMsg( 4, "Skipping quoted chunk. Scan pointer now at offset %d", self.pointer )
        next
      end

      raise MatchFailure, "Unmatched embedded quote (%s) at offset %d" %
        [ match, self.pointer - 1 ]

    # Embedded quotelike
    elsif quotelike && self.scanQuotelike
      debugMsg( 4, "Matched a quotelike. Scan pointer now at offset %d", self.pointer )
      next

    # Skip word characters, or a single non-word character
    else
      self.skip( /(?:[a-zA-Z0-9]+|.)/m )
      debugMsg 5, "Skipping '%s' at offset %d." %
        [ self.matched, self.pointer ]
    end

  end

  # If there's one or more brackets left on the delimiter stack, we're
  # missing a closing delim.
  unless nesting.empty?
    raise MatchFailure, "Unmatched opening bracket(s): %s.. at offset %d" %
      [ nesting.join('..'), self.pointer ]
  end

  rval = {
    :match  => self.string[ ldelpos .. (self.pointer - 1) ],
    :prefix => self.string[ startPos, (ldelpos-startPos) ],
    :suffix => self.string[ self.pointer..-1 ],
  }
  debugMsg 1, "matchBracketed succeeded: %s" % rval.inspect
  return rval
end
720
+
721
+
722
+ ### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and
723
+ ### try to match text bracketed by the given left and right tag-delimiters
724
+ ### (<tt>ldel</tt> and <tt>rdel</tt>).
725
def matchTagged( prefix, ldel, rdel, failmode, bad, ignore )
  # Match text enclosed by an opening and a closing tag at the scan pointer.
  #
  # [prefix]   pattern that must be skipped before the opening tag
  # [ldel]     pattern matching an opening tag
  # [rdel]     pattern source for the closing tag, or nil to derive one from
  #            the opening tag with makeClosingTag
  # [failmode] nil, or :para / :max (any value is normalised to a Symbol) to
  #            tolerate a missing closing tag
  # [bad]      pattern source for text that must not occur inside the tags
  # [ignore]   pattern source for text to step over without inspection
  #
  # Returns a Hash with :match, :prefix and :suffix. Raises MatchFailure if
  # the prefix or opening tag is missing, if +bad+ matches, if a nested tag
  # cannot be matched, or if no closing tag is found (the last three only
  # when +failmode+ is neither :para nor :max).
  #
  # Local variable names are kept as they are because the +binding+ of this
  # method is handed to String#interpolate below.
  failmode = failmode.to_s.intern if failmode
  startPos = self.pointer
  debugMsg 2, "matchTagged starting at pos = %d: prefix = %s, "\
    "ldel = %s, rdel = %s, failmode = %s, bad = %s, ignore = %s",
    startPos, prefix.inspect, ldel.inspect, rdel.inspect,
    failmode.inspect, bad.inspect, ignore.inspect

  rdelspec = ''
  openTagPos, textPos, paraPos, closeTagPos, endPos = ([nil] * 5)
  match = nil

  # Look for the prefix
  raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/" unless
    self.skip( prefix )

  openTagPos = self.pointer
  debugMsg 3, "Found prefix. Pointer now at offset %d" % self.pointer

  # Look for the opening delimiter
  unless (( match = self.scan(ldel) ))
    raise MatchFailure, "Did not find opening tag %s at offset %d" %
      [ ldel.inspect, self.pointer ]
  end

  textPos = self.pointer
  debugMsg 3, "Found left delimiter '%s': offset now %d" % [ match, textPos ]

  # Make a right delim out of the tag we found if none was specified
  if rdel.nil?
    rdelspec = makeClosingTag( match )
    debugMsg 3, "Generated right-delimiting tag: %s" % rdelspec.inspect
  else
    # Make the regexp-related globals from the match
    # NOTE(review): this rewrites $N into the literal text self[N] and then
    # evaluates the result as a double-quoted String; confirm that this
    # yields the intended back-reference.
    rdelspec = rdel.gsub( /(\A|[^\\])\$([1-9])/, '\1self[\2]' ).interpolate( binding )
    debugMsg 3, "Right delimiter (after interpolation) is: %s" % rdelspec.inspect
  end

  # Process until we reach the end of the string or find a closing tag
  while self.rest? && closeTagPos.nil?

    # Skip backslashed characters
    if (( self.skip( /^\\./ ) ))
      debugMsg 4, "Skipping backslashed literal at offset %d" % self.pointer
      next

    # Match paragraphs-break for fail == :para
    elsif (( matchlength = self.skip( /^(\n[ \t]*\n)/ ) ))
      paraPos ||= self.pointer - matchlength
      debugMsg 4, "Found paragraph position at offset %d" % paraPos

    # Match closing tag
    elsif (( matchlength = self.skip( rdelspec ) ))
      closeTagPos = self.pointer - matchlength
      debugMsg 3, "Found closing tag at offset %d" % closeTagPos

    # If we're ignoring anything, try to match and move beyond it
    elsif ignore && !ignore.empty? && self.skip(ignore)
      debugMsg 3, "Skipping ignored text '%s' at offset %d" %
        [ self.matched, self.pointer - self.matched_size ]
      next

    # If there's a "bad" pattern, try to match it, shorting the
    # outer loop if it matches in para or max mode, or failing with
    # a match error if not.
    elsif bad && !bad.empty? && self.match?( bad )
      if failmode == :para || failmode == :max
        break
      else
        # Report the text that matched the reject pattern. +match+ holds
        # the opening tag (or nil after a failed ldel test), not the
        # offending text.
        raise MatchFailure, "Found invalid nested tag '%s' at offset %d" %
          [ self.matched, self.pointer ]
      end

    # If there's another opening tag, make a recursive call to
    # ourselves to move the cursor beyond it
    elsif (( match = self.scan( ldel ) ))
      tag = match
      self.unscan
      nestedPos = self.pointer

      # The recursive call signals failure by raising, not by returning
      # nil, so the failure is caught here and the pointer is put back on
      # the nested tag before deciding how to proceed.
      nested = begin
        self.matchTagged( prefix, ldel, rdel, failmode, bad, ignore )
      rescue MatchFailure => e
        debugMsg 4, "Nested tag failed to match: %s" % e.message
        self.pointer = nestedPos
        nil
      end

      unless nested
        break if failmode == :para || failmode == :max

        raise MatchFailure, "Found unbalanced nested tag '%s' at offset %d" %
          [ tag, self.pointer ]
      end

    else
      self.pointer += 1
      debugMsg 5, "Advanced scan pointer to offset %d" % self.pointer
    end
  end

  # If the closing hasn't been found, then it's a "short" match, which is
  # okay if the failmode indicates we don't care. Otherwise, it's an error.
  unless closeTagPos
    debugMsg 3, "No close tag position found. "

    if failmode == :max || failmode == :para
      closeTagPos = self.pointer - 1
      debugMsg 4, "Failmode %s tolerates no closing tag. Close tag position set to %d" %
        [ failmode.inspect, closeTagPos ]

      # Sync the scan pointer and the paragraph marker if it's set.
      if failmode == :para && paraPos
        self.pointer = paraPos + 1
      end
    else
      raise MatchFailure, "No closing tag found."
    end
  end

  rval = {
    :match  => self.string[ openTagPos .. (self.pointer - 1) ],
    :prefix => self.string[ startPos, (openTagPos-startPos) ],
    :suffix => self.string[ self.pointer..-1 ],
  }
  debugMsg 1, "matchTagged succeeded: %s" % rval.inspect
  return rval
end
844
+
845
+
846
+ ### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and
847
+ ### try to match text inside a Ruby quotelike construct. If
848
+ ### <tt>matchRawRegex</tt> is <tt>true</tt>, the regex construct
849
+ ### <tt>/pattern/</tt> is also matched.
850
def matchQuotelike( prefix, matchRawRegex )
  # Three families of construct are tried, in this order:
  #   1. simple quotes ('...', "...", `...`) and, if matchRawRegex is true,
  #      /.../ with optional trailing modifiers;
  #   2. %-operators (%q, %Q, %w, %r, %x or a bare %);
  #   3. here-documents (<<LABEL, <<-LABEL, and quoted labels).
  #
  # Returns a Hash with the keys :prefix, :match, :leftDelim, :delimText,
  # :rightDelim and :suffix. Cases 1 and 2 add :modifiers (nil for
  # %-operators other than %r); cases 2 and 3 add :quoteOp.
  #
  # Raises MatchFailure if the prefix or the construct cannot be matched.
  #
  # NOTE(review): the slices below index self.string with scan-pointer
  # values; if the pointer is a byte offset the slices can be off for
  # multibyte text -- confirm before relying on this with non-ASCII input.
  startPos = self.pointer
  debugMsg 2, "matchQuotelike starting at pos = %d: prefix = %s, "\
    "matchRawRegex = %s",
    startPos, prefix.inspect, matchRawRegex.inspect

  rval = nil

  # Pattern for the modifiers that may trail a regexp literal
  modifierPattern = if RUBY_VERSION >= "1.7.3" then /[imoxs]*/ else /[imox]*/ end

  # Look for the prefix
  raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/" unless
    self.skip( prefix )
  oppos = self.pointer

  # Peek at the next character
  # If the initial quote is a simple quote, our job is easy
  if self.check(/^["`']/) || ( matchRawRegex && self.check(%r:/:) )

    initial = self.matched
    quoted = Regexp.quote( initial )

    # Build the pattern for matching the simple string. The backslashes are
    # doubled twice over (once for this String literal, once for the regex)
    # so that the character classes really exclude a backslash and the
    # group really consumes "backslash + any character".
    pattern = "%s [^\\\\%s]* (\\\\.[^\\\\%s]*)* %s" %
      [ quoted, quoted, quoted, quoted ]
    debugMsg 2, "Matching simple quote at offset %d with /%s/" %
      [ self.pointer, pattern ]

    # Search for it, raising an exception if it's not found. (No 's' flag:
    # in Ruby that selects the Windows-31J encoding, not "dot matches
    # newline", which is what 'm' does.)
    unless self.scan( /#{pattern}/xim )
      raise MatchFailure,
        "Did not find closing delimiter to match '%s' at '%s...' (offset %d)" %
        [ initial, self.string[ oppos, 20 ].chomp, self.pointer ]
    end

    modpos = self.pointer
    rdpos = modpos - 1

    # If we're matching a regex, look for any trailing modifiers
    self.scan( modifierPattern ) if initial == '/'

    rval = {
      :prefix     => self.string[ startPos, (oppos-startPos) ],
      :match      => self.string[ oppos .. (self.pointer - 1) ],
      :leftDelim  => self.string[ oppos, 1 ],
      :delimText  => self.string[ (oppos+1) .. (rdpos-1) ],
      :rightDelim => self.string[ rdpos, 1 ],
      :modifiers  => self.string[ modpos, (self.pointer-modpos) ],
      :suffix     => self.string[ self.pointer.. -1 ],
    }

  # If it's one of the fancy quotelike operators, our job is somewhat
  # complicated (though nothing like Perl's, thank the Goddess)
  elsif self.scan( %r:%[rwqQx]?(?=\S): )
    op = self.matched
    debugMsg 2, "Matching a real quotelike ('%s') at offset %d" %
      [ op, self.pointer ]
    modifiers = nil

    ldpos = self.pointer
    strpos = ldpos + 1

    # Peek ahead to see what the delimiter is
    ldel = self.check( /\S/ )

    # If it's a bracketing character, just use matchBracketed
    if ldel =~ /[\[(<{]/
      rdel = ldel.tr( '[({<', '])}>' )
      debugMsg 4, "Left delim is a bracket: %s; looking for compliment: %s" %
        [ ldel, rdel ]
      self.matchBracketed( '', Regexp::quote(ldel), nil, nil, Regexp::quote(rdel) )
    else
      debugMsg 4, "Left delim isn't a bracket: '#{ldel}'; looking for closing instance"

      # The delimiter can be any non-space character, including regex
      # metacharacters such as '|', '*' or '.', so it must be quoted before
      # being interpolated into the pattern.
      qdel = Regexp::quote( ldel )
      self.scan( /#{qdel}[^\\#{qdel}]*(?:\\.[^\\#{qdel}]*)*#{qdel}/m ) or
        raise MatchFailure,
          "Can't find a closing delimiter '%s' at '%s...' (offset %d)" %
          [ ldel, self.rest[0,20].chomp, self.pointer ]
    end
    rdelpos = self.pointer - 1

    # Match modifiers for Regexp quote
    modifiers = self.scan( modifierPattern ) || '' if op == '%r'

    rval = {
      :prefix     => self.string[ startPos, (oppos-startPos) ],
      :match      => self.string[ oppos .. (self.pointer - 1) ],
      :quoteOp    => op,
      :leftDelim  => self.string[ ldpos, 1 ],
      :delimText  => self.string[ strpos, (rdelpos-strpos) ],
      :rightDelim => self.string[ rdelpos, 1 ],
      :modifiers  => modifiers,
      :suffix     => self.string[ self.pointer.. -1 ],
    }

  # If it's a here-doc, things get even hairier.
  elsif self.scan( %r:<<(-)?: )
    debugMsg 2, "Matching a here-document at offset %d" % self.pointer
    op = self.matched

    # If there was a dash, start with optional whitespace
    dashed = !self[1].nil?
    indent = dashed ? '\s*' : ''
    ldpos = self.pointer
    label = ''

    # Plain identifier
    if self.scan( /[A-Za-z_]\w*/ )
      label = self.matched
      debugMsg 3, "Setting heredoc terminator to bare identifier '%s'" % label

    # Quoted string
    elsif self.scan( / ' ([^'\\]* (?:\\.[^'\\]*)*) ' /mx ) ||
        self.scan( / " ([^"\\]* (?:\\.[^"\\]*)*) " /mx ) ||
        self.scan( / ` ([^`\\]* (?:\\.[^`\\]*)*) ` /mx )
      label = self[1]
      debugMsg 3, "Setting heredoc terminator to quoted identifier '%s'" % label

    # Ruby, unlike Perl, requires a terminal, even if it's only an empty
    # string
    else
      raise MatchFailure,
        "Missing heredoc terminator before end of line at "\
        "'%s...' (offset %d)" %
        [ self.rest[0,20].chomp, self.pointer ]
    end
    extrapos = self.pointer

    # Advance to the beginning of the string
    atLineStart = self.skip( /.*\n/ )
    strpos = self.pointer
    debugMsg 3, "Scanning until /\\n#{indent}#{label}\\n/m"

    # A quoted label may contain regex metacharacters, so quote it
    terminator = Regexp::quote( label )
    bareIndent = dashed ? '[ \t]*' : ''

    # An empty body puts the terminator on the very first line, where there
    # is no preceding newline left for the general pattern to match.
    if atLineStart && self.scan( /#{bareIndent}#{terminator}\n/ )
      rdpos = strpos

    # Match to the label
    elsif self.scan_until( /\n#{indent}#{terminator}\n/m )
      rdpos = self.pointer - self.matched_size

    else
      raise MatchFailure,
        "Couldn't find heredoc terminator '%s' after '%s...' (offset %d)" %
        [ label, self.rest[0,20].chomp, self.pointer ]
    end

    rval = {
      :prefix     => self.string[ startPos, (oppos-startPos) ],
      :match      => self.string[ oppos .. (self.pointer - 1) ],
      :quoteOp    => op,
      :leftDelim  => self.string[ ldpos, (extrapos-ldpos) ],
      :delimText  => self.string[ strpos, (rdpos-strpos) ],
      :rightDelim => self.string[ rdpos, (self.pointer-rdpos) ],
      :suffix     => self.string[ self.pointer.. -1 ],
    }

  else
    raise MatchFailure,
      "No quotelike operator found after prefix at '%s...'" %
      self.rest[0,20].chomp
  end

  debugMsg 1, "matchQuotelike succeeded: %s" % rval.inspect
  return rval
end
1017
+
1018
+
1019
### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and
### try to match a Ruby variable or identifier, together with any chain of
### method calls, element references or trailing blocks that follows it.
### Returns a Hash with the keys <tt>:match</tt>, <tt>:prefix</tt> and
### <tt>:suffix</tt>; raises MatchFailure if the prefix or the identifier
### cannot be matched.
def matchVariable( prefix )
  startPos = self.pointer
  debugMsg 2, "matchVariable starting at pos = %d: prefix = %s",
    startPos, prefix.inspect

  # Look for the prefix
  raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/" unless
    self.skip( prefix )

  varPos = self.pointer

  # If the variable matched is a predefined global, no need to look for an
  # identifier
  unless self.scan( %r~\$(?:[!@/\\,;.<>$?:_\~&`'+]|-\w|\d+)~ )

    debugMsg 2, "Not a predefined global at '%s...' (offset %d)" %
      [ self.rest[0,20].chomp, self.pointer ]

    # Look for a valid identifier. Only the 'i' flag is used: Perl's /s has
    # no counterpart here (the pattern contains no '.'), and in Ruby an 's'
    # flag would pin the regexp to the Windows-31J encoding.
    unless self.scan( /\*?(?:[$@]|::)?(?:[a-z_]\w*(?:::\s*))*[_a-z]\w*/i )
      raise MatchFailure, "No variable found: Bad identifier (offset %d)" % self.pointer
    end
  end

  debugMsg 2, "Matched '%s' at offset %d" % [ self.matched, self.pointer ]

  # Match methodchain with trailing codeblock
  while self.rest?
    # Match a regular chained method
    next if scanCodeblock( {"("=>")", "do"=>"end", "begin"=>"end", "{"=>"}"},
                           /\s*(?:\.|::)\s*[a-zA-Z_]\w+\s*/ )

    # Match a trailing block or an element ref
    next if scanCodeblock( nil, /\s*/, {'{' => '}', '[' => ']'} )

    # This matched a dereferencer in Perl, which doesn't have any
    # equivalent in Ruby.
    #next if scanVariable( '\s*(\.|::)\s*' )

    # Match a method call without parens (?)
    next if self.scan( '\s*(\.|::)\s*\w+(?![{(\[])' )

    break
  end

  rval = {
    :match  => self.string[ varPos .. (self.pointer - 1) ],
    :prefix => self.string[ startPos, (varPos-startPos) ],
    :suffix => self.string[ self.pointer..-1 ],
  }
  debugMsg 1, "matchVariable succeeded: %s" % rval.inspect
  return rval
end
1074
+
1075
+
1076
### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and
### try to match text inside a Ruby code block construct which must be
### delimited by the specified <tt>outerDelimPairs</tt>. It may optionally
### contain sub-blocks delimited with the given <tt>innerDelimPairs</tt>.
### Both pair arguments are Hashes of opening => closing delimiter.
### Returns a Hash with the keys <tt>:match</tt>, <tt>:prefix</tt> and
### <tt>:suffix</tt>. Raises MatchFailure if the prefix or the opening
### delimiter is missing, if a mismatched or stray delimiter is found, or if
### the text ends before the closing delimiter is seen.
def matchCodeblock( prefix, innerDelimPairs, outerDelimPairs )
  startPos = self.pointer
  debugMsg 2, "Starting matchCodeblock at offset %d (%s)", startPos, self.rest.inspect

  # Look for the prefix
  raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/" unless
    self.skip( prefix )
  codePos = self.pointer
  debugMsg 3, "Skipped prefix '%s' to offset %d" %
    [ self.matched, codePos ]

  # Build a regexp for the outer delimiters
  ldelimOuter = "(" + outerDelimPairs.keys  .uniq.collect {|delim| Regexp::quote(delim)}.join('|') + ")"
  rdelimOuter = "(" + outerDelimPairs.values.uniq.collect {|delim| Regexp::quote(delim)}.join('|') + ")"
  debugMsg 4, "Using /%s/ as the outer delim regex" % ldelimOuter

  # Compile the opening pattern once, so the scans below always receive a
  # real Regexp rather than a pattern-source String.
  ldelimOuterRe = Regexp::new( ldelimOuter )

  unless self.scan( ldelimOuterRe )
    raise MatchFailure, %q:Did not find opening bracket at "%s..." offset %d: %
      [ self.rest[0,20].chomp, codePos ]
  end

  # Look up the corresponding outer delimiter
  closingDelim = outerDelimPairs[self.matched] or
    raise DelimiterError, "Could not find closing delimiter for '%s'" %
      self.matched

  debugMsg 3, "Scanning for closing delim '#{closingDelim}'"

  # Stays nil until the closing delimiter is found. (An empty String would
  # be truthy and hide the failure check after the loop.)
  matched = nil
  patvalid = true

  # Scan until the end of the text or until an explicit break
  while self.rest?
    debugMsg 5, "Scanning from offset %d (%s)", self.pointer, self.rest.inspect

    # Skip comments
    debugMsg 5, "Trying to match a comment"
    if self.scan( /\s*#.*/ )
      debugMsg 4, "Skipping comment '%s' to offset %d" %
        [ self.matched, self.pointer ]
      next
    end

    # Look for (any) closing delimiter
    debugMsg 5, "Trying to match a closing outer delimiter with /\\s*(#{rdelimOuter})/"
    if self.scan( /\s*(#{rdelimOuter})/ )
      debugMsg 4, "Found a right delimiter '#{self.matched}'"

      # If it's the delimiter we're looking for, stop the scan
      if self.matched.strip == closingDelim
        matched = self.matched
        debugMsg 3, "Found the closing delimiter we've been looking for (#{matched.inspect})."
        break

      # Otherwise, it's an error, as we've apparently seen a closing
      # delimiter without a corresponding opening one.
      else
        raise MatchFailure,
          %q:Mismatched closing bracket at "%s..." (offset %s). Expected '%s': %
          [ self.rest[0,20], self.pointer, closingDelim ]
      end
    end

    # Try to match a variable or a quoted phrase
    debugMsg 5, "Trying to match either a variable or quotelike"
    if self.scanVariable( '\s*' ) || self.scanQuotelike( '\s*', patvalid )
      debugMsg 3, "Matched either a variable or quotelike. Offset now %d" % self.pointer
      patvalid = false
      next
    end

    # Match some operators
    # :TODO: This hasn't really been ruby-ified
    debugMsg 5, "Trying to match an operator"
    if self.scan( %r:\s*([-+*x/%^&|.]=?
                      | [!=]~
                      | =(?!>)
                      | (\*\*|&&|\|\||<<|>>)=?
                      | split|grep|map|return
                      ):x )
      debugMsg 3, "Skipped miscellaneous operator '%s' to offset %d." %
        [ self.matched, self.pointer ]
      patvalid = true
      next
    end

    # Try to match an embedded codeblock
    debugMsg 5, "Trying to match an embedded codeblock with delim pairs: %s",
      innerDelimPairs.inspect
    if self.scanCodeblock( innerDelimPairs )
      debugMsg 3, "Skipped inner codeblock to offset %d." % self.pointer
      patvalid = true
      next
    end

    # Try to match a stray outer-left delimiter
    debugMsg 5, "Trying to match a stray outer-left delimiter (#{ldelimOuter})"
    if self.match?( ldelimOuterRe )
      raise MatchFailure, "Improperly nested codeblock at offset %d: %s... " %
        [ self.pointer, self.rest[0,20] ]
    end

    # Nothing else applied: skip one token and keep going
    patvalid = false
    self.scan( /\s*(\w+|[-=>]>|.|\Z)/m )
    debugMsg 3, "Skipped '%s' to offset %d" %
      [ self.matched, self.pointer ]
  end

  # Running out of text before the closing delimiter is a failure
  unless matched
    raise MatchFailure, "No match found for opening bracket"
  end

  rval = {
    :match  => self.string[codePos .. (self.pointer - 1)],
    :prefix => self.string[startPos, (codePos-startPos)],
    :suffix => self.string[ self.pointer..-1 ],
  }
  debugMsg 1, "matchCodeblock succeeded: %s" % rval.inspect
  return rval
end
1201
+
1202
+
1203
### Attempt to derive and return the number of scan methods traversed up to
### this point by examining the call stack. The two innermost frames (this
### method and its immediate caller) are not counted, and frames that belong
### to blocks inside the scan methods are ignored.
def scanDepth
  # caller returns nil when the stack is shallower than the requested start
  frames = caller( 2 ) || []

  # Backtrace labels look like "in `scanFoo'" on older Rubies and
  # "in 'Class#scanFoo'" from Ruby 3.4 on; accept both spellings.
  return frames.find_all {|frame|
    frame =~ /in [`'](?:[\w:]+[#.])?scan(?:Variable|Tagged|Codeblock|Bracketed|Quotelike)'/
  }.length
end
1210
+
1211
+
1212
+ #######
1213
+ private
1214
+ #######
1215
+
1216
### Write a debugging message to STDERR, provided <tt>level</tt> is non-zero
### and does not exceed the scanner's current debugging level. When extra
### <tt>args</tt> are given, <tt>msgFormat</tt> is used as a format string
### for them; otherwise it is printed verbatim. Output is indented by two
### spaces for each level above the first.
def debugMsg( level, msgFormat, *args )
  return unless level.nonzero?
  return unless self.debugLevel >= level

  text = args.empty? ? msgFormat : format( msgFormat, *args )
  padding = " " * (level - 1) * 2
  $stderr.puts( padding + text )
end
1223
+
1224
+
1225
### Turn a run of opening brackets (any of '<', '[', '{' and '(') into the
### run that closes it: every bracket is swapped for its closing partner and
### the order of the characters is reversed. The argument is converted with
### #to_s first; other characters are kept as they are.
def revbracket( bracket )
  closers = bracket.to_s.tr( '<[{(', '>]})' )
  return closers.reverse
end
1230
+
1231
+
1232
### Given an opening <tt>tag</tt> of the sort matched by #scanTagged,
### construct and return a closing tag: the tag's leading brackets, a slash,
### the tag name, and the brackets reversed. The result is passed through
### Regexp.quote so it can be embedded in a pattern. Raises MatchFailure if
### <tt>tag</tt> does not start with one or more opening brackets followed
### by a name matching XmlName.
def makeClosingTag( tag )
  debugMsg 3, "Making a closing tag for '%s'" % tag

  # The '[' inside the bracket class must be escaped, or it is parsed as the
  # start of a nested character class. An explicit match is used so that a
  # tag which doesn't fit the pattern is reported, not echoed back unchanged.
  opening = /\A([\[(<{]+)(#{XmlName})/.match( tag )
  raise MatchFailure, "Unable to construct closing tag to match: #{tag}" unless opening

  brackets = opening[1]
  name = opening[2]
  return Regexp.quote( "#{brackets}/#{name}" + revbracket(brackets) )
end
1244
+
1245
+
1246
### Make and return a new Regexp which matches substrings bounded by the
### specified +delimiters+, not counting those which have been escaped with
### the escape characters in +escapes+. Each character of +delimiters+ is a
### delimiter in its own right and is paired with the character at the same
### position in +escapes+; if +escapes+ is shorter, its last character is
### reused for the remaining delimiters. A delimiter that is its own escape
### character is escaped by doubling it. The pattern starts with +prefix+,
### which is regexp source. Raises DelimiterError if +delimiters+ contains
### no non-whitespace character. Neither argument is modified.
def makeDelimPattern( delimiters, escapes='\\', prefix='\\s*' )
  delimiters = delimiters.to_s
  # Work on a copy: #to_s returns the caller's own object for a String
  escapes = escapes.to_s.dup

  raise DelimiterError, "Illegal delimiter '#{delimiters}'" unless delimiters =~ /\S/

  # Pad the escapes string to the same length as the delimiters (nothing to
  # do when it is already long enough)
  shortfall = delimiters.length - escapes.length
  escapes << escapes[-1,1] * shortfall if shortfall > 0

  # Escape each delimiter and a corresponding escape character, and then
  # build a pattern part from them
  patParts = (0...delimiters.length).collect do |i|
    del = Regexp.escape( delimiters[i, 1] )
    esc = Regexp.escape( escapes[i, 1] )

    if del == esc
      "#{del}(?:[^#{del}]*(?:(?:#{del}#{del})[^#{del}]*)*)#{del}"
    else
      "#{del}(?:[^#{esc}#{del}]*(?:#{esc}.[^#{esc}#{del}]*)*)#{del}"
    end
  end

  # Join all the parts together and return one big pattern
  return Regexp::new( "#{prefix}(?:#{patParts.join("|")})" )
end
1275
+
1276
+ end # class StringExtractor
1277
+
1278
+