timetrap 1.8.5 → 1.8.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1278 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # A derivative of StringScanner that can scan for delimited constructs in
4
+ # addition to regular expressions. It is a loose port of the Text::Balanced
5
+ # module for Perl by Damian Conway <damian@cs.monash.edu.au>.
6
+ #
7
+ # == Synopsis
8
+ #
9
+ # se = DelimScanner::new( myString )
10
+ #
11
+ # == Authors
12
+ #
13
+ # * Michael Granger <ged@FaerieMUD.org>
14
+ # * Gonzalo Garramuno <GGarramuno@aol.com>
15
+ #
16
+ # Copyright (c) 2002, 2003 The FaerieMUD Consortium. Most rights reserved.
17
+ #
18
+ # This work is licensed under the Creative Commons Attribution License. To view
19
+ # a copy of this license, visit http://creativecommons.org/licenses/by/1.0 or
20
+ # send a letter to Creative Commons, 559 Nathan Abbott Way, Stanford, California
21
+ # 94305, USA.
22
+ #
23
+ # == Version
24
+ #
25
+ # $Id: DelimScanner.rb,v 1.2 2003/01/12 20:56:51 deveiant Exp $
26
+ #
27
+ # == History
28
+ #
29
+ # - Added :suffix hash key for returning rest (right) of matches, like Perl's
30
+ # Text::Balanced, on several methods.
31
+ # - Added one or two \ for backquoting brackets, as new ruby1.8 complains
32
+ #
33
+
34
+ require 'strscan'
35
+ require 'forwardable'
36
+
37
+ ### Add some stuff to the String class to allow easy transformation to Regexp
38
+ ### and in-place interpolation.
39
class String

  ### Compile the receiver into a Regexp. If <tt>casefold</tt> is true the
  ### pattern is case-insensitive; if <tt>extended</tt> is true the pattern is
  ### compiled in extended mode (whitespace and comments in the pattern are
  ### ignored). Both flags default to <tt>false</tt>, which yields a plain
  ### Regexp built from a copy of the string.
  def to_re( casefold=false, extended=false )
    flags = 0
    flags |= Regexp::IGNORECASE if casefold
    flags |= Regexp::EXTENDED if extended

    return Regexp::new( self.dup, flags )
  end

  ### Ideas for String-interpolation stuff courtesy of Hal E. Fulton
  ### <hal9000@hypermetrics.com> via ruby-talk

  ### Evaluate the receiver as if it were the body of a double-quoted string
  ### literal, resolving any <tt>#{...}</tt> expressions in the given
  ### <tt>scope</tt> (a Binding). Returns the interpolated String. Raises a
  ### TypeError if <tt>scope</tt> is not a Binding.
  def interpolate( scope )
    unless scope.is_a?( Binding )
      raise TypeError, "Argument to interpolate must be a Binding, not "\
        "a #{scope.class.name}"
    end

    # Escape embedded double quotes so the text can be wrapped in a
    # double-quoted literal.
    copy = self.gsub( /"/, %q:\": )

    # NOTE(review): this evals the receiver's text in the caller's binding, so
    # it must never be handed a string that comes from an untrusted source.
    eval( '"' + copy + '"', scope )
  end

end
60
+
61
+
62
+ ### A derivative of StringScanner that can scan for delimited constructs in
63
+ ### addition to regular expressions.
64
+ class DelimScanner
65
+
66
+ ### Scanner exception classes
67
# Raised internally when a scan cannot be completed.
MatchFailure = Class.new( RuntimeError )
# Raised when a delimiter specification is unusable.
DelimiterError = Class.new( RuntimeError )


# Forwardable supplies def_delegators; must_C_version is called on
# StringScanner as in older strscan-based code.
extend Forwardable
StringScanner.must_C_version


### Class constants

# Version number extracted from the RCS revision keyword
Version = %q{$Revision: 1.2 $}[ /([\d\.]+)/, 1 ]
# RCS id keyword
Rcsid = %q$Id: DelimScanner.rb,v 1.2 2003/01/12 20:56:51 deveiant Exp $

# Pattern to match a valid XML name
XmlName = '[a-zA-Z_:][a-zA-Z0-9:.-]*'
81
+
82
+
83
+ ### Namespace module for DelimString constants
84
module Default

  # Opening => closing delimiter pairs that scanCodeblock uses when it is
  # not given any of its own.
  CodeblockDelimiters = {
    '{'     => '}',
    'begin' => 'end',
    'do'    => 'end',
  }

  # Default scanMultiple operations and their arguments: a one-element
  # Array holding a Hash of method name => argument list.
  MultipleFunctions = [
    {
      :scanVariable  => [],
      :scanQuotelike => [],
      :scanCodeblock => [],
    },
  ]

end
102
+ include Default
103
+
104
+
105
+ ### Define delegating methods that cast their argument to a Regexp from a
106
+ ### String. This allows the scanner's scanning methods to be called with
107
+ ### Strings in addition to Regexps. This was mostly stolen from
108
+ ### forwardable.rb.
109
def self.def_casting_delegators( *methods )
  methods.each do |name|
    # Source for a one-argument wrapper that turns a non-Regexp pattern
    # into a Regexp (via to_s.to_re) and then forwards to @scanner.
    source = <<-EOF
      def #{name}( pattern )
        pattern = pattern.to_s.to_re unless pattern.is_a?( Regexp )
        @scanner.#{name}( pattern )
      end
    EOF

    class_eval( source, "(--def_casting_delegators--)", 1 )
  end
end
119
+
120
+
121
+ ### Create a new DelimScanner object for the specified <tt>string</tt>. If
122
+ ### <tt>dup</tt> is <tt>true</tt>, a duplicate of the target string will be
123
+ ### used instead of the one given. The target string will be frozen after
124
+ ### the scanner is created.
125
def initialize( string, dup=true )
  # No match error yet, and debugging output is off.
  @matchError = nil
  @debugLevel = 0

  # All scanning is done by a wrapped StringScanner.
  @scanner = StringScanner::new( string, dup )
end
130
+
131
+
132
+
133
+ ######
134
+ public
135
+ ######
136
+
137
+ # Here, some delegation trickery is done to make a DelimScanner behave like
138
+ # a StringScanner. Some methods are directly delegated, while some are
139
+ # delegated via a method which casts its argument to a Regexp first so some
140
+ # scanner methods can be called with Strings as well as Regexps.
141
+
142
+ # A list of delegated methods that need casting.
143
NeedCastingDelegators = :scan, :skip, :match?, :check,
  :scan_until, :skip_until, :exist?, :check_until

# Delegate all StringScanner instance methods to the associated scanner
# object, except those that need a casting delegator, which uses an indirect
# delegation method.
#
# Method names are normalized to Symbols before the subtraction:
# Module#instance_methods returns Symbols on Ruby 1.9 and later, so
# subtracting a list of String names would remove nothing and the casting
# methods would first be delegated directly and then redefined.
def_delegators :@scanner,
  *( StringScanner.instance_methods(false).collect {|meth| meth.to_sym} -
     NeedCastingDelegators )

def_casting_delegators( *NeedCastingDelegators )



# The last match error encountered by the scanner
attr_accessor :matchError
protected :matchError= ; # ; is to work around a ruby-mode indent bug

# Debugging level
attr_accessor :debugLevel
163
+
164
+
165
+
166
+ ### Returns <tt>true</tt> if the scanner has encountered a match error.
167
def matchError?
  # Anything other than nil (even false) counts as a recorded error.
  @matchError.nil? ? false : true
end
170
+
171
+
172
+ ### Starting at the scan pointer, try to match a substring delimited by the
173
+ ### specified <tt>delimiters</tt>, skipping the specified <tt>prefix</tt>
174
+ ### and any character escaped by the specified <tt>escape</tt>
175
+ ### character/s. If matched, advances the scan pointer and returns a Hash
176
+ ### with the following key/value pairs on success:
177
+ ###
178
+ ### [<tt>:match</tt>]
179
+ ### The text of the match, including delimiters.
180
+ ### [<tt>:prefix</tt>]
181
+ ### The matched prefix, if any.
182
+ ###
183
+ ### If the match fails, returns nil.
184
def scanDelimited( delimiters="'\"`", prefix='\\s*', escape='\\' )
  # Explicit nils fall back to the same defaults as omitted arguments.
  delimiters ||= "'\"`"
  prefix ||= '\\s*'
  escape ||= '\\'

  debugMsg( 1, "Scanning for delimited text: delim = (%s), prefix=(%s), escape=(%s)",
    delimiters, prefix, escape )
  self.matchError = nil

  # Try to match the prefix first to get the length
  unless (( prefixLength = self.match?(prefix.to_re) ))
    self.matchError = "Failed to match prefix '%s' at offset %d" %
      [ prefix, self.pointer ]
    return nil
  end

  # Now build a delimited pattern with the specified parameters.
  delimPattern = makeDelimPattern( delimiters, escape, prefix )
  debugMsg( 2, "Delimiter pattern is %s" % delimPattern.inspect )

  # Fail if no match
  unless (( matchedString = self.scan(delimPattern) ))
    self.matchError = "No delimited string found."
    return nil
  end

  # Split the matched text at the prefix boundary. The prefix is taken with a
  # start/length slice: a range ending at prefixLength-1 would turn into
  # 0..-1 for an empty prefix and return the whole match as the prefix.
  return {
    :match  => matchedString[ prefixLength .. -1 ],
    :prefix => matchedString[ 0, prefixLength ],
  }
end
215
+
216
+
217
+ ### Match using the #scanDelimited method, but only return the match or nil.
218
def extractDelimited( *args )
  # Delegate to scanDelimited and keep only the matched text.
  found = scanDelimited( *args )
  return nil unless found
  return found[:match]
end
222
+
223
+
224
+ ### Starting at the scan pointer, try to match a substring delimited by the
225
+ ### specified <tt>delimiters</tt>, skipping the specified <tt>prefix</tt>
226
+ ### and any character escaped by the specified <tt>escape</tt>
227
+ ### character/s. If matched, advances the scan pointer and returns the
228
+ ### length of the matched string; if it fails the match, returns nil.
229
+ def skipDelimited( delimiters="'\"`", prefix='\\s*', escape='\\' )
230
+ delimiters ||= "'\"`"
231
+ prefix ||= '\\s*'
232
+ escape ||= '\\'
233
+
234
+ self.matchError = nil
235
+ return self.skip( makeDelimPattern(delimiters, escape, prefix) )
236
+ end
237
+
238
+
239
+ ### Starting at the scan pointer, try to match a substring delimited by
240
+ ### balanced <tt>delimiters</tt> of the type specified, after skipping the
241
+ ### specified <tt>prefix</tt>. On a successful match, this method advances
242
+ ### the scan pointer and returns a Hash with the following key/value pairs:
243
+ ###
244
+ ### [<tt>:match</tt>]
245
+ ### The text of the match, including the delimiting brackets.
246
+ ### [<tt>:prefix</tt>]
247
+ ### The matched prefix, if any.
248
+ ###
249
+ ### On failure, returns nil.
250
def scanBracketed( delimiters="{([<", prefix='\s*' )
  delimiters ||= "{([<"
  prefix ||= '\s*'
  prefix = prefix.to_re unless prefix.kind_of?( Regexp )

  debugMsg( 1, "Scanning for bracketed text: delimiters = (%s), prefix = (%s)",
    delimiters, prefix )

  self.matchError = nil

  # Quote characters named in the delimiter spec become an alternation
  # (nil when there are none); a 'q' in the spec turns on quotelike matching.
  quoteChars = delimiters.squeeze.split( // ).find_all {|char| char =~ /["'`]/ }
  qdel = quoteChars.empty? ? nil : quoteChars.join( '|' )
  quotelike = ( delimiters =~ /q/ ) ? true : nil

  # Fold every bracket to its left-hand form and drop whatever else the
  # character class below does not cover.
  bracketChars = Regexp.quote( '[\\](){}<>' )
  ldel = delimiters.tr( '[](){}<>', '[[(({{<<' ).gsub( /[^#{bracketChars}]+/, '' ).squeeze

  # The right-hand delimiters are the mirror image of the left-hand ones;
  # tr! returning nil means there was no bracket to mirror.
  rdel = ldel.dup
  rdel.tr!( '[({<', '])}>' ) or
    raise DelimiterError, "Did not find a suitable bracket in delimiter: '#{delimiters}'"

  # Turn each delimiter string into an alternation of quoted characters.
  alternation = lambda {|chars| chars.split( // ).collect {|ch| Regexp.quote(ch) }.join( '|' ) }
  ldel = alternation.call( ldel )
  rdel = alternation.call( rdel )

  depth = self.scanDepth
  startPos = self.pointer

  begin
    return matchBracketed( prefix, ldel, qdel, quotelike, rdel )
  rescue MatchFailure => err
    # A failed match is recorded, the pointer is rewound, and nil returned.
    debugMsg( depth + 1, "Match error: %s" % err.message )
    self.matchError = err.message
    self.pointer = startPos
    return nil
  rescue
    # Anything else also rewinds the pointer, but is passed on to the caller.
    self.pointer = startPos
    Kernel::raise
  end
end
299
+
300
+
301
+ ### Match using the #scanBracketed method, but only return the match or nil.
302
def extractBracketed( *args )
  # Delegate to scanBracketed and keep only the matched text.
  found = scanBracketed( *args )
  return nil unless found
  return found[:match]
end
306
+
307
+
308
+ ### Starting at the scan pointer, try to match a substring with
309
+ ### #scanBracketed. On a successful match, this method advances the scan
310
+ ### pointer and returns the length of the match, including the delimiters
311
+ ### and any prefix that was skipped. On failure, returns nil.
312
def skipBracketed( *args )
  # scanBracketed advances the scan pointer on success and rewinds it itself
  # when the match fails, so the pointer must not be touched here.
  result = scanBracketed( *args )
  return nil unless result

  # The skipped length is the matched text plus whatever prefix preceded it;
  # both come from the result Hash returned by scanBracketed.
  return result[:match].length + result[:prefix].length
end
323
+
324
+
325
+ ### Extracts and segments text from the scan pointer forward that occurs
326
+ ### between (balanced) specified tags, after skipping the specified
327
+ ### <tt>prefix</tt>. If the opentag argument is <tt>nil</tt>, a pattern which
328
+ ### will match any standard HTML/XML tag will be used. If the
329
+ ### <tt>closetag</tt> argument is <tt>nil</tt>, a pattern is created which
330
+ ### prepends a <tt>/</tt> character to the matched opening tag, after any
331
+ ### bracketing characters. The <tt>options</tt> argument is a Hash of one or
332
+ ### more options which govern the matching operation. They are described in
333
+ ### more detail in the Description section of 'lib/DelimScanner.rb'. On a
334
+ ### successful match, this method advances the scan pointer and returns a Hash with:
335
+ ###
336
+ ### [<tt>:match</tt>]
337
+ ### The text of the match, including the delimiting tags.
338
+ ### [<tt>:prefix</tt>]
339
+ ### The matched prefix, if any.
340
+ ###
341
+ ### On failure, returns nil.
342
def scanTagged( opentag=nil, closetag=nil, prefix='\s*', options={} )
  prefix ||= '\s*'

  # Without an explicit opening tag, fall back to a generic tag pattern.
  ldel = opentag || %Q,<\\w+(?:#{ makeDelimPattern(%q:'":) }|[^>])*>,
  rdel = closetag
  unless options.kind_of?( Hash )
    raise ArgumentError, "Options argument must be a hash"
  end

  # :reject and :ignore may each be a single pattern or an Array of
  # alternatives; either way they end up as one pattern string.
  failmode = options[:fail]
  toAlternation = lambda {|spec| spec.is_a?( Array ) ? spec.join( "|" ) : ( spec || '' ) }
  bad = toAlternation.call( options[:reject] )
  ignore = toAlternation.call( options[:ignore] )

  self.matchError = nil
  startPos = self.pointer
  depth = self.scanDepth

  begin
    return matchTagged( prefix, ldel, rdel, failmode, bad, ignore )
  rescue MatchFailure => err
    # A failed match is recorded, the pointer is rewound, and nil returned.
    debugMsg( depth + 1, "Match error: %s" % err.message )
    self.matchError = err.message
    self.pointer = startPos
    return nil
  rescue
    # Anything else also rewinds the pointer, but is passed on to the caller.
    self.pointer = startPos
    Kernel::raise
  end
end
381
+
382
+
383
+ ### Match using the #scanTagged method, but only return the match or nil.
384
def extractTagged( *args )
  # Delegate to scanTagged and keep only the matched text.
  found = scanTagged( *args )
  return nil unless found
  return found[:match]
end
388
+
389
+
390
+ ### Starting at the scan pointer, try to match a substring with
391
+ ### #scanTagged. On a successful match, this method advances the scan
392
+ ### pointer and returns the length of the match, including any delimiters
393
+ ### and any prefix that was skipped. On failure, returns nil.
394
def skipTagged( *args )
  # scanTagged advances the scan pointer on success and rewinds it itself
  # when the match fails, so the pointer must not be touched here.
  result = scanTagged( *args )
  return nil unless result

  # The skipped length is the matched text plus whatever prefix preceded it;
  # both come from the result Hash returned by scanTagged.
  return result[:match].length + result[:prefix].length
end
405
+
406
+
407
+ # :NOTE:
408
+ # Since the extract_quotelike function isn't documented at all in
409
+ # Text::Balanced, I'm only guessing this is correct...
410
+
411
+ ### Starting from the scan pointer, try to match any one of the various Ruby
412
+ ### quotes and quotelike operators after skipping the specified
413
+ ### <tt>prefix</tt>. Nested backslashed delimiters, embedded balanced
414
+ ### bracket delimiters (for the quotelike operators), and trailing modifiers
415
+ ### are all caught. If <tt>matchRawRegex</tt> is <tt>true</tt>, inline
416
+ ### regexen (eg., <tt>/pattern/</tt>) are matched as well. Advances the scan
417
+ ### pointer and returns a Hash with the following key/value pairs on
418
+ ### success:
419
+ ###
420
+ ### [<tt>:match</tt>]
421
+ ### The entire text of the match.
422
+ ### [<tt>:prefix</tt>]
423
+ ### The matched prefix, if any.
424
+ ### [<tt>:quoteOp</tt>]
425
+ ### The name of the quotelike operator (if any) (eg., '%Q', '%r', etc).
426
+ ### [<tt>:leftDelim</tt>]
427
+ ### The left delimiter of the first block of the operation.
428
+ ### [<tt>:delimText</tt>]
429
+ ### The text of the first block of the operation.
430
+ ### [<tt>:rightDelim</tt>]
431
+ ### The right delimiter of the first block of the operation.
432
+ ### [<tt>:modifiers</tt>]
433
+ ### The trailing modifiers on the operation (if any).
434
+ ###
435
+ ### On failure, returns nil.
436
def scanQuotelike( prefix='\s*', matchRawRegex=true )
  self.matchError = nil
  startPos = self.pointer
  depth = self.scanDepth

  begin
    return matchQuotelike( prefix, matchRawRegex )
  rescue MatchFailure => err
    # A failed match is recorded, the pointer is rewound, and nil returned.
    debugMsg( depth + 1, "Match error: %s" % err.message )
    self.matchError = err.message
    self.pointer = startPos
    return nil
  rescue
    # Anything else also rewinds the pointer, but is passed on to the caller.
    self.pointer = startPos
    Kernel::raise
  end
end
458
+
459
+
460
+ ### Match using the #scanQuotelike method, but only return the match or nil.
461
def extractQuotelike( *args )
  # Delegate to scanQuotelike and keep only the matched text.
  found = scanQuotelike( *args )
  return nil unless found
  return found[:match]
end
465
+
466
+
467
+ ### Starting at the scan pointer, try to match a substring with
468
+ ### #scanQuotelike. On a successful match, this method advances the scan
469
+ ### pointer and returns the length of the match, including any delimiters
470
+ ### and any prefix that was skipped. On failure, returns nil.
471
def skipQuotelike( *args )
  # scanQuotelike advances the scan pointer on success and rewinds it itself
  # when the match fails, so the pointer must not be touched here.
  result = scanQuotelike( *args )
  return nil unless result

  # The skipped length is the matched text plus whatever prefix preceded it;
  # both come from the result Hash returned by scanQuotelike.
  return result[:match].length + result[:prefix].length
end
482
+
483
+
484
+ ### Starting from the scan pointer, try to match a Ruby variable after
485
+ ### skipping the specified prefix.
486
def scanVariable( prefix='\s*' )
  self.matchError = nil
  startPos = self.pointer
  depth = self.scanDepth

  begin
    return matchVariable( prefix )
  rescue MatchFailure => err
    # A failed match is recorded, the pointer is rewound, and nil returned.
    debugMsg( depth + 1, "Match error: %s" % err.message )
    self.matchError = err.message
    self.pointer = startPos
    return nil
  rescue
    # Anything else also rewinds the pointer, but is passed on to the caller.
    self.pointer = startPos
    Kernel::raise
  end
end
507
+
508
+
509
+ ### Match using the #scanVariable method, but only return the match or nil.
510
def extractVariable( *args )
  # Delegate to scanVariable and keep only the matched text.
  found = scanVariable( *args )
  return nil unless found
  return found[:match]
end
514
+
515
+
516
+ ### Starting at the scan pointer, try to match a substring with
517
+ ### #scanVariable. On a successful match, this method advances the scan
518
+ ### pointer and returns the length of the match, including any delimiters
519
+ ### and any prefix that was skipped. On failure, returns nil.
520
def skipVariable( *args )
  # scanVariable advances the scan pointer on success and rewinds it itself
  # when the match fails, so the pointer must not be touched here.
  result = scanVariable( *args )
  return nil unless result

  # The skipped length is the matched text plus whatever prefix preceded it;
  # both come from the result Hash returned by scanVariable.
  return result[:match].length + result[:prefix].length
end
531
+
532
+
533
+ ### Starting from the scan pointer, and skipping the specified
534
+ ### <tt>prefix</tt>, try to recognize and match a balanced bracket-,
535
+ ### do/end-, or begin/end-delimited substring that may contain unbalanced
536
+ ### delimiters inside quotes or quotelike operations.
537
def scanCodeblock( innerDelim=CodeblockDelimiters, prefix='\s*', outerDelim=innerDelim )
  self.matchError = nil
  startPos = self.pointer

  # Explicit nils fall back to the same defaults as omitted arguments.
  prefix ||= '\s*'
  innerDelim ||= CodeblockDelimiters
  outerDelim ||= innerDelim

  # Nesting depth = number of scan* methods already on the call stack.
  depth = caller(1).grep( /in `scan(Variable|Tagged|Codeblock|Bracketed|Quotelike)'/ ).length

  begin
    debugMsg 3, "------------------------------------"
    debugMsg 3, "Calling matchCodeBlock( %s, %s, %s )",
      prefix.inspect, innerDelim.inspect, outerDelim.inspect
    debugMsg 3, "------------------------------------"
    return matchCodeblock( prefix, innerDelim, outerDelim )
  rescue MatchFailure => err
    # A failed match is recorded, the pointer is rewound, and nil returned.
    debugMsg( depth + 1, "Match error: %s" % err.message )
    self.matchError = err.message
    self.pointer = startPos
    return nil
  rescue
    # Anything else also rewinds the pointer, but is passed on to the caller.
    self.pointer = startPos
    Kernel::raise
  end
end
568
+
569
+
570
+ ### Match using the #scanCodeblock method, but only return the match or nil.
571
def extractCodeblock( *args )
  # Delegate to scanCodeblock and keep only the matched text.
  found = scanCodeblock( *args )
  return nil unless found
  return found[:match]
end
575
+
576
+
577
+ ### Starting at the scan pointer, try to match a substring with
578
+ ### #scanCodeblock. On a successful match, this method advances the scan
579
+ ### pointer and returns the length of the match, including any delimiters
580
+ ### and any prefix that was skipped. On failure, returns nil.
581
def skipCodeblock( *args )
  # scanCodeblock advances the scan pointer on success and rewinds it itself
  # when the match fails, so the pointer must not be touched here.
  result = scanCodeblock( *args )
  return nil unless result

  # The skipped length is the matched text plus whatever prefix preceded it;
  # both come from the result Hash returned by scanCodeblock.
  return result[:match].length + result[:prefix].length
end
592
+
593
+
594
+
595
+
596
+ #########
597
+ protected
598
+ #########
599
+
600
+ ### Scan the string from the scan pointer forward, skipping the specified
601
+ ### <tt>prefix</tt> and trying to match a string delimited by bracketing
602
+ ### delimiters <tt>ldel</tt> and <tt>rdel</tt> (Regexp objects), and quoting
603
+ ### delimiters <tt>qdel</tt> (Regexp). If <tt>quotelike</tt> is
604
+ ### <tt>true</tt>, Ruby quotelike constructs will also be honored.
605
def matchBracketed( prefix, ldel, qdel, quotelike, rdel )
  # Remember where the scan began so the skipped prefix can be reported.
  startPos = self.pointer
  debugMsg( 2, "matchBracketed starting at pos = %d: prefix = %s, "\
    "ldel = %s, qdel = %s, quotelike = %s, rdel = %s",
    startPos, prefix.inspect, ldel.inspect, qdel.inspect, quotelike.inspect,
    rdel.inspect )

  # The prefix has to match or the whole scan fails.
  unless self.skip( prefix )
    raise MatchFailure, "Did not find prefix: #{prefix.inspect}"
  end

  # Everything from here on belongs to the bracketed text.
  ldelpos = self.pointer
  debugMsg( 3, "Found prefix. Left delim pointer at %d", ldelpos )

  # An opening bracket must come immediately after the prefix.
  opener = self.scan( ldel )
  unless opener
    raise MatchFailure, "Did not find opening bracket after prefix: '%s' (%d)" %
      [ self.string[startPos..ldelpos].chomp, ldelpos ]
  end

  # Stack of opening brackets that are still waiting for their partner.
  nesting = [ opener ]
  debugMsg( 3, "Found opening bracket. Nesting = %s", nesting.inspect )

  while self.rest?
    debugMsg( 5, "Starting scan loop. Nesting = %s", nesting.inspect )

    # A backslash protects whatever character follows it.
    if self.skip( /\\./ )
      debugMsg( 4, "Skipping backslashed literal at offset %d: '%s'",
        self.pointer - 2, self.string[ self.pointer - 2, 2 ].chomp )

    # Another opening bracket takes us one level deeper.
    elsif (( opened = self.scan(ldel) ))
      debugMsg( 4, "Found opening delim %s at offset %d",
        opened.inspect, self.pointer - 1 )
      nesting.push( opened )

    # A closing bracket has to pair up with the innermost open one.
    elsif (( closer = self.scan(rdel) ))
      debugMsg( 4, "Found closing delim %s at offset %d",
        closer.inspect, self.pointer - 1 )

      # :TODO: When is this code reached?
      if nesting.empty?
        raise MatchFailure, "Unmatched closing bracket '%s' at offset %d" %
          [ closer, self.pointer - 1 ]
      end

      # Work out the complement of the bracket on top of the stack.
      expected = nesting.pop.tr( '({[<', ')}]>' )
      debugMsg( 4, "Got a '%s' bracket off nesting stack", expected )

      unless expected == closer
        raise MatchFailure, "Mismatched closing bracket at offset %d: "\
          "Expected '%s', but found '%s' instead." %
          [ self.pointer - 1, expected, closer ]
      end

      # Once the outermost bracket is closed the match is complete.
      if nesting.empty?
        debugMsg( 4, "Finished with scan: nesting stack empty." )
        break
      end

    # A quote character: jump over the whole quoted chunk, honoring
    # backslash escapes inside it.
    elsif qdel && (( quote = self.scan(qdel) ))
      unless self.scan( /[^\\#{quote}]*(?:\\.[^\\#{quote}]*)*(#{Regexp::quote(quote)})/ )
        raise MatchFailure, "Unmatched embedded quote (%s) at offset %d" %
          [ quote, self.pointer - 1 ]
      end
      debugMsg( 4, "Skipping quoted chunk. Scan pointer now at offset %d", self.pointer )

    # An embedded quotelike construct is skipped as a unit.
    elsif quotelike && self.scanQuotelike
      debugMsg( 4, "Matched a quotelike. Scan pointer now at offset %d", self.pointer )

    # Anything else: step over a run of alphanumerics or one other character.
    else
      self.skip( /(?:[a-zA-Z0-9]+|.)/m )
      debugMsg 5, "Skipping '%s' at offset %d." %
        [ self.matched, self.pointer ]
    end
  end

  # Brackets left on the stack mean the text ended before they were closed.
  unless nesting.empty?
    raise MatchFailure, "Unmatched opening bracket(s): %s.. at offset %d" %
      [ nesting.join('..'), self.pointer ]
  end

  rval = {
    :match  => self.string[ ldelpos .. (self.pointer - 1) ],
    :prefix => self.string[ startPos, (ldelpos-startPos) ],
    :suffix => self.string[ self.pointer..-1 ],
  }
  debugMsg 1, "matchBracketed succeeded: %s" % rval.inspect
  return rval
end
720
+
721
+
722
+ ### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and
723
+ ### try to match text bracketed by the given left and right tag-delimiters
724
+ ### (<tt>ldel</tt> and <tt>rdel</tt>).
725
def matchTagged( prefix, ldel, rdel, failmode, bad, ignore )
  # NOTE: a caller-supplied closing tag is interpolated against this
  # method's binding further down, so the local variable names used here
  # are kept stable.
  failmode = failmode.to_s.intern if failmode
  startPos = self.pointer
  debugMsg 2, "matchTagged starting at pos = %d: prefix = %s, "\
    "ldel = %s, rdel = %s, failmode = %s, bad = %s, ignore = %s",
    startPos, prefix.inspect, ldel.inspect, rdel.inspect,
    failmode.inspect, bad.inspect, ignore.inspect

  rdelspec = ''
  openTagPos = textPos = paraPos = closeTagPos = endPos = nil
  match = nil

  # The prefix has to match or the whole scan fails.
  unless self.skip( prefix )
    raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/"
  end

  openTagPos = self.pointer
  debugMsg 3, "Found prefix. Pointer now at offset %d" % self.pointer

  # An opening tag must come immediately after the prefix.
  unless (( match = self.scan(ldel) ))
    raise MatchFailure, "Did not find opening tag %s at offset %d" %
      [ ldel.inspect, self.pointer ]
  end

  textPos = self.pointer
  debugMsg 3, "Found left delimiter '%s': offset now %d" % [ match, textPos ]

  # Without an explicit closing tag, derive one from the opening tag that was
  # actually matched; otherwise interpolate the one we were given.
  if rdel.nil?
    rdelspec = makeClosingTag( match )
    debugMsg 3, "Generated right-delimiting tag: %s" % rdelspec.inspect
  else
    # Make the regexp-related globals from the match
    rdelspec = rdel.gsub( /(\A|[^\\])\$([1-9])/, '\1self[\2]' ).interpolate( binding )
    debugMsg 3, "Right delimiter (after interpolation) is: %s" % rdelspec.inspect
  end

  # Walk forward until the text runs out or a closing tag turns up.
  while self.rest? && closeTagPos.nil?

    # A backslash protects whatever character follows it.
    if (( self.skip( /^\\./ ) ))
      debugMsg 4, "Skipping backslashed literal at offset %d" % self.pointer

    # Remember the first paragraph break for fail == :para
    elsif (( matchlength = self.skip( /^(\n[ \t]*\n)/ ) ))
      paraPos ||= self.pointer - matchlength
      debugMsg 4, "Found paragraph position at offset %d" % paraPos

    # The closing tag ends the loop via closeTagPos.
    elsif (( matchlength = self.skip( rdelspec ) ))
      closeTagPos = self.pointer - matchlength
      debugMsg 3, "Found closing tag at offset %d" % closeTagPos

    # Text matching the ignore pattern is stepped over.
    elsif ignore && !ignore.empty? && self.skip(ignore)
      debugMsg 3, "Skipping ignored text '%s' at offset %d" %
        [ self.matched, self.pointer - self.matched_size ]

    # Text matching the "bad" pattern ends the scan early in para or max
    # mode, and is a match error otherwise.
    elsif bad && !bad.empty? && self.match?( bad )
      break if [ :para, :max ].include?( failmode )

      raise MatchFailure, "Found invalid nested tag '%s' at offset %d" %
        [ match, self.pointer ]

    # A nested opening tag: back up and recurse to get past the whole
    # nested element.
    elsif (( match = self.scan( ldel ) ))
      tag = match
      self.unscan

      unless self.matchTagged( prefix, ldel, rdel, failmode, bad, ignore )
        break if [ :para, :max ].include?( failmode )

        raise MatchFailure, "Found unbalanced nested tag '%s' at offset %d" %
          [ tag, self.pointer ]
      end

    # Nothing of interest here; move on by one.
    else
      self.pointer += 1
      debugMsg 5, "Advanced scan pointer to offset %d" % self.pointer
    end
  end

  # Running out of text without a closing tag is a "short" match: acceptable
  # in max or para mode, an error in any other.
  unless closeTagPos
    debugMsg 3, "No close tag position found. "

    unless [ :max, :para ].include?( failmode )
      raise MatchFailure, "No closing tag found."
    end

    closeTagPos = self.pointer - 1
    debugMsg 4, "Failmode %s tolerates no closing tag. Close tag position set to %d" %
      [ failmode.inspect, closeTagPos ]

    # In para mode the match stops at the first paragraph break, if any.
    if failmode == :para && paraPos
      self.pointer = paraPos + 1
    end
  end

  rval = {
    :match  => self.string[ openTagPos .. (self.pointer - 1) ],
    :prefix => self.string[ startPos, (openTagPos-startPos) ],
    :suffix => self.string[ self.pointer..-1 ],
  }
  debugMsg 1, "matchTagged succeeded: %s" % rval.inspect
  return rval
end
844
+
845
+
846
+ ### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and
847
+ ### try to match text inside a Ruby quotelike construct. If
848
+ ### <tt>matchRawRegex</tt> is <tt>true</tt>, the regex construct
849
+ ### <tt>/pattern/</tt> is also matched.
850
+ def matchQuotelike( prefix, matchRawRegex )
851
+ startPos = self.pointer
852
+ debugMsg 2, "matchQuotelike starting at pos = %d: prefix = %s, "\
853
+ "matchRawRegex = %s",
854
+ startPos, prefix.inspect, matchRawRegex.inspect
855
+
856
+ # Init position markers
857
+ rval = oppos = preldpos = ldpos = strpos = rdpos = modpos = nil
858
+
859
+ # Look for the prefix
860
+ raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/" unless
861
+ self.skip( prefix )
862
+ oppos = self.pointer
863
+
864
+
865
+ # Peek at the next character
866
+ # If the initial quote is a simple quote, our job is easy
867
+ if self.check(/^["`']/) || ( matchRawRegex && self.check(%r:/:) )
868
+
869
+ initial = self.matched
870
+
871
+ # Build the pattern for matching the simple string
872
+ pattern = "%s [^\\%s]* (\\.[^\\%s]*)* %s" %
873
+ [ Regexp.quote(initial),
874
+ initial, initial,
875
+ Regexp.quote(initial) ]
876
+ debugMsg 2, "Matching simple quote at offset %d with /%s/" %
877
+ [ self.pointer, pattern ]
878
+
879
+ # Search for it, raising an exception if it's not found
880
+ unless self.scan( /#{pattern}/xism )
881
+ raise MatchFailure,
882
+ "Did not find closing delimiter to match '%s' at '%s...' (offset %d)" %
883
+ [ initial, self.string[ oppos, 20 ].chomp, self.pointer ]
884
+ end
885
+
886
+ modpos = self.pointer
887
+ rdpos = modpos - 1
888
+
889
+ # If we're matching a regex, look for any trailing modifiers
890
+ if initial == '/'
891
+ pattern = if RUBY_VERSION >= "1.7.3" then /[imoxs]*/ else /[imox]*/ end
892
+ self.scan( pattern )
893
+ end
894
+
895
+ rval = {
896
+ :prefix => self.string[ startPos, (oppos-startPos) ],
897
+ :match => self.string[ oppos .. (self.pointer - 1) ],
898
+ :leftDelim => self.string[ oppos, 1 ],
899
+ :delimText => self.string[ (oppos+1) .. (rdpos-1) ],
900
+ :rightDelim => self.string[ rdpos, 1 ],
901
+ :modifiers => self.string[ modpos, (self.pointer-modpos) ],
902
+ :suffix => self.string[ self.pointer.. -1 ],
903
+ }
904
+
905
+ # If it's one of the fancy quotelike operators, our job is somewhat
906
+ # complicated (though nothing like Perl's, thank the Goddess)
907
+ elsif self.scan( %r:%[rwqQx]?(?=\S): )
908
+ op = self.matched
909
+ debugMsg 2, "Matching a real quotelike ('%s') at offset %d" %
910
+ [ op, self.pointer ]
911
+ modifiers = nil
912
+
913
+ ldpos = self.pointer
914
+ strpos = ldpos + 1
915
+
916
+ # Peek ahead to see what the delimiter is
917
+ ldel = self.check( /\S/ )
918
+
919
+ # If it's a bracketing character, just use matchBracketed
920
+ if ldel =~ /[\[(<{]/
921
+ rdel = ldel.tr( '[({<', '])}>' )
922
+ debugMsg 4, "Left delim is a bracket: %s; looking for compliment: %s" %
923
+ [ ldel, rdel ]
924
+ self.matchBracketed( '', Regexp::quote(ldel), nil, nil, Regexp::quote(rdel) )
925
+ else
926
+ debugMsg 4, "Left delim isn't a bracket: '#{ldel}'; looking for closing instance"
927
+ self.scan( /#{ldel}[^\\#{ldel}]*(\\.[^\\#{ldel}]*)*#{ldel}/ ) or
928
+ raise MatchFailure,
929
+ "Can't find a closing delimiter '%s' at '%s...' (offset %d)" %
930
+ [ ldel, self.rest[0,20].chomp, self.pointer ]
931
+ end
932
+ rdelpos = self.pointer - 1
933
+
934
+ # Match modifiers for Regexp quote
935
+ if op == '%r'
936
+ pattern = if RUBY_VERSION >= "1.7.3" then /[imoxs]*/ else /[imox]*/ end
937
+ modifiers = self.scan( pattern ) || ''
938
+ end
939
+
940
+ rval = {
941
+ :prefix => self.string[ startPos, (oppos-startPos) ],
942
+ :match => self.string[ oppos .. (self.pointer - 1) ],
943
+ :quoteOp => op,
944
+ :leftDelim => self.string[ ldpos, 1 ],
945
+ :delimText => self.string[ strpos, (rdelpos-strpos) ],
946
+ :rightDelim => self.string[ rdelpos, 1 ],
947
+ :modifiers => modifiers,
948
+ :suffix => self.string[ self.pointer.. -1 ],
949
+ }
950
+
951
+ # If it's a here-doc, things get even hairier.
952
+ elsif self.scan( %r:<<(-)?: )
953
+ debugMsg 2, "Matching a here-document at offset %d" % self.pointer
954
+ op = self.matched
955
+
956
+ # If there was a dash, start with optional whitespace
957
+ indent = self[1] ? '\s*' : ''
958
+ ldpos = self.pointer
959
+ label = ''
960
+
961
+ # Plain identifier
962
+ if self.scan( /[A-Za-z_]\w*/ )
963
+ label = self.matched
964
+ debugMsg 3, "Setting heredoc terminator to bare identifier '%s'" % label
965
+
966
+ # Quoted string
967
+ elsif self.scan( / ' ([^'\\]* (?:\\.[^'\\]*)*) ' /sx ) ||
968
+ self.scan( / " ([^"\\]* (?:\\.[^"\\]*)*) " /sx ) ||
969
+ self.scan( / ` ([^`\\]* (?:\\.[^`\\]*)*) ` /sx )
970
+ label = self[1]
971
+ debugMsg 3, "Setting heredoc terminator to quoted identifier '%s'" % label
972
+
973
+ # Ruby, unlike Perl, requires a terminal, even if it's only an empty
974
+ # string
975
+ else
976
+ raise MatchFailure,
977
+ "Missing heredoc terminator before end of line at "\
978
+ "'%s...' (offset %d)" %
979
+ [ self.rest[0,20].chomp, self.pointer ]
980
+ end
981
+ extrapos = self.pointer
982
+
983
+ # Advance to the beginning of the string
984
+ self.skip( /.*\n/ )
985
+ strpos = self.pointer
986
+ debugMsg 3, "Scanning until /\\n#{indent}#{label}\\n/m"
987
+
988
+ # Match to the label
989
+ unless self.scan_until( /\n#{indent}#{label}\n/m )
990
+ raise MatchFailure,
991
+ "Couldn't find heredoc terminator '%s' after '%s...' (offset %d)" %
992
+ [ label, self.rest[0,20].chomp, self.pointer ]
993
+ end
994
+
995
+ rdpos = self.pointer - self.matched_size
996
+
997
+ rval = {
998
+ :prefix => self.string[ startPos, (oppos-startPos) ],
999
+ :match => self.string[ oppos .. (self.pointer - 1) ],
1000
+ :quoteOp => op,
1001
+ :leftDelim => self.string[ ldpos, (extrapos-ldpos) ],
1002
+ :delimText => self.string[ strpos, (rdpos-strpos) ],
1003
+ :rightDelim => self.string[ rdpos, (self.pointer-rdpos) ],
1004
+ :suffix => self.string[ self.pointer.. -1 ],
1005
+ }
1006
+
1007
+ else
1008
+ raise MatchFailure,
1009
+ "No quotelike operator found after prefix at '%s...'" %
1010
+ self.rest[0,20].chomp
1011
+ end
1012
+
1013
+
1014
+ debugMsg 1, "matchQuotelike succeeded: %s" % rval.inspect
1015
+ return rval
1016
+ end
1017
+
1018
+
1019
+ ### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and
1020
+ ### try to match a valid Ruby variable or identifier, plus any trailing chain of method calls, blocks, or element references.
1021
def matchVariable( prefix )
  # Skip +prefix+ at the scan pointer, then match either a predefined
  # global (eg., $1, $!, $-w) or an identifier, followed by as many chained
  # method calls, trailing blocks, and element references as can be
  # consumed.
  #
  # Returns a Hash with the keys :match (the variable text), :prefix (the
  # skipped prefix text) and :suffix (everything after the match).
  #
  # Raises MatchFailure if the prefix or an identifier cannot be found.
  startPos = self.pointer
  debugMsg 2, "matchVariable starting at pos = %d: prefix = %s",
    startPos, prefix.inspect

  # Look for the prefix
  raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/" unless
    self.skip( prefix )

  varPos = self.pointer

  # If the variable matched is a predefined global, no need to look for an
  # identifier
  unless self.scan( %r~\$(?:[!@/\\,;.<>$?:_\~&`'+]|-\w|\d+)~ )

    debugMsg 2, "Not a predefined global at '%s...' (offset %d)" %
      [ self.rest[0,20].chomp, self.pointer ]

    # Look for a valid identifier. Only the /i flag is used: in Ruby 1.9+
    # a trailing 's' selects the Windows-31J encoding rather than "dot
    # matches newline", and this pattern contains no dot anyway.
    unless self.scan( /\*?(?:[$@]|::)?(?:[a-z_]\w*(?:::\s*))*[_a-z]\w*/i )
      raise MatchFailure, "No variable found: Bad identifier (offset %d)" % self.pointer
    end
  end

  debugMsg 2, "Matched '%s' at offset %d" % [ self.matched, self.pointer ]

  # Match methodchain with trailing codeblock
  while self.rest?
    # Match a regular chained method
    next if scanCodeblock( {"("=>")", "do"=>"end", "begin"=>"end", "{"=>"}"},
      /\s*(?:\.|::)\s*[a-zA-Z_]\w+\s*/ )

    # Match a trailing block or an element ref
    next if scanCodeblock( nil, /\s*/, {'{' => '}', '[' => ']'} )

    # This matched a dereferencer in Perl, which doesn't have any
    # equivalent in Ruby.
    #next if scanVariable( '\s*(\.|::)\s*' )

    # Match a method call without parens (?). The pattern is given as a
    # Regexp so it is applied as a pattern rather than handed to the
    # scanner as a plain String.
    next if self.scan( /\s*(\.|::)\s*\w+(?![{(\[])/ )

    break
  end

  rval = {
    :match  => self.string[ varPos .. (self.pointer - 1) ],
    :prefix => self.string[ startPos, (varPos-startPos) ],
    :suffix => self.string[ self.pointer..-1 ],
  }
  debugMsg 1, "matchVariable succeeded: %s" % rval.inspect
  return rval
end
1074
+
1075
+
1076
+ ### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and
1077
+ ### try to match text inside a Ruby code block construct which must be
1078
+ ### delimited by the specified <tt>outerDelimPairs</tt>. It may optionally
1079
+ ### contain sub-blocks delimited with the given <tt>innerDelimPairs</tt>.
1080
def matchCodeblock( prefix, innerDelimPairs, outerDelimPairs )
  # Skip +prefix+ at the scan pointer, then match a code block opened by one
  # of the keys of +outerDelimPairs+ and closed by the corresponding value.
  # Comments, variables, quotelikes, a handful of operators, and nested
  # blocks (delimited by +innerDelimPairs+) are skipped along the way.
  #
  # Returns a Hash with the keys :match (the block including its
  # delimiters), :prefix (the skipped prefix text) and :suffix (everything
  # after the match).
  #
  # Raises MatchFailure if the prefix or opening delimiter is missing, if a
  # mismatched or improperly nested delimiter is seen, or if the text ends
  # before the closing delimiter is found. Raises DelimiterError if the
  # opening delimiter has no closing counterpart in +outerDelimPairs+.
  startPos = self.pointer
  debugMsg 2, "Starting matchCodeblock at offset %d (%s)", startPos, self.rest.inspect

  # Look for the prefix
  raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/" unless
    self.skip( prefix )
  codePos = self.pointer
  debugMsg 3, "Skipped prefix '%s' to offset %d" %
    [ self.matched, codePos ]

  # Build a regexp for the outer delimiters
  ldelimOuter = "(" + outerDelimPairs.keys  .uniq.collect {|delim| Regexp::quote(delim)}.join('|') + ")"
  rdelimOuter = "(" + outerDelimPairs.values.uniq.collect {|delim| Regexp::quote(delim)}.join('|') + ")"
  debugMsg 4, "Using /%s/ as the outer delim regex" % ldelimOuter

  # Compile the opening-delimiter pattern once so it is always applied as a
  # pattern, never as a literal String.
  ldelimOuterRe = Regexp::new( ldelimOuter )

  unless self.scan( ldelimOuterRe )
    raise MatchFailure, %q:Did not find opening bracket at "%s..." offset %d: %
      [ self.rest[0,20].chomp, codePos ]
  end

  # Look up the corresponding outer delimiter
  closingDelim = outerDelimPairs[self.matched] or
    raise DelimiterError, "Could not find closing delimiter for '%s'" %
      self.matched

  debugMsg 3, "Scanning for closing delim '#{closingDelim}'"

  # +closer+ stays nil until the closing delimiter is actually seen. (An
  # empty String is truthy in Ruby, so it cannot serve as the "not found"
  # marker.)
  closer = nil
  # NOTE(review): patvalid is handed to scanQuotelike, presumably to say
  # whether a bare /regex/ may start here -- confirm against scanQuotelike.
  patvalid = true

  # Scan until the end of the text or until an explicit break
  while self.rest?
    debugMsg 5, "Scanning from offset %d (%s)", self.pointer, self.rest.inspect
    closer = nil

    # Skip comments
    debugMsg 5, "Trying to match a comment"
    if self.scan( /\s*#.*/ )
      debugMsg 4, "Skipping comment '%s' to offset %d" %
        [ self.matched, self.pointer ]
      next
    end

    # Look for (any) closing delimiter
    debugMsg 5, "Trying to match a closing outer delimiter with /\\s*(#{rdelimOuter})/"
    if self.scan( /\s*(#{rdelimOuter})/ )
      debugMsg 4, "Found a right delimiter '#{self.matched}'"

      # If it's the delimiter we're looking for, stop the scan
      if self.matched.strip == closingDelim
        closer = self.matched
        debugMsg 3, "Found the closing delimiter we've been looking for (#{closer.inspect})."
        break

      # Otherwise, it's an error, as we've apparently seen a closing
      # delimiter without a corresponding opening one.
      else
        raise MatchFailure,
          %q:Mismatched closing bracket at "%s..." (offset %s). Expected '%s': %
            [ self.rest[0,20], self.pointer, closingDelim ]
      end
    end

    # Try to match a variable or a quoted phrase
    debugMsg 5, "Trying to match either a variable or quotelike"
    if self.scanVariable( '\s*' ) || self.scanQuotelike( '\s*', patvalid )
      debugMsg 3, "Matched either a variable or quotelike. Offset now %d" % self.pointer
      patvalid = false
      next
    end

    # Match some operators
    # :TODO: This hasn't really been ruby-ified
    debugMsg 5, "Trying to match an operator"
    if self.scan( %r:\s*([-+*x/%^&|.]=?
        | [!=]~
        | =(?!>)
        | (\*\*|&&|\|\||<<|>>)=?
        | split|grep|map|return
      ):x )
      debugMsg 3, "Skipped miscellaneous operator '%s' to offset %d." %
        [ self.matched, self.pointer ]
      patvalid = true
      next
    end

    # Try to match an embedded codeblock
    debugMsg 5, "Trying to match an embedded codeblock with delim pairs: %s",
      innerDelimPairs.inspect
    if self.scanCodeblock( innerDelimPairs )
      debugMsg 3, "Skipped inner codeblock to offset %d." % self.pointer
      patvalid = true
      next
    end

    # Try to match a stray outer-left delimiter
    debugMsg 5, "Trying to match a stray outer-left delimiter (#{ldelimOuter})"
    if self.match?( ldelimOuterRe )
      raise MatchFailure, "Improperly nested codeblock at offset %d: %s... " %
        [ self.pointer, self.rest[0,20] ]
    end

    # Nothing recognizable: skip one word, arrow, or character and retry
    patvalid = false
    self.scan( /\s*(\w+|[-=>]>|.|\Z)/m )
    debugMsg 3, "Skipped '%s' to offset %d" %
      [ self.matched, self.pointer ]
  end

  # Running off the end of the text without seeing the closer is a failure
  unless closer
    raise MatchFailure, "No match found for opening bracket"
  end

  rval = {
    :match  => self.string[codePos .. (self.pointer - 1)],
    :prefix => self.string[startPos, (codePos-startPos)],
    :suffix => self.string[ self.pointer..-1 ],
  }
  debugMsg 1, "matchCodeblock succeeded: %s" % rval.inspect
  return rval
end
1201
+
1202
+
1203
+ ### Attempt to derive and return the number of scan methods traversed up to
1204
+ ### this point by examining the call stack.
1205
def scanDepth
  # Count the frames on the call stack (above this method's direct caller)
  # that belong to one of the scanVariable / scanTagged / scanCodeblock /
  # scanBracketed / scanQuotelike methods.
  #
  # Backtrace labels are matched in both the classic form (in `scanTagged')
  # and the Ruby 3.4+ form (in 'DelimScanner#scanTagged').
  frames = caller(2) || []   # caller returns nil when the stack is too shallow
  return frames.find_all {|frame|
    frame =~ /in [`'](?:[\w:]+[#.])?scan(?:Variable|Tagged|Codeblock|Bracketed|Quotelike)'/
  }.length
end
1210
+
1211
+
1212
+ #######
1213
+ private
1214
+ #######
1215
+
1216
+ ### Print the specified <tt>message</tt> to STDERR if the scanner's
1217
+ ### debugging level is greater than or equal to <tt>level</tt>.
1218
def debugMsg( level, msgFormat, *args )
  # Write a debugging message to $stderr, indented two spaces per level
  # above 1, when +level+ is non-zero and no greater than the scanner's
  # debugLevel. +msgFormat+ is run through Kernel#format only when extra
  # +args+ are supplied, so pre-formatted messages may contain a bare '%'.
  if level.nonzero? && self.debugLevel >= level
    text = args.empty? ? msgFormat : format( msgFormat, *args )
    indent = (" " * (level - 1)) * 2
    $stderr.puts( indent + text )
  end
end
1223
+
1224
+
1225
+ ### Given a series of one or more bracket characters (eg., '<', '[', '{',
1226
+ ### etc.), return the brackets reversed in order and direction.
1227
def revbracket( bracket )
  # Turn a run of opening brackets into the matching run of closing
  # brackets: the text is reversed, then each of < [ { ( is swapped for its
  # closing counterpart. Other characters are kept (in reversed order).
  openers = '<[{('
  closers = '>]})'
  backwards = bracket.to_s.reverse
  return backwards.tr( openers, closers )
end
1230
+
1231
+
1232
+ ### Given an opening <tt>tag</tt> of the sort matched by #scanTagged,
1233
+ ### construct and return a closing tag.
1234
def makeClosingTag( tag )
  # Build the text of the closing tag for an opening +tag+ made of one or
  # more opening brackets followed by an XmlName (eg., '<b>' gives '</b>').
  # The result is passed through Regexp.quote so it can be embedded in a
  # pattern.
  #
  # Raises MatchFailure if +tag+ does not start with brackets and a name.
  debugMsg 3, "Making a closing tag for '%s'" % tag

  # The '[' is escaped inside the character class: left bare, Ruby 1.9+
  # reads it as the start of a nested class and the pattern is malformed.
  opening = /\A([\[(<{]+)(#{XmlName})/.match( tag )

  # A tag that doesn't fit the pattern is reported instead of being
  # returned unchanged.
  raise MatchFailure, "Unable to construct closing tag to match: #{tag}" unless opening

  brackets, name = opening[1], opening[2]
  return Regexp.quote( "#{brackets}/#{name}" + revbracket(brackets) )
end
1244
+
1245
+
1246
+ ### Make and return a new Regexp which matches substrings bounded by the
1247
+ ### specified +delimiters+, not counting those which have been escaped with
1248
+ ### the escape characters in +escapes+.
1249
def makeDelimPattern( delimiters, escapes='\\', prefix='\\s*' )
  # Build a Regexp matching +prefix+ followed by a substring bounded by any
  # one of the characters in +delimiters+. The i-th delimiter is escaped by
  # the i-th character of +escapes+; the last escape character is reused
  # when there are fewer escapes than delimiters, and a backslash is used
  # when +escapes+ is empty. A delimiter that is its own escape is escaped
  # by doubling it.
  #
  # Raises DelimiterError if +delimiters+ contains no non-whitespace
  # character. The caller's +escapes+ string is never modified.
  delimiters = delimiters.to_s
  escapes = escapes.to_s

  raise DelimiterError, "Illegal delimiter '#{delimiters}'" unless delimiters =~ /\S/

  # Fall back to the default escape rather than failing on an empty string
  escapes = '\\' if escapes.empty?

  # Pad the escapes string to the same length as the delimiters. A new
  # string is built so the argument is left untouched, and nothing is added
  # when there are already at least as many escapes as delimiters.
  missing = delimiters.length - escapes.length
  escapes = escapes + (escapes[-1,1] * missing) if missing > 0

  # Escape each delimiter and a corresponding escape character, and then
  # build a pattern part from them
  patParts = (0...delimiters.length).collect do |i|
    del = Regexp.escape( delimiters[i, 1] )
    esc = Regexp.escape( escapes[i, 1] )

    if del == esc
      "#{del}(?:[^#{del}]*(?:(?:#{del}#{del})[^#{del}]*)*)#{del}"
    else
      "#{del}(?:[^#{esc}#{del}]*(?:#{esc}.[^#{esc}#{del}]*)*)#{del}"
    end
  end

  # Join all the parts together and return one big pattern
  return Regexp::new( "#{prefix}(?:#{patParts.join("|")})" )
end
1275
+
1276
+ end # class DelimScanner
1277
+
1278
+