reg 0.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,68 @@
1
+ =begin copyright
2
+ reg - the ruby extended grammar
3
+ Copyright (C) 2005 Caleb Clausen
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+ module Reg
20
# A matcher for a location in the input rather than for an item.
# #mmatch compares the stored position with the cursor position of the
# progress object it is given.
class Position
  require 'reg'

  class << self
    # Keep the stock constructor reachable under another name.  The
    # alias needs both operands: with only one, the parser takes the
    # `def` keyword on the next line as the method being aliased.
    alias new__no_negatives new

    # Position.new(n) / Position[n].
    # A numeric-like argument below zero is negated and handed to
    # PositionFromEnd; anything else builds a plain instance.
    def new(nums)
      if nums.respond_to?(:to_i) and 0 > nums
        return PositionFromEnd.new(-nums)
      end
      new__no_negatives nums
    end
    alias [] new
  end

  # nums: the position (or any object whose === accepts a position).
  def initialize(nums)
    Float === nums and nums = nums.to_i # enables Position[-0.1]
    @positions = nums
  end

  # pr: an object exposing #cursor (with #pos).
  # Returns 0 when the cursor sits at the wanted position, else nil.
  def mmatch(pr)
    pos = adjust_position(pr, @positions)
    to_res pos === pr.cursor.pos
  end

  # Always 0..0.
  def itemrange
    0..0
  end

  private

  # Hook for subclasses; the base class uses the position unchanged.
  def adjust_position(pr, pos)
    pos
  end

  # Maps a boolean onto the result convention used by mmatch: 0 or nil.
  def to_res bool
    bool ? 0 : nil
  end
end
59
+
60
+
61
+ class PositionFromEnd < Position
62
+ private
63
# Turns the stored distance-from-the-end into an absolute position.
# Position.new negates the caller's negative number before building
# this object, so +pos+ is non-negative here and has to be subtracted
# from the cursor size; adding it would point past the end of the
# input, where no cursor position can ever be equal to it.
# pr:  object exposing #cursor (with #size)
# pos: non-negative offset from the end
def adjust_position(pr, pos)
  pr.cursor.size - pos
end
66
+ end
67
+
68
+ end
@@ -0,0 +1,1067 @@
1
+ =begin copyright
2
+ reg - the ruby extended grammar
3
+ Copyright (C) 2005 Caleb Clausen
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
# rubygems is optional.  A failed require raises LoadError, which is a
# ScriptError and therefore NOT caught by a bare `rescue`; it has to
# be named explicitly for the "do nothing" fallback to take effect.
begin
  require 'rubygems'
rescue LoadError
  # do nothing
end
23
+
24
+ require 'cursor'
25
+ require 'cursor/indexed'
26
+
27
+
28
+ =begin the internal api
29
+ originally:
30
+ ResAryFrag=Array #it would be nice to get a more precise definition....
31
+ ResAry=+[ResAryFrag,-[MatchSet,Integer,ResAryFrag].*]
32
+
33
+ Reg%:mmatch[Array,Integer,
34
+ Returns MatchSet|ResAryFrag|nil
35
+ ]
36
+ Backtrace%:bt_match[Array,Integer,Integer,Integer,ResAry,Integer.-,
37
+ Returns ResAry|nil,Integer,Integer
38
+ ]
39
+ Matchset%:next_match[Array,Integer,
40
+ Returns ResAryFrag|nil,Integer
41
+ ]
42
+
43
+ currently:
44
+ Reg%:mmatch[Array,Integer, #has to change to take progress soon
45
+ Returns MatchSet|ResAryFrag|nil
46
+ ] #except subseq and repeat currently want progress
47
+ Progress%:bt_match[Integer.-, #affects progress, i'm pretty sure
48
+ Returns ResAry|nil,Integer,Integer #1st result used only as bool
49
+ ]
50
+ MatchSet%:next_match[Array,Integer, #affects progress?
51
+ Returns ResAryFrag|nil,Integer
52
+ ]
53
+ MatchSet%:initialize[Progress,OBS,Returns MatchSet] #for repeat and subseq only
54
+
55
+
56
+
57
+ ultimately:
58
+ Reg%:mmatch[Progress, Returns MatchSet|Integer|nil] #affects progress on success (when integer returned)
59
+ Progress%:bt_match[Integer.-, Returns Bool] #affects progress on success
60
+ MatchSet%:next_match[Returns Integer|nil] #affects progress on success
61
+ (modified progress is the same one as was given to the mmatch that created the matchset)
62
+ MatchSet%:initialize[Progress,OBS,Returns MatchSet] #for every ms class
63
+
64
+
65
+
66
+ =end
67
+
68
+
69
+ #---------------------------------------------
70
+ module Reg
71
+
72
+ #---------------------------------------------
73
# Three counters, all starting at zero, recording how many entries
# were added to the undo, match-succeed and position stacks.
class MatchFailRec
  attr_accessor :undos_inc, :matchsucceed_inc, :position_inc

  def initialize
    @undos_inc = 0
    @matchsucceed_inc = 0
    @position_inc = 0
  end

  # position_inc is the number of positions to pop off the position
  # stack to get back to the point before the match of the most recent
  # matchset.  It is also the count by which to adjust @regsidx to get
  # back to the reg which generated that matchset, hence this alias.
  alias regs_adjust position_inc
end
85
+
86
+ #---------------------------------------------
87
+ class Progress
88
+ attr_reader :matcher, :cursor, :regsidx, :position_stack, :matchset_stack, :matchfail_todo, :variables
89
+
90
+ #for internal use only...
91
+ attr_writer :undos_stack, :matchfail_todo, :matchsucceed_stack #, :regsidx
92
+
93
+
94
+ #@matchset_stack and @matchfail_todo are (nearly) parallel arrays; @matchfail_todo has
95
+ #one more item in it (at the bottom). each matchfailrec represents the things to undo
96
+ #on failure to get back to the corresponding matchset's starting position.
97
+
98
+ #@matchfail_todo is more or less a 2-dimensional array of integers. very many of
99
+ #those integers in the undos_inc and matchsucceed_inc columns will be zero. it
100
+ #would be nice to use a sparse vector or matrix instead.
101
+
102
+
103
+ #---------------------------------------------
104
# matcher: the object whose sub-regs are matched
# cursor:  the input cursor; its current pos becomes the first entry
#          of the position stack
def initialize(matcher, cursor)
  @matcher, @cursor = matcher, cursor
  @regsidx = 0
  @variables = {}

  # recursive list of undo procs and vars defined in this entire match
  @undos_stack = []
  @matchset_stack = []
  @position_stack = [@cursor.pos]

  # list of things to do when match fails.... undo (& adjust
  # variables), matchsucceed, position, (matchset)
  @matchfail_todo = [MatchFailRec.new]

  # r-list of things to do when entire match succeeds... subst and deferreds
  @matchsucceed_stack = []

  assert check_result
end
117
+
118
+ #---------------------------------------------
119
# Records a new backtracking stop: the matchset goes on
# @matchset_stack and a fresh MatchFailRec on @matchfail_todo.
# push_match is deliberately not called here; the caller does it.
def push_matchset(ms)
  assert MatchSet === ms
  @matchset_stack << ms
  @matchfail_todo << MatchFailRec.new
end
125
+
126
+ #---------------------------------------------
127
# Advances the cursor by +inc+ items (0 when omitted), pushes the
# resulting position, and bumps both the newest MatchFailRec's
# position_inc and @regsidx.
def push_match(inc=0)
  # matchset_stack should be 1 smaller than matchfail_todo
  assert @matchfail_todo.size - 1 == @matchset_stack.size

  @cursor.skip inc
  @position_stack << @cursor.pos # start position of the next match
  newest = @matchfail_todo.last
  newest.position_inc += 1
  @regsidx += 1
end
136
+
137
+ #---------------------------------------------
138
# Pops the newest MatchFailRec and rolls @regsidx, the position stack,
# the undo stack (running the removed undos) and the match-succeed
# stack back by the counts stored in it.
# Returns the matchset now on top of @matchset_stack (nil if none).
#
# This method is dangerous! It leaves the object in an inconsistent
# state.  The caller must fix up state by either popping
# @matchset_stack or pushing a new matchfail_todo record.
def backup_stacks
  assert(@matchfail_todo.size >= 1)
  discarding = @matchfail_todo.pop
  # discarding should not ever be a list!
  @regsidx -= discarding.position_inc # adjust position in matcher
  assert(@position_stack.size >= discarding.position_inc)

  if discarding.position_inc.nonzero?
    @position_stack = @position_stack.slice(0...-discarding.position_inc)
  end

  # Back up the undo stack and execute the removed undos.  The zero
  # case must be skipped: -0..-1 is 0..-1, i.e. the WHOLE array, so an
  # unguarded slice! would discard and run every outstanding undo.
  if discarding.undos_inc.nonzero?
    process_undos @undos_stack.slice!(-discarding.undos_inc..-1)
  end

  # back up the match-succeed stack
  if discarding.matchsucceed_inc.nonzero?
    @matchsucceed_stack = @matchsucceed_stack.slice(0...-discarding.matchsucceed_inc)
  end

  return @matchset_stack.last
end
159
+
160
+ #---------------------------------------------
161
# Unwinds to the most recent matchset that can still produce another
# match.  Matchsets that are exhausted are popped along the way.
# Returns [@regsidx, items consumed since origpos] after re-applying
# the alternative match, or nil when backup_stacks finds no matchset.
def backtrack
  assert @regsidx != INFINITY
  assert check_result
  found = nil
  loop do
    stop = backup_stacks
    return unless stop

    found = stop.next_match(@cursor.data?, @position_stack.last)
    break if found
    @matchset_stack.pop or huh
  end
  assert((1..@matcher.max_matches) === @regsidx + 1)
  assert ::Array === found
  assert ::Array === found.first

  # back up cursor position
  @cursor.pos = @position_stack.last

  # backup_stacks removed a MatchFailRec; put a fresh one in its place
  @matchfail_todo.push MatchFailRec.new
  push_match found.last

  assert @regsidx
  return @regsidx, @cursor.pos - origpos
end
187
+
188
+ #---------------------------------------------
189
# Look up something that was already matched, either by name or by
# index(es).  Probably needs to take a full path for parameters.
# Not implemented: the body is only the `huh` placeholder.
def backref
  huh
end
193
+
194
+ #---------------------------------------------
195
# Builds a nested progress object (a dup of self) for matching inside
# a sub-matcher.
# cursor:  cursor for the copy; when nil, a sub-cursor is made with
#          result.cursor.position
# matcher: when given, becomes the copy's matcher and the copy's
#          regsidx restarts at 0
# The copy's undo and match-succeed stacks are fresh arrays that are
# also pushed onto this object's stacks, and this object's newest
# MatchFailRec is incremented to account for those pushes.
#
# Fixes relative to the earlier text: the stack and counter are named
# undos_stack / undos_inc everywhere else in this class (the singular
# spellings raised NoMethodError); the copy's cursor and position
# stack are set directly because no writers are declared for them;
# and the new matcher is installed on the copy, not on self (the dup
# is taken before the assignment, so assigning to self left the copy
# with the old matcher).
# NOTE(review): the copy still shares @matchset_stack and @variables
# with self via dup, and check_result does not accept nested arrays
# in @undos_stack -- confirm before enabling this code path.
def subprogress(cursor=nil, matcher=nil)
  result = dup
  result.matchfail_todo = [MatchFailRec.new]

  if cursor
    unless Cursor === cursor
      huh # convert other data to a cursor...
    end
    sub_cursor = cursor
  else
    sub_cursor = result.cursor.position # make a sub-cursor
  end
  result.instance_variable_set(:@cursor, sub_cursor)
  result.instance_variable_set(:@position_stack, [sub_cursor.pos])

  @undos_stack.push(result.undos_stack = [])
  @matchsucceed_stack.push(result.matchsucceed_stack = [])
  @matchfail_todo.last.matchsucceed_inc += 1
  @matchfail_todo.last.undos_inc += 1

  if matcher
    result.instance_variable_set(:@matcher, matcher)
    result.instance_variable_set(:@regsidx, 0)
  end

  assert result.check_result

  result
end
224
+
225
+ #---------------------------------------------
226
# Not implemented: the body is only the `huh` placeholder.
def make_hash
  huh
end
230
+
231
+ #---------------------------------------------
232
# Exclusive range spanning the last two entries of the position stack.
def last_match_range
  from = @position_stack[-2]
  upto = @position_stack[-1]
  from...upto
end
235
+
236
+ #---------------------------------------------
237
# The most recently pushed matchset, or nil when the stack is empty.
def top_matchset
  @matchset_stack[-1]
end
240
+
241
+ #---------------------------------------------
242
# Binds +name+ to last_match_range and records the name on the undo
# stack.  A name that is already bound is left alone and only
# produces a warning.
def register_variable(name)
  if @variables[name]
    return warn( "variable #{name} is already defined")
  end
  @variables[name] = last_match_range
  @undos_stack << name
  @matchfail_todo.last.undos_inc += 1
end
248
+
249
+ #---------------------------------------------
250
# A deferred call: a callable plus the argument list to splat into it.
# Nested inside Progress so it doesn't conflict with Reg::Later from
# regreplace.rb.
class Later
  class << self
    alias_method :[], :new
  end

  # block: anything responding to #call
  # args:  array of arguments for that call
  def initialize(block, args)
    @block = block
    @args = args
  end

  # Invokes the stored callable with the stored arguments.
  def call
    @block.call(*@args)
  end
end
262
+
263
+ #---------------------------------------------
264
# Queues +block+ (with +args+) to be run if the current match is
# backed out, and counts it in the newest MatchFailRec.
# Later takes exactly (block, args), like the other callers in this
# class pass it; splatting args into the constructor raised
# ArgumentError for every argument count except one.
def register_undo(*args, &block)
  @undos_stack << Later[block, args]
  @matchfail_todo.last.undos_inc += 1
end
268
+
269
+ #wannabe in class Array
270
# Depth-first walk over a nested array, calling the block for every
# non-array element in order.
# Only real arrays are descended into.  Testing respond_to?(:to_a)
# also matched nil, hashes, structs and ranges (and, on Ruby 1.8,
# every object), so such leaves were expanded or dropped instead of
# being passed to the block.
def recursive_each arr, &block
  arr.each do |item|
    if ::Array === item
      recursive_each item, &block
    else
      block[item]
    end
  end
end
279
+
280
# Depth-first walk over a nested array from last element to first,
# calling the block for every non-array element.
# Only real arrays are descended into.  Testing respond_to?(:to_a)
# also matched nil, hashes, structs and ranges (and, on Ruby 1.8,
# every object), so such leaves were expanded or dropped instead of
# being passed to the block.
def recursive_reverse_each arr, &block
  arr.reverse_each do |item|
    if ::Array === item
      recursive_reverse_each item, &block
    else
      block[item]
    end
  end
end
289
+
290
+ #---------------------------------------------
291
# Runs the given undo entries newest-first.  A Symbol entry names a
# variable to remove from @variables; any other entry is called.
def process_undos(undos)
  recursive_reverse_each(undos) do |entry|
    if Symbol === entry
      @variables.delete(entry)
    else
      entry.call
    end
  end
end
296
+
297
+
298
+ #---------------------------------------------
299
# Queues +subst+ (with +args+) on the match-succeed stack and counts
# it in the newest MatchFailRec.
def register_replace(subst, *args)
  deferred = Later[subst, args]
  @matchsucceed_stack.push deferred
  @matchfail_todo.last.matchsucceed_inc += 1
end
303
+
304
+ #---------------------------------------------
305
# Queues the given block (with +args+) on the match-succeed stack and
# counts it in the newest MatchFailRec.
def later(*args, &block)
  deferred = Later[block, args]
  @matchsucceed_stack.push deferred
  @matchfail_todo.last.matchsucceed_inc += 1
end
309
+
310
+ #---------------------------------------------
311
# Calls every entry of the match-succeed stack, newest first.
def process_laters
  recursive_reverse_each(@matchsucceed_stack) do |deferred|
    deferred.call
  end
end
314
+
315
+ #---------------------------------------------
316
# The bottom entry of the position stack.
def origpos
  @position_stack[0]
end
319
+
320
+ #---------------------------------------------
321
# Distance between the cursor's current position and origpos.
def posinc
  here = @cursor.pos
  here - origpos
end
324
+
325
+
326
+ #--------------------------
327
# Global trace switches, both cleared when this file is loaded.
$RegTraceEnable = nil
$RegTraceDisable = nil

# $RegTraceEnable wins when set; otherwise the per-object @trace flag
# applies unless $RegTraceDisable is set.
def trace_enabled?
  @trace ||= nil
  return $RegTraceEnable if $RegTraceEnable
  !$RegTraceDisable && @trace
end
332
+
333
+ #--------------------------
334
# bt stands for 'backtracking'.
# Matches the matcher's sub-regs one after another, starting at
# @regsidx, until @regsidx reaches +match_steps+.
# A sub-reg that responds to #mmatch is asked to match; if its answer
# responds to #next_match it is treated as a matchset and pushed as a
# backtracking stop.  Any other sub-reg is compared with === against
# the next input item.  When a sub-reg fails, the method returns early
# if the matcher reports enough matches, and otherwise backtracks.
# Returns [to_result, posinc, @regsidx], or nil when backtracking
# fails.
def bt_match(match_steps=@matcher.max_matches)
  assert @cursor.pos <= @cursor.size
  assert origpos >= 0
  assert posinc >= 0
  assert( (0..match_steps)===@regsidx)
  assert Integer===@position_stack.first
  assert check_result

  loop do # loop over regs to match
    assert @cursor.pos <= @cursor.size
    assert posinc >= 0
    assert( (0..match_steps)===@regsidx)

    if trace_enabled?
      puts [@matcher, clean_result].map{|i| i.inspect }.join(' ')
    end

    assert check_result

    # try a new match of the current reg
    matchset = matched = matchlen = nil
    reg = @matcher.regs(@regsidx)
    if reg.respond_to? :mmatch # should test for being a Reg::Reg instead
      matchset = reg.mmatch(@cursor.data?, @cursor.pos)
      if matchset.respond_to? :next_match
        # it's a set -- record the stop, then take its initial match
        push_matchset matchset
        matched, matchlen = matchset.next_match(@cursor.data?, @cursor.pos)

        assert matched
        assert matchset
      else
        matched, matchlen = *matchset # single match or nil
        matchset = nil
      end
    else
      item = @cursor.read1after
      if reg === item
        matched = RR[item]
        matchlen = 1
      end
    end

    assert check_result

    if matched # match succeeded
      if !matchset and @matcher.respond_to? :want_gratuitous_btstop? and
         @matcher.want_gratuitous_btstop?(@regsidx)
        push_matchset SingleMatch_MatchSet.new(matchlen)
      end

      # advance to next reg
      assert check_result
      push_match @matcher.update_di(0, matchlen)
      assert(@cursor.pos<=@cursor.size)
    else # match failed
      assert check_result
      return to_result, posinc, @regsidx if @matcher.enough_matches? @regsidx

      # doesn't match, try backtracking
      assert @regsidx
      backtrack or return nil # bt failed? we fail
      assert(@cursor.pos<=@cursor.size)
      assert check_result
    end

    assert(@cursor.pos<=@cursor.size)

    assert check_result
    return to_result, posinc, @regsidx if @regsidx>=match_steps
    assert( (0..match_steps)===@regsidx)
  end # loop
end
413
+
414
+ #---------------------------------------------
415
+ #maybe this isn't necessary?
416
+ #because backtrack is called after it,
417
+ #and it's doing the same things.... more or less
418
+
419
# Backs out the most recent match and asks the matchset on top of the
# stack for its next alternative, re-matching the remaining sub-regs
# afterwards if there are any.
# Returns [result, items consumed, @regsidx]; result is nil when
# there is no stop to retry, or when the stop is exhausted and the
# matcher does not report enough matches.
# Open questions carried over from the author: "something wrong
# here...."; @matchset_stack.pop is called in backtrack on every
# failed stop but here only in the exhausted case -- why?
def last_next_match
  assert check_result
  assert( (0..@matcher.max_matches)===@regsidx)
  stop = backup_stacks

  @cursor.pos = @position_stack.last
  assert( (0..@matcher.max_matches)===@regsidx)
  if stop.nil? || stop == false
    @matchfail_todo.push MatchFailRec.new
    assert check_result
    return nil, nil, @regsidx
  end

  found, diinc = stop.next_match(@cursor.data?, @cursor.pos)
  unless found
    # might need to return non-nil here, if resfrag isn't exhausted yet
    assert( (0..@matcher.max_matches)===@regsidx)
    @matchset_stack.pop
    assert check_result
    if @matcher.enough_matches? @regsidx
      return to_result, @cursor.pos-@position_stack.first, @regsidx
    end
    return nil, nil, @regsidx
  end

  assert diinc
  assert @cursor.pos+diinc <= @cursor.size
  @cursor.skip diinc
  @matchfail_todo.push MatchFailRec.new
  push_match

  # if there are more subregs of this reg to be matched, re-match them
  if @regsidx<@matcher.max_matches
    assert @cursor.pos <= @cursor.size
    assert check_result
    assert( (0..@matcher.max_matches)===@regsidx)
    outcome = bt_match
    assert check_result
    return outcome
  end

  assert( (0..@matcher.max_matches)===@regsidx)
  assert check_result

  return to_result, @cursor.pos-@position_stack.first, @regsidx
end
475
+
476
+ #---------------------------------------------
477
# Consistency check over all of the bookkeeping state.  Every
# condition is verified through `assert`; returns true when the end
# is reached.
#
# Two corrections relative to the earlier text:
# * `when X:` (colon) is not accepted by Ruby 1.9 and later; plain
#   `when` clauses are used instead.
# * the position-ordering loop assigned `pos=prev_pos`, so prev_pos
#   stayed 0 and neither the ordering check nor the final size check
#   looked at real positions; it now advances prev_pos.
def check_result
  # since this should be true, a separate @regsidx is unnecessary
  assert @regsidx == @position_stack.size-1

  # matchset_stack should be 1 smaller than matchfail_todo
  assert @matchfail_todo.size-1 == @matchset_stack.size

  # verify correct types in @-variables
  assert Cursor === @cursor
  assert @matcher.respond_to?( :update_di)
  assert @regsidx >= 0
  @matchset_stack.each { |ms| assert MatchSet === ms }

  # positions must never decrease, and the last must be within the input
  prev_pos = 0
  @position_stack.each do |pos|
    assert prev_pos <= pos
    prev_pos = pos
  end
  assert prev_pos <= @cursor.size

  vars_copy = @variables.dup
  @undos_stack.each do |i|
    case i
    when Symbol
      # every element of @variables should also be a sym in @undos_stack
      vars_copy.delete(i) or assert(false)
    when Later
      # nothing to verify for a deferred undo
    else
      assert(false)
    end
  end
  assert vars_copy.empty? # every var should be accounted for

  # sum of :undos_inc, :matchsucceed_inc, :position_inc in
  # @matchfail_todo should be the same as the size of the
  # corresponding stack.
  uns = mats = poss = 0
  @matchfail_todo.each do |mfr|
    uns += mfr.undos_inc
    mats += mfr.matchsucceed_inc
    poss += mfr.position_inc
  end
  assert uns == @undos_stack.size
  assert mats == @matchsucceed_stack.size
  assert poss+1 == @position_stack.size

  assert succ_stack_ok

  return true
end
524
+
525
+ #---------------------------------------------
526
# True when +stk+ (the match-succeed stack by default) contains only
# Later objects and arbitrarily nested arrays of them; nil otherwise.
#
# `::Array` is spelled out because a bare `Array` inside module Reg
# resolves to Reg::Array, which plain nested arrays are not instances
# of.  The `when X:` colon form was also replaced, since Ruby 1.9+
# rejects it.
def succ_stack_ok(stk=@matchsucceed_stack)
  stk.each do |elem|
    acceptable =
      case elem
      when ::Array then succ_stack_ok(elem)
      when Later then true
      end
    acceptable or return
  end
  return true
end
private :succ_stack_ok
537
+
538
+ #---------------------------------------------
539
# Returns one slice of the input per completed match: for every pair
# of adjacent entries in the position stack, the items between them.
# Each slice is read inside @cursor.position { } so the cursor's own
# position is retained.
#
# The earlier text also carried a branch meant to substitute a
# matchset's own clean_result; its index was forced to -1 so it could
# never run, and it referred to the undefined locals `ms_idx` and
# `ms`.  That branch and the two overwritten assignments are removed;
# matchset-aware results are therefore (still) not produced.
def clean_result
  (0...@position_stack.size-1).map do |i|
    start = @position_stack[i]
    len = @position_stack[i+1] - start
    @cursor.position { @cursor[start, len] }
  end
end
559
+
560
+ #---------------------------------------------
561
# Placeholder result value: always true ("ok, i'm cheating").
def to_result
  return true
end
564
+
565
+ end #class Progress
566
+
567
# A matchset for matches that can only succeed one way.
# This is somewhat of a hack and shouldn't be necessary; it exists
# because every backtracking stop has to have a matchset in it, even
# the stops that only match one way.
class SingleMatch_MatchSet < MatchSet
  # len is accepted but not stored (the `@len=len` assignment is
  # disabled).
  def initialize(len)
  end

  # Always returns nil.  The disabled implementation would have
  # returned [arr[start,@len], @len] once and nil afterwards.
  def next_match(arr, start)
  end
end
588
+
589
+
590
+ if defined? $MMATCH_PROGRESS #ultimately, mmatch will take a progress, but until then, disable this
591
+ #---------------------------------------------
592
# Progress-based matching for Subseq (only loaded when
# $MMATCH_PROGRESS is defined).
class Subseq
  # Used when none of @regs is a multiple reg: each reg is compared
  # with === against one item read from the sub-progress cursor.
  # Returns [[items read back from the cursor], @regs.size] or nil.
  def mmatch(pr)
    pr = pr.subprogress(nil, self)
    cu = pr.cursor
    start = cu.pos
    assert cu.pos<=cu.size
    return nil unless cu.pos+@regs.size<=cu.size
    @regs.each do |reg|
      assert cu.pos<cu.size
      return nil unless reg===cu.read1
    end
    [ [cu.read( start-cu.pos,true)], @regs.size ]
  end

  private

  # Used when at least one of @regs is a multiple reg: delegates to
  # the sub-progress's bt_match.
  # Returns a SubseqMatchSet, or the falsy bt_match result on failure.
  def mmatch_multiple(pr)
    pr = pr.subprogress(nil, self)
    cu = pr.cursor
    start = cu.pos
    return nil unless start+itemrange.begin<=cu.size
    assert( (0..cu.size).include?( start))
    result, di, bogus = pr.bt_match
    result and SubseqMatchSet.new(pr, di)
  end
end
621
+
622
+ #---------------------------------------------
623
# Progress-based matching for Repeat (only loaded when
# $MMATCH_PROGRESS is defined).
class Repeat
  # Used when @reg is not a multiple reg: counts how many consecutive
  # items read from the cursor satisfy @reg, up to @times.end.
  # Returns [[items read back], count] when exactly @times.begin
  # matched, a SingleRepeatMatchSet when more matched, nil when fewer.
  #
  # The count is kept in an explicit variable.  The earlier
  # `(0...n).each do |i|` form relied on the block parameter
  # overwriting the outer `i`, which stopped happening in Ruby 1.9
  # (block parameters are block-local), leaving the count at 0.
  def mmatch(pr)
    pr = pr.subprogress(nil, self)
    cu = pr.cursor
    start = cu.pos
    start+@times.begin <= cu.size or return nil # enough room left in input?

    i = 0
    while i < @times.end
      break unless start+i < cu.size
      break unless @reg===cu.read1
      i += 1
    end

    assert( (0..@times.end)===i)
    if i==@times.begin
      return [[cu.read(start-cu.pos,true)], i]
    end
    i>@times.begin or return nil
    return SingleRepeatMatchSet.new(i,-1,@times.begin)
  end

  private

  # Used when @reg is a multiple reg: matches the minimum number of
  # iterations first, then greedily the optional ones.
  #
  # Corrections relative to the earlier text: `cu` is now taken from
  # the sub-progress (it was never assigned here); check_result is
  # called without an argument, matching its definition; and the
  # final choice follows the comment above it and the array-based
  # twin of this method (plain match when there are no backtracking
  # stops, matchset otherwise) -- it was the other way round.
  # NOTE(review): the `huh` placeholders are left in place; this path
  # is unfinished.
  def mmatch_multiple(pr)
    pr = pr.subprogress(nil, self)
    cu = pr.cursor
    start = cu.pos
    assert start <= cu.size
    start+itemrange.begin <= cu.size or return nil # enough room left in input?
    r = [[]]

    # first match the minimum number
    if @times.begin==0 # if we can match nothing
      cu.eof? and return [r,0] # at end of input? return empty set
      ri = di = 0
    else
      cu.eof? and return nil
      assert @times.begin<INFINITY
      r, di, ri = pr.bt_match(@times.begin) # matches @reg @times.begin times
      r.nil? and return nil
    end
    assert ri==@times.begin

    assert !@times.exclude_end?
    left = @times.end-@times.begin

    # note: left and top could be infinite here...

    # do the optional match iterations
    # only greedy matching implemented for now
    # there must be a more efficient algorithm...
    if left >= 1
      assert pr.check_result
      # get remaining matches up to @times.end times
      rr = pr.make_hash
      res, di, ri = pr.bt_match # bt stop at each iteration, this time
      assert pr.check_result
      assert @times===pr.regsidx

      res and return RepeatMatchSet.new(pr,(huh di))
      assert rr==pr.make_hash
    end

    # if matchset has no backtracking stops, and
    # hence cannot contain more than one actual match,
    # then just return that match.
    pr.matchset_stack.empty? ? [pr.clean_result,(huh di)] :
                               RepeatMatchSet.new(pr,(huh di))
  end
end
690
+
691
+ #---------------------------------------------
692
# Design notes for a matchset over several alternatives that must all
# match (not implemented yet).
#
# The total number of possible different ways to match an AndMatchSet
# where several of the branches are actually ambiguous grows
# exponentially.  Rather than hit every possible match, the plan is
# to hit every legal match length at least once.
#
# On next_match: figure out the alternative(s) that are returning the
# longest matchset currently.  Those alternatives are returned in the
# first match, but at the 2nd and subsequent calls to next_match,
# that set of longest alternatives are all next_matched (rolled back)
# until they match something shorter.  (Or maybe just a different
# length?  Reg::Or isn't greedy, so its longest match isn't
# necessarily returned first.)
#
# If any next_match call returns nil (or false), the whole match set
# is finished: return nil from next_match now and forever more.
class AndMatchSet
  # Placeholder only.
  def initialize
    huh
  end
end
717
+
718
+ #---------------------------------------------
719
class And
  private

  # For the case where at least one of @regs is a multiple reg.
  # Can't be used until the mmatch interface is changed to take a
  # single progress param.
  # Returns an AndMatchSet built from (self, first bt_match value),
  # or the falsy bt_match value on failure.
  #
  # Still to do, per the author: AndMatchSet needs a new definition;
  # which alternative(s) was longest must be tracked, so as to
  # advance the cursor by that amount and to know which ones to start
  # backtracking in; the cursor probably needs advancing somewhere in
  # here.
  def mmatch_multiple(progress)
    assert( (0..progress.cursor.size).include?( progress.cursor.pos))
    result, *bogus = progress.bt_match
    return result unless result
    AndMatchSet.new(self, result)
  end
end
737
+
738
+
739
+ else #... not $MMATCH_PROGRESS
740
# Array-based matching for Subseq (used while mmatch still takes an
# array and a start index).
class Subseq
  # Used when each of @regs is not a multiple reg: compares the regs
  # pairwise with the items of +arr+ beginning at +start+.
  # Returns [RR[matched slice], @regs.size] or nil.
  def mmatch(arr, start)
    assert start<=arr.size
    return nil unless start+@regs.size<=arr.size
    @regs.each_with_index do |reg, idx|
      assert(start+idx<arr.size)
      return nil unless reg===arr[start+idx]
    end
    [RR[arr[start,@regs.size]], @regs.size]
  end

  # Used when at least one of @regs is a multiple reg: runs bt_match
  # on a fresh Progress over an indexed cursor positioned at +start+.
  # Returns a SubseqMatchSet, or the falsy bt_match result on failure.
  def mmatch_multiple(arr, start)
    assert( (0..arr.size).include?( start))
    cu = Cursor::Indexed.new(arr)
    cu.pos = start
    pr = Progress.new(self, cu)
    result, di, bogus = pr.bt_match
    result and SubseqMatchSet.new(pr, di)
  end
end
764
+
765
class ::Cursor
  class Indexed
    # Exposes the @data instance variable.
    def data?
      @data
    end
  end
end
770
+
771
# Array-based matching for Repeat (used while mmatch still takes an
# array and a start index).
class Repeat
  # Used when @reg is not a multiple reg: counts how many consecutive
  # items of +arr+, beginning at +start+, satisfy @reg, up to
  # @times.end.
  # Returns [RR[matched slice], count] when exactly @times.begin
  # matched, a SingleRepeatMatchSet when more matched, nil when fewer.
  #
  # The count is kept in an explicit variable.  The earlier
  # `(0...n).each do |i|` form relied on the block parameter
  # overwriting the outer `i`, which stopped happening in Ruby 1.9
  # (block parameters are block-local), leaving the count at 0.
  def mmatch(arr, start)
    i = 0
    while i < @times.end
      break unless start+i < arr.size
      break unless @reg===arr[start+i]
      i += 1
    end

    assert( (0..@times.end)===i)
    if i==@times.begin
      return [RR[arr[start,i]], i]
    end
    i>@times.begin or return nil
    return SingleRepeatMatchSet.new(i,-1,@times.begin)
  end

  # Used when @reg is a multiple reg: matches the minimum number of
  # iterations first, then greedily the optional ones, on a fresh
  # Progress over an indexed cursor positioned at +start+.
  # Returns nil, a [result, consumed] pair, or a RepeatMatchSet.
  def mmatch_multiple(arr, start)
    assert start <= arr.size
    r = [RR[]]

    cu = Cursor::Indexed.new(arr)
    cu.pos = start
    pr = Progress.new(self, cu)

    # first match the minimum number
    if @times.begin==0 # if we can match nothing
      arr.size==start and return [r,0] # at end of input? return empty set
      ri = di = 0
    else
      arr.size==start and return nil
      assert @times.begin<INFINITY
      r, di, ri = pr.bt_match(@times.begin) # matches @reg @times.begin times
      r.nil? and return nil
    end
    assert ri==@times.begin

    assert !@times.exclude_end?
    left = @times.end-@times.begin

    # note: left and top could be infinite here...

    # do the optional match iterations
    # only greedy matching implemented for now
    # there must be a more efficient algorithm...
    if left >= 1
      assert pr.check_result
      # get remaining matches up to @times.end times
      res, di, ri = pr.bt_match
      assert @times===ri

      res and return RepeatMatchSet.new(pr,di)
    end

    # if matchset has no backtracking stops, and
    # hence cannot contain more than one actual match,
    # then just return that match.
    pr.matchset_stack.empty? ?
      [pr.clean_result,di] : RepeatMatchSet.new(pr,di)
  end
end
835
+
836
+ end # $MMATCH_PROGRESS
837
+
838
+
839
+
840
+ #---------------------------------------------
841
class MatchSet
  # Delegates to the embedded progress object, if any.  The `huh`
  # placeholder call is still made first.
  def clean_result
    huh
    return @progress.clean_result
  end
end
848
+
849
+ #--------------------------
850
+ class RepeatMatchSet < MatchSet
851
+
852
+ attr :progress
853
# progress: the Progress object holding the state of this repeat
# consumed: number of items the first match consumed
# The first match is captured immediately as
# [progress.clean_result, consumed] and handed out by the first call
# to next_match.
def initialize(progress, consumed)
  @progress = progress
  @consumed = consumed
  @firstmatch = [progress.clean_result, @consumed]
  assert( progress.matcher.times===progress.regsidx)
  assert progress.regsidx
end
863
+
864
# The regsidx of the embedded progress object.
def match_iterations
  return progress.regsidx
end
868
+
869
+ #very nearly identical to SubseqMatchSet#next_match
870
# Returns the next [result, consumed] pair for this repeat, or nil
# when there are no more.  Very nearly identical to
# SubseqMatchSet#next_match (the "twin" mentioned below; it has fewer
# assertions).
# arr, idx: the input array and start index; they are only used in
#           assertions here.
# Order of attempts: the stored first match; then (unfinished branch)
# dropping the last iteration; then the progress object's
# last_next_match; finally backtrack followed by bt_match.
# @progress is set to nil once the set is exhausted.
def next_match(arr, idx)
  if @firstmatch
    first = @firstmatch
    @firstmatch = nil
    assert first.first.empty? || ::Array===first.first.first
    assert idx+first.last<=arr.size
    assert(progress.regsidx==first.first.size)
    return first
  end

  return unless @progress # not in twin ... ignore it

  assert progress.check_result

  i = progress.matchfail_todo.last.position_inc
  # (an unused "extents" computation is disabled here: [] when i==0,
  # otherwise progress.position_stack[-i..-1])

  # this part's not in twin
  # 'need to check for fewer matches here before rematching last matchset'
  #
  # what if the match that gets discarded was returned by a matchset
  # that has more matches in it?
  # in that case, i is 1 and the body of this if should not be executed...
  if progress.regsidx>progress.matcher.times.begin && i>1
    huh progress.backup_stacks
    huh # need to change progress.cursor.pos here too
    huh # need to save result of backup_stacks

    # but now all other progress state needs undoing too, if the state
    # was created with the match result just popped.  in general, it's
    # not possible to distinguish state with the last match from state
    # with the matches that might have preceded it... unless a
    # backtracking point is created for each optional iteration of the
    # repeat matcher.  currently, making a backtracking point implies
    # making a matchset as well, so a matchset that contains only 1
    # match is needed.
    # ok, i think this is working now. no extra code needed here.

    @consumed-=pos-progress.position_stack.last
    assert idx+@consumed<=arr.size
    assert progress.check_result
    shorter = [progress.clean_result, @consumed]
    assert progress.check_result
    return shorter
  end

  assert progress.check_result
  assert( (0..@progress.matcher.max_matches)===@progress.regsidx)
  found, di, ri = progress.last_next_match
  if found and @progress.matcher.enough_matches? ri # condition slightly different in twin
    pair = [progress.clean_result, di]
    @consumed = di # not in twin...why?
    assert ::Array===pair.first.first
    assert idx+pair.last<=arr.size
    assert progress.check_result
    return pair
  end

  assert( (0..@progress.matcher.max_matches)===@progress.regsidx)
  assert(progress.check_result)

  # also checking @ary in twin... ignore it
  return @progress=nil if @progress.matchset_stack.empty?
  assert @progress.regsidx>0

  # @progress never set to nil like this in twin... ignore it
  return @progress=nil unless @progress.backtrack

  # this is where the divergence widens. ri is a local in twin
  assert(progress.check_result)
  mat, di, @ri = @progress.bt_match # mat is @ary in twin
  return @progress=nil if mat.nil?

  assert @progress.regsidx
  assert( (0..@progress.matcher.max_matches)===@progress.regsidx)

  pair = [@progress.clean_result, di]
  @consumed = di # no @consumed in twin
  assert ::Array===pair.first.first
  assert idx+pair.last<=arr.size
  assert progress.check_result
  return pair
end
968
+
969
+ end
970
+
971
+ #---------------------------------------------
972
+ class SubseqMatchSet < MatchSet
973
+
974
# progress: the Progress object holding the state of this subsequence
# di:       number of items the first match consumed
# Remembers the matcher, the position the match started at, and the
# first result as [progress.clean_result, di].
def initialize(progress, di)
  @reg = progress.matcher
  @progress = progress

  @orig_pos = progress.cursor.pos - di
  @firstresult = [progress.clean_result, di]
end
980
+
981
+ #(@reg=>progress.matcher,@matchary=>progress.huh,di=>progress.cursor.pos-@orig_pos)
982
+
983
# Returns the next [result, consumed] pair for this subsequence, or
# nil when there are no more (in most of those cases @progress is
# also set to nil).
# ary, start: not used by this implementation.
# Order of attempts: the stored first result; then the progress
# object's last_next_match; finally backtrack followed by bt_match.
def next_match(ary, start)
  if @firstresult
    first = @firstresult
    @firstresult = nil
    assert ::Array===first.first.first
    return first
  end

  found, di, ri = @progress.last_next_match
  return @progress=nil unless found
  if ri==@reg.max_matches
    pair = [@progress.clean_result, di]
    assert ::Array===pair.first.first
    return pair
  end

  unless @progress and !@progress.matchset_stack.empty?
    return @progress=nil
  end
  assert @progress.regsidx
  return @progress=nil unless @progress.backtrack

  # need to adjust ri?

  # is this right... dunno...
  found, di, bogus = @progress.bt_match

  if found
    pair = [@progress.clean_result, di]
    assert ::Array===pair.first.first
    return pair
  end
end
1016
+
1017
# The matcher's max_matches.
# Reads @progress directly: unlike RepeatMatchSet, this class declares
# no `progress` reader, and the rest of the class already uses the
# instance variable.
def match_iterations
  @progress.matcher.max_matches
end
1020
+
1021
+ end
1022
+ #--------------------------
1023
# Currently nothing more than SubseqMatchSet under another name.
# This isn't really right: on next_match, the longest alternative(s)
# need to be backtracked.  If they are then shorter than the next
# longest alternative, that (formerly next longest) alternative
# becomes the dominating one and determines how much is consumed.
class AndMatchSet < SubseqMatchSet
end
1031
+ #might need Reg::Or tooo....
1032
+
1033
class Repeat
  # "Enable backtracking stops at each optional iteration":
  # true when +steps+ is accepted by @times via ===.
  def want_gratuitous_btstop?(steps)
    return @times === steps
  end
end
1041
+
1042
+
1043
+ #---------------------------------------------
1044
class Array
  # Matches +other+ against this pattern.  Anything that is not a
  # core ::Array is rejected (need to be more generous eventually).
  # Otherwise bt_match is run over the whole input, and the match
  # counts only if it consumed every item of +other+.
  # Returns false, or the bt_match result value on success.
  def ===(other)
    return false unless ::Array===other
    progress = Progress.new(self, (Cursor::Indexed.new other))
    result, di, bogus = progress.bt_match
    assert di.nil? || di <= other.size
    di==other.size && result
  end
end
1053
+
1054
+ end
1055
+
1056
+ #if false #work-around warnings in cursor
1057
# Work-around for warnings in cursor: wraps a class's initialize so
# that @positions and @prop are assigned (nil unless already set)
# before the original initialize runs.
propNiller = proc do
  old_init = instance_method(:initialize)

  define_method(:initialize) do |*args|
    @positions ||= @prop ||= nil
    old_init.bind(self).call(*args)
  end
end
Cursor::Indexed.instance_eval(&propNiller)
Cursor::Position.instance_eval(&propNiller)
1067
+ #end