reg 0.4.8 → 0.5.0a0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +4 -0
  2. data/COPYING +0 -0
  3. data/History.txt +14 -0
  4. data/Makefile +59 -0
  5. data/README +87 -40
  6. data/article.txt +838 -0
  7. data/{assert.rb → lib/assert.rb} +3 -3
  8. data/{reg.rb → lib/reg.rb} +11 -4
  9. data/lib/reg/version.rb +21 -0
  10. data/lib/regarray.rb +455 -0
  11. data/{regarrayold.rb → lib/regarrayold.rb} +33 -7
  12. data/lib/regbackref.rb +73 -0
  13. data/lib/regbind.rb +230 -0
  14. data/{regcase.rb → lib/regcase.rb} +15 -5
  15. data/lib/regcompiler.rb +2341 -0
  16. data/{regcore.rb → lib/regcore.rb} +196 -85
  17. data/{regdeferred.rb → lib/regdeferred.rb} +35 -4
  18. data/{regposition.rb → lib/regevent.rb} +36 -38
  19. data/lib/reggraphpoint.rb +28 -0
  20. data/lib/reghash.rb +631 -0
  21. data/lib/reginstrumentation.rb +36 -0
  22. data/{regitem_that.rb → lib/regitem_that.rb} +32 -11
  23. data/{regknows.rb → lib/regknows.rb} +4 -2
  24. data/{reglogic.rb → lib/reglogic.rb} +76 -59
  25. data/{reglookab.rb → lib/reglookab.rb} +31 -21
  26. data/lib/regmatchset.rb +323 -0
  27. data/{regold.rb → lib/regold.rb} +27 -27
  28. data/{regpath.rb → lib/regpath.rb} +91 -1
  29. data/lib/regposition.rb +79 -0
  30. data/lib/regprogress.rb +1522 -0
  31. data/lib/regrepeat.rb +307 -0
  32. data/lib/regreplace.rb +254 -0
  33. data/lib/regslicing.rb +581 -0
  34. data/lib/regsubseq.rb +72 -0
  35. data/lib/regsugar.rb +361 -0
  36. data/lib/regvar.rb +180 -0
  37. data/lib/regxform.rb +212 -0
  38. data/{trace.rb → lib/trace_during.rb} +6 -4
  39. data/lib/warning.rb +37 -0
  40. data/parser.txt +26 -8
  41. data/philosophy.txt +18 -0
  42. data/reg.gemspec +58 -25
  43. data/regguide.txt +18 -0
  44. data/test/andtest.rb +46 -0
  45. data/test/regcompiler_test.rb +346 -0
  46. data/test/regdemo.rb +20 -0
  47. data/{item_thattest.rb → test/regitem_thattest.rb} +2 -2
  48. data/test/regtest.rb +2125 -0
  49. data/test/test_all.rb +32 -0
  50. data/test/test_reg.rb +19 -0
  51. metadata +108 -73
  52. data/calc.reg +0 -73
  53. data/forward_to.rb +0 -49
  54. data/numberset.rb +0 -200
  55. data/regarray.rb +0 -675
  56. data/regbackref.rb +0 -126
  57. data/regbind.rb +0 -74
  58. data/reggrid.csv +1 -2
  59. data/reghash.rb +0 -318
  60. data/regprogress.rb +0 -1054
  61. data/regreplace.rb +0 -114
  62. data/regsugar.rb +0 -230
  63. data/regtest.rb +0 -1078
  64. data/regvar.rb +0 -76
@@ -1,1054 +0,0 @@
1
- =begin copyright
2
- reg - the ruby extended grammar
3
- Copyright (C) 2005,2009 Caleb Clausen
4
-
5
- This library is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU Lesser General Public
7
- License as published by the Free Software Foundation; either
8
- version 2.1 of the License, or (at your option) any later version.
9
-
10
- This library is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
- Lesser General Public License for more details.
14
-
15
- You should have received a copy of the GNU Lesser General Public
16
- License along with this library; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
- =end
19
- begin
20
- require 'rubygems'
21
- rescue LoadError #do nothing
22
- end
23
-
24
- require 'sequence'
25
- require 'sequence/indexed'
26
-
27
-
28
- =begin the internal api
29
- originally:
30
- ResAryFrag=Array #it would be nice to get a more precise definition....
31
- ResAry=+[ResAryFrag,-[MatchSet,Integer,ResAryFrag].*]
32
-
33
- Reg%:mmatch[Array,Integer,
34
- Returns MatchSet|ResAryFrag|nil
35
- ]
36
- Backtrace%:bt_match[Array,Integer,Integer,Integer,ResAry,Integer.-,
37
- Returns ResAry|nil,Integer,Integer
38
- ]
39
- Matchset%:next_match[Array,Integer,
40
- Returns ResAryFrag|nil,Integer
41
- ]
42
-
43
- currently:
44
- Reg%:mmatch[Array,Integer, #has to change to take progress soon
45
- Returns MatchSet|ResAryFrag|nil
46
- ] #except subseq and repeat currently want progress
47
- Progress%:bt_match[Integer.-, #affects progress, i'm pretty sure
48
- Returns ResAry|nil,Integer,Integer #1st result used only as bool
49
- ]
50
- MatchSet%:next_match[Array,Integer, #affects progress?
51
- Returns ResAryFrag|nil,Integer
52
- ]
53
- MatchSet%:initialize[Progress,OBS,Returns MatchSet] #for repeat and subseq only
54
-
55
-
56
-
57
- ultimately:
58
- Reg%:mmatch[Progress, Returns MatchSet|Integer|nil] #affects progress on success (when integer returned)
59
- Progress%:bt_match[Integer.-, Returns Bool] #affects progress on success
60
- MatchSet%:next_match[Returns Integer|nil] #affects progress on success
61
- (modified progress is the same one as was given to the mmatch that created the matchset)
62
- MatchSet%:initialize[Progress,OBS,Returns MatchSet] #for every ms class
63
-
64
-
65
-
66
- =end
67
-
68
-
69
- #---------------------------------------------
70
- module Reg
71
-
72
- #---------------------------------------------
73
- class MatchFailRec
74
- attr_accessor(:undos_inc,:matchsucceed_inc,:position_inc)
75
- def initialize
76
- @undos_inc=@matchsucceed_inc=@position_inc=0
77
- end
78
-
79
- #position_inc is the number of positions to pop off position stack
80
- #to get back to the point before the match of the most recent matchset.
81
- #it is also the count by which to adjust @regsidx to get back to the
82
- #corresponding reg which generated the matchset.
83
- alias regs_adjust position_inc
84
- end
85
-
86
- #---------------------------------------------
87
- class Progress
88
- attr_reader :matcher, :cursor, :regsidx, :position_stack, :matchset_stack, :matchfail_todo, :variables
89
-
90
- #for internal use only...
91
- attr_writer :undos_stack, :matchfail_todo, :matchsucceed_stack #, :regsidx
92
-
93
-
94
- #@matchset_stack and @matchfail_todo are (nearly) parallel arrays; @matchfail_todo has
95
- #one more item in it (at the bottom). each matchfailrec represents the things to undo
96
- #on failure to get back to the corresponding matchset's starting position.
97
-
98
- #@matchfail_todo is more or less a 2-dimensional array of integers. very many of
99
- #those integers in the undos_inc and matchsucceed_inc columns will be zero. it
100
- #would be nice to use a sparse vector or matrix instead.
101
-
102
-
103
- #---------------------------------------------
104
- def initialize(matcher,cursor)
105
- @matcher=matcher
106
- @cursor=cursor
107
- @regsidx=0
108
- @variables={}
109
- @undos_stack=[] #recursive list of undo procs and vars defined in this entire match
110
- @matchset_stack=[]
111
- @position_stack=[@cursor.pos]
112
- @matchfail_todo=[MatchFailRec.new] #list of things to do when match fails.... undo(&adjust variables), matchsucceed, position, (matchset)
113
- @matchsucceed_stack=[] #r-list of things to do when entire match succeeds... subst and deferreds
114
-
115
- assert check_result
116
- end
117
-
118
- #---------------------------------------------
119
- def push_matchset(ms)
120
- assert MatchSet===ms
121
- @matchset_stack.push ms
122
- @matchfail_todo.push MatchFailRec.new
123
- #push_match len #disable... caller wants to do it...
124
- end
125
-
126
- #---------------------------------------------
127
- def push_match(inc=0)
128
- #matchset_stack should be 1 smaller than matchfail_todo
129
- assert @matchfail_todo.size-1==@matchset_stack.size
130
-
131
- @cursor.move inc #do nothing if no param given
132
- @position_stack.push @cursor.pos #push the start position of the next match
133
- @matchfail_todo.last.position_inc+=1
134
- @regsidx+=1
135
- end
136
-
137
- #---------------------------------------------
138
- #this method is dangerous! it leaves the object in an inconsistent state.
139
- #caller must fixup state by either popping @matchset_stack or pushing a matchfail_todo
140
- def backup_stacks
141
- assert(@matchfail_todo.size >= 1)
142
- discarding=@matchfail_todo.pop
143
- #discarding should not ever be a list!
144
- @regsidx-=discarding.position_inc #adjust position in matcher
145
- assert(@position_stack.size >= discarding.position_inc) #what if position stack is empty here?
146
-
147
- discarding.position_inc.nonzero? and
148
- @position_stack=@position_stack.slice(0...-discarding.position_inc)
149
-
150
- #backup undo stack and execute undos
151
- process_undos @undos_stack.slice!(-discarding.undos_inc..-1)
152
-
153
- #backup matchsucceed stack
154
- discarding.matchsucceed_inc.nonzero? and
155
- @matchsucceed_stack=@matchsucceed_stack.slice(0...-discarding.matchsucceed_inc)
156
-
157
- return @matchset_stack.last
158
- end
159
-
160
- #---------------------------------------------
161
- def backtrack
162
- assert @regsidx != INFINITY
163
- assert check_result
164
- mat=nil
165
- loop do
166
- ms=backup_stacks or return
167
-
168
- mat=ms.next_match(@cursor.data?, @position_stack.last) and break
169
- @matchset_stack.pop or huh
170
- end
171
- assert( (1..@matcher.max_matches)===@regsidx+1)
172
- assert ::Array===mat
173
- assert ::Array===mat.first
174
-
175
-
176
- #back up cursor position
177
- @cursor.pos=@position_stack.last
178
-
179
-
180
-
181
- @matchfail_todo.push MatchFailRec.new
182
- push_match mat.last
183
-
184
- assert @regsidx
185
- return @regsidx, @cursor.pos-origpos
186
- end
187
-
188
- #---------------------------------------------
189
- #lookup something that was already matched, either by
190
- #name or index(es).
191
- #probably need to take a full path for parameters
192
- def backref; huh end
193
-
194
- #---------------------------------------------
195
- def subprogress(cursor=nil,matcher=nil)
196
- result=dup
197
- result.matchfail_todo=[MatchFailRec.new]
198
- result.cursor=
199
- if cursor
200
- unless Sequence===cursor
201
- huh #convert other data to a cursor...
202
- end
203
- cursor
204
- else
205
- result.cursor.position # make a sub-cursor
206
- end
207
-
208
- result.position_stack=[result.cursor.pos]
209
-
210
- @undo_stack.push result.undo_stack=[]
211
- @matchsucceed_stack.push result.matchsucceed_stack=[]
212
- @matchfail_todo.last.matchsucceed_inc+=1
213
- @matchfail_todo.last.undo_inc+=1
214
-
215
- if matcher
216
- @matcher=matcher#@matcher might be set to something different
217
- @regsidx=0
218
- end
219
-
220
- assert result.check_result
221
-
222
- result
223
- end
224
-
225
- #---------------------------------------------
226
- def make_hash
227
- huh
228
-
229
- end
230
-
231
- #---------------------------------------------
232
- def last_match_range
233
- @position_stack[-2]...@position_stack[-1]
234
- end
235
-
236
- #---------------------------------------------
237
- def top_matchset
238
- @matchset_stack.last
239
- end
240
-
241
- #---------------------------------------------
242
- def register_variable(name)
243
- @variables[name] and return warn( "variable #{name} is already defined")
244
- @variables[name]=last_match_range
245
- @undos_stack<<name
246
- @matchfail_todo.last.undos_inc+=1
247
- end
248
-
249
- #---------------------------------------------
250
- class Later #inside Progress, so it doesn't conflict with Reg::Later from regreplace.rb
251
- def initialize(block,args)
252
- @block,@args=block,args
253
- end
254
- class<<self;
255
- alias [] new;
256
- end
257
-
258
- def call
259
- @block.call( *@args)
260
- end
261
- end
262
-
263
- #---------------------------------------------
264
- def register_undo *args, &block
265
- @undos_stack<<Later[block,*args]
266
- @matchfail_todo.last.undos_inc+=1
267
- end
268
-
269
- #wannabe in class Array
270
- def recursive_each arr,&block
271
- arr.each {|item|
272
- if item.respond_to? :to_a
273
- recursive_each item.to_a, &block
274
- else
275
- block[item]
276
- end
277
- }
278
- end
279
-
280
- def recursive_reverse_each arr,&block
281
- arr.reverse_each {|item|
282
- if item.respond_to? :to_a
283
- recursive_reverse_each item.to_a, &block
284
- else
285
- block[item]
286
- end
287
- }
288
- end
289
-
290
- #---------------------------------------------
291
- def process_undos(undos)
292
- recursive_reverse_each undos do|undo|
293
- Symbol===undo ? @variables.delete(undo) : undo.call
294
- end
295
- end
296
-
297
-
298
- #---------------------------------------------
299
- def register_replace(subst,*args)
300
- @matchsucceed_stack.push Later[subst,args]
301
- @matchfail_todo.last.matchsucceed_inc+=1
302
- end
303
-
304
- #---------------------------------------------
305
- def later(*args,&block)
306
- @matchsucceed_stack.push Later[block,args]
307
- @matchfail_todo.last.matchsucceed_inc+=1
308
- end
309
-
310
- #---------------------------------------------
311
- def process_laters
312
- recursive_reverse_each(@matchsucceed_stack) {|later| later.call }
313
- end
314
-
315
- #---------------------------------------------
316
- def origpos
317
- @position_stack.first
318
- end
319
-
320
- #---------------------------------------------
321
- def posinc
322
- @cursor.pos-origpos
323
- end
324
-
325
-
326
- #--------------------------
327
- $RegTraceEnable=$RegTraceDisable=nil
328
- def trace_enabled?
329
- @trace||=nil
330
- $RegTraceEnable or (!$RegTraceDisable && @trace)
331
- end
332
-
333
- #--------------------------
334
- #bt, in this case, stands for 'backtracking'.
335
- #but the cognoscenti refer to this method as 'bitch-match'.
336
- def bt_match(match_steps=@matcher.max_matches)
337
- assert @cursor.pos <= @cursor.size
338
- assert origpos >= 0
339
- assert posinc >= 0
340
- assert( (0...match_steps)===@regsidx)
341
- assert Integer===@position_stack.first
342
- assert check_result
343
- loop do #loop over regs to match
344
- assert @cursor.pos <= @cursor.size
345
- assert posinc >= 0
346
- assert( (0...match_steps)===@regsidx)
347
-
348
- if trace_enabled?
349
- puts [@matcher, clean_result].map{|i| i.inspect }.join(' ')
350
- #pp self
351
- end
352
-
353
- assert check_result
354
-
355
- #try a new match of current reg
356
- r=@matcher.regs(@regsidx)
357
- if r.respond_to? :mmatch #should test for being a Reg::Reg instead
358
- # 'mmatch could return 2 items here'
359
- m=r.mmatch(@cursor.data?, @cursor.pos)
360
- #is a single match or a match set?
361
- if m.respond_to? :next_match
362
- #it's a set -- start new inner result array
363
- #with initial match as first elem
364
- push_matchset m
365
- mat,matchlen=m.next_match(@cursor.data?, @cursor.pos)
366
-
367
- assert mat
368
- assert m
369
- else
370
- mat,matchlen=*m #single match or nil
371
- m=nil
372
- end
373
- else
374
- if r===(item=@cursor.readahead1)
375
- mat=RR[item]
376
- matchlen=1
377
- end
378
- end
379
-
380
-
381
- assert check_result
382
-
383
- unless mat #match fail?
384
- assert check_result
385
- return to_result,posinc,@regsidx if @matcher.enough_matches? @regsidx
386
-
387
- #doesn't match, try backtracking
388
- assert @regsidx
389
- backtrack or return nil #bt failed? we fail
390
- assert(@cursor.pos<=@cursor.size)
391
- assert check_result
392
- else #match succeeded
393
- if !m and @matcher.respond_to? :want_gratuitous_btstop? and
394
- @matcher.want_gratuitous_btstop?(@regsidx)
395
- push_matchset SingleMatch_MatchSet.new(matchlen)
396
- end
397
-
398
- #advance to next reg
399
- assert check_result
400
- push_match @matcher.update_di(0,matchlen)
401
- assert(@cursor.pos<=@cursor.size)
402
- end
403
-
404
- assert(@cursor.pos<=@cursor.size)
405
-
406
- assert check_result
407
- return to_result,posinc,@regsidx if @regsidx>=match_steps
408
- assert( (0...match_steps)===@regsidx)
409
-
410
- end #loop
411
-
412
- end
413
-
414
- #---------------------------------------------
415
- #maybe this isn't necessary?
416
- #because backtrack is called after it,
417
- #and it's doing the same things.... more or less
418
-
419
- def last_next_match; #(?ary=>@cursor,?start,?resfrag=>...?)
420
- #huh #adapt logic from Backtrace
421
- assert check_result
422
- assert( (0..@matcher.max_matches)===@regsidx)
423
- r=backup_stacks
424
-
425
- di=@cursor.pos=@position_stack.last
426
- assert( (0..@matcher.max_matches)===@regsidx)
427
- unless r
428
- @matchfail_todo.push MatchFailRec.new
429
- assert check_result
430
- return nil,nil,@regsidx
431
- end
432
- #something wrong here....
433
-
434
- #dunno how to do this simply...
435
- #assert full_up? if SubseqMatchSet===self
436
-
437
- #@matchset_stack.pop is called in backtrack but not here, why?
438
-
439
-
440
- r,diinc=r.next_match(@cursor.data?,@cursor.pos)
441
- unless r
442
- #might need to return non-nil here, if resfrag isn't exhausted yet
443
- assert( (0..@matcher.max_matches)===@regsidx)
444
- @matchset_stack.pop
445
- assert check_result
446
- return nil,nil,@regsidx unless @matcher.enough_matches? @regsidx
447
- return to_result, @cursor.pos-@position_stack.first, @regsidx
448
- end
449
-
450
- assert diinc
451
- assert @cursor.pos+diinc <= @cursor.size
452
- @cursor.move diinc
453
- #@regsidx-=matchfail_todo.position_inc #should be done in push_match...
454
- @matchfail_todo.push MatchFailRec.new
455
- push_match
456
-
457
- if @regsidx<@matcher.max_matches #if there are more subregs of this reg to be matched
458
- #re-match tail regs
459
- assert @cursor.pos <= @cursor.size
460
- #di is sometimes bad here, it seems....(fixed now?)
461
- assert check_result
462
- assert( (0..@matcher.max_matches)===@regsidx)
463
- result=bt_match
464
- assert check_result
465
- return result
466
- end
467
-
468
-
469
-
470
- assert( (0..@matcher.max_matches)===@regsidx)
471
- assert check_result
472
-
473
- return to_result,@cursor.pos-@position_stack.first,@regsidx
474
- end
475
-
476
- #---------------------------------------------
477
- def check_result;
478
-
479
- #since this should be true, a separate @regsidx is unnecessary
480
- assert @regsidx==@position_stack.size-1
481
-
482
- #matchset_stack should be 1 smaller than matchfail_todo
483
- assert @matchfail_todo.size-1==@matchset_stack.size
484
-
485
-
486
- #verify correct types in @-variables
487
- assert Sequence===@cursor
488
- assert @matcher.respond_to?( :update_di)
489
- assert @regsidx >=0
490
- @matchset_stack.each{|ms| assert MatchSet===ms }
491
- prev_pos=0
492
- @position_stack.each{|pos| assert prev_pos<=pos; pos=prev_pos }
493
- assert prev_pos<=@cursor.size
494
-
495
- vars_copy=@variables.dup
496
- @undos_stack.each {|i|
497
- case i
498
- #every element of @variables should also be a sym in @undos_stack
499
- when Symbol
500
- vars_copy.delete(i) or assert(false)
501
-
502
- when Later
503
- else assert(false)
504
- end
505
- }
506
- assert vars_copy.empty? #every var should be accounted for
507
-
508
- #sum of :undos_inc,:matchsucceed_inc,:position_inc in @matchfail_todo
509
- #should be the same as the size of the corresponding stack.
510
- uns=mats=poss=0
511
- @matchfail_todo.each{|mfr|
512
- uns+=mfr.undos_inc
513
- mats+=mfr.matchsucceed_inc
514
- poss+=mfr.position_inc
515
- }
516
- assert uns==@undos_stack.size
517
- assert mats==@matchsucceed_stack.size
518
- assert poss+1==@position_stack.size
519
-
520
- assert succ_stack_ok
521
-
522
- return true
523
- end
524
-
525
- #---------------------------------------------
526
- def succ_stack_ok(stk=@matchsucceed_stack)
527
- stk.each{|elem|
528
- case elem
529
- when Array; succ_stack_ok(elem)
530
- when Later; true
531
- else
532
- end or return
533
- }
534
- return true
535
- end
536
- private :succ_stack_ok
537
-
538
- #---------------------------------------------
539
- def clean_result
540
- result=[]
541
- ms_pos_idx=@position_stack.size - @matchfail_todo.last.position_inc
542
- ms_pos_idx=-1
543
- result=(0...@position_stack.size-1).map{|i|
544
- if i==ms_pos_idx
545
- ms_pos_idx-=1
546
- #what if ms_idx too big?
547
- ms_pos_idx-=@matchfail_todo[ms_idx].position_inc
548
- ms.clean_result
549
-
550
- else
551
- @cursor[@position_stack[i], @position_stack[i+1]-@position_stack[i]]
552
- end
553
- }
554
-
555
- return result
556
- end
557
-
558
- #---------------------------------------------
559
- def to_result;
560
- true#ok, i'm cheating
561
- end
562
-
563
- end #class Progress
564
-
565
- class SingleMatch_MatchSet < MatchSet
566
- #this is somewhat of a hack, and shouldn't be necessary....
567
- #it exists because every backtracking stop has to have a
568
- #matchset in it, even the ones that only match one way.
569
- #this class encapsulates matchsets that match only one way.
570
-
571
-
572
- def initialize(len)
573
- #@len=len
574
- end
575
-
576
- def next_match(arr,start)
577
- #if @len
578
- # result=[arr[start,@len], @len]
579
- # @len=nil
580
- # result
581
- #end
582
- end
583
-
584
-
585
- end
586
-
587
-
588
- if defined? $MMATCH_PROGRESS #ultimately, mmatch will take a progress, but until then, disable this
589
- #---------------------------------------------
590
- class Subseq
591
-
592
- def mmatch(pr)
593
- #in this version, all @regs are not multiple regs
594
- pr=pr.subprogress(nil,self)
595
- cu=pr.cursor
596
- start=cu.pos
597
- assert cu.pos<=cu.size
598
- cu.pos+@regs.size<=cu.size or return nil
599
- @regs.each do |reg|
600
- assert cu.pos<cu.size
601
- reg===cu.read1 or return nil
602
- end
603
- return [ [cu.read( start-cu.pos,true)], @regs.size ]
604
- end
605
-
606
- private
607
- def mmatch_multiple(pr)
608
- #in this version, at least one of @regs is a multiple reg
609
- #start==arr.size and huh
610
- pr=pr.subprogress(nil,self)
611
- cu=pr.cursor
612
- start=cu.pos
613
- start+itemrange.begin<=cu.size or return nil
614
- assert( (0..cu.size).include?( start))
615
- result,di,bogus=pr.bt_match
616
- result and SubseqMatchSet.new(pr,di)
617
- end
618
- end
619
-
620
- #---------------------------------------------
621
- class Repeat
622
- def mmatch(pr)
623
- pr=pr.subprogress(nil,self)
624
- cu=pr.cursor
625
- start=cu.pos
626
- start+@times.begin <= cu.size or return nil #enough room left in input?
627
- i=-1
628
- (0...@times.end).each do |i2| i=i2
629
- start+i<cu.size or break(i-=1)
630
- @reg===cu.read1 or break(i-=1)
631
- end
632
- i+=1
633
- assert( (0..@times.end)===i)
634
- if i==@times.begin
635
- return [[cu.read(start-cu.pos,true)], i]
636
- end
637
- i>@times.begin or return nil
638
- return SingleRepeatMatchSet.new(i,-1,@times.begin)
639
- end
640
-
641
- private
642
- def mmatch_multiple(pr)
643
- pr=pr.subprogress(nil,self)
644
- start=cu.pos
645
- assert start <= cu.size
646
- start+itemrange.begin <= cu.size or return nil #enough room left in input?
647
- r=[[]]
648
-
649
- #first match the minimum number
650
- if @times.begin==0 #if we can match nothing
651
- cu.eof? and return [r,0] #at end of input? return empty set
652
- ri=di=0
653
- else
654
- cu.eof? and return nil
655
- assert @times.begin<INFINITY
656
- r,di,ri=pr.bt_match(@times.begin) #matches @reg @times.begin times
657
- r.nil? and return nil
658
- end
659
- assert ri==@times.begin
660
-
661
- assert !@times.exclude_end?
662
- left=@times.end-@times.begin
663
-
664
- #note: left and top could be infinite here...
665
-
666
- #do the optional match iterations
667
- #only greedy matching implemented for now
668
- #there must be a more efficient algorithm...
669
- if left >= 1
670
- assert pr.check_result
671
- #get remaining matches up to @times.end times
672
- rr=pr.make_hash
673
- res,di,ri=pr.bt_match #bt stop at each iteration, this time
674
- assert pr.check_result( res)
675
- assert @times===pr.regsidx
676
-
677
- res and return RepeatMatchSet.new(pr,(huh di))
678
- assert rr==pr.make_hash
679
- end
680
-
681
- #if matchset has no backtracking stops, and
682
- #hence cannot contain more than one actual match,
683
- #then just return that match.
684
- pr.matchset_stack.empty? ? RepeatMatchSet.new(pr,(huh di)) :
685
- [pr.clean_result,(huh di)]
686
- end
687
- end
688
-
689
- #---------------------------------------------
690
- class AndMatchSet
691
- #the total number of possible different ways to match an AndMatchSet
692
- #where several of the branches are actually ambiguous
693
- #grows exponentially.
694
- #rather than hit every possible match, we'll try to hit
695
- #every legal match length at least once.
696
-
697
- #on next_match,
698
- #figure out the alternative(s) that are returning the longest
699
- #matchset currently. those alternatives are returned in
700
- #the first match, but at the 2nd and subsequent calls
701
- #to next_match, that set of longest alternatives are all
702
- #next_matched (rolled back) until they match something shorter.
703
- #(or maybe just a different length? Reg::Or isn't greedy, so its
704
- #longest match isn't necessarily returned first.)
705
-
706
- #if any next_match call returns nil (or false), the whole match set
707
- #is finished. return nil from next_match now and forever more.
708
-
709
-
710
-
711
- def initialize
712
- huh
713
- end
714
- end
715
-
716
- #---------------------------------------------
717
- class And
718
-
719
- private
720
- #can't use this until mmatch interface is changed to take a single progress param
721
- def mmatch_multiple(progress)
722
- #in this version, at least one of @regs is a multiple reg
723
- assert( (0..progress.cursor.size).include?( progress.cursor.pos))
724
- result,*bogus=progress.bt_match
725
- result and AndMatchSet.new(self,result)
726
- #need new definition of AndMatchSet...
727
-
728
- #need to keep track of which alternative(s) was longest, so as to advance
729
- #the cursor by that amount. and know which ones to start backtracking in.
730
-
731
- #cursor needs to be advanced here somewhere, i think.
732
- end
733
-
734
- end
735
-
736
-
737
- else #... not $MMATCH_PROGRESS
738
- class Subseq
739
-
740
- def mmatch(arr,start)
741
- #in this version, each of @regs is not a multiple reg
742
- assert start<=arr.size
743
- start+@regs.size<=arr.size or return nil
744
- idx=0
745
- @regs.each do |reg|
746
- assert(start+idx<arr.size)
747
- reg===arr[start+idx] or return nil
748
- idx+=1
749
- end
750
- return [RR[arr[start,@regs.size]], @regs.size]
751
- end
752
-
753
- def mmatch_multiple(arr,start)
754
- #in this version, at least one of @regs is a multiple reg
755
- assert( (0..arr.size).include?( start))
756
- cu=Sequence::Indexed.new(arr); cu.pos=start
757
- pr=Progress.new(self,cu)
758
- result,di,bogus=pr.bt_match
759
- result and SubseqMatchSet.new(pr,di)
760
- end
761
- end
762
-
763
- class ::Sequence
764
- class Indexed
765
- def data?; @data end
766
- end
767
- end
768
-
769
- class Repeat
770
- def mmatch(arr,start)
771
- i=-1
772
- (0...@times.end).each do |i2| i=i2
773
- start+i<arr.size or break(i-=1)
774
- @reg===arr[start+i] or break(i-=1)
775
- end
776
- i+=1
777
- assert( (0..@times.end)===i)
778
- if i==@times.begin
779
- return [RR[arr[start,i]], i]
780
- end
781
- i>@times.begin or return nil
782
- return SingleRepeatMatchSet.new(i,-1,@times.begin)
783
- end
784
-
785
- def mmatch_multiple(arr,start)
786
- assert start <= arr.size
787
- r=[RR[]]
788
-
789
- cu=Sequence::Indexed.new(arr); cu.pos=start
790
- pr=Progress.new(self,cu)
791
-
792
- #first match the minimum number
793
- if @times.begin==0 #if we can match nothing
794
- arr.size==start and return [r,0] #at end of input? return empty set
795
- ri=di=0
796
- else
797
- arr.size==start and return nil
798
- assert @times.begin<INFINITY
799
- r,di,ri=pr.bt_match(@times.begin) #matches @reg @times.begin times
800
- r.nil? and return nil
801
- end
802
- assert ri==@times.begin
803
-
804
- assert !@times.exclude_end?
805
- left=@times.end-@times.begin
806
-
807
- #note: left and top could be infinite here...
808
-
809
- #do the optional match iterations
810
- #only greedy matching implemented for now
811
- #there must be a more efficient algorithm...
812
- if left >= 1
813
- assert pr.check_result
814
- #get remaining matches up to @times.end times
815
- #because bt_match could change the rr argument, and
816
- #we might need to return the original in r below
817
- res,di,ri=pr.bt_match
818
- # assert Backtrace.check_result res #this is correct, for now (i think)
819
- #don't update to progress version
820
- assert @times===ri
821
-
822
- res and return RepeatMatchSet.new(pr,di)
823
- end
824
-
825
- #if matchset has no backtracking stops, and
826
- #hence cannot contain more than one actual match,
827
- #then just return that match.
828
- pr.matchset_stack.empty? ?
829
- [pr.clean_result,di] : RepeatMatchSet.new(pr,di)
830
- end
831
-
832
- end
833
-
834
- end # $MMATCH_PROGRESS
835
-
836
-
837
-
838
- #---------------------------------------------
839
- class MatchSet
840
- #delegate to embedded progress, if any
841
- def clean_result
842
- huh
843
- @progress.clean_result
844
- end
845
- end
846
-
847
- #--------------------------
848
- class RepeatMatchSet < MatchSet
849
-
850
- attr :progress
851
- def initialize(progress,consumed)
852
- @progress=progress
853
- #@cnt=@startcnt-stepper
854
- #@ary.push 1
855
- @consumed=consumed
856
- @firstmatch=[progress.clean_result,@consumed]
857
- assert( progress.matcher.times===progress.regsidx)
858
- assert progress.regsidx
859
- #assert(@ri==@firstmatch.first.size)
860
- end
861
-
862
- def match_iterations;
863
- #assert(@ri==Backtrace.clean_result(@ary).size)
864
- progress.regsidx
865
- end
866
-
867
- #very nearly identical to SubseqMatchSet#next_match
868
- def next_match(arr,idx)
869
- #fewer assertions in twin
870
- if @firstmatch
871
- result,@firstmatch=@firstmatch,nil
872
- assert result.first.empty? || ::Array===result.first.first
873
- #print "idx=#{idx}, inc=#{result.last}, arr.size=#{arr.size}\n"
874
- assert idx+result.last<=arr.size
875
- assert(progress.regsidx==result.first.size)
876
- return result
877
- end
878
-
879
- @progress or return #not in twin ... ignore it
880
-
881
- assert progress.check_result
882
-
883
- i=progress.matchfail_todo.last.position_inc
884
- =begin extents not used
885
- extents= if i==0
886
- []
887
- else
888
- progress.position_stack[-i..-1]
889
- end
890
- =end
891
- #this part's not in twin
892
- #'need to check for fewer matches here before rematching last matchset'
893
-
894
- #what if the match that gets discarded was returned by a matchset
895
- #that has more matches in it?
896
- #in that case, i is 1 and the body of this if should not be executed...
897
- if progress.regsidx>progress.matcher.times.begin && i>1
898
- huh progress.backup_stacks
899
- huh #need to change progress.cursor.pos here too
900
- huh #need to save result of backup_stacks
901
-
902
- #but now i need to undo all other progress state too, if
903
- #the state was created with the match result just popped.
904
- #in general, it's not possible to distinguish state with the
905
- #last match from state with the matches that might have preceded it...
906
- #unless I create a backtracking point for each optional iteration
907
- #of the repeat matcher.
908
- #currently, making a backtracking point implies making a matchset
909
- #as well. I'll need a matchset that contains only 1 match.
910
- #ok, i think this is working now. no extra code needed here.
911
-
912
- @consumed-=pos-progress.position_stack.last
913
- #assert(@ri==Backtrace.clean_result(@ary).size)
914
- assert idx+@consumed<=arr.size
915
- assert progress.check_result
916
- result= [progress.clean_result, @consumed]
917
- assert progress.check_result
918
- return result
919
- end
920
-
921
-
922
- assert progress.check_result
923
- assert( (0..@progress.matcher.max_matches)===@progress.regsidx)
924
- result,di,ri=progress.last_next_match
925
- if result and @progress.matcher.enough_matches? ri #condition slightly different in twin
926
- result=[progress.clean_result,di]
927
- @consumed=di #not in twin...why?
928
- #@progress.regsidx-=1
929
- assert ::Array===result.first.first
930
- assert idx+result.last<=arr.size
931
- assert progress.check_result
932
- #assert(@ri==result.first.size)
933
- return result
934
- end
935
-
936
- assert( (0..@progress.matcher.max_matches)===@progress.regsidx)
937
- #assert(@ri==Backtrace.clean_result(@ary).size)
938
- assert(progress.check_result)
939
-
940
-
941
-
942
- @progress.matchset_stack.empty? and return @progress=nil #also checking @ary in twin... ignore it
943
- assert @progress.regsidx>0
944
-
945
- @progress.backtrack or return @progress=nil #@progress never set to nil like this in twin... ignore it
946
-
947
- #this is where the divergence widens. ri is a local in twin
948
-
949
- #assert(@ri==Backtrace.clean_result(@ary).size)
950
- assert(progress.check_result)
951
- mat,di,@ri=@progress.bt_match #mat is @ary in twin
952
- mat.nil? and return @progress=nil
953
-
954
- #assert(@ri==Backtrace.clean_result(mat).size)
955
- assert @progress.regsidx
956
- assert( (0..@progress.matcher.max_matches)===@progress.regsidx)
957
-
958
- result=[@progress.clean_result,di]
959
- @consumed=di #no @consumed in twin
960
- assert ::Array===result.first.first
961
- assert idx+result.last<=arr.size
962
- assert progress.check_result
963
- #assert(@ri==result.last.size)
964
- return result
965
- end
966
-
967
- end
968
-
969
- #---------------------------------------------
970
- class SubseqMatchSet < MatchSet
971
-
972
- def initialize progress,di;
973
- @reg,@progress= progress.matcher,progress
974
-
975
- @orig_pos=progress.cursor.pos-di
976
- @firstresult= [progress.clean_result,di]
977
- end
978
-
979
- #(@reg=>progress.matcher,@matchary=>progress.huh,di=>progress.cursor.pos-@orig_pos)
980
-
981
- def next_match(ary,start)
982
- if @firstresult
983
- @firstresult,result=nil,@firstresult
984
- assert ::Array===result.first.first
985
- return result
986
- end
987
-
988
-
989
- result,di,ri=@progress.last_next_match
990
- result or return @progress=nil
991
- if result and ri==@reg.max_matches
992
- result=[@progress.clean_result,di]
993
- assert ::Array===result.first.first
994
- return result
995
- end
996
-
997
-
998
- (@progress and !@progress.matchset_stack.empty?) or return @progress=nil
999
- assert @progress.regsidx
1000
- @progress.backtrack or return @progress=nil
1001
-
1002
- #need to adjust ri?
1003
-
1004
- #is this right... dunno...
1005
- result,di,bogus=@progress.bt_match
1006
-
1007
-
1008
- if result
1009
- result=[@progress.clean_result,di]
1010
- assert ::Array===result.first.first
1011
- return result
1012
- end
1013
- end
1014
-
1015
- def match_iterations
1016
- progress.matcher.max_matches
1017
- end
1018
-
1019
- end
1020
- #--------------------------
1021
- class AndMatchSet < SubseqMatchSet
1022
- #this isn't really right...
1023
- #on next_match, we need to backtrack the longest alternative(s)
1024
- #if they're then shorter than the next longest alternative,
1025
- #then that (formerly next longest) alternative becomes
1026
- #the dominating alternative, and determines how much is consumed
1027
-
1028
- end
1029
- #might need Reg::Or too....
1030
-
1031
- class Repeat
1032
- #--------------------------------------------------------
1033
- # "enable backtracking stops at each optional iteration"
1034
- def want_gratuitous_btstop?(steps)
1035
- @times===steps
1036
- end
1037
-
1038
- end
1039
-
1040
-
1041
- #---------------------------------------------
1042
- class Array
1043
- def ===(other)
1044
- ::Array===other or return false #need to be more generous eventually
1045
- @regs.empty? and return other.empty?
1046
- progress=Progress.new(self,(Sequence::Indexed.new other))
1047
- result,di,bogus=progress.bt_match
1048
- assert di.nil? || di <= other.size
1049
- return(di==other.size && result)
1050
- end
1051
- end
1052
-
1053
- end
1054
-