reg 0.4.8 → 0.5.0a0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +4 -0
  2. data/COPYING +0 -0
  3. data/History.txt +14 -0
  4. data/Makefile +59 -0
  5. data/README +87 -40
  6. data/article.txt +838 -0
  7. data/{assert.rb → lib/assert.rb} +3 -3
  8. data/{reg.rb → lib/reg.rb} +11 -4
  9. data/lib/reg/version.rb +21 -0
  10. data/lib/regarray.rb +455 -0
  11. data/{regarrayold.rb → lib/regarrayold.rb} +33 -7
  12. data/lib/regbackref.rb +73 -0
  13. data/lib/regbind.rb +230 -0
  14. data/{regcase.rb → lib/regcase.rb} +15 -5
  15. data/lib/regcompiler.rb +2341 -0
  16. data/{regcore.rb → lib/regcore.rb} +196 -85
  17. data/{regdeferred.rb → lib/regdeferred.rb} +35 -4
  18. data/{regposition.rb → lib/regevent.rb} +36 -38
  19. data/lib/reggraphpoint.rb +28 -0
  20. data/lib/reghash.rb +631 -0
  21. data/lib/reginstrumentation.rb +36 -0
  22. data/{regitem_that.rb → lib/regitem_that.rb} +32 -11
  23. data/{regknows.rb → lib/regknows.rb} +4 -2
  24. data/{reglogic.rb → lib/reglogic.rb} +76 -59
  25. data/{reglookab.rb → lib/reglookab.rb} +31 -21
  26. data/lib/regmatchset.rb +323 -0
  27. data/{regold.rb → lib/regold.rb} +27 -27
  28. data/{regpath.rb → lib/regpath.rb} +91 -1
  29. data/lib/regposition.rb +79 -0
  30. data/lib/regprogress.rb +1522 -0
  31. data/lib/regrepeat.rb +307 -0
  32. data/lib/regreplace.rb +254 -0
  33. data/lib/regslicing.rb +581 -0
  34. data/lib/regsubseq.rb +72 -0
  35. data/lib/regsugar.rb +361 -0
  36. data/lib/regvar.rb +180 -0
  37. data/lib/regxform.rb +212 -0
  38. data/{trace.rb → lib/trace_during.rb} +6 -4
  39. data/lib/warning.rb +37 -0
  40. data/parser.txt +26 -8
  41. data/philosophy.txt +18 -0
  42. data/reg.gemspec +58 -25
  43. data/regguide.txt +18 -0
  44. data/test/andtest.rb +46 -0
  45. data/test/regcompiler_test.rb +346 -0
  46. data/test/regdemo.rb +20 -0
  47. data/{item_thattest.rb → test/regitem_thattest.rb} +2 -2
  48. data/test/regtest.rb +2125 -0
  49. data/test/test_all.rb +32 -0
  50. data/test/test_reg.rb +19 -0
  51. metadata +108 -73
  52. data/calc.reg +0 -73
  53. data/forward_to.rb +0 -49
  54. data/numberset.rb +0 -200
  55. data/regarray.rb +0 -675
  56. data/regbackref.rb +0 -126
  57. data/regbind.rb +0 -74
  58. data/reggrid.csv +1 -2
  59. data/reghash.rb +0 -318
  60. data/regprogress.rb +0 -1054
  61. data/regreplace.rb +0 -114
  62. data/regsugar.rb +0 -230
  63. data/regtest.rb +0 -1078
  64. data/regvar.rb +0 -76
@@ -1,1054 +0,0 @@
1
- =begin copyright
2
- reg - the ruby extended grammar
3
- Copyright (C) 2005,2009 Caleb Clausen
4
-
5
- This library is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU Lesser General Public
7
- License as published by the Free Software Foundation; either
8
- version 2.1 of the License, or (at your option) any later version.
9
-
10
- This library is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
- Lesser General Public License for more details.
14
-
15
- You should have received a copy of the GNU Lesser General Public
16
- License along with this library; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
- =end
19
- begin
20
- require 'rubygems'
21
- rescue LoadError #do nothing
22
- end
23
-
24
- require 'sequence'
25
- require 'sequence/indexed'
26
-
27
-
28
- =begin the internal api
29
- originally:
30
- ResAryFrag=Array #it would be nice to get a more precise definition....
31
- ResAry=+[ResAryFrag,-[MatchSet,Integer,ResAryFrag].*]
32
-
33
- Reg%:mmatch[Array,Integer,
34
- Returns MatchSet|ResAryFrag|nil
35
- ]
36
- Backtrace%:bt_match[Array,Integer,Integer,Integer,ResAry,Integer.-,
37
- Returns ResAry|nil,Integer,Integer
38
- ]
39
- Matchset%:next_match[Array,Integer,
40
- Returns ResAryFrag|nil,Integer
41
- ]
42
-
43
- currently:
44
- Reg%:mmatch[Array,Integer, #has to change to take progress soon
45
- Returns MatchSet|ResAryFrag|nil
46
- ] #except subseq and repeat currently want progress
47
- Progress%:bt_match[Integer.-, #affects progress, i'm pretty sure
48
- Returns ResAry|nil,Integer,Integer #1st result used only as bool
49
- ]
50
- MatchSet%:next_match[Array,Integer, #affects progress?
51
- Returns ResAryFrag|nil,Integer
52
- ]
53
- MatchSet%:initialize[Progress,OBS,Returns MatchSet] #for repeat and subseq only
54
-
55
-
56
-
57
- ultimately:
58
- Reg%:mmatch[Progress, Returns MatchSet|Integer|nil] #affects progress on success (when integer returned)
59
- Progress%:bt_match[Integer.-, Returns Bool] #affects progress on success
60
- MatchSet%:next_match[Returns Integer|nil] #affects progress on success
61
- (modified progress is the same one as was given to the mmatch that created the matchset)
62
- MatchSet%:initialize[Progress,OBS,Returns MatchSet] #for every ms class
63
-
64
-
65
-
66
- =end
67
-
68
-
69
- #---------------------------------------------
70
- module Reg
71
-
72
- #---------------------------------------------
73
- class MatchFailRec
74
- attr_accessor(:undos_inc,:matchsucceed_inc,:position_inc)
75
- def initialize
76
- @undos_inc=@matchsucceed_inc=@position_inc=0
77
- end
78
-
79
- #position_inc is the number of positions to pop off position stack
80
- #to get back to the point before the match of the most recent matchset.
81
- #it is also the count by which to adjust @regsidx to get back to the
82
- #corresponding reg which generated the matchset.
83
- alias regs_adjust position_inc
84
- end
85
-
86
- #---------------------------------------------
87
- class Progress
88
- attr_reader :matcher, :cursor, :regsidx, :position_stack, :matchset_stack, :matchfail_todo, :variables
89
-
90
- #for internal use only...
91
- attr_writer :undos_stack, :matchfail_todo, :matchsucceed_stack #, :regsidx
92
-
93
-
94
- #@matchset_stack and @matchfail_todo are (nearly) parallel arrays; @matchfail_todo has
95
- #one more item in it (at the bottom). each matchfailrec represents the things to undo
96
- #on failure to get back to the corresponding matchset's starting position.
97
-
98
- #@matchfail_todo is more or less a 2-dimensional array of integers. very many of
99
- #those integers in the undos_inc and matchsucceed_inc columns will be zero. it
100
- #would be nice to use a sparse vector or matrix instead.
101
-
102
-
103
- #---------------------------------------------
104
- def initialize(matcher,cursor)
105
- @matcher=matcher
106
- @cursor=cursor
107
- @regsidx=0
108
- @variables={}
109
- @undos_stack=[] #recursive list of undo procs and vars defined in this entire match
110
- @matchset_stack=[]
111
- @position_stack=[@cursor.pos]
112
- @matchfail_todo=[MatchFailRec.new] #list of things to do when match fails.... undo(&adjust variables), matchsucceed, position, (matchset)
113
- @matchsucceed_stack=[] #r-list of things to do when entire match succeeds... subst and deferreds
114
-
115
- assert check_result
116
- end
117
-
118
- #---------------------------------------------
119
- def push_matchset(ms)
120
- assert MatchSet===ms
121
- @matchset_stack.push ms
122
- @matchfail_todo.push MatchFailRec.new
123
- #push_match len #disable... caller wants to do it...
124
- end
125
-
126
- #---------------------------------------------
127
- def push_match(inc=0)
128
- #matchset_stack should be 1 smaller than matchfail_todo
129
- assert @matchfail_todo.size-1==@matchset_stack.size
130
-
131
- @cursor.move inc #do nothing if no param given
132
- @position_stack.push @cursor.pos #push the start position of the next match
133
- @matchfail_todo.last.position_inc+=1
134
- @regsidx+=1
135
- end
136
-
137
- #---------------------------------------------
138
- #this method is dangerous! it leaves the object in an inconsistent state.
139
- #caller must fixup state by either popping @matchset_stack or pushing a matchfail_todo
140
- def backup_stacks
141
- assert(@matchfail_todo.size >= 1)
142
- discarding=@matchfail_todo.pop
143
- #discarding should not ever be a list!
144
- @regsidx-=discarding.position_inc #adjust position in matcher
145
- assert(@position_stack.size >= discarding.position_inc) #what if position stack is empty here?
146
-
147
- discarding.position_inc.nonzero? and
148
- @position_stack=@position_stack.slice(0...-discarding.position_inc)
149
-
150
- #backup undo stack and execute undos
151
- process_undos @undos_stack.slice!(-discarding.undos_inc..-1)
152
-
153
- #backup matchsucceed stack
154
- discarding.matchsucceed_inc.nonzero? and
155
- @matchsucceed_stack=@matchsucceed_stack.slice(0...-discarding.matchsucceed_inc)
156
-
157
- return @matchset_stack.last
158
- end
159
-
160
- #---------------------------------------------
161
- def backtrack
162
- assert @regsidx != INFINITY
163
- assert check_result
164
- mat=nil
165
- loop do
166
- ms=backup_stacks or return
167
-
168
- mat=ms.next_match(@cursor.data?, @position_stack.last) and break
169
- @matchset_stack.pop or huh
170
- end
171
- assert( (1..@matcher.max_matches)===@regsidx+1)
172
- assert ::Array===mat
173
- assert ::Array===mat.first
174
-
175
-
176
- #back up cursor position
177
- @cursor.pos=@position_stack.last
178
-
179
-
180
-
181
- @matchfail_todo.push MatchFailRec.new
182
- push_match mat.last
183
-
184
- assert @regsidx
185
- return @regsidx, @cursor.pos-origpos
186
- end
187
-
188
- #---------------------------------------------
189
- #lookup something that was already matched, either by
190
- #name or index(es).
191
- #probably need to take a full path for parameters
192
- def backref; huh end
193
-
194
- #---------------------------------------------
195
- def subprogress(cursor=nil,matcher=nil)
196
- result=dup
197
- result.matchfail_todo=[MatchFailRec.new]
198
- result.cursor=
199
- if cursor
200
- unless Sequence===cursor
201
- huh #convert other data to a cursor...
202
- end
203
- cursor
204
- else
205
- result.cursor.position # make a sub-cursor
206
- end
207
-
208
- result.position_stack=[result.cursor.pos]
209
-
210
- @undo_stack.push result.undo_stack=[]
211
- @matchsucceed_stack.push result.matchsucceed_stack=[]
212
- @matchfail_todo.last.matchsucceed_inc+=1
213
- @matchfail_todo.last.undo_inc+=1
214
-
215
- if matcher
216
- @matcher=matcher#@matcher might be set to soemthing different
217
- @regsidx=0
218
- end
219
-
220
- assert result.check_result
221
-
222
- result
223
- end
224
-
225
- #---------------------------------------------
226
- def make_hash
227
- huh
228
-
229
- end
230
-
231
- #---------------------------------------------
232
- def last_match_range
233
- @position_stack[-2]...@position_stack[-1]
234
- end
235
-
236
- #---------------------------------------------
237
- def top_matchset
238
- @matchset_stack.last
239
- end
240
-
241
- #---------------------------------------------
242
- def register_variable(name)
243
- @variables[name] and return warn( "variable #{name} is already defined")
244
- @variables[name]=last_match_range
245
- @undos_stack<<name
246
- @matchfail_todo.last.undos_inc+=1
247
- end
248
-
249
- #---------------------------------------------
250
- class Later #inside Progress, so it doesn't conflict with Reg::Later from regreplace.rb
251
- def initialize(block,args)
252
- @block,@args=block,args
253
- end
254
- class<<self;
255
- alias [] new;
256
- end
257
-
258
- def call
259
- @block.call( *@args)
260
- end
261
- end
262
-
263
- #---------------------------------------------
264
- def register_undo *args, &block
265
- @undos_stack<<Later[block,*args]
266
- @matchfail_todo.last.undos_inc+=1
267
- end
268
-
269
- #wannabe in class Array
270
- def recursive_each arr,&block
271
- arr.each {|item|
272
- if item.respond_to? :to_a
273
- recursive_each item.to_a, &block
274
- else
275
- block[item]
276
- end
277
- }
278
- end
279
-
280
- def recursive_reverse_each arr,&block
281
- arr.reverse_each {|item|
282
- if item.respond_to? :to_a
283
- recursive_reverse_each item.to_a, &block
284
- else
285
- block[item]
286
- end
287
- }
288
- end
289
-
290
- #---------------------------------------------
291
- def process_undos(undos)
292
- recursive_reverse_each undos do|undo|
293
- Symbol===undo ? @variables.delete(undo) : undo.call
294
- end
295
- end
296
-
297
-
298
- #---------------------------------------------
299
- def register_replace(subst,*args)
300
- @matchsucceed_stack.push Later[subst,args]
301
- @matchfail_todo.last.matchsucceed_inc+=1
302
- end
303
-
304
- #---------------------------------------------
305
- def later(*args,&block)
306
- @matchsucceed_stack.push Later[block,args]
307
- @matchfail_todo.last.matchsucceed_inc+=1
308
- end
309
-
310
- #---------------------------------------------
311
- def process_laters
312
- recursive_reverse_each(@matchsucceed_stack) {|later| later.call }
313
- end
314
-
315
- #---------------------------------------------
316
- def origpos
317
- @position_stack.first
318
- end
319
-
320
- #---------------------------------------------
321
- def posinc
322
- @cursor.pos-origpos
323
- end
324
-
325
-
326
- #--------------------------
327
- $RegTraceEnable=$RegTraceDisable=nil
328
- def trace_enabled?
329
- @trace||=nil
330
- $RegTraceEnable or (!$RegTraceDisable && @trace)
331
- end
332
-
333
- #--------------------------
334
- #bt, in this case, stands for 'backtracking'.
335
- #but the cognoscenti refer to this method as 'bitch-match'.
336
- def bt_match(match_steps=@matcher.max_matches)
337
- assert @cursor.pos <= @cursor.size
338
- assert origpos >= 0
339
- assert posinc >= 0
340
- assert( (0...match_steps)===@regsidx)
341
- assert Integer===@position_stack.first
342
- assert check_result
343
- loop do #loop over regs to match
344
- assert @cursor.pos <= @cursor.size
345
- assert posinc >= 0
346
- assert( (0...match_steps)===@regsidx)
347
-
348
- if trace_enabled?
349
- puts [@matcher, clean_result].map{|i| i.inspect }.join(' ')
350
- #pp self
351
- end
352
-
353
- assert check_result
354
-
355
- #try a new match of current reg
356
- r=@matcher.regs(@regsidx)
357
- if r.respond_to? :mmatch #should test for being a Reg::Reg instead
358
- # 'mmatch could return 2 items here'
359
- m=r.mmatch(@cursor.data?, @cursor.pos)
360
- #is a single match or a match set?
361
- if m.respond_to? :next_match
362
- #it's a set -- start new inner result array
363
- #with initial match as first elem
364
- push_matchset m
365
- mat,matchlen=m.next_match(@cursor.data?, @cursor.pos)
366
-
367
- assert mat
368
- assert m
369
- else
370
- mat,matchlen=*m #single match or nil
371
- m=nil
372
- end
373
- else
374
- if r===(item=@cursor.readahead1)
375
- mat=RR[item]
376
- matchlen=1
377
- end
378
- end
379
-
380
-
381
- assert check_result
382
-
383
- unless mat #match fail?
384
- assert check_result
385
- return to_result,posinc,@regsidx if @matcher.enough_matches? @regsidx
386
-
387
- #doesn't match, try backtracking
388
- assert @regsidx
389
- backtrack or return nil #bt failed? we fail
390
- assert(@cursor.pos<=@cursor.size)
391
- assert check_result
392
- else #match succeeded
393
- if !m and @matcher.respond_to? :want_gratuitous_btstop? and
394
- @matcher.want_gratuitous_btstop?(@regsidx)
395
- push_matchset SingleMatch_MatchSet.new(matchlen)
396
- end
397
-
398
- #advance to next reg
399
- assert check_result
400
- push_match @matcher.update_di(0,matchlen)
401
- assert(@cursor.pos<=@cursor.size)
402
- end
403
-
404
- assert(@cursor.pos<=@cursor.size)
405
-
406
- assert check_result
407
- return to_result,posinc,@regsidx if @regsidx>=match_steps
408
- assert( (0...match_steps)===@regsidx)
409
-
410
- end #loop
411
-
412
- end
413
-
414
- #---------------------------------------------
415
- #maybe this isn't necessary?
416
- #because backtrack is called after it,
417
- #and it's doing the same things.... more or less
418
-
419
- def last_next_match; #(?ary=>@cursor,?start,?resfrag=>...?)
420
- #huh #adapt logic from Backtrace
421
- assert check_result
422
- assert( (0..@matcher.max_matches)===@regsidx)
423
- r=backup_stacks
424
-
425
- di=@cursor.pos=@position_stack.last
426
- assert( (0..@matcher.max_matches)===@regsidx)
427
- unless r
428
- @matchfail_todo.push MatchFailRec.new
429
- assert check_result
430
- return nil,nil,@regsidx
431
- end
432
- #something wrong here....
433
-
434
- #dunno how to do this simply...
435
- #assert full_up? if SubseqMatchSet===self
436
-
437
- #@matchset_stack.pop is called in backtrack but not here, why?
438
-
439
-
440
- r,diinc=r.next_match(@cursor.data?,@cursor.pos)
441
- unless r
442
- #might need to return non-nil here, if resfrag isn't exhausted yet
443
- assert( (0..@matcher.max_matches)===@regsidx)
444
- @matchset_stack.pop
445
- assert check_result
446
- return nil,nil,@regsidx unless @matcher.enough_matches? @regsidx
447
- return to_result, @cursor.pos-@position_stack.first, @regsidx
448
- end
449
-
450
- assert diinc
451
- assert @cursor.pos+diinc <= @cursor.size
452
- @cursor.move diinc
453
- #@regsidx-=matchfail_todo.position_inc #should be done in push_match...
454
- @matchfail_todo.push MatchFailRec.new
455
- push_match
456
-
457
- if @regsidx<@matcher.max_matches #if there are more subregs of this reg to be matched
458
- #re-match tail regs
459
- assert @cursor.pos <= @cursor.size
460
- #di is sometimes bad here, it seems....(fixed now?)
461
- assert check_result
462
- assert( (0..@matcher.max_matches)===@regsidx)
463
- result=bt_match
464
- assert check_result
465
- return result
466
- end
467
-
468
-
469
-
470
- assert( (0..@matcher.max_matches)===@regsidx)
471
- assert check_result
472
-
473
- return to_result,@cursor.pos-@position_stack.first,@regsidx
474
- end
475
-
476
- #---------------------------------------------
477
- def check_result;
478
-
479
- #since this should be true, a separate @regsidx is unnecessary
480
- assert @regsidx==@position_stack.size-1
481
-
482
- #matchset_stack should be 1 smaller than matchfail_todo
483
- assert @matchfail_todo.size-1==@matchset_stack.size
484
-
485
-
486
- #verify correct types in @-variables
487
- assert Sequence===@cursor
488
- assert @matcher.respond_to?( :update_di)
489
- assert @regsidx >=0
490
- @matchset_stack.each{|ms| assert MatchSet===ms }
491
- prev_pos=0
492
- @position_stack.each{|pos| assert prev_pos<=pos; pos=prev_pos }
493
- assert prev_pos<=@cursor.size
494
-
495
- vars_copy=@variables.dup
496
- @undos_stack.each {|i|
497
- case i
498
- #every element of @variables should also be a sym in @undos_stack
499
- when Symbol
500
- vars_copy.delete(i) or assert(false)
501
-
502
- when Later
503
- else assert(false)
504
- end
505
- }
506
- assert vars_copy.empty? #every var should be accounted for
507
-
508
- #sum of :undos_inc,:matchsucceed_inc,:position_inc in @matchfail_todo
509
- #should be the same as the size of the corresponding stack.
510
- uns=mats=poss=0
511
- @matchfail_todo.each{|mfr|
512
- uns+=mfr.undos_inc
513
- mats+=mfr.matchsucceed_inc
514
- poss+=mfr.position_inc
515
- }
516
- assert uns==@undos_stack.size
517
- assert mats==@matchsucceed_stack.size
518
- assert poss+1==@position_stack.size
519
-
520
- assert succ_stack_ok
521
-
522
- return true
523
- end
524
-
525
- #---------------------------------------------
526
- def succ_stack_ok(stk=@matchsucceed_stack)
527
- stk.each{|elem|
528
- case elem
529
- when Array; succ_stack_ok(elem)
530
- when Later; true
531
- else
532
- end or return
533
- }
534
- return true
535
- end
536
- private :succ_stack_ok
537
-
538
- #---------------------------------------------
539
- def clean_result
540
- result=[]
541
- ms_pos_idx=@position_stack.size - @matchfail_todo.last.position_inc
542
- ms_pos_idx=-1
543
- result=(0...@position_stack.size-1).map{|i|
544
- if i==ms_pos_idx
545
- ms_pos_idx-=1
546
- #what if ms_idx too big?
547
- ms_pos_idx-=@matchfail_todo[ms_idx].position_inc
548
- ms.clean_result
549
-
550
- else
551
- @cursor[@position_stack[i], @position_stack[i+1]-@position_stack[i]]
552
- end
553
- }
554
-
555
- return result
556
- end
557
-
558
- #---------------------------------------------
559
- def to_result;
560
- true#ok, i'm cheating
561
- end
562
-
563
- end #class Progress
564
-
565
- class SingleMatch_MatchSet < MatchSet
566
- #this is somewhat of a hack, and shouldn't be necessary....
567
- #it exists because every backtracking stop has to have a
568
- #matchset in it, even the ones that only match one way.
569
- #this class encapsulates matchsets that match only one way.
570
-
571
-
572
- def initialize(len)
573
- #@len=len
574
- end
575
-
576
- def next_match(arr,start)
577
- #if @len
578
- # result=[arr[start,@len], @len]
579
- # @len=nil
580
- # result
581
- #end
582
- end
583
-
584
-
585
- end
586
-
587
-
588
- if defined? $MMATCH_PROGRESS #ultimately, mmatch will take a progress, but until then, disable this
589
- #---------------------------------------------
590
- class Subseq
591
-
592
- def mmatch(pr)
593
- #in this version, all @regs are not multiple regs
594
- pr=pr.subprogress(nil,self)
595
- cu=pr.cursor
596
- start=cu.pos
597
- assert cu.pos<=cu.size
598
- cu.pos+@regs.size<=cu.size or return nil
599
- @regs.each do |reg|
600
- assert cu.pos<cu.size
601
- reg===cu.read1 or return nil
602
- end
603
- return [ [cu.read( start-cu.pos,true)], @regs.size ]
604
- end
605
-
606
- private
607
- def mmatch_multiple(pr)
608
- #in this version, at least one of @regs is a multiple reg
609
- #start==arr.size and huh
610
- pr=pr.subprogress(nil,self)
611
- cu=pr.cursor
612
- start=cu.pos
613
- start+itemrange.begin<=cu.size or return nil
614
- assert( (0..cu.size).include?( start))
615
- result,di,bogus=pr.bt_match
616
- result and SubseqMatchSet.new(pr,di)
617
- end
618
- end
619
-
620
- #---------------------------------------------
621
- class Repeat
622
- def mmatch(pr)
623
- pr=pr.subprogress(nil,self)
624
- cu=pr.cursor
625
- start=cu.pos
626
- start+@times.begin <= cu.size or return nil #enough room left in input?
627
- i=-1
628
- (0...@times.end).each do |i2| i=i2
629
- start+i<cu.size or break(i-=1)
630
- @reg===cu.read1 or break(i-=1)
631
- end
632
- i+=1
633
- assert( (0..@times.end)===i)
634
- if i==@times.begin
635
- return [[cu.read(start-cu.pos,true)], i]
636
- end
637
- i>@times.begin or return nil
638
- return SingleRepeatMatchSet.new(i,-1,@times.begin)
639
- end
640
-
641
- private
642
- def mmatch_multiple(pr)
643
- pr=pr.subprogress(nil,self)
644
- start=cu.pos
645
- assert start <= cu.size
646
- start+itemrange.begin <= cu.size or return nil #enough room left in input?
647
- r=[[]]
648
-
649
- #first match the minimum number
650
- if @times.begin==0 #if we can match nothing
651
- cu.eof? and return [r,0] #at end of input? return empty set
652
- ri=di=0
653
- else
654
- cu.eof? and return nil
655
- assert @times.begin<INFINITY
656
- r,di,ri=pr.bt_match(@times.begin) #matches @reg @times.begin times
657
- r.nil? and return nil
658
- end
659
- assert ri==@times.begin
660
-
661
- assert !@times.exclude_end?
662
- left=@times.end-@times.begin
663
-
664
- #note: left and top could be infinite here...
665
-
666
- #do the optional match iterations
667
- #only greedy matching implemented for now
668
- #there must be a more efficient algorithm...
669
- if left >= 1
670
- assert pr.check_result
671
- #get remaining matches up to @times.end times
672
- rr=pr.make_hash
673
- res,di,ri=pr.bt_match #bt stop at each iteration, this time
674
- assert pr.check_result( res)
675
- assert @times===pr.regsidx
676
-
677
- res and return RepeatMatchSet.new(pr,(huh di))
678
- assert rr==pr.make_hash
679
- end
680
-
681
- #if matchset has no backtracking stops, and
682
- #hence cannot contain more than one actual match,
683
- #then just return that match.
684
- pr.matchset_stack.empty? ? RepeatMatchSet.new(pr,(huh di)) :
685
- [pr.clean_result,(huh di)]
686
- end
687
- end
688
-
689
- #---------------------------------------------
690
- class AndMatchSet
691
- #the total number of possible different ways to match an AndMatchSet
692
- #where several of the branches are actually ambiguous
693
- #grows exponentially.
694
- #rather than hit every possible match, we'll try to hit
695
- #every legal match length at least once.
696
-
697
- #on next_match,
698
- #figure out the alternative(s) that are returning the longest
699
- #matchset currently. those alternatives are returned in
700
- #the first match, but at the 2nd and subsequent calls
701
- #to next_match, that set of longest alternatives are all
702
- #next_matched (rolled back) until they match something shorter.
703
- #(or maybe just a different length? Reg::Or isn't greedy, so its
704
- #longest match isn't necessarily returned first.)
705
-
706
- #if any next_match call returns nil (or false), the whole match set
707
- #is finished. return nil from next_match now and forever more.
708
-
709
-
710
-
711
- def initialize
712
- huh
713
- end
714
- end
715
-
716
- #---------------------------------------------
717
- class And
718
-
719
- private
720
- #can't use this until mmatch interface is changed to take a single progress param
721
- def mmatch_multiple(progress)
722
- #in this version, at least one of @regs is a multiple reg
723
- assert( (0..progress.cursor.size).include?( progress.cursor.pos))
724
- result,*bogus=progress.bt_match
725
- result and AndMatchSet.new(self,result)
726
- #need new definition of AndMatchSet...
727
-
728
- #need to keep track of which alternative(s) was longest, so as to advance
729
- #the cursor by that amount. and know which ones to start backtracking in.
730
-
731
- #cursor needs to be advanced here somewhere, i think.
732
- end
733
-
734
- end
735
-
736
-
737
- else #... not $MMATCH_PROGRESS
738
- class Subseq
739
-
740
- def mmatch(arr,start)
741
- #in this version, each of @regs is not a multiple reg
742
- assert start<=arr.size
743
- start+@regs.size<=arr.size or return nil
744
- idx=0
745
- @regs.each do |reg|
746
- assert(start+idx<arr.size)
747
- reg===arr[start+idx] or return nil
748
- idx+=1
749
- end
750
- return [RR[arr[start,@regs.size]], @regs.size]
751
- end
752
-
753
- def mmatch_multiple(arr,start)
754
- #in this version, at least one of @regs is a multiple reg
755
- assert( (0..arr.size).include?( start))
756
- cu=Sequence::Indexed.new(arr); cu.pos=start
757
- pr=Progress.new(self,cu)
758
- result,di,bogus=pr.bt_match
759
- result and SubseqMatchSet.new(pr,di)
760
- end
761
- end
762
-
763
- class ::Sequence
764
- class Indexed
765
- def data?; @data end
766
- end
767
- end
768
-
769
- class Repeat
770
- def mmatch(arr,start)
771
- i=-1
772
- (0...@times.end).each do |i2| i=i2
773
- start+i<arr.size or break(i-=1)
774
- @reg===arr[start+i] or break(i-=1)
775
- end
776
- i+=1
777
- assert( (0..@times.end)===i)
778
- if i==@times.begin
779
- return [RR[arr[start,i]], i]
780
- end
781
- i>@times.begin or return nil
782
- return SingleRepeatMatchSet.new(i,-1,@times.begin)
783
- end
784
-
785
- def mmatch_multiple(arr,start)
786
- assert start <= arr.size
787
- r=[RR[]]
788
-
789
- cu=Sequence::Indexed.new(arr); cu.pos=start
790
- pr=Progress.new(self,cu)
791
-
792
- #first match the minimum number
793
- if @times.begin==0 #if we can match nothing
794
- arr.size==start and return [r,0] #at end of input? return empty set
795
- ri=di=0
796
- else
797
- arr.size==start and return nil
798
- assert @times.begin<INFINITY
799
- r,di,ri=pr.bt_match(@times.begin) #matches @reg @times.begin times
800
- r.nil? and return nil
801
- end
802
- assert ri==@times.begin
803
-
804
- assert !@times.exclude_end?
805
- left=@times.end-@times.begin
806
-
807
- #note: left and top could be infinite here...
808
-
809
- #do the optional match iterations
810
- #only greedy matching implemented for now
811
- #there must be a more efficient algorithm...
812
- if left >= 1
813
- assert pr.check_result
814
- #get remaining matches up to @times.end times
815
- #because bt_match could change the rr argument, and
816
- #we might need to return the original in r below
817
- res,di,ri=pr.bt_match
818
- # assert Backtrace.check_result res #this is correct, for now (i think)
819
- #don't update to progress version
820
- assert @times===ri
821
-
822
- res and return RepeatMatchSet.new(pr,di)
823
- end
824
-
825
- #if matchset has no backtracking stops, and
826
- #hence cannot contain more than one actual match,
827
- #then just return that match.
828
- pr.matchset_stack.empty? ?
829
- [pr.clean_result,di] : RepeatMatchSet.new(pr,di)
830
- end
831
-
832
- end
833
-
834
- end # $MMATCH_PROGRESS
835
-
836
-
837
-
838
- #---------------------------------------------
839
- class MatchSet
840
- #delegate to embedded progress, if any
841
- def clean_result
842
- huh
843
- @progress.clean_result
844
- end
845
- end
846
-
847
- #--------------------------
848
- class RepeatMatchSet < MatchSet
849
-
850
- attr :progress
851
- def initialize(progress,consumed)
852
- @progress=progress
853
- #@cnt=@startcnt-stepper
854
- #@ary.push 1
855
- @consumed=consumed
856
- @firstmatch=[progress.clean_result,@consumed]
857
- assert( progress.matcher.times===progress.regsidx)
858
- assert progress.regsidx
859
- #assert(@ri==@firstmatch.first.size)
860
- end
861
-
862
- def match_iterations;
863
- #assert(@ri==Backtrace.clean_result(@ary).size)
864
- progress.regsidx
865
- end
866
-
867
- #very nearly identical to SubseqMatchSet#next_match
868
- def next_match(arr,idx)
869
- #fewer assertions in twin
870
- if @firstmatch
871
- result,@firstmatch=@firstmatch,nil
872
- assert result.first.empty? || ::Array===result.first.first
873
- #print "idx=#{idx}, inc=#{result.last}, arr.size=#{arr.size}\n"
874
- assert idx+result.last<=arr.size
875
- assert(progress.regsidx==result.first.size)
876
- return result
877
- end
878
-
879
- @progress or return #not in twin ... ignore it
880
-
881
- assert progress.check_result
882
-
883
- i=progress.matchfail_todo.last.position_inc
884
- =begin extents not used
885
- extents= if i==0
886
- []
887
- else
888
- progress.position_stack[-i..-1]
889
- end
890
- =end
891
- #this part's not in twin
892
- #'need to check for fewer matches here before rematching last matchset'
893
-
894
- #what if the match that gets discarded was returned by a matchset
895
- #that has more matches in it?
896
- #in that case, i is 1 and the body of this if should not be executed...
897
- if progress.regsidx>progress.matcher.times.begin && i>1
898
- huh progress.backup_stacks
899
- huh #need to change progress.cursor.pos here too
900
- huh #need to save result of backup_stacks
901
-
902
- #but now i need to undo all other progress state too, if
903
- #the state was created with the match result just popped.
904
- #in general, it's not possible to distinguish state with the
905
- #last match from state with the matches that might have preceded it...
906
- #unless I create a backtracking point for each optional iteration
907
- #of the repeat matcher.
908
- #currently, making a backtracking point implies making a matchset
909
- #as well. I'll need a matchset that contains only 1 match.
910
- #ok, i think this is working now. no extra code needed here.
911
-
912
- @consumed-=pos-progress.position_stack.last
913
- #assert(@ri==Backtrace.clean_result(@ary).size)
914
- assert idx+@consumed<=arr.size
915
- assert progress.check_result
916
- result= [progress.clean_result, @consumed]
917
- assert progress.check_result
918
- return result
919
- end
920
-
921
-
922
- assert progress.check_result
923
- assert( (0..@progress.matcher.max_matches)===@progress.regsidx)
924
- result,di,ri=progress.last_next_match
925
- if result and @progress.matcher.enough_matches? ri #condition slightly different in twin
926
- result=[progress.clean_result,di]
927
- @consumed=di #not in twin...why?
928
- #@progress.regsidx-=1
929
- assert ::Array===result.first.first
930
- assert idx+result.last<=arr.size
931
- assert progress.check_result
932
- #assert(@ri==result.first.size)
933
- return result
934
- end
935
-
936
- assert( (0..@progress.matcher.max_matches)===@progress.regsidx)
937
- #assert(@ri==Backtrace.clean_result(@ary).size)
938
- assert(progress.check_result)
939
-
940
-
941
-
942
- @progress.matchset_stack.empty? and return @progress=nil #also checking @ary in twin... ignore it
943
- assert @progress.regsidx>0
944
-
945
- @progress.backtrack or return @progress=nil #@progress never set to nil like this in twin... ignore it
946
-
947
- #this is where the divergence widens. ri is a local in twin
948
-
949
- #assert(@ri==Backtrace.clean_result(@ary).size)
950
- assert(progress.check_result)
951
- mat,di,@ri=@progress.bt_match #mat is @ary in twin
952
- mat.nil? and return @progress=nil
953
-
954
- #assert(@ri==Backtrace.clean_result(mat).size)
955
- assert @progress.regsidx
956
- assert( (0..@progress.matcher.max_matches)===@progress.regsidx)
957
-
958
- result=[@progress.clean_result,di]
959
- @consumed=di #no @consumed in twin
960
- assert ::Array===result.first.first
961
- assert idx+result.last<=arr.size
962
- assert progress.check_result
963
- #assert(@ri==result.last.size)
964
- return result
965
- end
966
-
967
- end
968
-
969
- #---------------------------------------------
970
- class SubseqMatchSet < MatchSet
971
-
972
- def initialize progress,di;
973
- @reg,@progress= progress.matcher,progress
974
-
975
- @orig_pos=progress.cursor.pos-di
976
- @firstresult= [progress.clean_result,di]
977
- end
978
-
979
- #(@reg=>progress.matcher,@matchary=>progress.huh,di=>progress.cursor.pos-@orig_pos)
980
-
981
- def next_match(ary,start)
982
- if @firstresult
983
- @firstresult,result=nil,@firstresult
984
- assert ::Array===result.first.first
985
- return result
986
- end
987
-
988
-
989
- result,di,ri=@progress.last_next_match
990
- result or return @progress=nil
991
- if result and ri==@reg.max_matches
992
- result=[@progress.clean_result,di]
993
- assert ::Array===result.first.first
994
- return result
995
- end
996
-
997
-
998
- (@progress and !@progress.matchset_stack.empty?) or return @progress=nil
999
- assert @progress.regsidx
1000
- @progress.backtrack or return @progress=nil
1001
-
1002
- #need to adjust ri?
1003
-
1004
- #is this right... dunno...
1005
- result,di,bogus=@progress.bt_match
1006
-
1007
-
1008
- if result
1009
- result=[@progress.clean_result,di]
1010
- assert ::Array===result.first.first
1011
- return result
1012
- end
1013
- end
1014
-
1015
- def match_iterations
1016
- progress.matcher.max_matches
1017
- end
1018
-
1019
- end
1020
- #--------------------------
1021
- class AndMatchSet < SubseqMatchSet
1022
- #this isn't really right...
1023
- #on next_match, we need to backtrack the longest alternative(s)
1024
- #if they're then shorter than the next longest alternative,
1025
- #then that (formerly next longest) alternative becomes
1026
- #the dominating alternative, and determines how much is consumed
1027
-
1028
- end
1029
- #might need Reg::Or too....
1030
-
1031
- class Repeat
1032
- #--------------------------------------------------------
1033
- # "enable backtracking stops at each optional iteration"
1034
- def want_gratuitous_btstop?(steps)
1035
- @times===steps
1036
- end
1037
-
1038
- end
1039
-
1040
-
1041
- #---------------------------------------------
1042
- class Array
1043
- def ===(other)
1044
- ::Array===other or return false #need to be more generous eventually
1045
- @regs.empty? and return other.empty?
1046
- progress=Progress.new(self,(Sequence::Indexed.new other))
1047
- result,di,bogus=progress.bt_match
1048
- assert di.nil? || di <= other.size
1049
- return(di==other.size && result)
1050
- end
1051
- end
1052
-
1053
- end
1054
-