reg 0.4.8 → 0.5.0a0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +4 -0
  2. data/COPYING +0 -0
  3. data/History.txt +14 -0
  4. data/Makefile +59 -0
  5. data/README +87 -40
  6. data/article.txt +838 -0
  7. data/{assert.rb → lib/assert.rb} +3 -3
  8. data/{reg.rb → lib/reg.rb} +11 -4
  9. data/lib/reg/version.rb +21 -0
  10. data/lib/regarray.rb +455 -0
  11. data/{regarrayold.rb → lib/regarrayold.rb} +33 -7
  12. data/lib/regbackref.rb +73 -0
  13. data/lib/regbind.rb +230 -0
  14. data/{regcase.rb → lib/regcase.rb} +15 -5
  15. data/lib/regcompiler.rb +2341 -0
  16. data/{regcore.rb → lib/regcore.rb} +196 -85
  17. data/{regdeferred.rb → lib/regdeferred.rb} +35 -4
  18. data/{regposition.rb → lib/regevent.rb} +36 -38
  19. data/lib/reggraphpoint.rb +28 -0
  20. data/lib/reghash.rb +631 -0
  21. data/lib/reginstrumentation.rb +36 -0
  22. data/{regitem_that.rb → lib/regitem_that.rb} +32 -11
  23. data/{regknows.rb → lib/regknows.rb} +4 -2
  24. data/{reglogic.rb → lib/reglogic.rb} +76 -59
  25. data/{reglookab.rb → lib/reglookab.rb} +31 -21
  26. data/lib/regmatchset.rb +323 -0
  27. data/{regold.rb → lib/regold.rb} +27 -27
  28. data/{regpath.rb → lib/regpath.rb} +91 -1
  29. data/lib/regposition.rb +79 -0
  30. data/lib/regprogress.rb +1522 -0
  31. data/lib/regrepeat.rb +307 -0
  32. data/lib/regreplace.rb +254 -0
  33. data/lib/regslicing.rb +581 -0
  34. data/lib/regsubseq.rb +72 -0
  35. data/lib/regsugar.rb +361 -0
  36. data/lib/regvar.rb +180 -0
  37. data/lib/regxform.rb +212 -0
  38. data/{trace.rb → lib/trace_during.rb} +6 -4
  39. data/lib/warning.rb +37 -0
  40. data/parser.txt +26 -8
  41. data/philosophy.txt +18 -0
  42. data/reg.gemspec +58 -25
  43. data/regguide.txt +18 -0
  44. data/test/andtest.rb +46 -0
  45. data/test/regcompiler_test.rb +346 -0
  46. data/test/regdemo.rb +20 -0
  47. data/{item_thattest.rb → test/regitem_thattest.rb} +2 -2
  48. data/test/regtest.rb +2125 -0
  49. data/test/test_all.rb +32 -0
  50. data/test/test_reg.rb +19 -0
  51. metadata +108 -73
  52. data/calc.reg +0 -73
  53. data/forward_to.rb +0 -49
  54. data/numberset.rb +0 -200
  55. data/regarray.rb +0 -675
  56. data/regbackref.rb +0 -126
  57. data/regbind.rb +0 -74
  58. data/reggrid.csv +1 -2
  59. data/reghash.rb +0 -318
  60. data/regprogress.rb +0 -1054
  61. data/regreplace.rb +0 -114
  62. data/regsugar.rb +0 -230
  63. data/regtest.rb +0 -1078
  64. data/regvar.rb +0 -76
@@ -0,0 +1,79 @@
1
+ =begin copyright
2
+ reg - the ruby extended grammar
3
+ Copyright (C) 2005, 2016 Caleb Clausen
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+ require 'set'
20
+ #require 'reg'
21
module Reg
  # Zero-width matcher that succeeds only when the cursor is sitting at one
  # of a fixed set of positions.
  #
  # Position[0, 5] matches at offsets 0 and 5 counted from the start of the
  # sequence.  When the first number given is negative (including -0.0),
  # Position.new hands construction over to Position::FromEnd, which counts
  # from the end of the sequence instead.
  class Position
    include ::Reg::Reg

    class << self
      # the untouched Class#new; FromEnd re-installs it as its own .new
      alias new__no_negatives new

      # Builds a Position, or a Position::FromEnd when the numbers are
      # negative.  All numbers are expected to share one sign, so only the
      # first is inspected.
      def new(*nums)
        return FromEnd.new(*nums) if negative?(nums.first)
        # splat, so #initialize sees the individual numbers; handing over the
        # array itself produced a set whose only member was that array
        new__no_negatives(*nums)
      end
      alias [] new

      # True when x is below zero, -0.0 included (1.0 / -0.0 is -Infinity).
      # Returns nil when x cannot be divided into a Float at all (e.g. nil).
      def negative?(x)
        1.0 / x < 0
      rescue StandardError
        nil
      end
    end

    def initialize(*nums)
      @positions = Set[*nums]
    end

    # pr must respond to #cursor, which in turn responds to #pos.
    # Returns [true, 0] (matched, nothing consumed) when the adjusted cursor
    # position is one of the configured positions; nil otherwise.
    def mmatch(pr)
      # explicit membership test: Set#=== only means include? on Ruby >= 2.5
      [true, 0] if @positions.include?(adjust_position(pr.cursor, pr.cursor.pos))
    end

    # a position test never consumes input
    def itemrange
      0..0
    end

    def inspect
      # built from the members rather than by slicing Set#inspect, whose
      # exact text differs between Ruby releases
      "Reg::Position[#{@positions.map { |num| num.inspect }.join(', ')}]"
    end

    private

    # positions counted from the start are used unchanged
    def adjust_position(cu, pos)
      pos
    end

    # Position whose numbers count backwards from the end of the sequence.
    class FromEnd < Position
      class << self
        # plain constructor again: no negative-number dispatch needed here
        alias new new__no_negatives
        alias [] new
      end

      def inspect
        super.sub("ion","ion::FromEnd")
      end

      private

      # converts an absolute offset into a (non-positive) offset from the end
      def adjust_position(cu, pos)
        pos - cu.size
      end
    end
  end
end
@@ -0,0 +1,1522 @@
1
+ =begin copyright
2
+ reg - the ruby extended grammar
3
+ Copyright (C) 2005, 2016 Caleb Clausen
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
# rubygems is optional (it is absent on very old rubies); only a failed load
# is tolerated, so Interrupt, SystemExit and friends still propagate
begin
  require 'rubygems'
rescue LoadError
end
20
+
21
+ #$:<<"../sequence/lib" #temp hack
22
+ #require 'warning'
23
+ #warning "sequence found via temporary hack"
24
+ #$MMATCH_PROGRESS=1
25
+
26
+ require 'forwardable'
27
+
28
+ require 'sequence'
29
+ require 'sequence/indexed'
30
+
31
+
32
+
33
+ =begin the internal api
34
+ originally:
35
+ ResAryFrag=Array #it would be nice to get a more precise definition....
36
+ ResAry=+[ResAryFrag,-[MatchSet,Integer,ResAryFrag].*]
37
+
38
+ Reg%:mmatch[Array,Integer,
39
+ Returns( MatchSet|ResAryFrag|nil)
40
+ ]
41
+ Backtrace%:bt_match[Array,Integer,Integer,Integer,ResAry,Integer.-,
42
+ Returns( ResAry|nil,Integer,Integer)
43
+ ]
44
+ MatchSet%:next_match[Array,Integer,
45
+ Returns( ResAryFrag|nil,Integer)
46
+ ]
47
+
48
+ currently:
49
+ Reg%:mmatch[Progress, #has to change to take progress soon
50
+ Returns( MatchSet|ResAryFrag|nil)
51
+ ] #except subseq and repeat currently want progress
52
+ Progress%:bt_match[Integer.-, #affects progress, i'm pretty sure
53
+ Returns( ResAry|nil,Integer,Integer) #1st result used only as bool
54
+ ]
55
+ MatchSet%:next_match[Array,Integer, #affects progress? #needs to change too
56
+ Returns( ResAryFrag|nil,Integer)
57
+ ]
58
+ MatchSet%:initialize[Progress,OBS,Returns( MatchSet)] #for every ms class
59
+
60
+
61
+
62
+ former ultimate goal:
63
+ Reg%:mmatch[Progress, Returns( MatchSet|Integer|nil)] #affects progress on success (when integer returned)
64
+ Progress%:bt_match[Integer.-, Returns( Bool)] #affects progress on success
65
+ MatchSet%:next_match[Returns( Integer|nil)] #affects progress on success
66
+ #(modified progress is the same one as was given to the mmatch that created the matchset)
67
+ MatchSet%:initialize[Progress,OBS,Returns( MatchSet)] #for every ms class
68
+
69
+
70
+
71
+ now:
72
+ Reg%:cmatch[Progress, Yields[NeverReturns], NeverReturns] #throws :RegMatchFail on match failure, yields on success.
73
+ Reg%:bmatch[Progress, Returns(Object)] #returns a true value on success, nil or false on match failure
74
+
75
+
76
+ =end
77
+
78
+
79
+ #---------------------------------------------
80
+ module Reg
81
+
82
+ #---------------------------------------------
83
+ class MatchFailRec
84
+ attr_accessor :undos_inc,:matchsucceed_inc#,:position_inc
85
+ def initialize
86
+ @undos_inc=@matchsucceed_inc=0;#@position_inc=0
87
+ end
88
+
89
+ #position_inc is the number of positions to pop off position stack
90
+ #to get back to the point before the match of the most recent matchset.
91
+ #it is also the count by which to adjust regsidx to get back to the
92
+ #corresponding reg which generated the matchset.
93
+ # alias regs_adjust position_inc
94
+ end
95
+
96
+ #---------------------------------------------
97
+ class Progress
98
+ # attr_reader :matcher, :cursor, :regsidx
99
+ attr_reader :variables
100
+
101
+ #for internal use only...
102
+ # attr_writer :undos_stack, :matchfail_todo, :matchsucceed_stack #, :regsidx
103
+
104
+
105
+ #matchset_stack and matchfail_todo are (nearly) parallel arrays; matchfail_todo has
106
+ #one more item in it (at the bottom). each matchfailrec represents the things to undo
107
+ #on failure to get back to the corresponding matchset's starting position.
108
+
109
+ #matchfail_todo is more or less a 2-dimensional array of integers. very many of
110
+ #those integers in the undos_inc and matchsucceed_inc columns will be zero. it
111
+ #would be nice to use a sparse vector or matrix instead.
112
+
113
+ #a progress has a stack of contexts
114
+ #a context has a (possibly empty) stack of matchsets
115
+ #a matchset has a context
116
+
117
+
118
+
119
+ #---------------------------------------------
120
+ class Context
121
+ def initialize matcher,data
122
+ @matcher=matcher
123
+ @data=data
124
+ @regsidx=0
125
+ @position_stack=[data.pos]
126
+ @position_inc_stack=[0]
127
+ # @matchfail_todo=[MatchFailRec.new]
128
+ # @matchset_stack=[]
129
+ end
130
+ attr_reader :matcher,:data,:regsidx,:position_stack#,:matchfail_todo,:matchset_stack
131
+ attr_reader :context_type
132
+
133
+ #position_inc_stack.last is the number of patterns that have successfully matched
134
+ #since the last matchset was pushed onto matchset_stack. The pattern that created
135
+ #the last matchset is included in this count, hence position_inc_stack.last must
136
+ #always be 1 or greater, unless position_inc_stack contains one element.
137
+ attr_reader :position_inc_stack
138
+
139
+ attr_writer :regsidx,:data
140
+ alias cursor data
141
+
142
+ attr_accessor :context_index
143
+
144
+ #---------------------------------------------
145
+ def with_context(type,data)
146
+ @context_type=type
147
+ @data=::Sequence::SingleItem.new data
148
+ end
149
+
150
+ #---------------------------------------------
151
+ def get_index
152
+ context_index || data.pos
153
+ end
154
+ #---------------------------------------------
155
+ def position_inc; position_inc_stack.last end
156
+
157
+ #---------------------------------------------
158
+ def push_match(inc=0)
159
+ #matchset_stack should be 1 smaller than matchfail_todo
160
+ #assert matchfail_todo.size-1==matchset_stack.size
161
+
162
+ cursor.move inc #do nothing if no param given
163
+ assert cursor.pos>= position_stack.last
164
+ position_stack.push cursor.pos #push the start position of the next match
165
+ position_inc_stack[-1]+=1
166
+ self.regsidx+=1
167
+ end
168
+
169
+ #---------------------------------------------
170
+ def origpos
171
+ position_stack.first
172
+ end
173
+
174
+ #---------------------------------------------
175
+ def posinc
176
+ cursor.pos-origpos
177
+ end
178
+ end
179
+
180
attr_reader :matchfail_todo, :matchset_stack

extend Forwardable

# everything that is per-context is answered by the innermost Context
def_delegators "@context_stack.last", :matcher, :regsidx, :regsidx=
def_delegators "@context_stack.last", :with_context, :data, :get_index
def_delegators "@context_stack.last", :position_stack, :push_match
def_delegators "@context_stack.last", :context_type, :context_index, :context_index=
def_delegators "@context_stack.last", :position_inc_stack, :position_inc
def_delegators "@context_stack.last", :origpos, :posinc
alias_method :cursor, :data
alias_method :regs_adjust, :position_inc

# scanning primitives are forwarded to the current cursor
def_delegators :cursor, :move
def_delegators :cursor, :scan, :skip, :check, :match?
def_delegators :cursor, :scan_until, :skip_until, :check_until, :exist?
def_delegators :cursor, :scanback, :skipback, :checkback, :matchback?
def_delegators :cursor, :scanback_until, :skipback_until, :checkback_until, :existback?

# the innermost (current) Context
def context
  @context_stack.last
end

# synonym for #cursor
def sequence
  cursor
end
198
+
199
+ #---------------------------------------------
200
# Sets up a fresh match of matcher against cursor.
def initialize(matcher, cursor)
  # matcher, cursor, regsidx and the position stack are held by the Context
  @context_stack = []
  newcontext(matcher, cursor)

  # backtracking stops, and for each one the record of what to roll back on
  # failure (matchfail_todo always holds one more entry than matchset_stack)
  @matchset_stack = []
  @matchfail_todo = [MatchFailRec.new]

  @variables = {}
  # undo procs and names of variables bound during this entire match
  @undos_stack = []
  # things to do once the entire match succeeds (substitutions, deferreds)
  @matchsucceed_stack = []
end
218
+
219
+ #---------------------------------------------
220
# Pushes a new Context for matcher onto the context stack; data defaults to
# the current cursor.  Always returns nil.
def newcontext(matcher, data=cursor)
  @context_stack << Context.new(matcher, data)
  nil
end
224
+ #a new context is created (newcontext is called) whenever entering
225
+ #a Subseq, Repeat, vector logical, and sometimes composite scalar
226
+ #classes such as Reg::Object, Reg::Array, Reg::Hash, Reg::Restrict,
227
+ #(or even a scalar logical)
228
+ #_if_ they contain an undo, variable binding, later or replacement
229
+ #(Reg::Transform, Reg::Undo, Reg::Later, or Reg::Bound)
230
+ #expression somewhere within them.
231
+ #once the expression that created the context is finished matching, it is popped
232
+ #from the context stack. however, a reference to it may remain from
233
+ #a MatchSet on the matchset_stack. (if there was a backtracking stop
234
+ #found during the (sub)match, there will be such a reference.)
235
+
236
+ #why should vector logicals create a new context?? now i think that was a mistake....
237
+
238
+ #---------------------------------------------
239
# Pops and returns the innermost Context.
def endcontext
  @context_stack.pop
end
240
+
241
+ #---------------------------------------------
242
# Creates a backtracking stop: one new entry each on matchset_stack,
# matchfail_todo and the current context's position_inc_stack.
# The caller is responsible for the following push_match.
def push_matchset(ms=nil)
  if defined? MatchSet
    assert MatchSet===ms
  end
  matchset_stack << ms
  matchfail_todo << MatchFailRec.new
  position_inc_stack << 0
end
251
+
252
+ =begin
253
+ #---------------------------------------------
254
+ #dunno if i really want this
255
+ def skip(mtr)
256
+ len=(cursor.skip mtr) || return
257
+ push_match len
258
+ return len
259
+ end
260
+ =end
261
+
262
+
263
+ #---------------------------------------------
264
+ #this method is dangerous! it leaves the Progress in an inconsistent state.
265
+ #caller must fixup state by either popping matchset_stack or pushing a matchfail_todo.
266
+ #called by last_next_match, backtrack, and next_match of RepeatMatchSet and SubseqMatchSet
267
# Rolls ctx and this Progress back to the most recent backtracking stop and
# returns the MatchSet popped from matchset_stack.
# Returns nil without touching anything when ctx.position_inc_stack holds
# only its bottom entry.
# Note that one MatchFailRec is popped from matchfail_todo and not replaced.
def backup_stacks(ctx=context)
  (ctx.position_inc_stack.size > 1) or return
  assert(ctx.position_inc_stack.size > 1)
  # number of sub-matches recorded since the last backtracking stop
  discarding_pos=ctx.position_inc_stack.pop
  assert(ctx.position_inc_stack.size > 0)
  ctx.regsidx-=discarding_pos #adjust position in matcher
  assert(ctx.position_stack.size >= discarding_pos) #what if position stack is empty here?

  # forget the start positions of the discarded sub-matches
  ctx.position_stack.slice!(-discarding_pos..-1) if discarding_pos>0
  # @position_stack=@position_stack.slice(0...-discarding.position_inc)

  assert(matchfail_todo.size >= 1)
  discarding=matchfail_todo.pop

  #backup undo stack and execute undos
  discarding_undos=discarding.undos_inc
  process_undos @undos_stack.slice!(-discarding_undos..-1) if discarding_undos>0

  #backup matchsucceed stack
  # (these entries are dropped without being called)
  discarding_succ=discarding.matchsucceed_inc
  @matchsucceed_stack.slice!(-discarding_succ..-1) if discarding_succ>0

  return matchset_stack.pop
end
291
+
292
+
293
+ #---------------------------------------------
294
# Backs up to successive backtracking stops until one of their MatchSets
# yields another match, then re-registers that match on ctx.
# Returns [ctx.regsidx, cursor distance from ctx.origpos] on success, or nil
# once backup_stacks has no stop left to return.
# NOTE(review): the only definition of check_result visible in this file sits
# inside a =begin/=end block; presumably it is supplied elsewhere -- confirm.
def backtrack(ctx=context)
  assert regsidx != Infinity
  assert check_result
  mat=nil
  loop do
    #warn "warning: ctx.position_stack not being updated in backup_stacks?"
    ms=backup_stacks(ctx) or return

    # NOTE(review): reads cursor/position_stack of the current context rather
    # than of ctx; the two differ whenever a non-default ctx is passed
    if mat=ms.next_match(cursor.data, position_stack.last)
      # the stop stays live so that it can be backtracked into again
      matchset_stack.push ms
      #position_inc_stack.push 0 #i'm really unsure about this line
      #warn "warning: ctx.position_stack not being updated??"
      break
    end
  end
  assert( (1..matcher.max_matches)===regsidx+1)
  assert ::Array===mat
  #assert ::Array===mat.first


  #back up cursor position
  ctx.cursor.pos=ctx.position_stack.last



  # restore the bookkeeping entries that backup_stacks removed
  matchfail_todo.push MatchFailRec.new
  ctx.position_inc_stack.push 0 #i'm really unsure about this line
  # mat.last is used as the number of items to advance the cursor by
  ctx.push_match mat.last

  assert regsidx
  assert check_result
  return ctx.regsidx, ctx.cursor.pos-ctx.origpos
end
327
+
328
+ #---------------------------------------------
329
+ #lookup something that was already matched, either by
330
+ #name or index(es).
331
+ #probably need to take a full path for parameters
332
# Placeholder: the body is a bare call to huh, the marker used elsewhere in
# this file for unfinished code.
def backref
  huh
end
333
+
334
# Internal use only (called from #subprogress).
# Records parent and, when a matcher is given, installs it and resets the
# matcher index.  cu and ps are accepted but no longer used.
# Returns 0 when mtr was given, nil otherwise.
def set_state!(cu, ps, mtr, parent)
  @parent = parent
  return unless mtr

  @matcher = mtr #@matcher might be set to something different
  @regsidx = 0
end
352
+
353
+ #---------------------------------------------
354
# Returns a dup of this Progress re-parented to self, optionally with a new
# matcher.  The first statement calls huh with a message saying this method
# should be replaced by newcontext/endcontext.
def subprogress(cu=nil, mtr=nil)
  huh "replace this method with newcontext/endcontext"
  copy = dup

  sub_cursor =
    if !cu
      copy.cursor.position # make a sub-cursor
    elsif ::Sequence===cu
      cu
    else
      ::Sequence.from(cu) #convert other data to a cursor...
    end
  copy.set_state!(sub_cursor, [copy.cursor.pos], mtr, self)

  assert copy.check_result

  copy
end
375
+
376
+ #---------------------------------------------
377
# Emits a warning that this is unfinished, then returns the object's #hash.
def make_hash
  warn("warning: i want more here...")
  return hash
end
381
+
382
+ #---------------------------------------------
383
# Half-open range running between the last two recorded match positions.
def last_match_range
  positions = position_stack
  Range.new(positions[-2], positions[-1], true)
end
386
+
387
+ #---------------------------------------------
388
# Most recently pushed MatchSet, or nil when there is none.
def top_matchset
  matchset_stack[-1]
end
391
+
392
+ #---------------------------------------------
393
# Names of all variables that have an entry in the binding table.
def variable_names
  return @variables.keys
end
396
+
397
+ #---------------------------------------------
398
# Latest raw binding stored for name, exactly as it was pushed; nil when the
# name has no entry.
def raw_variable(name)
  assert ::Symbol.reg|::String===name
  bindings = @variables[name]
  bindings && bindings.last
end
402
+
403
+ #---------------------------------------------
404
+ #always returns array or string, not single item
405
# Resolves the latest binding of name: the binding is a [sequence, index]
# pair and the result is sequence[index].
#always returns array or string, not single item
# Returns nil when the name is unknown or the stored sequence is nil.
def lookup_var(name)
  assert ::Symbol.reg|::String===name
  bindings = @variables[name]
  return bindings unless bindings

  cu, idx = *bindings.last
  cu && cu[idx]
end
alias [] lookup_var
410
+
411
+ #---------------------------------------------
412
# Drops the latest binding of name together with its entry on the undo
# stack and its count in the current MatchFailRec.  Returns nil.
def unregister_var(name)
  assert ::Symbol.reg|::String===name
  @variables[name].pop

  assert @undos_stack.last.equal?( name ) #maybe this isn't true????....
  @undos_stack.pop

  current = matchfail_todo.last
  current.undos_inc -= 1
  assert current.undos_inc>=0
  nil
end
421
+ #---------------------------------------------
422
# Binds name to bound_to as given (no sequence is attached) and records the
# name on the undo stack so that the binding is popped again on backtracking.
def raw_register_var(name, bound_to)
  assert ::Symbol.reg|::String===name
  #@variables[name] and warn( "variable #{name} is already defined")
  (@variables[name] ||= []) << bound_to
  @undos_stack.push name
  matchfail_todo.last.undos_inc+=1
end
430
+
431
+ #---------------------------------------------
432
# Binds name to the part of the current cursor selected by bound_to (an
# index or range; whatever the cursor's #[] accepts).  The binding is stored
# as a [cursor, bound_to] pair, which is the shape lookup_var and
# bindhistory expect, and the name is recorded on the undo stack so that
# the binding is popped again on backtracking.
# Returns the updated undo count of the current MatchFailRec.
def register_var(name, bound_to)
  assert ::Symbol.reg|::String===name
  @variables[name] ||= []
  #@variables[name] and warn( "variable #{name} is already defined")
  # use the cursor accessor: nothing visible assigns @cursor any more (the
  # cursor lives in the current Context), so storing it bound every
  # variable to a nil sequence
  @variables[name].push [cursor, bound_to]
  @undos_stack << name
  matchfail_todo.last.undos_inc += 1
end
440
+
441
+ #---------------------------------------------
442
# Every value sym has been bound to, oldest first, each resolved through its
# stored [sequence, index] pair.
def bindhistory(sym)
  @variables[sym].map do |entry|
    cu, idx = entry
    cu[idx]
  end
end
445
+
446
+ #---------------------------------------------
447
# Queues block (to be called with args) on the undo stack and counts it in
# the current MatchFailRec.  Returns the updated count.
def register_undo(*args, &block)
  undo = proc { block.call(*args) }
  @undos_stack.push(undo)
  matchfail_todo.last.undos_inc += 1
end
451
+
452
+ #---------------------------------------------
453
# Walks undos (by default the whole undo stack) with
# Ron::GraphWalk.recursive_reverse_each.  An entry that is a variable name
# has its latest binding popped; any other entry is called.
def process_undos(undos=@undos_stack)
  #i think regular reverse_each will work as well...
  Ron::GraphWalk.recursive_reverse_each(undos) { |undo|
    if (::Symbol.reg|::String)===undo
      @variables[undo].pop
    else
      undo.call
    end
  }
end
459
+
460
+
461
+ #---------------------------------------------
462
# Queues a replacement to be carried out once the whole match succeeds.
# The queued object is built by context_type.new(context.data, index, len)
# with a block that evaluates rep_exp through Replace.evaluate.
# The leading huh is the author's own marker that this needs more work.
def register_replace(index, len, rep_exp)
  huh #hmmm.... may need some work. what is context_type defined as?
  later = context_type.new(context.data, index, len) { |gp|
    Replace.evaluate(rep_exp, self, gp)
  }
  @matchsucceed_stack.push(later)
  matchfail_todo.last.matchsucceed_inc += 1
end
469
+
470
+ #---------------------------------------------
471
# Queues block (to be called with args) for when the entire match succeeds
# and counts it in the current MatchFailRec.  Returns the updated count.
def register_later(*args, &block)
  later = proc { block.call(*args) }
  @matchsucceed_stack << later
  matchfail_todo.last.matchsucceed_inc += 1
end
475
+
476
+ #---------------------------------------------
477
# Calls every entry queued on the match-succeeded stack, walking it with
# Ron::GraphWalk.recursive_reverse_each.
def process_laters
  #i think regular reverse_each will work as well...
  Ron::GraphWalk.recursive_reverse_each(@matchsucceed_stack) do |later|
    later.call
  end
end
481
+
482
+ =begin
483
+ #---------------------------------------------
484
+ class Later #inside Progress, so it doesn't conflict with Reg::Later from regreplace.rb
485
+ def initialize(block,args)
486
+ @block,@args=block,args
487
+ end
488
+ class<<self;
489
+ alias [] new;
490
+ end
491
+
492
+ def call
493
+ @block.call( *@args)
494
+ end
495
+ end
496
+ =end
497
+
498
+
499
+ #--------------------------
500
# global overrides for match tracing; both are reset when this file loads
$RegTraceDisable = nil
$RegTraceEnable = nil

# Truthy when bt_match should print a trace line per step:
# $RegTraceEnable wins outright; otherwise the per-instance @trace applies
# unless $RegTraceDisable is set.
def trace_enabled?
  @trace ||= nil
  return $RegTraceEnable if $RegTraceEnable

  !$RegTraceDisable && @trace
end
505
+
506
+ #--------------------------
507
+ #bt, in this case, stands for 'backtracking'.
508
+ #but the cognoscenti refer to this method as 'bitch-match'.
509
+ #match the multiple matcher mtr against the input data in current #cursor
510
+ #but backtracking all along if any submatches fail
511
+ #remember, a multiple matcher has many sub-reg expressions
512
+ #(or in the case of Reg::Repeat, one expression used multiple times)
513
+ #that each have to match the input at some point. (sequentially one after
514
+ #another in the case of Repeat and Subseq, all at the same point in input
515
+ #in the case of Reg::And.)
516
+
517
+ #returns nil if no match, or if a match is found, returns
518
+ #[true, # of data items consumed, number of matchers used ( - 1?)]
519
+
520
+ #used in #mmatch_full of Reg::Array, Reg::Subseq, Reg::Repeat, Reg::And
521
+ #and in the corresponding MatchSets
522
+ #also in #last_next_match
523
+
524
+ #The Reg::And version employs a trick (defining #update_di to leave di unchanged)
525
+ #that will ensure each sub-reg starts at the same place in #cursor as the first one.
526
+
527
+ #Reg::Or and Reg::Xor start each sub-reg at the same place as well, but effectively
528
+ #only one sub-reg of Reg::Or or Reg::Xor ever matches input overall. With Xor, it must
529
+ #be guaranteed that only one alternative can match at all at the current position in
530
+ #input. With Or, #mmatch kicks out early once the first successful match is found.
531
+ #subsequent matches in the overall expression might fail, causing the Or to be backtracked
532
+ #into and a different alternative to be considered, but in that case, the first alternative
533
+ #is considered to have failed overall, and any side effects in it are undone.
534
+
535
+ #why is this important? Reg::And must call bt_match, because a Variable
536
+ #binding in one branch might be used in a subsequent branch of the overall expression.
537
+ #with Reg::Or and Xor, that cannot be the case, and hence they need not call bt_match
538
+
539
+ #backtracking stops
540
+ #a subexpression that might match multiple things in the current input creates a
541
+ #backtracking stop within the current Progress (self). creating a new backtracking
542
+ #stop means pushing an entry onto each of @matchset_stack, @matchfail_todo and #position_inc_stack.
543
+
544
+ #bt_match returns 3 things if an initial match could be found:
545
+ #true,
546
+ #the number of data items in cursor to be consumed in the initial match, and
547
+ #the number of sub-regs that were used. the 3rd is only really maybe needed if
548
+ #mtr is a Repeat.
549
+ #bt_match returns nil if no initial match could be found.
550
+
551
+ #if the initial match is unsatisfactory, you should call #backtrack to get another
552
+ #potential match
553
+
554
# Runs the sub-matchers of mtr one after another against the current cursor,
# backtracking into earlier backtracking stops whenever one of them fails.
#
# mtr::         the multiple matcher to run (defaults to the current
#               context's matcher)
# match_steps:: sub-matcher index at which to stop (defaults to
#               mtr.max_matches)
#
# Returns [to_result, posinc, regsidx] once mtr.enough_matches? is
# satisfied, or nil when a sub-matcher fails and #backtrack finds nothing
# further to try.
#
# NOTE(review): the default for match_steps is computed from mtr before the
# `mtr ||= matcher` line runs, so passing mtr=nil without also passing
# match_steps calls max_matches on nil.
def bt_match(mtr=matcher,match_steps=mtr.max_matches)
  mtr ||=matcher
  assert cursor.pos <= cursor.size
  assert origpos >= 0
  assert posinc >= 0
  assert( (0..match_steps)===regsidx)
  assert Integer===position_stack.first
  assert check_result
  loop do #loop over regs to match
    assert cursor.pos <= cursor.size
    assert posinc >= 0
    assert( (0..match_steps)===regsidx || !(mtr.enough_matches? regsidx,cursor.eof?))

    if trace_enabled?
      puts [cursor.pos, regsidx, mtr, clean_result].map{|i| i.inspect }.join(' ')
      #pp self
    end

    assert check_result

    #try a new match of current reg
    r=mtr.regs(regsidx)
    if r.respond_to? :mmatch and not Formula===r
      #but what about RegThat? should test for being a Reg::Reg instead
      if defined? $MMATCH_PROGRESS
        m=r.mmatch(self)
        # p r.class
        # p r.__id__
      else
        # 'mmatch could return 2 items here'
        m=r.mmatch(cursor.data, cursor.pos)
      end

      assert check_result

      assert ::Array===m || MatchSet===m || !m

      #is a single match or a match set?
      if m.respond_to? :next_match
        #it's a set -- start new inner result array
        #with initial match as first elem
        push_matchset m
        mat,matchlen=m.next_match(cursor.data, cursor.pos)

        assert mat
        assert m
      else
        #if defined? $MMATCH_PROGRESS
        # matchlen=m
        #else
        mat,matchlen=*m #single match or nil
        #end
        # m stays non-nil only when a MatchSet was pushed for this step
        m=nil
      end
    else
      # r has no usable mmatch: compare it against the next item with ===
      if !cursor.eof? and r===(item=cursor.readahead1)
        mat=RR[item]
        matchlen=1
      end
    end


    assert check_result

    if matchlen #match succeeded
      # no MatchSet was pushed for this step, but the matcher asks for a
      # backtracking stop anyway
      if !m and mtr.respond_to? :want_gratuitous_btstop? and \
        mtr.want_gratuitous_btstop?(regsidx)
        push_matchset SingleMatch_MatchSet.new
      end

      #advance to next reg
      assert check_result
      push_match mtr.update_di(0,matchlen)
      assert(cursor.pos<=cursor.size)
    else #match fail?
      assert check_result
      # failing here is acceptable when the matcher already has enough matches
      return to_result,posinc,regsidx if mtr.enough_matches? regsidx,cursor.eof?

      #doesn't match, try backtracking
      assert regsidx
      backtrack or return nil #bt failed? we fail
      assert(cursor.pos<=cursor.size)
      assert check_result
      assert(!(mtr.enough_matches? regsidx,cursor.eof?))
    end

    assert(cursor.pos<=cursor.size)

    assert check_result
    assert matchlen || !(mtr.enough_matches? regsidx,cursor.eof?)
    return to_result,posinc,regsidx if regsidx>=match_steps and mtr.enough_matches? regsidx,cursor.eof?
    assert( (0..match_steps)===regsidx || !(mtr.enough_matches? regsidx,cursor.eof?))

  end #loop

end
650
+
651
+ #---------------------------------------------
652
+ #maybe this isn't necessary?
653
+ #because backtrack is called after it,
654
+ #and it's doing the same things.... more or less
655
+ #used in RepeatMatchSet#next_match and SubseqMatchSet#next_match
656
+ #this method appears to be changing things that it shouldn't?!
657
# Asks the most recent backtracking stop of ctx for its next alternative
# and, if sub-matchers remain after it, re-runs bt_match for the tail.
#
# Always returns a three-element list:
# * [nil, nil, regsidx] when there is no stop to back up to, or when the
#   stop is exhausted and the matcher does not yet have enough matches;
# * [to_result, items consumed, regsidx] otherwise (or whatever bt_match
#   returns when the tail had to be re-matched).
#
# NOTE(review): the two bare huh calls ahead of bt_match are the author's
# unfinished-code markers; what huh does is not visible from here.
def last_next_match(ctx=context)
  assert check_result
  assert( (0..ctx.matcher.max_matches)===ctx.regsidx)
  assert(ctx.position_inc_stack.size >= 1)
  r=backup_stacks(ctx) #need to back up the context, not progress (at least sometimes)

  # rewind the cursor to where the discarded sub-matches began
  di=cursor.pos=ctx.position_stack.last
  assert( (0..ctx.matcher.max_matches)===ctx.regsidx)
  unless r
    # nothing to back up to: restore the MatchFailRec that callers expect
    matchfail_todo.push MatchFailRec.new
    assert check_result
    return nil,nil,regsidx
  end
  ctx.position_inc_stack.push 0 #i'm really unsure about this line

  #matchset_stack.pop is called in backtrack but not here, why?


  r2,diinc=r.next_match(ctx.cursor.data,ctx.cursor.pos)
  matchset_stack.push r
  # from here on r holds the match result rather than the MatchSet
  r=r2
  unless r
    #might need to return non-nil here, if resfrag isn't exhausted yet
    assert( (0..ctx.matcher.max_matches)===ctx.regsidx)
    # the stop is exhausted, so take it off again
    matchset_stack.pop
    assert check_result
    #huh #oops, should I really be using ctx here?
    return nil,nil,ctx.regsidx unless ctx.matcher.enough_matches? ctx.regsidx,ctx.cursor.eof?
    return to_result, ctx.cursor.pos-ctx.position_stack.first, ctx.regsidx
  end

  assert diinc
  assert ctx.cursor.pos+diinc <= ctx.cursor.size
  ctx.cursor.move diinc
  #regsidx-=matchfail_todo.position_inc #should be done in push_match...
  matchfail_todo.push MatchFailRec.new
  ctx.position_inc_stack.push 0 #i'm really unsure about this line
  ctx.push_match #need to affect ctx instead of self?

  assert( (0..ctx.matcher.max_matches)===ctx.regsidx)
  if ctx.regsidx<ctx.matcher.max_matches #if there are more subregs of this reg to be matched
    #re-match tail regs
    assert ctx.cursor.pos <= ctx.cursor.size
    #di is sometimes bad here, it seems....(fixed now?)
    assert check_result
    assert( (0..ctx.matcher.max_matches)===ctx.regsidx)
    huh #need to re-start matching where previous bt_match left off
    huh #should bt_match below be looking at ctx instead of self?
    result=bt_match
    assert check_result
    return result
  end



  assert( (0..ctx.matcher.max_matches)===ctx.regsidx)
  assert check_result

  return to_result,posinc,ctx.regsidx
end
717
+
718
+ =begin
719
+ #---------------------------------------------
720
+ def check_result;
721
+
722
+ if defined? $not_right_now #failing now, dunno why, maybe re-enable later
723
+ #since this should be true, a separate regsidx is unnecessary
724
+ ri=0
725
+ current=self
726
+ begin
727
+ ri+=current.regsidx
728
+ end while current=current.parent
729
+ assert ri==position_stack.size-1
730
+
731
+ #matchset_stack should be 1 smaller than matchfail_todo
732
+ matchsets=0
733
+ current=self
734
+ begin
735
+ matchsets+=current.matchset_stack.size
736
+ end while current=current.parent
737
+ assert matchfail_todo.size-1==matchsets
738
+ end
739
+
740
+ #verify correct types in @-variables
741
+ assert ::Sequence===cursor
742
+ assert matcher.respond_to?( :update_di)
743
+ assert regsidx >=0
744
+ matchset_stack.each{|ms| assert MatchSet===ms }
745
+ prev_pos=0
746
+ position_stack.each{|pos| assert prev_pos<=pos; pos=prev_pos }
747
+ assert prev_pos<=cursor.size
748
+
749
+ vars_copy=@variables.dup
750
+ @undos_stack.each {|i|
751
+ case i
752
+ #every element of @variables should also be a sym in @undos_stack
753
+ when Symbol,String:
754
+ vars_copy.delete(i) or assert(false)
755
+
756
+ when Later,::Proc:
757
+ else assert(false)
758
+ end
759
+ }
760
+ assert vars_copy.empty? #every var should be accounted for
761
+
762
+ #sum of :undos_inc,:matchsucceed_inc,:position_inc in matchfail_todo
763
+ #should be the same as the size of the corresponding stack.
764
+ uns=mats=poss=0
765
+ matchfail_todo.each{|mfr|
766
+ uns+=mfr.undos_inc
767
+ mats+=mfr.matchsucceed_inc
768
+ # poss+=mfr.position_inc
769
+ }
770
+ assert uns==@undos_stack.size
771
+ assert mats==@matchsucceed_stack.size
772
+ # assert poss+1==position_stack.size
773
+
774
+ assert succ_stack_ok
775
+
776
+ return true
777
+ end
778
+
779
+ #---------------------------------------------
780
+ def succ_stack_ok(stk=@matchsucceed_stack)
781
+ stk.each{|elem|
782
+ case elem
783
+ when Array: succ_stack_ok(elem)
784
+ when Later: true
785
+ else
786
+ end or return
787
+ }
788
+ return true
789
+ end
790
+ private :succ_stack_ok
791
+ =end
792
+ #---------------------------------------------
793
# Builds the list of matched slices recorded so far: for every pair of
# adjacent offsets in position_stack, the run of cursor items starting at
# the first offset and ending just before the second.
#
# Returns an Array with one entry per adjacent pair (empty when fewer than
# two positions are recorded).
def clean_result
  stops = position_stack
  (1...stops.size).map do |k|
    from = stops[k - 1]
    cursor[from, stops[k] - from]
  end
end
811
+
812
+ #---------------------------------------------
813
# Placeholder result value: always true. The original author flagged this
# as a shortcut ("ok, i'm cheating"), not a real conversion.
def to_result
  true
end
816
+
817
+ end #class Progress
818
+
819
+
820
+
821
+ if defined? $MMATCH_PROGRESS #ultimately, mmatch will take a progress, but until then, disable this
822
+ #---------------------------------------------
823
# Progress-based matching for the array matcher (this class is reopened
# inside the enclosing module; presumably Reg::Array -- confirm).
class Array
  # Matches this matcher against the single item under the cursor, which
  # must itself be an ::Array; its elements are matched in a nested context.
  #
  # progress - the matching state; must respond to cursor, newcontext,
  #            regsidx, bt_match and endcontext.
  #
  # Returns [true, 1] when bt_match succeeds and reports having consumed
  # every element of the nested array, false when the item is not an
  # ::Array, and otherwise a falsy value.
  def mmatch_full(progress)
    other = progress.cursor.readahead1
    ::Array === other or return false #need to be more generous eventually

    progress.newcontext(self, other.to_sequence)
    begin
      assert progress.regsidx == 0
      result, di, _bogus = progress.bt_match
      assert di.nil? || di <= other.size
    ensure
      # Always close the context opened above, even when an assertion or
      # bt_match raises; the sibling matchers do the same with ensure.
      progress.endcontext
    end

    #should be returning a matchset here sometimes
    di == other.size && result && [true, 1]
  end
end
837
+
838
+
839
+
840
+
841
+ #---------------------------------------------
842
# Progress-based matching for subsequence matchers.
class Subseq

  # Fast path for the case where no element of @regs is a multiple
  # (variable-length) matcher: compares each matcher against the item at the
  # same offset in the look-ahead window.
  #
  # Returns [true, @regs.size] on success, nil when too few items remain or
  # any matcher rejects its item. The context opened here is always closed.
  def mmatch(pr)
    pr.newcontext(self)
    cursor = pr.cursor
    assert cursor.pos <= cursor.size
    width = @regs.size
    return nil unless cursor.pos + width <= cursor.size

    window = cursor.readahead(width)
    @regs.each_with_index do |matcher, offset|
      assert cursor.pos < cursor.size
      return nil unless matcher === window[offset]
    end
    [true, @regs.size]
  ensure
    pr.endcontext
  end

  private

  # General path, used when at least one of @regs is a multiple matcher.
  # Delegates the actual work to pr.bt_match.
  #
  # Returns a SubseqMatchSet on success, otherwise the falsy bt_match result
  # (or nil when the remaining input is shorter than itemrange.begin).
  # The cursor is moved back to its starting position before returning.
  def mmatch_full(pr)
    stack_depth = pr.matchset_stack.size
    pr.newcontext(self)
    cursor = pr.cursor
    start = cursor.pos
    return result = nil unless start + itemrange.begin <= cursor.size
    assert((0..cursor.size).include?(start))
    assert pr.regsidx == 0
    result, consumed, _unused = pr.bt_match
    result &&= SubseqMatchSet.new(pr, consumed, stack_depth)
    return result
  ensure
    # a failed match must leave the matchset stack at its original depth
    assert MatchSet === result || pr.matchset_stack.size == stack_depth
    pr.cursor.pos = start
    assert start == pr.cursor.pos
    pr.endcontext
  end
end
880
+
881
+ #---------------------------------------------
882
# Progress-based matching for repeat matchers (@reg repeated a number of
# times within the range @times).
class Repeat
  include CausesBacktracking

  # Fast path: counts how many consecutive items (at most @times.end)
  # satisfy @reg, reading them from the cursor, then rewinds the cursor.
  #
  # Returns [true, n] when exactly @times.begin items matched, a
  # SingleRepeatMatchSet stepping down from the count found to @times.begin
  # when more matched, and nil when fewer matched or too little input is left.
  def mmatch(pr)
    assert pr.check_result
    pr.newcontext(self)
    cursor = pr.cursor
    start = cursor.pos
    return nil unless start + @times.begin <= cursor.size #enough room left in input?

    count = -1
    (0...@times.end).each do |step|
      count = step
      start + count < cursor.size or break(count -= 1)
      @reg === cursor.read1 or break(count -= 1)
    end
    count += 1
    assert((0..@times.end) === count)
    assert pr.check_result
    cursor.pos = start

    return [true, count] if count == @times.begin
    return nil unless count > @times.begin
    SingleRepeatMatchSet.new(pr, count, -1, @times.begin)
  ensure
    pr.endcontext
  end

  private

  # General path. First matches the mandatory @times.begin iterations via
  # pr.bt_match, then (greedily) the optional ones with a second bt_match.
  #
  # Returns nil on failure, [true, consumed] when the match left no extra
  # entries on pr.matchset_stack, and a RepeatMatchSet otherwise.
  # The cursor is put back at its starting position before returning.
  def mmatch_full(pr)
    pr.newcontext(self)
    cursor = pr.cursor
    stack_depth = pr.matchset_stack.size
    start = cursor.pos
    assert start <= cursor.size
    return result = nil unless start + itemrange.begin <= cursor.size #enough room left in input?

    #first match the minimum number
    if @times.begin == 0 #if we can match nothing
      return result = [true, 0] if cursor.eof? #at end of input? return empty set
      iterations = consumed = 0
    else
      return result = nil if cursor.eof?
      assert @times.begin < Infinity
      assert pr.regsidx == 0
      mandatory, consumed, iterations = pr.bt_match(nil, @times.begin) #matches @reg @times.begin times
      return result = nil if mandatory.nil?
    end
    assert iterations == @times.begin

    assert !@times.exclude_end?
    optional = @times.end - @times.begin
    #note: optional could be infinite here...

    #do the optional match iterations
    #only greedy matching implemented for now
    if optional >= 1
      #need to re-start matching where previous bt_match left off
      assert pr.check_result
      #get remaining matches up to @times.end times
      assert(snapshot = pr.make_hash)
      assert pr.regsidx == @times.begin
      more, consumed, iterations = pr.bt_match #bt stop at each iteration, this time
      assert pr.check_result
      assert @times === pr.regsidx

      return result = RepeatMatchSet.new(pr, consumed, stack_depth) if more
      assert snapshot == pr.make_hash
    end

    #if matchset has no backtracking stops, and
    #hence cannot contain more than one actual match,
    #then just return that match.
    result =
      if pr.matchset_stack.size == stack_depth
        [true, consumed]
      else
        RepeatMatchSet.new(pr, consumed, stack_depth)
      end
    return result
  ensure
    assert MatchSet === result || pr.matchset_stack.size == stack_depth
    pr.cursor.pos = start #is it really this simple? I'm doubtful....
    assert pr.cursor.pos == start
    pr.endcontext
  end
end
968
+
969
+
970
+
971
+ #---------------------------------------------
972
# Progress-based matching for conjunction matchers.
class And
  include CausesBacktracking

  private

  #can't use this until mmatch interface is changed to take a single progress param
  #
  # General path, used when at least one of @regs is a multiple matcher.
  # Runs progress.bt_match inside a fresh context.
  #
  # Returns an AndMatchSet on success, otherwise the falsy bt_match result.
  #
  # Known limitations recorded by the original author:
  # * the consumed count (di) is always 0 here because And#update_di never
  #   does anything; another way of counting consumed items is needed;
  # * AndMatchSet needs a new definition that tracks which alternative(s)
  #   matched longest, both to advance the cursor and to know where to
  #   start backtracking.
  def mmatch_full(progress)
    # Recorded before the context is opened, as Subseq#mmatch_full does:
    # AndMatchSet inherits SubseqMatchSet#initialize, which requires the
    # original matchset-stack depth as a third argument.
    orig_stack_size = progress.matchset_stack.size
    progress.newcontext(self)
    assert((0..progress.cursor.size).include?(progress.cursor.pos))
    assert progress.regsidx == 0
    result, di, _bogus = progress.bt_match

    result and AndMatchSet.new(progress, di, orig_stack_size)
  ensure
    progress.endcontext
  end

end
998
+
999
+ #--------------------------
1000
# Match set for a repeat of a single-item matcher: enumerates candidate
# repetition counts from startcnt towards endcnt, one step per next_match.
class SingleRepeatMatchSet < MatchSet

  # progress - matching state (stored, not otherwise used in this class)
  # startcnt - repetition count returned by the first next_match
  # stepper  - +1 or -1 (asserted in next_match); direction of the walk
  # endcnt   - last repetition count to return
  #
  # Raises RuntimeError when the range is empty or runs against stepper.
  def initialize(progress,startcnt,stepper,endcnt)
    raise 'why even make it a set, then?' if endcnt == startcnt
    raise "tried to make null match set" unless (endcnt - startcnt) * stepper > 0
    assert startcnt >= 0
    assert endcnt >= 0
    @progress = progress
    @matchtimes = startcnt
    @stepper = stepper
    @endcnt = endcnt
  end

  # Returns [RR[items], count] for the current count, where items is the
  # slice of arr holding count items starting at idx, then steps the count.
  # Returns nil once the count has moved past @endcnt.
  def next_match(arr,idx)
    assert @stepper.abs == 1
    return nil unless (@endcnt - @matchtimes) * @stepper >= 0
    assert @matchtimes >= 0
    count = @matchtimes
    pair = [RR[arr[idx...idx + count]], count]
    assert ::Array === pair.first.first
    @matchtimes += @stepper
    pair
  end
end
1019
+
1020
+ #--------------------------
1021
# Match set for alternation: walks the alternatives of an Or matcher in
# order, returning the matches of each one in turn.
class OrMatchSet < MatchSet

  # progress      - matching state handed to each alternative's mmatch
  # orreg         - the Or matcher; its regs are the alternatives
  # idx           - index (into orreg.regs) of the alternative that produced
  #                 the first match
  # set           - match set of that alternative, or nil when it has none
  # firstmatchlen - despite the name, the first match itself: asserted to
  #                 be an Array, and next_match expects a 2-element pair
  #                 whose last element is an Integer
  def initialize(progress,orreg,idx,set,firstmatchlen)
    @orreg,@idx,@set,@firstmatch,@progress=orreg,idx,set,firstmatchlen,progress
    assert ::Array===@firstmatch
    # assert @firstmatch.nil? || Integer===@firstmatch
  end

  # Returns the next match, or nil when every alternative is exhausted.
  # The first call hands back the match given to the constructor.
  #
  # ary, idx - input array and position; forwarded to nested match sets.
  def next_match(ary,idx)
    if @firstmatch
      result,@firstmatch=@firstmatch,nil
      assert ::Array===result
      # assert ::Array===result.first.first
      assert 2==result.size
      assert Integer===result.last
      return result
    end

    @set and result= @set.next_match(ary,idx)
    while result.nil?
      @idx+=1
      @idx >= @orreg.regs.size and return nil
      x=@orreg.regs[@idx].mmatch(@progress) #hard spot
      result=case x
        # every next_match in this file takes (array, index); the original
        # called it with no arguments here
        when MatchSet; @set=x; x.next_match(ary,idx)
        # NOTE(review): the mmatch methods in this file return [true, n]
        # pairs rather than Integers, so this branch looks stale; such a
        # result currently falls through and the alternative is skipped.
        when Integer; @progress.cursor.readahead( x)
      end
    end

    # one slot per alternative; only the alternative that matched is filled.
    # (The original indexed by idx, the input position, not @idx.)
    a=RR[nil]*@orreg.regs.size
    a[@idx]=result[0]
    result[0]=a
    assert ::Array===result.first.first
    return result
  end
end
1054
+
1055
+ #--------------------------
1056
# Progress-based matching for alternation matchers.
class Or
  include CausesBacktracking

  # Fast path: tests the item under the cursor against each alternative.
  #
  # Returns an OrMatchSet positioned on the first alternative that accepts
  # the item, or nil at end of input or when none accepts it.
  def mmatch(pr)
    # assert start <= arr.size
    cu=pr.cursor
    cu.eof? and return nil
    item=cu.readahead1
    @regs.each_with_index {|reg,i|
      # OrMatchSet requires its first match as a [data, length] Array
      # (it asserts so); the original passed the bare Integer 1.
      reg===item and
        return OrMatchSet.new(pr,self,i,nil,[true,1])
    }
    return nil
  end

  private

  # General path: tries each alternative in order inside a fresh context.
  # Alternatives responding to mmatch are asked to match themselves; the
  # others are compared against the item under the cursor with ===.
  #
  # Returns an OrMatchSet for the first alternative that matches, else nil.
  def mmatch_full(pr)
    pr.newcontext(self)
    mat=nil
    assert pos=pr.cursor.pos
    @regs.each_with_index{|r,i|
      if r.respond_to? :mmatch
        assert pr.cursor.pos==pos
        mat=r.mmatch(pr) or next
        if mat.respond_to? :next_match
          # `huh` is the author's marker for an unresolved question; it is
          # kept exactly as written (its definition is not in this chunk).
          huh #is calling next_match bad because it advances cursor?
          # hand OrMatchSet the whole [data, length] pair: it asserts an
          # Array, and the original passed only the Integer length
          first=mat.next_match(pr.cursor.all_data,pr.cursor.pos)
          return OrMatchSet.new(pr,self,i,mat,first)
        else
          return OrMatchSet.new(pr,self,i,nil,mat)
        end
      else
        item=pr.cursor.readahead1
        r===item and
          return OrMatchSet.new(pr,self,i,nil,[true,1])
      end
    }

    # a failed mmatch may be false as well as nil (Reg#mmatch returns false),
    # so check for falsiness rather than nil?
    assert !mat
    return nil
  ensure
    pr.endcontext
  end
end
1099
+
1100
+ #--------------------------
1101
# Progress-based matching for exclusive-or matchers.
class Xor
  private

  # Asks every alternative in @regs to match at the current cursor position
  # (asserting that none of them moved the cursor).
  #
  # Returns the result of the only alternative that matched, or nil when
  # none matched or when a second alternative matched as well.
  def mmatch_full pr
    pr.newcontext self
    origin = pr.cursor.pos
    winner = nil
    @regs.each do |alternative|
      assert pr.cursor.pos == origin
      outcome = alternative.mmatch(pr)
      next unless outcome
      return if winner   # a second match: exclusive-or fails
      winner = outcome
    end
    winner
  ensure
    pr.endcontext
  end
end
1119
+
1120
+
1121
+ #--------------------------
1122
# Matcher that accepts a run of items without inspecting them; the run
# length must lie in @times. This variant offers the longest run first.
class ManyClass

  # Returns [true, n] when exactly @times.begin items remain, a match set
  # covering the possible lengths when more remain, and nil when fewer do.
  def mmatch(pr)
    available = pr.cursor.restsize
    minimum = @times.begin
    return [true, available] if minimum == available
    make_ms([available, @times.end].min, minimum, pr) if minimum < available
  end

  # Builds the match set: starts at left items and steps down to beg.
  def make_ms(left,beg,pr)
    SingleRepeatMatchSet.new(pr, left, -1, beg)
  end
end
1135
+
1136
# Matcher that accepts a run of items without inspecting them; the run
# length must lie in @times. This variant offers the shortest run first.
class ManyLazyClass

  # Returns [true, n] when exactly @times.begin items remain, a match set
  # covering the possible lengths when more remain, and nil when fewer do.
  def mmatch(pr)
    available = pr.cursor.restsize
    minimum = @times.begin
    return [true, available] if minimum == available
    make_ms([available, @times.end].min, minimum, pr) if minimum < available
  end

  # Builds the match set: starts at beg items and steps up to left.
  def make_ms(left,beg,pr)
    SingleRepeatMatchSet.new(pr, beg, 1, left)
  end
end
1149
+
1150
module Reg
  # mmatch implementation for all scalar expressions which don't have an
  # mmatch of their own: tests the single item under the cursor with ===.
  #
  # Returns [true, 1] when the item matches; false at end of input; and the
  # falsy result of === when the item is rejected.
  def mmatch(pr)
    cursor = pr.cursor
    return false if cursor.eof?
    accepted = (self === cursor.readahead1)
    accepted && [true, 1]
  end
end
1157
+
1158
+ #--------------------------
1159
# Match set returned by Repeat#mmatch_full when a repeat may match in more
# than one way. Each next_match call yields an alternative match by undoing
# or re-running part of the work recorded in the shared progress object.
class RepeatMatchSet < MatchSet

  attr_reader :progress

  # progress        - matching state; its current context is remembered
  # consumed        - number of items consumed by the first match
  # orig_stack_size - size of progress.matchset_stack before the repeat
  #                   started matching; reaching it again means exhausted
  def initialize(progress,consumed,orig_stack_size)
    @orig_stack_size = orig_stack_size
    @progress = progress
    @context = @progress.context
    @consumed = consumed
    @firstmatch = [progress.clean_result, @consumed]
    assert(progress.matcher.times === progress.regsidx)
    assert progress.regsidx
    assert @consumed >= 0
  end

  # Number of iterations in the current match, as tracked by the progress.
  def match_iterations
    progress.regsidx
  end

  # Returns the next [result, consumed] pair, or nil when no match is left.
  # Very nearly identical to SubseqMatchSet#next_match (the "twin" in the
  # notes below).
  #
  # arr, idx - input array and start position; used only in assertions.
  def next_match(arr,idx)
    # 1. the match found while constructing the set
    if @firstmatch
      first, @firstmatch = @firstmatch, nil
      return first
    end

    return unless @progress #already exhausted (not in twin)

    assert @orig_stack_size <= @progress.matchset_stack.size
    return @progress = nil if @orig_stack_size == @progress.matchset_stack.size

    assert progress.check_result

    # value is not used: the code that consumed it ("extents") is disabled
    @context.position_inc

    # 2. more iterations than the minimum: give back the last one.
    #    (this part's not in twin)
    if @context.regsidx > @context.matcher.times.begin
      previous_end = @context.position_stack.last
      # the value returned by backup_stacks is deliberately dropped
      progress.backup_stacks(@context) or raise

      #recompute # of items @consumed
      @consumed -= previous_end - @context.position_stack.last
      assert @consumed >= 0
      assert idx + @consumed <= arr.size
      assert progress.check_result
      shorter = [progress.clean_result, @consumed]
      assert progress.check_result
      return shorter
    end

    # 3. ask the most recent nested match set for another match
    assert progress.check_result
    assert((0..@progress.matcher.max_matches) === @progress.regsidx)
    assert(@context.position_inc_stack.size >= 1)
    found, di, ri = progress.last_next_match(@context)
    if found && @progress.matcher.enough_matches?(ri, @progress.cursor.eof?)
      pair = [progress.clean_result, di]
      @consumed = di #not in twin...why?
      assert @consumed >= 0
      assert idx + pair.last <= arr.size
      assert progress.check_result
      return pair
    end

    assert((0..@progress.matcher.max_matches) === @progress.regsidx)
    assert(progress.check_result)

    # 4. backtrack and match again from there
    return @progress = nil if @progress.matchset_stack.size == @orig_stack_size
    return @progress = nil unless @progress.backtrack(@context)

    assert(progress.check_result)
    rematch, di, @ri = @progress.bt_match
    return @progress = nil if rematch.nil?

    assert @progress.regsidx
    assert((0..@progress.matcher.max_matches) === @progress.regsidx)

    pair = [@progress.clean_result, di]
    @consumed = di #no @consumed in twin
    assert @consumed >= 0
    assert ::Array === pair.first.first
    assert idx + pair.last <= arr.size
    assert progress.check_result
    pair
  end

end
1296
+
1297
+ #---------------------------------------------
1298
# Match set returned by Subseq#mmatch_full: yields the alternative ways the
# subsequence can match, driven by the backtracking state in progress.
class SubseqMatchSet < MatchSet

  # progress        - matching state; its current context is remembered
  # di              - number of items consumed by the first match
  # orig_stack_size - size of progress.matchset_stack before the subsequence
  #                   started matching; reaching it again means exhausted
  def initialize progress,di,orig_stack_size;
    @orig_stack_size= orig_stack_size
    @progress=progress
    @context=progress.context
    @orig_pos=progress.cursor.pos-di
    @firstresult= [progress.clean_result,di]
  end

  # Returns the next [result, consumed] pair, or nil when none is left.
  # Once exhausted, @progress is cleared and every later call returns nil.
  #
  # ary, start - input array and start position (not used here).
  def next_match(ary,start)
    if @firstresult
      @firstresult,result=nil,@firstresult
      assert ::Array===result#.first.first
      return result
    end

    # Exhausted sets clear @progress below; without this guard a further
    # call would fail on nil (RepeatMatchSet#next_match has the same guard).
    @progress or return

    assert @orig_stack_size<=@progress.matchset_stack.size
    @orig_stack_size==@progress.matchset_stack.size and return @progress=nil

    # first ask the most recent nested match set for another match
    result,di,ri=@progress.last_next_match(@context)
    if result and @progress.matcher.enough_matches? ri,@progress.cursor.eof?
      result=[@progress.clean_result,di]
      return result
    end

    #twin has a more sophisticated test on matchset_stack
    @progress.matchset_stack.empty? and return @progress=nil
    assert @progress.regsidx
    @progress.backtrack(@context) or return @progress=nil

    #need to adjust ri?
    #is this right... dunno...
    result,di,_bogus=@progress.bt_match

    if result
      result=[@progress.clean_result,di]
      assert ::Array===result.first.first
      return result
    end
  end

  # Maximum number of matches of the matcher being tracked.
  # Reads @progress directly: this class defines no `progress` reader
  # (RepeatMatchSet declares its own with attr).
  def match_iterations
    @progress.matcher.max_matches
  end

end
1352
+ #--------------------------
1353
# Match set for And matchers. No behaviour of its own yet: everything is
# inherited from SubseqMatchSet. The notes below record the intended design.
class AndMatchSet < SubseqMatchSet
  # The number of distinct ways to match an AndMatchSet in which several
  # branches are themselves ambiguous grows exponentially. Rather than
  # visit every possible match, the aim is to hit every legal match
  # *length* at least once.
  #
  # Intended next_match behaviour:
  # * Work out which alternative(s) currently return the longest match.
  #   Those are what the first match returns.
  # * On the second and later calls, next_match (roll back) every one of
  #   those longest alternatives until it matches something shorter -- or
  #   perhaps just something of a different length: Reg::Or isn't greedy,
  #   so its longest match isn't necessarily returned first.
  # * If any of those next_match calls returns nil (or false), the whole
  #   match set is finished: return nil from next_match now and forever more.
  #
  # An earlier constructor, kept for reference (never completed):
  #
  #   def initialize(progress,firstmatchlen)
  #     @progress=progress
  #     @firstmatch=[true,firstmatchlen]
  #     huh
  #   end
  #
  # This isn't really right yet. On next_match the longest alternative(s)
  # must be backtracked; if they then become shorter than the next-longest
  # alternative, that (formerly next-longest) alternative becomes the
  # dominating one and determines how much input is consumed.
end
1387
+ #might need Reg::Or too....
1388
+
1389
+ else #... not $MMATCH_PROGRESS
1390
# Array/start-index matching for subsequence matchers (the variant compiled
# when $MMATCH_PROGRESS is not defined).
class Subseq

  # Fast path for the case where no element of @regs is a multiple
  # (variable-length) matcher: compares each matcher with the item at the
  # same offset from start.
  #
  # Returns [true, @regs.size] on success, nil when too few items remain or
  # any matcher rejects its item.
  def mmatch(arr,start)
    assert start<=arr.size
    start+@regs.size<=arr.size or return nil
    @regs.each_with_index { |reg,idx|
      assert(start+idx<arr.size)
      reg===arr[start+idx] or return nil
    }
    return [true, @regs.size]
  end

  # General path, used when at least one of @regs is a multiple matcher.
  # Wraps arr in a cursor positioned at start and runs Progress#bt_match.
  #
  # Returns a SubseqMatchSet on success, otherwise the falsy bt_match result.
  def mmatch_full(arr,start)
    assert( (0..arr.size).include?( start))
    # Two separate statements. Written on one line without a separator, as
    # in the original, Ruby parses `arr.to_sequence cu.pos=start` as a call
    # taking `cu.pos=start` as its argument while cu is still nil.
    cu=arr.to_sequence
    cu.pos=start
    pr=Progress.new(self,cu)
    result,di,_bogus=pr.bt_match
    result and SubseqMatchSet.new(pr,di)
  end
end
1414
+
1415
# Array/start-index matching for repeat matchers (the variant compiled when
# $MMATCH_PROGRESS is not defined).
class Repeat

  # Fast path: counts how many consecutive items from start (at most
  # @times.end) satisfy @reg.
  #
  # Returns [true, n] when exactly @times.begin items matched, a
  # SingleRepeatMatchSet stepping down from the count found to @times.begin
  # when more matched, and nil when fewer matched.
  def mmatch(arr,start)
    i=-1
    (0...@times.end).each do |i2| i=i2
      start+i<arr.size or break(i-=1)
      @reg===arr[start+i] or break(i-=1)
    end
    i+=1
    assert( (0..@times.end)===i)
    if i==@times.begin
      return [true,i]
    end
    i>@times.begin or return nil
    return SingleRepeatMatchSet.new(i,-1,@times.begin)
  end

  # General path. Matches the mandatory @times.begin iterations with
  # Progress#bt_match, then (greedily) the optional ones with a second call.
  #
  # Returns nil on failure, [[RR[]], 0] for an empty match at end of input,
  # a RepeatMatchSet when alternatives remain, and otherwise the consumed
  # count reported by bt_match.
  def mmatch_full(arr,start)
    assert start <= arr.size
    r=[RR[]]

    # Two separate statements. Written on one line without a separator, as
    # in the original, Ruby parses `arr.to_sequence cu.pos=start` as a call
    # taking `cu.pos=start` as its argument while cu is still nil.
    cu=arr.to_sequence
    cu.pos=start
    pr=Progress.new(self,cu)

    #first match the minimum number
    if @times.begin==0 #if we can match nothing
      arr.size==start and return [r,0] #at end of input? return empty set
      ri=di=0
    else
      arr.size==start and return nil
      assert @times.begin<Infinity
      r,di,ri=pr.bt_match(self,@times.begin) #matches @reg @times.begin times
      r.nil? and return nil
    end
    assert ri==@times.begin

    assert !@times.exclude_end?
    left=@times.end-@times.begin

    #note: left and top could be infinite here...

    #do the optional match iterations
    #only greedy matching implemented for now
    #there must be a more efficient algorithm...
    if left >= 1
      assert pr.check_result
      #get remaining matches up to @times.end times
      res,di,ri=pr.bt_match
      #don't update to progress version
      assert @times===ri

      res and return RepeatMatchSet.new(pr,di)
    end

    #if matchset has no backtracking stops, and
    #hence cannot contain more than one actual match,
    #then just return that match.
    # `huh` is the author's marker for unfinished work, kept as written
    # (its definition is not in this chunk).
    huh 'this needs to change: matchset_stack is shared with whatever came before'
    pr.matchset_stack.empty? ? di : RepeatMatchSet.new(pr,di)
  end

end
1479
+
1480
+
1481
+ end # $MMATCH_PROGRESS
1482
+
1483
+
1484
+
1485
+
1486
class Repeat
  # "enable backtracking stops at each optional iteration"
  #
  # steps - an iteration count
  #
  # Returns whether steps falls within @times (tested with ===).
  def want_gratuitous_btstop?(steps)
    (@times === steps)
  end
end
1494
+
1495
+
1496
+ #---------------------------------------------
1497
# Entry point for matching a whole ::Array against this array matcher.
class Array
  # Runs a fresh Progress over other and accepts only a match that consumed
  # every element.
  #
  # Returns false when other is not an ::Array or the match stopped short;
  # otherwise the result reported by Progress#bt_match.
  def ===(other)
    return false unless ::Array === other #need to be more generous eventually

    progress = Progress.new(self, other.to_sequence)
    assert progress.regsidx == 0
    matched, consumed, _unused = progress.bt_match
    assert consumed.nil? || consumed <= other.size
    consumed == other.size && matched
  end
end
1507
+
1508
+ end
1509
+
1510
# Disabled (`if false`) workaround for warnings emitted by the sequence
# library: it would wrap #initialize of two Sequence classes so that
# @positions and @prop are assigned (to nil, if unset) before the original
# initializer runs.
if false #work-around warnings in cursor
  warn "warning: ugly workaround for chatty sequence warnings"

  # Evaluated with a class as self: captures the current #initialize and
  # redefines it to pre-assign the two instance variables first.
  nil_out_props = proc do
    original_initialize = instance_method :initialize

    define_method :initialize do |*args|
      @positions ||= nil
      @prop ||= nil
      original_initialize.bind(self).call(*args)
    end
  end

  ::Sequence::Indexed.instance_eval(&nil_out_props)
  ::Sequence::Position.instance_eval(&nil_out_props)
end