reg 0.4.8 → 0.5.0a0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +4 -0
  2. data/COPYING +0 -0
  3. data/History.txt +14 -0
  4. data/Makefile +59 -0
  5. data/README +87 -40
  6. data/article.txt +838 -0
  7. data/{assert.rb → lib/assert.rb} +3 -3
  8. data/{reg.rb → lib/reg.rb} +11 -4
  9. data/lib/reg/version.rb +21 -0
  10. data/lib/regarray.rb +455 -0
  11. data/{regarrayold.rb → lib/regarrayold.rb} +33 -7
  12. data/lib/regbackref.rb +73 -0
  13. data/lib/regbind.rb +230 -0
  14. data/{regcase.rb → lib/regcase.rb} +15 -5
  15. data/lib/regcompiler.rb +2341 -0
  16. data/{regcore.rb → lib/regcore.rb} +196 -85
  17. data/{regdeferred.rb → lib/regdeferred.rb} +35 -4
  18. data/{regposition.rb → lib/regevent.rb} +36 -38
  19. data/lib/reggraphpoint.rb +28 -0
  20. data/lib/reghash.rb +631 -0
  21. data/lib/reginstrumentation.rb +36 -0
  22. data/{regitem_that.rb → lib/regitem_that.rb} +32 -11
  23. data/{regknows.rb → lib/regknows.rb} +4 -2
  24. data/{reglogic.rb → lib/reglogic.rb} +76 -59
  25. data/{reglookab.rb → lib/reglookab.rb} +31 -21
  26. data/lib/regmatchset.rb +323 -0
  27. data/{regold.rb → lib/regold.rb} +27 -27
  28. data/{regpath.rb → lib/regpath.rb} +91 -1
  29. data/lib/regposition.rb +79 -0
  30. data/lib/regprogress.rb +1522 -0
  31. data/lib/regrepeat.rb +307 -0
  32. data/lib/regreplace.rb +254 -0
  33. data/lib/regslicing.rb +581 -0
  34. data/lib/regsubseq.rb +72 -0
  35. data/lib/regsugar.rb +361 -0
  36. data/lib/regvar.rb +180 -0
  37. data/lib/regxform.rb +212 -0
  38. data/{trace.rb → lib/trace_during.rb} +6 -4
  39. data/lib/warning.rb +37 -0
  40. data/parser.txt +26 -8
  41. data/philosophy.txt +18 -0
  42. data/reg.gemspec +58 -25
  43. data/regguide.txt +18 -0
  44. data/test/andtest.rb +46 -0
  45. data/test/regcompiler_test.rb +346 -0
  46. data/test/regdemo.rb +20 -0
  47. data/{item_thattest.rb → test/regitem_thattest.rb} +2 -2
  48. data/test/regtest.rb +2125 -0
  49. data/test/test_all.rb +32 -0
  50. data/test/test_reg.rb +19 -0
  51. metadata +108 -73
  52. data/calc.reg +0 -73
  53. data/forward_to.rb +0 -49
  54. data/numberset.rb +0 -200
  55. data/regarray.rb +0 -675
  56. data/regbackref.rb +0 -126
  57. data/regbind.rb +0 -74
  58. data/reggrid.csv +1 -2
  59. data/reghash.rb +0 -318
  60. data/regprogress.rb +0 -1054
  61. data/regreplace.rb +0 -114
  62. data/regsugar.rb +0 -230
  63. data/regtest.rb +0 -1078
  64. data/regvar.rb +0 -76
@@ -0,0 +1,79 @@
1
+ =begin copyright
2
+ reg - the ruby extended grammar
3
+ Copyright (C) 2005, 2016 Caleb Clausen
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+ require 'set'
20
+ #require 'reg'
21
+ module Reg
22
# Zero-width matcher that succeeds only when the cursor is at one of a fixed
# set of positions.  Position[...] with a negative first number builds a
# Position::FromEnd instead, whose numbers are compared against
# (cursor position - cursor size).
class Position
  include ::Reg::Reg

  class << self
    alias new__no_negatives new

    # All nums are expected to share one sign, so the first num alone decides
    # whether the whole set is interpreted as 'from end'.
    def new(*nums)
      return FromEnd.new(*nums) if negative?(nums.first)
      # splat is required: initialize takes *nums, and passing the array
      # itself would store a single Array element in the Set
      new__no_negatives(*nums)
    end
    alias [] new

    # True for negative numbers (including -0.0, via the sign of 1.0/x);
    # nil when x cannot be divided into a Float (e.g. nil or a non-number).
    def negative?(x)
      1.0 / x < 0
    rescue StandardError
      nil
    end
  end

  def initialize(*nums)
    @positions = Set[*nums]
  end

  # Returns [true, 0] (a match consuming no items) when the progress cursor's
  # adjusted position is one of the stored positions, otherwise nil.
  def mmatch(pr)
    # include? rather than ===: Set#=== is not a membership test on every Ruby
    [true, 0] if @positions.include?(adjust_position(pr.cursor, pr.cursor.pos))
  end

  # A position test never consumes input.
  def itemrange
    0..0
  end

  def inspect
    # built from the element list so it does not depend on the exact
    # format of Set#inspect
    "Reg::Position[#{@positions.to_a.inspect[1..-2]}]"
  end

  private

  # Hook for subclasses: maps the raw cursor position to the value that is
  # looked up in @positions.  Plain positions are used unchanged.
  def adjust_position(cu, pos)
    pos
  end

  # Position counted back from the end of the cursor (-1 is the last item).
  class FromEnd < Position
    class << self
      # bypass Position.new's negative check, which would recurse forever
      alias new new__no_negatives
      alias [] new
    end

    def inspect
      super.sub("ion", "ion::FromEnd")
    end

    private

    # pos - size is negative, matching the negative numbers stored
    def adjust_position(cu, pos)
      pos - cu.size
    end
  end
end
79
+ end
@@ -0,0 +1,1522 @@
1
+ =begin copyright
2
+ reg - the ruby extended grammar
3
+ Copyright (C) 2005, 2016 Caleb Clausen
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+ begin require 'rubygems'; rescue Exception; end
20
+
21
+ #$:<<"../sequence/lib" #temp hack
22
+ #require 'warning'
23
+ #warning "sequence found via temporary hack"
24
+ #$MMATCH_PROGRESS=1
25
+
26
+ require 'forwardable'
27
+
28
+ require 'sequence'
29
+ require 'sequence/indexed'
30
+
31
+
32
+
33
+ =begin the internal api
34
+ originally:
35
+ ResAryFrag=Array #it would be nice to get a more precise definition....
36
+ ResAry=+[ResAryFrag,-[MatchSet,Integer,ResAryFrag].*]
37
+
38
+ Reg%:mmatch[Array,Integer,
39
+ Returns( MatchSet|ResAryFrag|nil)
40
+ ]
41
+ Backtrace%:bt_match[Array,Integer,Integer,Integer,ResAry,Integer.-,
42
+ Returns( ResAry|nil,Integer,Integer)
43
+ ]
44
+ MatchSet%:next_match[Array,Integer,
45
+ Returns( ResAryFrag|nil,Integer)
46
+ ]
47
+
48
+ currently:
49
+ Reg%:mmatch[Progress, #has to change to take progress soon
50
+ Returns( MatchSet|ResAryFrag|nil)
51
+ ] #except subseq and repeat currently want progress
52
+ Progress%:bt_match[Integer.-, #affects progress, i'm pretty sure
53
+ Returns( ResAry|nil,Integer,Integer) #1st result used only as bool
54
+ ]
55
+ MatchSet%:next_match[Array,Integer, #affects progress? #needs to change too
56
+ Returns( ResAryFrag|nil,Integer)
57
+ ]
58
+ MatchSet%:initialize[Progress,OBS,Returns( MatchSet)] #for every ms class
59
+
60
+
61
+
62
+ former ultimate goal:
63
+ Reg%:mmatch[Progress, Returns( MatchSet|Integer|nil)] #affects progress on success (when integer returned)
64
+ Progress%:bt_match[Integer.-, Returns( Bool)] #affects progress on success
65
+ MatchSet%:next_match[Returns( Integer|nil)] #affects progress on success
66
+ #(modified progress is the same one as was given to the mmatch that created the matchset)
67
+ MatchSet%:initialize[Progress,OBS,Returns( MatchSet)] #for every ms class
68
+
69
+
70
+
71
+ now:
72
+ Reg%:cmatch[Progress, Yields[NeverReturns], NeverReturns] #throws :RegMatchFail on match failure, yields on success.
73
+ Reg%:bmatch[Progress, Returns(Object)] #returns a true value on success, nil or false on match failure
74
+
75
+
76
+ =end
77
+
78
+
79
+ #---------------------------------------------
80
+ module Reg
81
+
82
+ #---------------------------------------------
83
# Bookkeeping record: how many entries to discard from the undo stack and the
# match-succeed stack when backing up past one backtracking stop.
class MatchFailRec
  attr_accessor :undos_inc, :matchsucceed_inc # , :position_inc

  # Both counters start at zero.
  def initialize
    @undos_inc = 0
    @matchsucceed_inc = 0
    # @position_inc = 0
  end

  #position_inc is the number of positions to pop off position stack
  #to get back to the point before the match of the most recent matchset.
  #it is also the count by which to adjust regsidx to get back to the
  #corresponding reg which generated the matchset.
  #  alias regs_adjust position_inc
end
95
+
96
+ #---------------------------------------------
97
+ class Progress
98
+ # attr_reader :matcher, :cursor, :regsidx
99
+ attr_reader :variables
100
+
101
+ #for internal use only...
102
+ # attr_writer :undos_stack, :matchfail_todo, :matchsucceed_stack #, :regsidx
103
+
104
+
105
+ #matchset_stack and matchfail_todo are (nearly) parallel arrays; matchfail_todo has
106
+ #one more item in it (at the bottom). each matchfailrec represents the things to undo
107
+ #on failure to get back to the corresponding matchset's starting position.
108
+
109
+ #matchfail_todo is more or less a 2-dimensional array of integers. very many of
110
+ #those integers in the undos_inc and matchsucceed_inc columns will be zero. it
111
+ #would be nice to use a sparse vector or matrix instead.
112
+
113
+ #a progress has a stack of contexts
114
+ #a context has a (possibly empty) stack of matchsets
115
+ #a matchset has a context
116
+
117
+
118
+
119
+ #---------------------------------------------
120
# One level of matching state: the matcher being run, the data cursor it
# runs over, the index of the current sub-reg, and the stacks of positions
# recorded as sub-regs match.
class Context
  attr_reader :matcher, :position_stack, :context_type

  #position_inc_stack.last is the number of patterns that have successfully matched
  #since the last matchset was pushed onto matchset_stack. The pattern that created
  #the last matchset is included in this count, hence position_inc_stack.last must
  #always be 1 or greater, unless position_inc_stack contains one element.
  attr_reader :position_inc_stack

  attr_accessor :regsidx, :data, :context_index
  alias cursor data

  # matcher:: the multiple matcher this context is running
  # data::    cursor-like object; its current #pos seeds the position stack
  def initialize(matcher, data)
    @matcher = matcher
    @data = data
    @regsidx = 0
    @position_stack = [data.pos]
    @position_inc_stack = [0]
  end

  # Records the context type and wraps +data+ in a single-item sequence,
  # which becomes this context's data.
  def with_context(type, data)
    @context_type = type
    @data = ::Sequence::SingleItem.new(data)
  end

  # Explicit context_index if one was set, else the cursor position.
  def get_index
    context_index || data.pos
  end

  def position_inc
    position_inc_stack.last
  end

  # Advances the cursor by +inc+ (no movement by default), records the new
  # position as the start of the next match, and steps to the next sub-reg.
  # Returns the new regsidx.
  def push_match(inc = 0)
    cursor.move(inc)
    here = cursor.pos
    assert here >= position_stack.last
    position_stack << here
    position_inc_stack[-1] = position_inc_stack.last + 1
    self.regsidx = regsidx + 1
  end

  # Position at which this context started.
  def origpos
    position_stack.first
  end

  # Distance the cursor has moved since this context started.
  def posinc
    cursor.pos - origpos
  end
end
179
+
180
+ attr_reader :matchfail_todo,:matchset_stack
181
+ extend Forwardable
182
+ def_delegators "@context_stack.last", :matcher,:regsidx,:regsidx=, :with_context,
183
+ :data,:get_index,:position_stack,:push_match,#:matchfail_todo,:matchset_stack,
184
+ :context_type, :context_index, :context_index=, :position_inc_stack, :position_inc,
185
+ :origpos, :posinc
186
+ alias cursor data
187
+ alias regs_adjust position_inc
188
+ def_delegators :cursor, :move,
189
+ :scan, :skip, :check, :match?,
190
+ :scan_until, :skip_until, :check_until, :exist?,
191
+ :scanback, :skipback, :checkback, :matchback?,
192
+ :scanback_until, :skipback_until, :checkback_until, :existback?
193
+
194
+
195
# The innermost (most recently pushed) context.
def context
  @context_stack.last
end
196
+
197
# Synonym for #cursor.
def sequence
  cursor
end
198
+
199
+ #---------------------------------------------
200
# Sets up empty bookkeeping stacks and pushes the initial context for
# +matcher+ running over +cursor+.
def initialize(matcher, cursor)
  @variables = {}
  @matchset_stack = []
  # list of things to do when match fails....
  # undo(&adjust variables), matchsucceed, position, (matchset)
  @matchfail_todo = [MatchFailRec.new]
  # recursive list of undo procs and vars defined in this entire match
  @undos_stack = []
  # r-list of things to do when entire match succeeds... subst and deferreds
  @matchsucceed_stack = []

  # matcher, regsidx and cursor now live in the Context
  @context_stack = []
  newcontext(matcher, cursor)
end
218
+
219
+ #---------------------------------------------
220
# Pushes a fresh Context for +matcher+ over +data+ (the current cursor by
# default).  Always returns nil.
def newcontext(matcher, data = cursor)
  @context_stack << Context.new(matcher, data)
  nil
end
224
+ #a new context is created (newcontext is called) whenever entering
225
+ #a Subseq, Repeat, vector logical, and sometimes composite scalar
226
+ #classes such as Reg::Object, Reg::Array, Reg::Hash, Reg::Restrict,
227
+ #(or even a scalar logical)
228
+ #_if_ they contain an undo, variable binding, later or replacement
229
+ #(Reg::Transform, Reg::Undo, Reg::Later, or Reg::Bound)
230
+ #expression somewhere within them.
231
+ #once the expression that created the context is finished matching, it is popped
232
+ #from the context stack. however, a reference to it may remain from
233
+ #a MatchSet on the matchset_stack. (if there was a backtracking stop
234
+ #found during the (sub)match, there will be such a reference.)
235
+
236
+ #why should vector logicals create a new context?? now i think that was a mistake....
237
+
238
+ #---------------------------------------------
239
# Pops and returns the innermost context.
def endcontext
  @context_stack.pop
end
240
+
241
+ #---------------------------------------------
242
# Creates a backtracking stop: pushes +ms+ on the matchset stack together
# with a fresh MatchFailRec and a zero position-increment counter.
# The caller is responsible for calling push_match afterwards.
def push_matchset(ms = nil)
  assert(MatchSet === ms) if defined?(MatchSet)
  matchset_stack << ms
  matchfail_todo << MatchFailRec.new
  position_inc_stack.push(0)
end
251
+
252
+ =begin
253
+ #---------------------------------------------
254
+ #dunno if i really want this
255
+ def skip(mtr)
256
+ len=(cursor.skip mtr) || return
257
+ push_match len
258
+ return len
259
+ end
260
+ =end
261
+
262
+
263
+ #---------------------------------------------
264
+ #this method is dangerous! it leaves the Progress in an inconsistent state.
265
+ #caller must fixup state by either popping matchset_stack or pushing a matchfail_todo.
266
+ #called by last_next_match, backtrack, and next_match of RepeatMatchSet and SubseqMatchSet
267
# Unwinds one backtracking stop.  Pops the newest position-increment count
# from +ctx+, rewinds ctx.regsidx and ctx.position_stack by that amount,
# pops the newest MatchFailRec, runs and discards the undos it counted,
# discards the match-succeed entries it counted, and returns the popped
# matchset.  Returns nil without touching anything when +ctx+ has no
# backtracking stop (only one entry on its position_inc_stack).
# The caller must restore consistency afterwards (see the warning above).
def backup_stacks(ctx = context)
  return unless ctx.position_inc_stack.size > 1
  assert(ctx.position_inc_stack.size > 1)

  popped_positions = ctx.position_inc_stack.pop
  assert(ctx.position_inc_stack.size > 0)
  ctx.regsidx -= popped_positions # adjust position in matcher
  assert(ctx.position_stack.size >= popped_positions) # what if position stack is empty here?
  if popped_positions > 0
    ctx.position_stack.slice!(-popped_positions..-1)
  end

  assert(matchfail_todo.size >= 1)
  fail_rec = matchfail_todo.pop

  # backup undo stack and execute undos
  undo_count = fail_rec.undos_inc
  if undo_count > 0
    process_undos(@undos_stack.slice!(-undo_count..-1))
  end

  # backup matchsucceed stack
  succeed_count = fail_rec.matchsucceed_inc
  if succeed_count > 0
    @matchsucceed_stack.slice!(-succeed_count..-1)
  end

  matchset_stack.pop
end
291
+
292
+
293
+ #---------------------------------------------
294
# Backs up through backtracking stops until one of the stored matchsets
# yields another match.  Returns nil when backup_stacks runs out of stops.
# On success the matchset is pushed back, the cursor of +ctx+ is reset to
# the last recorded position, a new MatchFailRec / position counter is
# pushed, the match is recorded with push_match, and
# [ctx.regsidx, distance from ctx.origpos] is returned.
# NOTE(review): the loop reads cursor/position_stack of the current context
# while the rest uses +ctx+; these are the same object only for the default
# argument -- confirm intent before changing.
def backtrack(ctx = context)
  assert regsidx != Infinity
  assert check_result
  mat = nil
  loop do
    #warn "warning: ctx.position_stack not being updated in backup_stacks?"
    ms = backup_stacks(ctx)
    return unless ms

    mat = ms.next_match(cursor.data, position_stack.last)
    next unless mat

    matchset_stack.push(ms)
    #position_inc_stack.push 0 #i'm really unsure about this line
    #warn "warning: ctx.position_stack not being updated??"
    break
  end
  assert((1..matcher.max_matches) === regsidx + 1)
  assert(::Array === mat)
  #assert ::Array===mat.first

  # back up cursor position
  ctx.cursor.pos = ctx.position_stack.last

  matchfail_todo.push(MatchFailRec.new)
  ctx.position_inc_stack.push(0) #i'm really unsure about this line
  ctx.push_match(mat.last)

  assert regsidx
  assert check_result
  [ctx.regsidx, ctx.cursor.pos - ctx.origpos]
end
327
+
328
+ #---------------------------------------------
329
+ #lookup something that was already matched, either by
330
+ #name or index(es).
331
+ #probably need to take a full path for parameters
332
# Placeholder: not implemented yet, simply invokes +huh+.
def backref
  huh
end
333
+
334
# Internal use only.  Records +parent+ and, when a matcher is given, stores
# it and resets @regsidx to 0.  +cu+ and +ps+ are currently ignored (the
# assignments that used them are disabled).
# Returns 0 when +mtr+ was given, nil otherwise.
def set_state!(cu, ps, mtr, parent) #internal use only
  @parent = parent
  return unless mtr

  @matcher = mtr # @matcher might be set to something different
  @regsidx = 0
end
352
+
353
+ #---------------------------------------------
354
# Builds a shallow copy of this Progress for a sub-match.
# cu::  data for the copy; converted with ::Sequence.from unless it already
#       is a ::Sequence.  When omitted, result.cursor.position is used.
# mtr:: optional matcher handed to set_state!
# Returns the copy.  Slated for replacement by newcontext/endcontext.
def subprogress(cu = nil, mtr = nil)
  # warn 'subprogress not quite thought out...'
  huh "replace this method with newcontext/endcontext"
  result = dup

  sub_cursor =
    if !cu
      result.cursor.position # make a sub-cursor
      #make real SubCursor here?
    elsif ::Sequence === cu
      cu
    else
      ::Sequence.from(cu) #convert other data to a cursor...
    end
  result.set_state!(sub_cursor, [result.cursor.pos], mtr, self)

  #should this be in self, or result?

  assert result.check_result

  result
end
375
+
376
+ #---------------------------------------------
377
# Emits a warning that this is incomplete, then returns the object's #hash.
def make_hash
  warn("warning: i want more here...")
  hash
end
381
+
382
+ #---------------------------------------------
383
# Exclusive range spanning the two most recent entries of position_stack.
def last_match_range
  stack = position_stack
  Range.new(stack[-2], stack[-1], true)
end
386
+
387
+ #---------------------------------------------
388
# Most recently pushed matchset, or nil when the stack is empty.
def top_matchset
  stack = matchset_stack
  stack.last
end
391
+
392
+ #---------------------------------------------
393
# Names of all variables that have an entry in @variables.
def variable_names
  @variables.each_key.to_a
end
396
+
397
+ #---------------------------------------------
398
# Latest raw binding recorded for +name+; nil when the name has no entry.
def raw_variable(name)
  assert(::Symbol.reg|::String===name)
  history = @variables[name]
  history && history.last
end
402
+
403
+ #---------------------------------------------
404
+ #always returns array or string, not single item
405
# Resolves the latest binding of +name+: the binding is read as a
# [cursor, index] pair and cursor[index] is returned.  Yields nil when the
# name is unknown or the recorded cursor is nil.
#always returns array or string, not single item
def lookup_var(name)
  assert(::Symbol.reg|::String===name)
  history = @variables[name]
  return history unless history

  cu, idx = *history.last
  cu && cu[idx]
end
409
+ alias [] lookup_var
410
+
411
+ #---------------------------------------------
412
# Drops the newest binding of +name+ together with its entry on the undo
# stack, and decrements the undo count of the current MatchFailRec.
# Always returns nil.
def unregister_var(name)
  assert(::Symbol.reg|::String===name)
  @variables[name].pop
  assert @undos_stack.last.equal?(name) #maybe this isn't true????....
  @undos_stack.pop
  fail_rec = matchfail_todo.last
  fail_rec.undos_inc -= 1
  assert matchfail_todo.last.undos_inc >= 0
  nil
end
421
+ #---------------------------------------------
422
# Records +bound_to+ verbatim as the newest binding of +name+, notes the
# name on the undo stack, and bumps the undo count of the current
# MatchFailRec.  Returns the new undo count.
def raw_register_var(name, bound_to)
  assert(::Symbol.reg|::String===name)
  #@variables[name] and warn( "variable #{name} is already defined")
  (@variables[name] ||= []) << bound_to
  @undos_stack.push(name)
  fail_rec = matchfail_todo.last
  fail_rec.undos_inc += 1
end
430
+
431
+ #---------------------------------------------
432
# Binds +name+ to +bound_to+ within the current cursor: pushes
# [cursor, bound_to] as the newest binding, notes the name on the undo
# stack, and bumps the undo count of the current MatchFailRec.
# Returns the new undo count.
def register_var(name, bound_to)
  assert(::Symbol.reg|::String===name)
  @variables[name] ||= []
  #@variables[name] and warn( "variable #{name} is already defined")
  # use the cursor accessor: the @cursor ivar is never assigned in this
  # class, so storing it recorded nil and made lookups always fail
  @variables[name].push [cursor, bound_to]
  @undos_stack << name
  matchfail_todo.last.undos_inc += 1
end
440
+
441
+ #---------------------------------------------
442
# Every value +sym+ has been bound to, oldest first; each stored
# [cursor, index] pair is resolved as cursor[index].
def bindhistory(sym)
  @variables[sym].map do |binding_pair|
    cu, idx = binding_pair
    cu[idx]
  end
end
445
+
446
+ #---------------------------------------------
447
# Queues an undo action: a proc that calls +block+ with +args+ is pushed on
# the undo stack and counted in the current MatchFailRec.
# Returns the new undo count.
def register_undo(*args, &block)
  undo = proc { block.call(*args) }
  @undos_stack.push(undo)
  fail_rec = matchfail_todo.last
  fail_rec.undos_inc += 1
end
451
+
452
+ #---------------------------------------------
453
# Walks +undos+ (the whole undo stack by default) in reverse via
# Ron::GraphWalk.recursive_reverse_each.  An entry that is a variable name
# pops that variable's newest binding; any other entry is called.
def process_undos(undos = @undos_stack)
  #i think regular reverse_each will work as well...
  Ron::GraphWalk.recursive_reverse_each(undos) do |undo|
    if ::Symbol.reg|::String===undo
      @variables[undo].pop
    else
      undo.call
    end
  end
end
459
+
460
+
461
+ #---------------------------------------------
462
# Queues a replacement to run when the whole match succeeds: a
# context_type instance is built over context.data, +index+ and +len+ with
# a block that evaluates +rep_exp+ through Replace.evaluate, pushed on the
# match-succeed stack and counted in the current MatchFailRec.
# Returns the new match-succeed count.
def register_replace(index, len, rep_exp)
  huh #hmmm.... may need some work. what is context_type defined as?
  deferred = context_type.new(context.data, index, len) do |gp|
    Replace.evaluate(rep_exp, self, gp)
  end
  @matchsucceed_stack.push(deferred)
  fail_rec = matchfail_todo.last
  fail_rec.matchsucceed_inc += 1
end
469
+
470
+ #---------------------------------------------
471
# Queues an action for when the whole match succeeds: a proc that calls
# +block+ with +args+ is pushed on the match-succeed stack and counted in
# the current MatchFailRec.  Returns the new match-succeed count.
def register_later(*args, &block)
  later = proc { block.call(*args) }
  @matchsucceed_stack << later
  fail_rec = matchfail_todo.last
  fail_rec.matchsucceed_inc += 1
end
475
+
476
+ #---------------------------------------------
477
# Calls every queued match-succeed action, walking the stack in reverse via
# Ron::GraphWalk.recursive_reverse_each.
def process_laters
  #i think regular reverse_each will work as well...
  Ron::GraphWalk.recursive_reverse_each(@matchsucceed_stack) do |later|
    later.call
  end
end
481
+
482
+ =begin
483
+ #---------------------------------------------
484
+ class Later #inside Progress, so it doesn't conflict with Reg::Later from regreplace.rb
485
+ def initialize(block,args)
486
+ @block,@args=block,args
487
+ end
488
+ class<<self;
489
+ alias [] new;
490
+ end
491
+
492
+ def call
493
+ @block.call( *@args)
494
+ end
495
+ end
496
+ =end
497
+
498
+
499
+ #--------------------------
500
+ $RegTraceEnable=$RegTraceDisable=nil
501
# Whether bt_match should print trace output.  $RegTraceEnable wins when
# set; otherwise tracing is off if $RegTraceDisable is set, and falls back
# to this object's @trace flag.
def trace_enabled?
  @trace ||= nil
  return $RegTraceEnable if $RegTraceEnable
  return false if $RegTraceDisable

  @trace
end
505
+
506
+ #--------------------------
507
+ #bt, in this case, stands for 'backtracking'.
508
+ #but the cognoscenti refer to this method as 'bitch-match'.
509
+ #match the multiple matcher mtr against the input data in current #cursor
510
+ #but backtracking all along if any submatches fail
511
+ #remember, a multiple matcher has many sub-reg expressions
512
+ #(or in the case of Reg::Repeat, one expression used multiple times)
513
+ #that each have to match the input at some point. (sequentially one after
514
+ #another in the case of Repeat and Subseq, all at the same point in input
515
+ #in the case of Reg::And.)
516
+
517
+ #returns nil if no match, or if a match is found, returns
518
+ #[true, # of data items consumed, number of matchers used ( - 1?)]
519
+
520
+ #used in #mmatch_full of Reg::Array, Reg::Subseq, Reg::Repeat, Reg::And
521
+ #and in the corresponding MatchSets
522
+ #also in #last_next_match
523
+
524
+ #The Reg::And version employs a trick (defining #update_di to leave di unchanged)
525
+ #that will ensure each sub-reg starts at the same place in #cursor as the first one.
526
+
527
+ #Reg::Or and Reg::Xor start each sub-reg at the same place as well, but effectively
528
+ #only one sub-reg of Reg::Or or Reg::Xor ever matches input overall. With Xor, it must
529
+ #be guaranteed that only one alternative can match at all at the current position in
530
+ #input. With Or, #mmatch kicks out early once the first successful match is found.
531
+ #subsequent matches in the overall expression might fail, causing the Or to be backtracked
532
+ #into and a different alternative to be considered, but in that case, the first alternative
533
+ #is considered to have failed overall, and any side effects in it are undone.
534
+
535
+ #why is this important? Reg::And must call bt_match, because a Variable
536
+ #binding in one branch might be used in a subsequent branch of the overall expression.
537
+ #with Reg::Or and Xor, that cannot be the case, and hence they need not call bt_match
538
+
539
+ #backtracking stops
540
+ #a subexpression that might match multiple things in the current input creates a
541
+ #backtracking stop within the current Progress (self). creating a new backtracking
542
+ #stop means pushing an entry onto each of @matchset_stack, @matchfail_todo and #position_inc_stack.
543
+
544
+ #bt_match returns 3 things if an initial match could be found:
545
+ #true,
546
+ #the number of data items in cursor to be consumed in the initial match, and
547
+ #the number of sub-regs that were used. the 3rd is only really maybe needed if
548
+ #mtr is a Repeat.
549
+ #bt_match returns nil if no initial match could be found.
550
+
551
+ #if the initial match is unsatisfactory, you should call #backtrack to get another
552
+ #potential match
553
+
554
# Matches the sub-regs of the multiple matcher +mtr+ against the current
# cursor, one after another, backtracking whenever a sub-reg fails.
#
# mtr::         matcher to run; nil or omitted means the context's matcher
# match_steps:: sub-reg count at which a satisfied match is reported;
#               defaults to the matcher's max_matches
#
# Returns [to_result, posinc, regsidx] once the matcher reports
# enough_matches?, or nil when a sub-reg fails and backtracking is exhausted.
def bt_match(mtr=matcher,match_steps=(mtr||matcher).max_matches)
  # the default above must tolerate an explicit nil mtr: defaults are
  # evaluated before this fallback runs
  mtr ||= matcher
  assert cursor.pos <= cursor.size
  assert origpos >= 0
  assert posinc >= 0
  assert( (0..match_steps)===regsidx)
  assert Integer===position_stack.first
  assert check_result
  loop do #loop over regs to match
    assert cursor.pos <= cursor.size
    assert posinc >= 0
    assert( (0..match_steps)===regsidx || !(mtr.enough_matches? regsidx,cursor.eof?))

    if trace_enabled?
      puts [cursor.pos, regsidx, mtr, clean_result].map{|i| i.inspect }.join(' ')
      #pp self
    end

    assert check_result

    #try a new match of current reg
    r=mtr.regs(regsidx)
    if r.respond_to?(:mmatch) and not Formula===r
      #but what about RegThat? should test for being a Reg::Reg instead
      if defined? $MMATCH_PROGRESS
        m=r.mmatch(self)
      else
        # 'mmatch could return 2 items here'
        m=r.mmatch(cursor.data, cursor.pos)
      end

      assert check_result

      assert(::Array===m || MatchSet===m || !m)

      #is a single match or a match set?
      if m.respond_to? :next_match
        #it's a set -- start new inner result array
        #with initial match as first elem
        push_matchset m
        mat,matchlen=m.next_match(cursor.data, cursor.pos)

        assert mat
        assert m
      else
        mat,matchlen=*m #single match or nil
        m=nil
      end
    else
      # plain object: compare the next item with ===, consuming one item
      if !cursor.eof? and r===(item=cursor.readahead1)
        mat=RR[item]
        matchlen=1
      end
    end

    assert check_result

    if matchlen #match succeeded
      if !m and mtr.respond_to?(:want_gratuitous_btstop?) and
         mtr.want_gratuitous_btstop?(regsidx)
        push_matchset SingleMatch_MatchSet.new
      end

      #advance to next reg
      assert check_result
      push_match mtr.update_di(0,matchlen)
      assert(cursor.pos<=cursor.size)
    else #match fail?
      assert check_result
      return to_result,posinc,regsidx if mtr.enough_matches?(regsidx,cursor.eof?)

      #doesn't match, try backtracking
      assert regsidx
      backtrack or return nil #bt failed? we fail
      assert(cursor.pos<=cursor.size)
      assert check_result
      assert(!(mtr.enough_matches? regsidx,cursor.eof?))
    end

    assert(cursor.pos<=cursor.size)

    assert check_result
    assert(matchlen || !(mtr.enough_matches? regsidx,cursor.eof?))
    return to_result,posinc,regsidx if regsidx>=match_steps and mtr.enough_matches?(regsidx,cursor.eof?)
    assert( (0..match_steps)===regsidx || !(mtr.enough_matches? regsidx,cursor.eof?))

  end #loop

end
650
+
651
+ #---------------------------------------------
652
+ #maybe this isn't necessary?
653
+ #because backtrack is called after it,
654
+ #and it's doing the same things.... more or less
655
+ #used in RepeatMatchSet#next_match and SubseqMatchSet#next_match
656
+ #this method appears to be changing things that it shouldn't?!
657
# Asks the most recent backtracking stop of +ctx+ for its next alternative.
# Used in RepeatMatchSet#next_match and SubseqMatchSet#next_match.
#
# Returns [nil, nil, regsidx] when there is no stop to back up to, or when
# the stop is exhausted and the matcher is not satisfied;
# [to_result, consumed, regsidx] when the stop is exhausted but the matcher
# already has enough matches or when all sub-regs are matched; otherwise the
# result of re-running bt_match for the remaining sub-regs.
#
#maybe this isn't necessary?
#because backtrack is called after it,
#and it's doing the same things.... more or less
#this method appears to be changing things that it shouldn't?!
def last_next_match(ctx = context)
  assert check_result
  assert((0..ctx.matcher.max_matches) === ctx.regsidx)
  assert(ctx.position_inc_stack.size >= 1)
  stop = backup_stacks(ctx) #need to back up the context, not progress (at least sometimes)

  cursor.pos = ctx.position_stack.last
  assert((0..ctx.matcher.max_matches) === ctx.regsidx)
  if !stop
    matchfail_todo.push(MatchFailRec.new)
    assert check_result
    return nil, nil, regsidx
  end
  ctx.position_inc_stack.push(0) #i'm really unsure about this line

  #matchset_stack.pop is called in backtrack but not here, why?

  found, diinc = stop.next_match(ctx.cursor.data, ctx.cursor.pos)
  matchset_stack.push(stop)
  if !found
    #might need to return non-nil here, if resfrag isn't exhausted yet
    assert((0..ctx.matcher.max_matches) === ctx.regsidx)
    matchset_stack.pop
    assert check_result
    #huh #oops, should I really be using ctx here?
    if ctx.matcher.enough_matches?(ctx.regsidx, ctx.cursor.eof?)
      return to_result, ctx.cursor.pos - ctx.position_stack.first, ctx.regsidx
    end
    return nil, nil, ctx.regsidx
  end

  assert diinc
  assert ctx.cursor.pos + diinc <= ctx.cursor.size
  ctx.cursor.move(diinc)
  #regsidx-=matchfail_todo.position_inc #should be done in push_match...
  matchfail_todo.push(MatchFailRec.new)
  ctx.position_inc_stack.push(0) #i'm really unsure about this line
  ctx.push_match #need to affect ctx instead of self?

  assert((0..ctx.matcher.max_matches) === ctx.regsidx)
  if ctx.regsidx < ctx.matcher.max_matches #if there are more subregs of this reg to be matched
    #re-match tail regs
    assert ctx.cursor.pos <= ctx.cursor.size
    #di is sometimes bad here, it seems....(fixed now?)
    assert check_result
    assert((0..ctx.matcher.max_matches) === ctx.regsidx)
    huh #need to re-start matching where previous bt_match left off
    huh #should bt_match below be looking at ctx instead of self?
    result = bt_match
    assert check_result
    return result
  end

  assert((0..ctx.matcher.max_matches) === ctx.regsidx)
  assert check_result

  return to_result, posinc, ctx.regsidx
end
717
+
718
+ =begin
719
+ #---------------------------------------------
720
+ def check_result;
721
+
722
+ if defined? $not_right_now #failing now, dunno why, maybe re-enable later
723
+ #since this should be true, a separate regsidx is unnecessary
724
+ ri=0
725
+ current=self
726
+ begin
727
+ ri+=current.regsidx
728
+ end while current=current.parent
729
+ assert ri==position_stack.size-1
730
+
731
+ #matchset_stack should be 1 smaller than matchfail_todo
732
+ matchsets=0
733
+ current=self
734
+ begin
735
+ matchsets+=current.matchset_stack.size
736
+ end while current=current.parent
737
+ assert matchfail_todo.size-1==matchsets
738
+ end
739
+
740
+ #verify correct types in @-variables
741
+ assert ::Sequence===cursor
742
+ assert matcher.respond_to?( :update_di)
743
+ assert regsidx >=0
744
+ matchset_stack.each{|ms| assert MatchSet===ms }
745
+ prev_pos=0
746
+ position_stack.each{|pos| assert prev_pos<=pos; pos=prev_pos }
747
+ assert prev_pos<=cursor.size
748
+
749
+ vars_copy=@variables.dup
750
+ @undos_stack.each {|i|
751
+ case i
752
+ #every element of @variables should also be a sym in @undos_stack
753
+ when Symbol,String:
754
+ vars_copy.delete(i) or assert(false)
755
+
756
+ when Later,::Proc:
757
+ else assert(false)
758
+ end
759
+ }
760
+ assert vars_copy.empty? #every var should be accounted for
761
+
762
+ #sum of :undos_inc,:matchsucceed_inc,:position_inc in matchfail_todo
763
+ #should be the same as the size of the corresponding stack.
764
+ uns=mats=poss=0
765
+ matchfail_todo.each{|mfr|
766
+ uns+=mfr.undos_inc
767
+ mats+=mfr.matchsucceed_inc
768
+ # poss+=mfr.position_inc
769
+ }
770
+ assert uns==@undos_stack.size
771
+ assert mats==@matchsucceed_stack.size
772
+ # assert poss+1==position_stack.size
773
+
774
+ assert succ_stack_ok
775
+
776
+ return true
777
+ end
778
+
779
+ #---------------------------------------------
780
+ def succ_stack_ok(stk=@matchsucceed_stack)
781
+ stk.each{|elem|
782
+ case elem
783
+ when Array: succ_stack_ok(elem)
784
+ when Later: true
785
+ else
786
+ end or return
787
+ }
788
+ return true
789
+ end
790
+ private :succ_stack_ok
791
+ =end
792
+ #---------------------------------------------
793
# Returns the matched input as a list of slices, one per pair of adjacent
# entries in position_stack: slice i starts at position_stack[i] and has
# length position_stack[i+1] - position_stack[i]. A stack with fewer than
# two entries yields an empty list.
#
# An earlier variant that substituted nested matchset results for some of the
# slices was already commented out in the original; only the plain slicing
# remains, so the locals that served the disabled path have been removed.
def clean_result
  marks = position_stack
  (0...marks.size - 1).map do |i|
    cursor[marks[i], marks[i + 1] - marks[i]]
  end
end
811
+
812
+ #---------------------------------------------
813
# Placeholder result: unconditionally reports success rather than building a
# real result object (the original author's own note: "ok, i'm cheating").
def to_result
  true
end
816
+
817
+ end #class Progress
818
+
819
+
820
+
821
+ if defined? $MMATCH_PROGRESS #ultimately, mmatch will take a progress, but until then, disable this
822
+ #---------------------------------------------
823
class Array
  # Matches this array matcher against the single item the cursor is looking
  # at, which must itself be an ::Array.
  #
  # Returns false when the next item is not an ::Array; otherwise returns
  # [true, 1] when bt_match succeeded and consumed the whole nested array,
  # or a falsy value when it did not.
  def mmatch_full(progress)
    candidate = progress.cursor.readahead1
    return false unless ::Array === candidate # need to be more generous eventually

    # Descend into the nested array with its own context.
    progress.newcontext(self, candidate.to_sequence)
    assert(progress.regsidx == 0)
    matched, consumed, _unused = progress.bt_match
    assert(consumed.nil? || consumed <= candidate.size)
    progress.endcontext
    # should be returning a matchset here sometimes
    consumed == candidate.size && matched && [true, 1]
  end
end
837
+
838
+
839
+
840
+
841
+ #---------------------------------------------
842
class Subseq
  # Variant used when none of @regs is a multiple reg: each reg is compared
  # against exactly one item of the look-ahead buffer.
  #
  # Returns [true, @regs.size] on success, nil if too few items remain or any
  # reg fails to match. The context opened here is always closed again.
  def mmatch(pr)
    pr.newcontext(self)
    cu = pr.cursor
    assert(cu.pos <= cu.size)
    return nil unless cu.pos + @regs.size <= cu.size

    window = cu.readahead(@regs.size)
    @regs.each_with_index do |reg, offset|
      assert(cu.pos < cu.size)
      return nil unless reg === window[offset]
    end
    [true, @regs.size]
  ensure
    pr.endcontext
  end

  private

  # Variant used when at least one of @regs is a multiple reg: delegates to
  # pr.bt_match and wraps a successful outcome in a SubseqMatchSet.
  #
  # Returns the SubseqMatchSet, or the falsy bt_match result / nil on failure.
  # The cursor position is put back to where it started before returning.
  def mmatch_full(pr)
    orig_stack_size = pr.matchset_stack.size
    pr.newcontext(self)
    cu = pr.cursor
    start = cu.pos
    # `result` is assigned even on the early exit because the ensure clause
    # inspects it.
    return(result = nil) unless start + itemrange.begin <= cu.size

    assert((0..cu.size).include?(start))
    assert(pr.regsidx == 0)
    result, di, _unused = pr.bt_match
    result &&= SubseqMatchSet.new(pr, di, orig_stack_size)
    return result
  ensure
    # Unless a matchset is handed back, the matchset stack must be unchanged.
    assert(MatchSet === result || pr.matchset_stack.size == orig_stack_size)
    pr.cursor.pos = start
    assert(start == pr.cursor.pos)
    pr.endcontext
  end
end
880
+
881
+ #---------------------------------------------
882
class Repeat
  include CausesBacktracking

  # Greedily applies @reg to successive items read from the cursor, at most
  # @times.end times, then rewinds the cursor to its starting position.
  #
  # Returns nil when fewer than @times.begin items matched (or not enough
  # input remains), [true, count] when exactly @times.begin matched, and a
  # SingleRepeatMatchSet counting down from the matched count to @times.begin
  # otherwise.
  def mmatch(pr)
    assert(pr.check_result)
    pr.newcontext(self)
    cu = pr.cursor
    start = cu.pos
    return nil unless start + @times.begin <= cu.size # enough room left in input?

    count = 0
    (0...@times.end).each do |n|
      break unless start + n < cu.size
      break unless @reg === cu.read1
      count = n + 1
    end
    assert((0..@times.end) === count)
    assert(pr.check_result)
    cu.pos = start
    return [true, count] if count == @times.begin
    return nil unless count > @times.begin

    SingleRepeatMatchSet.new(pr, count, -1, @times.begin)
  ensure
    pr.endcontext
  end

  private

  # Backtracking variant. First matches the mandatory @times.begin iterations
  # through pr.bt_match, then (greedily) the optional ones.
  #
  # Returns nil on failure, [true, di] when no backtracking stops were left
  # on the matchset stack, or a RepeatMatchSet otherwise. The cursor is
  # rewound to its starting position before returning.
  #
  # NOTE(review): when the optional bt_match fails, `di` has been reassigned
  # from that failed call before it is used in the final result — confirm
  # this is intended.
  def mmatch_full(pr)
    pr.newcontext(self)
    cu = pr.cursor
    orig_stack_size = pr.matchset_stack.size
    start = cu.pos
    assert(start <= cu.size)
    # `result` is assigned on every exit because the ensure clause reads it.
    return(result = nil) unless start + itemrange.begin <= cu.size # enough room left in input?

    # first match the minimum number
    if @times.begin == 0 # if we can match nothing
      return(result = [true, 0]) if cu.eof? # at end of input? return empty set
      ri = di = 0
    else
      return(result = nil) if cu.eof?
      assert(@times.begin < Infinity)
      assert(pr.regsidx == 0)
      required, di, ri = pr.bt_match(nil, @times.begin) # matches @reg @times.begin times
      return(result = nil) if required.nil?
    end
    assert(ri == @times.begin)

    assert(!@times.exclude_end?)
    left = @times.end - @times.begin

    # note: left and top could be infinite here...

    # do the optional match iterations
    # only greedy matching implemented for now
    # there must be a more efficient algorithm...
    if left >= 1
      # need to re-start matching where previous bt_match left off
      assert(pr.check_result)
      # get remaining matches up to @times.end times
      snapshot = pr.make_hash
      assert(snapshot)
      assert(pr.regsidx == @times.begin)
      res, di, ri = pr.bt_match # bt stop at each iteration, this time
      assert(pr.check_result)
      assert(@times === pr.regsidx)

      return(result = RepeatMatchSet.new(pr, di, orig_stack_size)) if res
      assert(snapshot == pr.make_hash)
    end

    # if matchset has no backtracking stops, and
    # hence cannot contain more than one actual match,
    # then just return that match.
    result =
      if pr.matchset_stack.size == orig_stack_size
        [true, di]
      else
        RepeatMatchSet.new(pr, di, orig_stack_size)
      end
    return result
  ensure
    assert(MatchSet === result || pr.matchset_stack.size == orig_stack_size)
    pr.cursor.pos = start # is it really this simple? I'm doubtful....
    assert(pr.cursor.pos == start)
    pr.endcontext
  end
end
968
+
969
+
970
+
971
+ #---------------------------------------------
972
class And
  include CausesBacktracking

  private

  # Variant for the case where at least one of @regs is a multiple reg.
  # Can't be used until the mmatch interface is changed to take a single
  # progress param.
  #
  # Runs progress.bt_match inside a fresh context and, on success, wraps the
  # outcome in an AndMatchSet; otherwise returns the falsy bt_match result.
  #
  # Known gaps recorded by the original author:
  # * di is always 0 here, because And#update_di never does anything; some
  #   other way is needed to figure out how many items were consumed.
  # * AndMatchSet needs a new definition.
  # * It must be tracked which alternative(s) was longest, so as to advance
  #   the cursor by that amount and know which ones to start backtracking in.
  #
  # NOTE(review): AndMatchSet inherits SubseqMatchSet#initialize, which takes
  # (progress, di, orig_stack_size); the two-argument call below looks like it
  # would raise ArgumentError — confirm before enabling this path.
  def mmatch_full(progress)
    progress.newcontext(self)
    assert((0..progress.cursor.size).include?(progress.cursor.pos))
    assert(progress.regsidx == 0)
    matched, di, _unused = progress.bt_match

    matched && AndMatchSet.new(progress, di)
  ensure
    progress.endcontext
  end
end
998
+
999
+ #--------------------------
1000
# Match set for a repeat of a single-item matcher: the alternatives differ
# only in how many items are taken, stepping from startcnt towards endcnt.
class SingleRepeatMatchSet < MatchSet
  # progress:: stored, not otherwise used by this class
  # startcnt:: item count of the first match handed out
  # stepper::  amount added to the count after each match (asserted to be
  #            +1 or -1 when matches are requested)
  # endcnt::   last item count that may be handed out
  #
  # Raises RuntimeError when the set would contain a single match or when
  # stepper points away from endcnt.
  def initialize(progress, startcnt, stepper, endcnt)
    raise 'why even make it a set, then?' if endcnt == startcnt
    raise "tried to make null match set" unless (endcnt - startcnt) * stepper > 0
    assert(startcnt >= 0)
    assert(endcnt >= 0)
    @progress = progress
    @matchtimes = startcnt
    @stepper = stepper
    @endcnt = endcnt
  end

  # Hands out the next alternative as [RR[slice], count], where slice is
  # arr[idx...idx+count], then steps the count. Returns nil once the count
  # has moved past @endcnt.
  def next_match(arr, idx)
    assert(@stepper.abs == 1)
    return nil unless (@endcnt - @matchtimes) * @stepper >= 0
    assert(@matchtimes >= 0)

    taken = @matchtimes
    result = [RR[arr[idx...idx + taken]], taken]
    assert(::Array === result.first.first)
    @matchtimes += @stepper
    result
  end
end
1019
+
1020
+ #--------------------------
1021
# Match set for an Or matcher: yields the first match found when the set was
# built, then further matches from the current alternative's own match set,
# then matches from the remaining alternatives in order.
class OrMatchSet < MatchSet
  # progress::      progress object handed to each alternative's mmatch
  # orreg::         the Or matcher; its #regs are the alternatives
  # idx::           index (into orreg.regs) of the alternative that matched
  # set::           that alternative's MatchSet, or nil
  # firstmatchlen:: the first match; asserted to be an Array (next_match
  #                 further asserts it is a 2-element pair ending in an
  #                 Integer)
  def initialize(progress, orreg, idx, set, firstmatchlen)
    @orreg = orreg
    @idx = idx
    @set = set
    @firstmatch = firstmatchlen
    @progress = progress
    assert(::Array === @firstmatch)
    # assert @firstmatch.nil? || Integer===@firstmatch
  end

  # Returns the next match, or nil when every alternative is used up.
  #
  # NOTE(review): an alternative whose mmatch answers with an Array pair is
  # matched by neither `when` below and is therefore skipped; and `a[idx]`
  # indexes the per-alternative array with the input position `idx` rather
  # than @idx. Both look unintended but are left as found — confirm.
  def next_match(ary, idx)
    if @firstmatch
      result, @firstmatch = @firstmatch, nil
      assert(::Array === result)
      # assert ::Array===result.first.first
      assert(2 == result.size)
      assert(Integer === result.last)
      return result
    end

    result = @set.next_match(ary, idx) if @set
    while result.nil?
      @idx += 1
      return nil if @idx >= @orreg.regs.size

      x = @orreg.regs[@idx].mmatch(@progress) # hard spot
      result =
        case x
        when MatchSet
          @set = x
          # Fix: next_match takes (ary, idx); the original called it with no
          # arguments, which raises ArgumentError.
          x.next_match(ary, idx)
        when Integer
          @progress.cursor.readahead(x)
        end
    end

    a = RR[nil] * @orreg.regs.size
    a[idx] = result[0]
    result[0] = a
    assert(::Array === result.first.first)
    result
  end
end
1054
+
1055
+ #--------------------------
1056
class Or
  include CausesBacktracking

  # Scalar variant: compares each alternative against the single item under
  # the cursor and returns an OrMatchSet for the first one that matches, or
  # nil at end of input / when nothing matches.
  #
  # Fix: the first match is passed as the pair [true, 1]. The original passed
  # the bare Integer 1, which OrMatchSet#initialize rejects (it asserts the
  # first match is an Array).
  def mmatch(pr)
    # assert start <= arr.size
    cu = pr.cursor
    return nil if cu.eof?

    item = cu.readahead1
    @regs.each_with_index do |reg, i|
      return OrMatchSet.new(pr, self, i, nil, [true, 1]) if reg === item
    end
    nil
  end

  private

  # Full variant: tries each alternative in order (via its mmatch when it has
  # one, via === on the next item otherwise) and returns an OrMatchSet for
  # the first that matches, or nil. The context opened here is always closed.
  #
  # Fix: when an alternative answers with a match set, the whole first match
  # pair is handed to OrMatchSet instead of only its trailing length, to
  # satisfy the Array assertion in OrMatchSet#initialize.
  def mmatch_full(pr)
    pr.newcontext(self)
    mat = nil
    pos = pr.cursor.pos
    assert(pos)
    @regs.each_with_index do |r, i|
      if r.respond_to? :mmatch
        assert(pr.cursor.pos == pos)
        mat = r.mmatch(pr)
        next unless mat

        if mat.respond_to? :next_match
          huh # is calling next_match bad because it advances cursor?
          first = mat.next_match(pr.cursor.all_data, pr.cursor.pos)
          return OrMatchSet.new(pr, self, i, mat, first)
        else
          return OrMatchSet.new(pr, self, i, nil, mat)
        end
      else
        item = pr.cursor.readahead1
        return OrMatchSet.new(pr, self, i, nil, [true, 1]) if r === item
      end
    end

    assert(mat.nil?)
    nil
  ensure
    pr.endcontext
  end
end
1099
+
1100
+ #--------------------------
1101
class Xor
  private

  # Exclusive-or matching: asks every alternative in @regs for a match and
  # succeeds only if exactly one of them produced one.
  #
  # Returns that single alternative's mmatch result, or nil when none matched
  # or as soon as a second alternative matches. The context opened here is
  # always closed again.
  def mmatch_full(pr)
    pr.newcontext(self)
    origin = pr.cursor.pos
    winner = nil
    @regs.each do |alternative|
      # every alternative must start from the same cursor position
      assert(pr.cursor.pos == origin)
      outcome = alternative.mmatch(pr)
      next unless outcome
      return nil if winner

      winner = outcome
    end
    winner
  ensure
    pr.endcontext
  end
end
1119
+
1120
+
1121
+ #--------------------------
1122
class ManyClass
  # Matches based purely on how many items remain after the cursor.
  #
  # Returns [true, remaining] when exactly @times.begin items remain, nil
  # when fewer remain, and otherwise the match set built by make_ms covering
  # counts between @times.begin and min(remaining, @times.end).
  def mmatch(pr)
    remaining = pr.cursor.restsize
    minimum = @times.begin
    return [true, remaining] if minimum == remaining
    return nil unless minimum < remaining

    make_ms([remaining, @times.end].min, minimum, pr)
  end

  # Builds a match set that starts at the largest count and steps down by
  # one towards the smallest.
  def make_ms(left, beg, pr)
    SingleRepeatMatchSet.new(pr, left, -1, beg)
  end
end
1135
+
1136
class ManyLazyClass
  # Matches based purely on how many items remain after the cursor.
  #
  # Returns [true, remaining] when exactly @times.begin items remain, nil
  # when fewer remain, and otherwise the match set built by make_ms covering
  # counts between @times.begin and min(remaining, @times.end).
  def mmatch(pr)
    remaining = pr.cursor.restsize
    minimum = @times.begin
    return [true, remaining] if minimum == remaining
    return nil unless minimum < remaining

    make_ms([remaining, @times.end].min, minimum, pr)
  end

  # Builds a match set that starts at the smallest count and steps up by one
  # towards the largest.
  def make_ms(left, beg, pr)
    SingleRepeatMatchSet.new(pr, beg, 1, left)
  end
end
1149
+
1150
module Reg
  # mmatch implementation for all scalar expressions which don't have an
  # mmatch of their own.
  #
  # Returns [true, 1] when the item under the cursor matches self (via ===),
  # false at end of input, and the falsy === result otherwise.
  def mmatch(pr)
    return false if pr.cursor.eof?

    (self === pr.cursor.readahead1) && [true, 1]
  end
end
1157
+
1158
+ #--------------------------
1159
# Match set produced by Repeat#mmatch_full. Hands out the match found at
# construction time first, then progressively different matches obtained by
# backing up or backtracking the shared progress object.
class RepeatMatchSet < MatchSet
  attr_reader :progress

  # progress::        progress object the match was made with
  # consumed::        number of items the first match consumed
  # orig_stack_size:: size of progress.matchset_stack before the repeat
  #                   started matching
  def initialize(progress, consumed, orig_stack_size)
    @orig_stack_size = orig_stack_size
    @progress = progress
    @context = @progress.context
    @consumed = consumed
    @firstmatch = [progress.clean_result, @consumed]
    assert(progress.matcher.times === progress.regsidx)
    assert(progress.regsidx)
    assert(@consumed >= 0)
  end

  # Number of iterations in the current match, as tracked by progress.regsidx.
  def match_iterations
    progress.regsidx
  end

  # Returns the next [clean_result, consumed] pair, or nil when the set is
  # exhausted (after which @progress is nil and nil keeps being returned).
  #
  # Very nearly identical to SubseqMatchSet#next_match (the "twin" mentioned
  # in the comments below), which has fewer assertions.
  def next_match(arr, idx)
    # 1. the match found at construction time
    if @firstmatch
      result, @firstmatch = @firstmatch, nil
      return result
    end

    return nil unless @progress # not in twin ... ignore it

    assert(@orig_stack_size <= @progress.matchset_stack.size)
    return(@progress = nil) if @orig_stack_size == @progress.matchset_stack.size

    assert(progress.check_result)

    # value only fed the now-disabled "extents" computation
    _position_inc = @context.position_inc

    # this part's not in twin
    # 2. more iterations than the minimum were matched: drop the last one.
    #
    # what if the match that gets discarded was returned by a matchset that
    # has more matches in it? in that case, position_inc is 1 and this branch
    # should not be executed... but why would it be 1?
    if @context.regsidx > @context.matcher.times.begin
      oldpos = @context.position_stack.last
      # the value returned by backup_stacks is deliberately abandoned
      progress.backup_stacks(@context) or raise

      # Other progress state created with the popped match has to be undone
      # too; this relies on a backtracking point existing for each optional
      # iteration of the repeat matcher. ("ok, i think this is working now.
      # no extra code needed here.")

      # recompute # of items @consumed
      @consumed -= oldpos - @context.position_stack.last
      assert(@consumed >= 0)
      assert(idx + @consumed <= arr.size)
      assert(progress.check_result)
      result = [progress.clean_result, @consumed]
      assert(progress.check_result)
      return result
    end

    # 3. ask the most recent nested match set for another match
    assert(progress.check_result)
    assert((0..@progress.matcher.max_matches) === @progress.regsidx)
    assert(@context.position_inc_stack.size >= 1)
    result, di, ri = progress.last_next_match(@context)
    if result && @progress.matcher.enough_matches?(ri, @progress.cursor.eof?)
      result = [progress.clean_result, di]
      @consumed = di # not in twin...why?
      assert(@consumed >= 0)
      assert(idx + result.last <= arr.size)
      assert(progress.check_result)
      return result
    end

    assert((0..@progress.matcher.max_matches) === @progress.regsidx)
    assert(progress.check_result)

    # 4. backtrack and match again from there
    return(@progress = nil) if @progress.matchset_stack.size == @orig_stack_size
    return(@progress = nil) unless @progress.backtrack(@context)

    # this is where the divergence widens. ri is a local in twin
    assert(progress.check_result)
    mat, di, @ri = @progress.bt_match # mat is @ary in twin
    return(@progress = nil) if mat.nil?

    assert(@progress.regsidx)
    assert((0..@progress.matcher.max_matches) === @progress.regsidx)

    result = [@progress.clean_result, di]
    @consumed = di # no @consumed in twin
    assert(@consumed >= 0)
    assert(::Array === result.first.first)
    assert(idx + result.last <= arr.size)
    assert(progress.check_result)
    result
  end
end
1296
+
1297
+ #---------------------------------------------
1298
# Match set produced by Subseq#mmatch_full. Hands out the match found at
# construction time first, then further matches obtained from nested match
# sets or by backtracking the shared progress object.
class SubseqMatchSet < MatchSet
  # progress::        progress object the match was made with
  # di::              number of items the first match consumed
  # orig_stack_size:: size of progress.matchset_stack before the subsequence
  #                   started matching
  def initialize(progress, di, orig_stack_size)
    @orig_stack_size = orig_stack_size
    @progress = progress
    @context = progress.context
    @orig_pos = progress.cursor.pos - di
    @firstresult = [progress.clean_result, di]
  end

  # Returns the next [clean_result, di] pair, or nil when the set is
  # exhausted.
  #
  # Fix: once the set has been exhausted @progress is nil, and the original
  # went on to call methods on it (NoMethodError). Like its twin
  # RepeatMatchSet#next_match, this now keeps returning nil instead.
  def next_match(ary, start)
    if @firstresult
      result, @firstresult = @firstresult, nil
      assert(::Array === result)
      return result
    end

    return nil unless @progress # exhausted earlier

    assert(@orig_stack_size <= @progress.matchset_stack.size)
    return(@progress = nil) if @orig_stack_size == @progress.matchset_stack.size

    result, di, ri = @progress.last_next_match(@context)
    if result && @progress.matcher.enough_matches?(ri, @progress.cursor.eof?)
      return [@progress.clean_result, di]
    end

    # twin has a more sophisticated test on matchset_stack
    return(@progress = nil) if @progress.matchset_stack.empty?
    assert(@progress.regsidx)
    return(@progress = nil) unless @progress.backtrack(@context)

    # need to adjust ri?
    # is this right... dunno...
    result, di, _unused = @progress.bt_match
    return nil unless result

    result = [@progress.clean_result, di]
    assert(::Array === result.first.first)
    result
  end

  # Upper bound on iterations, taken from the matcher.
  #
  # Fix: reads @progress directly; this class declares no `progress` reader
  # (only RepeatMatchSet does), so the bare `progress` call depended on a
  # method not visible here.
  def match_iterations
    @progress.matcher.max_matches
  end
end
1352
+ #--------------------------
1353
# Match set for an And matcher. Currently adds nothing to SubseqMatchSet;
# the notes below record the intended design.
class AndMatchSet < SubseqMatchSet
  # The total number of possible different ways to match an AndMatchSet in
  # which several of the branches are actually ambiguous grows exponentially.
  # Rather than hit every possible match, the aim is to hit every legal match
  # length at least once.
  #
  # On next_match, figure out the alternative(s) that are currently returning
  # the longest match. Those alternatives are returned in the first match;
  # on the second and subsequent calls to next_match, that set of longest
  # alternatives are all next_matched (rolled back) until they match
  # something shorter. (Or maybe just a different length? Reg::Or isn't
  # greedy, so its longest match isn't necessarily returned first.)
  #
  # If any next_match call returns nil (or false), the whole match set is
  # finished: return nil from next_match now and forever more.

  # Sketch of a dedicated constructor, never enabled:
  #
  #   def initialize(progress,firstmatchlen)
  #     @progress=progress
  #     @firstmatch=[true,firstmatchlen]
  #     huh
  #   end

  # This isn't really right... On next_match, the longest alternative(s) need
  # to be backtracked. If they are then shorter than the next longest
  # alternative, that (formerly next longest) alternative becomes the
  # dominating one and determines how much is consumed.
end
1387
+ #might need Reg::Or tooo....
1388
+
1389
+ else #... not $MMATCH_PROGRESS
1390
class Subseq
  # Variant used when each of @regs is not a multiple reg: reg i is compared
  # against arr[start+i].
  #
  # Returns [true, @regs.size] on success, nil if too few items remain or any
  # reg fails to match.
  def mmatch(arr, start)
    assert(start <= arr.size)
    return nil unless start + @regs.size <= arr.size

    @regs.each_with_index do |reg, offset|
      assert(start + offset < arr.size)
      return nil unless reg === arr[start + offset]
    end
    [true, @regs.size]
  end

  # Variant used when at least one of @regs is a multiple reg: runs a fresh
  # Progress over arr, positioned at start, and wraps a successful bt_match
  # in a SubseqMatchSet. Returns the falsy bt_match result otherwise.
  #
  # Fix: the original read `cu=arr.to_sequence cu.pos=start`, which Ruby
  # parses as `arr.to_sequence(cu.pos=start)` while `cu` is still nil, so it
  # raised NoMethodError. Creating the cursor and positioning it are now two
  # separate statements.
  def mmatch_full(arr, start)
    assert((0..arr.size).include?(start))
    cu = arr.to_sequence
    cu.pos = start
    pr = Progress.new(self, cu)
    result, di, _unused = pr.bt_match
    result && SubseqMatchSet.new(pr, di)
  end
end
1414
+
1415
class Repeat
  # Greedily applies @reg to arr[start], arr[start+1], ... at most @times.end
  # times.
  #
  # Returns nil when fewer than @times.begin items matched, [true, count]
  # when exactly @times.begin matched, and a SingleRepeatMatchSet counting
  # down from the matched count to @times.begin otherwise.
  def mmatch(arr, start)
    count = 0
    (0...@times.end).each do |n|
      break unless start + n < arr.size
      break unless @reg === arr[start + n]
      count = n + 1
    end
    assert((0..@times.end) === count)
    return [true, count] if count == @times.begin
    return nil unless count > @times.begin

    SingleRepeatMatchSet.new(count, -1, @times.begin)
  end

  # Backtracking variant: runs a fresh Progress over arr, positioned at
  # start, matching the mandatory @times.begin iterations first and then
  # (greedily) the optional ones.
  #
  # Fix: the original read `cu=arr.to_sequence cu.pos=start`, which Ruby
  # parses as `arr.to_sequence(cu.pos=start)` while `cu` is still nil, so it
  # raised NoMethodError. Creating the cursor and positioning it are now two
  # separate statements.
  def mmatch_full(arr, start)
    assert(start <= arr.size)
    r = [RR[]]

    cu = arr.to_sequence
    cu.pos = start
    pr = Progress.new(self, cu)

    # first match the minimum number
    if @times.begin == 0 # if we can match nothing
      return [r, 0] if arr.size == start # at end of input? return empty set
      ri = di = 0
    else
      return nil if arr.size == start
      assert(@times.begin < Infinity)
      r, di, ri = pr.bt_match(self, @times.begin) # matches @reg @times.begin times
      return nil if r.nil?
    end
    assert(ri == @times.begin)

    assert(!@times.exclude_end?)
    left = @times.end - @times.begin

    # note: left and top could be infinite here...

    # do the optional match iterations
    # only greedy matching implemented for now
    # there must be a more efficient algorithm...
    if left >= 1
      assert(pr.check_result)
      # get remaining matches up to @times.end times
      res, di, ri = pr.bt_match
      # don't update to progress version
      assert(@times === ri)

      return RepeatMatchSet.new(pr, di) if res
    end

    # if matchset has no backtracking stops, and
    # hence cannot contain more than one actual match,
    # then just return that match.
    huh 'this needs to change: matchset_stack is shared with whatever came before'
    pr.matchset_stack.empty? ? di : RepeatMatchSet.new(pr, di)
  end
end
1479
+
1480
+
1481
+ end # $MMATCH_PROGRESS
1482
+
1483
+
1484
+
1485
+
1486
class Repeat
  # "enable backtracking stops at each optional iteration"
  #
  # True when +steps+ (a number of completed iterations) falls within the
  # allowed repetition range @times.
  def want_gratuitous_btstop?(steps)
    allowed = @times
    allowed === steps
  end
end
1494
+
1495
+
1496
+ #---------------------------------------------
1497
class Array
  # Case-equality for array matchers: runs a fresh Progress over +other+ and
  # succeeds only when bt_match matched and consumed all of it.
  #
  # Returns false when +other+ is not an ::Array; otherwise the (truthy)
  # bt_match result on a complete match, or a falsy value.
  def ===(other)
    return false unless ::Array === other # need to be more generous eventually

    progress = Progress.new(self, other.to_sequence)
    assert(progress.regsidx == 0)
    matched, consumed, _unused = progress.bt_match
    assert(consumed.nil? || consumed <= other.size)
    consumed == other.size && matched
  end
end
1507
+
1508
+ end
1509
+
1510
# Disabled (`if false`) work-around for warnings in cursor: would wrap the
# initializers of two Sequence classes so that @positions and @prop are set
# (to nil unless already set) before the original initializer runs.
if false
  warn "warning: ugly workaround for chatty sequence warnings"
  prop_niller = proc do
    original_initialize = instance_method(:initialize)

    define_method(:initialize) do |*args|
      @positions ||= nil
      @prop ||= nil
      original_initialize.bind(self).call(*args)
    end
  end
  ::Sequence::Indexed.instance_eval(&prop_niller)
  ::Sequence::Position.instance_eval(&prop_niller)
end