reg 0.4.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,477 @@
1
+ =begin copyright
2
+ reg - the ruby extended grammar
3
+ Copyright (C) 2005 Caleb Clausen
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+ module Reg
21
+ module Backtrace
22
+ #--------------------------
23
# Flattens a backtracking result structure into a single list of matches.
#
# +result+ is laid out as [frag, matchset, index, frag, matchset, index, ..., frag]
# (size % 3 == 1); only the fragments at positions 0, 3, 6, ... are collected.
#
# result::  the result structure to flatten
# restype:: container class used to build the return value (default RR)
#
# Returns restype[*all_fragment_elements].
def Backtrace.clean_result(result, restype = RR)
  assert result.size % 3 == 1
  collected = []
  (0...result.size).step(3) do |pos|
    frag = result[pos]
    assert RR === frag
    assert frag.empty? || ::Array === frag.first
    collected.concat(frag)
    assert collected.empty? || collected.first.empty? || ::Array === collected.first
  end
  assert collected.empty? || collected.first.empty? || ::Array === collected.first
  restype[*collected]
end
35
+
36
+ #--------------------------
37
# Sanity-checks the shape of a backtracking result structure.
#
# Asserts that the size is 1 modulo 3, that every third slot (0, 3, 6, ...)
# is an RR that is empty or starts with an Array, and that each such slot
# after the first is preceded by a MatchSet and then a non-negative Integer.
#
# Always returns true, so it can itself be wrapped in an assert.
def Backtrace.check_result(result)
  assert result.size % 3 == 1
  # NOTE(review): last_idx is never advanced, so the comparison below only
  # enforces "index >= 0". A monotonicity check may have been intended -- confirm.
  last_idx = 0
  (0...result.size).step(3) do |pos|
    frag = result[pos]
    assert RR === frag
    assert frag.empty? || ::Array === frag.first
    unless pos == 0
      assert MatchSet === result[pos - 2]
      assert Integer === result[pos - 1]
      assert result[pos - 1] >= last_idx
    end
  end
  true
end
50
+
51
+
52
+ #--------------------------
53
# Copies a backtracking result structure of the form
# [arr, matchset, num, arr, matchset, num, ..., arr].
#
# Each arr is duplicated with #dup, each matchset with its own #deep_copy,
# and each num is carried over unchanged. The copy is asserted to compare
# equal (==) to the input and to pass Backtrace.check_result.
#
# Returns the new structure.
def Backtrace.deep_copy(res)
  assert res.size % 3 == 1
  assert ::Array === res.first
  copy = [res.first.dup]
  1.step(res.size - 1, 3) do |n|
    ms, num, arr = res[n, 3]
    assert ms
    copy.push(ms.deep_copy, num, arr.dup)
    # Debugging aid: dump both matchsets when the copy does not compare equal.
    unless copy[-3] == ms
      pp :ms_o, ms.ob_state, :r_3_o, copy[-3].ob_state, :ms, ms, :r_3, copy[-3]
    end
    assert(copy[-3] == ms)
  end
  assert copy == res
  assert Backtrace.check_result(copy)
  copy
end
70
+
71
+
72
+ #--------------------------
73
+ #bt, in this case, stands for 'backtracking'.
74
+ #but the cognoscenti refer to this function as 'bitch-match'.
75
# Backtracking match loop: matches sub-matchers regs(ri), regs(ri+1), ...
# against arr beginning at position start+di.
#
# arr::       the data being matched
# start::     base position in arr
# ri::        index of the next sub-matcher to try
# di::        offset from start of the next datum to match
# result::    result structure [frag, matchset, index, ..., frag]; the last
#             fragment is appended to in place, and a new backtracking stop
#             is added by rebinding the local (the caller's array object is
#             not extended)
# regs_size:: number of sub-matchers to match before stopping
#             (default: max_matches)
#
# Returns [result, di, ri] on success -- either when ri reaches regs_size or
# when a sub-matcher fails but enough_matches?(ri) holds -- and nil when a
# sub-matcher fails and backtracking cannot recover.
def bt_match(arr, start, ri, di, result, regs_size = max_matches)
  assert start + di <= arr.size
  assert start >= 0
  assert di >= 0
  assert((0..regs_size) === ri)
  assert ::Array === result.first
  assert Backtrace.check_result(result)

  loop do # loop over regs to match
    assert start + di <= arr.size
    assert di >= 0
    assert((0..regs_size) === ri)

    if trace_enabled?
      $stderr.print start, " ", self.inspect, ": ", Backtrace.clean_result(result).inspect, "\n"
    end
    assert Backtrace.check_result(result)

    # Try a new match of the current reg; both start out nil every iteration.
    mat = matchlen = nil
    pos = start + di
    r = regs(ri)
    if r.respond_to?(:mmatch)
      m = r.mmatch(arr, pos)
      if m.respond_to?(:next_match)
        # It's a match set -- open a new backtracking stop with an empty
        # fragment, then take the set's first match.
        result += [m, di, []]
        mat, matchlen = m.next_match(arr, pos)
        assert mat
      else
        mat, matchlen = *m # single match ([mat, len]) or nil
      end
    elsif pos < arr.size && r === arr[pos]
      mat = RR[arr[pos]]
    end

    assert Backtrace.check_result(result)

    if mat # match succeeded: record it and advance to the next reg
      ri += 1
      result.last << mat
      assert ::Array === result.first
      matchlen ||= mat.length
      di = update_di(di, matchlen)
      assert(start + di <= arr.size)
    else # match failed
      assert Backtrace.check_result(result)
      return result, di, ri if enough_matches?(ri)

      # Doesn't match; try backtracking. If that fails too, we fail.
      ri, di = backtrace(arr, start, result, ri)
      return nil unless ri
      assert(start + di <= arr.size)
      assert Backtrace.check_result(result)
    end

    assert((0..regs_size) === ri)
    assert(start + di <= arr.size)

    assert Backtrace.check_result(result)
    return result, di, ri if ri == regs_size
  end
end
142
+
143
+ #--------------------------
144
# Rewinds to the most recent backtracking stop in +result+ that can still
# produce another match.
#
# Repeatedly takes the last [matchset, index, frag] triple, subtracts the
# size of the last fragment from ri, and asks the matchset for its next
# match at start+index. Exhausted triples are removed from +result+ in place.
#
# arr::    the data being matched
# start::  base position in arr
# result:: result structure; mutated in place
# ri::     current sub-matcher index
#
# Returns [ri, di] adjusted to include the new match (which replaces the last
# fragment of result), or nil when no backtracking stop is left.
# Raises RuntimeError ('partial result underflow') if fewer than three
# elements could be removed.
def backtrace(arr, start, result, ri)
  assert ri != INFINITY
  assert(Backtrace.check_result(result))
  mat = matlen = di = nil
  loop do # might have to backtrack several times if earlier stops also fail
    # Fetch the last match set and reset the data index to where it started.
    ms, di = result[-3..-2]

    unless ms
      # Result underflow: every backtracking stop has been exhausted.
      assert(result.size == 1)
      return nil
    end

    ri -= result.last.size # reset result idx

    assert(ri >= 0)
    assert(result.size % 3 == 1)
    assert(result.size >= 3)
    assert start + di <= arr.size
    mat, matlen = ms.next_match(arr, start + di)
    if mat
      assert((0..max_matches) === ri + 1)
      break
    end
    raise 'partial result underflow' unless result.slice!(-3..-1).size == 3
  end

  assert ::Array === mat
  assert ::Array === mat.first
  assert start + update_di(di, matlen) <= arr.size

  # Adjust ri, di and result to include mat.
  ri += 1
  result[-1] = [mat]
  di = update_di(di, matlen)

  assert start + di <= arr.size
  return ri, di
end
186
+ end
187
+
188
class MatchSet
  # Asks the most recent backtracking stop inside +resfrag+ for its next match.
  #
  # ary::     the data being matched
  # start::   base position in ary
  # resfrag:: result structure [frag, matchset, index, ..., frag]; mutated in
  #           place
  #
  # Returns a three-element array:
  # * [nil, nil, match_iterations] when resfrag holds no backtracking stop;
  # * when the innermost match set is exhausted, its triple is removed and the
  #   result is [resfrag, di, ri] if @reg.enough_matches?(ri), otherwise
  #   [nil, nil, ri];
  # * otherwise the new match replaces the last fragment and, if fewer than
  #   @reg.max_matches iterations are recorded, the remaining sub-matchers are
  #   re-matched through @reg.bt_match; the result is [resfrag, di, ri] (all
  #   nil if bt_match fails).
  def last_next_match(ary, start, resfrag)
    inner, di = resfrag[-3..-2]
    return nil, nil, match_iterations unless inner

    #dunno how to do this simply...
    #assert full_up? if SubseqMatchSet===self

    mat, diinc = inner.next_match(ary, start + di)
    unless mat
      discarding = resfrag.last
      # A Symbol is not a valid argument to raise (it would surface as a
      # TypeError), so the message is passed as a String.
      raise 'impossible' unless resfrag.slice!(-3..-1).size == 3

      # Might need to return non-nil here, if resfrag isn't exhausted yet.
      ri = match_iterations - discarding.size
      return nil, nil, ri unless @reg.enough_matches?(ri)
      return resfrag, di, ri
    end

    assert di + diinc <= ary.size
    di += diinc
    ri = match_iterations - resfrag[-1].size + 1 # +1 for mat, which must match here if set
    resfrag[-1] = [mat]
    if ri < @reg.max_matches # more subregs of this reg remain to be matched
      # Re-match the tail regs.
      assert di <= ary.size
      resfrag, di, ri = @reg.bt_match(ary, start, ri, di, resfrag)
    end

    return resfrag, di, ri
  end
end
221
+ #--------------------------
222
class RepeatMatchSet < MatchSet
  # regrepeat:: the repeat matcher that produced this set
  # ary::       result structure of the initial match
  # ri::        number of iterations in the initial match
  # diinc::     amount of data consumed by the initial match
  #
  # The initial match is stored so the first call to next_match returns it.
  def initialize(regrepeat, ary, ri, diinc) #maybe rename diinc=>di
    @reg = regrepeat
    @ary = ary
    @ri = ri
    @diinc = diinc
    @firstmatch = [Backtrace.clean_result(ary), @diinc]
    assert(@reg.times === @ri)
    assert @ri
  end

  # Number of iterations recorded for the current match.
  def match_iterations
    @ri
  end

  # Returns the next alternative as [matches, data_consumed], or nil when the
  # set is exhausted. Very nearly identical to SubseqMatchSet#next_match.
  #
  # Order of attempts:
  # 1. the stored initial match;
  # 2. dropping the last iteration, if more than the minimum were matched and
  #    the last fragment holds more than one match;
  # 3. the next match of the innermost backtracking stop (last_next_match);
  # 4. backtracking through @reg.backtrace and re-matching via @reg.bt_match.
  def next_match(arr, idx)
    if @firstmatch
      first = @firstmatch
      @firstmatch = nil
      assert first.first.empty? || ::Array === first.first.first
      assert idx + first.last <= arr.size
      assert(@ri == first.first.size)
      return first
    end

    return nil unless @ary # already exhausted

    # If the match about to be discarded came from a match set that still has
    # alternatives, @ary.last.size is 1 and this shortcut is skipped.
    if @ri > @reg.times.begin && @ary.last.size > 1
      @ri -= 1
      discarded = @ary.last.pop
      @diinc -= discarded.last.size
      assert idx + @ri <= arr.size
      return [Backtrace.clean_result(@ary), @diinc]
    end

    frag, di, @ri = last_next_match(arr, idx, @ary)
    if frag && @reg.times === @ri
      @ary = frag
      found = [Backtrace.clean_result(frag), di]
      @diinc = di
      assert @ri
      assert ::Array === found.first.first
      assert idx + found.last <= arr.size
      return found
    end

    assert((0..@reg.max_matches) === @ri)
    assert(Backtrace.check_result(@ary))

    # No backtracking stop left: mark the set exhausted.
    return(@ary = nil) unless @ary[-2]
    assert @ri > 0

    @ri, di = @reg.backtrace(arr, idx, @ary, @ri)
    return(@ary = nil) unless @ri

    assert(Backtrace.check_result(@ary))
    mat, di, @ri = @reg.bt_match(arr, idx, @ri, di, @ary)
    return(@ary = nil) if mat.nil?

    assert @ri
    assert((0..@reg.max_matches) === @ri)
    @ary = mat

    found = [Backtrace.clean_result(mat), di]
    @diinc = di
    assert ::Array === found.first.first
    assert idx + found.last <= arr.size
    found
  end

  # Returns a new RepeatMatchSet built from a Backtrace.deep_copy of the
  # result structure, with the pending first match (if any) duplicated.
  def deep_copy
    assert((0..@reg.max_matches) === @ri)
    copy = RepeatMatchSet.new(@reg, Backtrace.deep_copy(@ary), @ri, @diinc)
    pending = @firstmatch && @firstmatch.dup
    copy.instance_variable_set(:@firstmatch, pending)
    copy
  end
end
324
+
325
class Repeat
  # Greedy match of @reg against consecutive elements of arr from +start+,
  # for the case where @reg matches single elements.
  #
  # Counts how many consecutive elements match, up to @times.end. Returns:
  # * [RR[matched_slice], count] when count equals @times.begin;
  # * nil when count is below @times.begin;
  # * SingleRepeatMatchSet.new(count, -1, @times.begin) when count exceeds it.
  #
  # The count is kept in an explicit local. Block parameters are block-local
  # in Ruby 1.9+, so a counter updated through a block parameter would not be
  # visible after the loop.
  def mmatch(arr, start)
    matched = 0
    while matched < @times.end &&
          start + matched < arr.size &&
          @reg === arr[start + matched]
      matched += 1
    end
    assert((0..@times.end) === matched)

    return [RR[arr[start, matched]], matched] if matched == @times.begin
    return nil unless matched > @times.begin
    SingleRepeatMatchSet.new(matched, -1, @times.begin)
  end

  # Match of @reg repeated @times times using the backtracking matcher.
  #
  # First matches the minimum number of iterations via bt_match, then greedily
  # tries the optional ones. Returns nil on failure, a RepeatMatchSet when the
  # result contains backtracking stops, or [matches, data_consumed] otherwise.
  def mmatch_multiple(arr, start)
    assert start <= arr.size
    r = [RR[]]

    # First match the minimum number.
    if @times.begin == 0 # we can match nothing
      # NOTE(review): this returns the raw result structure rather than
      # Backtrace.clean_result(r) as the final return does -- confirm intent.
      return [r, 0] if arr.size == start # at end of input
      ri = di = 0
    else
      return nil if arr.size == start
      assert @times.begin < INFINITY
      r, di, ri = bt_match(arr, start, 0, 0, r, @times.begin) # matches @reg @times.begin times
      return nil if r.nil?
    end
    assert ri == @times.begin

    assert !@times.exclude_end?
    left = @times.end - @times.begin

    # note: left could be infinite here...

    # Do the optional match iterations (only greedy matching for now).
    if left >= 1
      assert Backtrace.check_result(r)
      # bt_match may modify the structure it is given, so it works on a deep
      # copy. Its results go into separate locals so that r, di and ri stay
      # valid for the fallback below when it returns nil.
      res, res_di, res_ri = bt_match(arr, start, ri, di, Backtrace.deep_copy(r))
      if res
        assert Backtrace.check_result(res)
        assert @times === res_ri
        return RepeatMatchSet.new(self, res, res_ri, res_di)
      end
    end

    # If the result has no backtracking stops it cannot contain more than one
    # actual match, so just return that match.
    r.size > 1 ? RepeatMatchSet.new(self, r, ri, di) : [Backtrace.clean_result(r), di]
  end
end
386
+
387
class Subseq
  # Match for the case where none of @regs is a multiple (variable-length)
  # matcher: each matcher is tested against exactly one element of arr,
  # starting at +start+.
  #
  # Returns [RR[matched_slice], @regs.size] when every matcher accepts its
  # element, and nil when too few elements remain or any matcher rejects.
  def mmatch(arr, start)
    assert start <= arr.size
    count = @regs.size
    return nil unless start + count <= arr.size

    @regs.each_with_index do |reg, offset|
      assert(start + offset < arr.size)
      return nil unless reg === arr[start + offset]
    end
    [RR[arr[start, count]], count]
  end

  # Match for the case where at least one of @regs is a multiple matcher.
  # Runs bt_match from a fresh result structure.
  #
  # Returns a SubseqMatchSet wrapping the result, or the falsy value from
  # bt_match when the match fails.
  def mmatch_multiple(arr, start)
    assert((0..arr.size).include?(start))
    result, di, = bt_match(arr, start, 0, 0, [RR[]])
    result && SubseqMatchSet.new(self, result, di)
  end
end
409
+ #--------------------------
410
class SubseqMatchSet < MatchSet
  # subseqreg:: the sequence matcher that produced this set
  # matchary::  result structure of the initial match
  # di::        amount of data consumed by the initial match
  #
  # The initial match is stored so the first call to next_match returns it.
  def initialize(subseqreg, matchary, di)
    @reg = subseqreg
    @matchary = matchary
    @firstresult = [Backtrace.clean_result(@matchary), di]
  end

  # A sequence match always covers all of the matcher's sub-matchers.
  def match_iterations
    @reg.max_matches
  end

  # Returns the next alternative as [matches, data_consumed], or nil when no
  # further alternative exists.
  #
  # Tries, in order: the stored initial match; the next match of the innermost
  # backtracking stop (last_next_match); and finally backtracking through
  # @reg.backtrace followed by a re-match via @reg.bt_match.
  def next_match(ary, start)
    if @firstresult
      first = @firstresult
      @firstresult = nil
      assert ::Array === first.first.first
      return first
    end

    frag, di, ri = last_next_match(ary, start, @matchary)
    if frag && ri == @reg.max_matches
      @matchary = frag
      found = [Backtrace.clean_result(frag), di]
      assert ::Array === found.first.first
      return found
    end

    return nil unless @matchary && @matchary[-2]
    ri, di = @reg.backtrace(ary, start, @matchary, @reg.max_matches)
    return nil unless ri

    #need to adjust ri?

    #is this right... dunno...
    @matchary, di, = @reg.bt_match(ary, start, ri, di, @matchary)
    return nil unless @matchary

    found = [Backtrace.clean_result(@matchary), di]
    assert ::Array === found.first.first
    found
  end

  # Returns a shallow duplicate of this set whose result structure has been
  # replaced by a Backtrace.deep_copy of the original.
  def deep_copy
    resfrag = Backtrace.deep_copy(@matchary)
    copy = dup
    copy.instance_variable_set(:@matchary, resfrag)
    copy
  end

  # NOTE(review): @regs is not assigned anywhere in this class as shown, so
  # this presumably returns nil -- confirm whether @reg's sub-matchers were meant.
  def subregs
    @regs
  end
end
457
+
458
+ #--------------------------
459
class AndMatchSet < SubseqMatchSet
  # Placeholder: inherits all behavior from SubseqMatchSet, which the author
  # notes is not really right. On next_match the longest alternative(s)
  # should be backtracked; if they then become shorter than the next-longest
  # alternative, that alternative becomes the dominating one and determines
  # how much data is consumed.
end
467
+
468
class Array
  # Case-equality: matches +other+ against this pattern from position 0.
  #
  # Returns false when other is not a core ::Array, or when bt_match fails or
  # does not consume all of other. Otherwise returns the cleaned match result
  # built as a core ::Array.
  def ===(other)
    return false unless ::Array === other

    result, di, = bt_match(other, 0, 0, 0, [RR[]])
    assert di.nil? || di <= other.size
    di == other.size && Backtrace.clean_result(result, ::Array)
  end
end
476
+
477
+ end