reg 0.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,477 @@
1
+ =begin copyright
2
+ reg - the ruby extended grammar
3
+ Copyright (C) 2005 Caleb Clausen
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+ module Reg
21
+ module Backtrace
22
+ #--------------------------
23
# Collapses a backtracking result list into one flat match collection.
#
# The assertions require +result+ to have a length of 3k+1.  Only the
# entries at indices 0, 3, 6, ... are read; they are concatenated in order
# and splatted into +restype[...]+ (+RR+ unless the caller passes another
# container class).  The entries in between are skipped here.
def Backtrace.clean_result(result,restype=RR)
  assert(result.size % 3 == 1)
  merged = []
  slot = 0
  while slot <= result.size - 1
    fragment = result[slot]
    assert(RR === fragment)
    assert(fragment.empty? || ::Array === fragment.first)
    merged = merged + fragment
    assert(merged.empty? || merged.first.empty? || ::Array === merged.first)
    slot += 3
  end
  assert(merged.empty? || merged.first.empty? || ::Array === merged.first)
  restype[*merged]
end
35
+
36
+ #--------------------------
37
# Sanity-checks the layout of a backtracking result list and returns true.
#
# Asserted layout: the length is 3k+1; every entry at index 0, 3, 6, ... is
# an RR whose first element (if any) is an Array; and each such entry after
# the first is preceded by a MatchSet (two slots back) and an Integer (one
# slot back).
def Backtrace.check_result(result)
  assert(result.size % 3 == 1)
  # NOTE(review): last_idx is never advanced, so the comparison below only
  # rejects negative indices.  It looks like a monotonicity check was
  # intended -- confirm before tightening it.
  last_idx = 0
  0.step(result.size - 1, 3) do |pos|
    frag = result[pos]
    assert(RR === frag)
    assert(frag.empty? || ::Array === frag.first)
    unless pos == 0
      assert(MatchSet === result[pos - 2])
      assert(Integer === result[pos - 1])
      assert(result[pos - 1] >= last_idx)
    end
  end
  true
end
50
+
51
+
52
+ #--------------------------
53
# Builds an independent copy of a backtracking result list.
#
# Layout of +res+: arr, then repeated (matchset, num, arr) triples.
# Arrays are duplicated with #dup, match sets with their own #deep_copy,
# and the numbers are carried over unchanged.  The copy is asserted to
# compare equal (==) to the original before it is returned.
def Backtrace.deep_copy(res)
  assert(res.size % 3 == 1)
  assert(::Array === res.first)
  copy = [res.first.dup]
  offset = 1
  while offset < res.size
    matchset, data_idx, frag = res[offset, 3]
    assert(matchset)
    copy = copy + [matchset.deep_copy, data_idx, frag.dup]
    # debugging aid: dump both match sets when the copy does not compare equal
    unless copy[-3] == matchset
      pp :ms_o, matchset.ob_state, :r_3_o, copy[-3].ob_state, :ms, matchset, :r_3, copy[-3]
    end
    assert(copy[-3] == matchset)
    offset += 3
  end
  assert(copy == res)
  assert(Backtrace.check_result(copy))
  copy
end
70
+
71
+
72
+ #--------------------------
73
+ #bt, in this case, stands for 'backtracking'.
74
+ #but the cognoscenti refer to this function as 'bitch-match'.
75
# Backtracking match loop: tries regs(ri), regs(ri+1), ... against +arr+.
#
# arr       - the array being matched
# start     - base position in arr; matching resumes at start+di
# ri        - index of the next sub-matcher to try
# di        - offset from start consumed so far
# result    - backtracking result list (see Backtrace.check_result)
# regs_size - value of ri at which matching is complete
#
# Returns [result, di, ri] when ri reaches regs_size, or when a sub-matcher
# fails while enough_matches?(ri) is true.  Returns nil when a sub-matcher
# fails and #backtrace reports no alternatives are left.
#
# The top-level list is extended with `result += ...` (a new array), but
# matches are appended in place to its last element, so the caller's list
# can be modified.
def bt_match(arr,start,ri,di,result,regs_size=max_matches)
  assert(start + di <= arr.size)
  assert(start >= 0)
  assert(di >= 0)
  assert((0..regs_size) === ri)
  assert(::Array === result.first)
  assert(Backtrace.check_result(result))

  loop do # one iteration per sub-matcher attempt
    assert(start + di <= arr.size)
    assert(di >= 0)
    assert((0..regs_size) === ri)

    if trace_enabled?
      $stderr.print start, " ", self.inspect, ": ", Backtrace.clean_result(result).inspect, "\n"
    end
    assert(Backtrace.check_result(result))

    # try a fresh match of the current sub-matcher
    mat = matchlen = nil
    pos = start + di
    reg = regs(ri)
    if reg.respond_to?(:mmatch)
      found = reg.mmatch(arr, pos)
      if found.respond_to?(:next_match)
        # a match set: record a backtracking stop (set, offset, new fragment)
        # and take the set's first match
        result += [found, di, []]
        mat, matchlen = found.next_match(arr, pos)
        assert(mat)
      else
        # a single [match, length] pair, or nil on failure
        mat, matchlen = *found
      end
    elsif pos < arr.size && reg === arr[pos]
      mat = RR[arr[pos]]
    end

    assert(Backtrace.check_result(result))

    if mat
      # success: record the match and move on to the next sub-matcher
      ri += 1
      result.last << mat
      assert(::Array === result.first)
      matchlen ||= mat.length
      di = update_di(di, matchlen)
      assert(start + di <= arr.size)
    else
      # failure: stop here if that is acceptable, otherwise backtrack
      assert(Backtrace.check_result(result))
      return result, di, ri if enough_matches?(ri)

      ri, di = backtrace(arr, start, result, ri)
      return nil unless ri # backtracking failed too, so the match fails
      assert(start + di <= arr.size)
      assert(Backtrace.check_result(result))
    end

    assert((0..regs_size) === ri)
    assert(start + di <= arr.size)

    assert(Backtrace.check_result(result))
    return result, di, ri if ri == regs_size
  end
end
142
+
143
+ #--------------------------
144
# Rewinds +result+ to the most recent backtracking stop that still has an
# alternative match, and installs that alternative.
#
# Each pass reads the last (matchset, di) pair from +result+, subtracts the
# size of the last fragment from +ri+, and asks the match set for its next
# match at start+di.  If there is none, the last three entries are removed
# and the previous stop is tried.
#
# Returns [ri, di] adjusted to include the new match, or nil when +result+
# has no backtracking stops left.  +result+ is modified in place.
def backtrace(arr,start,result,ri)
  assert(ri != INFINITY)
  assert(Backtrace.check_result(result))
  mat = matlen = di = nil
  loop do # may need several rounds if earlier stops are exhausted as well
    # fetch the last match set and the data offset it was started at
    matchset, di = result[-3..-2]

    unless matchset
      # every stop has been backtracked through: no alternatives remain
      assert(result.size == 1)
      return nil
    end

    ri -= result.last.size # forget the matches made since that stop

    assert(ri >= 0)

    assert(result.size % 3 == 1)
    assert(result.size >= 3)
    assert(start + di <= arr.size)
    mat, matlen = matchset.next_match(arr, start + di)
    if mat
      assert((0..max_matches) === ri + 1)
      break
    end
    raise 'partial result underflow' unless result.slice!(-3..-1).size == 3
  end

  assert(::Array === mat)
  assert(::Array === mat.first)
  assert(start + update_di(di, matlen) <= arr.size)

  # fold the new match into ri, di and result
  ri += 1
  result[-1] = [mat]
  di = update_di(di, matlen)

  assert(start + di <= arr.size)
  return ri, di
end
186
+ end
187
+
188
class MatchSet
  # Advances the most recent backtracking stop recorded in +resfrag+.
  #
  # ary     - the array being matched
  # start   - base position in ary
  # resfrag - backtracking result list; modified in place
  #
  # Returns a three-element array:
  # * [nil, nil, match_iterations] when +resfrag+ holds no backtracking stop;
  # * when the last stop is exhausted, that stop is removed and the result
  #   is [resfrag, di, ri] if @reg.enough_matches?(ri), else [nil, nil, ri],
  #   where ri is match_iterations minus the size of the dropped fragment;
  # * otherwise the stop's next match replaces the last fragment and, if
  #   ri is still below @reg.max_matches, the remaining sub-matchers are
  #   re-matched through @reg.bt_match; the (possibly nil) outcome of that
  #   is returned as [resfrag, di, ri].
  #
  # Relies on #match_iterations and @reg, which this class does not define
  # itself.
  def last_next_match(ary,start,resfrag)
    r, di = resfrag[-3..-2]
    return nil, nil, match_iterations unless r

    #dunno how to do this simply...
    #assert full_up? if SubseqMatchSet===self

    r, diinc = r.next_match(ary, start + di)
    unless r
      discarding = resfrag.last
      # raise needs a String or an exception; a Symbol here would surface as
      # an unrelated TypeError instead of this message
      resfrag.slice!(-3..-1).size == 3 or raise 'impossible'

      #might need to return non-nil here, if resfrag isn't exhausted yet
      ri = match_iterations - discarding.size
      return nil, nil, ri unless @reg.enough_matches?(ri)
      return resfrag, di, ri
    end

    assert di + diinc <= ary.size
    di += diinc
    ri = match_iterations - resfrag[-1].size + 1 #+1 for r, which must match here if set
    resfrag[-1] = [r]
    if ri < @reg.max_matches #more sub-matchers of this reg are still to be matched
      #re-match tail regs
      assert di <= ary.size
      resfrag, di, ri = @reg.bt_match(ary, start, ri, di, resfrag)
    end

    return resfrag, di, ri
  end
end
221
+ #--------------------------
222
+ class RepeatMatchSet < MatchSet
223
# Stores the repeat matcher, its backtracking result list, the iteration
# count reached and the data offset consumed, and precomputes the first
# match ([cleaned result, diinc]) that #next_match hands out first.
# Asserts that +ri+ lies within the matcher's allowed repeat range.
def initialize(regrepeat,ary,ri,diinc) #maybe rename diinc=>di
  @reg = regrepeat
  @ary = ary
  @ri = ri
  @diinc = diinc
  @firstmatch = [Backtrace.clean_result(ary), @diinc]
  assert(@reg.times === @ri)
  assert(@ri)
  #assert(@ri==@firstmatch.first.size)
end
232
+
233
# Number of iterations matched so far (the current value of @ri).
def match_iterations
  #assert(@ri==Backtrace.clean_result(@ary).size)
  @ri
end
237
+
238
+ #very nearly identical to SubseqMatchSet#next_match
239
# Produces the next alternative match of the repeat, or nil when none remain.
# Very nearly identical to SubseqMatchSet#next_match (the "twin").
#
# arr - the array being matched
# idx - position in arr where this repeat started
#
# Returns [cleaned result, data offset], tried in this order:
# 1. the match precomputed by #initialize (handed out once);
# 2. the current match with its last iteration dropped, while more than the
#    minimum iterations are held and the last fragment has more than one
#    entry;
# 3. the next alternative of the most recent backtracking stop;
# 4. a full backtrack (@reg.backtrace) followed by a re-match (@reg.bt_match).
# Once alternatives run out @ary is set to nil, and later calls return nil.
def next_match(arr,idx)
  if @firstmatch
    first = @firstmatch
    @firstmatch = nil
    assert(first.first.empty? || ::Array === first.first.first)
    assert(idx + first.last <= arr.size)
    assert(@ri == first.first.size)
    return first
  end

  return nil unless @ary # already exhausted (the twin has no such guard)

  # Give back one iteration before re-matching the last match set.
  # What if the discarded match came from a match set that still has more
  # matches in it?  Then nothing should be done -- in that case
  # @ary.last.size is 1 and this body is skipped.
  if @ri > @reg.times.begin && @ary.last.size > 1
    @ri -= 1
    dropped = @ary.last.pop
    @diinc -= dropped.last.size
    #assert(@ri==Backtrace.clean_result(@ary).size)
    assert(idx + @ri <= arr.size)
    return [Backtrace.clean_result(@ary), @diinc]
  end

  frag, di, @ri = last_next_match(arr, idx, @ary)
  if frag && @reg.times === @ri #condition slightly different in twin
    @ary = frag
    answer = [Backtrace.clean_result(frag), di]
    @diinc = di #not in twin...why?
    assert(@ri)
    assert(::Array === answer.first.first)
    assert(idx + answer.last <= arr.size)
    #assert(@ri==result.first.size)
    return answer
  end

  assert((0..@reg.max_matches) === @ri)
  #assert(@ri==Backtrace.clean_result(@ary).size)
  assert(Backtrace.check_result(@ary))

  # no backtracking stop left in @ary: this set is finished
  unless @ary[-2]
    @ary = nil
    return nil
  end
  assert(@ri > 0)

  @ri, di = @reg.backtrace(arr, idx, @ary, @ri) #last param is @reg.max_matches in twin
  #this is where the divergence widens. @ri is a local in twin
  unless @ri
    @ary = nil
    return nil
  end

  #huh 'need to adjust @ri?' #why?

  #assert(@ri==Backtrace.clean_result(@ary).size)
  assert(Backtrace.check_result(@ary))
  rematched, di, @ri = @reg.bt_match(arr, idx, @ri, di, @ary) #this is @ary in twin
  if rematched.nil?
    @ary = nil
    return nil
  end

  #huh#is @ri right here? how do i know?

  #assert(@ri==Backtrace.clean_result(mat).size)
  assert(@ri)
  assert((0..@reg.max_matches) === @ri)
  #assert(mat.equal? @ary) #wronggo
  @ary = rematched

  answer = [Backtrace.clean_result(rematched), di]
  @diinc = di #no @diinc in twin
  assert(::Array === answer.first.first)
  assert(idx + answer.last <= arr.size)
  #assert(@ri==result.last.size)
  answer
end
314
+
315
# Returns an independent RepeatMatchSet with the same matcher, iteration
# count and data offset, a deep copy of the result list (made with
# Backtrace.deep_copy), and a shallow copy of the pending first match
# (nil if it has already been handed out).
def deep_copy
  #assert(@ri==Backtrace.clean_result(@ary).size)
  assert((0..@reg.max_matches) === @ri)
  pending = @firstmatch && @firstmatch.dup
  twin = RepeatMatchSet.new(@reg, Backtrace.deep_copy(@ary), @ri, @diinc)
  # the constructor always builds a fresh first match; restore ours instead
  twin.instance_variable_set(:@firstmatch, pending)
  twin
end
323
+ end
324
+
325
+ class Repeat
326
# Greedily matches @reg against consecutive elements of +arr+ beginning at
# +start+, taking at most @times.end elements.
#
# Returns
# * [RR[matched slice], count] when exactly @times.begin elements matched;
# * nil when fewer than @times.begin elements matched;
# * a SingleRepeatMatchSet built from (count, -1, @times.begin) when more
#   than the minimum matched.
#
# The count is kept in an explicit counter.  The earlier version relied on
# the block parameter of `each` overwriting an outer local, which stopped
# working when block parameters became block-local (Ruby 1.9); the count
# then always came out as 0.
def mmatch(arr,start)
  limit = @times.end
  count = 0
  while count < limit && start + count < arr.size && @reg === arr[start + count]
    count += 1
  end
  assert((0..limit) === count)

  return [RR[arr[start, count]], count] if count == @times.begin
  return nil unless count > @times.begin
  SingleRepeatMatchSet.new(count, -1, @times.begin)
end
340
+
341
# Matches @reg repeatedly against +arr+ from +start+ using the backtracking
# engine (#bt_match): first the mandatory @times.begin iterations, then
# greedily up to the maximum.
#
# Returns
# * nil when the mandatory iterations cannot be matched, or when input is
#   exhausted and at least one iteration is required;
# * a RepeatMatchSet when optional iterations were matched, or when the
#   mandatory match left backtracking stops in its result list;
# * otherwise a plain [cleaned result, di] pair.
#
# The outcome of the optional pass is kept in separate locals and only
# checked when non-nil, so a failed optional pass falls back to the
# mandatory match with its own ri/di instead of tripping the assertions
# on nil.
def mmatch_multiple(arr,start)
  assert(start <= arr.size)
  r = [RR[]]

  #first match the minimum number
  if @times.begin == 0 #if we can match nothing
    # NOTE(review): this returns the raw list r, whereas the final return
    # wraps Backtrace.clean_result(r) -- confirm the difference is intended.
    return [r, 0] if arr.size == start #at end of input? return empty set
    ri = di = 0
  else
    return nil if arr.size == start
    assert(@times.begin < INFINITY)
    r, di, ri = bt_match(arr, start, 0, 0, r, @times.begin) #matches @reg @times.begin times
    return nil if r.nil?
  end
  assert(ri == @times.begin)

  assert(!@times.exclude_end?)
  left = @times.end - @times.begin

  #note: left could be infinite here...

  #do the optional match iterations
  #only greedy matching implemented for now
  #there must be a more efficient algorithm...
  if left >= 1
    assert(Backtrace.check_result(r))
    #get remaining matches up to @times.end times.
    #bt_match gets a deep copy because it could change its argument, and
    #the original in r might still be needed below
    res, res_di, res_ri = bt_match(arr, start, ri, di, Backtrace.deep_copy(r))
    if res
      assert(Backtrace.check_result(res))
      assert(@times === res_ri)
      return RepeatMatchSet.new(self, res, res_ri, res_di)
    end
  end

  #if matchset has no backtracking stops, and
  #hence cannot contain more than one actual match,
  #then just return that match.
  r.size > 1 ? RepeatMatchSet.new(self, r, ri, di) : [Backtrace.clean_result(r), di]
end
385
+ end
386
+
387
class Subseq
  # Fixed-width match: each of @regs is tested (with ===) against exactly
  # one element of +arr+, beginning at +start+.
  #
  # Returns [RR[matched slice], @regs.size] on success, nil when the input
  # is too short or any element fails to match.
  def mmatch(arr,start)
    #in this version, each of @regs is not a multiple reg
    assert(start <= arr.size)
    width = @regs.size
    return nil unless start + width <= arr.size

    @regs.each_with_index do |reg, offset|
      assert(start + offset < arr.size)
      return nil unless reg === arr[start + offset]
    end
    [RR[arr[start, width]], width]
  end

  # Variable-width match through the backtracking engine (#bt_match).
  #
  # Returns a SubseqMatchSet wrapping the result list and consumed offset,
  # or the falsy result of bt_match when nothing matched.
  def mmatch_multiple(arr,start)
    #in this version, at least one of @regs is a multiple reg
    #start==arr.size and huh
    assert((0..arr.size).include?(start))
    result, di, _ri = bt_match(arr, start, 0, 0, [RR[]])
    result && SubseqMatchSet.new(self, result, di)
  end
end
409
+ #--------------------------
410
+ class SubseqMatchSet < MatchSet
411
# Stores the sub-sequence matcher and its backtracking result list, and
# precomputes the first result ([cleaned result, di]) that #next_match
# hands out first.
def initialize(subseqreg,matchary,di)
  @reg = subseqreg
  @matchary = matchary
  @firstresult = [Backtrace.clean_result(matchary), di]
end
415
+
416
# Iteration count of a complete match: always @reg.max_matches.
def match_iterations
  @reg.max_matches
end
417
+
418
# Produces the next alternative match of the sub-sequence, or nil when none
# remain.
#
# ary   - the array being matched
# start - position in ary where this sub-sequence started
#
# Returns [cleaned result, data offset], tried in this order:
# 1. the result precomputed by #initialize (handed out once);
# 2. the next alternative of the most recent backtracking stop, if it
#    yields a complete match (ri == @reg.max_matches);
# 3. a full backtrack (@reg.backtrace) followed by a re-match (@reg.bt_match).
#
# A failed re-match leaves @matchary nil.  The early guard below makes
# every later call return nil, as RepeatMatchSet#next_match does, instead
# of passing nil on to last_next_match.
def next_match(ary,start)
  if @firstresult
    first = @firstresult
    @firstresult = nil
    assert(::Array === first.first.first)
    return first
  end

  return nil unless @matchary # exhausted by an earlier call

  result, di, ri = last_next_match(ary, start, @matchary)
  if result && ri == @reg.max_matches
    @matchary = result
    result = [Backtrace.clean_result(result), di]
    assert(::Array === result.first.first)
    return result
  end

  # no backtracking stop left to rewind to
  return nil unless @matchary && @matchary[-2]
  ri, di = @reg.backtrace(ary, start, @matchary, @reg.max_matches)
  return nil unless ri

  #need to adjust ri?

  #is this right... dunno...
  @matchary, di, _ri = @reg.bt_match(ary, start, ri, di, @matchary)

  return nil unless @matchary
  result = [Backtrace.clean_result(@matchary), di]
  assert(::Array === result.first.first)
  result
end
447
+
448
# Returns a shallow duplicate of this match set whose result list has been
# replaced with a deep copy (made with Backtrace.deep_copy).
def deep_copy
  fragment_copy = Backtrace.deep_copy(@matchary)
  twin = dup
  twin.instance_variable_set(:@matchary, fragment_copy)
  twin
end
454
+
455
# Returns @regs.
# NOTE(review): nothing visible in this class assigns @regs (only @reg,
# @matchary and @firstresult are set), so this presumably returns nil --
# confirm whether the matcher's own sub-matchers were meant.
def subregs
  @regs
end
456
+ end
457
+
458
+ #--------------------------
459
# Match set for "and" matchers.  It currently adds nothing to
# SubseqMatchSet, which the original author notes isn't really right:
# on next_match, the longest alternative(s) need to be backtracked.
# If they then become shorter than the next-longest alternative, that
# (formerly next-longest) alternative becomes the dominating one and
# determines how much input is consumed.
class AndMatchSet < SubseqMatchSet
end
467
+
468
class Array
  # Case-equality against a plain ::Array: runs the backtracking matcher
  # over the whole of +other+.
  #
  # Returns false when +other+ is not an ::Array or the match does not
  # consume every element; otherwise returns the cleaned match result,
  # built as a ::Array.
  def ===(other)
    return false unless ::Array === other

    result, di, _ri = bt_match(other, 0, 0, 0, [RR[]])
    assert(di.nil? || di <= other.size)
    # di is nil when bt_match failed, so the comparison is false then
    di == other.size && Backtrace.clean_result(result, ::Array)
  end
end
476
+
477
+ end