reg 0.4.6
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING +510 -0
- data/README +404 -0
- data/assert.rb +31 -0
- data/calc.reg +73 -0
- data/forward_to.rb +49 -0
- data/item_thattest.rb +47 -0
- data/numberset.rb +200 -0
- data/parser.txt +188 -0
- data/philosophy.txt +72 -0
- data/reg.gemspec +27 -0
- data/reg.rb +33 -0
- data/regarray.rb +675 -0
- data/regarrayold.rb +477 -0
- data/regbackref.rb +126 -0
- data/regbind.rb +74 -0
- data/regcase.rb +78 -0
- data/regcore.rb +379 -0
- data/regdeferred.rb +134 -0
- data/reggrid.csv +2 -1
- data/regguide.txt +416 -0
- data/reghash.rb +318 -0
- data/regitem_that.rb +146 -0
- data/regknows.rb +63 -0
- data/reglogic.rb +195 -0
- data/reglookab.rb +94 -0
- data/regold.rb +75 -0
- data/regpath.rb +74 -0
- data/regposition.rb +68 -0
- data/regprogress.rb +1067 -0
- data/regreplace.rb +114 -0
- data/regsugar.rb +230 -0
- data/regtest.rb +1075 -0
- data/regvar.rb +76 -0
- data/trace.rb +45 -0
- metadata +83 -0
data/regarrayold.rb
ADDED
@@ -0,0 +1,477 @@
|
|
1
|
+
=begin copyright
|
2
|
+
reg - the ruby extended grammar
|
3
|
+
Copyright (C) 2005 Caleb Clausen
|
4
|
+
|
5
|
+
This library is free software; you can redistribute it and/or
|
6
|
+
modify it under the terms of the GNU Lesser General Public
|
7
|
+
License as published by the Free Software Foundation; either
|
8
|
+
version 2.1 of the License, or (at your option) any later version.
|
9
|
+
|
10
|
+
This library is distributed in the hope that it will be useful,
|
11
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
13
|
+
Lesser General Public License for more details.
|
14
|
+
|
15
|
+
You should have received a copy of the GNU Lesser General Public
|
16
|
+
License along with this library; if not, write to the Free Software
|
17
|
+
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18
|
+
=end
|
19
|
+
|
20
|
+
module Reg
|
21
|
+
module Backtrace
|
22
|
+
#--------------------------
|
23
|
+
#Collapses a backtracking result into one flat collection of matches.
#
#+result+ is laid out as [fragment, matchset, index, fragment, ...]
#(so its size is always 1 modulo 3); only the fragments, found at every
#third slot starting from 0, contribute to the output. The matchsets and
#indexes in between are bookkeeping and are skipped.
#
#result::  the backtracking result array described above
#restype:: class whose +[]+ constructor builds the return value
#          (defaults to RR)
#
#Returns restype[...] holding the elements of every fragment, in order.
def Backtrace.clean_result(result,restype=RR)
  assert result.size%3==1
  merged=[]
  (0...result.size).step(3) do |pos|
    frag=result[pos]
    assert RR===frag
    assert frag.empty? || ::Array===frag.first
    merged+=frag
    assert merged.empty? || merged.first.empty? || ::Array===merged.first
  end
  assert merged.empty? || merged.first.empty? || ::Array===merged.first
  restype[*merged]
end
|
35
|
+
|
36
|
+
#--------------------------
|
37
|
+
#Sanity-checks the layout of a backtracking result.
#
#Asserts that +result+ has size 1 modulo 3, that every third slot
#(0, 3, 6, ...) is an RR whose first element, if any, is an ::Array, and
#that each later fragment is preceded by a MatchSet and an Integer index.
#
#Returns true, so the call itself can be wrapped in an assert.
def Backtrace.check_result(result)
  assert result.size%3==1
  #NOTE(review): this lower bound is never raised inside the loop, so the
  #index check below only verifies that each index is >= 0.
  lowest_idx=0
  (0...result.size).step(3) do |pos|
    frag=result[pos]
    assert RR===frag
    assert frag.empty? || ::Array===frag.first
    unless pos==0
      #the two slots before every non-initial fragment are its bookkeeping
      assert MatchSet===result[pos-2]
      assert Integer===result[pos-1]
      assert result[pos-1]>=lowest_idx
    end
  end
  true
end
|
50
|
+
|
51
|
+
|
52
|
+
#--------------------------
|
53
|
+
#Copies a backtracking result so the copy can be changed independently.
#
#Layout of +res+: fragment, then repeated (matchset, number, fragment)
#triples. Fragments are dup'ed, matchsets are copied with their own
#+deep_copy+, and the numbers are carried over as they are.
#
#Returns the new array; it is asserted to compare equal to +res+ and to
#pass Backtrace.check_result.
def Backtrace.deep_copy(res)
  assert res.size%3==1
  assert ::Array===res.first
  copy=[res.first.dup]
  1.step(res.size-1,3) do |at|
    matchset,data_idx,frag=res[at,3]
    assert matchset
    cloned=matchset.deep_copy
    copy.push(cloned,data_idx,frag.dup)
    #dump both objects before the assertion below fires, to aid debugging
    unless cloned==matchset
      pp :ms_o, matchset.ob_state, :r_3_o, cloned.ob_state, :ms, matchset, :r_3, cloned
    end
    assert(cloned==matchset)
  end
  assert copy==res
  assert Backtrace.check_result( copy)
  copy
end
|
70
|
+
|
71
|
+
|
72
|
+
#--------------------------
|
73
|
+
#bt, in this case, stands for 'backtracking'.
|
74
|
+
#but the cognoscenti refer to this function as 'bitch-match'.
|
75
|
+
#Matches the sub-matchers regs(ri), regs(ri+1), ... against +arr+,
#backtracking through earlier match sets when one of them fails.
#
#arr::       the data array being matched
#start::     base index into +arr+
#ri::        index of the sub-matcher to try next (0..regs_size)
#di::        offset from +start+ of the next unmatched datum
#result::    backtracking result: a fragment followed by
#            (matchset, di, fragment) triples (see Backtrace.check_result)
#regs_size:: stop once +ri+ reaches this value (defaults to max_matches)
#
#Returns [result, di, ri] when all sub-matchers matched, or when one
#failed but enough_matches?(ri) holds. Returns nil when backtracking is
#exhausted.
#
#Note that <tt>result += [...]</tt> rebinds the local to a new array,
#while <tt>result.last<<mat</tt> mutates the last fragment in place, so
#callers should use the returned array rather than the one passed in.
def bt_match(arr,start,ri,di,result,regs_size=max_matches)
  assert start+di <= arr.size
  assert start >= 0
  assert di >= 0
  assert( (0..regs_size)===ri)
  assert ::Array===result.first
  assert Backtrace.check_result( result)
  loop do #loop over regs to match
    assert start+di <= arr.size
    assert di >= 0
    assert( (0..regs_size)===ri)

    trace_enabled? and $stderr.print start, " ", self.inspect, ": ", Backtrace.clean_result(result).inspect, "\n"
    assert Backtrace.check_result result

    #try a new match of current reg
    r=regs(ri)
    if r.respond_to? :mmatch
      # 'mmatch could return 2 items here'
      m=r.mmatch(arr,start+di)
      #is a single match or a match set?
      unless m.respond_to? :next_match
        mat,matchlen=*m #single match or nil
      else
        #it's a set -- start new inner result array
        #with initial match as first elem
        result += [m,di,[]]
        mat,matchlen=m.next_match(arr,start+di)
        assert mat
      end
    else
      #plain matcher: test one datum with ===; mat stays nil on failure
      if start+di<arr.size && r===arr[start+di]
        mat=RR[arr[start+di]]
      end
    end


    assert Backtrace.check_result result

    unless mat #match fail?
      assert Backtrace.check_result result
      return result,di,ri if enough_matches? ri

      #doesn't match, try backtracing
      ri,di=backtrace(arr,start,result,ri)
      ri or return nil #bt failed? we fail
      assert(start+di<=arr.size)
      assert Backtrace.check_result result
    else #match succeeded
      #advance to next reg
      ri+=1
      result.last<<mat
      assert ::Array===result.first
      #plain matchers set no matchlen; fall back to the match's own length
      matchlen ||= mat.length
      di=update_di(di,matchlen)
      assert(start+di<=arr.size)
    end

    assert( (0..regs_size)===ri)
    assert(start+di<=arr.size)

    assert Backtrace.check_result result
    return result,di,ri if ri==regs_size

  end #loop

end
|
142
|
+
|
143
|
+
#--------------------------
|
144
|
+
#Rewinds +result+ to the most recent match set that still has another
#alternative, and installs that alternative.
#
#arr::    the data array being matched
#start::  base index into +arr+
#result:: backtracking result (fragment plus (matchset, di, fragment)
#         triples); it is modified in place: exhausted triples are
#         sliced off and the last fragment is replaced by [mat]
#ri::     index of the sub-matcher that just failed
#
#Returns [ri, di] adjusted to continue after the new alternative, or nil
#when no match set is left to retry.
#Raises RuntimeError ('partial result underflow') if fewer than three
#elements could be removed from +result+.
def backtrace(arr,start,result,ri)
  assert ri != INFINITY
  assert(Backtrace.check_result result)
  mat,matlen,di=nil
  loop do #might have to bt multiple times if prev prelim set also fails
    #get result set and
    #reset data idx to start of last prelim set
    ms,di=result[-3..-2]

    unless ms #if result underflowing we fail
      assert(result.size==1)
      #we must have b'trace'd thru the last prelim result set
      #no more alternatives; finally fail
      return nil
    end

    ri-=result.last.size #reset result idx

    assert(ri>=0)

    assert(result.size%3==1)
    assert(result.size>=3)
    assert start+di <= arr.size
    mat,matlen=ms.next_match(arr,start+di)
    # pp ms
    mat and break(assert( (0..max_matches)===ri+1))
    #this set is exhausted: drop its triple and retry the one before it
    result.slice!(-3..-1).size==3 or raise 'partial result underflow'
  end

  assert ::Array===mat
  assert ::Array===mat.first
  assert start+update_di(di,matlen) <= arr.size

  #adjust ri,di,and result to include mat
  ri+=1
  result[-1]=[mat]
  di= update_di(di,matlen)

  assert start+di <= arr.size
  #assert(Backtrace.check_result mat)
  return ri,di
end
|
186
|
+
end
|
187
|
+
|
188
|
+
class MatchSet
  #Asks the last match set recorded in +resfrag+ for its next alternative
  #and, when there is one, re-matches whatever sub-matchers come after it.
  #
  #ary::     the data array being matched
  #start::   base index into +ary+
  #resfrag:: backtracking result (fragment plus (matchset, di, fragment)
  #          triples); modified in place
  #
  #Relies on the including class to provide +match_iterations+ and @reg.
  #
  #Returns a [resfrag, di, ri] triple. resfrag and di are nil when
  #+resfrag+ holds no match set at all, or when the last set is exhausted
  #and @reg.enough_matches?(ri) is false.
  def last_next_match(ary,start,resfrag)
    r,di=resfrag[-3..-2]
    r or return nil,nil,match_iterations

    #dunno how to do this simply...
    #assert full_up? if SubseqMatchSet===self

    r,diinc=r.next_match(ary,start+di)
    unless r
      discarding=resfrag.last
      #raise takes a message or exception class; a Symbol here would
      #itself fail with TypeError and hide the real problem
      resfrag.slice!(-3..-1).size==3 or raise 'impossible'

      #might need to return non-nil here, if resfrag isn't exhausted yet
      ri=match_iterations-discarding.size
      return nil,nil,ri unless @reg.enough_matches? ri
      return resfrag, di, ri
    end

    assert di+diinc <= ary.size
    di+=diinc
    ri=match_iterations-resfrag[-1].size+1 #+1 for r, which must match here if set
    resfrag[-1]=[r]
    if ri<@reg.max_matches #if there are more subregs of this reg to be matched
      #re-match tail regs
      assert di <= ary.size
      #di is sometimes bad here, it seems....(fixed now?)
      resfrag,di,ri=@reg.bt_match(ary,start,ri,di,resfrag)
    end

    return resfrag,di,ri
  end
end
|
221
|
+
#--------------------------
|
222
|
+
#Iterator over the alternative ways a repeat matcher can match at one
#position. Each call to next_match yields the next alternative as a
#[matches, length] pair, or nil when none are left.
class RepeatMatchSet < MatchSet
  #regrepeat:: the repeat matcher that produced this set
  #ary::       backtracking result of the first (greediest) match
  #ri::        number of iterations matched in +ary+
  #diinc::     number of data items consumed by +ary+
  def initialize(regrepeat,ary,ri,diinc) #maybe rename diinc=>di
    @reg,@ary,@ri,@diinc=regrepeat,ary,ri,diinc
    #@cnt=@startcnt-stepper
    #@ary.push 1
    #cached so that the first next_match call returns it unchanged
    @firstmatch=[Backtrace.clean_result(ary),@diinc]
    assert( @reg.times===@ri)
    assert @ri
    #assert(@ri==@firstmatch.first.size)
  end

  #Number of iterations in the current match.
  def match_iterations;
    #assert(@ri==Backtrace.clean_result(@ary).size)
    @ri
  end

  #very nearly identical to SubseqMatchSet#next_match
  #
  #Returns the next alternative as [cleaned_matches, length], trying in
  #order: the cached first match, the same match with its last iteration
  #dropped, the next alternative of the last inner match set, and
  #finally a full backtrack-and-rematch. Returns nil (and forgets @ary)
  #once nothing is left.
  def next_match(arr,idx)
    #fewer assertions in twin
    if @firstmatch
      result,@firstmatch=@firstmatch,nil
      assert result.first.empty? || ::Array===result.first.first
      #print "idx=#{idx}, inc=#{result.last}, arr.size=#{arr.size}\n"
      assert idx+result.last<=arr.size
      assert(@ri==result.first.size)
      return result
    end

    @ary or return nil #not in twin ... ignore it

    #this part's not in twin
    #'need to check for fewer matches here before rematching last matchset'

    #what if the match that gets discarded was returned by a matchset
    #that has more matches in it? in that case nothing should be done...
    #in that case, @ary.last.size is 1 and the body is not executed...
    if @ri>@reg.times.begin && @ary.last.size>1
      @ri-=1
      discarding=@ary.last.pop
      @diinc-=discarding.last.size
      #assert(@ri==Backtrace.clean_result(@ary).size)
      assert idx+@ri<=arr.size
      return [Backtrace.clean_result(@ary), @diinc]
    end


    result,di,@ri=last_next_match(arr,idx,@ary)
    if result and @reg.times===@ri #condition slightly different in twin
      result=[Backtrace.clean_result(@ary=result),di]
      @diinc=di #not in twin...why?
      assert @ri
      assert ::Array===result.first.first
      assert idx+result.last<=arr.size
      #assert(@ri==result.first.size)
      return result
    end

    assert( (0..@reg.max_matches)===@ri)
    #assert(@ri==Backtrace.clean_result(@ary).size)
    assert(Backtrace.check_result @ary)



    #no match set left in @ary to backtrack into: this set is exhausted
    @ary[-2] or return @ary=nil #also checking @ary in twin... ignore it
    assert @ri>0

    @ri,di=@reg.backtrace(arr,idx,@ary, @ri) #last param is @reg.max_matches in twin
    #this is where the divergence widens. @ri is a local in twin
    @ri or return @ary=nil #@ary never set to nil like this in twin... ignore it

    #huh 'need to adjust @ri?' #why?

    #assert(@ri==Backtrace.clean_result(@ary).size)
    assert(Backtrace.check_result @ary)
    mat,di,@ri=@reg.bt_match(arr,idx,@ri,di,@ary) #mat is @ary in twin
    mat.nil? and return @ary=nil

    #huh#is @ri right here? how do i know?

    #assert(@ri==Backtrace.clean_result(mat).size)
    assert @ri
    assert( (0..@reg.max_matches)===@ri)
    #assert(mat.equal? @ary) #wronggo
    @ary=mat

    result=[Backtrace.clean_result(mat),di]
    @diinc=di #no @diinc in twin
    assert ::Array===result.first.first
    assert idx+result.last<=arr.size
    #assert(@ri==result.last.size)
    return result
  end

  #Returns a new RepeatMatchSet over a Backtrace.deep_copy of @ary, with
  #the same @reg, @ri and @diinc and a dup of the pending first match.
  def deep_copy
    #assert(@ri==Backtrace.clean_result(@ary).size)
    assert( (0..@reg.max_matches)===@ri)
    res=RepeatMatchSet.new @reg,Backtrace.deep_copy(@ary),@ri,@diinc
    #the constructor always caches a first match; overwrite it so the
    #copy agrees with this set even after the first match was consumed
    fm =@firstmatch && @firstmatch.dup
    res.instance_eval { @firstmatch=fm }
    return res
  end
end
|
324
|
+
|
325
|
+
class Repeat
  #Matches @reg repeatedly against consecutive items of +arr+, beginning
  #at index +start+; used when @reg itself matches exactly one item.
  #
  #Counts how many consecutive items (at most @times.end) satisfy
  #@reg===item, then returns:
  #* [RR[matched_items], count] when the count equals @times.begin,
  #* nil when the count is below @times.begin,
  #* a SingleRepeatMatchSet when the count exceeds @times.begin.
  def mmatch(arr,start)
    #An explicit counter is used on purpose: a block parameter is local
    #to its block on Ruby 1.9 and later, so counting through an
    #each-block's parameter would leave the outer variable untouched.
    count=0
    while count<@times.end && start+count<arr.size && @reg===arr[start+count]
      count+=1
    end
    assert( (0..@times.end)===count)
    if count==@times.begin
      return [RR[arr[start,count]], count]
    end
    count>@times.begin or return nil
    return SingleRepeatMatchSet.new(count,-1,@times.begin)
  end

  #Variant of mmatch that goes through bt_match, so that @reg may itself
  #return match sets.
  #
  #Returns nil when the minimum number of iterations cannot be matched,
  #a RepeatMatchSet when the result contains backtracking stops or
  #optional iterations were attempted, and otherwise a plain
  #[matches, length] pair.
  def mmatch_multiple(arr,start)
    assert start <= arr.size
    r=[RR[]]

    #first match the minimum number
    if @times.begin==0 #if we can match nothing
      arr.size==start and return [r,0] #at end of input? return empty set
      ri=di=0
    else
      arr.size==start and return nil
      assert @times.begin<INFINITY
      r,di,ri=bt_match(arr,start,0,0,r,@times.begin) #matches @reg @times.begin times
      r.nil? and return nil
    end
    assert ri==@times.begin

    assert !@times.exclude_end?
    left=@times.end-@times.begin

    #note: left and top could be infinite here...

    #do the optional match iterations
    #only greedy matching implemented for now
    #there must be a more efficient algorithm...
    if left >= 1
      assert Backtrace.check_result r
      #get remaining matches up to @times.end times
      #why the deep_copy here?
      #because bt_match could change the rr argument, and
      #we might need to return the original in r below
      #NOTE(review): if bt_match returned nil here, di and ri would be
      #overwritten with nil before the fallback below uses them -- confirm
      #that bt_match cannot fail once @times.begin iterations matched.
      res,di,ri=bt_match(arr,start,ri,di,rr=Backtrace.deep_copy(r))
      assert Backtrace.check_result res
      assert @times===ri

      #res is not right type! --yes it is
      res and return RepeatMatchSet.new(self,res,ri,di)
    end

    #if matchset has no backtracking stops, and
    #hence cannot contain more than one actual match,
    #then just return that match.
    r.size>1 ? RepeatMatchSet.new(self,r,ri,di) :
               [Backtrace.clean_result(r),di]
  end
end
|
386
|
+
|
387
|
+
class Subseq
  #Matches each of @regs against one consecutive item of +arr+, beginning
  #at index +start+ (this version is for the case where no member of
  #@regs is a multiple matcher).
  #
  #Returns [RR[matched_items], item_count], or nil when too few items
  #remain or any item is rejected by its matcher.
  def mmatch(arr,start)
    assert start<=arr.size
    width=@regs.size
    return nil if start+width>arr.size
    @regs.each_with_index do |sub,offset|
      assert(start+offset<arr.size)
      return nil unless sub===arr[start+offset]
    end
    [RR[arr[start,width]], width]
  end

  #Variant for the case where at least one member of @regs is a multiple
  #matcher: delegates to bt_match starting from an empty result.
  #
  #Returns a SubseqMatchSet wrapping the match, or bt_match's falsy
  #result when nothing matched.
  def mmatch_multiple(arr,start)
    #start==arr.size and huh
    assert( (0..arr.size).include?( start))
    matched,consumed,_ri=bt_match(arr,start,0,0,[RR[]])
    return matched unless matched
    SubseqMatchSet.new(self,matched,consumed)
  end
end
|
409
|
+
#--------------------------
|
410
|
+
#Iterator over the alternative ways a subsequence matcher can match at
#one position. Each call to next_match yields the next alternative as a
#[matches, length] pair, or nil when none are left.
class SubseqMatchSet < MatchSet
  #subseqreg:: the subsequence matcher that produced this set
  #matchary::  backtracking result of the first match
  #di::        number of data items consumed by +matchary+
  def initialize(subseqreg,matchary,di)
    @reg,@matchary=subseqreg,matchary
    #cached so that the first next_match call returns it unchanged
    @firstresult= [Backtrace.clean_result(@matchary),di]
  end

  #A subsequence match always covers every sub-matcher of @reg.
  def match_iterations; @reg.max_matches end

  #Returns the next alternative as [cleaned_matches, length], trying in
  #order: the cached first result, the next alternative of the last
  #inner match set, and finally a backtrack-and-rematch. Returns nil
  #when nothing is left.
  def next_match(ary,start)
    if @firstresult
      @firstresult,result=nil,@firstresult
      assert ::Array===result.first.first
      return result
    end
    result,di,ri=last_next_match(ary,start,@matchary)
    if result and ri==@reg.max_matches
      result=[Backtrace.clean_result(@matchary=result),di]
      assert ::Array===result.first.first
      return result
    end

    #nothing left to backtrack into: this set is exhausted
    (@matchary and @matchary[-2]) or return nil
    ri,di=@reg.backtrace(ary,start,@matchary, @reg.max_matches)
    ri or return nil

    #need to adjust ri?

    #is this right... dunno...
    @matchary,di,bogus=@reg.bt_match(ary,start,ri,di,@matchary)


    #falls through and returns nil when the re-match failed
    if @matchary
      result=[Backtrace.clean_result(@matchary),di]
      assert ::Array===result.first.first
      return result
    end
  end

  #Returns a dup of this set whose @matchary is an independent
  #Backtrace.deep_copy; the other instance variables are shared.
  #NOTE(review): next_match can leave @matchary nil after a failed
  #re-match; Backtrace.deep_copy(nil) would then raise -- confirm that
  #deep_copy is never called on an exhausted set.
  def deep_copy
    resfrag=Backtrace.deep_copy(@matchary)
    result=dup
    result.instance_eval{@matchary=resfrag}
    return result
  end

  #NOTE(review): @regs is never assigned in this class (only @reg and
  #@matchary are), so this appears to always return nil -- confirm the
  #intended source of the sub-matchers.
  def subregs; @regs end
end
|
457
|
+
|
458
|
+
#--------------------------
|
459
|
+
#Match set for 'and' matchers; currently adds nothing to SubseqMatchSet.
class AndMatchSet < SubseqMatchSet
  #this isn't really right...
  #on next_match, we need to backtrack the longest alternative(s).
  #if they're then shorter than the next longest alternative,
  #then that (formerly next longest) alternative becomes
  #the dominating alternative, and determines how much is consumed

end
|
467
|
+
|
468
|
+
class Array
  #Tests whether +other+ is a core ::Array that this matcher array
  #matches from its first item to its last.
  #
  #Returns false when +other+ is not an ::Array or when the match does
  #not consume all of it; otherwise returns the matches as an ::Array
  #built by Backtrace.clean_result.
  def ===(other)
    return false unless ::Array===other
    matched,consumed,_ri=bt_match(other,0,0,0,[RR[]])
    assert consumed.nil? || consumed <= other.size
    #a nil count (bt_match failed) also lands here, since nil never
    #equals a size
    return false unless consumed==other.size
    Backtrace.clean_result(matched,::Array)
  end
end
|
476
|
+
|
477
|
+
end
|