reg 0.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +510 -0
- data/README +404 -0
- data/assert.rb +31 -0
- data/calc.reg +73 -0
- data/forward_to.rb +49 -0
- data/item_thattest.rb +47 -0
- data/numberset.rb +200 -0
- data/parser.txt +188 -0
- data/philosophy.txt +72 -0
- data/reg.gemspec +27 -0
- data/reg.rb +33 -0
- data/regarray.rb +675 -0
- data/regarrayold.rb +477 -0
- data/regbackref.rb +126 -0
- data/regbind.rb +74 -0
- data/regcase.rb +78 -0
- data/regcore.rb +379 -0
- data/regdeferred.rb +134 -0
- data/reggrid.csv +2 -1
- data/regguide.txt +416 -0
- data/reghash.rb +318 -0
- data/regitem_that.rb +146 -0
- data/regknows.rb +63 -0
- data/reglogic.rb +195 -0
- data/reglookab.rb +94 -0
- data/regold.rb +75 -0
- data/regpath.rb +74 -0
- data/regposition.rb +68 -0
- data/regprogress.rb +1067 -0
- data/regreplace.rb +114 -0
- data/regsugar.rb +230 -0
- data/regtest.rb +1075 -0
- data/regvar.rb +76 -0
- data/trace.rb +45 -0
- metadata +83 -0
data/regarrayold.rb
ADDED
@@ -0,0 +1,477 @@
|
|
1
|
+
=begin copyright
|
2
|
+
reg - the ruby extended grammar
|
3
|
+
Copyright (C) 2005 Caleb Clausen
|
4
|
+
|
5
|
+
This library is free software; you can redistribute it and/or
|
6
|
+
modify it under the terms of the GNU Lesser General Public
|
7
|
+
License as published by the Free Software Foundation; either
|
8
|
+
version 2.1 of the License, or (at your option) any later version.
|
9
|
+
|
10
|
+
This library is distributed in the hope that it will be useful,
|
11
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
13
|
+
Lesser General Public License for more details.
|
14
|
+
|
15
|
+
You should have received a copy of the GNU Lesser General Public
|
16
|
+
License along with this library; if not, write to the Free Software
|
17
|
+
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18
|
+
=end
|
19
|
+
|
20
|
+
module Reg
|
21
|
+
module Backtrace
|
22
|
+
#--------------------------
|
23
|
+
# Collapses a backtracking result list into one flat +restype+.
#
# +result+ is read three slots at a time: slots 0, 3, 6, ... must be RR
# instances holding matches; the two slots between them are ignored here.
# The contents of every match slot are appended in order and splatted
# into +restype[...]+ (RR unless the caller passes something else).
def Backtrace.clean_result(result,restype=RR)
  assert result.size % 3 == 1
  flat = []
  (0...result.size).step(3) do |slot|
    chunk = result[slot]
    assert RR === chunk
    assert chunk.empty? || ::Array === chunk.first
    flat.concat(chunk)
    assert flat.empty? || flat.first.empty? || ::Array === flat.first
  end
  assert flat.empty? || flat.first.empty? || ::Array === flat.first
  restype[*flat]
end
|
35
|
+
|
36
|
+
#--------------------------
|
37
|
+
# Asserts that +result+ has the expected backtracking layout and returns true.
#
# Expected shape: size is 1 modulo 3; slots 0, 3, 6, ... are RR instances
# that are empty or start with an Array; every later RR slot is preceded by
# a MatchSet and then an Integer.
def Backtrace.check_result(result)
  assert result.size % 3 == 1
  # NOTE(review): last_idx is never advanced in this method, so the final
  # check below only verifies that each stored index is >= 0 -- confirm
  # whether a non-decreasing check was intended.
  last_idx = 0
  (0...result.size).step(3) do |slot|
    chunk = result[slot]
    assert RR === chunk
    assert chunk.empty? || ::Array === chunk.first
    unless slot == 0
      assert MatchSet === result[slot - 2]
      assert Integer === result[slot - 1]
      assert result[slot - 1] >= last_idx
    end
  end
  true
end
|
50
|
+
|
51
|
+
|
52
|
+
#--------------------------
|
53
|
+
# Builds a copy of a backtracking result list.
#
# Layout of +res+: one leading array, then repeating groups of
# (matchset, num, arr). Arrays are dup'ed, each matchset is copied with its
# own #deep_copy, and num is carried over as-is. The copy is asserted to
# compare equal to the original before it is returned.
def Backtrace.deep_copy(res)
  assert res.size % 3 == 1
  assert ::Array === res.first
  result = [res.first.dup]
  (1...res.size).step(3) do |n|
    ms, num, arr = res[n, 3]
    assert ms
    result.concat([ms.deep_copy, num, arr.dup])
    # debugging aid: dump both sides when the copied matchset does not
    # compare equal to its source
    unless result[-3] == ms
      pp :ms_o, ms.ob_state, :r_3_o, result[-3].ob_state, :ms, ms, :r_3, result[-3]
    end
    assert(result[-3] == ms)
  end
  assert result == res
  assert Backtrace.check_result(result)
  result
end
|
70
|
+
|
71
|
+
|
72
|
+
#--------------------------
|
73
|
+
#bt, in this case, stands for 'backtracking'.
|
74
|
+
#but the cognoscenti refer to this function as 'bitch-match'.
|
75
|
+
# Matches regs(ri), regs(ri+1), ... against +arr+, backtracking on failure.
#
# arr::       the data array being matched
# start::     base index into arr; +di+ is an offset from it
# ri::        index of the next reg to try
# di::        data offset consumed so far
# result::    backtracking result list (see Backtrace.check_result)
# regs_size:: reg index at which matching is complete (default max_matches)
#
# Returns [result, di, ri] on success -- or early, when a reg fails but
# enough_matches?(ri) holds -- and nil when backtracking is exhausted.
def bt_match(arr,start,ri,di,result,regs_size=max_matches)
  assert start + di <= arr.size
  assert start >= 0
  assert di >= 0
  assert((0..regs_size) === ri)
  assert ::Array === result.first
  assert Backtrace.check_result(result)

  loop do # one pass per reg to match
    assert start + di <= arr.size
    assert di >= 0
    assert((0..regs_size) === ri)

    if trace_enabled?
      $stderr.print start, " ", self.inspect, ": ", Backtrace.clean_result(result).inspect, "\n"
    end
    assert Backtrace.check_result(result)

    # try a new match of the current reg; both stay nil on failure
    mat = matchlen = nil
    pos = start + di
    r = regs(ri)
    if !r.respond_to?(:mmatch)
      # plain matcher: compare against a single element
      mat = RR[arr[pos]] if pos < arr.size && r === arr[pos]
    else
      m = r.mmatch(arr, pos)
      if m.respond_to?(:next_match)
        # it's a match set -- open a new (matchset, di, matches) group and
        # take its first match. `+` builds a new array here; the array
        # object passed in by the caller is not extended.
        result = result + [m, di, []]
        mat, matchlen = m.next_match(arr, pos)
        assert mat
      else
        mat, matchlen = *m # single match, or nil
      end
    end

    assert Backtrace.check_result(result)

    if mat
      # match succeeded: record it and advance to the next reg
      ri += 1
      result.last << mat
      assert ::Array === result.first
      matchlen ||= mat.length
      di = update_di(di, matchlen)
      assert(start + di <= arr.size)
    else
      assert Backtrace.check_result(result)
      return result, di, ri if enough_matches?(ri)

      # doesn't match, try backtracking
      ri, di = backtrace(arr, start, result, ri)
      return nil unless ri # backtracking failed? we fail
      assert(start + di <= arr.size)
      assert Backtrace.check_result(result)
    end

    assert((0..regs_size) === ri)
    assert(start + di <= arr.size)
    assert Backtrace.check_result(result)
    return result, di, ri if ri == regs_size
  end
end
|
142
|
+
|
143
|
+
#--------------------------
|
144
|
+
# Rewinds +result+ to the most recent match set that can still produce
# another match, and installs that match.
#
# Each pass looks at the last (matchset, di, matches) group of +result+,
# subtracts the number of matches in that group from +ri+, and asks the
# matchset for its next match. A group whose matchset is exhausted is
# removed and the pass repeats with the group before it.
#
# Returns [ri, di] adjusted to include the new match, or nil once no group
# is left. +result+ is modified in place.
def backtrace(arr,start,result,ri)
  assert ri != INFINITY
  assert(Backtrace.check_result(result))
  mat = matlen = di = nil
  loop do # may take several rounds if earlier groups are exhausted too
    # fetch the last matchset and the data offset it started at
    ms, di = result[-3..-2]

    unless ms
      # no group left: every alternative has been tried; finally fail
      assert(result.size == 1)
      return nil
    end

    ri -= result.last.size # reset reg index

    assert(ri >= 0)
    assert(result.size % 3 == 1)
    assert(result.size >= 3)
    assert start + di <= arr.size
    mat, matlen = ms.next_match(arr, start + di)
    if mat
      assert((0..max_matches) === ri + 1)
      break
    end
    raise 'partial result underflow' unless result.slice!(-3..-1).size == 3
  end

  assert ::Array === mat
  assert ::Array === mat.first
  assert start + update_di(di, matlen) <= arr.size

  # adjust ri, di and result to include mat
  ri += 1
  result[-1] = [mat]
  di = update_di(di, matlen)

  assert start + di <= arr.size
  return ri, di
end
|
186
|
+
end
|
187
|
+
|
188
|
+
class MatchSet
  # Asks the most recent nested match set recorded in +resfrag+ for its
  # next match and, if there is one, re-matches the remaining regs.
  #
  # ary::     the data array being matched
  # start::   base index into ary
  # resfrag:: backtracking result list; its last three slots are read as
  #           (matchset, di, matches). Modified in place.
  #
  # Returns a triple [resfrag, di, ri]. The first two entries are nil when
  # resfrag holds no nested match set, or when the nested set is exhausted
  # and @reg.enough_matches?(ri) is false. Relies on the including class to
  # provide #match_iterations and @reg.
  def last_next_match(ary,start,resfrag)
    r, di = resfrag[-3..-2]
    return nil, nil, match_iterations unless r

    #dunno how to do this simply...
    #assert full_up? if SubseqMatchSet===self

    r, diinc = r.next_match(ary, start + di)
    unless r
      discarding = resfrag.last
      # raise needs a String or an Exception; a bare Symbol here would
      # surface as a TypeError instead of this message
      resfrag.slice!(-3..-1).size == 3 or raise 'impossible'

      #might need to return non-nil here, if resfrag isn't exhausted yet
      ri = match_iterations - discarding.size
      return nil, nil, ri unless @reg.enough_matches?(ri)
      return resfrag, di, ri
    end

    assert di + diinc <= ary.size
    di += diinc
    ri = match_iterations - resfrag[-1].size + 1 #+1 for r, which must match here if set
    resfrag[-1] = [r]
    if ri < @reg.max_matches #if there are more subregs of this reg to be matched
      #re-match tail regs
      assert di <= ary.size
      #di is sometimes bad here, it seems....(fixed now?)
      resfrag, di, ri = @reg.bt_match(ary, start, ri, di, resfrag)
    end

    return resfrag, di, ri
  end
end
|
221
|
+
#--------------------------
|
222
|
+
# Match set produced for a repeated reg: hands out one match per call to
# #next_match until no alternative is left.
class RepeatMatchSet < MatchSet
  # regrepeat:: the repeat reg that produced this set
  # ary::       backtracking result list of the initial match
  # ri::        number of iterations matched so far
  # diinc::     data length consumed by the initial match
  def initialize(regrepeat,ary,ri,diinc) #maybe rename diinc=>di
    @reg = regrepeat
    @ary = ary
    @ri = ri
    @diinc = diinc
    # the initial match is served by the first call to next_match
    @firstmatch = [Backtrace.clean_result(ary), @diinc]
    assert(@reg.times === @ri)
    assert @ri
  end

  # Current iteration count.
  def match_iterations
    return @ri
  end

  # Returns the next [match, length] pair, or nil when exhausted.
  #
  # Order of attempts: the match stored at construction; dropping the most
  # recently recorded match (only while above the minimum count); asking
  # the last nested match set for another match; finally backtracking and
  # matching forward again.
  #
  # very nearly identical to SubseqMatchSet#next_match ("twin" below)
  def next_match(arr,idx)
    if @firstmatch
      result = @firstmatch
      @firstmatch = nil
      assert result.first.empty? || ::Array === result.first.first
      assert idx + result.last <= arr.size
      assert(@ri == result.first.size)
      return result
    end

    return nil unless @ary #not in twin ... ignore it

    #this part's not in twin
    #what if the match that gets discarded was returned by a matchset
    #that has more matches in it? in that case nothing should be done...
    #in that case, @ary.last.size is 1 and the body is not executed...
    if @ri > @reg.times.begin && @ary.last.size > 1
      @ri -= 1
      discarding = @ary.last.pop
      @diinc -= discarding.last.size
      assert idx + @ri <= arr.size
      return [Backtrace.clean_result(@ary), @diinc]
    end

    result, di, @ri = last_next_match(arr, idx, @ary)
    if result && @reg.times === @ri #condition slightly different in twin
      @ary = result
      result = [Backtrace.clean_result(@ary), di]
      @diinc = di #not in twin...why?
      assert @ri
      assert ::Array === result.first.first
      assert idx + result.last <= arr.size
      return result
    end

    assert((0..@reg.max_matches) === @ri)
    assert(Backtrace.check_result(@ary))

    unless @ary[-2] #also checking @ary in twin... ignore it
      @ary = nil
      return nil
    end
    assert @ri > 0

    @ri, di = @reg.backtrace(arr, idx, @ary, @ri) #last param is @reg.max_matches in twin
    #this is where the divergence widens. @ri is a local in twin
    unless @ri #@ary never set to nil like this in twin... ignore it
      @ary = nil
      return nil
    end

    #huh 'need to adjust @ri?' #why?

    assert(Backtrace.check_result(@ary))
    mat, di, @ri = @reg.bt_match(arr, idx, @ri, di, @ary) #mat is @ary in twin
    if mat.nil?
      @ary = nil
      return nil
    end

    #huh#is @ri right here? how do i know?

    assert @ri
    assert((0..@reg.max_matches) === @ri)
    @ary = mat

    result = [Backtrace.clean_result(mat), di]
    @diinc = di #no @diinc in twin
    assert ::Array === result.first.first
    assert idx + result.last <= arr.size
    result
  end

  # Returns a new RepeatMatchSet built from a Backtrace.deep_copy of the
  # result list, carrying over the pending first match (dup'ed) if any.
  def deep_copy
    assert((0..@reg.max_matches) === @ri)
    copy = RepeatMatchSet.new(@reg, Backtrace.deep_copy(@ary), @ri, @diinc)
    pending = @firstmatch && @firstmatch.dup
    copy.instance_variable_set(:@firstmatch, pending)
    copy
  end
end
|
324
|
+
|
325
|
+
class Repeat
  # Matches @reg against consecutive elements of +arr+ beginning at
  # +start+, up to @times.end of them.
  #
  # Returns [RR[matched_elements], count] when exactly @times.begin elements
  # matched, nil when fewer matched, and a SingleRepeatMatchSet when more
  # matched.
  def mmatch(arr,start)
    # Count with an explicit local. The count must not depend on a block
    # parameter writing through to an outer variable: block parameters are
    # block-local from Ruby 1.9 on, which would leave the count at zero.
    count = 0
    while count < @times.end && start + count < arr.size && @reg === arr[start + count]
      count += 1
    end
    assert((0..@times.end) === count)

    return [RR[arr[start, count]], count] if count == @times.begin
    return nil unless count > @times.begin
    SingleRepeatMatchSet.new(count, -1, @times.begin)
  end

  # Variant that goes through bt_match: first matches the minimum number of
  # iterations, then tries for the optional ones.
  #
  # Returns nil on failure, a RepeatMatchSet when the result contains
  # backtracking stops or the optional iterations matched, and otherwise a
  # plain [match, length] pair.
  def mmatch_multiple(arr,start)
    assert start <= arr.size
    r = [RR[]]

    #first match the minimum number
    if @times.begin == 0 #if we can match nothing
      return [r, 0] if arr.size == start #at end of input? return empty set
      ri = di = 0
    else
      return nil if arr.size == start
      assert @times.begin < INFINITY
      r, di, ri = bt_match(arr, start, 0, 0, r, @times.begin) #matches @reg @times.begin times
      return nil if r.nil?
    end
    assert ri == @times.begin

    assert !@times.exclude_end?
    left = @times.end - @times.begin

    #note: left and top could be infinite here...

    #do the optional match iterations
    #only greedy matching implemented for now
    #there must be a more efficient algorithm...
    if left >= 1
      assert Backtrace.check_result(r)
      #get remaining matches up to @times.end times
      #bt_match works on a deep copy because it could change its argument,
      #and the original in r may still be needed below.
      #The outcome goes into separate locals so that a failed attempt does
      #not overwrite the di/ri that belong to r.
      res, res_di, res_ri = bt_match(arr, start, ri, di, Backtrace.deep_copy(r))
      if res
        assert Backtrace.check_result(res)
        assert @times === res_ri
        return RepeatMatchSet.new(self, res, res_ri, res_di)
      end
    end

    #if matchset has no backtracking stops, and
    #hence cannot contain more than one actual match,
    #then just return that match.
    r.size > 1 ? RepeatMatchSet.new(self, r, ri, di) : [Backtrace.clean_result(r), di]
  end
end
|
386
|
+
|
387
|
+
class Subseq
  # Element-by-element match; in this version, each of @regs is not a
  # multiple reg.
  #
  # Returns [RR[matched_slice], @regs.size] when every reg matches the
  # element at its position from +start+ on, and nil otherwise (including
  # when too few elements remain).
  def mmatch(arr,start)
    assert start <= arr.size
    width = @regs.size
    return nil unless start + width <= arr.size

    offset = 0
    @regs.each do |reg|
      assert(start + offset < arr.size)
      return nil unless reg === arr[start + offset]
      offset += 1
    end
    [RR[arr[start, @regs.size]], @regs.size]
  end

  # Backtracking match; in this version, at least one of @regs is a
  # multiple reg.
  #
  # Returns a SubseqMatchSet wrapping the bt_match result, or the falsy
  # bt_match result itself when matching failed.
  def mmatch_multiple(arr,start)
    #start==arr.size and huh
    assert((0..arr.size).include?(start))
    result, di, _ri = bt_match(arr, start, 0, 0, [RR[]])
    result && SubseqMatchSet.new(self, result, di)
  end
end
|
409
|
+
#--------------------------
|
410
|
+
# Match set produced for a subsequence reg: hands out one match per call to
# #next_match until no alternative is left.
class SubseqMatchSet < MatchSet
  # subseqreg:: the reg that produced this set
  # matchary::  backtracking result list of the initial match
  # di::        data length consumed by the initial match
  def initialize(subseqreg,matchary,di)
    @reg = subseqreg
    @matchary = matchary
    # the initial match is served by the first call to next_match
    @firstresult = [Backtrace.clean_result(@matchary), di]
  end

  def match_iterations
    @reg.max_matches
  end

  # Returns the next [match, length] pair, or nil when exhausted.
  #
  # Order of attempts: the match stored at construction; asking the last
  # nested match set for another match; finally backtracking and matching
  # forward again.
  def next_match(ary,start)
    if @firstresult
      result = @firstresult
      @firstresult = nil
      assert ::Array === result.first.first
      return result
    end

    result, di, ri = last_next_match(ary, start, @matchary)
    if result && ri == @reg.max_matches
      @matchary = result
      result = [Backtrace.clean_result(@matchary), di]
      assert ::Array === result.first.first
      return result
    end

    return nil unless @matchary && @matchary[-2]
    ri, di = @reg.backtrace(ary, start, @matchary, @reg.max_matches)
    return nil unless ri

    #need to adjust ri?

    #is this right... dunno...
    @matchary, di, _ri = @reg.bt_match(ary, start, ri, di, @matchary)
    return nil unless @matchary

    result = [Backtrace.clean_result(@matchary), di]
    assert ::Array === result.first.first
    result
  end

  # Returns a dup of this set whose result list is a Backtrace.deep_copy.
  def deep_copy
    resfrag = Backtrace.deep_copy(@matchary)
    twin = dup
    twin.instance_variable_set(:@matchary, resfrag)
    twin
  end

  # NOTE(review): nothing in this class body assigns @regs (only @reg,
  # @matchary and @firstresult are set) -- confirm what this should return.
  def subregs
    @regs
  end
end
|
457
|
+
|
458
|
+
#--------------------------
|
459
|
+
# Placeholder: adds nothing to SubseqMatchSet yet.
class AndMatchSet < SubseqMatchSet
  # Author's note: this isn't really right.
  # On next_match, the longest alternative(s) need to be backtracked.
  # If they then become shorter than the next longest alternative,
  # that (formerly next longest) alternative becomes the dominating
  # alternative and determines how much is consumed.
end
|
467
|
+
|
468
|
+
class Array
  # Case-equality against a plain ::Array.
  #
  # Returns false for anything that is not an ::Array. Otherwise runs
  # bt_match from the beginning of +other+ and returns the cleaned match
  # (as an ::Array) when all of +other+ was consumed, and false otherwise.
  def ===(other)
    return false unless ::Array === other

    result, di, _ri = bt_match(other, 0, 0, 0, [RR[]])
    assert di.nil? || di <= other.size
    di == other.size && Backtrace.clean_result(result, ::Array)
  end
end
|
476
|
+
|
477
|
+
end
|