sequence 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,482 @@
1
+ # Copyright (C) 2006 Caleb Clausen
2
+ # Distributed under the terms of Ruby's license.
3
+ require 'sequence/subseq'
4
+
5
+ class Sequence
6
+ module StringLike
7
# Class of the raw data this kind of sequence deals in: String.
def data_class
  String
end
8
+
9
# Behaviour module for this kind of sequence (StringLike); SubSeq extends
# itself with whatever its parent sequence returns here.
def like
  StringLike
end
10
+
11
+ #-------------------------------------
12
# Index of the lowest set bit for every 4-bit value (nil for 0, which has none).
FFS_4BITTABLE=[nil,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0]

# Find first set: bit number (counted from the start of the sequence, bit 0
# being the least significant bit of the first byte) of the first 1 bit.
# Runs inside #holding and rewinds with #begin! before scanning.
# Returns nil when #read1 yields nil after the run of zero bytes (presumably
# meaning no bit is set anywhere -- TODO confirm #read1's eof behaviour).
def ffs
  holding{
    begin!
    zeros=read_til_charset(/[^\0]/)
    byte=read1
    byte or return nil
    lo=byte&0xF
    # look in the low nibble first; if it is empty the bit is in the high one
    rem=FFS_4BITTABLE[lo]||FFS_4BITTABLE[byte>>4]+4
    # parentheses are required: + binds tighter than <<
    return((zeros.size<<3)+rem)
  }
end
23
+
24
+ #-------------------------------------
25
# Find next set: bit number of the first 1 bit strictly after bit +bitnum+,
# numbered the same way as #ffs (from the start of the sequence).
# Previously the result ignored the starting byte offset, was counted from
# different bytes in the two branches, and was mangled by << / + precedence.
# Returns nil when #read1 yields nil (presumably end of data -- TODO confirm).
def fns(bitnum)
  holding{
    byteidx=bitnum>>3
    goto byteidx
    bitnum&=0x7
    byte=read1
    byte or return nil
    # clear bit +bitnum+ and everything below it in the starting byte
    byte&=~((1<<(bitnum+1))-1)
    if byte.zero?
      # nothing left in this byte: skip the zero bytes that follow it
      zeros_size=read_til_charset(/[^\0]/).size
      byte=read1
      byte or return nil
      byteidx+=zeros_size+1
    end
    lo=byte&0xF
    rem=FFS_4BITTABLE[lo]||FFS_4BITTABLE[byte>>4]+4
    return((byteidx<<3)+rem)
  }
end
42
+
43
+ #-------------------------------------
44
# Read until a character in a user-supplied set is found.
# charrex must be a regexp that contains _only_ a single character class.
# Data is read +blocksize+ units at a time; on a hit the position is moved
# back so that it rests on the matching character.
# Returns everything read before that character (or up to eof) as one String.
def read_til_charset(charrex,blocksize=16)
  blocks=[]
  until eof?
    block=read blocksize
    # if near eof, less than a full block may have been read

    if m=charrex.match(block)
      # explicit receiver needed: a bare "pos-=" would create a local variable
      self.pos-=m.post_match.length+1

      blocks.push m.pre_match if m.pre_match.length>0
      break
    end
    blocks<<block
  end
  # join, not to_s: Array#to_s is #inspect from Ruby 1.9 on
  return blocks.join
end
64
+
65
+
66
+ #-------------------------------------
67
# Quick-and-dirty match: runs +rex+ against +len+ units read ahead of (or,
# when +backwards+, behind) the current position.
# Limitations: anchors do not work right, the match is NOT implicitly
# anchored to the current position, and the position is not advanced.
# post_match (pre_match when going backwards) always answers nil.
# Returns the MatchData, or nil when nothing matched.
def match_fast(rex,backwards=false,len=maxmatchlen(backwards))
  reader=backwards ? :readbehind : :readahead
  found=rex.match(send(reader,len))
  if found
    # blank out the side of the match that lies outside the buffer
    if backwards
      def found.pre_match; end
    else
      def found.post_match; end
    end
  end
  found
end
82
+
83
+
84
+
85
+ #-------------------------------------
86
# Like #match, but looks backwards from the current position.
# +anchored+ asks for the match to be tied to the current position; +len+
# is how much data behind the position is examined.
# Returns the corrected match data (also stored in last_match and in
# Thread.current[:last_match]), or nil when there is no acceptable match.
# Note: pre_match of the result is a SubSeq.
def matchback(rex,anchored=true, len=maxmatchlen(true))
  at_start=nearbegin(len)
  if at_start && !anchored
    search_rex,extra_groups=rex,[]
  else
    search_rex,extra_groups=group_anchors(rex,:back,anchored)
  end

  # do the match against what input we have
  md=match_fast(search_rex,true,len)
  return unless md #not actually a match

  # fail if any ^ or \A matched at begin of buffer,
  # but buffer isn't begin of file
  return if !at_start && extra_groups.find{|g| md.end(g)==0 }

  window_start=pos-len
  window_start=0 if window_start<0
  assert(window_start>=0)
  match_start=position(window_start+md.begin(0))
  fixed=fixup_match_result(md,extra_groups,window_start,:pre) {
    before=SubSeq.new(self,0,match_start.pos)
    before.pos=match_start.pos
    before
  }

  Thread.current[:last_match]=fixed
  self.last_match=fixed
end
116
+
117
+ #-------------------------------------
118
# Like #match_fast, but anchors work correctly and post_match is set to
# something, if not exactly what you expected (a Sequence, not a String).
# The 2nd parameter determines whether the match is anchored on the left
# side to the current position or not.
# Returns the corrected match data (also stored in last_match and in
# Thread.current[:last_match]), or nil when there is no acceptable match.
def match(rex,anchored=true, len=maxmatchlen(false))
  search_rex=if nearend(len) then rex else group_anchors(rex,false,false).first end

  # do the match against what input we have
  md=match_fast(search_rex,false,len)
  return unless md

  # an anchored match has to begin right at the current position
  return if anchored && md.begin(0).nonzero?

  after=position
  after.move md.end(0)
  # note: post_match is a SubSeq
  fixed=fixup_match_result(md,[],pos,:post) { after.subseq(after.pos..-1) }

  Thread.current[:last_match]=fixed
  self.last_match=fixed
end
137
+
138
+
139
+ #-------------------------------------
140
# Cache of rewritten regexps, keyed by [rex,backwards,anchored].
GROUP_ANCHORS_CACHE={}

# Rewrite the anchors in +rex+ so that it can be matched against a buffer
# that is only a window onto the data.
# if not backwards:
#   replace \Z with (?!)
#   replace $ with (?=\n)
# if backwards:
#   replace \A with (?!)
#   replace ^ with (^) (and record the new group in addedgroups)
# There's no lookback in ruby regexp (yet), so ^ in a reverse regexp will
# perhaps lead to unexpected results: some matches with ^ in them will fail
# when they should have succeeded even if the ^ couldn't match. You should
# be pretty much ok if you don't use ^ within alternation (|) in a
# backwards match.
# If +anchored+, an implicit anchor is added (via #_anchor); the anchored
# form is now returned even when no other anchor had to be rewritten
# (previously the original, unanchored +rex+ came back in that case).
# Results are cached so that the cost of regexp rebuilding is reduced.
# Returns: [modified regexp, addedgroups]
def group_anchors(rex,backwards,anchored=false)
  key=[rex,backwards,anchored]
  cached=GROUP_ANCHORS_CACHE[key] and return cached
  if backwards
    caret,dollar,buffanchor='^',nil,'A'
  else
    caret,dollar,buffanchor=nil,'$','Z'
  end
  newrex=(anchored ? _anchor(rex,backwards,false) : rex.to_s)

  rewritten=incclass=false
  groupnum=0
  addedgroups=[]
  result=''
  # split into runs of "boring" text and the single special characters
  # \ ( [ ] $ ^ that separate them; joining the pieces gives the source back
  frags=newrex.split( /((?:[^\\(\[\]$^]+|\\(?:[CM]-)*[^CMZA])*)/ )
  frags.each_index{|i|
    frag=frags[i]
    case frag
    when "\\"
      if !incclass and frags[i+1][0,1]==buffanchor
        frags[i+1].slice! 0
        frag='(?!)'
        rewritten=true
      end
    when caret
      unless incclass
        addedgroups<<(groupnum+=1)
        frag="(^)"
        rewritten=true
      end
    when dollar
      unless incclass
        frag="(?=\n)"
        rewritten=true
      end
    when "(" then incclass or frags[i+1][0,1]=='?' or groupnum+=1 #count capturing groups only
    when "[" then incclass=true #ignore stuff til ]
    when "]" then incclass=false #stop ignoring stuff
    end
    result<<frag
  }

  newrex=(rewritten||anchored) ? Regexp.new(result) : rex

  GROUP_ANCHORS_CACHE[key]=[newrex,addedgroups]
end
200
+
201
+
202
+ #-------------------------------------
203
# Cache of anchored regexps, keyed by [str,backwards]. A constant rather
# than a class variable: class variables are shared across every includer
# and cannot be touched from toplevel code on Ruby 3.
ANCHOR_CACHE={}

# Add an anchor to a Regexp-string: \A in front normally, \Z behind when
# +backwards+.
# With +cache+ (the default) the result is compiled to a Regexp and
# memoized; with cache=false the anchored source String is returned instead.
def _anchor(str,backwards=false,cache=true)
  cache and result=ANCHOR_CACHE[[str,backwards]] and return result
  result=backwards ? "(?:#{str})\\Z" : "\\A(?:#{str})"
  cache and return ANCHOR_CACHE[[str,backwards]]||=Regexp.new( result )
  return result
end
211
+
212
+ #-------------------------------------
213
# Build a CorrectedMatchData from a raw MatchData.
# matchdata::   result of matching against a buffer
# addedgroups:: indexes of capture groups that were inserted by the regexp
#               rewriting and must be hidden from the caller
# pos_adjust::  offset added to every begin/end index
# namelet::     :pre if the block supplies pre_match, otherwise the block
#               supplies post_match; the other side comes from +matchdata+
# Groups that did not take part in the match keep nil offsets (adding
# pos_adjust to them used to raise NoMethodError).
def fixup_match_result(matchdata,addedgroups,pos_adjust,namelet,&body)

  #remove extra capture results from () we inserted from MatchData
  #..first extract groups, begin and end idxs from old
  groups=matchdata.to_a
  begins=[]
  ends=[]
  (0...matchdata.length).each{|i|
    b,e=matchdata.begin(i),matchdata.end(i)
    begins<<(b && b+pos_adjust)
    ends<<(e && e+pos_adjust)
  }

  #..remove data at group indexes we added above
  #  (highest index first, so that earlier deletions don't shift later ones)
  addedgroups.reverse_each{|groupidx|
    [groups,begins,ends].each{|arr| arr.delete_at groupidx }
  }

  #..now change matchdata to use fixed-up arrays
  result=CorrectedMatchData.new
  result.begins=begins
  result.ends=ends
  result.groups=groups
  if namelet==:pre
    result.set_pre_match_body( &body)
    result.set_post_match_body {matchdata.post_match}
  else
    result.set_pre_match_body {matchdata.pre_match}
    result.set_post_match_body( &body)
  end
  result.pos=pos_adjust

  result
end
246
+
247
+
248
+
249
+ #-------------------------------------
250
# Stand-in for MatchData whose groups and offsets have been fixed up (see
# fixup_match_result) and whose pre_match/post_match are computed lazily by
# caller-supplied blocks.
# Inherits from MatchData where the interpreter still lets an empty
# MatchData be allocated; newer Rubies undefine MatchData.allocate, which
# made the "alias new allocate" below fail at load time, so there the class
# falls back to a plain Object (duck-typed) base.
class CorrectedMatchData < (MatchData.respond_to?(:allocate) ? MatchData : Object)
  class<<self
    # instances are created empty and filled in through the writers below
    alias new allocate
  end

  def initialize; end

  # offset that was added to every begin/end index
  attr_reader :pos
  attr_writer :begins,:ends,:groups,:pos

  # block that produces pre_match on demand
  def set_pre_match_body(&body)
    @pre_match_body=body
  end

  # block that produces post_match on demand
  def set_post_match_body(&body)
    @post_match_body=body
  end

  def pre_match
    @pre_match_body[]
  end

  def post_match
    @post_match_body[]
  end

  def [](*args)
    @groups[*args]
  end

  def begin(n)
    @begins[n]
  end

  def end(n)
    @ends[n]
  end

  # [begin,end] of group n, or nil when n is not less than #size
  def offset(n)
    [@begins[n],@ends[n]] if n<size
  end

  def to_a
    @groups
  end

  def to_s
    @groups.first
  end

  def size
    @groups.size
  end
  alias length size
end
290
+
291
+
292
+
293
# Try to consume +pat+ at the current position.
# pat may be an Integer (a single character code), a String, or a Regexp
# (matched anchored at the current position).
# Returns the matched text; a falsy value (false or nil) when +pat+ is not
# there. The work is wrapped in #holding?, which presumably restores the
# position on failure -- TODO confirm.
# Raises ArgumentError for any other kind of pattern.
# (The "when X:" colon form used previously is a syntax error since Ruby 1.9.)
def scan(pat)
  holding? {case pat
    when Integer
      pat==read1 and pat.chr
    #when SetOfChar: ...
    when String
      pat==read(pat.size) and pat
    when Regexp
      if m=match(pat,true)
        goto m.end(0)
        m.to_s
      end
    else raise ArgumentError.new("bad scan pattern for Sequence::StringLike")
  end}
end
308
+
309
# Backwards counterpart of #scan: try to consume +pat+ immediately before
# the current position.
# pat may be an Integer (a single character code), a String, or a Regexp
# (matched with #matchback, anchored at the current position).
# Returns the matched text; a falsy value (false or nil) when +pat+ is not
# there. The work is wrapped in #holding?, which presumably restores the
# position on failure -- TODO confirm.
# Raises ArgumentError for any other kind of pattern.
# (The "when X:" colon form used previously is a syntax error since Ruby 1.9.)
def scanback(pat)
  holding? {case pat
    when Integer
      pat==readback1 and pat.chr
    #when SetOfChar: ...
    when String
      pat==readback(pat.size) and pat
    when Regexp
      if m=matchback(pat,true)
        goto m.begin(0)
        m.to_s
      end
    else raise ArgumentError.new("bad scan pattern for Sequence::StringLike")
  end}
end
324
+
325
# Search forward from the current position for +pat+ (Regexp, String or
# Integer character code) using #index, and read up to and including it.
# Returns the data from the current position through the end of the match,
# or nil when #index finds nothing.
# For a Regexp, last_match's pre_match is reset to the text between the
# starting position and the match.
# Raises ArgumentError for any other kind of pattern.
# (The "when X:" colon form used previously is a syntax error since Ruby
# 1.9; an unreachable commented-out draft after the return was dropped.)
def scan_until(pat)
  at=index( pat,pos) or return
  newpos=case pat
    when Regexp
      m=last_match
      s=slice(pos...m.begin(0))
      m.set_pre_match_body{s}
      m.end(0)
    when String then at+pat.size
    when Integer then at+1
    #when SetOfChar: huh
    else raise ArgumentError
    end
  return( read newpos-pos)
end
367
+
368
# Search backward from the current position for +pat+ using #rindex, and
# read back to the start of it.
# Returns the data from the start of the match up to the original position,
# or nil when #rindex finds nothing.
# For a Regexp, last_match's post_match is reset to the text between the end
# of the match and the starting position. The range now runs from
# m.end(0) up to, but not including, pos -- mirroring the exclusive range
# #scan_until uses; it used to be shifted one place to the right.
# (An unreachable commented-out draft after the return was dropped.)
def scanback_until(pat)
  at=rindex( pat,pos) or return
  newpos=
    if Regexp===pat
      m=last_match
      s=slice(m.end(0)...pos)
      m.set_post_match_body{s}
      m.begin(0)
    else at
    end
  assert(newpos<=pos)
  return( readback pos-newpos)
end
398
+
399
# Append +str+ at the end of the sequence (inserts at index #size).
# An Integer is taken as a character code and converted with #chr first.
def push(str)
  str=str.chr if Integer===str
  insert(size,str)
end
403
+
404
# Prepend +str+ at the very beginning of the sequence (inserts at index 0).
# An Integer is taken as a character code and converted with #chr first.
def unshift(str)
  str=str.chr if Integer===str
  insert(0,str)
end
408
+
409
# Position of the first occurrence of +pat+ at or after +pos+, or nil.
# pat may be a Regexp, a String or an Integer character code.
# The search walks a separate cursor (from #begin) through the data in
# overlapping windows of 4*maxmatchlen(false) units.
# For a Regexp, last_match is set and its pre_match covers everything from
# the start of the sequence up to the match.
# Fixes: +pos+ used to be ignored (the search always started at the
# beginning, though #scan_until passes its current position); Integer
# patterns are converted with #chr because String#index no longer accepts
# integers; the ensure clause tolerates a cursor that was never created.
def index pat,pos=0
  posi= self.begin()
  # cursor is assumed to start at offset 0, so a relative move reaches +pos+
  posi.move pos unless pos.zero?
  until_buffer_len=4*maxmatchlen(false)
  if Regexp===pat
    until_step_len=3*maxmatchlen(false)
    until posi.eof?
      if m=posi.match(pat,false,until_buffer_len)
        range=0...m.begin(0)
        pre=subseq(range)
        m.set_pre_match_body { pre }
        self.last_match=m
        return m.begin(0)
      end
      posi.move until_step_len
    end
  #elsif SetOfChar===pat; ...
  else
    needle=Integer===pat ? pat.chr : pat
    until_step_len=until_buffer_len
    # overlap the windows so a string straddling a boundary is still found
    String===pat and until_step_len-=pat.size-1
    until posi.eof?
      buf=posi.readahead(until_buffer_len)
      if i=buf.index( needle)
        return posi.pos+i
      end
      posi.move until_step_len
    end
  end
  return nil
ensure
  posi.close if posi
end
441
+
442
# Position of the last occurrence of +pat+ (Regexp, String or Integer
# character code), or nil. The search walks a separate cursor (from #end)
# backwards through the data in overlapping windows.
# For a Regexp, last_match is set and its post_match covers everything from
# the end of the match to the end of the sequence.
# NOTE(review): +pos+ is accepted but not used -- the search always starts
# at the end of the sequence; #scanback_until passes its current position
# and probably expects it to be honoured.
# Fixes: the String/Integer result is computed from the amount actually
# read (readbehind returns less than a full window near the beginning);
# the cursor is closed once, by the ensure clause, instead of twice;
# post_match starts at m.end(0) rather than one unit past it; Integer
# patterns are converted with #chr because String#rindex no longer accepts
# integers.
def rindex pat,pos=size-1
  posi= self.end()
  until_buffer_len=4*maxmatchlen(false)
  if Regexp===pat
    until_step_len=3*maxmatchlen(false)
    until posi.pos.zero?
      if m=posi.matchback(pat,false,until_buffer_len)
        range=m.end(0)..-1
        post=subseq(range)
        m.set_post_match_body { post }
        self.last_match=m
        return m.begin(0)
      end
      posi.move( -until_step_len )
    end
  #elsif SetOfChar===pat; ...
  else
    needle=Integer===pat ? pat.chr : pat
    until_step_len=until_buffer_len
    # overlap the windows so a string straddling a boundary is still found
    String===pat and until_step_len-=pat.size-1
    until posi.pos.zero?
      buf=posi.readbehind(until_buffer_len)
      if i=buf.rindex( needle)
        return posi.pos-buf.size+i
      end
      posi.move( -until_step_len )
    end
  end
  return nil
ensure
  posi.close if posi
end
476
+
477
+
478
+
479
+
480
+ #be nice to have #pack and #unpack too
481
+ end
482
+ end
@@ -0,0 +1,90 @@
1
+ # Copyright (C) 2006 Caleb Clausen
2
+ # Distributed under the terms of Ruby's license.
3
+ require 'sequence'
4
+ require 'sequence/usedata'
5
+
6
+ class Sequence
7
# A window onto part of another sequence: +len+ units of +seq+ starting at
# +first+, with its own position. It extends itself with the parent's #like
# module and registers for change notifications on the parent.
class SubSeq < Sequence
  def initialize(seq, first,len)
    # clip the window so that it does not run past the end of the parent
    first+len-1>=seq.size and len=seq.size-first
    @data=seq
    @pos=0
    @first,@size=first,len
    extend seq.like

    #ask for notifications on the parent seq...
    @data.on_change_notify self
  end

  # Called when the parent changed (+oldsize+ units at +first+ became
  # +newsize+ units). The window's start, size and position are run through
  # _adjust_pos_on_change as absolute offsets and made relative to the NEW
  # start again; subtracting the old start (as before) skewed @pos and @size
  # whenever the start itself moved. The change is then passed on to this
  # subsequence's own listeners with +first+ relative to the window.
  def change_notification data,first,oldsize,newsize
    assert @data==data
    new_first=_adjust_pos_on_change @first,first,oldsize,newsize
    @pos=(_adjust_pos_on_change @first+@pos,first,oldsize,newsize)-new_first
    @size=(_adjust_pos_on_change @first+@size,first,oldsize,newsize)-new_first
    @first=new_first

    notify_change(self, first-@first, oldsize, newsize)
  end

  # offset of this window within the parent sequence
  def offset; @first end

  # up to +len+ units after the position, without moving it
  def readahead(len)
    eof? and return new_data
    rest=rest_size
    len=rest if len>rest
    @data[@pos+offset,len]
  end

  # up to +len+ units before the position, without moving it
  def readbehind(len)
    @pos.zero? and return new_data
    @pos>=len or len=@pos
    @data[@pos+offset-len,len]
  end

  # like #readahead, but moves the position past what was read
  def read(len)
    result=readahead(len)
    move result.size
    result
  end

  # like #readbehind, but moves the position back over what was read
  def readback(len)
    result=readbehind(len)
    move( -result.size)
    result
  end

  def eof?
    @pos>=@size
  end

  attr_reader :size,:pos

  def _pos=newp
    @pos=newp
  end

  def_delegators :@data, :data_class, :new_data

  attr :data

  # a sub-window of this window, expressed directly against the parent
  def subseq *args
    first,len=_parse_slice_args( *args)
    SubSeq.new(@data,@first+first,len)
  end

  # modify the parent, with indexes translated from window to parent offsets
  def modify(*args)
    data=args.pop
    first,len,only1=_parse_slice_args( *args)
    first+=@first
    only1 ? @data.modify(first,data) : @data.modify(first,len,data)
  end

  def closed?
    super or @data.closed?
  end

end
89
+ SubSequence=SubSeq
90
+ end
@@ -0,0 +1,35 @@
1
+ # Copyright (C) 2006 Caleb Clausen
2
+ # Distributed under the terms of Ruby's license.
3
+ require 'sequence'
4
+ class Sequence
5
# Sequence flavour that implements the read methods directly on top of
# @data and @pos. @data must support #[] (start,length) and #size.
class UseData < Sequence

  # read up to +len+ units ahead and advance the position past them
  def read(len)
    chunk=readahead(len)
    @pos+=chunk.size
    chunk
  end

  # read up to +len+ units behind and move the position back over them
  def readback(len)
    chunk=readbehind(len)
    @pos-=chunk.size
    chunk
  end

  # peek at +len+ units starting at the position
  def readahead(len)
    @data[@pos,len]
  end

  # peek at up to +len+ units ending at the position
  def readbehind(len)
    len=@pos if len>@pos
    @data[@pos-len,len]
  end

  def size
    data.size
  end

  def_delegators :@data, :<<

end
35
+ end
@@ -0,0 +1,5 @@
1
+ # Copyright (C) 2006 Caleb Clausen
2
+ # Distributed under the terms of Ruby's license.
3
class Sequence
  # Version of the sequence library; frozen so it cannot be modified in place.
  VERSION='0.1.0'.freeze
end