sequence 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,482 @@
1
+ # Copyright (C) 2006 Caleb Clausen
2
+ # Distributed under the terms of Ruby's license.
3
+ require 'sequence/subseq'
4
+
5
+ class Sequence
6
+ module StringLike
7
# The class of the underlying data held by a string-like sequence.
def data_class
  String
end
8
+
9
# The mixin module that supplies string-like behaviour (this module).
def like
  StringLike
end
10
+
11
+ #-------------------------------------
12
# Index of the lowest set bit of each 4-bit value (nil for 0, which has none).
FFS_4BITTABLE=[nil,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0]

# Find first set: bit number of the lowest set bit in the sequence, counting
# from bit 0 (least significant bit) of the first byte.
# Returns nil when no byte with a set bit is found.
# Runs inside +holding+ (presumably so the position is restored afterwards --
# confirm against Sequence#holding).
def ffs
  holding {
    begin!
    zeros = read_til_charset(/[^\0]/)   # skip the run of leading zero bytes
    byte = read1
    # NOTE(review): assumes read1 yields nil at eof (i.e. every byte was zero)
    return nil unless byte
    low_nibble = byte & 0xF
    # use the low nibble if it has a set bit, otherwise the high nibble (+4)
    bit = FFS_4BITTABLE[low_nibble] || FFS_4BITTABLE[byte >> 4] + 4
    # parentheses are required: `<<` binds more loosely than `+`
    return (zeros.size << 3) + bit
  }
end
23
+
24
+ #-------------------------------------
25
# Find next set: bit number of the lowest set bit strictly above +bitnum+.
# Bit numbers are absolute (bit 0 is the least significant bit of byte 0),
# matching what #ffs returns. Returns nil when no further bit is set.
# Runs inside +holding+ (presumably restoring the position afterwards --
# confirm against Sequence#holding).
def fns(bitnum)
  holding {
    byte_index = bitnum >> 3
    goto byte_index
    byte = read1
    # NOTE(review): assumes read1 yields nil at eof
    return nil unless byte
    # clear bit (bitnum % 8) and everything below it in the starting byte
    byte &= ~((1 << ((bitnum & 0x7) + 1)) - 1)
    if byte.zero?
      # nothing left in this byte: skip it plus any following zero bytes
      byte_index += 1 + read_til_charset(/[^\0]/).size
      byte = read1
      return nil unless byte
    end
    low_nibble = byte & 0xF
    bit = FFS_4BITTABLE[low_nibble] || FFS_4BITTABLE[byte >> 4] + 4
    # parentheses are required: `<<` binds more loosely than `+`
    return (byte_index << 3) + bit
  }
end
42
+
43
+ #-------------------------------------
44
# Read until a character in a user-supplied set is found.
# charrex::   a Regexp that contains _only_ a single character class
#             (the repositioning below relies on the match being 1 char long).
# blocksize:: how many items to read per step.
# Returns the text that was skipped, as one String. The position is left on
# the matching character; if nothing matches, everything up to eof is read.
def read_til_charset(charrex,blocksize=16)
  blocks = []
  until eof?
    # near eof, less than a full block may have been read
    block = read blocksize

    if m = charrex.match(block)
      # step back over the matched char and everything read after it
      # ('self.' is required so this is an attribute write, not a new local)
      self.pos -= m.post_match.length + 1

      blocks.push m.pre_match unless m.pre_match.empty?
      break
    end
    blocks << block
  end
  # Array#to_s only concatenates on Ruby 1.8; join works on every version
  blocks.join
end
64
+
65
+
66
+ #-------------------------------------
67
# Fast, simple regexp match against the next (or previous) +len+ items.
# Limitations: anchors do not work right, matches are NOT implicitly
# anchored to the current position, and the position is not advanced.
# The returned MatchData has post_match (pre_match when going backwards)
# overridden to always return nil. Returns nil when there is no match.
def match_fast(rex,backwards=false,len=maxmatchlen(backwards))
  reader = backwards ? :readbehind : :readahead
  md = rex.match(send(reader, len))
  return md unless md

  # blank out the side of the match that would run past the buffer
  if backwards
    def md.pre_match; nil end
  else
    def md.post_match; nil end
  end
  md
end
82
+
83
+
84
+
85
+ #-------------------------------------
86
# Like #match, but looks backwards from the current position.
# rex::      the pattern
# anchored:: passed to group_anchors so the pattern is tied to the current
#            position
# len::      how much data behind the position is examined
# Returns a CorrectedMatchData (also stored in last_match and in
# Thread.current[:last_match]) or nil. Its pre_match is a SubSeq.
def matchback(rex,anchored=true, len=maxmatchlen(true))
  at_start = nearbegin(len)
  if at_start && !anchored
    newrex, addedgroups = rex, []
  else
    newrex, addedgroups = group_anchors(rex, :back, anchored)
  end

  # do the match against what input we have
  matchdata = match_fast(newrex, true, len)
  return unless matchdata   # not actually a match

  # fail if any ^ or \A matched at begin of buffer,
  # but buffer isn't begin of file
  return if !at_start && addedgroups.find { |i| matchdata.end(i) == 0 }

  # absolute position where the examined buffer starts (never negative)
  matchpos = pos - len
  matchpos = 0 unless matchpos >= 0
  assert(matchpos >= 0)
  match1st = position(matchpos + matchdata.begin(0))
  result = fixup_match_result(matchdata, addedgroups, matchpos, :pre) do
    # built lazily, only when pre_match is actually asked for
    before = SubSeq.new(self, 0, match1st.pos)
    before.pos = match1st.pos
    before
  end

  self.last_match = Thread.current[:last_match] = result
end
116
+
117
+ #-------------------------------------
118
# Like #match_fast, but anchors work correctly and post_match is set to
# something, if not exactly what you expected (a SubSeq, not a String).
# anchored:: when true the match must begin exactly at the current position
# len::      how much data ahead of the position is examined
# Returns a CorrectedMatchData (also stored in last_match and in
# Thread.current[:last_match]) or nil.
def match(rex,anchored=true, len=maxmatchlen(false))
  # anchors are only rewritten when nearend(len) is false
  newrex = if nearend(len) then rex else group_anchors(rex, false, false).first end

  # do the match against what input we have
  matchdata = match_fast(newrex, false, len)
  return unless matchdata

  return if anchored && matchdata.begin(0).nonzero?

  posi = position
  posi.move matchdata.end(0)
  result = fixup_match_result(matchdata, [], pos, :post) { posi.subseq(posi.pos..-1) }

  self.last_match = Thread.current[:last_match] = result
end
137
+
138
+
139
+ #-------------------------------------
140
# Rewrite the anchors of a regexp so it can be matched against a buffer
# that is only a window onto the data.
# if not backwards:
#   replace \Z with (?!)
#   replace $ with (?=\n)
# if backwards:
#   replace \A with (?!)
#   replace ^ with (^) (and record the new group number in addedgroups)
# There's no lookback in ruby regexp (yet), so ^ in a reverse regexp will
# perhaps lead to unexpected results: some matches with ^ in them will fail
# when they should have succeeded even if the ^ couldn't match. You should
# be pretty much ok if you don't use ^ within alternation (|) in a
# backwards match.
# If anchored, the implicit anchor built by _anchor is added as well.
# Results are cached per [rex,backwards,anchored] so the cost of regexp
# rebuilding is reduced.
# Returns: [modified regexp, addedgroups]
def group_anchors(rex,backwards,anchored=false)
  @@fs_cache ||= {}
  key = [rex, backwards, anchored]
  cached = @@fs_cache[key] and return cached

  if backwards
    caret, dollar, buffanchor = '^', nil, 'A'
  else
    caret, dollar, buffanchor = nil, '$', 'Z'
  end
  source = (anchored ? _anchor(rex, backwards, false) : rex.to_s)

  rewritten = incclass = false
  groupnum = 0
  addedgroups = []
  result = ''
  # break the source into single special characters and the plain runs
  # between them
  frags = source.split( /((?:[^\\(\[\]$^]+|\\(?:[CM]-)*[^CMZA])*)/ )
  frags.each_index { |i|
    frag = frags[i]
    case frag
    when "\\"
      # a lone backslash here is followed by a fragment starting with A or Z
      if !incclass and frags[i+1][0,1] == buffanchor
        frags[i+1].slice! 0
        frag = '(?!)'
        rewritten = true
      end
    when caret
      unless incclass
        addedgroups << (groupnum += 1)
        frag = "(^)"
        rewritten = true
      end
    when dollar
      unless incclass
        frag = "(?=\n)"
        rewritten = true
      end
    when "(" then incclass or frags[i+1][0,1] == "?" or groupnum += 1  # count capturing groups only
    when "[" then incclass = true   # ignore stuff til ]
    when "]" then incclass = false  # stop ignoring stuff
    end
    result << frag
  }

  # rebuild when anchored too: the anchor exists only in +result+, so
  # returning +rex+ would silently drop it
  newrex = (rewritten || anchored) ? Regexp.new(result) : rex

  @@fs_cache[key] = [newrex, addedgroups]
end
200
+
201
+
202
+ #-------------------------------------
203
+ @@anchor_cache={}
204
# Add an anchor to a Regexp or regexp source string: \A in front, or \Z
# behind when +backwards+ is true.
# With cache=true the result is a Regexp, memoized in @@anchor_cache;
# with cache=false the anchored source String is returned uncompiled.
def _anchor(str,backwards=false,cache=true)
  key = [str, backwards]
  if cache
    hit = @@anchor_cache[key]
    return hit if hit
  end

  anchored = if backwards then "(?:#{str})\\Z" else "\\A(?:#{str})" end
  return anchored unless cache

  @@anchor_cache[key] ||= Regexp.new(anchored)
end
211
+
212
+ #-------------------------------------
213
# Build a CorrectedMatchData from a raw MatchData.
# matchdata::   result of matching against a buffer
# addedgroups:: indexes of the capture groups group_anchors inserted; they
#               are removed here (expected in ascending order)
# pos_adjust::  added to every begin/end offset; also stored as the
#               result's pos
# namelet::     :pre if +body+ computes pre_match, anything else if it
#               computes post_match; the other side comes from +matchdata+
def fixup_match_result(matchdata,addedgroups,pos_adjust,namelet,&body)
  # extract groups, begin and end indexes from the old match
  groups = matchdata.to_a
  begins = []
  ends = []
  matchdata.length.times do |i|
    begins.push(matchdata.begin(i) + pos_adjust)
    ends.push(matchdata.end(i) + pos_adjust)
  end

  # drop the inserted groups, highest index first so the lower indexes
  # stay valid
  addedgroups.reverse_each do |groupidx|
    groups.delete_at(groupidx)
    begins.delete_at(groupidx)
    ends.delete_at(groupidx)
  end

  # package the fixed-up arrays
  fixed = CorrectedMatchData.new
  fixed.begins = begins
  fixed.ends = ends
  fixed.groups = groups
  if namelet == :pre
    fixed.set_pre_match_body(&body)
    fixed.set_post_match_body { matchdata.post_match }
  else
    fixed.set_pre_match_body { matchdata.pre_match }
    fixed.set_post_match_body(&body)
  end
  fixed.pos = pos_adjust

  fixed
end
246
+
247
+
248
+
249
+ #-------------------------------------
250
# A MatchData look-alike whose groups and offsets are supplied by the
# caller (see fixup_match_result) and whose pre_match/post_match are
# computed on demand by stored blocks.
# NOTE(review): this relies on MatchData.allocate being available; I believe
# recent Ruby versions no longer define it -- confirm the target Ruby.
class CorrectedMatchData < MatchData
  class << self
    # create blank instances directly via allocate
    alias new allocate
  end

  def initialize
  end

  attr_accessor :pos
  attr_writer :begins, :ends, :groups

  # store the block that computes pre_match
  def set_pre_match_body(&body)
    @pre_match_body = body
  end

  # store the block that computes post_match
  def set_post_match_body(&body)
    @post_match_body = body
  end

  def pre_match
    @pre_match_body.call
  end

  def post_match
    @post_match_body.call
  end

  def [](*args)
    @groups[*args]
  end

  def begin(n)
    @begins[n]
  end

  def end(n)
    @ends[n]
  end

  # [begin, end] of group n, or nil when n is not below size
  def offset(n)
    [@begins[n], @ends[n]] if n < size
  end

  def to_a
    @groups
  end

  def to_s
    @groups.first
  end

  def size
    @groups.size
  end
  alias length size
end
290
+
291
+
292
+
293
# Try to match +pat+ exactly at the current position.
# pat may be an Integer (compared with read1), a String (compared with the
# next pat.size items) or a Regexp (anchored #match).
# Returns the matched text as a String, or a false value when there is no
# match. Runs inside +holding?+ (presumably restoring the position when the
# block's result is false -- confirm against Sequence#holding?).
# Raises ArgumentError for any other kind of pattern.
def scan(pat)
  holding? {
    case pat
    when Integer
      pat == read1 and pat.chr
    #when SetOfChar: ...
    when String
      pat == read(pat.size) and pat
    when Regexp
      if m = match(pat, true)
        goto m.end(0)
        m.to_s
      end
    else raise ArgumentError.new("bad scan pattern for Sequence::StringLike")
    end
  }
end
308
+
309
# Try to match +pat+ immediately behind the current position.
# pat may be an Integer (compared with readback1), a String (compared with
# the previous pat.size items) or a Regexp (anchored #matchback).
# Returns the matched text as a String, or a false value when there is no
# match. Runs inside +holding?+ (presumably restoring the position when the
# block's result is false -- confirm against Sequence#holding?).
# Raises ArgumentError for any other kind of pattern.
def scanback(pat)
  holding? {
    case pat
    when Integer
      pat == readback1 and pat.chr
    #when SetOfChar: ...
    when String
      pat == readback(pat.size) and pat
    when Regexp
      if m = matchback(pat, true)
        goto m.begin(0)
        m.to_s
      end
    else raise ArgumentError.new("bad scan pattern for Sequence::StringLike")
    end
  }
end
324
+
325
# Search forward for +pat+ (via #index) and read up to and including it.
# pat may be a Regexp, String or Integer.
# Returns what was read (everything from the current position through the
# end of the match), or nil when the pattern is not found.
# For a Regexp, last_match's pre_match is reset to the data between the
# starting position and the match.
# Raises ArgumentError for any other kind of pattern.
def scan_until(pat)
  at = index(pat, pos) or return
  newpos =
    case pat
    when Regexp
      m = last_match
      skipped = slice(pos...m.begin(0))
      m.set_pre_match_body { skipped }
      m.end(0)
    when String then at + pat.size
    when Integer then at + 1
    #when SetOfChar: huh
    else raise ArgumentError
    end
  read(newpos - pos)
end
367
+
368
# Search backward for +pat+ (via #rindex) and read back to the start of
# the match. Returns what was read back, or nil when the pattern is not
# found. For a Regexp, last_match's post_match is reset to a slice of the
# data after the match.
# NOTE(review): the slice starts at m.end(0)+1 and its range includes pos;
# that looks one item off at each end -- confirm against #slice semantics.
def scanback_until(pat)
  at = rindex(pat, pos)
  return unless at

  if Regexp === pat
    m = last_match
    tail = slice(m.end(0)+1..pos)
    m.set_post_match_body { tail }
    newpos = m.begin(0)
  else
    newpos = at
  end

  assert(newpos <= pos)
  readback(pos - newpos)
end
398
+
399
# Append +str+ at the end of the sequence; an Integer is first converted
# to a one-character String with #chr.
def push(str)
  str = str.chr if Integer === str
  insert(size, str)
end
403
+
404
# Insert +str+ at the very beginning of the sequence; an Integer is first
# converted to a one-character String with #chr.
def unshift(str)
  str = str.chr if Integer === str
  insert(0, str)
end
408
+
409
# Find the first occurrence of +pat+ at or after +pos+.
# pat:: a Regexp, String or Integer (an Integer is treated as a single
#       character, via #chr)
# pos:: absolute position at which to start searching (default 0)
# Returns the absolute position of the match, or nil if there is none.
# For a Regexp, last_match is set and its pre_match becomes the subseq from
# the start of the data up to the match.
# The data is examined through a temporary cursor in overlapping windows of
# 4*maxmatchlen(false) items; the cursor is always closed on the way out.
def index(pat,pos=0)
  posi = self.begin()
  # honour the requested start (move is relative and the cursor starts at 0)
  posi.move pos if pos > 0
  until_buffer_len = 4*maxmatchlen(false)
  if Regexp === pat
    until_step_len = 3*maxmatchlen(false)
    until posi.eof?
      if m = posi.match(pat, false, until_buffer_len)
        pre = subseq(0...m.begin(0))
        m.set_pre_match_body { pre }
        self.last_match = m
        return m.begin(0)
      end
      posi.move until_step_len
    end
  #elsif SetOfChar===pat; ...
  else
    # String#index no longer accepts an Integer on Ruby 1.9+
    needle = Integer === pat ? pat.chr : pat
    until_step_len = until_buffer_len
    # overlap the windows so a match straddling a boundary is not missed
    String === pat and until_step_len -= pat.size-1
    until posi.eof?
      buf = posi.readahead(until_buffer_len)
      if i = buf.index(needle)
        return posi.pos + i
      end
      posi.move until_step_len
    end
  end
  return nil
ensure
  posi.close if posi
end
441
+
442
# Find the last occurrence of +pat+, searching backward from the end.
# pat:: a Regexp, String or Integer (an Integer is treated as a single
#       character, via #chr)
# pos:: NOTE(review): accepted but not used -- the search always starts at
#       the end of the data. Left as is because the intended meaning
#       (match starts at or before pos, or ends before pos) is unclear.
# Returns the absolute position of the match, or nil if there is none.
# For a Regexp, last_match is set and its post_match becomes a subseq of
# the data after the match.
# NOTE(review): that subseq starts at m.end(0)+1, which looks one item too
# far -- confirm.
# The temporary cursor is closed exactly once, by the ensure clause.
def rindex(pat,pos=size-1)
  posi = self.end()
  until_buffer_len = 4*maxmatchlen(false)
  if Regexp === pat
    until_step_len = 3*maxmatchlen(false)
    until posi.pos.zero?
      if m = posi.matchback(pat, false, until_buffer_len)
        post = subseq(m.end(0)+1..-1)
        m.set_post_match_body { post }
        self.last_match = m
        return m.begin(0)
      end
      posi.move( -until_step_len )
    end
  #elsif SetOfChar===pat; ...
  else
    # String#rindex no longer accepts an Integer on Ruby 1.9+
    needle = Integer === pat ? pat.chr : pat
    until_step_len = until_buffer_len
    # overlap the windows so a match straddling a boundary is not missed
    String === pat and until_step_len -= pat.size-1
    until posi.pos.zero?
      buf = posi.readbehind(until_buffer_len)
      if i = buf.rindex(needle)
        # the buffer may be shorter than requested near the beginning, so
        # measure from its real length
        return posi.pos - buf.size + i
      end
      posi.move( -until_step_len )
    end
  end
  return nil
ensure
  posi.close if posi
end
476
+
477
+
478
+
479
+
480
+ #be nice to have #pack and #unpack too
481
+ end
482
+ end
@@ -0,0 +1,90 @@
1
+ # Copyright (C) 2006 Caleb Clausen
2
+ # Distributed under the terms of Ruby's license.
3
+ require 'sequence'
4
+ require 'sequence/usedata'
5
+
6
class Sequence
  # A window onto a contiguous range of another (parent) sequence.
  # Positions are relative to the window; @first is where the window starts
  # in the parent and @size is its length. The window registers with the
  # parent for change notifications.
  class SubSeq < Sequence
    # seq::   the parent sequence
    # first:: where the window starts in the parent
    # len::   window length, shortened if it would run past the parent's end
    def initialize(seq, first, len)
      len = seq.size - first if first + len - 1 >= seq.size
      @data = seq
      @pos = 0
      @first, @size = first, len
      extend seq.like

      # ask for notifications on the parent seq...
      @data.on_change_notify self
    end

    # Called by the parent when oldsize items at +first+ were replaced by
    # newsize items. Re-maps the window start, end and position, then
    # passes the change on in window-relative terms.
    # NOTE(review): assumes _adjust_pos_on_change maps an absolute
    # pre-change position to its post-change position -- confirm.
    def change_notification(data, first, oldsize, newsize)
      assert(@data == data)
      # adjust all three absolute positions using the OLD window start...
      abs_pos = _adjust_pos_on_change(@first + @pos, first, oldsize, newsize)
      abs_end = _adjust_pos_on_change(@first + @size, first, oldsize, newsize)
      @first = _adjust_pos_on_change(@first, first, oldsize, newsize)
      # ...then make them relative to the NEW start, so @pos and @size do
      # not drift when the window itself shifts
      @pos = abs_pos - @first
      @size = abs_end - @first

      notify_change(self, first - @first, oldsize, newsize)
    end

    # where the window starts in the parent
    def offset
      @first
    end

    # up to +len+ items after the position, without moving
    def readahead(len)
      return new_data if eof?
      rest = rest_size
      len = rest if len > rest
      @data[@pos + offset, len]
    end

    # up to +len+ items before the position, without moving
    def readbehind(len)
      return new_data if @pos.zero?
      len = @pos unless @pos >= len
      @data[@pos + offset - len, len]
    end

    # like readahead, but advances past what was read
    def read(len)
      result = readahead(len)
      move result.size
      result
    end

    # like readbehind, but steps back over what was read
    def readback(len)
      result = readbehind(len)
      move( -result.size)
      result
    end

    def eof?
      @pos >= @size
    end

    attr_reader :size, :pos

    # raw position setter (stores the value as given)
    def _pos=(newp)
      @pos = newp
    end

    def_delegators :@data, :data_class, :new_data

    attr :data

    # a sub-window of this window, expressed directly against the parent
    def subseq(*args)
      first, len, _only1 = _parse_slice_args( *args)
      SubSeq.new(@data, @first + first, len)
    end

    # forward a modification to the parent, translating the start offset;
    # the last argument is the replacement data
    def modify(*args)
      data = args.pop
      first, len, only1 = _parse_slice_args( *args)
      first += @first
      only1 ? @data.modify(first, data) : @data.modify(first, len, data)
    end

    # closed when either this window or its parent is closed
    def closed?
      super or @data.closed?
    end
  end
  SubSequence = SubSeq
end
@@ -0,0 +1,35 @@
1
+ # Copyright (C) 2006 Caleb Clausen
2
+ # Distributed under the terms of Ruby's license.
3
+ require 'sequence'
4
class Sequence
  # Defines the read methods in terms of @data and @pos.
  # @data must support #[] with (start, length) arguments.
  class UseData < Sequence
    # read up to +len+ items and advance past them
    def read(len)
      chunk = readahead(len)
      @pos += chunk.size
      chunk
    end

    # read up to +len+ items behind the position and step back over them
    def readback(len)
      chunk = readbehind(len)
      @pos -= chunk.size
      chunk
    end

    # the next +len+ items, without moving
    def readahead(len)
      @data[@pos, len]
    end

    # the previous +len+ items (fewer near the beginning), without moving
    def readbehind(len)
      len = @pos if len > @pos
      @data[@pos - len, len]
    end

    def size
      data.size
    end

    def_delegators :@data, :<<
  end
end
@@ -0,0 +1,5 @@
1
+ # Copyright (C) 2006 Caleb Clausen
2
+ # Distributed under the terms of Ruby's license.
3
class Sequence
  # version number of this release of the library
  VERSION = '0.1.0'
end