sequence 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/sequence.rb ADDED
@@ -0,0 +1,721 @@
1
+ # Copyright (C) 2006 Caleb Clausen
2
+ # Distributed under the terms of Ruby's license.
3
+ # = sequence.rb - external iterators with capabilities like a text editor cursor
4
+ # $Id$
5
+ #
6
+ # Author: Caleb Clausen (sequence-owner @at@ inforadical .dot. net)
7
+ # Original Author: Eric Mahurin
8
+ # License: Ruby license
9
+ # Home: http://rubyforge.org/projects/sequence
10
+
11
+ require "forwardable"
12
+ require 'assert'
13
+ require 'weakrefset'
14
+ require 'sequence/version'
15
+
16
+ =begin todo
17
+
18
+
19
+ (.... need to steal more features from Array, String, File, StringScanner, Enumerable?, like:)
20
+
21
+
22
+ pack/unpack
23
+
24
+
25
+
26
+ match/matchback
27
+
28
+
29
+ =end
30
+
31
+ class Sequence
32
+
33
+
34
+ include Comparable
35
+ include Enumerable
36
+ extend Forwardable
37
+
38
# Sequence cannot be built directly: the constructor only invokes #abstract.
# Presumably concrete subclasses supply their own #initialize (confirm).
def initialize
  abstract
end
39
# Conversion hook: the receiver is returned unchanged.
def to_sequence
  self
end
40
+
41
class << self
  # Disabled ("borken") experiment kept from the original source: a
  # polymorphic .new that would convert a supported container through
  # #to_sequence.  The guard is never true, so nothing in it executes; it
  # merely has to parse.  The original `when ...:` colon separator is only
  # accepted by Ruby 1.8 and is a syntax error on later versions, so the
  # clause uses `then`, which every Ruby version accepts.
  if nil #borken
    alias original__new new
    undef new #bye-bye
    def new(seq)
      case seq
      when File,IO,Array,String,Enumerable then
        seq.to_sequence
      else
        if seq.respond_to? :to_str
          seq.to_str.to_sequence
        else
          raise ArgumentError
        end
      end
    end
  end

  # Class-level [] is shorthand for .new with the same arguments.
  def [](*x) new(*x) end
end
60
+
61
+
62
+ public
63
# Read a single element and advance past it.  The value is element 0 of a
# one-element #read, so an empty read (e.g. at eof) yields whatever that
# container's #[] gives for index 0 (nil for Array and String).
def read1
  chunk = read(1)
  chunk[0]
end
67
+
68
# Read the single element just before the position and step back over it.
# The value is element 0 of a one-element #readback, so an empty result
# (e.g. at the start of input) gives nil for Array and String data.
def readback1
  chunk = readback(1)
  chunk[0]
end
72
+
73
# Peek at the element located at the current position by slicing at #pos.
def readahead1
  slice(pos)
end
77
+
78
# Peek at the element just before the current position (slice at pos-1).
# Returns nil when the position is 0, since nothing precedes it.
def readbehind1
  return nil if pos.zero?
  slice(pos - 1)
end
82
+
83
+
84
# Build a fresh, empty container for returning a run of elements, by
# instantiating #data_class with no arguments.  The container only needs
# to support << (append); it is usually [] or "".
def new_data
  container = data_class
  container.new
end
90
+
91
# Attempt to read up to +len+ elements, leaving the position just after the
# data read.  Fewer elements than requested may come back when less data is
# available: at end of file, or when more data simply has not arrived yet
# (ie with a Sequence::IO).  A short read therefore does not prove end of
# file; use #eof? to test for that.
# This base version only invokes #abstract.
def read(len)
  abstract
end
99
+
100
# Same data as #read(len), but performed inside #holding so the position
# is put back afterwards.
def readahead(len)
  holding do
    read(len)
  end
end
104
+
105
# Read up to +len+ elements that precede the position.  The position is
# first moved back by +len+ (#move reports how far it really went) and
# exactly that many elements are then read forward again, which brings the
# position back to where it started.
def readbehind(len)
  stepped = move(-len)
  read(stepped)
end
109
+
110
# Read up to +len+ elements that precede the position and leave the
# position just before them: move back (#move reports the real distance),
# then peek that many elements with #readahead.
def readback(len)
  stepped = move(-len)
  readahead(stepped)
end
114
+
115
# Read everything that remains after the position (#read of #rest_size).
# With +reverse+ true, read everything before the position instead
# (#readback of #pos elements).
def read!(reverse=false)
  return readback(pos) if reverse
  read(rest_size)
end
124
+
125
# Return the whole content: inside #holding_position, rewind with #begin!
# and read to the end with #read!.
def all_data
  holding_position do |seq|
    seq.begin!
    seq.read!
  end
end
131
+
132
+
133
+
134
+
135
+ #a StringScanner-like interface for pattern scanning within sequences.
136
+ #See StringScanner for /(scan|skip|check)(_until)?|match\?|exist\?/.
137
+ #Some notes on the implementation: scanning is all done at the current
138
+ #position. Unlike StringScanner, which only scans for Regexp, this
139
+ #version of #scan, etc. allows more pattern types. The pattern type and how
140
+ #it is treated are determined by whether the underlying data is String-
141
+ #like (contains only characters/bytes), or Array-like (contains any
142
+ #Object).
143
+
144
+ #If String-like: scan and friends can take a Regexp, String, or
145
+ #Integer (for a single char) parameter.
146
+ #If Array-like: scan and friends can take a scalar matcher (something
147
+ #that responds to ===). Eventually, vector matchers (Reg::Multiple)
148
+ #will be supported as well here. Literal arrays, as patterns to scan for,
149
+ #are not supported: too hard, and there's the quoting problem.
150
+ #if you actually want to scan for items that are equal to a particular
151
+ #Regexp (for example), instead of items that match it, use duck.rb
152
+ #to reassign === as ==. Or, if using Reg, create a Reg::Literal by
153
+ #calling #lit on the pattern (which has the same effect).
154
+
155
+ #when scanning string-like sequences, anchors in Regexps are treated somewhat
156
+ #specially:
157
+ #when scanning forward, ^ and \A match at the current position, and
158
+ #$ and \Z match at the end of the sequence.
159
+ #when scanning backward, ^ and \A match at the beginning of sequence and $ and
160
+ #\Z match at the current position.
161
+ #^ and $ still match at beginning and end of lines, as usual.
162
+
163
+ #when scanning or matching backwards, ^ has some special problems:
164
+ # these won't work....
165
+ #an anchor that might or might not match will break the current implementation in some cases...
166
+ #regexes that should match overall even if the anchor in them doesn't...
167
+ # /(^|)/
168
+
169
+ #... in strings represents more data which is before the current position
170
+ # position in string is represented by |
171
+ # /(^foo)?bar/==="...xxxfoobar|"
172
+ # /(^[ab]+|b+)/==="...xxxaaabbbbbb|"
173
+ # /(b+|^[ab]+)/==="...xxxaaabbbbbb|"
174
+
175
+ #scan buffers and maxmatchlen (and ?maxuntillen)
176
+
177
+
178
+ #Regexps (or Reg::Multiples in Array-like sequences) are implicitly anchored
179
+ #to the current position unless one of the _until forms (or exist?) is used.
180
+
181
+
182
+
183
+ =begin can't be abstract... defined in modules
184
+ def scan(pat)
185
+ abstract
186
+ end
187
+
188
+ def scan_until(pat)
189
+ abstract
190
+ end
191
+ def scanback(pat)
192
+ abstract
193
+ end
194
+
195
+ def scanback_until(pat)
196
+ abstract
197
+ end
198
+
199
+ def match(pat)
200
+ abstract
201
+ end
202
+
203
+ def matchback(pat)
204
+ abstract
205
+ end
206
+ =end
207
+
208
# StringScanner-style helpers layered on scan, scan_until, scanback and
# scanback_until.  For each of the four scanners there are three variants:
#   skip*   - run the scanner; return the match's length, or the scanner's
#             falsy result when nothing matched.
#   check*  - run the scanner inside #holding (position restored) and
#             return the match itself.
#   match?, exist?, matchback?, existback? - run the matching skip* inside
#             #holding and return its result.

def skip(pat)
  found = scan(pat)
  found && found.length
end

def check(pat)
  holding { scan(pat) }
end

def match?(pat)
  holding { skip(pat) }
end

def skip_until(pat)
  found = scan_until(pat)
  found && found.length
end

def check_until(pat)
  holding { scan_until(pat) }
end

def exist?(pat)
  holding { skip_until(pat) }
end

def skipback(pat)
  found = scanback(pat)
  found && found.length
end

def checkback(pat)
  holding { scanback(pat) }
end

def matchback?(pat)
  holding { skipback(pat) }
end

def skipback_until(pat)
  found = scanback_until(pat)
  found && found.length
end

def checkback_until(pat)
  holding { scanback_until(pat) }
end

def existback?(pat)
  holding { skipback_until(pat) }
end
223
+
224
# If the next lits.size elements (peeked with #readahead) equal +lits+,
# advance over them and return the result of #move; otherwise leave the
# position alone and return the falsy result of the comparison.
def skip_literal(lits)
  width = lits.size
  matched = (lits == readahead(width))
  matched ? move(width) : matched
end
alias skip_literals skip_literal
229
+
230
# Search forward for the literal run +lits+.  Repeatedly #skip_until the
# first element of +lits+, then compare +lits+ with the next lits.size
# elements (#readahead).  On a hit the current #pos is returned and kept;
# on failure nil is returned and #holding? restores the position.
#
# The search now gives up as soon as #skip_until reports no match.
# Previously a failed #skip_until that did not advance the position left
# the +until eof?+ loop spinning forever.
# NOTE(review): the comparison happens at wherever #skip_until leaves the
# position; confirm that matches the intended start of +lits+.
def skip_until_literal(lits)
  sz=lits.size
  first=lits[0]
  holding?{
    until eof?
      skip_until(first) or break
      lits==readahead(sz) and break pos
    end
  }
end
alias skip_until_literals skip_until_literal
241
+
242
+ attr_accessor :last_match
243
+ attr_writer :maxmatchlen
244
+
245
# Built-in ceiling used by #maxmatchlen: 1024 elements.
def _default_maxmatchlen
  1024
end
246
+
247
# Return the smallest of: #_default_maxmatchlen, the room available in the
# requested direction, and @maxmatchlen when one has been assigned.
# +backwards+ true measures the room before the position (#pos); otherwise
# it is size - pos%size.
#
# An empty sequence now reports 0 room going forward; before, pos%size
# raised ZeroDivisionError when size was 0.
# NOTE(review): because of the modulo, a position exactly at the end
# yields the full size rather than 0 -- presumably intentional; confirm.
def maxmatchlen(backwards)
  size=self.size

  room=if backwards then pos
       elsif size.zero? then 0
       else size-pos%size
       end
  list=[ _default_maxmatchlen, room ]
  list.push @maxmatchlen if defined? @maxmatchlen
  list.min
end
256
+
257
# Run a block and then put the position back where it was.  The block
# receives the sequence itself and may move the position freely (directly
# or through methods such as read); the original position is restored even
# if the block raises.  The block's value is returned.
def holding
  saved = pos
  begin
    yield self
  ensure
    self.pos = saved
  end
end
269
+
270
# Conditional variant of #holding: the position is put back only when the
# block's value is false or nil, or when the block raises.  A truthy value
# keeps whatever position the block left.  The block's value is returned.
def holding?
  saved = pos
  begin
    outcome = yield self
  ensure
    # outcome is still nil here if the block raised, so that case rewinds
    self.pos = saved unless outcome
  end
end
280
+
281
# Like #holding, but the block is instance_eval'd in the sequence, so self
# inside the block is the sequence.  The sequence is also passed as the
# block parameter, which instance_eval does by itself.  The position is
# restored afterwards (even on exceptions) and the block's value returned.
#
# The original passed +self+ as an explicit argument to instance_eval
# together with the block; instance_eval rejects arguments when a block is
# given, so every call raised ArgumentError.
def holding!(&block)
  oldpos=pos
  begin
    instance_eval(&block)
  ensure
    self.pos=oldpos
  end
end
290
+
291
+
292
# Variant of #holding that remembers the location as a #position object
# instead of a number.  The block receives the sequence; afterwards (even
# on exceptions) the saved position is re-applied through #position= and
# then closed.  The block's value is returned.
def holding_position
  saved = position
  begin
    yield self
  ensure
    self.position = saved
    saved.close
  end
end
301
+
302
# Conditional variant of #holding_position: the saved #position is
# re-applied only when the block's value is false or nil (or the block
# raised).  The saved position object is closed in every case.  The
# block's value is returned.
def holding_position?
  saved = position
  begin
    outcome = yield self
  ensure
    self.position = saved unless outcome
    saved.close
  end
end
311
+
312
# Like #holding_position, but the block is instance_eval'd in the sequence
# (self inside the block is the sequence, which is also passed as the
# block parameter).  Afterwards the saved #position is re-applied and
# closed, even on exceptions.  The block's value is returned.
#
# The original passed +self+ as an explicit argument to instance_eval
# together with the block; instance_eval rejects arguments when a block is
# given, so every call raised ArgumentError.
def holding_position!(&block)
  pos=position
  begin
    instance_eval(&block)
  ensure
    self.position=pos
    pos.close
  end
end
321
+
322
+
323
# Current position: the number of elements from the beginning (0 is at the
# beginning).  This base version only invokes #abstract.
def pos
  abstract
end
327
# Number of elements between the position and the end (size minus pos).
def rest_size
  size - pos
end
328
+
329
# Is +p+ a valid numeric position?  True when +p+ lies in -size..size
# (inclusive at both ends).
def pos?(p)
  limit = size
  Range.new(-limit, limit) === p
end
334
+
335
# Set the position to +p+.  A non-Integer +p+ accepted by #position? is
# first replaced by its #pos.  The number is then run through
# #_normalize_pos and stored with #_pos=.
def pos=(p)
  unless Integer === p
    p = p.pos if position?(p)
  end
  self._pos = _normalize_pos(p)
end
340
+
341
# Go to an absolute position; a plain-method spelling of #pos=.
def goto(p)
  self.pos = p
end
345
+
346
# Low-level position setter used by #pos=, #begin! and #end!.
# This base version only invokes #abstract.
def _pos=(p)
  abstract
end
349
# Move the position +len+ elements relative to where it is now; a negative
# +len+ goes in reverse.  Returns the (non-negative) distance actually
# covered, computed from #pos before and after the #goto.
def move(len)
  start = pos
  goto(start + len)
  (pos - start).abs
end
357
# Jump to the end of the data, or to the beginning when +reverse+ is true.
# The return value is whatever #end! / #begin! returns.
def move!(reverse=false)
  if reverse
    begin!
  else
    end!
  end
end
363
+
364
# Get or set properties stored in the @prop hash.
#   prop()            -> a clone of the whole hash (nil if none exists)
#   prop(nil, hash)   -> replace all properties with +hash+
#   prop(nil, nil)    -> remove the property hash entirely
#   prop(name)        -> the value stored under +name+ (nil if none)
#   prop(name, value) -> store +value+ under +name+
# +name+ is normally a symbol.
def prop(name=nil,*value) # :args: (name[,value])
  reading = value.size.zero?
  if name.nil?
    return defined?(@prop) && @prop && @prop.clone if reading
    replacement = value[0]
    if replacement.nil?
      defined?(@prop) && @prop && remove_instance_variable(:@prop)
    else
      (@prop ||= {}).replace(replacement)
    end
  elsif reading
    defined?(@prop) && @prop && @prop[name]
  else
    (@prop ||= {})[name] = value[0]
  end
end
386
+
387
# Create a Position object representing a location within this sequence.
# +_pos+ selects the numeric location and defaults to the current #pos.
# If the element a Position is anchored to is deleted, that Position may
# become invalid or have an unknown behavior.
def position(_pos=pos)
  Position.new(self, _pos)
end
395
# Adopt the state of Position +p+ (as produced by #position): copy its
# numeric #pos, then replace all properties with p.prop.  Evaluates to +p+.
def position=(p)
  self.pos = p.pos
  self.prop(nil, p.prop)
  p
end
401
+
402
# Is +p+ a valid position of this sequence?
# * Integer  - true when it lies in -size..size.
# * Position - true when its #data is this very sequence.
# * anything else - true only when it is this sequence itself.
#
# The `when X:` colon separator of the original is only accepted by Ruby
# 1.8; `then` parses on every Ruby version and means the same thing.
def position?(p)
  case p
  when Integer then (-size..size)===p
  when Position then equal? p.data
  else equal? p
  end
end
411
+
412
# Make a new sequence (a SubSeq) out of a subrange of this sequence's data.
# The subsequence and its parent share data, so changes in one will be
# reflected in the other.  Arguments are decoded by #_parse_slice_args;
# the sequence must not be closed.
def subseq(*args)
  assert(!closed?)
  start, count = _parse_slice_args(*args)
  SubSeq.new(self, start, count)
end
420
+
421
# Make a new sequence (a Reversed) that presents this sequence's data in
# reverse order.  The reversed and parent sequences share data.
def reversed
  Reversed.new(self)
end
426
+
427
# Close the sequence.  Every registered change listener that is itself a
# Sequence (child positions, derived sequences) is closed as well.  All
# instance variables are then removed, which should make just about any
# later operation fail and is what #closed? detects.  Returns nil.
def close
  if defined? @change_listeners
    @change_listeners.each do |listener|
      listener.close if Sequence === listener
    end
  end
  instance_variables.each { |ivar| remove_instance_variable(ivar) }
  nil
end
437
# Is this sequence closed?  True when the object has no instance variables
# left (#close removes them all).
def closed?
  instance_variables.size.zero?
end
441
+
442
# Compare the current position with +other+.  Anything responding to to_i
# is compared with #pos directly; anything accepted by #position? is
# compared by its #pos.  The result is +1 when self is after, -1 when self
# is before, 0 at the same location, and nil when +other+ is neither.
def <=>(other)
  return pos <=> other if other.respond_to?(:to_i)
  return pos <=> other.pos if position?(other)
  nil
end
450
# With an Integer argument, return a new #position +other+ elements before
# the current one.  With a position of this sequence, return the distance
# (number of elements) from +other+ to self, which can be +, - or 0.
#
# Integers are now routed to the offset branch before #position? is
# consulted.  #position? answers true for any Integer inside -size..size,
# so previously a call such as seq - 1 (as #pred makes) went on to call
# Integer#pos and raised NoMethodError.
def -(other)
  if !(Integer===other) && position?(other)
    pos-other.pos
  else
    position(pos-other)
  end
end
460
# With a numeric +other+, return a new #position that many elements after
# the current one (+other+ may be negative).  With another Sequence,
# return a List built from self and +other+.
def +(other)
  return List[self, other] if ::Sequence === other
  position(pos + other)
end
468
+
469
# Return a new #position one element further on (self + 1), or nil when
# #eof? is true.
def succ
  return nil if eof?
  self + 1
end
473
# Return a new #position one element earlier (self - 1), or nil when the
# position is already 0.
def pred
  return nil if pos.zero?
  self - 1
end
477
# Return a new #position anchored at the beginning (location 0).  The
# sequence's own position is not moved.
def begin
  position(0)
end
481
# Return a new #position anchored at the end (location #size).  The
# sequence's own position is not moved.
def end
  position(size)
end
485
+
486
# Go to the beginning: stores 0 through the low-level #_pos= setter.
# Evaluates to 0.
def begin!
  self._pos = 0
end
490
# Go to the end: stores #size through the low-level #_pos= setter.
# Evaluates to that size.
def end!
  self._pos = size
end
494
+
495
+ #-------------------------------------
496
# Is location +at+ (default: the current #pos) within +len+ elements of
# the beginning, i.e. is at <= len?
def nearbegin(len, at=pos)
  at <= len
end
500
+
501
+ #-------------------------------------
502
# Is location +at+ (default: the current #pos) within +len+ elements of
# the end, i.e. does at + len reach #size?
def nearend(len, at=pos)
  at + len >= size
end
506
+
507
# Is there any more data after the position?  Returns the number of
# remaining elements (size - pos) when that is non-zero, nil otherwise.
# (Computed from size and pos rather than as !eof?.)
def more_data?
  remaining = size - pos
  remaining.nonzero?
end
512
+
513
# Has any data been passed yet, or are we still at the beginning?
# Returns #pos when it is non-zero, nil at position 0.
def was_data?
  here = pos
  here.nonzero?
end
517
+
518
+
519
# Number of elements in the sequence.  This base version only invokes
# #abstract.
def size
  abstract
end

# Synonym for #size.
def length
  size
end
524
+
525
# Are we at (or past) the end of the sequence data, with no more data ever
# to arrive?  This base version only invokes #abstract.
def eof?
  abstract
end
529
+
530
# Does the sequence hold no data at all?  True when #size equals 0.
def empty?
  count = size
  count == 0
end
534
+
535
# Return the first element of the data (slice at index 0).
def first
  slice(0)
end
539
+
540
# Return the last element of the data (slice at index -1).
def last
  slice(-1)
end
544
+
545
# Turn a possibly negative or out-of-range index into a location within
# 0..size.  Negative values count from the end and are clamped at 0;
# values beyond +size+ are clamped to +size+.  +size+ defaults to the
# sequence's own size.  The result is asserted to lie in 0..size.
def _normalize_pos(pos, size=self.size)
  if pos < 0
    pos += size
    pos = 0 if pos < 0
  elsif pos > size
    pos = size
  end

  assert((0..size) === pos)
  pos
end
556
+
557
# Decode the (index|range [, len]) argument forms used by #slice, #slice!
# and #subseq into [first, len, only1]:
# * Range   - both ends go through #_normalize_pos; +len+ is the distance
#             between them, plus one for an inclusive range.
# * Integer - +first+ is the normalized index; +len+ is the second
#             argument, or 1 with +only1+ set when it is omitted.
# * nothing - +first+ is nil and one element (+only1+) is requested.
# +only1+ is 1 when a single element was asked for, nil otherwise.
# Any other argument shape raises ArgumentError.  The sequence must not be
# closed.
#
# The `when X:` colon separators of the original are only accepted by Ruby
# 1.8; plain `when X` followed by a newline parses on every Ruby version.
def _parse_slice_args(*args)
  asize=args.size
  assert !closed?
  size=self.size
  case r=args.first
  when Range
    asize==1 or raise ArgumentError
    first,last=r.first,r.last
    first=_normalize_pos(first,size)
    last=_normalize_pos(last,size)
    len=last-first
    r.exclude_end? or len+=1
  when Integer
    asize<=2 or raise ArgumentError
    first=_normalize_pos(r,size)
    len=args[1] || (only1=1)
  when nil
    asize==0 or raise ArgumentError
    first=nil
    len=only1=1
  else raise ArgumentError
  end
  return first,len,only1
end
581
+
582
# Random access in the manner of Array/String.  Arguments are an index or
# a range, optionally followed by a length (see #_parse_slice_args); with
# no arguments the read starts at the current location.  A lone index
# yields a single element (#read1), every other form a run of elements
# (#read).  The work happens inside #holding, so the sequence's position
# is left alone.
def slice(*args) #index|range=nil,len=nil
  first, len, only1 = _parse_slice_args(*args)
  first = nil if pos == first   # already there; no need to reposition
  holding do
    self.pos = first if first
    if only1
      read1
    else
      read(len)
    end
  end
end

# Operator spelling of #slice.
def [](*a)
  slice(*a)
end

# Single-index spelling of #slice.
def slice1(idx)
  slice(idx)
end
597
+
598
# Like #slice except the element(s) are deleted from the sequence.
# Returns the removed data; when a lone index was given, the single
# removed element is returned instead of a one-element container.
#
# The single element is now taken with [0], the way #read1 does, instead
# of calling #first on the container: core String has no #first, so
# slice!(index) raised NoMethodError on String-like data.
def slice!(*args) #index|range, len
  first,len,only1=_parse_slice_args( *args)
  result=slice(first,len)
  delete(first,len)
  only1 ? result[0] : result
end

# Single-index spelling of #slice!.
def slice1!(idx) slice!(idx) end
606
+
607
# Similar to #slice except data is written.  The index (or range) and
# +len+ mean the same as in #slice: +len+ elements are deleted and the
# replacement data, passed as the last argument, is inserted in their
# place.  The replacement is a single item if +len+ is omitted and the
# first parameter is a Fixnum.
# This base version only invokes #abstract.
def modify(*args) #index|range, len, replacedata
  abstract
end

# Operator spelling of #modify.
def []=(*a)
  modify(*a)
end
614
+
615
# Delete the elements selected by the index|range[, len] arguments by
# calling #modify with an empty #new_data container as the replacement.
# Returns nil.
def delete(*args) #index|range, len
  args << new_data
  modify(*args)
  nil
end
619
+
620
# Insert +replacedata+ at +index+ without removing anything: a #modify
# with a length of 0.  Returns what #modify returns.
def insert(index, replacedata)
  modify(index, 0, replacedata)
end
623
+
624
# Replace the replacedata.size elements starting at +index+ with
# +replacedata+ (a #modify whose length equals the replacement's size).
# Returns what #modify returns.
def overwrite(index, replacedata)
  span = replacedata.size
  modify(index, span, replacedata)
end
627
+
628
# Remove and return data from the end via #slice!.  Without +count+ the
# last element (index -1) is removed; with +count+ the range
# -count...size is removed.
def pop(count=nil)
  target = if count then (-count...size) else -1 end
  slice!(target)
end
631
+
632
# Remove and return data from the beginning via #slice!.  Without +count+
# the first element (index 0) is removed; with +count+ the range
# 0...count is removed.
def shift(count=nil)
  target = if count then (0...count) else 0 end
  slice!(target)
end
635
+
636
# Append operator: hands +x+ to #push and returns self so calls can be
# chained.
def <<(x)
  push(x)
  self
end
637
+
638
+ #push/unshift in stringlike/arraylike
639
+
640
# Insert +stuff+ at the very end (index #size).  Returns self.
def append(stuff)
  insert(size, stuff)
  self
end
644
+
645
# Insert +stuff+ at the very beginning (index 0).  Returns self.
def prepend(stuff)
  insert(0, stuff)
  self
end
649
+
650
# Overwrite the elements after the position with +data+ (#writeahead) and
# then advance past them.  Asserts that #writeahead itself left the
# position untouched.  Returns the result of the final #move.
def write(data)
  oldpos = pos
  assert oldpos
  writeahead(data)
  assert oldpos == pos
  move(data.size)
end
656
+
657
# Overwrite the elements before the position with +data+ (#writebehind)
# and then step back over them.  Asserts that #writebehind itself left the
# position untouched.  Returns the result of the final #move.
def writeback(data)
  oldpos = pos
  assert oldpos
  writebehind(data)
  assert oldpos == pos
  move(-data.size)
end
663
+
664
# Overwrite the data.size elements that follow the position with +data+
# (via #overwrite at #pos).  Raises ArgumentError when +data+ is longer
# than what remains (#rest_size).  Returns data.size.
def writeahead(data)
  if data.size > rest_size
    raise ArgumentError, "attempted overwrite at end of #{self}"
  end
  overwrite(pos, data)
  data.size
end
669
+
670
# Overwrite the data.size elements that precede the position with +data+
# (via #overwrite at pos - data.size).  Raises ArgumentError when +data+
# is longer than what lies before the position.  Returns data.size.
def writebehind(data)
  if data.size > pos
    raise ArgumentError, "attempted overwrite at begin of #{self}"
  end
  overwrite(pos - data.size, data)
  data.size
end
675
+
676
# Work out where a location +pos+ ends up after the +oldsize+ elements
# starting at +first+ were replaced by +newsize+ elements:
# * at or before +first+: unchanged;
# * strictly inside the replaced span: pulled back to +first+;
# * at or beyond the end of the span: shifted by newsize - oldsize,
#   except that a location exactly at +first+ stays put on a pure
#   insertion (oldsize == 0).
def _adjust_pos_on_change(pos, first, oldsize, newsize)
  span_end = first + oldsize
  if pos < span_end
    return pos > first ? first : pos
  end
  return pos if oldsize.zero? && pos == first
  pos + newsize - oldsize
end
686
+
687
# Register +obj+ to be told about changes (see #notify_change).  +obj+
# must respond to #change_notification and must not be a Symbol, else
# ArgumentError is raised.  Listeners are kept in a WeakRefSet that is
# created on first use.  Returns the listener set.
def on_change_notify(obj)
  raise ArgumentError if Symbol === obj
  raise ArgumentError unless obj.respond_to?(:change_notification)
  @change_listeners ||= WeakRefSet[]
  @change_listeners << obj
end
693
+
694
# Tell every registered listener about a change by calling its
# #change_notification.  The arguments are (seq, first, oldsize, newsize);
# the first one is always overwritten with this sequence before
# forwarding.  Returns nil when no listener was ever registered.
def notify_change(*args) #seq, first, oldsize, newsize
  args[0] = self
  return nil unless defined?(@change_listeners)
  @change_listeners.each do |listener|
    listener.change_notification(*args)
  end
end
700
+
701
# Delete +p+ from the list of children (from #position).
# Should only be used by child Position.
#
# Returns nil without doing anything when there is no listener list,
# either because none was ever created or because #close removed it.
# #close and #notify_change already guard @change_listeners the same way;
# without the guard this raised NoMethodError on nil.
def _delete_position(p) # :nodoc:
  return nil unless defined?(@change_listeners) && @change_listeners
  @change_listeners.delete(p)
end
706
+
707
# Yield every element from the beginning to the end, which is what makes
# the class Enumerable.  Runs inside #holding, so the position is put back
# afterwards.  Returns self, or the break value if the block does a break.
def each # :yield: value
  holding do
    begin!
    yield read1 until eof?
    self
  end
end
717
+
718
+
719
+ end
720
+
721
+ require 'sequence/position'