external 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. data/History +5 -0
  2. data/MIT-LICENSE +21 -0
  3. data/README +168 -0
  4. data/lib/ext_arc.rb +108 -0
  5. data/lib/ext_arr.rb +727 -0
  6. data/lib/ext_ind.rb +1120 -0
  7. data/lib/external/base.rb +85 -0
  8. data/lib/external/chunkable.rb +105 -0
  9. data/lib/external/enumerable.rb +137 -0
  10. data/lib/external/io.rb +398 -0
  11. data/lib/external.rb +3 -0
  12. data/test/benchmarks/benchmarks_20070918.txt +45 -0
  13. data/test/benchmarks/benchmarks_20070921.txt +91 -0
  14. data/test/benchmarks/benchmarks_20071006.txt +147 -0
  15. data/test/benchmarks/test_copy_file.rb +80 -0
  16. data/test/benchmarks/test_pos_speed.rb +47 -0
  17. data/test/benchmarks/test_read_time.rb +55 -0
  18. data/test/cached_ext_ind_test.rb +219 -0
  19. data/test/check/benchmark_check.rb +441 -0
  20. data/test/check/namespace_conflicts_check.rb +23 -0
  21. data/test/check/pack_check.rb +90 -0
  22. data/test/ext_arc_test.rb +286 -0
  23. data/test/ext_arr/alt_sep.txt +3 -0
  24. data/test/ext_arr/cr_lf_input.txt +3 -0
  25. data/test/ext_arr/input.index +0 -0
  26. data/test/ext_arr/input.txt +1 -0
  27. data/test/ext_arr/inputb.index +0 -0
  28. data/test/ext_arr/inputb.txt +1 -0
  29. data/test/ext_arr/lf_input.txt +3 -0
  30. data/test/ext_arr/lines.txt +19 -0
  31. data/test/ext_arr/without_index.txt +1 -0
  32. data/test/ext_arr_test.rb +534 -0
  33. data/test/ext_ind_test.rb +1472 -0
  34. data/test/external/base_test.rb +74 -0
  35. data/test/external/chunkable_test.rb +182 -0
  36. data/test/external/index/input.index +0 -0
  37. data/test/external/index/inputb.index +0 -0
  38. data/test/external/io_test.rb +414 -0
  39. data/test/external_test_helper.rb +31 -0
  40. data/test/external_test_suite.rb +4 -0
  41. data/test/test_array.rb +1192 -0
  42. metadata +104 -0
data/lib/ext_ind.rb ADDED
@@ -0,0 +1,1120 @@
1
+ require 'external/base'
2
+
3
+ # for some reason this is sometimes not required by String
4
+ # automatically, leading to a missing each_char method
5
+ require 'jcode'
6
+
7
+ #--
8
+ # not implemented --
9
+ # dclone, flatten, flatten!, frozen?, pack, quote, to_yaml, transpose, yaml_initialize
10
+ #
11
+ # be careful accessing io directly. For performance reasons there is no check to make
12
+ # sure io is in register (ie pos is at a frame boundary, ie io.length % frame_size == 0)
13
+ # In addition, note that length must be adjusted manually in most io operations (truncate is
14
+ # the exception). Thus if you change the file length by any means, the file length must be
15
+ # reset.
16
+ #
17
+ # ExtInd allows array-like access to formatted binary data stored on disk.
18
+ #
19
+ # == Caching
20
+ #
21
+ # To improve performance, ExtInd can be run in a cached mode where the data is loaded into
22
+ # memory and kept in memory until the ExtInd closes (or is flushed). Cached mode is
23
+ # recommended for all but the largest index files, which cannot or should not be loaded
24
+ # into memory.
25
+ #++
26
+ class ExtInd < External::Base
27
+
28
+ class << self
29
# Builds a new index from the given entries. A trailing Hash argument is
# removed and handed to new as the options. Nil entries are replaced by the
# nil value of the new index, the entries are flattened and written
# unframed, and the position is rewound before the index is returned.
def [](*args)
  opts = args.last.kind_of?(Hash) ? args.pop : {}
  index = new(nil, opts)

  entries = args.collect do |entry|
    entry.nil? ? index.nil_value : entry
  end
  index.unframed_write(entries.flatten)

  # writing moved the position; reset it so reads start at the first entry
  index.pos = 0
  index
end
39
+
40
# Opens fd read-only with the given options and returns everything read
# from position zero. Returns an empty array when fd is nil.
def read(fd, options={})
  if fd.nil?
    []
  else
    open(fd, "r", options) {|index| index.read(nil, 0) }
  end
end
46
+
47
+ # Returns the number of bytes required to pack an item in an array
48
+ # using the directive (see Array.pack for more details). All directives
49
+ # return a size except the positioning directives '@' and 'X'; these
50
+ # and all other unknown directives return nil.
51
+ #
52
+ # Directives N bytes
53
+ # ------------------------------
54
+ # AaBbCcHhUwxZ | 1
55
+ # nSsv | 2
56
+ # M | 3
57
+ # eFfgIiLlNPpV | 4
58
+ # m | 5
59
+ # u | 6
60
+ # DdEGQq | 8
61
+ # @X | nil
62
# directive:: a single-character pack directive
#
# Returns the byte size listed for the directive, or nil when the directive
# is unknown or positional ('@', 'X').
def directive_size(directive)
  # \A and \z anchor the whole string. With ^ and $ a multi-line string
  # such as "I\n" or "zz\nI" would be accepted as if it were one directive.
  case directive
  when /\A[eFfgIiLlNPpV]\z/ then 4
  when /\A[DdEGQq]\z/ then 8
  when /\A[AaBbCcHhUwxZ]\z/ then 1
  when /\A[nSsv]\z/ then 2
  when 'M' then 3
  when 'm' then 5
  when 'u' then 6
  else
    nil
  end
end
75
+
76
+ attr_writer :default_nil_value
77
# Returns the default nil value, which is 0 until another value is assigned
# through the writer.
def default_nil_value
  @default_nil_value = 0 unless @default_nil_value
  @default_nil_value
end
80
+ end
81
+
82
+ attr_reader :frame, :frame_size, :format, :cache, :process_in_bulk
83
+
84
# Sets up the index on io. Recognized options (with defaults):
#
#   :format       the pack format of one entry ("I")
#   :nil_value    the unpacked entry used for nils (nil, meaning an
#                 entry made of the class default nil value)
#   :cached       whether to load the data into memory (false)
#   :buffer_size  the buffer size in bytes (8Mb)
#
# Raises an ArgumentError when the size of a directive cannot be
# determined, or when the nil value is not in frame or not packable.
def initialize(io=nil, options={})
  super(io)

  defaults = {
    :format => "I",
    :nil_value => nil,
    :cached => false,
    :buffer_size => 8 * 2**20 # 8Mb
  }
  options = defaults.merge(options)

  # one directive per character: frame counts the directives,
  # frame_size sums their byte sizes
  # TODO -- allow specification of numbers in the format
  @format = options[:format]
  @frame = @format.length
  @frame_size = 0
  @format.each_char do |char|
    bytes = ExtInd.directive_size(char)
    if bytes == nil
      raise ArgumentError.new("cannot determine size of: '#{char}'")
    end
    @frame_size += bytes
  end
  @process_in_bulk = false

  self.buffer_size = options[:buffer_size]

  # a format repeating a single directive ("I", "II", ...) is
  # collapsed to "I*" so entries can be packed and unpacked in bulk
  first = @format[0, 1]
  if (first * @frame) == @format
    @format = "#{first}*"
    @process_in_bulk = true
  end

  # default to a frame full of the class default nil value
  nil_value = options[:nil_value]
  nil_value = Array.new(@frame, self.class.default_nil_value) if nil_value == nil

  # the nil value must survive a pack/unpack round trip unchanged
  begin
    @nil_value = nil_value.pack(@format)
    in_frame = (nil_value.length == @frame)
    raise "" unless in_frame && @nil_value.unpack(@format) == nil_value
  rescue
    raise ArgumentError.new(
      "unacceptable nil value '#{nil_value}': the nil value must " +
      "be in frame and packable using the format '#{format}'")
  end

  self.cached = options[:cached]
end
139
+
140
# Returns the default block size reported by the underlying io.
def buffer_size
  io.default_blksize
end
143
+
144
# Sets the buffer size on the underlying io and records the matching
# default block size for this index. Raises an ArgumentError unless the
# size is positive.
def buffer_size=(buffer_size)
  if buffer_size <= 0
    raise ArgumentError.new("buffer size must be > 0")
  end

  # NOTE(review): with integer arguments the division has already
  # truncated, so ceil has no effect here -- confirm the intent
  blocks = (buffer_size/frame_size).ceil
  @default_blksize = blocks
  io.default_blksize = buffer_size
end
150
+
151
# Records value as the default block size of this index and sets the
# default block size of the io to value * frame_size.
def default_blksize=(value)
  @default_blksize = value
  scaled = value * frame_size
  io.default_blksize = scaled
end
155
+
156
# Returns a hash of the options describing this index. A bulk format
# such as "I*" is expanded back into its repeated form ("III").
def options
  expanded = process_in_bulk ? format.chomp("*") * frame : format

  {
    :format => expanded,
    :nil_value => nil_value,
    :cached => cached?,
    :buffer_size => buffer_size
  }
end
162
+
163
+ # Returns the string value used for nils. Specify unpacked to
164
+ # show the unpacked array value.
165
+ #
166
+ # i = ExtInd.new
167
+ # i.nil_value # => [0]
168
+ # i.nil_value(false) # => "\000\000\000\000"
169
def nil_value(unpacked=true)
  # the nil value is stored packed; unpack on request
  return @nil_value unless unpacked
  @nil_value.unpack(format)
end
172
+
173
+ # True if cached
174
def cached?
  # a cache is present exactly when the index is in cached mode
  !cache.nil?
end
177
+
178
+ # Sets the index to cache data or not. When setting cached to
179
+ # false, currently cached data is flushed.
180
def cached=(input)
  caching = cache ? true : false

  if input && !caching
    # remember where the index was, then load everything into memory
    @cache_pos = pos
    @cache = read(nil, 0)

    # read returns a lone entry in frame ([0] rather than [[0]]);
    # wrap it so the cache is always an array of framed entries
    unless @cache.empty? || @cache.first.kind_of?(Array)
      @cache = [@cache]
    end
  elsif caching && !input
    # write the cached data out, drop the cache, and restore
    # the position on the io
    flush
    @cache = nil
    self.pos = @cache_pos
    @cache_pos = nil
  end
end
198
+
199
+ # Flushes the io, writing cached data if necessary.
200
# When cached, rewrites the whole io from the cache. In all cases the io
# is then flushed and its length is reset.
def flush
  if cached?
    io.truncate(0)

    # Truncating a plain IO or StringIO does not move its position. Loading
    # the cache left the position at the end of the data, so without this
    # rewind the entries would be written after a zero-filled gap.
    io.pos = 0

    cache.each {|item| io.write item.pack(format) }
  end

  io.flush
  io.reset_length
end
209
+
210
+ # Flushes cached data and closes the io.
211
def close
  # The explicit receiver is required: a bare `cached = false` only
  # assigns a local variable, so the cache would never be written back.
  self.cached = false if cached?
  super
end
215
+
216
+ ###########################
217
+ # Array methods
218
+ ###########################
219
+
220
+ # def &(another)
221
+ # not_implemented
222
+ # end
223
+
224
+ # def *(arg)
225
+ # not_implemented
226
+ # end
227
+
228
# Flushes self, then returns a new ExtInd built with the same options
# and filled with the entries of self.
def dup
  flush
  copy = ExtInd.new(nil, options)
  copy.concat(self)
end
232
+
233
# Returns a duplicate of self with the entries of another appended.
def +(another)
  combined = dup
  combined.concat(another)
end
236
+
237
+ # def -(another)
238
+ # not_implemented
239
+ # end
240
+
241
# Appends the unframed array to the end of self and returns self.
def <<(array)
  # WRONG BEHAVIOR -- should be write, push in frame
  tail = length
  unframed_write(array, tail)
  self
end
246
+
247
# Compares self with an Array or another ExtInd. Raises a TypeError for
# any other argument.
def <=>(another)
  return 0 if self.object_id == another.object_id

  # reverse comparison in case another is an ExtInd
  return -1 * (another <=> cache) if cached?

  case another
  when Array
    if another.length < self.length
      # if another is equal to the matching subset of self,
      # then self is obviously the longer array and wins.
      result = (self.to_a(another.length) <=> another)
      result == 0 ? 1 : result
    else
      self.to_a <=> another
    end
  when ExtInd
    # A cached index keeps its current entries in memory, and its io is
    # only rewritten on flush. Compare against the entries, not the io.
    return self <=> another.to_a if another.cached?

    self.io.sort_compare(another.io, (buffer_size/2).ceil)
  else
    raise TypeError.new("can't convert from #{another.class} to ExtInd or Array")
  end
end
269
+
270
# Returns true when another is self, an Array with the same entries, or
# an ExtInd with the same entries. Any other argument is unequal.
def ==(another)
  return true if super

  case another
  when Array
    return false unless self.length == another.length
    self.to_a == another
  when ExtInd
    return false unless self.length == another.length

    unless self.cached? && another.cached?
      return false unless self.index_attrs == another.index_attrs

      # The io comparison is only valid when neither side is cached. A
      # cached index is only written to its io on flush, so its io may
      # hold stale data that wrongly compares equal.
      unless self.cached? || another.cached?
        if (self.io.sort_compare(another.io, (buffer_size/2).ceil)) == 0
          return true
        end
      end
    end

    self.to_a == another.to_a
  else
    false
  end
end
292
+
293
+ # Element Reference — Returns the entry at index, or returns an array starting
294
+ # at start and continuing for length entries, or returns an array specified
295
+ # by range. Negative indices count backward from the end of self (-1 is the last
296
+ # element). Returns nil if the index (or starting index) is out of range.
297
+ #
298
+ # io = StringIO.new [1,2,3,4,5].pack("I*")
299
+ # i = ExtInd.new(io, :format => 'I')
300
+ # i[2] #=> [3]
301
+ # i[6] #=> nil
302
+ # i[1, 2] #=> [ [2], [3] ]
303
+ # i[1..3] #=> [ [2], [3], [4] ]
304
+ # i[4..7] #=> [ [5] ]
305
+ # i[6..10] #=> nil
306
+ # i[-3, 3] #=> [ [3], [4], [5] ]
307
+ # # special cases
308
+ # i[5] #=> nil
309
+ # i[5, 1] #=> []
310
+ # i[5..10] #=> []
311
+ #
312
+ # Note that entries are returned in frame, as arrays.
313
def [](index, length=nil)
  # return the cached value if cached
  return (length == nil ? cache[index] : cache[index,length]) if cached?

  case index
  when Integer
    # Integer rather than Fixnum: Fixnum no longer exists in current
    # Ruby, and Integer also covers it on older versions
    index += self.length if index < 0
    return nil if index < 0

    unless length == nil
      return [] if length == 0 || index >= self.length
      return nil if length < 0

      # ensure you don't try to read more entries than are available
      max_length = self.length - index
      length = max_length if length > max_length
    end

    case
    when length == nil then read(1, index) # read one, as index[0]
    when length == 1 then [read(1, index)] # read one framed, as index[0,1]
    else
      read(length, index) # read length, automatic framing
    end

  when Range
    raise TypeError.new("can't convert Range into Integer") unless length == nil

    offset, length = split_range(index)

    # for conformance with array range retrieval
    return nil if offset < 0 || offset > self.length
    return [] if length < 0

    self[offset, length + 1]
  when nil
    raise TypeError.new("no implicit conversion from nil to integer")
  else
    raise TypeError.new("can't convert #{index.class} into Integer")
  end
end
355
+
356
+ # Element Assignment — Sets the entry at index, or replaces a subset starting at start
357
+ # and continuing for length entries, or replaces a subset specified by range.
358
+ # Negative indices count backward from the end of self. Inserts elements if
359
+ # length is zero. If nil is used in the second and third form, deletes elements from
360
+ # self. An IndexError is raised if a negative index points past the beginning of self.
361
+ # See also push, and unshift.
362
+ #
363
+ # io = StringIO.new ""
364
+ # i = ExtInd.new(io, :format => 'I')
365
+ # i.nil_value # => [0]
366
+ # i[4] = [4] # => [[0], [0], [0], [0], [4]]
367
+ # i[0, 3] = [ [1], [2], [3] ] # => [[1], [2], [3], [0], [4]]
368
+ # i[1..2] = [ [5], [6] ] # => [[1], [5], [6], [0], [4]]
369
+ # i[0, 2] = [ [7] ] # => [[7], [6], [0], [4]]
370
+ # i[0..2] = [ [8] ] # => [[8], [4]]
371
+ # i[-1] = [9] # => [[8], [9]]
372
+ # i[1..-1] = nil # => [[8]]
373
+ #
374
+ # Note that []= must take entries in frame, or (in the case of [offset, length] and
375
+ # range insertions) another ExtInd with the same frame, format, and nil_value.
376
+ #--
377
+ # TODO -- cleanup error messages so they are more meaningful
378
+ # and helpful, esp for frame errors
379
+ #++
380
def []=(*args)
  if args.length < 2
    raise ArgumentError.new("wrong number of arguments (#{args.length} for 2)")
  end

  index, length, value = args
  if args.length == 2
    value = length
    length = nil
  end

  case index
  when Integer
    # Integer rather than Fixnum: Fixnum no longer exists in current
    # Ruby, and Integer also covers it on older versions
    if index < 0
      index += self.length
      raise IndexError.new("index #{index} out of range") if index < 0
    end

    if length == nil
      # simple insertion. nil? is used because the object_id of nil is
      # not 4 on every Ruby version, so the old test missed nils.
      value = nil_value if value.nil?
      unframed_write(value, index)
    else
      raise IndexError.new("negative length (#{length})") if length < 0

      # arrayify value if needed
      unless value.kind_of?(ExtInd)
        value = [value] unless value.kind_of?(Array)
      end

      case
      when cached?
        # validation must occur here, because this cached insertion
        # bypasses the validations that normally occur in write
        case value
        when Array then validate_framed_array(value)
        when ExtInd then validate_index(value)
        end

        # must be done before padding in case value == self
        # WARN - could be expensive
        # TODO - check the effect of cache.dup on speed if cached?
        value = value.to_a.collect {|item| item == nil ? nil_value : item }

        # pad as needed
        pad_to(index) if index > self.length

        # write the value to the cache
        cache[index, length] = value
      when self == value
        # special case when insertion is self (no validation needed)
        # A whole copy of self is required because the insertion
        # can overwrite the tail of self. As such this can be a
        # worst-case scenario-slow and expensive procedure.
        copy_beg = (index + length) * frame_size
        copy_end = io.length

        io.copy do |copy|
          # truncate io
          io.truncate(index * frame_size)
          io.pos = io.length

          # pad as needed
          pad_to(index) if index > self.length

          # write the copy of self
          io.insert(copy)

          # copy the tail of the insertion
          io.insert(copy, copy_beg..copy_end)
        end
      when value.length == length
        # optimized insertion, when insertion is the correct length
        write(value, index)
      else
        # range insertion: requires copy and rewrite of the tail
        # of the ExtInd, after the insertion.
        # WARN - can be slow when the tail is large
        copy_beg = (index + length) * frame_size
        copy_end = io.length

        io.copy("r", copy_beg..copy_end) do |copy|
          # pad as needed
          pad_to(index) if index > self.length

          # write inserted value
          io.pos = index * frame_size
          write(value)

          # truncate io
          io.truncate(io.pos)

          # copy the tail of the insertion
          io.insert(copy)
        end
      end
    end

    value
  when Range
    raise TypeError.new("can't convert Range into Integer") if args.length == 3

    # for conformance with setting a range with nil (truncates)
    value = [] if value.nil?
    offset, length = split_range(index)
    self[offset, length + 1] = value
  when nil
    raise TypeError.new("no implicit conversion from nil to integer")
  else
    raise TypeError.new("can't convert #{index.class} into Integer")
  end
end
489
+
490
+ # def abbrev(pattern=nil)
491
+ # not_implemented
492
+ # end
493
+
494
+ # def assoc(obj)
495
+ # not_implemented
496
+ # end
497
+
498
+ # Returns entry at index
499
def at(index)
  # plain element reference, without the length form
  self.[](index)
end
502
+
503
+ # Removes all elements from _self_.
504
def clear
  # empty the cache when cached, otherwise truncate the io itself
  if cached?
    cache.clear
  else
    io.truncate(0)
  end

  self
end
508
+
509
+ # def compact
510
+ # not_implemented
511
+ # end
512
+
513
+ # def compact!
514
+ # not_implemented
515
+ # end
516
+
517
# Appends the entries of another (an Array of framed entries, or an ExtInd
# with the same index attributes) to self and returns self. Raises a
# TypeError for any other argument.
def concat(another)
  case another
  when Array
    write(another, length)
  when ExtInd
    validate_index(another)

    if cached?
      # WARN - could be expensive
      cache.concat(another.to_a)
    elsif another.cached?
      # A cached index is only written to its io on flush, so its io may
      # be stale. Append its in-memory entries instead.
      write(another.to_a, length)
    else
      io.concat(another.io)
    end
  else
    raise TypeError.new("can't convert #{another.class} into ExtInd or Array")
  end
  self
end
535
+
536
+ # def delete(obj)
537
+ # not_implemented
538
+ # end
539
+
540
+ # def delete_at(index)
541
+ # not_implemented
542
+ # end
543
+
544
+ # def delete_if # :yield: item
545
+ # not_implemented
546
+ # end
547
+
548
# Yields each entry in order, reading one chunk at a time, and returns
# self.
def each(&block) # :yield: item
  self.pos = 0
  chunk do |offset, length|
    # Every read names its offset. A cached read does not advance the
    # position, so reading "from the current position" would return the
    # first chunk again for every chunk.
    #
    # special treatment for 1, because then read(1) => [...] rather
    # than [[...]]. when frame > 1, each will iterate over the
    # element rather than pass it to the block directly
    if length == 1
      yield read(1, offset)
    else
      read(length, offset).each(&block)
    end
  end
  self
end
562
+
563
+ # Passes the index of each entry
564
def each_index(&block) # :yield: index
  # yields 0 up to length - 1; nothing is yielded for an empty index
  length.times(&block)
  self
end
568
+
569
+ # Returns true if _self_ contains no elements
570
def empty?
  length.zero?
end
573
+
574
# Same comparison as ==.
def eql?(another)
  self.==(another)
end
577
+
578
+ # def fetch(index, default=nil, &block)
579
+ # index += index_length if index < 0
580
+ # val = (index >= length ? default : self[index])
581
+ # block_given? ? yield(val) : val
582
+ # end
583
+
584
+ # def fill(*args)
585
+ # not_implemented
586
+ # end
587
+
588
+ # Returns the first n entries (default 1)
589
def first(n=nil)
  # without n the first entry is returned in frame;
  # with n an array of entries is returned
  if n.nil?
    self[0]
  else
    self[0,n]
  end
end
592
+
593
+ # def hash
594
+ # not_implemented
595
+ # end
596
+
597
+ # def include?(obj)
598
+ # not_implemented
599
+ # end
600
+
601
+ # def index(obj)
602
+ # not_implemented
603
+ # end
604
+
605
+ # def indexes(*args)
606
+ # values_at(*args)
607
+ # end
608
+ #
609
+ # def indicies(*args)
610
+ # values_at(*args)
611
+ # end
612
+
613
+ # def replace(other)
614
+ # not_implemented
615
+ # end
616
+
617
+ # def insert(index, *obj)
618
+ # self[index] = obj
619
+ # end
620
+
621
+ # def inspect
622
+ # not_implemented
623
+ # end
624
+
625
+ # def join(sep=$,)
626
+ # not_implemented
627
+ # end
628
+
629
+ # Returns the last n entries (default 1)
630
def last(n=nil)
  if n.nil?
    self[-1]
  else
    # never start before the first entry
    start = [length - n, 0].max
    self[start, n]
  end
end
637
+
638
+ # Returns the number of entries in self
639
def length
  # cached entries are counted directly; otherwise the count
  # follows from the io length and the byte size of one entry
  return cache.length if cached?
  io.length/frame_size
end
642
+
643
+ # def nitems
644
+ # not_implemented
645
+ # end
646
+
647
+ # def pop
648
+ # not_implemented
649
+ # end
650
+
651
+ # def pretty_print(q)
652
+ # not_implemented
653
+ # end
654
+
655
+ # def pretty_print_cycle(q)
656
+ # not_implemented
657
+ # end
658
+
659
+ # def push(*obj)
660
+ # not_implemented
661
+ # end
662
+
663
+ # def rassoc(key)
664
+ # not_implemented
665
+ # end
666
+
667
+ # def replace(another)
668
+ # not_implemented
669
+ # end
670
+
671
+ # def reverse
672
+ # not_implemented
673
+ # end
674
+
675
+ # def reverse!
676
+ # not_implemented
677
+ # end
678
+
679
# Yields each entry in reverse order, reading one chunk at a time, and
# returns self.
def reverse_each(&block)
  reverse_chunk do |offset, length|
    # special treatment for 1, because then read(1) => [...] rather
    # than [[...]]. when frame > 1, each will iterate over the
    # element rather than pass it to the block directly
    if length == 1
      # the offset must be given here too, otherwise the entry is read
      # from wherever the position happens to be
      yield read(1, offset)
    else
      read(length, offset).reverse_each(&block)
    end
  end
  self
end
692
+
693
+ # def rindex(obj)
694
+ # not_implemented
695
+ # end
696
+
697
+ # def select # :yield: item
698
+ # not_implemented
699
+ # end
700
+
701
+ # def shift
702
+ # not_implemented
703
+ # end
704
+
705
+ # Alias for length
706
def size
  self.length
end
709
+
710
+ # def slice(*args)
711
+ # self.call(:[], *args)
712
+ # end
713
+
714
+ # def slice!(*args)
715
+ # not_implemented
716
+ # end
717
+
718
# Returns the first length entries as an array of framed entries. When
# cached, a copy of the whole cache is returned whatever the length.
def to_a(length=self.length)
  if cached?
    cache.dup
  elsif length == 0
    []
  elsif length == 1
    # a single entry is read in frame, so wrap it
    [read(length, 0)]
  else
    read(length, 0)
  end
end
727
+
728
+ # def to_ary
729
+ # not_implemented
730
+ # end
731
+
732
+ # Returns _self_.join.
733
+ # def to_s
734
+ # self.join
735
+ # end
736
+
737
+ # def uniq
738
+ # not_implemented
739
+ # end
740
+
741
+ # def uniq!
742
+ # not_implemented
743
+ # end
744
+
745
+ # def unshift(*obj)
746
+ # not_implemented
747
+ # end
748
+
749
+ # Returns an array containing the chars in io corresponding to the given
750
+ # selector(s). The selectors may be either integer indices or ranges
751
+ # def values_at(*selectors)
752
+ # selectors.collect {|s| self[s]}.flatten
753
+ # end
754
+
755
+ # def |(another)
756
+ # not_implemented
757
+ # end
758
+
759
+ #################
760
+ # IO-like methods
761
+ ##################
762
+
763
+ # Sets the current position of the index. Negative positions
764
+ # are counted from the end of the index (just as they are in
765
+ # an array). Positions can be set beyond the actual length
766
+ # of the index (similar to an IO).
767
+ #
768
+ # i = ExtInd[[1],[2],[3]]
769
+ # i.length # => 3
770
+ # i.pos = 2; i.pos # => 2
771
+ # i.pos = -1; i.pos # => 2
772
+ # i.pos = 10; i.pos # => 10
773
def pos=(pos)
  target = pos
  if target < 0
    # negative positions count back from the end, but may not
    # reach before the first entry
    if target < -length
      raise ArgumentError.new("position out of bounds: #{pos}")
    end
    target += length
  end

  # when cached only the cache position is tracked; otherwise
  # the entry position is converted to a byte offset on the io
  if cached?
    self.cache_pos = target
  else
    io.pos = (target * frame_size)
  end
end
788
+
789
+ # Returns the current position of the index
790
def pos
  # the io tracks bytes; the index position is counted in entries
  if cached?
    cache_pos
  else
    io.pos/frame_size
  end
end
793
+
794
+ # Reads the packed byte string for n entries from the specified
795
+ # position. By default reads the string for all remaining entries
796
+ # from the current position.
797
+ #
798
+ # i = ExtInd[[1],[2],[3]]
799
+ # i.pos # => 0
800
+ # i.readbytes.unpack("I*") # => [1,2,3]
801
+ # i.readbytes(1,0).unpack("I*") # => [1]
802
+ # i.readbytes(10,1).unpack("I*") # => [2,3]
803
+ #
804
+ # Like an IO, when n is nil and no entries can be read, an empty
805
+ # string is returned. When n is specified, nil will be returned
806
+ # when no entries can be read.
807
+ #
808
+ # i.pos = 3
809
+ # i.readbytes # => ""
810
+ # i.readbytes(1) # => nil
811
def readbytes(n=nil, pos=nil)
  if cached?
    ary = read(n, pos)
    return nil if ary == nil

    # nothing to pack; packing an empty array with a non-bulk
    # format would raise
    return "" if ary.empty?

    # read returns a single entry in frame ([1, 2]) ...
    return ary.pack(format) unless ary.first.kind_of?(Array)

    # ... and several entries framed ([[1, 2], [3, 4]]). A bulk format
    # packs the flattened values in one pass. Any other format covers
    # one entry only, so each entry is packed on its own.
    return ary.flatten.pack(format) if process_in_bulk
    return ary.collect {|item| item.pack(format) }.join
  end

  # set the io position to the specified index
  self.pos = pos unless pos == nil

  # read until the end if no n is given
  n == nil ? io.read : io.read(n * frame_size)
end
823
+
824
+ # Unpacks the given string into an array of index values.
825
+ # Single entries are returned in frame, multiple entries
826
+ # are returned in an array.
827
+ #
828
+ # i.format # => 'I*'
829
+ # i.unpack( [1].pack('I*') ) # => [1]
830
+ # i.unpack( [1,2,3].pack('I*') ) # => [[1],[2],[3]]
831
+ # i.unpack("") # => []
832
+ #
833
def unpack(str)
  if str.empty?
    []
  elsif str.length == frame_size
    # exactly one entry: returned in frame
    str.unpack(format)
  elsif process_in_bulk
    # unpack everything at once, then regroup the values by frame
    entries = []
    str.unpack(format).each_slice(frame) {|entry| entries << entry }
    entries
  else
    # unpack one frame-sized slice at a time
    count = str.length/frame_size
    Array.new(count) do |i|
      str[i*frame_size, frame_size].unpack(format)
    end
  end
end
848
+
849
+ # Reads n entries from the specified position. By default
850
+ # reads all remaining entries from the current position.
851
+ # Single entries are returned in frame, multiple entries
852
+ # are returned in an array.
853
+ #
854
+ # i = ExtInd[[1],[2],[3]]
855
+ # i.pos # => 0
856
+ # i.read # => [[1],[2],[3]]
857
+ # i.read(1,0) # => [1]
858
+ # i.read(10,1) # => [[2],[3]]
859
+ #
860
+ # When n is nil and no entries can be read, an empty array
861
+ # is returned. When n is specified, nil will be returned
862
+ # when no entries can be read.
863
+ #
864
+ # i.pos = 3
865
+ # i.read # => []
866
+ # i.read(1) # => nil
867
def read(n=nil, pos=nil)
  unless cached?
    str = readbytes(n, pos)
    return str == nil ? nil : unpack(str)
  end

  # cached: serve the entries from memory. The cache position is
  # moved only when pos is given; it is not advanced by the read.
  self.pos = pos unless pos == nil
  remaining = length - cache_pos
  m = (n == nil || n > remaining) ? remaining : n

  if n == nil && m == 0
    []
  elsif m <= 1
    cache[cache_pos]
  else
    cache[cache_pos, m]
  end
end
883
+
884
# Writes framed entries (an Array of framed arrays, or an ExtInd with the
# same index attributes) starting at pos, or at the current position when
# pos is nil. Raises an ArgumentError for any other input.
def write(array, pos=nil)
  if array.kind_of?(Array)
    validate_framed_array(array)
    prepare_write_to_pos(pos)
    write_framed_array(array)
  elsif array.kind_of?(ExtInd)
    validate_index(array)
    prepare_write_to_pos(pos)
    write_index(array)
  else
    raise ArgumentError.new("could not convert #{array.class} to Array or ExtInd")
  end
end
898
+
899
+ # Writes the array as an entry (or set of entries) into
900
+ # self starting at the specified position. By default
901
+ # write begins at the current position. The array can
902
+ # have multiple entries in sequence, but MUST be in the
903
+ # correct frame.
904
+ #
905
+ # i = ExtInd[]
906
+ # i.unframed_write([2,3], 1)
907
+ # i.pos = 0;
908
+ # i.unframed_write([1])
909
+ # i.read(3, 0) # => [[1],[2],[3]]
910
+ #
911
+ # Note -- no range checking when cached
912
def unframed_write(array, pos=nil)
  if array.kind_of?(Array)
    # a flat array of values, a whole number of frames long
    validate_unframed_array(array)
    prepare_write_to_pos(pos)
    write_unframed_array(array)
  elsif array.kind_of?(ExtInd)
    validate_index(array)
    prepare_write_to_pos(pos)
    write_index(array)
  else
    raise ArgumentError.new("could not convert #{array.class} to Array or ExtInd")
  end
end
926
+
927
+ protected
928
+
929
+ attr_accessor :cache_pos
930
+
931
+ # An array of the core index attributes: frame, format, nil_value
932
def index_attrs # :nodoc:
  attrs = []
  attrs << frame << format << nil_value
  attrs
end
935
+
936
# Moves to pos ahead of a write, first padding with nil values when pos
# lies beyond the end. Does nothing when pos is nil.
def prepare_write_to_pos(pos)
  return if pos == nil

  pad_to(pos) if pos > length
  self.pos = pos
end
945
+
946
# Appends nil values to the end of self until it holds pos entries. Does
# nothing when self already holds pos entries or more.
def pad_to(pos)
  # pos and length are both counted in entries, so the difference is
  # already the number of entries to add. Dividing by frame padded too
  # little whenever frame > 1.
  n = pos - length
  return if n <= 0

  if cached?
    # the block form gives every padded entry its own array, so
    # changing one entry cannot change the others
    cache.concat(Array.new(n) { nil_value })
  else
    io.pos = io.length
    io.length += io.write(nil_value(false) * n)
  end
end
961
+
962
# Raises an ArgumentError unless index has the same index attributes
# as self.
def validate_index(index)
  theirs = index.index_attrs
  return if theirs == index_attrs

  raise ArgumentError.new("incompatible index attributes [#{index.index_attrs.join(',')}]")
end
967
+
968
# Raises an ArgumentError unless every item of array is either nil or an
# Array holding exactly frame values.
def validate_framed_array(array)
  array.each do |entry|
    if entry.kind_of?(Array)
      if entry.length != frame
        raise ArgumentError.new("expected array in frame '#{frame}' but was '#{entry.length}'")
      end
    elsif entry.nil?
      # framed arrays can contain nils
      next
    else
      raise ArgumentError.new("expected array in frame '#{frame}', was #{entry.class}")
    end
  end
end
983
+
984
# Raises an ArgumentError unless the flat array holds a whole number
# of frames.
def validate_unframed_array(array)
  if array.length % frame != 0
    raise ArgumentError.new("expected array in frame '#{frame}' but was '#{array.length}'")
  end
end
989
+
990
# Writes the entries of index (another ExtInd) at the current position.
def write_index(index)
  if cached?
    if index.cached?
      cache[cache_pos, index.length] = index.cache
      self.cache_pos += index.length
    else
      index.each do |item|
        cache[cache_pos] = item
        self.cache_pos += 1
      end
    end
  else
    end_pos = io.pos
    if index.cached?
      # The cache holds framed entries ([[1], [2]]). Packing the nested
      # array directly raises a TypeError, so pack each entry on its own.
      packed = index.cache.collect {|item| item.pack(format) }.join
      end_pos += io.write(packed)
    else
      end_pos += io.insert(index.io)
    end

    # the io length is tracked by hand; grow it when the write extended the io
    io.length = end_pos if end_pos > io.length
  end
end
1012
+
1013
# Writes an array of framed entries at the current position. Nil items
# are written as the nil value.
def write_framed_array(array)
  if cached?
    resolved = array.collect {|item| item == nil ? nil_value : item }
    cache[cache_pos, array.length] = resolved
    self.cache_pos += array.length
  else
    start_pos = io.pos

    length_written = if process_in_bulk
      # gather every value into one flat array and pack it in one pass
      values = []
      array.each {|item| values.concat(item == nil ? nil_value : item) }
      io.write(values.pack(format))
    else
      # pack and write one entry at a time
      array.inject(0) do |total, item|
        str = (item == nil ? nil_value(false) : item.pack(format))
        total + io.write(str)
      end
    end

    # the io length is tracked by hand; grow it when the write extended the io
    end_pos = start_pos + length_written
    io.length = end_pos if end_pos > io.length
  end
end
1039
+
1040
# Writes a flat array of values at the current position, one frame of
# values per entry. Unframed arrays cannot contain nils.
def write_unframed_array(array)
  if cached?
    array.each_slice(frame) do |entry|
      cache[cache_pos] = entry
      self.cache_pos += 1
    end
  else
    start_pos = io.pos
    written = 0

    if process_in_bulk
      # a bulk format packs the whole array in one pass
      written += io.write(array.pack(format))
    else
      array.each_slice(frame) do |entry|
        written += io.write(entry.pack(format))
      end
    end

    # the io length is tracked by hand; grow it when the write extended the io
    end_pos = start_pos + written
    io.length = end_pos if end_pos > io.length
  end
end
1064
+ end
1065
+
1066
+
1067
+ # # Include the inline enhancements for ExtInd
1068
+ # if RUBY_PLATFORM.index('mswin').nil?
1069
+ # require 'inline'
1070
+ # inline do |builder|
1071
+ # #builder.include "<rubyio.h>"
1072
+ # # Array.new(str.length/frame_size) do |i|
1073
+ # # str[i*frame_size, frame_size].unpack(format)
1074
+ # # end
1075
+ # builder.c %Q{
1076
+ # static VALUE unpack(VALUE str)
1077
+ # {
1078
+ # char *p = RSTRING(str)->ptr;
1079
+ # int str_len = RSTRING(str)->len;
1080
+ # int frame_size = NUM2INT(rb_iv_get(self, "@frame_size"));
1081
+ # int frame = NUM2INT(rb_iv_get(self, "@frame"));
1082
+ # int i, j, times = str_len/frame_size;
1083
+ # VALUE fmt = rb_iv_get(self, "@format");
1084
+ # VALUE results, arr;
1085
+ #
1086
+ # if(times <= 1)
1087
+ # return rb_funcall(str, rb_intern("unpack"), 1, fmt);
1088
+ #
1089
+ # results = rb_ary_new();
1090
+ # i = 0;
1091
+ # while(i < times)
1092
+ # {
1093
+ # j = 0;
1094
+ # arr = rb_ary_new();
1095
+ # while(j < frame)
1096
+ # {
1097
+ # // no need to copy the data at *p,
1098
+ # // apparently the conversion can
1099
+ # // happen directly from the pointer
1100
+ # rb_ary_push(arr, UINT2NUM(*p));
1101
+ # p += 4;
1102
+ #
1103
+ #
1104
+ # ++j;
1105
+ # }
1106
+ #
1107
+ # rb_ary_push(results, arr);
1108
+ # ++i;
1109
+ # }
1110
+ # return results;
1111
+ # }
1112
+ #
1113
+ # }#File.read(File.dirname(__FILE__) + "/../../src/inline.c")
1114
+ # end
1115
+ # else
1116
+ # # on windows when it's not likely that the user has
1117
+ # # a compiler, include the precompiled binaries
1118
+ # # require ...
1119
+ # end
1120
+