external 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. data/History +5 -0
  2. data/MIT-LICENSE +21 -0
  3. data/README +168 -0
  4. data/lib/ext_arc.rb +108 -0
  5. data/lib/ext_arr.rb +727 -0
  6. data/lib/ext_ind.rb +1120 -0
  7. data/lib/external/base.rb +85 -0
  8. data/lib/external/chunkable.rb +105 -0
  9. data/lib/external/enumerable.rb +137 -0
  10. data/lib/external/io.rb +398 -0
  11. data/lib/external.rb +3 -0
  12. data/test/benchmarks/benchmarks_20070918.txt +45 -0
  13. data/test/benchmarks/benchmarks_20070921.txt +91 -0
  14. data/test/benchmarks/benchmarks_20071006.txt +147 -0
  15. data/test/benchmarks/test_copy_file.rb +80 -0
  16. data/test/benchmarks/test_pos_speed.rb +47 -0
  17. data/test/benchmarks/test_read_time.rb +55 -0
  18. data/test/cached_ext_ind_test.rb +219 -0
  19. data/test/check/benchmark_check.rb +441 -0
  20. data/test/check/namespace_conflicts_check.rb +23 -0
  21. data/test/check/pack_check.rb +90 -0
  22. data/test/ext_arc_test.rb +286 -0
  23. data/test/ext_arr/alt_sep.txt +3 -0
  24. data/test/ext_arr/cr_lf_input.txt +3 -0
  25. data/test/ext_arr/input.index +0 -0
  26. data/test/ext_arr/input.txt +1 -0
  27. data/test/ext_arr/inputb.index +0 -0
  28. data/test/ext_arr/inputb.txt +1 -0
  29. data/test/ext_arr/lf_input.txt +3 -0
  30. data/test/ext_arr/lines.txt +19 -0
  31. data/test/ext_arr/without_index.txt +1 -0
  32. data/test/ext_arr_test.rb +534 -0
  33. data/test/ext_ind_test.rb +1472 -0
  34. data/test/external/base_test.rb +74 -0
  35. data/test/external/chunkable_test.rb +182 -0
  36. data/test/external/index/input.index +0 -0
  37. data/test/external/index/inputb.index +0 -0
  38. data/test/external/io_test.rb +414 -0
  39. data/test/external_test_helper.rb +31 -0
  40. data/test/external_test_suite.rb +4 -0
  41. data/test/test_array.rb +1192 -0
  42. metadata +104 -0
data/lib/ext_ind.rb ADDED
@@ -0,0 +1,1120 @@
1
+ require 'external/base'
2
+
3
+ # for some reason this is sometimes not required by String
4
+ # automatically, leading to a missing each_char method
5
+ require 'jcode'
6
+
7
+ #--
8
+ # not implemented --
9
+ # dclone, flatten, flatten!, frozen?, pack, quote, to_yaml, transpose, yaml_initialize
10
+ #
11
+ # be careful accessing io directly. For performance reasons there is no check to make
12
+ # sure io is in register (ie pos is at a frame boundary, ie io.length % frame_size == 0)
13
+ # In addition, note that length must be adjusted manually in most io operations (truncate is
14
+ # the exception). Thus if you change the file length by any means, the file length must be
15
+ # reset.
16
+ #
17
+ # ExtInd allows array-like access to formatted binary data stored on disk.
18
+ #
19
+ # == Caching
20
+ #
21
+ # To improve performance, ExtInd can be run in a cached mode where the data is loaded into
22
+ # memory and kept in memory until the ExtInd closes (or is flushed). Cached mode is
23
+ # recommended for all but the largest index files, which cannot or should not be loaded
24
+ # into memory.
25
+ #++
26
+ class ExtInd < External::Base
27
+
28
+ class << self
29
+ def [](*args)
30
+ options = args.last.kind_of?(Hash) ? args.pop : {}
31
+ ab = self.new(nil, options)
32
+ normalized_args = args.collect {|item| item.nil? ? ab.nil_value : item }.flatten
33
+ ab.unframed_write(normalized_args)
34
+
35
+ # reset the position of the IO under this initialize
36
+ ab.pos = 0
37
+ ab
38
+ end
39
+
40
+ def read(fd, options={})
41
+ return [] if fd.nil?
42
+ open(fd, "r", options) do |index|
43
+ index.read(nil, 0)
44
+ end
45
+ end
46
+
47
+ # Returns the number of bytes required to pack an item in an array
48
+ # using the directive (see Array.pack for more details). All directives
49
+ # return a size except the positioning directives '@' and 'X'; these
50
+ # and all other unknown directives return nil.
51
+ #
52
+ # Directives N bytes
53
+ # ------------------------------
54
+ # AaBbCcHhUwxZ | 1
55
+ # nSsv | 2
56
+ # M | 3
57
+ # eFfgIiLlNPpV | 4
58
+ # m | 5
59
+ # u | 6
60
+ # DdEGQq | 8
61
+ # @X | nil
62
def directive_size(directive)
  # \A and \z anchor the whole string. The line anchors ^ and $ would let
  # an input such as "I\n" be sized as if it were the directive 'I'.
  case directive
  when /\A[eFfgIiLlNPpV]\z/ then 4
  when /\A[DdEGQq]\z/ then 8
  when /\A[AaBbCcHhUwxZ]\z/ then 1
  when /\A[nSsv]\z/ then 2
  when 'M' then 3
  when 'm' then 5
  when 'u' then 6
  else
    # positioning directives ('@', 'X') and anything unknown have no size
    nil
  end
end
75
+
76
+ attr_writer :default_nil_value
77
+ def default_nil_value
78
+ @default_nil_value ||= 0
79
+ end
80
+ end
81
+
82
+ attr_reader :frame, :frame_size, :format, :cache, :process_in_bulk
83
+
84
+ def initialize(io=nil, options={})
85
+ super(io)
86
+
87
+ options = {
88
+ :format => "I",
89
+ :nil_value => nil,
90
+ :cached => false,
91
+ :buffer_size => 8 * 2**20 # 8Mb
92
+ }.merge(options)
93
+
94
+ # set the format, frame, and frame size
95
+ @format = options[:format]
96
+ @frame = @format.length
97
+ @frame_size = 0
98
+
99
+ # TODO -- allow specification of numbers in the format
100
+ @format.each_char do |directive|
101
+ size = ExtInd.directive_size(directive)
102
+ raise ArgumentError.new("cannot determine size of: '#{directive}'") if size == nil
103
+ @frame_size += size
104
+ end
105
+ @process_in_bulk = false
106
+
107
+ # set the buffer size
108
+ self.buffer_size = options[:buffer_size]
109
+
110
+ # Repetitive formats like "I", "II", "III",
111
+ # etc can be packed and unpacked in bulk
112
+ directive = @format[0, 1]
113
+ if @format == (directive * @frame)
114
+ @format = "#{directive}*"
115
+ @process_in_bulk = true
116
+ end
117
+
118
+ # set the nil value to an array of zeros, or
119
+ # to the specified nil value. If a nil value
120
+ # was specified, ensure it is of the correct
121
+ # frame size and can be packed
122
+ nil_value = if options[:nil_value] == nil
123
+ Array.new(@frame, self.class.default_nil_value)
124
+ else
125
+ options[:nil_value]
126
+ end
127
+
128
+ begin
129
+ @nil_value = nil_value.pack(@format)
130
+ raise "" unless nil_value.length == @frame && @nil_value.unpack(@format) == nil_value
131
+ rescue
132
+ raise ArgumentError.new(
133
+ "unacceptable nil value '#{nil_value}': the nil value must " +
134
+ "be in frame and packable using the format '#{format}'")
135
+ end
136
+
137
+ self.cached = options[:cached]
138
+ end
139
+
140
+ def buffer_size
141
+ self.io.default_blksize
142
+ end
143
+
144
+ def buffer_size=(buffer_size)
145
+ raise ArgumentError.new("buffer size must be > 0") if buffer_size <= 0
146
+
147
+ @default_blksize = (buffer_size/frame_size).ceil
148
+ self.io.default_blksize = buffer_size
149
+ end
150
+
151
+ def default_blksize=(value)
152
+ @default_blksize = value
153
+ self.io.default_blksize = value * frame_size
154
+ end
155
+
156
+ def options
157
+ {:format => (process_in_bulk ? format.chomp("*") * frame : format),
158
+ :nil_value => nil_value,
159
+ :cached => cached?,
160
+ :buffer_size => buffer_size}
161
+ end
162
+
163
+ # Returns the value used for nils, by default as an unpacked array.
164
+ # Specify unpacked=false to get the packed string instead.
165
+ #
166
+ # i = ExtInd.new
167
+ # i.nil_value # => [0]
168
+ # i.nil_value(false) # => "\000\000\000\000"
169
+ def nil_value(unpacked=true)
170
+ unpacked ? @nil_value.unpack(format) : @nil_value
171
+ end
172
+
173
+ # True if cached
174
+ def cached?
175
+ cache != nil
176
+ end
177
+
178
+ # Sets the index to cache data or not. When setting cached to
179
+ # false, currently cached data is flushed.
180
+ def cached=(input)
181
+ if input && !cache
182
+ @cache_pos = self.pos
183
+ @cache = read(nil, 0)
184
+
185
+ # ensure the cache is an array of framed items...
186
+ # if io has only one item, then read returns an
187
+ # array like [0] rather than [[0]]
188
+ unless @cache.empty? || @cache.first.kind_of?(Array)
189
+ @cache = [@cache]
190
+ end
191
+ elsif !input && cache
192
+ flush
193
+ @cache = nil
194
+ self.pos = @cache_pos
195
+ @cache_pos = nil
196
+ end
197
+ end
198
+
199
+ # Flushes the io, writing cached data if necessary.
200
+ def flush
201
+ if cached?
202
+ io.truncate(0)
203
+ cache.each {|item| io.write item.pack(format) }
204
+ end
205
+
206
+ io.flush
207
+ io.reset_length
208
+ end
209
+
210
# Switches caching off (which flushes any cached data) and then defers
# to the superclass close.
def close
  # The explicit receiver is required: a bare `cached = false` only creates
  # a local variable and never invokes the cached= writer, so the cached
  # data would not be flushed.
  self.cached = false if cached?
  super
end
215
+
216
+ ###########################
217
+ # Array methods
218
+ ###########################
219
+
220
+ # def &(another)
221
+ # not_implemented
222
+ # end
223
+
224
+ # def *(arg)
225
+ # not_implemented
226
+ # end
227
+
228
+ def dup
229
+ self.flush
230
+ ExtInd.new(nil, options).concat(self)
231
+ end
232
+
233
+ def +(another)
234
+ dup.concat(another)
235
+ end
236
+
237
+ # def -(another)
238
+ # not_implemented
239
+ # end
240
+
241
+ def <<(array)
242
+ # WRONG BEHAVIOR -- should be write, push in frame
243
+ unframed_write(array, length)
244
+ self
245
+ end
246
+
247
+ def <=>(another)
248
+ return 0 if self.object_id == another.object_id
249
+
250
+ # reverse comparison in case another is an ExtInd
251
+ return -1 * (another <=> cache) if cached?
252
+
253
+ case another
254
+ when Array
255
+ if another.length < self.length
256
+ # if another is equal to the matching subset of self,
257
+ # then self is obviously the longer array and wins.
258
+ result = (self.to_a(another.length) <=> another)
259
+ result == 0 ? 1 : result
260
+ else
261
+ self.to_a <=> another
262
+ end
263
+ when ExtInd
264
+ self.io.sort_compare(another.io, (buffer_size/2).ceil)
265
+ else
266
+ raise TypeError.new("can't convert from #{another.class} to ExtInd or Array")
267
+ end
268
+ end
269
+
270
+ def ==(another)
271
+ return true if super
272
+
273
+ case another
274
+ when Array
275
+ return false unless self.length == another.length
276
+ self.to_a == another
277
+ when ExtInd
278
+ return false unless self.length == another.length
279
+
280
+ unless self.cached? && another.cached?
281
+ return false unless self.index_attrs == another.index_attrs
282
+ if (self.io.sort_compare(another.io, (buffer_size/2).ceil)) == 0
283
+ return true
284
+ end
285
+ end
286
+
287
+ self.to_a == another.to_a
288
+ else
289
+ false
290
+ end
291
+ end
292
+
293
+ # Element Reference — Returns the entry at index, or returns an array starting
294
+ # at start and continuing for length entries, or returns an array specified
295
+ # by range. Negative indices count backward from the end of self (-1 is the last
296
+ # element). Returns nil if the index (or starting index) is out of range.
297
+ #
298
+ # io = StringIO.new [1,2,3,4,5].pack("I*")
299
+ # i = ExtInd.new(io, :format => 'I')
300
+ # i[2] #=> [3]
301
+ # i[6] #=> nil
302
+ # i[1, 2] #=> [ [2], [3] ]
303
+ # i[1..3] #=> [ [2], [3], [4] ]
304
+ # i[4..7] #=> [ [5] ]
305
+ # i[6..10] #=> nil
306
+ # i[-3, 3] #=> [ [3], [4], [5] ]
307
+ # # special cases
308
+ # i[5] #=> nil
309
+ # i[5, 1] #=> []
310
+ # i[5..10] #=> []
311
+ #
312
+ # Note that entries are returned in frame, as arrays.
313
def [](index, length=nil)
  # return the cached value if cached
  return (length.nil? ? cache[index] : cache[index, length]) if cached?

  case index
  when Integer
    # Integer is matched instead of Fixnum: Fixnum is an Integer on old
    # rubies and no longer exists on current ones.
    index += self.length if index < 0
    return nil if index < 0

    unless length.nil?
      return [] if length == 0 || index >= self.length
      return nil if length < 0

      # ensure you don't try to read more entries than are available
      max_length = self.length - index
      length = max_length if length > max_length
    end

    case
    when length.nil? then read(1, index)    # read one, as index[0]
    when length == 1 then [read(1, index)]  # read one framed, as index[0,1]
    else
      read(length, index)                   # read length, automatic framing
    end

  when Range
    raise TypeError.new("can't convert Range into Integer") unless length.nil?

    offset, length = split_range(index)

    # for conformance with array range retrieval
    return nil if offset < 0 || offset > self.length
    return [] if length < 0

    self[offset, length + 1]
  when nil
    raise TypeError.new("no implicit conversion from nil to integer")
  else
    raise TypeError.new("can't convert #{index.class} into Integer")
  end
end
355
+
356
+ # Element Assignment — Sets the entry at index, or replaces a subset starting at start
357
+ # and continuing for length entries, or replaces a subset specified by range.
358
+ # A negative index counts backward from the end of self. Inserts elements if
359
+ # length is zero. If nil is used in the second and third form, deletes elements from
360
+ # self. An IndexError is raised if a negative index points past the beginning of self.
361
+ # See also push, and unshift.
362
+ #
363
+ # io = StringIO.new ""
364
+ # i = ExtInd.new(io, :format => 'I')
365
+ # i.nil_value # => [0]
366
+ # i[4] = [4] # => [[0], [0], [0], [0], [4]]
367
+ # i[0, 3] = [ [1], [2], [3] ] # => [[1], [2], [3], [0], [4]]
368
+ # i[1..2] = [ [5], [6] ] # => [[1], [5], [6], [0], [4]]
369
+ # i[0, 2] = [ [7] ] # => [[7], [6], [0], [4]]
370
+ # i[0..2] = [ [8] ] # => [[8], [4]]
371
+ # i[-1] = [9] # => [[8], [9]]
372
+ # i[1..-1] = nil # => [[8]]
373
+ #
374
+ # Note that []= must take entries in frame, or (in the case of [offset, length] and
375
+ # range insertions) another ExtInd with the same frame, format, and nil_value.
376
+ #--
377
+ # TODO -- cleanup error messages so they are more meaningful
378
+ # and helpful, esp for frame errors
379
+ #++
380
def []=(*args)
  raise ArgumentError.new("wrong number of arguments (1 for 2)") if args.length < 2

  # the two-argument form is (index, value), the three-argument form
  # is (index, length, value)
  index, length, value = args
  if args.length == 2
    value = length
    length = nil
  end

  case index
  when Integer
    # Integer is matched instead of Fixnum: Fixnum is an Integer on old
    # rubies and no longer exists on current ones.
    if index < 0
      index += self.length
      raise IndexError.new("index #{index} out of range") if index < 0
    end

    if length.nil?
      # simple insertion. nil? is used rather than comparing object_id
      # to a literal, because the object id of nil differs between rubies.
      value = nil_value if value.nil?
      unframed_write(value, index)
    else
      raise IndexError.new("negative length (#{length})") if length < 0

      # arrayify value if needed
      unless value.kind_of?(ExtInd)
        value = [value] unless value.kind_of?(Array)
      end

      case
      when cached?
        # validation must occur here, because this cached insertion
        # bypasses the validations that normally occur in write
        case value
        when Array then validate_framed_array(value)
        when ExtInd then validate_index(value)
        end

        # must be done before padding in case value == self
        # WARN - could be expensive
        # TODO - check the effect of cache.dup on speed if cached?
        value = value.to_a.collect {|item| item == nil ? nil_value : item }

        # pad as needed
        pad_to(index) if index > self.length

        # write the value to the cache
        cache[index, length] = value
      when self == value
        # special case when insertion is self (no validation needed)
        # A whole copy of self is required because the insertion
        # can overwrite the tail of self. As such this can be a
        # worst-case scenario-slow and expensive procedure.
        copy_beg = (index + length) * frame_size
        copy_end = io.length

        io.copy do |copy|
          # truncate io
          io.truncate(index * frame_size)
          io.pos = io.length

          # pad as needed
          pad_to(index) if index > self.length

          # write the copy of self
          io.insert(copy)

          # copy the tail of the insertion
          io.insert(copy, copy_beg..copy_end)
        end
      when value.length == length
        # optimized insertion, when insertion is the correct length
        write(value, index)
      else
        # range insertion: requires copy and rewrite of the tail
        # of the ExtInd, after the insertion.
        # WARN - can be slow when the tail is large
        copy_beg = (index + length) * frame_size
        copy_end = io.length

        io.copy("r", copy_beg..copy_end) do |copy|
          # pad as needed
          pad_to(index) if index > self.length

          # write inserted value
          io.pos = index * frame_size
          write(value)

          # truncate io
          io.truncate(io.pos)

          # copy the tail of the insertion
          io.insert(copy)
        end
      end
    end

    value
  when Range
    raise TypeError.new("can't convert Range into Integer") if args.length == 3

    # for conformance with setting a range with nil (truncates)
    value = [] if value.nil?
    offset, length = split_range(index)
    self[offset, length + 1] = value
  when nil
    raise TypeError.new("no implicit conversion from nil to integer")
  else
    raise TypeError.new("can't convert #{index.class} into Integer")
  end
end
489
+
490
+ # def abbrev(pattern=nil)
491
+ # not_implemented
492
+ # end
493
+
494
+ # def assoc(obj)
495
+ # not_implemented
496
+ # end
497
+
498
+ # Returns entry at index
499
+ def at(index)
500
+ self[index]
501
+ end
502
+
503
+ # Removes all elements from _self_.
504
+ def clear
505
+ cached? ? cache.clear : io.truncate(0)
506
+ self
507
+ end
508
+
509
+ # def compact
510
+ # not_implemented
511
+ # end
512
+
513
+ # def compact!
514
+ # not_implemented
515
+ # end
516
+
517
+ def concat(another)
518
+ case another
519
+ when Array
520
+ write(another, length)
521
+ when ExtInd
522
+ validate_index(another)
523
+
524
+ if cached?
525
+ # WARN - could be expensive
526
+ cache.concat(another.to_a)
527
+ else
528
+ io.concat(another.io)
529
+ end
530
+ else
531
+ raise TypeError.new("can't convert #{another.class} into ExtInd or Array")
532
+ end
533
+ self
534
+ end
535
+
536
+ # def delete(obj)
537
+ # not_implemented
538
+ # end
539
+
540
+ # def delete_at(index)
541
+ # not_implemented
542
+ # end
543
+
544
+ # def delete_if # :yield: item
545
+ # not_implemented
546
+ # end
547
+
548
def each(&block) # :yield: item
  self.pos = 0
  chunk do |offset, length|
    # The chunk offset is passed to read explicitly rather than relying on
    # the position advancing between reads: the cached branch of read
    # does not move the cache position, so sequential reads would return
    # the same entries for every chunk.
    #
    # special treatment for 1, because then read(1) => [...] rather
    # than [[...]]. when frame > 1, each will iterate over the
    # element rather than pass it to the block directly
    if length == 1
      yield read(1, offset)
    else
      read(length, offset).each(&block)
    end
  end
  self
end
562
+
563
+ # Passes the index of each entry
564
+ def each_index(&block) # :yield: index
565
+ 0.upto(length-1, &block)
566
+ self
567
+ end
568
+
569
+ # Returns true if _self_ contains no elements
570
+ def empty?
571
+ length == 0
572
+ end
573
+
574
+ def eql?(another)
575
+ self == another
576
+ end
577
+
578
+ # def fetch(index, default=nil, &block)
579
+ # index += index_length if index < 0
580
+ # val = (index >= length ? default : self[index])
581
+ # block_given? ? yield(val) : val
582
+ # end
583
+
584
+ # def fill(*args)
585
+ # not_implemented
586
+ # end
587
+
588
+ # Returns the first n entries (default 1)
589
+ def first(n=nil)
590
+ n.nil? ? self[0] : self[0,n]
591
+ end
592
+
593
+ # def hash
594
+ # not_implemented
595
+ # end
596
+
597
+ # def include?(obj)
598
+ # not_implemented
599
+ # end
600
+
601
+ # def index(obj)
602
+ # not_implemented
603
+ # end
604
+
605
+ # def indexes(*args)
606
+ # values_at(*args)
607
+ # end
608
+ #
609
+ # def indicies(*args)
610
+ # values_at(*args)
611
+ # end
612
+
613
+ # def replace(other)
614
+ # not_implemented
615
+ # end
616
+
617
+ # def insert(index, *obj)
618
+ # self[index] = obj
619
+ # end
620
+
621
+ # def inspect
622
+ # not_implemented
623
+ # end
624
+
625
+ # def join(sep=$,)
626
+ # not_implemented
627
+ # end
628
+
629
+ # Returns the last n entries (default 1)
630
+ def last(n=nil)
631
+ return self[-1] if n.nil?
632
+
633
+ start = length-n
634
+ start = 0 if start < 0
635
+ self[start, n]
636
+ end
637
+
638
+ # Returns the number of entries in self
639
+ def length
640
+ cached? ? cache.length : io.length/frame_size
641
+ end
642
+
643
+ # def nitems
644
+ # not_implemented
645
+ # end
646
+
647
+ # def pop
648
+ # not_implemented
649
+ # end
650
+
651
+ # def pretty_print(q)
652
+ # not_implemented
653
+ # end
654
+
655
+ # def pretty_print_cycle(q)
656
+ # not_implemented
657
+ # end
658
+
659
+ # def push(*obj)
660
+ # not_implemented
661
+ # end
662
+
663
+ # def rassoc(key)
664
+ # not_implemented
665
+ # end
666
+
667
+ # def replace(another)
668
+ # not_implemented
669
+ # end
670
+
671
+ # def reverse
672
+ # not_implemented
673
+ # end
674
+
675
+ # def reverse!
676
+ # not_implemented
677
+ # end
678
+
679
def reverse_each(&block)
  reverse_chunk do |offset, length|
    # special treatment for 1, because then read(1) => [...] rather
    # than [[...]]. when frame > 1, each will iterate over the
    # element rather than pass it to the block directly
    if length == 1
      # the offset must be given here as well; without it the entry is
      # read from wherever the position currently is, not from the chunk
      yield read(1, offset)
    else
      read(length, offset).reverse_each(&block)
    end
  end
  self
end
692
+
693
+ # def rindex(obj)
694
+ # not_implemented
695
+ # end
696
+
697
+ # def select # :yield: item
698
+ # not_implemented
699
+ # end
700
+
701
+ # def shift
702
+ # not_implemented
703
+ # end
704
+
705
+ # Alias for length
706
+ def size
707
+ length
708
+ end
709
+
710
+ # def slice(*args)
711
+ # self.call(:[], *args)
712
+ # end
713
+
714
+ # def slice!(*args)
715
+ # not_implemented
716
+ # end
717
+
718
# Returns the first length entries (by default all entries) as an array
# of framed entries.
def to_a(length=self.length)
  case
  # slice the cache so the length argument is honored when cached, as it
  # is when reading from io; the slice is a new array, like cache.dup was
  when cached? then cache[0, length]
  when length == 0 then []
  # a single entry is read back in frame, so wrap it
  when length == 1 then [read(length, 0)]
  else
    read(length, 0)
  end
end
727
+
728
+ # def to_ary
729
+ # not_implemented
730
+ # end
731
+
732
+ # Returns _self_.join.
733
+ # def to_s
734
+ # self.join
735
+ # end
736
+
737
+ # def uniq
738
+ # not_implemented
739
+ # end
740
+
741
+ # def uniq!
742
+ # not_implemented
743
+ # end
744
+
745
+ # def unshift(*obj)
746
+ # not_implemented
747
+ # end
748
+
749
+ # Returns an array containing the chars in io corresponding to the given
750
+ # selector(s). The selectors may be either integer indices or ranges
751
+ # def values_at(*selectors)
752
+ # selectors.collect {|s| self[s]}.flatten
753
+ # end
754
+
755
+ # def |(another)
756
+ # not_implemented
757
+ # end
758
+
759
+ #################
760
+ # IO-like methods
761
+ ##################
762
+
763
+ # Sets the current position of the index. Negative positions
764
+ # are counted from the end of the index (just as they are in
765
+ # an array). Positions can be set beyond the actual length
766
+ # of the index (similar to an IO).
767
+ #
768
+ # i = ExtInd[[1],[2],[3]]
769
+ # i.length # => 3
770
+ # i.pos = 2; i.pos # => 2
771
+ # i.pos = -1; i.pos # => 2
772
+ # i.pos = 10; i.pos # => 10
773
+ def pos=(pos)
774
+ if pos < 0
775
+ raise ArgumentError.new("position out of bounds: #{pos}") if pos < -length
776
+ pos += length
777
+ end
778
+
779
+ # do something fake for caching so that
780
+ # the position need not be set (this
781
+ # works either way)
782
+ if cached?
783
+ self.cache_pos = pos
784
+ else
785
+ io.pos = (pos * frame_size)
786
+ end
787
+ end
788
+
789
+ # Returns the current position of the index
790
+ def pos
791
+ cached? ? cache_pos : io.pos/frame_size
792
+ end
793
+
794
+ # Reads the packed byte string for n entries from the specified
795
+ # position. By default reads the string for all remaining entries
796
+ # from the current position.
797
+ #
798
+ # i = ExtInd[[1],[2],[3]]
799
+ # i.pos # => 0
800
+ # i.readbytes.unpack("I*") # => [1,2,3]
801
+ # i.readbytes(1,0).unpack("I*") # => [1]
802
+ # i.readbytes(10,1).unpack("I*") # => [2,3]
803
+ #
804
+ # Like an IO, when n is nil and no entries can be read, an empty
805
+ # string is returned. When n is specified, nil will be returned
806
+ # when no entries can be read.
807
+ #
808
+ # i.pos = 3
809
+ # i.readbytes # => ""
810
+ # i.readbytes(1) # => nil
811
+ def readbytes(n=nil, pos=nil)
812
+ if cached?
813
+ ary = read(n, pos)
814
+ return (ary == nil ? nil : ary.flatten.pack(format))
815
+ end
816
+
817
+ # set the io position to the specified index
818
+ self.pos = pos unless pos == nil
819
+
820
+ # read until the end if no n is given
821
+ n == nil ? io.read : io.read(n * frame_size)
822
+ end
823
+
824
+ # Unpacks the given string into an array of index values.
825
+ # Single entries are returned in frame, multiple entries
826
+ # are returned in an array.
827
+ #
828
+ # i.format # => 'I*'
829
+ # i.unpack( [1].pack('I*') ) # => [1]
830
+ # i.unpack( [1,2,3].pack('I*') ) # => [[1],[2],[3]]
831
+ # i.unpack("") # => []
832
+ #
833
+ def unpack(str)
834
+ case
835
+ when str.empty? then []
836
+ when str.length == frame_size
837
+ str.unpack(format)
838
+ when process_in_bulk
839
+ results = []
840
+ str.unpack(format).each_slice(frame) {|s| results << s}
841
+ results
842
+ else
843
+ Array.new(str.length/frame_size) do |i|
844
+ str[i*frame_size, frame_size].unpack(format)
845
+ end
846
+ end
847
+ end
848
+
849
+ # Reads n entries from the specified position. By default
850
+ # reads all remaining entries from the current position.
851
+ # Single entries are returned in frame, multiple entries
852
+ # are returned in an array.
853
+ #
854
+ # i = ExtInd[[1],[2],[3]]
855
+ # i.pos # => 0
856
+ # i.read # => [[1],[2],[3]]
857
+ # i.read(1,0) # => [1]
858
+ # i.read(10,1) # => [[2],[3]]
859
+ #
860
+ # When n is nil and no entries can be read, an empty array
861
+ # is returned. When n is specified, nil will be returned
862
+ # when no entries can be read.
863
+ #
864
+ # i.pos = 3
865
+ # i.read # => []
866
+ # i.read(1) # => nil
867
+ def read(n=nil, pos=nil)
868
+ if cached?
869
+ self.pos = pos unless pos == nil
870
+ m = (n == nil || n > (length - cache_pos)) ? (length - cache_pos) : n
871
+
872
+ return case
873
+ when n == nil && m == 0 then []
874
+ when m <= 1 then cache[cache_pos]
875
+ else
876
+ cache[cache_pos, m]
877
+ end
878
+ end
879
+
880
+ str = readbytes(n, pos)
881
+ str == nil ? nil : unpack(str)
882
+ end
883
+
884
+ def write(array, pos=nil)
885
+ case array
886
+ when Array
887
+ validate_framed_array(array)
888
+ prepare_write_to_pos(pos)
889
+ write_framed_array(array)
890
+ when ExtInd
891
+ validate_index(array)
892
+ prepare_write_to_pos(pos)
893
+ write_index(array)
894
+ else
895
+ raise ArgumentError.new("could not convert #{array.class} to Array or ExtInd")
896
+ end
897
+ end
898
+
899
+ # Writes the array as an entry (or set of entries) into
900
+ # self starting at the specified position. By default
901
+ # write begins at the current position. The array can
902
+ # have multiple entries in sequence, but MUST be in the
903
+ # correct frame.
904
+ #
905
+ # i = ExtInd[]
906
+ # i.unframed_write([2,3], 1)
907
+ # i.pos = 0;
908
+ # i.unframed_write([1])
909
+ # i.read(3, 0) # => [[1],[2],[3]]
910
+ #
911
+ # Note -- no range checking when cached
912
+ def unframed_write(array, pos=nil)
913
+ case array
914
+ when Array
915
+ validate_unframed_array(array)
916
+ prepare_write_to_pos(pos)
917
+ write_unframed_array(array)
918
+ when ExtInd
919
+ validate_index(array)
920
+ prepare_write_to_pos(pos)
921
+ write_index(array)
922
+ else
923
+ raise ArgumentError.new("could not convert #{array.class} to Array or ExtInd")
924
+ end
925
+ end
926
+
927
+ protected
928
+
929
+ attr_accessor :cache_pos
930
+
931
+ # An array of the core index attributes: frame, format, nil_value
932
+ def index_attrs # :nodoc:
933
+ [frame, format, nil_value]
934
+ end
935
+
936
+ def prepare_write_to_pos(pos)
937
+ unless pos == nil
938
+ # pad to the starting position if necessary
939
+ pad_to(pos) if pos > length
940
+
941
+ # set the io position to the specified index
942
+ self.pos = pos
943
+ end
944
+ end
945
+
946
# Appends nil-value entries until self holds pos entries. Callers only
# invoke this when pos is greater than length.
def pad_to(pos)
  # pos and length are both counted in entries, so the number of entries
  # to add is their plain difference (dividing by frame under-pads
  # whenever frame > 1)
  n = pos - length

  if cached?
    # the block form builds a separate nil value for every slot, so that
    # changing one padded entry in place cannot change the others
    cache.concat(Array.new(n) { nil_value })
  else
    io.pos = io.length
    io.length += io.write(nil_value(false) * n)
  end
end
961
+
962
+ def validate_index(index)
963
+ unless index.index_attrs == index_attrs
964
+ raise ArgumentError.new("incompatible index attributes [#{index.index_attrs.join(',')}]")
965
+ end
966
+ end
967
+
968
+ def validate_framed_array(array)
969
+ array.each do |item|
970
+ case item
971
+ when Array
972
+ unless item.length == frame
973
+ raise ArgumentError.new("expected array in frame '#{frame}' but was '#{item.length}'")
974
+ end
975
+ when nil
976
+ # framed arrays can contain nils
977
+ next
978
+ else
979
+ raise ArgumentError.new("expected array in frame '#{frame}', was #{item.class}")
980
+ end
981
+ end
982
+ end
983
+
984
+ def validate_unframed_array(array)
985
+ unless array.length % frame == 0
986
+ raise ArgumentError.new("expected array in frame '#{frame}' but was '#{array.length}'")
987
+ end
988
+ end
989
+
990
# Writes the entries of another index into self at the current position,
# using the cache or the io of each side as appropriate.
def write_index(index)
  if cached?
    if index.cached?
      cache[cache_pos, index.length] = index.cache
      self.cache_pos += index.length
    else
      index.each do |item|
        cache[cache_pos] = item
        self.cache_pos += 1
      end
    end
  else
    end_pos = io.pos
    if index.cached?
      # the cache is an array of framed entries, which cannot be packed
      # as a whole (pack raises on nested arrays); pack entry by entry,
      # the same way flush writes out the cache
      index.cache.each {|item| end_pos += io.write(item.pack(format)) }
    else
      end_pos += io.insert(index.io)
    end

    # only grow the recorded length; a write inside the existing data
    # leaves it unchanged
    io.length = end_pos if end_pos > io.length
  end
end
1012
+
1013
+ def write_framed_array(array)
1014
+ # framed arrays may contain nils, and must
1015
+ # be resolved before writing the data
1016
+
1017
+ if cached?
1018
+ cache[cache_pos, array.length] = array.collect {|item| item == nil ? nil_value : item }
1019
+ self.cache_pos += array.length
1020
+ else
1021
+ start_pos = io.pos
1022
+ length_written = 0
1023
+
1024
+ if process_in_bulk
1025
+ arr = []
1026
+ array.each {|item| arr.concat(item == nil ? nil_value : item) }
1027
+ length_written += io.write(arr.pack(format))
1028
+ else
1029
+ array.each do |item|
1030
+ str = (item == nil ? nil_value(false) : item.pack(format))
1031
+ length_written += io.write(str)
1032
+ end
1033
+ end
1034
+
1035
+ end_pos = start_pos + length_written
1036
+ io.length = end_pos if end_pos > io.length
1037
+ end
1038
+ end
1039
+
1040
+ def write_unframed_array(array)
1041
+ # unframed arrays cannot contain nils
1042
+
1043
+ if cached?
1044
+ array.each_slice(frame) do |item|
1045
+ cache[cache_pos] = item
1046
+ self.cache_pos += 1
1047
+ end
1048
+ else
1049
+ start_pos = io.pos
1050
+ length_written = 0
1051
+
1052
+ if process_in_bulk
1053
+ length_written += io.write(array.pack(format))
1054
+ else
1055
+ array.each_slice(frame) do |arr|
1056
+ length_written += io.write(arr.pack(format))
1057
+ end
1058
+ end
1059
+
1060
+ end_pos = start_pos + length_written
1061
+ io.length = end_pos if end_pos > io.length
1062
+ end
1063
+ end
1064
+ end
1065
+
1066
+
1067
+ # # Include the inline enhancements for ExtInd
1068
+ # if RUBY_PLATFORM.index('mswin').nil?
1069
+ # require 'inline'
1070
+ # inline do |builder|
1071
+ # #builder.include "<rubyio.h>"
1072
+ # # Array.new(str.length/frame_size) do |i|
1073
+ # # str[i*frame_size, frame_size].unpack(format)
1074
+ # # end
1075
+ # builder.c %Q{
1076
+ # static VALUE unpack(VALUE str)
1077
+ # {
1078
+ # char *p = RSTRING(str)->ptr;
1079
+ # int str_len = RSTRING(str)->len;
1080
+ # int frame_size = NUM2INT(rb_iv_get(self, "@frame_size"));
1081
+ # int frame = NUM2INT(rb_iv_get(self, "@frame"));
1082
+ # int i, j, times = str_len/frame_size;
1083
+ # VALUE fmt = rb_iv_get(self, "@format");
1084
+ # VALUE results, arr;
1085
+ #
1086
+ # if(times <= 1)
1087
+ # return rb_funcall(str, rb_intern("unpack"), 1, fmt);
1088
+ #
1089
+ # results = rb_ary_new();
1090
+ # i = 0;
1091
+ # while(i < times)
1092
+ # {
1093
+ # j = 0;
1094
+ # arr = rb_ary_new();
1095
+ # while(j < frame)
1096
+ # {
1097
+ # // no need to copy the data at *p,
1098
+ # // apparently the conversion can
1099
+ # // happen directly from the pointer
1100
+ # rb_ary_push(arr, UINT2NUM(*p));
1101
+ # p += 4;
1102
+ #
1103
+ #
1104
+ # ++j;
1105
+ # }
1106
+ #
1107
+ # rb_ary_push(results, arr);
1108
+ # ++i;
1109
+ # }
1110
+ # return results;
1111
+ # }
1112
+ #
1113
+ # }#File.read(File.dirname(__FILE__) + "/../../src/inline.c")
1114
+ # end
1115
+ # else
1116
+ # # on windows when it's not likely that the user has
1117
+ # # a compiler, include the precompiled binaries
1118
+ # # require ...
1119
+ # end
1120
+