external 0.1.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. data/History +7 -0
  2. data/MIT-LICENSE +1 -3
  3. data/README +162 -127
  4. data/lib/external.rb +2 -3
  5. data/lib/external/base.rb +174 -47
  6. data/lib/external/chunkable.rb +131 -105
  7. data/lib/external/enumerable.rb +78 -33
  8. data/lib/external/io.rb +163 -398
  9. data/lib/external/patches/ruby_1_8_io.rb +31 -0
  10. data/lib/external/patches/windows_io.rb +53 -0
  11. data/lib/external/patches/windows_utils.rb +27 -0
  12. data/lib/external/utils.rb +148 -0
  13. data/lib/external_archive.rb +840 -0
  14. data/lib/external_array.rb +57 -0
  15. data/lib/external_index.rb +1053 -0
  16. metadata +42 -58
  17. data/lib/ext_arc.rb +0 -108
  18. data/lib/ext_arr.rb +0 -727
  19. data/lib/ext_ind.rb +0 -1120
  20. data/test/benchmarks/benchmarks_20070918.txt +0 -45
  21. data/test/benchmarks/benchmarks_20070921.txt +0 -91
  22. data/test/benchmarks/benchmarks_20071006.txt +0 -147
  23. data/test/benchmarks/test_copy_file.rb +0 -80
  24. data/test/benchmarks/test_pos_speed.rb +0 -47
  25. data/test/benchmarks/test_read_time.rb +0 -55
  26. data/test/cached_ext_ind_test.rb +0 -219
  27. data/test/check/benchmark_check.rb +0 -441
  28. data/test/check/namespace_conflicts_check.rb +0 -23
  29. data/test/check/pack_check.rb +0 -90
  30. data/test/ext_arc_test.rb +0 -286
  31. data/test/ext_arr/alt_sep.txt +0 -3
  32. data/test/ext_arr/cr_lf_input.txt +0 -3
  33. data/test/ext_arr/input.index +0 -0
  34. data/test/ext_arr/input.txt +0 -1
  35. data/test/ext_arr/inputb.index +0 -0
  36. data/test/ext_arr/inputb.txt +0 -1
  37. data/test/ext_arr/lf_input.txt +0 -3
  38. data/test/ext_arr/lines.txt +0 -19
  39. data/test/ext_arr/without_index.txt +0 -1
  40. data/test/ext_arr_test.rb +0 -534
  41. data/test/ext_ind_test.rb +0 -1472
  42. data/test/external/base_test.rb +0 -74
  43. data/test/external/chunkable_test.rb +0 -182
  44. data/test/external/index/input.index +0 -0
  45. data/test/external/index/inputb.index +0 -0
  46. data/test/external/io_test.rb +0 -414
  47. data/test/external_test_helper.rb +0 -31
  48. data/test/external_test_suite.rb +0 -4
  49. data/test/test_array.rb +0 -1192
@@ -0,0 +1,57 @@
1
+ require 'external_archive'
2
+ require 'yaml'
3
+
4
+ #--
5
+ # later separate out individual objects logically
6
+ # If writing, create new files:
7
+ # - base/object_id.aio (new file for receiving appends)
8
+ # - base/object_id.index (copy of existing index -- made on first insertion)
9
+ # - in index, -index indicates object_id.aio file whereas +index indicates original file
10
+ # - .consolidate(rename) resolves changes in index into the object_id file, renaming as needed
11
+ # requires index rewrite as well, to remove negatives
12
+ #
13
+ # If appending, ONLY allow << and all changes get committed to the original file.
14
+ #
15
+ # This should allow returning of new arrayio objects under read/write conditions
16
+ # By default read-only. No insertions. New ExternalArray objects inherit parent mode.
17
+ #
18
+ # Independent modes:
19
+ # - r
20
+ # - r+
21
+ # - For safety, w/w+ will by default act as r/r+, simply creating new .aio and .index files
22
+ # changes to the originals will NOT be made unless .consolidate(rename) is used. Allow option io_w => true
23
+ # - b ALWAYS on with Windows
24
+ #++
25
+
26
+ #--
27
+ # YAML cannot/does not properly handle:
28
+ # - Proc
29
+ # - Class (cannot dump)
30
+ # - Carriage return strings (removes "\r"): "\r", "\r\n", "string_with_\r\n_internal"
31
+ # - Chains of newlines (loads to ""): "\n", "\n\n"
32
+ #
33
+ #
34
+ # Bugs:
35
+ # @cls[ 'cat', 99, /a/, @cls[ 1, 2, 3] ].include?(@cls[ 1, 2, 3]) raises error
36
+ #++
37
+
38
+ class ExternalArray < ExternalArchive
39
+
40
+ def reindex(&block)
41
+ reindex_by_sep(nil,
42
+ :sep_regexp => /^-{3} /,
43
+ :sep_length => 4,
44
+ :entry_follows_sep => true,
45
+ &block)
46
+ end
47
+
48
+ def str_to_entry(str)
49
+ str == nil || str.empty? ? nil : YAML.load(str)
50
+ end
51
+
52
+ def entry_to_str(entry)
53
+ entry.to_yaml
54
+ end
55
+
56
+ private :reindex_by_regexp, :reindex_by_sep
57
+ end
@@ -0,0 +1,1053 @@
1
+ require 'external/base'
2
+ require 'strscan'
3
+
4
+ # Provides array-like access to index data kept on disk. Index data is
5
+ # defined by a packing format (see Array#pack) like 'II', which would
6
+ # represent two integers; in this case each member of the ExternalIndex
7
+ # would be a two-integer array.
8
+ #
9
+ # All directives except '@' and 'X' are allowed, in any combination.
10
+ #
11
+ #--
12
+ # not implemented --
13
+ # dclone, flatten, flatten!, frozen?, pack, quote, to_yaml, transpose, yaml_initialize
14
+ #
15
+ # Be careful accessing io directly. For performance reasons there is no check to make
16
+ # sure io is in register (ie pos is at a frame boundary, ie io.length % frame_size == 0)
17
+ # In addition, note that length must be adjusted manually in most io operations (truncate is
18
+ # the exception). Thus if you change the file length by any means, the file length must be
19
+ # reset.
20
+ class ExternalIndex < External::Base
21
+
22
+ class << self
23
+
24
+ # Initializes a new ExternalIndex using an array-like [] syntax.
25
+ # The last argument may be an options hash (this is ok since
26
+ # ExternalIndex cannot store a Hash anyhow).
27
+ def [](*argv)
28
+ options = argv.last.kind_of?(Hash) ? argv.pop : {}
29
+ index = new(nil, options)
30
+
31
+ normalized_args = argv.collect do |item|
32
+ item.nil? ? index.nil_value : item
33
+ end.flatten
34
+ index.unframed_write(normalized_args)
35
+
36
+ # reset the position of the IO under this initialize
37
+ index.pos = 0
38
+ index
39
+ end
40
+
41
+ # Opens and reads the file into an array.
42
+ def read(fd, options={})
43
+ return [] if fd.nil?
44
+ open(fd, "r", options) do |index|
45
+ index.read(nil, 0)
46
+ end
47
+ end
48
+
49
# Returns the number of bytes required to pack an item in an array
# using the directive (see Array.pack for more details). All directives
# return a size except the positioning directives '@' and 'X'; these
# and all other unknown inputs (including nil and multi-character
# strings) return nil.
#
#   Directives      N bytes
#   ------------------------------
#   AaBbCcHhUwxZ  |  1
#   nSsv          |  2
#   M             |  3
#   eFfgIiLlNPpV  |  4
#   m             |  5
#   u             |  6
#   DdEGQq        |  8
#   @X            |  nil
#
# The patterns are anchored with \A and \z (not ^ and $) so that only an
# exact single-character directive matches; '$' would also accept a
# directive followed by a newline, such as "I\n".
def directive_size(directive)
  case directive
  when /\A[eFfgIiLlNPpV]\z/ then 4
  when /\A[DdEGQq]\z/       then 8
  when /\A[AaBbCcHhUwxZ]\z/ then 1
  when /\A[nSsv]\z/         then 2
  when 'M' then 3
  when 'm' then 5
  when 'u' then 6
  else
    nil
  end
end
77
+
78
+ # Returns an array of zeros in the specified frame.
79
+ def default_nil_value(format, frame)
80
+ Array.new(frame, 0)
81
+ end
82
+ end
83
+
84
+ # The format of the indexed data. Format may be optimized from
85
+ # the original input format in cases like 'III' where bulk
86
+ # processing is useful.
87
+ attr_reader :format
88
+
89
+ # The number of elements in each entry, ex: ('I' => 1, 'III' => 3).
90
+ # frame is calculated from format.
91
+ attr_reader :frame
92
+
93
+ # The number of bytes required for each entry; frame_size is
94
+ # calculated from format.
95
+ attr_reader :frame_size
96
+
97
+ # A flag indicating whether or not the format was optimized
98
+ # to pack/unpack entries in bulk; process_in_bulk is
99
+ # automatically set according to format.
100
+ attr_reader :process_in_bulk
101
+
102
+ # The default buffer size (8Mb)
103
+ DEFAULT_BUFFER_SIZE = 8 * 2**20
104
+
105
+ def initialize(io=nil, options={})
106
+ super(io)
107
+
108
+ options = {
109
+ :format => "I",
110
+ :nil_value => nil,
111
+ :buffer_size => DEFAULT_BUFFER_SIZE
112
+ }.merge(options)
113
+
114
+ # set the format, frame, and frame size
115
+ format = options[:format]
116
+ @frame = 0
117
+ @frame_size = 0
118
+ @process_in_bulk = true
119
+
120
+ scanner = StringScanner.new(format)
121
+ if scanner.skip(/\d+/)
122
+ # skip leading numbers ... they are normally ignored
123
+ # by pack and unpack but you could raise an error.
124
+ end
125
+
126
+ bulk_directive = nil
127
+ while directive = scanner.scan(/./)
128
+ size = ExternalIndex.directive_size(directive)
129
+ raise ArgumentError.new("cannot determine size of: '#{directive}'") if size == nil
130
+
131
+ # scan for a multiplicity factor
132
+ multiplicity = (scanner.scan(/\d+/) || 1).to_i
133
+ @frame += multiplicity
134
+ @frame_size += size * multiplicity
135
+
136
+ # if the bulk directive changes,
137
+ # processing in bulk is impossible
138
+ if bulk_directive == nil
139
+ bulk_directive = directive
140
+ elsif bulk_directive != directive
141
+ @process_in_bulk = false
142
+ end
143
+ end
144
+
145
+ # The "a" and "A" directives cannot be
146
+ # processed in bulk.
147
+ if ['a','A'].include?(bulk_directive)
148
+ @process_in_bulk = false
149
+ end
150
+
151
+ # Repetitive formats like "I", "II", "I2I",
152
+ # etc can be packed and unpacked in bulk.
153
+ @format = process_in_bulk ? "#{bulk_directive}*" : format
154
+
155
+ # set the buffer size
156
+ self.buffer_size = options[:buffer_size]
157
+
158
+ # set the nil value to an array of zeros, or
159
+ # to the specified nil value. If a nil value
160
+ # was specified, ensure it is of the correct
161
+ # frame size and can be packed
162
+ nil_value = if options[:nil_value] == nil
163
+ self.class.default_nil_value(format, frame)
164
+ else
165
+ options[:nil_value]
166
+ end
167
+
168
+ begin
169
+ @nil_value = nil_value.pack(format)
170
+ unless nil_value.length == frame && @nil_value.unpack(format) == nil_value
171
+ raise "" # just to invoke the rescue block
172
+ end
173
+ rescue
174
+ raise ArgumentError,
175
+ "unacceptable nil value '#{nil_value}': the nil value must " +
176
+ "be in frame and packable using the format '#{format}'"
177
+ end
178
+ end
179
+
180
+ # Returns the buffer size of self (equal to io.default_blksize and
181
+ # default_blksize * frame_size). Buffer size specifies the memory
182
+ # available for io to perform external operations.
183
+ def buffer_size
184
+ self.io.default_blksize
185
+ end
186
+
187
+ # Sets the buffer size of self (as well as io.default_blksize and
188
+ # self.default_blksize). See buffer_size.
189
+ def buffer_size=(buffer_size)
190
+ raise ArgumentError.new("buffer size must be > 0") if buffer_size <= 0
191
+
192
+ @default_blksize = (buffer_size/frame_size).ceil
193
+ self.io.default_blksize = buffer_size
194
+ end
195
+
196
+ # Sets the default_blksize of self (and io.default_blksize to value * frame_size). See buffer_size.
197
+ def default_blksize=(value)
198
+ @default_blksize = value
199
+ self.io.default_blksize = value * frame_size
200
+ end
201
+
202
+ # Returns the string value used for nils. Specify unpacked to
203
+ # show the unpacked array value.
204
+ #
205
+ # index = ExternalIndex.new
206
+ # index.nil_value # => [0]
207
+ # index.nil_value(false) # => "\000\000\000\000"
208
+ #
209
+ def nil_value(unpacked=true)
210
+ unpacked ? @nil_value.unpack(format) : @nil_value
211
+ end
212
+
213
+ # An array of the index attributes of self: [frame, format, nil_value]
214
+ def index_attrs
215
+ [frame, format, nil_value]
216
+ end
217
+
218
+ # Returns initialization options for the current settings of self.
219
+ def options
220
+ { :format => process_in_bulk ? format[0,1] * frame : format,
221
+ :nil_value => nil_value,
222
+ :buffer_size => buffer_size}
223
+ end
224
+
225
+ # Returns another instance of self.class,
226
+ # initialized with the current options of self.
227
+ def another
228
+ self.class.new(nil, options)
229
+ end
230
+
231
+ ###########################
232
+ # Array methods
233
+ ###########################
234
+
235
+ # def &(another)
236
+ # not_implemented
237
+ # end
238
+
239
+ # def *(arg)
240
+ # not_implemented
241
+ # end
242
+
243
+ def +(another)
244
+ dup.concat(another)
245
+ end
246
+
247
+ # def -(another)
248
+ # not_implemented
249
+ # end
250
+
251
+ # Differs from the Array << in that multiple entries
252
+ # can be shifted on at once.
253
+ def <<(array)
254
+ unframed_write(array, length)
255
+ self
256
+ end
257
+
258
# Comparison -- returns -1, 0, or 1 as self sorts before, equal to, or
# after another. another may be an Array of framed entries or another
# ExternalIndex; anything else raises a TypeError.
#
# When another is an Array shorter than self, only the leading
# another.length entries of self are read and compared. If that prefix
# equals another, self is the longer sequence and sorts after it (1).
def <=>(another)
  return 0 if equal?(another)

  case another
  when Array
    if another.length < length
      # an empty array sorts before any non-empty index
      return 1 if another.empty?

      # read just the prefix; to_a takes no arguments, so the
      # prefix must be fetched with read(n, pos)
      result = (read(another.length, 0) <=> another)
      result == 0 ? 1 : result
    else
      to_a <=> another
    end
  when ExternalIndex
    io.sort_compare(another.io, (buffer_size/2).ceil)
  else
    raise TypeError.new("can't convert from #{another.class} to ExternalIndex or Array")
  end
end
277
+
278
+ def ==(another)
279
+ return true if super
280
+
281
+ case another
282
+ when Array
283
+ return false if self.length != another.length
284
+ self.to_a == another
285
+
286
+ when ExternalIndex
287
+ return false if self.length != another.length || self.index_attrs != another.index_attrs
288
+ return true if (self.io.sort_compare(another.io, (buffer_size/2).ceil)) == 0
289
+
290
+ self.to_a == another.to_a
291
+ else
292
+ false
293
+ end
294
+ end
295
+
296
+ # Element Reference — Returns the entry at index, or returns an array starting
297
+ # at start and continuing for length entries, or returns an array specified
298
+ # by range. Negative indices count backward from the end of self (-1 is the last
299
+ # element). Returns nil if the index (or starting index) is out of range.
300
+ #
301
+ # index = ExternalIndex[1,2,3,4,5]
302
+ # index[2] #=> [3]
303
+ # index[6] #=> nil
304
+ # index[1, 2] #=> [[2],[3]]
305
+ # index[1..3] #=> [[2],[3],[4]]
306
+ # index[4..7] #=> [[5]]
307
+ # index[6..10] #=> nil
308
+ # index[-3, 3] #=> [[3],[4],[5]]
309
+ #
310
+ # # special cases
311
+ # index[5] #=> nil
312
+ # index[5, 1] #=> []
313
+ # index[5..10] #=> []
314
+ #
315
+ # Note that entries are returned in frame.
316
+ def [](one, two = nil)
317
+ one = convert_to_int(one)
318
+
319
+ case one
320
+ when Fixnum
321
+
322
+ # normalize the index
323
+ if one < 0
324
+ one += length
325
+ return nil if one < 0
326
+ end
327
+
328
+ if two == nil
329
+ at(one) # read one, no frame
330
+ else
331
+ two = convert_to_int(two)
332
+ return nil if two < 0 || one > length
333
+ return [] if two == 0 || one == length
334
+
335
+ read(two, one) # read length, framed
336
+ end
337
+
338
+ when Range
339
+ raise TypeError, "can't convert Range into Integer" unless two == nil
340
+ total = length
341
+ start, length = split_range(one, total)
342
+
343
+ # (identical to those above...)
344
+ return nil if start < 0 || start > total
345
+ return [] if length < 0 || start == total
346
+
347
+ read(length + 1, start) # read length, framed
348
+
349
+ when nil
350
+ raise TypeError, "no implicit conversion from nil to integer"
351
+ when Bignum
352
+ # special case, RangeError not TypeError is raised by Array
353
+ raise RangeError, "can't convert #{one.class} into Integer"
354
+ else
355
+ raise TypeError, "can't convert #{one.class} into Integer"
356
+ end
357
+ end
358
+
359
+ # Element Assignment — Sets the entry at index, or replaces a subset starting at start
360
+ # and continuing for length entries, or replaces a subset specified by range.
361
+ # Negative indices count backward from the end of self. Inserts elements if
362
+ # length is zero. If nil is used in the second and third form, deletes elements from
363
+ # self. An IndexError is raised if a negative index points past the beginning of self.
364
+ # See also push, and unshift.
365
+ #
366
+ # index = ExternalIndex.new("", :format => 'I')
367
+ # index.nil_value # => [0]
368
+ # index[4] = [4]; index # => [[0], [0], [0], [0], [4]]
369
+ # index[0, 3] = [[1], [2], [3]]; index # => [[1], [2], [3], [0], [4]]
370
+ # index[1..2] = [[5], [6]]; index # => [[1], [5], [6], [0], [4]]
371
+ # index[0, 2] = [[7]]; index # => [[7], [6], [0], [4]]
372
+ # index[0..2] = [[8]]; index # => [[8], [4]]
373
+ # index[-1] = [9]; index # => [[8], [9]]
374
+ # index[1..-1] = nil; index # => [[8]]
375
+ #
376
+ # === Differences from Array#[]=
377
+ #
378
+ # ExternalIndex#[]= can only take entries in frame. This means that for individual
379
+ # assignments, a framed array must be given; in the case of [start, length] and
380
+ # range insertions, an array of framed arrays must be given. Nils are allowed
381
+ # in both cases, and are treated the same as in Array (although insertions replace
382
+ # nil with the nil_value for self).
383
+ #
384
+ # index = ExternalIndex.new("", :format => 'II')
385
+ # index.nil_value # => [0,0]
386
+ #
387
+ # index[0] = [1,2]; index # => [[1,2]]
388
+ # index[1] = nil; index # => [[1,2], [0,0]]
389
+ #
390
+ # index[0,2] = [[1,2],[3,4]]; index # => [[1,2], [3,4]]
391
+ # index[1..3] = [[5,6],[7,8]]; index # => [[1,2], [5,6], [7,8]]
392
+ # index[0,3] = nil; index # => []
393
+ #
394
+ # Another ExternalIndex with the same frame, format, and nil_value (ie index_attrs)
395
+ # may be used as an input to [start, length] and range insertions.
396
+ #
397
+ # === Performance
398
+ # Range insertions may require a full copy/rewrite of an ExternalIndex io. For
399
+ # very large instances, this obviously can be quite slow; the cases to watch out
400
+ # for are:
401
+ #
402
+ # - insertion of self into self (worst case)
403
+ # - insertion of values with lengths that do not match the insertion length
404
+ #
405
+ # For example:
406
+ #
407
+ # index = ExternalIndex.new("")
408
+ # index[0,1] = index
409
+ # index[0,3] = [[1], [2]]
410
+ # index[0...3] = [[1], [2], [3], [4]]
411
+ #
412
+ #--
413
+ # TODO -- cleanup error messages so they are more meaningful
414
+ # and helpful, esp for frame errors
415
+ #++
416
+ def []=(*args)
417
+ raise ArgumentError, "wrong number of arguments (1 for 2)" if args.length < 2
418
+
419
+ one, two, value = args
420
+ if args.length == 2
421
+ value = two
422
+ two = nil
423
+ end
424
+
425
+ one = convert_to_int(one)
426
+ case one
427
+ when Fixnum
428
+ if one < 0
429
+ one += length
430
+ raise IndexError, "index #{one} out of range" if one < 0
431
+ end
432
+
433
+ if two == nil
434
+ # simple insertion
435
+ unframed_write(value == nil ? nil_value : value, one)
436
+ else
437
+ two = convert_to_int(two)
438
+ raise IndexError, "negative length (#{two})" if two < 0
439
+
440
+ value = convert_to_ary(value)
441
+ case
442
+ when self == value
443
+ # special case when insertion is self (no validation needed)
444
+ # A whole copy of self is required because the insertion
445
+ # can overwrite the tail of self. As such this can be a
446
+ # worst-case scenario-slow and expensive procedure.
447
+ copy_beg = (one + two) * frame_size
448
+ copy_end = io.length
449
+
450
+ io.copy do |copy|
451
+ # truncate io
452
+ io.truncate(one * frame_size)
453
+ io.pos = io.length
454
+
455
+ # pad as needed
456
+ pad_to(one) if one > length
457
+
458
+ # write the copy of self
459
+ io.insert(copy)
460
+
461
+ # copy the tail of the insertion
462
+ io.insert(copy, copy_beg..copy_end)
463
+ end
464
+
465
+ when value.length == two
466
+ # optimized insertion, when insertion is the correct length
467
+ write(value, one)
468
+
469
+ else
470
+ # range insertion: requires copy and rewrite of the tail
471
+ # of the ExternalIndex, after the insertion.
472
+ # WARN - can be slow when the tail is large
473
+ copy_beg = (one + two) * frame_size
474
+ copy_end = io.length
475
+
476
+ io.copy("r", copy_beg..copy_end) do |copy|
477
+ # pad as needed
478
+ pad_to(one) if one > length
479
+
480
+ # write inserted value
481
+ io.pos = one * frame_size
482
+ write(value)
483
+
484
+ # truncate io
485
+ io.truncate(io.pos)
486
+
487
+ # copy the tail of the insertion
488
+ io.insert(copy)
489
+ end
490
+ end
491
+ end
492
+
493
+ when Range
494
+ raise TypeError, "can't convert Range into Integer" unless two == nil
495
+ start, length, total = split_range(one)
496
+
497
+ raise RangeError, "#{one} out of range" if start < 0
498
+ self[start, length < 0 ? 0 : length + 1] = value
499
+
500
+ when nil
501
+ raise TypeError, "no implicit conversion from nil to integer"
502
+ else
503
+ raise TypeError, "can't convert #{one.class} into Integer"
504
+ end
505
+ end
506
+
507
+ # def abbrev(pattern=nil)
508
+ # not_implemented
509
+ # end
510
+
511
+ # def assoc(obj)
512
+ # not_implemented
513
+ # end
514
+
515
# Returns the entry at index (unpacked using format), or nil when index
# lies outside of self. Negative indices count back from the end of
# self; the negative position is resolved by readbytes.
def at(index)
  # index < -length already implies index < 0, so a single
  # comparison covers the negative out-of-range case
  return nil if index >= length || index < -length

  bytes = readbytes(1, index)
  bytes && bytes.unpack(format)
end
524
+
525
+ # Removes all elements from _self_.
526
+ def clear
527
+ io.truncate(0)
528
+ self
529
+ end
530
+
531
+ # Returns a copy of self with all nil entries removed. Nil
532
+ # entries are those which equal nil_value.
533
+ #
534
+ # <em>potentially expensive</em>
535
+ def compact
536
+ another = self.another
537
+ nil_array = self.nil_value
538
+ each do |array|
539
+ another << array unless array == nil_array
540
+ end
541
+ another
542
+ end
543
+
544
+ # def compact!
545
+ # not_implemented
546
+ # end
547
+
548
+ # Appends the entries in another to self. Another may be an array
549
+ # of entries (in frame), or another ExternalIndex with corresponding
550
+ # index_attrs.
551
+ #
552
+ # <em>potentially expensive</em> especially if another is very
553
+ # large, or if it must be loaded into memory to be concatenated,
554
+ # ie when cached? = true.
555
+ def concat(another)
556
+ case another
557
+ when Array
558
+ write(another, length)
559
+ when ExternalIndex
560
+ check_index(another)
561
+ io.concat(another.io)
562
+ else
563
+ raise TypeError.new("can't convert #{another.class} into ExternalIndex or Array")
564
+ end
565
+ self
566
+ end
567
+
568
+ # def delete(obj)
569
+ # not_implemented
570
+ # end
571
+
572
+ # def delete_at(index)
573
+ # not_implemented
574
+ # end
575
+
576
+ # def delete_if # :yield: item
577
+ # not_implemented
578
+ # end
579
+
580
+ # Calls block once for each entry in self, passing that entry as a parameter.
581
+ def each(&block) # :yield: entry
582
+ self.pos = 0
583
+ chunk do |offset, length|
584
+ # special treatment for 1, because then read(1) => [...] rather
585
+ # than [[...]]. when frame > 1, each will iterate over the
586
+ # element rather than pass it to the block directly
587
+ read(length).each(&block)
588
+ end
589
+ self
590
+ end
591
+
592
+ # Same as each, but passes the index of the entry instead of the entry itself.
593
+ def each_index(&block) # :yield: index
594
+ 0.upto(length-1, &block)
595
+ self
596
+ end
597
+
598
+ # def fetch(index, default=nil, &block)
599
+ # index += index_length if index < 0
600
+ # val = (index >= length ? default : self[index])
601
+ # block_given? ? yield(val) : val
602
+ # end
603
+
604
+ # def fill(*args)
605
+ # not_implemented
606
+ # end
607
+
608
+ # Returns the first n entries (default 1)
609
+ def first(n=nil)
610
+ n.nil? ? self[0] : self[0,n]
611
+ end
612
+
613
+ # def hash
614
+ # not_implemented
615
+ # end
616
+
617
+ # def include?(obj)
618
+ # not_implemented
619
+ # end
620
+
621
+ # def index(obj)
622
+ # not_implemented
623
+ # end
624
+
625
+ # def indexes(*args)
626
+ # values_at(*args)
627
+ # end
628
+ #
629
+ # def indicies(*args)
630
+ # values_at(*args)
631
+ # end
632
+
633
+ # def replace(other)
634
+ # not_implemented
635
+ # end
636
+
637
+ # def insert(index, *obj)
638
+ # self[index] = obj
639
+ # end
640
+
641
+ # def inspect
642
+ # not_implemented
643
+ # end
644
+
645
+ # def join(sep=$,)
646
+ # not_implemented
647
+ # end
648
+
649
+ # Returns the last n entries (default 1)
650
+ def last(n=nil)
651
+ return self[-1] if n.nil?
652
+
653
+ start = length-n
654
+ start = 0 if start < 0
655
+ self[start, n]
656
+ end
657
+
658
+ # Returns the number of entries in self
659
+ def length
660
+ io.length/frame_size
661
+ end
662
+
663
+ # Returns the number of non-nil entries in self. Nil entries
664
+ # are those which equal nil_value. May be zero.
665
+ def nitems
666
+ # TODO - seems like this could be optimized
667
+ # to run without unpacking each item...
668
+ count = self.length
669
+ nil_array = self.nil_value
670
+ each do |array|
671
+ count -= 1 if array == nil_array
672
+ end
673
+ count
674
+ end
675
+
676
+ # def pop
677
+ # not_implemented
678
+ # end
679
+
680
+ # def pretty_print(q)
681
+ # not_implemented
682
+ # end
683
+
684
+ # def pretty_print_cycle(q)
685
+ # not_implemented
686
+ # end
687
+
688
+ # Append — Pushes the given entry(s) on to the end of self.
689
+ # This expression returns self, so several appends may be
690
+ # chained together. Pushed entries must be in frame.
691
+ def push(*array)
692
+ write(array, length)
693
+ self
694
+ end
695
+
696
+ # def rassoc(key)
697
+ # not_implemented
698
+ # end
699
+
700
+ # def replace(another)
701
+ # not_implemented
702
+ # end
703
+
704
+ # def reverse
705
+ # not_implemented
706
+ # end
707
+
708
+ # def reverse!
709
+ # not_implemented
710
+ # end
711
+
712
+ # Same as each, but traverses self in reverse order.
713
+ def reverse_each(&block)
714
+ reverse_chunk do |offset, length|
715
+ # special treatment for 1, because then read(1) => [...] rather
716
+ # than [[...]]. when frame > 1, each will iterate over the
717
+ # element rather than pass it to the block directly
718
+ read(length, offset).reverse_each(&block)
719
+ end
720
+ self
721
+ end
722
+
723
+ # def rindex(obj)
724
+ # not_implemented
725
+ # end
726
+
727
+ # def shift
728
+ # not_implemented
729
+ # end
730
+
731
+ # Alias for length
732
+ def size
733
+ length
734
+ end
735
+
736
+ # def slice(*args)
737
+ # self.call(:[], *args)
738
+ # end
739
+
740
+ # def slice!(*args)
741
+ # not_implemented
742
+ # end
743
+
744
+ # Converts self to an array by reading every entry; returns [] when self is empty.
745
+ def to_a
746
+ length == 0 ? [] : read(length, 0)
747
+ end
748
+
749
+ # Returns _self_.join.
750
+ # def to_s
751
+ # self.join
752
+ # end
753
+
754
+ # def uniq
755
+ # not_implemented
756
+ # end
757
+
758
+ # def uniq!
759
+ # not_implemented
760
+ # end
761
+
762
+ # def unshift(*obj)
763
+ # not_implemented
764
+ # end
765
+
766
+ # Returns a copy of self containing the entries corresponding to the
767
+ # given selector(s). The selectors may be either integer indices or
768
+ # ranges.
769
+ #
770
+ # <em>potentially expensive</em>
771
+ def values_at(*selectors)
772
+ another = self.another
773
+ selectors.each do |s|
774
+ entries = self[s]
775
+ another << (entries == nil ? nil_value : entries.flatten)
776
+ end
777
+ another
778
+ end
779
+
780
+ # def |(another)
781
+ # not_implemented
782
+ # end
783
+
784
+ #################
785
+ # IO-like methods
786
+ ##################
787
+
788
+ # Returns the current position of self (ie io.pos/frame_size).
789
+ # pos is often used as the default location for IO-like
790
+ # operations like read or write.
791
+ def pos
792
+ io.pos/frame_size
793
+ end
794
+
795
+ # Sets the current position of the index. Positions can be set beyond
796
+ # the actual length of the index, similar to an IO. Negative positions
797
+ # are counted back from the end of the index (just as they are in
798
+ # an array), but naturally raise an error if they count back to a
799
+ # position less than zero.
800
+ #
801
+ # index = ExternalIndex[[1],[2],[3]]
802
+ # index.length # => 3
803
+ #
804
+ # index.pos = 2; index.pos # => 2
805
+ # index.pos = 10; index.pos # => 10
806
+ #
807
+ # index.pos = -1; index.pos # => 2
808
+ # index.pos = -10; index.pos # !> ArgumentError
809
+ #
810
+ def pos=(pos)
811
+ if pos < 0
812
+ raise ArgumentError.new("position out of bounds: #{pos}") if pos < -length
813
+ pos += length
814
+ end
815
+
816
+ io.pos = (pos * frame_size)
817
+ end
818
+
819
+ # Reads the packed byte string for n entries from the specified
820
+ # position. By default all remaining entries will be read.
821
+ #
822
+ # index = ExternalIndex[[1],[2],[3]]
823
+ # index.pos # => 0
824
+ # index.readbytes.unpack("I*") # => [1,2,3]
825
+ # index.readbytes(1,0).unpack("I*") # => [1]
826
+ # index.readbytes(10,1).unpack("I*") # => [2,3]
827
+ #
828
+ # The behavior of readbytes when no entries can be read echoes
829
+ # that of IO; when n is nil, an empty string is returned;
830
+ # when n is specified, nil will be returned.
831
+ #
832
+ # index.pos = 3
833
+ # index.readbytes # => ""
834
+ # index.readbytes(1) # => nil
835
+ #
836
+ def readbytes(n=nil, pos=nil)
837
+ # set the io position to the specified index
838
+ self.pos = pos unless pos == nil
839
+
840
+ # read until the end if no n is given
841
+ n == nil ? io.read : io.read(n * frame_size)
842
+ end
843
+
844
# Unpacks the given string into an array of index values.
# Entries are returned in frame.
#
#   index = ExternalIndex[[1],[2],[3]]
#   index.format                              # => 'I*'
#   index.unpack( [1].pack('I*') )            # => [[1]]
#   index.unpack( [1,2,3].pack('I*') )        # => [[1],[2],[3]]
#   index.unpack("")                          # => []
#
# The string is treated as raw bytes: entry boundaries are computed
# with bytesize/byteslice so that a string tagged with a multibyte
# encoding is still split every frame_size bytes.
def unpack(str)
  if process_in_bulk
    # multiple entries, bulk processing (faster): unpack everything
    # at once, then regroup the flat values into frames
    str.unpack(format).each_slice(frame).to_a
  else
    # multiple entries, individual unpacking (slower)
    Array.new(str.bytesize/frame_size) do |i|
      str.byteslice(i*frame_size, frame_size).unpack(format)
    end
  end
end
867
+
868
+ # Reads n entries from the specified position (ie, read
869
+ # is basically readbytes, then unpack). By default all
870
+ # remaining entries will be read; single entries are
871
+ # returned in frame, multiple entries are returned in
872
+ # an array.
873
+ #
874
+ # index = ExternalIndex[[1],[2],[3]]
875
+ # index.pos # => 0
876
+ # index.read # => [[1],[2],[3]]
877
+ # index.read(1,0) # => [[1]]
878
+ # index.read(10,1) # => [[2],[3]]
879
+ #
880
+ # The behavior of read when no entries can be read echoes
881
+ # that of IO; when n is nil, an empty array is returned;
882
+ # when n is specified, nil will be returned.
883
+ #
884
+ # index.pos = 3
885
+ # index.read # => []
886
+ # index.read(1) # => nil
887
+ #
888
+ def read(n=nil, pos=nil)
889
+ str = readbytes(n, pos)
890
+ str == nil ? nil : unpack(str)
891
+ end
892
+
893
+ # Writes the framed entries into self starting at the
894
+ # specified position. By default writing begins at the
895
+ # current position. The array can have multiple entries
896
+ # so long as each is in the correct frame.
897
+ #
898
+ # index = ExternalIndex[]
899
+ # index.write([[2],[3]], 1)
900
+ # index.pos = 0;
901
+ # index.write([[1]])
902
+ # index.read(3, 0) # => [[1],[2],[3]]
903
+ #
904
+ # write may accept an ExternalIndex if it has the same
905
+ # index_attrs as self.
906
+ def write(array, pos=nil)
907
+ case array
908
+ when Array
909
+ check_framed_array(array)
910
+ prepare_write_to_pos(pos)
911
+ write_framed_array(array)
912
+ when ExternalIndex
913
+ check_index(array)
914
+ prepare_write_to_pos(pos)
915
+ write_index(array)
916
+ else
917
+ raise ArgumentError, "could not convert #{array.class} to Array or ExternalIndex"
918
+ end
919
+ end
920
+
921
+ # Same as write, except the input entries are unframed.
922
+ # Multiple entries can be provided in a single array,
923
+ # so long as the total number of elements is divisible
924
+ # into entries of the correct frame.
925
+ #
926
+ # index = ExternalIndex[]
927
+ # index.unframed_write([2,3], 1)
928
+ # index.pos = 0;
929
+ # index.unframed_write([1])
930
+ # index.read(3, 0) # => [[1],[2],[3]]
931
+ #
932
+ def unframed_write(array, pos=nil)
933
+ case array
934
+ when Array
935
+ check_unframed_array(array)
936
+ prepare_write_to_pos(pos)
937
+ write_unframed_array(array)
938
+ when ExternalIndex
939
+ check_index(array)
940
+ prepare_write_to_pos(pos)
941
+ write_index(array)
942
+ else
943
+ raise ArgumentError.new("could not convert #{array.class} to Array or ExternalIndex")
944
+ end
945
+ end
946
+
947
+ private
948
+
949
+ # prepares a write at the specified position by
950
+ # padding to the position and setting pos to
951
+ # the position
952
+ def prepare_write_to_pos(pos) # :nodoc:
953
+ unless pos == nil
954
+ # pad to the starting position if necessary
955
+ pad_to(pos) if pos > length
956
+
957
+ # set the io position to the specified index
958
+ self.pos = pos
959
+ end
960
+ end
961
+
962
# Pads io with packed nil_value entries so that self is pos entries
# long, then updates io.length by the number of bytes written.
# Does nothing when self already has at least pos entries.
#
# pos and length are both counted in entries and nil_value(false) is
# the packed string for one whole entry, so exactly (pos - length)
# copies are appended (the count must not be divided by frame).
def pad_to(pos) # :nodoc:
  missing = pos - length
  return if missing <= 0

  # append at the end of the existing data
  io.pos = io.length
  io.length += io.write(nil_value(false) * missing)
  nil
end
974
+
975
+ # checks that the input has the same index_attrs as self.
976
+ def check_index(index) # :nodoc:
977
+ unless index.index_attrs == index_attrs
978
+ raise ArgumentError.new("incompatible index attributes [#{index.index_attrs.join(',')}]")
979
+ end
980
+ end
981
+
982
+ # checks that the array consists only of
983
+ # arrays of the correct frame, or nils.
984
+ def check_framed_array(array) # :nodoc:
985
+ array.each do |item|
986
+ case item
987
+ when Array
988
+
989
+ # validate the frame of the array
990
+ unless item.length == frame
991
+ raise ArgumentError, "not in frame #{frame}: #{ellipse_inspect(item)}"
992
+ end
993
+
994
+ when nil # framed arrays can contain nils
995
+ else raise ArgumentError, "not an Array or nil value: #{item.class} "
996
+ end
997
+ end
998
+ end
999
+
1000
+ # checks that the unframed array is of a
1001
+ # frameable length
1002
+ def check_unframed_array(array) # :nodoc:
1003
+ unless array.length % frame == 0
1004
+ raise ArgumentError, "not in frame #{frame}: #{ellipse_inspect(array)}"
1005
+ end
1006
+ end
1007
+
1008
+ # writes the ExternalIndex to io.
1009
+ def write_index(index) # :nodoc:
1010
+ end_pos = io.pos + io.insert(index.io)
1011
+ io.length = end_pos if end_pos > io.length
1012
+ end
1013
+
1014
+ # writes the framed array to io. nil values
1015
+ # in the array are converted to nil_value.
1016
+ def write_framed_array(array) # :nodoc:
1017
+ start_pos = io.pos
1018
+ length_written = 0
1019
+
1020
+ if process_in_bulk
1021
+ arr = []
1022
+ array.each {|item| arr.concat(item == nil ? nil_value : item) }
1023
+ length_written += io.write(arr.pack(format))
1024
+ else
1025
+ array.each do |item|
1026
+ length_written += io.write(item == nil ? nil_value(false) : item.pack(format))
1027
+ end
1028
+ end
1029
+
1030
+ # update io.length as necessary
1031
+ end_pos = start_pos + length_written
1032
+ io.length = end_pos if end_pos > io.length
1033
+ end
1034
+
1035
+ # writes the unframed array to io. unframed
1036
+ # arrays cannot contain nils.
1037
+ def write_unframed_array(array) # :nodoc:
1038
+ start_pos = io.pos
1039
+ length_written = 0
1040
+
1041
+ if process_in_bulk
1042
+ length_written += io.write(array.pack(format))
1043
+ else
1044
+ array.each_slice(frame) do |arr|
1045
+ length_written += io.write(arr.pack(format))
1046
+ end
1047
+ end
1048
+
1049
+ # update io.length as necessary
1050
+ end_pos = start_pos + length_written
1051
+ io.length = end_pos if end_pos > io.length
1052
+ end
1053
+ end