external 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. data/History +7 -0
  2. data/MIT-LICENSE +1 -3
  3. data/README +162 -127
  4. data/lib/external.rb +2 -3
  5. data/lib/external/base.rb +174 -47
  6. data/lib/external/chunkable.rb +131 -105
  7. data/lib/external/enumerable.rb +78 -33
  8. data/lib/external/io.rb +163 -398
  9. data/lib/external/patches/ruby_1_8_io.rb +31 -0
  10. data/lib/external/patches/windows_io.rb +53 -0
  11. data/lib/external/patches/windows_utils.rb +27 -0
  12. data/lib/external/utils.rb +148 -0
  13. data/lib/external_archive.rb +840 -0
  14. data/lib/external_array.rb +57 -0
  15. data/lib/external_index.rb +1053 -0
  16. metadata +42 -58
  17. data/lib/ext_arc.rb +0 -108
  18. data/lib/ext_arr.rb +0 -727
  19. data/lib/ext_ind.rb +0 -1120
  20. data/test/benchmarks/benchmarks_20070918.txt +0 -45
  21. data/test/benchmarks/benchmarks_20070921.txt +0 -91
  22. data/test/benchmarks/benchmarks_20071006.txt +0 -147
  23. data/test/benchmarks/test_copy_file.rb +0 -80
  24. data/test/benchmarks/test_pos_speed.rb +0 -47
  25. data/test/benchmarks/test_read_time.rb +0 -55
  26. data/test/cached_ext_ind_test.rb +0 -219
  27. data/test/check/benchmark_check.rb +0 -441
  28. data/test/check/namespace_conflicts_check.rb +0 -23
  29. data/test/check/pack_check.rb +0 -90
  30. data/test/ext_arc_test.rb +0 -286
  31. data/test/ext_arr/alt_sep.txt +0 -3
  32. data/test/ext_arr/cr_lf_input.txt +0 -3
  33. data/test/ext_arr/input.index +0 -0
  34. data/test/ext_arr/input.txt +0 -1
  35. data/test/ext_arr/inputb.index +0 -0
  36. data/test/ext_arr/inputb.txt +0 -1
  37. data/test/ext_arr/lf_input.txt +0 -3
  38. data/test/ext_arr/lines.txt +0 -19
  39. data/test/ext_arr/without_index.txt +0 -1
  40. data/test/ext_arr_test.rb +0 -534
  41. data/test/ext_ind_test.rb +0 -1472
  42. data/test/external/base_test.rb +0 -74
  43. data/test/external/chunkable_test.rb +0 -182
  44. data/test/external/index/input.index +0 -0
  45. data/test/external/index/inputb.index +0 -0
  46. data/test/external/io_test.rb +0 -414
  47. data/test/external_test_helper.rb +0 -31
  48. data/test/external_test_suite.rb +0 -4
  49. data/test/test_array.rb +0 -1192
@@ -0,0 +1,57 @@
1
+ require 'external_archive'
2
+ require 'yaml'
3
+
4
+ #--
5
+ # later separate out individual objects logically
6
+ # If writing, create new files:
7
+ # - base/object_id.aio (new file for receiving appends)
8
+ # - base/object_id.index (copy of existing index -- made on first insertion)
9
+ # - in index, -index indicates object_id.aio file whereas +index indicates original file
10
+ # - .consolidate(rename) resolves changes in index into the object_id file, renaming as needed
11
+ # requires index rewrite as well, to remove negatives
12
+ #
13
+ # If appending, ONLY allow << and all changes get committed to the original file.
14
+ #
15
+ # This should allow returning of new arrayio objects under read/write conditions
16
+ # By default read-only. No insertions. New ExternalArray objects inherit parent mode.
17
+ #
18
+ # Independent modes:
19
+ # - r
20
+ # - r+
21
+ # - For safety, w/w+ will by default act as r/r+, simply creating new .aio and .index files
22
+ # changes to the originals will NOT be made unless .consolidate(rename) is used. Allow option io_w => true
23
+ # - b ALWAYS on with Windows
24
+ #++
25
+
26
+ #--
27
+ # YAML cannot/does not properly handle:
28
+ # - Proc
29
+ # - Class (cannot dump)
30
+ # - Carriage return strings (removes "\r"): "\r", "\r\n", "string_with_\r\n_internal"
31
+ # - Chains of newlines (loads to ""): "\n", "\n\n"
32
+ #
33
+ #
34
+ # Bugs:
35
+ # @cls[ 'cat', 99, /a/, @cls[ 1, 2, 3] ].include?(@cls[ 1, 2, 3]) raises error
36
+ #++
37
+
38
# An ExternalArchive whose entries are serialized as YAML documents.
class ExternalArray < ExternalArchive

  # Rebuilds the index by delegating to reindex_by_sep, using the YAML
  # document header ("--- ") as the separator; the entry is taken to
  # follow the separator. The block, if any, is passed through.
  def reindex(&block)
    reindex_by_sep(
      nil,
      :sep_regexp => /^-{3} /,
      :sep_length => 4,
      :entry_follows_sep => true,
      &block
    )
  end

  # Converts a stored string back into an entry. A nil or empty string
  # yields nil; anything else is parsed as YAML.
  #
  # NOTE(review): YAML.load can instantiate arbitrary objects; confirm the
  # archive is never populated from untrusted input.
  def str_to_entry(str)
    return nil if str.nil? || str.empty?

    YAML.load(str)
  end

  # Serializes an entry to the string that is stored (its YAML form).
  def entry_to_str(entry)
    entry.to_yaml
  end

  private :reindex_by_regexp, :reindex_by_sep
end
@@ -0,0 +1,1053 @@
1
+ require 'external/base'
2
+ require 'strscan'
3
+
4
+ # Provides array-like access to index data kept on disk. Index data is
5
+ # defined by a packing format (see Array#pack) like 'II', which would
6
+ # represent two integers; in this case each member of the ExternalIndex
7
+ # would be a two-integer array.
8
+ #
9
+ # All directives except '@' and 'X' are allowed, in any combination.
10
+ #
11
+ #--
12
+ # not implemented --
13
+ # dclone, flatten, flatten!, frozen?, pack, quote, to_yaml, transpose, yaml_initialize
14
+ #
15
+ # Be careful accessing io directly. For performance reasons there is no check to make
16
+ # sure io is in register (ie pos is at a frame boundary, ie io.length % frame_size == 0)
17
+ # In addition, note that length must be adjusted manually in most io operations (truncate is
18
+ # the exception). Thus if you change the file length by any means, the file length must be
19
+ # reset.
20
+ class ExternalIndex < External::Base
21
+
22
+ class << self
23
+
24
+ # Initializes a new ExternalIndex using an array-like [] syntax.
25
+ # The last argument may be an options hash (this is ok since
26
+ # ExternalIndex cannot store a Hash anyhow).
27
+ def [](*argv)
28
+ options = argv.last.kind_of?(Hash) ? argv.pop : {}
29
+ index = new(nil, options)
30
+
31
+ normalized_args = argv.collect do |item|
32
+ item.nil? ? index.nil_value : item
33
+ end.flatten
34
+ index.unframed_write(normalized_args)
35
+
36
+ # reset the position of the IO under this initialize
37
+ index.pos = 0
38
+ index
39
+ end
40
+
41
+ # Opens and reads the file into an array.
42
+ def read(fd, options={})
43
+ return [] if fd.nil?
44
+ open(fd, "r", options) do |index|
45
+ index.read(nil, 0)
46
+ end
47
+ end
48
+
49
+ # Returns the number of bytes required to pack an item in an array
50
+ # using the directive (see Array.pack for more details). All directives
51
+ # return a size except the positioning directives '@' and 'X'; these
52
+ # and all other unknown directives return nil.
53
+ #
54
+ # Directives N bytes
55
+ # ------------------------------
56
+ # AaBbCcHhUwxZ | 1
57
+ # nSsv | 2
58
+ # M | 3
59
+ # eFfgIiLlNPpV | 4
60
+ # m | 5
61
+ # u | 6
62
+ # DdEGQq | 8
63
+ # @X | nil
64
def directive_size(directive)
  # The patterns are anchored with \A and \z so the WHOLE argument must be
  # a single directive character. The line anchors ^ and $ would accept
  # input such as "I\n" or "I\nX" and size it by its first line, although
  # such strings are not known directives and must yield nil.
  case directive
  when /\A[eFfgIiLlNPpV]\z/ then 4
  when /\A[DdEGQq]\z/       then 8
  when /\A[AaBbCcHhUwxZ]\z/ then 1
  when /\A[nSsv]\z/         then 2
  when 'M' then 3
  when 'm' then 5
  when 'u' then 6
  else
    # '@', 'X' and every unrecognized directive have no determinable size
    nil
  end
end
77
+
78
+ # Returns an array of zeros in the specified frame.
79
+ def default_nil_value(format, frame)
80
+ Array.new(frame, 0)
81
+ end
82
+ end
83
+
84
+ # The format of the indexed data. Format may be optimized from
85
+ # the original input format in cases like 'III' where bulk
86
+ # processing is useful.
87
+ attr_reader :format
88
+
89
+ # The number of elements in each entry, ex: ('I' => 1, 'III' => 3).
90
+ # frame is calculated from format.
91
+ attr_reader :frame
92
+
93
+ # The number of bytes required for each entry; frame_size is
94
+ # calculated from format.
95
+ attr_reader :frame_size
96
+
97
+ # A flag indicating whether or not the format was optimized
98
+ # to pack/unpack entries in bulk; process_in_bulk is
99
+ # automatically set according to format.
100
+ attr_reader :process_in_bulk
101
+
102
+ # The default buffer size (8Mb)
103
+ DEFAULT_BUFFER_SIZE = 8 * 2**20
104
+
105
+ def initialize(io=nil, options={})
106
+ super(io)
107
+
108
+ options = {
109
+ :format => "I",
110
+ :nil_value => nil,
111
+ :buffer_size => DEFAULT_BUFFER_SIZE
112
+ }.merge(options)
113
+
114
+ # set the format, frame, and frame size
115
+ format = options[:format]
116
+ @frame = 0
117
+ @frame_size = 0
118
+ @process_in_bulk = true
119
+
120
+ scanner = StringScanner.new(format)
121
+ if scanner.skip(/\d+/)
122
+ # skip leading numbers ... they are normally ignored
123
+ # by pack and unpack but you could raise an error.
124
+ end
125
+
126
+ bulk_directive = nil
127
+ while directive = scanner.scan(/./)
128
+ size = ExternalIndex.directive_size(directive)
129
+ raise ArgumentError.new("cannot determine size of: '#{directive}'") if size == nil
130
+
131
+ # scan for a multiplicity factor
132
+ multiplicity = (scanner.scan(/\d+/) || 1).to_i
133
+ @frame += multiplicity
134
+ @frame_size += size * multiplicity
135
+
136
+ # if the bulk directive changes,
137
+ # processing in bulk is impossible
138
+ if bulk_directive == nil
139
+ bulk_directive = directive
140
+ elsif bulk_directive != directive
141
+ @process_in_bulk = false
142
+ end
143
+ end
144
+
145
+ # The "a" and "A" directives cannot be
146
+ # processed in bulk.
147
+ if ['a','A'].include?(bulk_directive)
148
+ @process_in_bulk = false
149
+ end
150
+
151
+ # Repetitive formats like "I", "II", "I2I",
152
+ # etc can be packed and unpacked in bulk.
153
+ @format = process_in_bulk ? "#{bulk_directive}*" : format
154
+
155
+ # set the buffer size
156
+ self.buffer_size = options[:buffer_size]
157
+
158
+ # set the nil value to an array of zeros, or
159
+ # to the specified nil value. If a nil value
160
+ # was specified, ensure it is of the correct
161
+ # frame size and can be packed
162
+ nil_value = if options[:nil_value] == nil
163
+ self.class.default_nil_value(format, frame)
164
+ else
165
+ options[:nil_value]
166
+ end
167
+
168
+ begin
169
+ @nil_value = nil_value.pack(format)
170
+ unless nil_value.length == frame && @nil_value.unpack(format) == nil_value
171
+ raise "" # just to invoke the rescue block
172
+ end
173
+ rescue
174
+ raise ArgumentError,
175
+ "unacceptable nil value '#{nil_value}': the nil value must " +
176
+ "be in frame and packable using the format '#{format}'"
177
+ end
178
+ end
179
+
180
+ # Returns the buffer size of self (equal to io.default_blksize and
181
+ # default_blksize * frame_size). Buffer size specifies the memory
182
+ # available for io to perform external operations.
183
+ def buffer_size
184
+ self.io.default_blksize
185
+ end
186
+
187
+ # Sets the buffer size of self (as well as io.default_blksize and
188
+ # self.default_blksize). See buffer_size.
189
+ def buffer_size=(buffer_size)
190
+ raise ArgumentError.new("buffer size must be > 0") if buffer_size <= 0
191
+
192
+ @default_blksize = (buffer_size/frame_size).ceil
193
+ self.io.default_blksize = buffer_size
194
+ end
195
+
196
+ # Sets the default_blksize of self, and sets io.default_blksize to value * frame_size. See buffer_size.
197
+ def default_blksize=(value)
198
+ @default_blksize = value
199
+ self.io.default_blksize = value * frame_size
200
+ end
201
+
202
+ # Returns the string value used for nils. Specify unpacked to
203
+ # show the unpacked array value.
204
+ #
205
+ # index = ExternalIndex.new
206
+ # index.nil_value # => [0]
207
+ # index.nil_value(false) # => "\000\000\000\000"
208
+ #
209
+ def nil_value(unpacked=true)
210
+ unpacked ? @nil_value.unpack(format) : @nil_value
211
+ end
212
+
213
+ # An array of the index attributes of self: [frame, format, nil_value]
214
+ def index_attrs
215
+ [frame, format, nil_value]
216
+ end
217
+
218
+ # Returns initialization options for the current settings of self.
219
+ def options
220
+ { :format => process_in_bulk ? format[0,1] * frame : format,
221
+ :nil_value => nil_value,
222
+ :buffer_size => buffer_size}
223
+ end
224
+
225
+ # Returns another instance of self.class,
226
+ # initialized with the current options of self.
227
+ def another
228
+ self.class.new(nil, options)
229
+ end
230
+
231
+ ###########################
232
+ # Array methods
233
+ ###########################
234
+
235
+ # def &(another)
236
+ # not_implemented
237
+ # end
238
+
239
+ # def *(arg)
240
+ # not_implemented
241
+ # end
242
+
243
+ def +(another)
244
+ dup.concat(another)
245
+ end
246
+
247
+ # def -(another)
248
+ # not_implemented
249
+ # end
250
+
251
+ # Differs from the Array << in that multiple entries
252
+ # can be shifted on at once.
253
+ def <<(array)
254
+ unframed_write(array, length)
255
+ self
256
+ end
257
+
258
# Comparison -- returns -1, 0, or 1 depending on whether self is less
# than, equal to, or greater than another.
#
# another may be:
# - self, which compares as 0 without reading anything
# - an Array of framed entries, compared entry-wise like Array#<=>
# - an ExternalIndex, compared through io.sort_compare
#
# Raises TypeError for any other input.
def <=>(another)
  return 0 if object_id == another.object_id

  case another
  when Array
    if another.length < length
      # Only the leading another.length entries of self can decide the
      # result, so only those are read. (to_a takes no arguments, so it
      # cannot be used to fetch a partial array.) When the leading
      # entries are equal, self is the longer array and wins.
      head = another.empty? ? [] : read(another.length, 0)
      result = (head <=> another)
      result == 0 ? 1 : result
    else
      to_a <=> another
    end
  when ExternalIndex
    io.sort_compare(another.io, (buffer_size/2).ceil)
  else
    raise TypeError.new("can't convert from #{another.class} to ExternalIndex or Array")
  end
end
277
+
278
+ def ==(another)
279
+ return true if super
280
+
281
+ case another
282
+ when Array
283
+ return false if self.length != another.length
284
+ self.to_a == another
285
+
286
+ when ExternalIndex
287
+ return false if self.length != another.length || self.index_attrs != another.index_attrs
288
+ return true if (self.io.sort_compare(another.io, (buffer_size/2).ceil)) == 0
289
+
290
+ self.to_a == another.to_a
291
+ else
292
+ false
293
+ end
294
+ end
295
+
296
+ # Element Reference — Returns the entry at index, or returns an array starting
297
+ # at start and continuing for length entries, or returns an array specified
298
+ # by range. Negative indices count backward from the end of self (-1 is the last
299
+ # element). Returns nil if the index (or starting index) is out of range.
300
+ #
301
+ # index = ExternalIndex[1,2,3,4,5]
302
+ # index[2] #=> [3]
303
+ # index[6] #=> nil
304
+ # index[1, 2] #=> [[2],[3]]
305
+ # index[1..3] #=> [[2],[3],[4]]
306
+ # index[4..7] #=> [[5]]
307
+ # index[6..10] #=> nil
308
+ # index[-3, 3] #=> [[3],[4],[5]]
309
+ #
310
+ # # special cases
311
+ # index[5] #=> nil
312
+ # index[5, 1] #=> []
313
+ # index[5..10] #=> []
314
+ #
315
+ # Note that entries are returned in frame.
316
+ def [](one, two = nil)
317
+ one = convert_to_int(one)
318
+
319
+ case one
320
+ when Fixnum
321
+
322
+ # normalize the index
323
+ if one < 0
324
+ one += length
325
+ return nil if one < 0
326
+ end
327
+
328
+ if two == nil
329
+ at(one) # read one, no frame
330
+ else
331
+ two = convert_to_int(two)
332
+ return nil if two < 0 || one > length
333
+ return [] if two == 0 || one == length
334
+
335
+ read(two, one) # read length, framed
336
+ end
337
+
338
+ when Range
339
+ raise TypeError, "can't convert Range into Integer" unless two == nil
340
+ total = length
341
+ start, length = split_range(one, total)
342
+
343
+ # (identical to those above...)
344
+ return nil if start < 0 || start > total
345
+ return [] if length < 0 || start == total
346
+
347
+ read(length + 1, start) # read length, framed
348
+
349
+ when nil
350
+ raise TypeError, "no implicit conversion from nil to integer"
351
+ when Bignum
352
+ # special case, RangeError not TypeError is raised by Array
353
+ raise RangeError, "can't convert #{one.class} into Integer"
354
+ else
355
+ raise TypeError, "can't convert #{one.class} into Integer"
356
+ end
357
+ end
358
+
359
+ # Element Assignment — Sets the entry at index, or replaces a subset starting at start
360
+ # and continuing for length entries, or replaces a subset specified by range.
361
+ # Negative indices count backward from the end of self. Inserts elements if
362
+ # length is zero. If nil is used in the second and third form, deletes elements from
363
+ # self. An IndexError is raised if a negative index points past the beginning of self.
364
+ # See also push, and unshift.
365
+ #
366
+ # index = ExternalIndex.new("", :format => 'I')
367
+ # index.nil_value # => [0]
368
+ # index[4] = [4]; index # => [[0], [0], [0], [0], [4]]
369
+ # index[0, 3] = [[1], [2], [3]]; index # => [[1], [2], [3], [0], [4]]
370
+ # index[1..2] = [[5], [6]]; index # => [[1], [5], [6], [0], [4]]
371
+ # index[0, 2] = [[7]]; index # => [[7], [6], [0], [4]]
372
+ # index[0..2] = [[8]]; index # => [[8], [4]]
373
+ # index[-1] = [9]; index # => [[8], [9]]
374
+ # index[1..-1] = nil; index # => [[8]]
375
+ #
376
+ # === Differences from Array#[]=
377
+ #
378
+ # ExternalIndex#[]= can only take entries in frame. This means that for individual
379
+ # assignments, a framed array must be given; in the case of [start, length] and
380
+ # range insertions, an array of framed arrays must be given. Nils are allowed
381
+ # in both cases, and are treated the same as in Array (although insertions replace
382
+ # nil with the nil_value for self).
383
+ #
384
+ # index = ExternalIndex.new("", :format => 'II')
385
+ # index.nil_value # => [0,0]
386
+ #
387
+ # index[0] = [1,2]; index # => [[1,2]]
388
+ # index[1] = nil; index # => [[1,2], [0,0]]
389
+ #
390
+ # index[0,2] = [[1,2],[3,4]]; index # => [[1,2], [3,4]]
391
+ # index[1..3] = [[5,6],[7,8]]; index # => [[1,2], [5,6], [7,8]]
392
+ # index[0,3] = nil; index # => []
393
+ #
394
+ # Another ExternalIndex with the same frame, format, and nil_value (ie index_attrs)
395
+ # may be used as an input to [start, length] and range insertions.
396
+ #
397
+ # === Performance
398
+ # Range insertions may require a full copy/rewrite of an ExternalIndex io. For
399
+ # very large instances, this obviously can be quite slow; the cases to watch out
400
+ # for are:
401
+ #
402
+ # - insertion of self into self (worst case)
403
+ # - insertion of values with lengths that do not match the insertion length
404
+ #
405
+ # For example:
406
+ #
407
+ # index = ExternalIndex.new("")
408
+ # index[0,1] = index
409
+ # index[0,3] = [[1], [2]]
410
+ # index[0...3] = [[1], [2], [3], [4]]
411
+ #
412
+ #--
413
+ # TODO -- cleanup error messages so they are more meaningful
414
+ # and helpful, esp for frame errors
415
+ #++
416
+ def []=(*args)
417
+ raise ArgumentError, "wrong number of arguments (1 for 2)" if args.length < 2
418
+
419
+ one, two, value = args
420
+ if args.length == 2
421
+ value = two
422
+ two = nil
423
+ end
424
+
425
+ one = convert_to_int(one)
426
+ case one
427
+ when Fixnum
428
+ if one < 0
429
+ one += length
430
+ raise IndexError, "index #{one} out of range" if one < 0
431
+ end
432
+
433
+ if two == nil
434
+ # simple insertion
435
+ unframed_write(value == nil ? nil_value : value, one)
436
+ else
437
+ two = convert_to_int(two)
438
+ raise IndexError, "negative length (#{two})" if two < 0
439
+
440
+ value = convert_to_ary(value)
441
+ case
442
+ when self == value
443
+ # special case when insertion is self (no validation needed)
444
+ # A whole copy of self is required because the insertion
445
+ # can overwrite the tail of self. As such this can be a
446
+ # worst-case scenario-slow and expensive procedure.
447
+ copy_beg = (one + two) * frame_size
448
+ copy_end = io.length
449
+
450
+ io.copy do |copy|
451
+ # truncate io
452
+ io.truncate(one * frame_size)
453
+ io.pos = io.length
454
+
455
+ # pad as needed
456
+ pad_to(one) if one > length
457
+
458
+ # write the copy of self
459
+ io.insert(copy)
460
+
461
+ # copy the tail of the insertion
462
+ io.insert(copy, copy_beg..copy_end)
463
+ end
464
+
465
+ when value.length == two
466
+ # optimized insertion, when insertion is the correct length
467
+ write(value, one)
468
+
469
+ else
470
+ # range insertion: requires copy and rewrite of the tail
471
+ # of the ExternalIndex, after the insertion.
472
+ # WARN - can be slow when the tail is large
473
+ copy_beg = (one + two) * frame_size
474
+ copy_end = io.length
475
+
476
+ io.copy("r", copy_beg..copy_end) do |copy|
477
+ # pad as needed
478
+ pad_to(one) if one > length
479
+
480
+ # write inserted value
481
+ io.pos = one * frame_size
482
+ write(value)
483
+
484
+ # truncate io
485
+ io.truncate(io.pos)
486
+
487
+ # copy the tail of the insertion
488
+ io.insert(copy)
489
+ end
490
+ end
491
+ end
492
+
493
+ when Range
494
+ raise TypeError, "can't convert Range into Integer" unless two == nil
495
+ start, length, total = split_range(one)
496
+
497
+ raise RangeError, "#{one} out of range" if start < 0
498
+ self[start, length < 0 ? 0 : length + 1] = value
499
+
500
+ when nil
501
+ raise TypeError, "no implicit conversion from nil to integer"
502
+ else
503
+ raise TypeError, "can't convert #{one.class} into Integer"
504
+ end
505
+ end
506
+
507
+ # def abbrev(pattern=nil)
508
+ # not_implemented
509
+ # end
510
+
511
+ # def assoc(obj)
512
+ # not_implemented
513
+ # end
514
+
515
+ # Returns entry at index
516
def at(index)
  # out of range on either side => nil, exactly like Array#at
  return nil if index >= length
  return nil if index < 0 && index < -length

  # a single entry is unpacked directly, so it is returned in frame
  packed = readbytes(1, index)
  packed.nil? ? nil : packed.unpack(format)
end
524
+
525
+ # Removes all elements from _self_.
526
+ def clear
527
+ io.truncate(0)
528
+ self
529
+ end
530
+
531
+ # Returns a copy of self with all nil entries removed. Nil
532
+ # entries are those which equal nil_value.
533
+ #
534
+ # <em>potentially expensive</em>
535
+ def compact
536
+ another = self.another
537
+ nil_array = self.nil_value
538
+ each do |array|
539
+ another << array unless array == nil_array
540
+ end
541
+ another
542
+ end
543
+
544
+ # def compact!
545
+ # not_implemented
546
+ # end
547
+
548
+ # Appends the entries in another to self. Another may be an array
549
+ # of entries (in frame), or another ExternalIndex with corresponding
550
+ # index_attrs.
551
+ #
552
+ # <em>potentially expensive</em> especially if another is very
553
+ # large, or if it must be loaded into memory to be concatenated,
554
+ # ie when cached? = true.
555
+ def concat(another)
556
+ case another
557
+ when Array
558
+ write(another, length)
559
+ when ExternalIndex
560
+ check_index(another)
561
+ io.concat(another.io)
562
+ else
563
+ raise TypeError.new("can't convert #{another.class} into ExternalIndex or Array")
564
+ end
565
+ self
566
+ end
567
+
568
+ # def delete(obj)
569
+ # not_implemented
570
+ # end
571
+
572
+ # def delete_at(index)
573
+ # not_implemented
574
+ # end
575
+
576
+ # def delete_if # :yield: item
577
+ # not_implemented
578
+ # end
579
+
580
+ # Calls block once for each entry in self, passing that entry as a parameter.
581
def each(&block) # :yield: entry
  self.pos = 0
  chunk do |offset, length|
    # Read every chunk from its own offset instead of relying on the
    # current position: the block may move pos (for example by indexing
    # into self or by nesting another each), which would otherwise make
    # the next chunk start from the wrong place. reverse_each already
    # reads its chunks this way.
    # NOTE(review): assumes chunk yields (offset, length) counted in
    # entries, as reverse_each relies on -- confirm against Chunkable.
    read(length, offset).each(&block)
  end
  self
end
591
+
592
+ # Same as each, but passes the index of the entry instead of the entry itself.
593
+ def each_index(&block) # :yield: index
594
+ 0.upto(length-1, &block)
595
+ self
596
+ end
597
+
598
+ # def fetch(index, default=nil, &block)
599
+ # index += index_length if index < 0
600
+ # val = (index >= length ? default : self[index])
601
+ # block_given? ? yield(val) : val
602
+ # end
603
+
604
+ # def fill(*args)
605
+ # not_implemented
606
+ # end
607
+
608
+ # Returns the first n entries (default 1)
609
+ def first(n=nil)
610
+ n.nil? ? self[0] : self[0,n]
611
+ end
612
+
613
+ # def hash
614
+ # not_implemented
615
+ # end
616
+
617
+ # def include?(obj)
618
+ # not_implemented
619
+ # end
620
+
621
+ # def index(obj)
622
+ # not_implemented
623
+ # end
624
+
625
+ # def indexes(*args)
626
+ # values_at(*args)
627
+ # end
628
+ #
629
+ # def indicies(*args)
630
+ # values_at(*args)
631
+ # end
632
+
633
+ # def replace(other)
634
+ # not_implemented
635
+ # end
636
+
637
+ # def insert(index, *obj)
638
+ # self[index] = obj
639
+ # end
640
+
641
+ # def inspect
642
+ # not_implemented
643
+ # end
644
+
645
+ # def join(sep=$,)
646
+ # not_implemented
647
+ # end
648
+
649
+ # Returns the last n entries (default 1)
650
def last(n=nil)
  if n.nil?
    # no count given: a single entry, in frame
    self[-1]
  else
    # clamp the start so that asking for more entries than
    # exist simply returns everything
    offset = length - n
    self[offset < 0 ? 0 : offset, n]
  end
end
657
+
658
+ # Returns the number of entries in self
659
+ def length
660
+ io.length/frame_size
661
+ end
662
+
663
+ # Returns the number of non-nil entries in self. Nil entries
664
+ # are those which equal nil_value. May be zero.
665
+ def nitems
666
+ # TODO - seems like this could be optimized
667
+ # to run without unpacking each item...
668
+ count = self.length
669
+ nil_array = self.nil_value
670
+ each do |array|
671
+ count -= 1 if array == nil_array
672
+ end
673
+ count
674
+ end
675
+
676
+ # def pop
677
+ # not_implemented
678
+ # end
679
+
680
+ # def pretty_print(q)
681
+ # not_implemented
682
+ # end
683
+
684
+ # def pretty_print_cycle(q)
685
+ # not_implemented
686
+ # end
687
+
688
+ # Append — Pushes the given entry(s) on to the end of self.
689
+ # This expression returns self, so several appends may be
690
+ # chained together. Pushed entries must be in frame.
691
+ def push(*array)
692
+ write(array, length)
693
+ self
694
+ end
695
+
696
+ # def rassoc(key)
697
+ # not_implemented
698
+ # end
699
+
700
+ # def replace(another)
701
+ # not_implemented
702
+ # end
703
+
704
+ # def reverse
705
+ # not_implemented
706
+ # end
707
+
708
+ # def reverse!
709
+ # not_implemented
710
+ # end
711
+
712
+ # Same as each, but traverses self in reverse order.
713
+ def reverse_each(&block)
714
+ reverse_chunk do |offset, length|
715
+ # special treatment for 1, because then read(1) => [...] rather
716
+ # than [[...]]. when frame > 1, each will iterate over the
717
+ # element rather than pass it to the block directly
718
+ read(length, offset).reverse_each(&block)
719
+ end
720
+ self
721
+ end
722
+
723
+ # def rindex(obj)
724
+ # not_implemented
725
+ # end
726
+
727
+ # def shift
728
+ # not_implemented
729
+ # end
730
+
731
+ # Alias for length
732
+ def size
733
+ length
734
+ end
735
+
736
+ # def slice(*args)
737
+ # self.call(:[], *args)
738
+ # end
739
+
740
+ # def slice!(*args)
741
+ # not_implemented
742
+ # end
743
+
744
+ # Converts self to an array, or returns the cache if cached?.
745
+ def to_a
746
+ length == 0 ? [] : read(length, 0)
747
+ end
748
+
749
+ # Returns _self_.join.
750
+ # def to_s
751
+ # self.join
752
+ # end
753
+
754
+ # def uniq
755
+ # not_implemented
756
+ # end
757
+
758
+ # def uniq!
759
+ # not_implemented
760
+ # end
761
+
762
+ # def unshift(*obj)
763
+ # not_implemented
764
+ # end
765
+
766
+ # Returns a copy of self containing the entries corresponding to the
767
+ # given selector(s). The selectors may be either integer indices or
768
+ # ranges.
769
+ #
770
+ # <em>potentially expensive</em>
771
+ def values_at(*selectors)
772
+ another = self.another
773
+ selectors.each do |s|
774
+ entries = self[s]
775
+ another << (entries == nil ? nil_value : entries.flatten)
776
+ end
777
+ another
778
+ end
779
+
780
+ # def |(another)
781
+ # not_implemented
782
+ # end
783
+
784
+ #################
785
+ # IO-like methods
786
+ ##################
787
+
788
+ # Returns the current position of self (ie io.pos/frame_size).
789
+ # pos is often used as the default location for IO-like
790
+ # operations like read or write.
791
+ def pos
792
+ io.pos/frame_size
793
+ end
794
+
795
+ # Sets the current position of the index. Positions can be set beyond
796
+ # the actual length of the index, similar to an IO. Negative positions
797
+ # are counted back from the end of the index (just as they are in
798
+ # an array), but naturally raise an error if they count back to a
799
+ # position less than zero.
800
+ #
801
+ # index = ExternalIndex[[1],[2],[3]]
802
+ # index.length # => 3
803
+ #
804
+ # index.pos = 2; index.pos # => 2
805
+ # index.pos = 10; index.pos # => 10
806
+ #
807
+ # index.pos = -1; index.pos # => 2
808
+ # index.pos = -10; index.pos # !> ArgumentError
809
+ #
810
def pos=(pos)
  target = pos
  if target < 0
    # negative positions count back from the end, but may not
    # reach past the beginning of the index
    raise ArgumentError.new("position out of bounds: #{pos}") if target < -length
    target += length
  end

  # io works in bytes; entries are frame_size bytes each
  io.pos = (target * frame_size)
end
818
+
819
+ # Reads the packed byte string for n entries from the specified
820
+ # position. By default all remaining entries will be read.
821
+ #
822
+ # index = ExternalIndex[[1],[2],[3]]
823
+ # index.pos # => 0
824
+ # index.readbytes.unpack("I*") # => [1,2,3]
825
+ # index.readbytes(1,0).unpack("I*") # => [1]
826
+ # index.readbytes(10,1).unpack("I*") # => [2,3]
827
+ #
828
+ # The behavior of readbytes when no entries can be read echoes
829
+ # that of IO; when n is nil, an empty string is returned;
830
+ # when n is specified, nil will be returned.
831
+ #
832
+ # index.pos = 3
833
+ # index.readbytes # => ""
834
+ # index.readbytes(1) # => nil
835
+ #
836
def readbytes(n=nil, pos=nil)
  # move to the requested entry first, when one was given
  self.pos = pos unless pos.nil?

  # without a count, everything up to the end of io is read
  return io.read if n.nil?

  io.read(n * frame_size)
end
843
+
844
+ # Unpacks the given string into an array of index values.
845
+ # Entries are returned in frame.
846
+ #
847
+ # index = ExternalIndex[[1],[2],[3]]
848
+ # index.format # => 'I*'
849
+ # index.unpack( [1].pack('I*') ) # => [[1]]
850
+ # index.unpack( [1,2,3].pack('I*') ) # => [[1],[2],[3]]
851
+ # index.unpack("") # => []
852
+ #
853
def unpack(str)
  if process_in_bulk
    # bulk path (faster): unpack everything at once, then
    # regroup the flat values into entries of frame items
    framed = []
    str.unpack(format).each_slice(frame) { |entry| framed.push(entry) }
    return framed
  end

  # individual path (slower): unpack one frame_size slice per entry
  entry_count = str.length / frame_size
  (0...entry_count).map do |n|
    str[n * frame_size, frame_size].unpack(format)
  end
end
867
+
868
+ # Reads n entries from the specified position (ie, read
869
+ # is basically readbytes, then unpack). By default all
870
+ # remaining entries will be read; single entries are
871
+ # returned in frame, multiple entries are returned in
872
+ # an array.
873
+ #
874
+ # index = ExternalIndex[[1],[2],[3]]
875
+ # index.pos # => 0
876
+ # index.read # => [[1],[2],[3]]
877
+ # index.read(1,0) # => [[1]]
878
+ # index.read(10,1) # => [[2],[3]]
879
+ #
880
+ # The behavior of read when no entries can be read echoes
881
+ # that of IO; when n is nil, an empty array is returned;
882
+ # when n is specified, nil will be returned.
883
+ #
884
+ # index.pos = 3
885
+ # index.read # => []
886
+ # index.read(1) # => nil
887
+ #
888
+ def read(n=nil, pos=nil)
889
+ str = readbytes(n, pos)
890
+ str == nil ? nil : unpack(str)
891
+ end
892
+
893
+ # Writes the framed entries into self starting at the
894
+ # specified position. By default writing begins at the
895
+ # current position. The array can have multiple entries
896
+ # so long as each is in the correct frame.
897
+ #
898
+ # index = ExternalIndex[]
899
+ # index.write([[2],[3]], 1)
900
+ # index.pos = 0;
901
+ # index.write([[1]])
902
+ # index.read(3, 0) # => [[1],[2],[3]]
903
+ #
904
+ # write may accept an ExternalIndex if it has the same
905
+ # index_attrs as self.
906
+ def write(array, pos=nil)
907
+ case array
908
+ when Array
909
+ check_framed_array(array)
910
+ prepare_write_to_pos(pos)
911
+ write_framed_array(array)
912
+ when ExternalIndex
913
+ check_index(array)
914
+ prepare_write_to_pos(pos)
915
+ write_index(array)
916
+ else
917
+ raise ArgumentError, "could not convert #{array.class} to Array or ExternalIndex"
918
+ end
919
+ end
920
+
921
+ # Same as write, except the input entries are unframed.
922
+ # Multiple entries can be provided in a single array,
923
+ # so long as the total number of elements is divisible
924
+ # into entries of the correct frame.
925
+ #
926
+ # index = ExternalIndex[]
927
+ # index.unframed_write([2,3], 1)
928
+ # index.pos = 0;
929
+ # index.unframed_write([1])
930
+ # index.read(3, 0) # => [[1],[2],[3]]
931
+ #
932
+ def unframed_write(array, pos=nil)
933
+ case array
934
+ when Array
935
+ check_unframed_array(array)
936
+ prepare_write_to_pos(pos)
937
+ write_unframed_array(array)
938
+ when ExternalIndex
939
+ check_index(array)
940
+ prepare_write_to_pos(pos)
941
+ write_index(array)
942
+ else
943
+ raise ArgumentError.new("could not convert #{array.class} to Array or ExternalIndex")
944
+ end
945
+ end
946
+
947
+ private
948
+
949
+ # prepares a write at the specified position by
950
+ # padding to the position and setting pos to
951
+ # the position
952
+ def prepare_write_to_pos(pos) # :nodoc:
953
+ unless pos == nil
954
+ # pad to the starting position if necessary
955
+ pad_to(pos) if pos > length
956
+
957
+ # set the io position to the specified index
958
+ self.pos = pos
959
+ end
960
+ end
961
+
962
+ # pads io with nil_value up to pos.
963
def pad_to(pos) # :nodoc:
  # nil_value(false) is the packed form of ONE whole entry, so the number
  # of repetitions needed is simply the number of missing entries.
  # Dividing that count by frame under-pads whenever frame > 1.
  missing = pos - length
  return nil unless missing > 0

  # append the padding and account for it in io.length, which
  # is not updated automatically by io.write
  io.pos = io.length
  io.length += io.write(nil_value(false) * missing)

  # always returns nil, as before
  nil
end
974
+
975
+ # checks that the input has the same index_attrs as self.
976
+ def check_index(index) # :nodoc:
977
+ unless index.index_attrs == index_attrs
978
+ raise ArgumentError.new("incompatible index attributes [#{index.index_attrs.join(',')}]")
979
+ end
980
+ end
981
+
982
+ # checks that the array consists only of
983
+ # arrays of the correct frame, or nils.
984
def check_framed_array(array) # :nodoc:
  array.each do |entry|
    # nils are allowed in framed arrays
    next if entry.nil?

    unless entry.kind_of?(Array)
      raise ArgumentError, "not an Array or nil value: #{entry.class} "
    end

    # every array entry must hold exactly frame items
    next if entry.length == frame
    raise ArgumentError, "not in frame #{frame}: #{ellipse_inspect(entry)}"
  end
end
999
+
1000
+ # checks that the unframed array is of a
1001
+ # frameable length
1002
+ def check_unframed_array(array) # :nodoc:
1003
+ unless array.length % frame == 0
1004
+ raise ArgumentError, "not in frame #{frame}: #{ellipse_inspect(array)}"
1005
+ end
1006
+ end
1007
+
1008
+ # writes the ExternalIndex to io.
1009
+ def write_index(index) # :nodoc:
1010
+ end_pos = io.pos + io.insert(index.io)
1011
+ io.length = end_pos if end_pos > io.length
1012
+ end
1013
+
1014
+ # writes the framed array to io. nil values
1015
+ # in the array are converted to nil_value.
1016
+ def write_framed_array(array) # :nodoc:
1017
+ start_pos = io.pos
1018
+ length_written = 0
1019
+
1020
+ if process_in_bulk
1021
+ arr = []
1022
+ array.each {|item| arr.concat(item == nil ? nil_value : item) }
1023
+ length_written += io.write(arr.pack(format))
1024
+ else
1025
+ array.each do |item|
1026
+ length_written += io.write(item == nil ? nil_value(false) : item.pack(format))
1027
+ end
1028
+ end
1029
+
1030
+ # update io.length as necessary
1031
+ end_pos = start_pos + length_written
1032
+ io.length = end_pos if end_pos > io.length
1033
+ end
1034
+
1035
+ # writes the unframed array to io. unframed
1036
+ # arrays cannot contain nils.
1037
+ def write_unframed_array(array) # :nodoc:
1038
+ start_pos = io.pos
1039
+ length_written = 0
1040
+
1041
+ if process_in_bulk
1042
+ length_written += io.write(array.pack(format))
1043
+ else
1044
+ array.each_slice(frame) do |arr|
1045
+ length_written += io.write(arr.pack(format))
1046
+ end
1047
+ end
1048
+
1049
+ # update io.length as necessary
1050
+ end_pos = start_pos + length_written
1051
+ io.length = end_pos if end_pos > io.length
1052
+ end
1053
+ end