external 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History +5 -0
- data/MIT-LICENSE +21 -0
- data/README +168 -0
- data/lib/ext_arc.rb +108 -0
- data/lib/ext_arr.rb +727 -0
- data/lib/ext_ind.rb +1120 -0
- data/lib/external/base.rb +85 -0
- data/lib/external/chunkable.rb +105 -0
- data/lib/external/enumerable.rb +137 -0
- data/lib/external/io.rb +398 -0
- data/lib/external.rb +3 -0
- data/test/benchmarks/benchmarks_20070918.txt +45 -0
- data/test/benchmarks/benchmarks_20070921.txt +91 -0
- data/test/benchmarks/benchmarks_20071006.txt +147 -0
- data/test/benchmarks/test_copy_file.rb +80 -0
- data/test/benchmarks/test_pos_speed.rb +47 -0
- data/test/benchmarks/test_read_time.rb +55 -0
- data/test/cached_ext_ind_test.rb +219 -0
- data/test/check/benchmark_check.rb +441 -0
- data/test/check/namespace_conflicts_check.rb +23 -0
- data/test/check/pack_check.rb +90 -0
- data/test/ext_arc_test.rb +286 -0
- data/test/ext_arr/alt_sep.txt +3 -0
- data/test/ext_arr/cr_lf_input.txt +3 -0
- data/test/ext_arr/input.index +0 -0
- data/test/ext_arr/input.txt +1 -0
- data/test/ext_arr/inputb.index +0 -0
- data/test/ext_arr/inputb.txt +1 -0
- data/test/ext_arr/lf_input.txt +3 -0
- data/test/ext_arr/lines.txt +19 -0
- data/test/ext_arr/without_index.txt +1 -0
- data/test/ext_arr_test.rb +534 -0
- data/test/ext_ind_test.rb +1472 -0
- data/test/external/base_test.rb +74 -0
- data/test/external/chunkable_test.rb +182 -0
- data/test/external/index/input.index +0 -0
- data/test/external/index/inputb.index +0 -0
- data/test/external/io_test.rb +414 -0
- data/test/external_test_helper.rb +31 -0
- data/test/external_test_suite.rb +4 -0
- data/test/test_array.rb +1192 -0
- metadata +104 -0
data/lib/ext_ind.rb
ADDED
@@ -0,0 +1,1120 @@
|
|
1
|
+
require 'external/base'
|
2
|
+
|
3
|
+
# for some reason this is sometimes not required by String
|
4
|
+
# automatically, leading to a missing each_char method
|
5
|
+
require 'jcode'
|
6
|
+
|
7
|
+
#--
|
8
|
+
# not implemented --
|
9
|
+
# dclone, flatten, flatten!, frozen?, pack, quote, to_yaml, transpose, yaml_initialize
|
10
|
+
#
|
11
|
+
# be careful accessing io directly. for performance reasons there is no check to make
|
12
|
+
# sure io is in register (ie pos is at a frame boundary, ie io.length % frame_size == 0)
|
13
|
+
# In addition, note that length must be adjusted manually in most io operations (truncate is
|
14
|
+
# the exception). Thus if you change the file length by any means, the file length must be
|
15
|
+
# reset.
|
16
|
+
#
|
17
|
+
# ExtInd allows array-like access to formatted binary data stored on disk.
|
18
|
+
#
|
19
|
+
# == Caching
|
20
|
+
#
|
21
|
+
# To improve performance, ExtInd can be run in a cached mode where the data is loaded into
|
22
|
+
# memory and kept in memory until the ExtInd closes (or is flushed). Cached mode is
|
23
|
+
# recommended for all but the largest index files, which cannot or should not be loaded
|
24
|
+
# into memory.
|
25
|
+
#++
|
26
|
+
class ExtInd < External::Base
|
27
|
+
|
28
|
+
class << self
|
29
|
+
def [](*args)
|
30
|
+
options = args.last.kind_of?(Hash) ? args.pop : {}
|
31
|
+
ab = self.new(nil, options)
|
32
|
+
normalized_args = args.collect {|item| item.nil? ? ab.nil_value : item }.flatten
|
33
|
+
ab.unframed_write(normalized_args)
|
34
|
+
|
35
|
+
# reset the position of the IO under this initialize
|
36
|
+
ab.pos = 0
|
37
|
+
ab
|
38
|
+
end
|
39
|
+
|
40
|
+
def read(fd, options={})
|
41
|
+
return [] if fd.nil?
|
42
|
+
open(fd, "r", options) do |index|
|
43
|
+
index.read(nil, 0)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
# Returns the number of bytes required to pack an item in an array
|
48
|
+
# using the directive (see Array.pack for more details). All directives
|
49
|
+
# return a size except the positioning directives '@' and 'X'; these
|
50
|
+
# and all other unknown directives return nil.
|
51
|
+
#
|
52
|
+
# Directives N bytes
|
53
|
+
# ------------------------------
|
54
|
+
# AaBbCcHhUwxZ | 1
|
55
|
+
# nSsv | 2
|
56
|
+
# M | 3
|
57
|
+
# eFfgIiLlNPpV | 4
|
58
|
+
# m | 5
|
59
|
+
# u | 6
|
60
|
+
# DdEGQq | 8
|
61
|
+
# @X | nil
|
62
|
+
def directive_size(directive)
|
63
|
+
case directive
|
64
|
+
when /^[eFfgIiLlNPpV]$/ then 4
|
65
|
+
when /^[DdEGQq]$/ then 8
|
66
|
+
when /^[AaBbCcHhUwxZ]$/ then 1
|
67
|
+
when /^[nSsv]$/ then 2
|
68
|
+
when 'M' then 3
|
69
|
+
when 'm' then 5
|
70
|
+
when 'u' then 6
|
71
|
+
else
|
72
|
+
nil
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
attr_writer :default_nil_value
|
77
|
+
def default_nil_value
|
78
|
+
@default_nil_value ||= 0
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
attr_reader :frame, :frame_size, :format, :cache, :process_in_bulk
|
83
|
+
|
84
|
+
def initialize(io=nil, options={})
|
85
|
+
super(io)
|
86
|
+
|
87
|
+
options = {
|
88
|
+
:format => "I",
|
89
|
+
:nil_value => nil,
|
90
|
+
:cached => false,
|
91
|
+
:buffer_size => 8 * 2**20 # 8Mb
|
92
|
+
}.merge(options)
|
93
|
+
|
94
|
+
# set the format, frame, and frame size
|
95
|
+
@format = options[:format]
|
96
|
+
@frame = @format.length
|
97
|
+
@frame_size = 0
|
98
|
+
|
99
|
+
# TODO -- allow specification of numbers in the format
|
100
|
+
@format.each_char do |directive|
|
101
|
+
size = ExtInd.directive_size(directive)
|
102
|
+
raise ArgumentError.new("cannot determine size of: '#{directive}'") if size == nil
|
103
|
+
@frame_size += size
|
104
|
+
end
|
105
|
+
@process_in_bulk = false
|
106
|
+
|
107
|
+
# set the buffer size
|
108
|
+
self.buffer_size = options[:buffer_size]
|
109
|
+
|
110
|
+
# Repetitive formats like "I", "II", "III",
|
111
|
+
# etc can be packed and unpacked in bulk
|
112
|
+
directive = @format[0, 1]
|
113
|
+
if @format == (directive * @frame)
|
114
|
+
@format = "#{directive}*"
|
115
|
+
@process_in_bulk = true
|
116
|
+
end
|
117
|
+
|
118
|
+
# set the nil value to an array of zeros, or
|
119
|
+
# to the specified nil value. If a nil value
|
120
|
+
# was specified, ensure it is of the correct
|
121
|
+
# frame size and can be packed
|
122
|
+
nil_value = if options[:nil_value] == nil
|
123
|
+
Array.new(@frame, self.class.default_nil_value)
|
124
|
+
else
|
125
|
+
options[:nil_value]
|
126
|
+
end
|
127
|
+
|
128
|
+
begin
|
129
|
+
@nil_value = nil_value.pack(@format)
|
130
|
+
raise "" unless nil_value.length == @frame && @nil_value.unpack(@format) == nil_value
|
131
|
+
rescue
|
132
|
+
raise ArgumentError.new(
|
133
|
+
"unacceptable nil value '#{nil_value}': the nil value must " +
|
134
|
+
"be in frame and packable using the format '#{format}'")
|
135
|
+
end
|
136
|
+
|
137
|
+
self.cached = options[:cached]
|
138
|
+
end
|
139
|
+
|
140
|
+
def buffer_size
|
141
|
+
self.io.default_blksize
|
142
|
+
end
|
143
|
+
|
144
|
+
def buffer_size=(buffer_size)
|
145
|
+
raise ArgumentError.new("buffer size must be > 0") if buffer_size <= 0
|
146
|
+
|
147
|
+
@default_blksize = (buffer_size/frame_size).ceil
|
148
|
+
self.io.default_blksize = buffer_size
|
149
|
+
end
|
150
|
+
|
151
|
+
def default_blksize=(value)
|
152
|
+
@default_blksize = value
|
153
|
+
self.io.default_blksize = value * frame_size
|
154
|
+
end
|
155
|
+
|
156
|
+
def options
|
157
|
+
{:format => (process_in_bulk ? format.chomp("*") * frame : format),
|
158
|
+
:nil_value => nil_value,
|
159
|
+
:cached => cached?,
|
160
|
+
:buffer_size => buffer_size}
|
161
|
+
end
|
162
|
+
|
163
|
+
# Returns the string value used for nils. Specify unpacked to
|
164
|
+
# show the unpacked array value.
|
165
|
+
#
|
166
|
+
# i = ExtInd.new
|
167
|
+
# i.nil_value # => [0]
|
168
|
+
# i.nil_value(false) # => "\000\000\000\000"
|
169
|
+
def nil_value(unpacked=true)
|
170
|
+
unpacked ? @nil_value.unpack(format) : @nil_value
|
171
|
+
end
|
172
|
+
|
173
|
+
# True if cached
|
174
|
+
def cached?
|
175
|
+
cache != nil
|
176
|
+
end
|
177
|
+
|
178
|
+
# Sets the index to cache data or not. When setting cached to
|
179
|
+
# false, currently cached data is flushed.
|
180
|
+
def cached=(input)
|
181
|
+
if input && !cache
|
182
|
+
@cache_pos = self.pos
|
183
|
+
@cache = read(nil, 0)
|
184
|
+
|
185
|
+
# ensure the cache is an array of framed items...
|
186
|
+
# if io has only one item, then read returns an
|
187
|
+
# array like [0] rather than [[0]]
|
188
|
+
unless @cache.empty? || @cache.first.kind_of?(Array)
|
189
|
+
@cache = [@cache]
|
190
|
+
end
|
191
|
+
elsif !input && cache
|
192
|
+
flush
|
193
|
+
@cache = nil
|
194
|
+
self.pos = @cache_pos
|
195
|
+
@cache_pos = nil
|
196
|
+
end
|
197
|
+
end
|
198
|
+
|
199
|
+
# Flushes the io, writing cached data if necessary.
|
200
|
+
def flush
|
201
|
+
if cached?
|
202
|
+
io.truncate(0)
|
203
|
+
cache.each {|item| io.write item.pack(format) }
|
204
|
+
end
|
205
|
+
|
206
|
+
io.flush
|
207
|
+
io.reset_length
|
208
|
+
end
|
209
|
+
|
210
|
+
# Flushes cached data and closes the io.
|
211
|
+
def close
|
212
|
+
cached = false if cached?
|
213
|
+
super
|
214
|
+
end
|
215
|
+
|
216
|
+
###########################
|
217
|
+
# Array methods
|
218
|
+
###########################
|
219
|
+
|
220
|
+
# def &(another)
|
221
|
+
# not_implemented
|
222
|
+
# end
|
223
|
+
|
224
|
+
# def *(arg)
|
225
|
+
# not_implemented
|
226
|
+
# end
|
227
|
+
|
228
|
+
def dup
|
229
|
+
self.flush
|
230
|
+
ExtInd.new(nil, options).concat(self)
|
231
|
+
end
|
232
|
+
|
233
|
+
def +(another)
|
234
|
+
dup.concat(another)
|
235
|
+
end
|
236
|
+
|
237
|
+
# def -(another)
|
238
|
+
# not_implemented
|
239
|
+
# end
|
240
|
+
|
241
|
+
def <<(array)
|
242
|
+
# WRONG BEHAVIOR -- should be write, push in frame
|
243
|
+
unframed_write(array, length)
|
244
|
+
self
|
245
|
+
end
|
246
|
+
|
247
|
+
def <=>(another)
|
248
|
+
return 0 if self.object_id == another.object_id
|
249
|
+
|
250
|
+
# reverse comparison in case another is an ExtInd
|
251
|
+
return -1 * (another <=> cache) if cached?
|
252
|
+
|
253
|
+
case another
|
254
|
+
when Array
|
255
|
+
if another.length < self.length
|
256
|
+
# if another is equal to the matching subset of self,
|
257
|
+
# then self is obviously the longer array and wins.
|
258
|
+
result = (self.to_a(another.length) <=> another)
|
259
|
+
result == 0 ? 1 : result
|
260
|
+
else
|
261
|
+
self.to_a <=> another
|
262
|
+
end
|
263
|
+
when ExtInd
|
264
|
+
self.io.sort_compare(another.io, (buffer_size/2).ceil)
|
265
|
+
else
|
266
|
+
raise TypeError.new("can't convert from #{another.class} to ExtInd or Array")
|
267
|
+
end
|
268
|
+
end
|
269
|
+
|
270
|
+
def ==(another)
|
271
|
+
return true if super
|
272
|
+
|
273
|
+
case another
|
274
|
+
when Array
|
275
|
+
return false unless self.length == another.length
|
276
|
+
self.to_a == another
|
277
|
+
when ExtInd
|
278
|
+
return false unless self.length == another.length
|
279
|
+
|
280
|
+
unless self.cached? && another.cached?
|
281
|
+
return false unless self.index_attrs == another.index_attrs
|
282
|
+
if (self.io.sort_compare(another.io, (buffer_size/2).ceil)) == 0
|
283
|
+
return true
|
284
|
+
end
|
285
|
+
end
|
286
|
+
|
287
|
+
self.to_a == another.to_a
|
288
|
+
else
|
289
|
+
false
|
290
|
+
end
|
291
|
+
end
|
292
|
+
|
293
|
+
# Element Reference — Returns the entry at index, or returns an array starting
|
294
|
+
# at start and continuing for length entries, or returns an array specified
|
295
|
+
# by range. Negative indices count backward from the end of self (-1 is the last
|
296
|
+
# element). Returns nil if the index (or starting index) is out of range.
|
297
|
+
#
|
298
|
+
# io = StringIO.new [1,2,3,4,5].pack("I*")
|
299
|
+
# i = ExtInd.new(io, :format => 'I')
|
300
|
+
# i[2] #=> [3]
|
301
|
+
# i[6] #=> nil
|
302
|
+
# i[1, 2] #=> [ [2], [3] ]
|
303
|
+
# i[1..3] #=> [ [2], [3], [4] ]
|
304
|
+
# i[4..7] #=> [ [5] ]
|
305
|
+
# i[6..10] #=> nil
|
306
|
+
# i[-3, 3] #=> [ [3], [4], [5] ]
|
307
|
+
# # special cases
|
308
|
+
# i[5] #=> nil
|
309
|
+
# i[5, 1] #=> []
|
310
|
+
# i[5..10] #=> []
|
311
|
+
#
|
312
|
+
# Note that entries are returned in frame, as arrays.
|
313
|
+
def [](index, length=nil)
|
314
|
+
# return the cached value if cached
|
315
|
+
return (length == nil ? cache[index] : cache[index,length]) if cached?
|
316
|
+
|
317
|
+
case index
|
318
|
+
when Fixnum
|
319
|
+
index += self.length if index < 0
|
320
|
+
return nil if index < 0
|
321
|
+
|
322
|
+
unless length == nil
|
323
|
+
raise TypeError.new("no implicit conversion from nil to integer") if length.nil?
|
324
|
+
return [] if length == 0 || index >= self.length
|
325
|
+
return nil if length < 0
|
326
|
+
|
327
|
+
# ensure you don't try to read more entries than are available
|
328
|
+
max_length = self.length - index
|
329
|
+
length = max_length if length > max_length
|
330
|
+
end
|
331
|
+
|
332
|
+
case
|
333
|
+
when length == nil then read(1, index) # read one, as index[0]
|
334
|
+
when length == 1 then [read(1, index)] # read one framed, as index[0,1]
|
335
|
+
else
|
336
|
+
read(length, index) # read length, automatic framing
|
337
|
+
end
|
338
|
+
|
339
|
+
when Range
|
340
|
+
raise TypeError.new("can't convert Range into Integer") unless length == nil
|
341
|
+
|
342
|
+
offset, length = split_range(index)
|
343
|
+
|
344
|
+
# for conformance with array range retrieval
|
345
|
+
return nil if offset < 0 || offset > self.length
|
346
|
+
return [] if length < 0
|
347
|
+
|
348
|
+
self[offset, length + 1]
|
349
|
+
when nil
|
350
|
+
raise TypeError.new("no implicit conversion from nil to integer")
|
351
|
+
else
|
352
|
+
raise TypeError.new("can't convert #{index.class} into Integer")
|
353
|
+
end
|
354
|
+
end
|
355
|
+
|
356
|
+
# Element Assignment — Sets the entry at index, or replaces a subset starting at start
|
357
|
+
# and continuing for length entries, or replaces a subset specified by range.
|
358
|
+
# Negative indices count backward from the end of self. Inserts elements if
|
359
|
+
# length is zero. If nil is used in the second and third form, deletes elements from
|
360
|
+
# self. An IndexError is raised if a negative index points past the beginning of self.
|
361
|
+
# See also push, and unshift.
|
362
|
+
#
|
363
|
+
# io = StringIO.new ""
|
364
|
+
# i = ExtInd.new(io, :format => 'I')
|
365
|
+
# i.nil_value # => [0]
|
366
|
+
# i[4] = [4] # => [[0], [0], [0], [0], [4]]
|
367
|
+
# i[0, 3] = [ [1], [2], [3] ] # => [[1], [2], [3], [0], [4]]
|
368
|
+
# i[1..2] = [ [5], [6] ] # => [[1], [5], [6], [0], [4]]
|
369
|
+
# i[0, 2] = [ [7] ] # => [[7], [6], [0], [4]]
|
370
|
+
# i[0..2] = [ [8] ] # => [[8], [4]]
|
371
|
+
# i[-1] = [9] # => [[8], [9]]
|
372
|
+
# i[1..-1] = nil # => [[8]]
|
373
|
+
#
|
374
|
+
# Note that []= must take entries in frame, or (in the case of [offset, length] and
|
375
|
+
# range insertions) another ExtInd with the same frame, format, and nil_value.
|
376
|
+
#--
|
377
|
+
# TODO -- cleanup error messages so they are more meaningful
|
378
|
+
# and helpful, esp for frame errors
|
379
|
+
#++
|
380
|
+
def []=(*args)
|
381
|
+
raise ArgumentError.new("wrong number of arguments (1 for 2)") if args.length < 2
|
382
|
+
index, length, value = args
|
383
|
+
if args.length == 2
|
384
|
+
value = length
|
385
|
+
length = nil
|
386
|
+
end
|
387
|
+
|
388
|
+
case index
|
389
|
+
when Fixnum
|
390
|
+
if index < 0
|
391
|
+
index += self.length
|
392
|
+
raise IndexError.new("index #{index} out of range") if index < 0
|
393
|
+
end
|
394
|
+
|
395
|
+
if length == nil
|
396
|
+
# simple insertion
|
397
|
+
value = nil_value if value.object_id == 4 # nil
|
398
|
+
unframed_write(value, index)
|
399
|
+
else
|
400
|
+
raise IndexError.new("negative length (#{length})") if length < 0
|
401
|
+
|
402
|
+
# arrayify value if needed
|
403
|
+
unless value.kind_of?(ExtInd)
|
404
|
+
value = [value] unless value.kind_of?(Array)
|
405
|
+
end
|
406
|
+
|
407
|
+
case
|
408
|
+
when cached?
|
409
|
+
# validation must occur here, because this cached insertion
|
410
|
+
# bypasses the validations that normally occur in write
|
411
|
+
case value
|
412
|
+
when Array then validate_framed_array(value)
|
413
|
+
when ExtInd then validate_index(value)
|
414
|
+
end
|
415
|
+
|
416
|
+
# must be done before padding in case value == self
|
417
|
+
# WARN - could be expensive
|
418
|
+
# TODO - check the effect of cache.dup on speed if cached?
|
419
|
+
value = value.to_a.collect {|item| item == nil ? nil_value : item }
|
420
|
+
|
421
|
+
# pad as needed
|
422
|
+
pad_to(index) if index > self.length
|
423
|
+
|
424
|
+
# write the value to the cache
|
425
|
+
cache[index, length] = value
|
426
|
+
when self == value
|
427
|
+
# special case when insertion is self (no validation needed)
|
428
|
+
# A whole copy of self is required because the insertion
|
429
|
+
# can overwrite the tail of self. As such this can be a
|
430
|
+
# worst-case scenario-slow and expensive procedure.
|
431
|
+
copy_beg = (index + length) * frame_size
|
432
|
+
copy_end = io.length
|
433
|
+
|
434
|
+
io.copy do |copy|
|
435
|
+
# truncate io
|
436
|
+
io.truncate(index * frame_size)
|
437
|
+
io.pos = io.length
|
438
|
+
|
439
|
+
# pad as needed
|
440
|
+
pad_to(index) if index > self.length
|
441
|
+
|
442
|
+
# write the copy of self
|
443
|
+
io.insert(copy)
|
444
|
+
|
445
|
+
# copy the tail of the insertion
|
446
|
+
io.insert(copy, copy_beg..copy_end)
|
447
|
+
end
|
448
|
+
when value.length == length
|
449
|
+
# optimized insertion, when insertion is the correct length
|
450
|
+
write(value, index)
|
451
|
+
else
|
452
|
+
# range insertion: requires copy and rewrite of the tail
|
453
|
+
# of the ExtInd, after the insertion.
|
454
|
+
# WARN - can be slow when the tail is large
|
455
|
+
copy_beg = (index + length) * frame_size
|
456
|
+
copy_end = io.length
|
457
|
+
|
458
|
+
io.copy("r", copy_beg..copy_end) do |copy|
|
459
|
+
# pad as needed
|
460
|
+
pad_to(index) if index > self.length
|
461
|
+
|
462
|
+
# write inserted value
|
463
|
+
io.pos = index * frame_size
|
464
|
+
write(value)
|
465
|
+
|
466
|
+
# truncate io
|
467
|
+
io.truncate(io.pos)
|
468
|
+
|
469
|
+
# copy the tail of the insertion
|
470
|
+
io.insert(copy)
|
471
|
+
end
|
472
|
+
end
|
473
|
+
end
|
474
|
+
|
475
|
+
value
|
476
|
+
when Range
|
477
|
+
raise TypeError.new("can't convert Range into Integer") if args.length == 3
|
478
|
+
|
479
|
+
# for conformance with setting a range with nil (truncates)
|
480
|
+
value = [] if value.nil?
|
481
|
+
offset, length = split_range(index)
|
482
|
+
self[offset, length + 1] = value
|
483
|
+
when nil
|
484
|
+
raise TypeError.new("no implicit conversion from nil to integer")
|
485
|
+
else
|
486
|
+
raise TypeError.new("can't convert #{index.class} into Integer")
|
487
|
+
end
|
488
|
+
end
|
489
|
+
|
490
|
+
# def abbrev(pattern=nil)
|
491
|
+
# not_implemented
|
492
|
+
# end
|
493
|
+
|
494
|
+
# def assoc(obj)
|
495
|
+
# not_implemented
|
496
|
+
# end
|
497
|
+
|
498
|
+
# Returns entry at index
|
499
|
+
def at(index)
|
500
|
+
self[index]
|
501
|
+
end
|
502
|
+
|
503
|
+
# Removes all elements from _self_.
|
504
|
+
def clear
|
505
|
+
cached? ? cache.clear : io.truncate(0)
|
506
|
+
self
|
507
|
+
end
|
508
|
+
|
509
|
+
# def compact
|
510
|
+
# not_implemented
|
511
|
+
# end
|
512
|
+
|
513
|
+
# def compact!
|
514
|
+
# not_implemented
|
515
|
+
# end
|
516
|
+
|
517
|
+
def concat(another)
|
518
|
+
case another
|
519
|
+
when Array
|
520
|
+
write(another, length)
|
521
|
+
when ExtInd
|
522
|
+
validate_index(another)
|
523
|
+
|
524
|
+
if cached?
|
525
|
+
# WARN - could be expensive
|
526
|
+
cache.concat(another.to_a)
|
527
|
+
else
|
528
|
+
io.concat(another.io)
|
529
|
+
end
|
530
|
+
else
|
531
|
+
raise TypeError.new("can't convert #{another.class} into ExtInd or Array")
|
532
|
+
end
|
533
|
+
self
|
534
|
+
end
|
535
|
+
|
536
|
+
# def delete(obj)
|
537
|
+
# not_implemented
|
538
|
+
# end
|
539
|
+
|
540
|
+
# def delete_at(index)
|
541
|
+
# not_implemented
|
542
|
+
# end
|
543
|
+
|
544
|
+
# def delete_if # :yield: item
|
545
|
+
# not_implemented
|
546
|
+
# end
|
547
|
+
|
548
|
+
def each(&block) # :yield: item
|
549
|
+
self.pos = 0
|
550
|
+
chunk do |offset, length|
|
551
|
+
# special treatment for 1, because then read(1) => [...] rather
|
552
|
+
# than [[...]]. when frame > 1, each will iterate over the
|
553
|
+
# element rather than pass it to the block directly
|
554
|
+
if length == 1
|
555
|
+
yield read(1)
|
556
|
+
else
|
557
|
+
read(length).each(&block)
|
558
|
+
end
|
559
|
+
end
|
560
|
+
self
|
561
|
+
end
|
562
|
+
|
563
|
+
# Passes the index of each entry
|
564
|
+
def each_index(&block) # :yield: index
|
565
|
+
0.upto(length-1, &block)
|
566
|
+
self
|
567
|
+
end
|
568
|
+
|
569
|
+
# Returns true if _self_ contains no elements
|
570
|
+
def empty?
|
571
|
+
length == 0
|
572
|
+
end
|
573
|
+
|
574
|
+
def eql?(another)
|
575
|
+
self == another
|
576
|
+
end
|
577
|
+
|
578
|
+
# def fetch(index, default=nil, &block)
|
579
|
+
# index += index_length if index < 0
|
580
|
+
# val = (index >= length ? default : self[index])
|
581
|
+
# block_given? ? yield(val) : val
|
582
|
+
# end
|
583
|
+
|
584
|
+
# def fill(*args)
|
585
|
+
# not_implemented
|
586
|
+
# end
|
587
|
+
|
588
|
+
# Returns the first n entries (default 1)
|
589
|
+
def first(n=nil)
|
590
|
+
n.nil? ? self[0] : self[0,n]
|
591
|
+
end
|
592
|
+
|
593
|
+
# def hash
|
594
|
+
# not_implemented
|
595
|
+
# end
|
596
|
+
|
597
|
+
# def include?(obj)
|
598
|
+
# not_implemented
|
599
|
+
# end
|
600
|
+
|
601
|
+
# def index(obj)
|
602
|
+
# not_implemented
|
603
|
+
# end
|
604
|
+
|
605
|
+
# def indexes(*args)
|
606
|
+
# values_at(*args)
|
607
|
+
# end
|
608
|
+
#
|
609
|
+
# def indicies(*args)
|
610
|
+
# values_at(*args)
|
611
|
+
# end
|
612
|
+
|
613
|
+
# def replace(other)
|
614
|
+
# not_implemented
|
615
|
+
# end
|
616
|
+
|
617
|
+
# def insert(index, *obj)
|
618
|
+
# self[index] = obj
|
619
|
+
# end
|
620
|
+
|
621
|
+
# def inspect
|
622
|
+
# not_implemented
|
623
|
+
# end
|
624
|
+
|
625
|
+
# def join(sep=$,)
|
626
|
+
# not_implemented
|
627
|
+
# end
|
628
|
+
|
629
|
+
# Returns the last n entries (default 1)
|
630
|
+
def last(n=nil)
|
631
|
+
return self[-1] if n.nil?
|
632
|
+
|
633
|
+
start = length-n
|
634
|
+
start = 0 if start < 0
|
635
|
+
self[start, n]
|
636
|
+
end
|
637
|
+
|
638
|
+
# Returns the number of entries in self
|
639
|
+
def length
|
640
|
+
cached? ? cache.length : io.length/frame_size
|
641
|
+
end
|
642
|
+
|
643
|
+
# def nitems
|
644
|
+
# not_implemented
|
645
|
+
# end
|
646
|
+
|
647
|
+
# def pop
|
648
|
+
# not_implemented
|
649
|
+
# end
|
650
|
+
|
651
|
+
# def pretty_print(q)
|
652
|
+
# not_implemented
|
653
|
+
# end
|
654
|
+
|
655
|
+
# def pretty_print_cycle(q)
|
656
|
+
# not_implemented
|
657
|
+
# end
|
658
|
+
|
659
|
+
# def push(*obj)
|
660
|
+
# not_implemented
|
661
|
+
# end
|
662
|
+
|
663
|
+
# def rassoc(key)
|
664
|
+
# not_implemented
|
665
|
+
# end
|
666
|
+
|
667
|
+
# def replace(another)
|
668
|
+
# not_implemented
|
669
|
+
# end
|
670
|
+
|
671
|
+
# def reverse
|
672
|
+
# not_implemented
|
673
|
+
# end
|
674
|
+
|
675
|
+
# def reverse!
|
676
|
+
# not_implemented
|
677
|
+
# end
|
678
|
+
|
679
|
+
def reverse_each(&block)
|
680
|
+
reverse_chunk do |offset, length|
|
681
|
+
# special treatment for 1, because then read(1) => [...] rather
|
682
|
+
# than [[...]]. when frame > 1, each will iterate over the
|
683
|
+
# element rather than pass it to the block directly
|
684
|
+
if length == 1
|
685
|
+
yield read(1)
|
686
|
+
else
|
687
|
+
read(length, offset).reverse_each(&block)
|
688
|
+
end
|
689
|
+
end
|
690
|
+
self
|
691
|
+
end
|
692
|
+
|
693
|
+
# def rindex(obj)
|
694
|
+
# not_implemented
|
695
|
+
# end
|
696
|
+
|
697
|
+
# def select # :yield: item
|
698
|
+
# not_implemented
|
699
|
+
# end
|
700
|
+
|
701
|
+
# def shift
|
702
|
+
# not_implemented
|
703
|
+
# end
|
704
|
+
|
705
|
+
# Alias for length
|
706
|
+
def size
|
707
|
+
length
|
708
|
+
end
|
709
|
+
|
710
|
+
# def slice(*args)
|
711
|
+
# self.call(:[], *args)
|
712
|
+
# end
|
713
|
+
|
714
|
+
# def slice!(*args)
|
715
|
+
# not_implemented
|
716
|
+
# end
|
717
|
+
|
718
|
+
def to_a(length=self.length)
|
719
|
+
case
|
720
|
+
when cached? then cache.dup
|
721
|
+
when length == 0 then []
|
722
|
+
when length == 1 then [read(length, 0)]
|
723
|
+
else
|
724
|
+
read(length, 0)
|
725
|
+
end
|
726
|
+
end
|
727
|
+
|
728
|
+
# def to_ary
|
729
|
+
# not_implemented
|
730
|
+
# end
|
731
|
+
|
732
|
+
# Returns _self_.join.
|
733
|
+
# def to_s
|
734
|
+
# self.join
|
735
|
+
# end
|
736
|
+
|
737
|
+
# def uniq
|
738
|
+
# not_implemented
|
739
|
+
# end
|
740
|
+
|
741
|
+
# def uniq!
|
742
|
+
# not_implemented
|
743
|
+
# end
|
744
|
+
|
745
|
+
# def unshift(*obj)
|
746
|
+
# not_implemented
|
747
|
+
# end
|
748
|
+
|
749
|
+
# Returns an array containing the chars in io corresponding to the given
|
750
|
+
# selector(s). The selectors may be either integer indices or ranges
|
751
|
+
# def values_at(*selectors)
|
752
|
+
# selectors.collect {|s| self[s]}.flatten
|
753
|
+
# end
|
754
|
+
|
755
|
+
# def |(another)
|
756
|
+
# not_implemented
|
757
|
+
# end
|
758
|
+
|
759
|
+
#################
|
760
|
+
# IO-like methods
|
761
|
+
##################
|
762
|
+
|
763
|
+
# Sets the current position of the index. Negative positions
|
764
|
+
# are counted from the end of the index (just as they are in
|
765
|
+
# an array). Positions can be set beyond the actual length
|
766
|
+
# of the index (similar to an IO).
|
767
|
+
#
|
768
|
+
# i = ExtInd[[1],[2],[3]]
|
769
|
+
# i.length # => 3
|
770
|
+
# i.pos = 2; i.pos # => 2
|
771
|
+
# i.pos = -1; i.pos # => 2
|
772
|
+
# i.pos = 10; i.pos # => 10
|
773
|
+
def pos=(pos)
|
774
|
+
if pos < 0
|
775
|
+
raise ArgumentError.new("position out of bounds: #{pos}") if pos < -length
|
776
|
+
pos += length
|
777
|
+
end
|
778
|
+
|
779
|
+
# do something fake for caching so that
|
780
|
+
# the position need not be set (this
|
781
|
+
# works either way)
|
782
|
+
if cached?
|
783
|
+
self.cache_pos = pos
|
784
|
+
else
|
785
|
+
io.pos = (pos * frame_size)
|
786
|
+
end
|
787
|
+
end
|
788
|
+
|
789
|
+
# Returns the current position of the index
|
790
|
+
def pos
|
791
|
+
cached? ? cache_pos : io.pos/frame_size
|
792
|
+
end
|
793
|
+
|
794
|
+
# Reads the packed byte string for n entries from the specified
|
795
|
+
# position. By default reads the string for all remaining entries
|
796
|
+
# from the current position.
|
797
|
+
#
|
798
|
+
# i = ExtInd[[1],[2],[3]]
|
799
|
+
# i.pos # => 0
|
800
|
+
# i.readbytes.unpack("I*") # => [1,2,3]
|
801
|
+
# i.readbytes(1,0).unpack("I*") # => [1]
|
802
|
+
# i.readbytes(10,1).unpack("I*") # => [2,3]
|
803
|
+
#
|
804
|
+
# Like an IO, when n is nil and no entries can be read, an empty
|
805
|
+
# string is returned. When n is specified, nil will be returned
|
806
|
+
# when no entries can be read.
|
807
|
+
#
|
808
|
+
# i.pos = 3
|
809
|
+
# i.readbytes # => ""
|
810
|
+
# i.readbytes(1) # => nil
|
811
|
+
def readbytes(n=nil, pos=nil)
|
812
|
+
if cached?
|
813
|
+
ary = read(n, pos)
|
814
|
+
return (ary == nil ? nil : ary.flatten.pack(format))
|
815
|
+
end
|
816
|
+
|
817
|
+
# set the io position to the specified index
|
818
|
+
self.pos = pos unless pos == nil
|
819
|
+
|
820
|
+
# read until the end if no n is given
|
821
|
+
n == nil ? io.read : io.read(n * frame_size)
|
822
|
+
end
|
823
|
+
|
824
|
+
# Unpacks the given string into an array of index values.
|
825
|
+
# Single entries are returned in frame, multiple entries
|
826
|
+
# are returned in an array.
|
827
|
+
#
|
828
|
+
# i.format # => 'I*'
|
829
|
+
# i.unpack( [1].pack('I*') ) # => [1]
|
830
|
+
# i.unpack( [1,2,3].pack('I*') ) # => [[1],[2],[3]]
|
831
|
+
# i.unpack("") # => []
|
832
|
+
#
|
833
|
+
def unpack(str)
|
834
|
+
case
|
835
|
+
when str.empty? then []
|
836
|
+
when str.length == frame_size
|
837
|
+
str.unpack(format)
|
838
|
+
when process_in_bulk
|
839
|
+
results = []
|
840
|
+
str.unpack(format).each_slice(frame) {|s| results << s}
|
841
|
+
results
|
842
|
+
else
|
843
|
+
Array.new(str.length/frame_size) do |i|
|
844
|
+
str[i*frame_size, frame_size].unpack(format)
|
845
|
+
end
|
846
|
+
end
|
847
|
+
end
|
848
|
+
|
849
|
+
# Reads n entries from the specified position. By default
|
850
|
+
# reads all remaining entries from the current position.
|
851
|
+
# Single entries are returned in frame, multiple entries
|
852
|
+
# are returned in an array.
|
853
|
+
#
|
854
|
+
# i = ExtInd[[1],[2],[3]]
|
855
|
+
# i.pos # => 0
|
856
|
+
# i.read # => [[1],[2],[3]]
|
857
|
+
# i.read(1,0) # => [1]
|
858
|
+
# i.read(10,1) # => [[2],[3]]
|
859
|
+
#
|
860
|
+
# When n is nil and no entries can be read, an empty array
|
861
|
+
# is returned. When n is specified, nil will be returned
|
862
|
+
# when no entries can be read.
|
863
|
+
#
|
864
|
+
# i.pos = 3
|
865
|
+
# i.read # => []
|
866
|
+
# i.read(1) # => nil
|
867
|
+
def read(n=nil, pos=nil)
|
868
|
+
if cached?
|
869
|
+
self.pos = pos unless pos == nil
|
870
|
+
m = (n == nil || n > (length - cache_pos)) ? (length - cache_pos) : n
|
871
|
+
|
872
|
+
return case
|
873
|
+
when n == nil && m == 0 then []
|
874
|
+
when m <= 1 then cache[cache_pos]
|
875
|
+
else
|
876
|
+
cache[cache_pos, m]
|
877
|
+
end
|
878
|
+
end
|
879
|
+
|
880
|
+
str = readbytes(n, pos)
|
881
|
+
str == nil ? nil : unpack(str)
|
882
|
+
end
|
883
|
+
|
884
|
+
def write(array, pos=nil)
|
885
|
+
case array
|
886
|
+
when Array
|
887
|
+
validate_framed_array(array)
|
888
|
+
prepare_write_to_pos(pos)
|
889
|
+
write_framed_array(array)
|
890
|
+
when ExtInd
|
891
|
+
validate_index(array)
|
892
|
+
prepare_write_to_pos(pos)
|
893
|
+
write_index(array)
|
894
|
+
else
|
895
|
+
raise ArgumentError.new("could not convert #{array.class} to Array or ExtInd")
|
896
|
+
end
|
897
|
+
end
|
898
|
+
|
899
|
+
# Writes the array as an entry (or set of entries) into
|
900
|
+
# self starting at the specified position. By default
|
901
|
+
# write begins at the current position. The array can
|
902
|
+
# have multiple entries in sequence, but MUST be in the
|
903
|
+
# correct frame.
|
904
|
+
#
|
905
|
+
# i = ExtInd[]
|
906
|
+
# i.unframed_write([2,3], 1)
|
907
|
+
# i.pos = 0;
|
908
|
+
# i.unframed_write([1])
|
909
|
+
# i.read(3, 0) # => [[1],[2],[3]]
|
910
|
+
#
|
911
|
+
# Note -- no range checking when cached
|
912
|
+
def unframed_write(array, pos=nil)
|
913
|
+
case array
|
914
|
+
when Array
|
915
|
+
validate_unframed_array(array)
|
916
|
+
prepare_write_to_pos(pos)
|
917
|
+
write_unframed_array(array)
|
918
|
+
when ExtInd
|
919
|
+
validate_index(array)
|
920
|
+
prepare_write_to_pos(pos)
|
921
|
+
write_index(array)
|
922
|
+
else
|
923
|
+
raise ArgumentError.new("could not convert #{array.class} to Array or ExtInd")
|
924
|
+
end
|
925
|
+
end
|
926
|
+
|
927
|
+
protected
|
928
|
+
|
929
|
+
attr_accessor :cache_pos
|
930
|
+
|
931
|
+
# An array of the core index attributes: frame, format, nil_value
|
932
|
+
def index_attrs # :nodoc:
|
933
|
+
[frame, format, nil_value]
|
934
|
+
end
|
935
|
+
|
936
|
+
def prepare_write_to_pos(pos)
|
937
|
+
unless pos == nil
|
938
|
+
# pad to the starting position if necessary
|
939
|
+
pad_to(pos) if pos > length
|
940
|
+
|
941
|
+
# set the io position to the specified index
|
942
|
+
self.pos = pos
|
943
|
+
end
|
944
|
+
end
|
945
|
+
|
946
|
+
def pad_to(pos)
|
947
|
+
n = (pos-length)/frame
|
948
|
+
|
949
|
+
if cached?
|
950
|
+
cache.concat(Array.new(n, nil_value))
|
951
|
+
else
|
952
|
+
io.pos = io.length
|
953
|
+
io.length += io.write(nil_value(false) * n)
|
954
|
+
|
955
|
+
# in this case position doesn't
|
956
|
+
# need to be set. set pos to nil
|
957
|
+
# to skip the set statement below
|
958
|
+
pos = nil
|
959
|
+
end
|
960
|
+
end
|
961
|
+
|
962
|
+
# Confirms that index is laid out like self by comparing the
# index_attrs of both.  Raises an ArgumentError listing the
# attributes of index when they differ.
def validate_index(index)
  return nil if index.index_attrs == index_attrs

  raise ArgumentError.new("incompatible index attributes [#{index.index_attrs.join(',')}]")
end
|
967
|
+
|
968
|
+
# Checks each item of a framed array.  An item must be either nil
# (framed arrays may contain nils) or an Array holding exactly
# frame values; anything else raises an ArgumentError.
def validate_framed_array(array)
  array.each do |entry|
    # nils are legal placeholders in a framed array
    next if entry.nil?

    unless entry.kind_of?(Array)
      raise ArgumentError.new("expected array in frame '#{frame}', was #{entry.class}")
    end

    next if entry.length == frame

    raise ArgumentError.new("expected array in frame '#{frame}' but was '#{entry.length}'")
  end
end
|
983
|
+
|
984
|
+
# Checks that a flat array can be cut into whole entries, i.e.
# that its length is a multiple of frame.  Raises an ArgumentError
# otherwise.
def validate_unframed_array(array)
  leftover = array.length % frame
  return nil if leftover == 0

  raise ArgumentError.new("expected array in frame '#{frame}' but was '#{array.length}'")
end
|
989
|
+
|
990
|
+
# Writes every entry of index into self at the current position,
# advancing the position past them.  index is expected to have
# passed validate_index already.  Each combination of cached and
# uncached self/index is handled:
#
# * cached self, cached index:: entries are copied cache to cache
# * cached self, uncached index:: entries are taken from index.each
# * uncached self, cached index:: cached entries are packed and written
# * uncached self, uncached index:: index.io is inserted into io
#
# When writing to io, the recorded io length is extended if the
# write ran past the previous end.
def write_index(index)
  if cached?
    if index.cached?
      cache[cache_pos, index.length] = index.cache
      self.cache_pos += index.length
    else
      index.each do |item|
        cache[cache_pos] = item
        self.cache_pos += 1
      end
    end
  else
    end_pos = io.pos
    if index.cached?
      # the cache holds one array per entry and Array#pack cannot
      # pack nested arrays, so pack entry by entry and join
      packed = index.cache.collect {|item| item.pack(format) }.join
      end_pos += io.write(packed)
    else
      # io.insert presumably returns the number of bytes inserted
      end_pos += io.insert(index.io)
    end

    io.length = end_pos if end_pos > io.length
  end
end
|
1012
|
+
|
1013
|
+
# Writes a framed array (one item per entry) at the current
# position.  Items may be nil; each nil is resolved to the nil
# value before the data is stored or written.
#
# When cached the entries go straight into the cache and cache_pos
# is advanced.  Otherwise the entries are packed with format and
# written to io, all at once if process_in_bulk is set or one entry
# at a time if not, and the recorded io length is extended when the
# write ran past the previous end.
def write_framed_array(array)
  if cached?
    resolved = array.map {|entry| entry.nil? ? nil_value : entry }
    cache[cache_pos, array.length] = resolved
    self.cache_pos += array.length
  else
    origin = io.pos

    written =
      if process_in_bulk
        # gather every value into one flat array, then pack once
        flat = array.inject([]) do |values, entry|
          values.concat(entry.nil? ? nil_value : entry)
        end
        io.write(flat.pack(format))
      else
        # pack and write entry by entry
        array.inject(0) do |total, entry|
          packed = entry.nil? ? nil_value(false) : entry.pack(format)
          total + io.write(packed)
        end
      end

    finish = origin + written
    io.length = finish if finish > io.length
  end
end
|
1039
|
+
|
1040
|
+
# Writes a flat array of values at the current position, frame
# values per entry.  Unframed arrays cannot contain nils, so no
# nil resolution takes place here.
#
# When cached the array is cut into frame-sized slices that are
# stored in the cache one per entry, advancing cache_pos each
# time.  Otherwise the values are packed with format and written
# to io, in a single call if process_in_bulk is set or slice by
# slice if not, and the recorded io length is extended when the
# write ran past the previous end.
def write_unframed_array(array)
  if cached?
    array.each_slice(frame) do |entry|
      cache[cache_pos] = entry
      self.cache_pos = cache_pos + 1
    end
  else
    origin = io.pos

    written =
      if process_in_bulk
        io.write(array.pack(format))
      else
        array.each_slice(frame).inject(0) do |total, entry|
          total + io.write(entry.pack(format))
        end
      end

    finish = origin + written
    io.length = finish if finish > io.length
  end
end
|
1064
|
+
end
|
1065
|
+
|
1066
|
+
|
1067
|
+
# # Include the inline enhancements for ExtInd
|
1068
|
+
# if RUBY_PLATFORM.index('mswin').nil?
|
1069
|
+
# require 'inline'
|
1070
|
+
# inline do |builder|
|
1071
|
+
# #builder.include "<rubyio.h>"
|
1072
|
+
# # Array.new(str.length/frame_size) do |i|
|
1073
|
+
# # str[i*frame_size, frame_size].unpack(format)
|
1074
|
+
# # end
|
1075
|
+
# builder.c %Q{
|
1076
|
+
# static VALUE unpack(VALUE str)
|
1077
|
+
# {
|
1078
|
+
# char *p = RSTRING(str)->ptr;
|
1079
|
+
# int str_len = RSTRING(str)->len;
|
1080
|
+
# int frame_size = NUM2INT(rb_iv_get(self, "@frame_size"));
|
1081
|
+
# int frame = NUM2INT(rb_iv_get(self, "@frame"));
|
1082
|
+
# int i, j, times = str_len/frame_size;
|
1083
|
+
# VALUE fmt = rb_iv_get(self, "@format");
|
1084
|
+
# VALUE results, arr;
|
1085
|
+
#
|
1086
|
+
# if(times <= 1)
|
1087
|
+
# return rb_funcall(str, rb_intern("unpack"), 1, fmt);
|
1088
|
+
#
|
1089
|
+
# results = rb_ary_new();
|
1090
|
+
# i = 0;
|
1091
|
+
# while(i < times)
|
1092
|
+
# {
|
1093
|
+
# j = 0;
|
1094
|
+
# arr = rb_ary_new();
|
1095
|
+
# while(j < frame)
|
1096
|
+
# {
|
1097
|
+
# // no need to copy the data at *p,
|
1098
|
+
# // apparently the conversion can
|
1099
|
+
# // happen directly from the pointer
|
1100
|
+
# rb_ary_push(arr, UINT2NUM(*p));
|
1101
|
+
# p += 4;
|
1102
|
+
#
|
1103
|
+
#
|
1104
|
+
# ++j;
|
1105
|
+
# }
|
1106
|
+
#
|
1107
|
+
# rb_ary_push(results, arr);
|
1108
|
+
# ++i;
|
1109
|
+
# }
|
1110
|
+
# return results;
|
1111
|
+
# }
|
1112
|
+
#
|
1113
|
+
# }#File.read(File.dirname(__FILE__) + "/../../src/inline.c")
|
1114
|
+
# end
|
1115
|
+
# else
|
1116
|
+
# # on windows when it's not likely that the user has
|
1117
|
+
# # a compiler, include the precompiled binaries
|
1118
|
+
# # require ...
|
1119
|
+
# end
|
1120
|
+
|