external 0.1.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. data/History +7 -0
  2. data/MIT-LICENSE +1 -3
  3. data/README +162 -127
  4. data/lib/external.rb +2 -3
  5. data/lib/external/base.rb +174 -47
  6. data/lib/external/chunkable.rb +131 -105
  7. data/lib/external/enumerable.rb +78 -33
  8. data/lib/external/io.rb +163 -398
  9. data/lib/external/patches/ruby_1_8_io.rb +31 -0
  10. data/lib/external/patches/windows_io.rb +53 -0
  11. data/lib/external/patches/windows_utils.rb +27 -0
  12. data/lib/external/utils.rb +148 -0
  13. data/lib/external_archive.rb +840 -0
  14. data/lib/external_array.rb +57 -0
  15. data/lib/external_index.rb +1053 -0
  16. metadata +42 -58
  17. data/lib/ext_arc.rb +0 -108
  18. data/lib/ext_arr.rb +0 -727
  19. data/lib/ext_ind.rb +0 -1120
  20. data/test/benchmarks/benchmarks_20070918.txt +0 -45
  21. data/test/benchmarks/benchmarks_20070921.txt +0 -91
  22. data/test/benchmarks/benchmarks_20071006.txt +0 -147
  23. data/test/benchmarks/test_copy_file.rb +0 -80
  24. data/test/benchmarks/test_pos_speed.rb +0 -47
  25. data/test/benchmarks/test_read_time.rb +0 -55
  26. data/test/cached_ext_ind_test.rb +0 -219
  27. data/test/check/benchmark_check.rb +0 -441
  28. data/test/check/namespace_conflicts_check.rb +0 -23
  29. data/test/check/pack_check.rb +0 -90
  30. data/test/ext_arc_test.rb +0 -286
  31. data/test/ext_arr/alt_sep.txt +0 -3
  32. data/test/ext_arr/cr_lf_input.txt +0 -3
  33. data/test/ext_arr/input.index +0 -0
  34. data/test/ext_arr/input.txt +0 -1
  35. data/test/ext_arr/inputb.index +0 -0
  36. data/test/ext_arr/inputb.txt +0 -1
  37. data/test/ext_arr/lf_input.txt +0 -3
  38. data/test/ext_arr/lines.txt +0 -19
  39. data/test/ext_arr/without_index.txt +0 -1
  40. data/test/ext_arr_test.rb +0 -534
  41. data/test/ext_ind_test.rb +0 -1472
  42. data/test/external/base_test.rb +0 -74
  43. data/test/external/chunkable_test.rb +0 -182
  44. data/test/external/index/input.index +0 -0
  45. data/test/external/index/inputb.index +0 -0
  46. data/test/external/io_test.rb +0 -414
  47. data/test/external_test_helper.rb +0 -31
  48. data/test/external_test_suite.rb +0 -4
  49. data/test/test_array.rb +0 -1192
@@ -0,0 +1,31 @@
1
module External
  module Patches
    # Io patch which remembers the generic mode ("r", "w" or "r+") of the
    # extended io and turns flush/fsync into no-ops for read-only ios.
    module Ruby18Io
      attr_reader :generic_mode

      # Records the generic mode of base, as reported by Utils.mode, at
      # the moment base is extended.
      def self.extended(base)
        base.instance_variable_set(:@generic_mode, Utils.mode(base))
      end

      # Flushes as usual, unless self was opened read-only.
      def flush
        return nil if generic_mode == "r"
        super
      end

      # Syncs as usual, unless self was opened read-only.
      def fsync
        return nil if generic_mode == "r"
        super
      end

      # Quick comparison with another IO.  Returns true if
      # another == self, or if both are file-type IOs (File or
      # Tempfile) and their paths are equal.
      def quick_compare(another)
        identical = (self == another)
        return identical if identical

        both_files = [self, another].all? do |candidate|
          candidate.kind_of?(File) || candidate.kind_of?(Tempfile)
        end
        both_files && self.path == another.path
      end
    end
  end

  Io::PATCHES << Patches::Ruby18Io
end
@@ -0,0 +1,53 @@
1
module External
  module Patches

    # Ruby on Windows has problems with files larger than ~2 gigabytes.
    # Sizes return as negative, and positions cannot be set beyond the max
    # value of a long (2147483647, i.e. 2**31 - 1).  WindowsIo works around
    # the position limit; the size issue is handled separately, thanks in
    # large part to a bit of code taken from 'win32/file/stat'
    # (http://rubyforge.org/projects/win32utils/).
    #
    # NOTE(review): the position requested through +pos=+ is cached in @pos
    # and nothing in this module refreshes it after a read or write, so
    # +pos+ may report a stale value and re-assigning the same large position
    # is skipped -- confirm that External::Io compensates for this.
    module WindowsIo
      POSITION_MAX = 2147483647 # maximum value of a long

      # Initializes the cached large-file position on base.
      def self.extended(base)
        base.instance_variable_set("@pos", nil)
      end

      # Modified to handle positions past the 2Gb limit: returns the
      # cached large position when there is one, else the native pos.
      def pos # :nodoc:
        @pos || super
      end

      # Positions larger than the max value of a long cannot be directly given
      # to the default +pos=+.  This version incrementally seeks to positions
      # beyond the maximum, if necessary.
      #
      # Note: setting the position beyond the 2Gb limit requires the use of a
      # sysseek statement.  As such, errors will arise if you try to position
      # an IO object that does not support this method (for example StringIO...
      # but then what are you doing with a 2Gb StringIO anyhow?)
      def pos=(pos)
        if pos < POSITION_MAX
          super(pos)
          return @pos = nil
        end

        # nothing to do when the cached large position already matches
        return nil if @pos == pos

        # note sysseek appears to be necessary here, rather than io.seek
        @pos = pos
        super(POSITION_MAX)

        # walk forward in POSITION_MAX-sized hops, then cover the rest
        remaining = pos - POSITION_MAX
        while remaining > POSITION_MAX
          remaining -= POSITION_MAX
          self.sysseek(POSITION_MAX, ::IO::SEEK_CUR)
        end

        self.sysseek(remaining, ::IO::SEEK_CUR)
      end
    end
  end

  Io::PATCHES << Patches::WindowsIo
end
@@ -0,0 +1,27 @@
1
+ # This code block modifies IO only if running on windows
2
+ require 'Win32API'
3
+
4
module External
  module Utils
    module_function

    # Modified to properly determine file lengths on Windows.  Uses code
    # from 'win32/file/stat' (http://rubyforge.org/projects/win32utils/)
    #
    # The io is fsynced first so that pending writes are reflected in the
    # reported size.  Returns the size of the file at io.path in bytes.
    def file_length(io)
      io.fsync

      # I would have liked to use win32/file/stat to do this... however, some issue
      # arose involving FileUtils.cp, File.stat, and File::Stat.mode.  cp raised an
      # error because the mode would be nil for files.  I wasn't sure how to fix it,
      # so I've lifted the relevant code for pulling the large file size.

      # Note this is a simplified version... if io.path points to a chardev,
      # this may need to be changed, because apparently the call to the Win32API
      # may fail

      # Zero-filled buffer for _stat64 to fill in.  It is deliberately
      # over-allocated: struct __stat64 appears to be 56 bytes with the
      # default MSVC packing, which is more than the previous 52-byte
      # template provided -- TODO confirm the exact size.
      stat_buf = 0.chr * 64
      Win32API.new('msvcrt', '_stat64', 'PP', 'I').call(io.path, stat_buf)

      # st_size is a 64-bit integer at offset 24; read all 8 bytes so that
      # files of 4Gb and larger do not wrap around.
      stat_buf[24, 8].unpack('Q').first # Size of file in bytes
    end
  end
end
@@ -0,0 +1,148 @@
1
module External
  module Utils
    module_function

    # try_handle is a forwarding method allowing Utils to handle
    # different classes of io.  try_handle infers a method name
    # based on the class of the input and tries to forward the
    # input io to that method within Utils.  For instance:
    #
    # * the _mode method for StringIO is 'stringio_mode'
    # * the _length method for StringIO is 'stringio_length'
    #
    # Nested classes have '::' replaced by '_'.  Thus to add support
    # for Some::Unknown::IO, extend Utils as below:
    #
    #   module External::Utils
    #     module_function
    #
    #     def some_unknown_io_mode(io)
    #       ...
    #     end
    #
    #     def some_unknown_io_length(io)
    #       ...
    #     end
    #   end
    #
    # Raises a RuntimeError if Utils does not respond to the inferred
    # method.  See stringio_mode and stringio_length for more details.
    def try_handle(io, method)
      method_name = io.class.to_s.downcase.gsub(/::/, "_") + "_#{method}"
      unless Utils.respond_to?(method_name)
        raise "cannot determine #{method} for '#{io.class}'"
      end

      Utils.send(method_name, io)
    end

    # Determines the generic mode of the input io using the _mode
    # method for the input io class.  By default Utils provides _mode
    # methods for File, Tempfile, and StringIO.  The return string
    # is determined as follows:
    #
    #   readable & writable:: r+
    #   readable::            r
    #   writable::            w
    #
    # The _mode method takes the input io and should return an array
    # specifying whether or not io is readable and writable
    # (ie [readable, writable]).
    #
    # See try_handle for more details.
    def mode(io)
      readable, writable = try_handle(io, "mode")

      case
      when readable && writable then "r+"
      when readable then "r"
      when writable then "w"
      else
        # occurs for r+ mode, for some reason
        "r+"
      end
    end

    # Determines the length of the input io using the _length method
    # for the input io class.  Non-External::Io inputs are extended
    # with Io and asked for their length directly.
    #
    # The _length method takes the input io, and should return the
    # current length of the input io (ie a flush operation may be
    # required).
    #
    # See try_handle for more details.
    def length(io)
      case io
      when Io then try_handle(io, "length")
      else
        io.extend Io
        io.length
      end
    end

    # Returns an array of bools, [readable, writable], for the input File.
    #
    # Each flag is determined by sending the matching close method
    # (close_read, close_write) to a duplicate of io; an IOError
    # indicates that io was not open in that mode.  Every probe uses its
    # own duplicate: a successful close_read closes a non-duplex
    # duplicate entirely, and close_write on an already-closed stream is
    # a silent no-op on current rubies, which would otherwise make a
    # read-only File look writable.
    def file_mode(io)
      [:close_read, :close_write].map do |closer|
        probe = nil
        begin
          probe = io.dup
          probe.send(closer)
          true
        rescue IOError
          false
        ensure
          # Be sure that the dup is fully closed before proceeding!
          # (Otherwise Tempfiles will not be properly disposed of
          # ... at least on Windows, perhaps on others)
          probe.close if probe && !probe.closed?
        end
      end
    end

    # Returns the length of the input File.  The io is fsynced first
    # so that the size on disk is current.
    def file_length(io)
      io.fsync
      File.size(io.path)
    end

    # Returns an array of bools determining if the input Tempfile
    # is readable and writable.  The check is made on the File wrapped
    # by the Tempfile (the @tmpfile variable when set, otherwise the
    # delegate returned by __getobj__).
    def tempfile_mode(io)
      file_mode(io.instance_variable_get(:@tmpfile) || io.__getobj__)
    end

    # Returns the length of the input Tempfile
    def tempfile_length(io)
      file_length(io)
    end

    # Returns an array of bools determining if the input StringIO
    # is readable and writable.
    #
    #   s = StringIO.new("abcde", "r+")
    #   External::Utils.stringio_mode(s)  # => [true, true]
    #
    def stringio_mode(io)
      [!io.closed_read?, !io.closed_write?]
    end

    # Returns the length of the input StringIO
    #
    #   s = StringIO.new("abcde", "r+")
    #   External::Utils.stringio_length(s)  # => 5
    #
    def stringio_length(io)
      io.string.length
    end
  end
end
144
+
145
+ # Apply platform-specific patches
146
+ # case RUBY_PLATFORM
147
+ # when 'java'
148
+ # end
@@ -0,0 +1,840 @@
1
+ require 'external/base'
2
+ require 'external_index'
3
+
4
+ #--
5
+ # later separate out individual objects logically
6
+ # If writing, create new files:
7
+ # - base/object_id.aio (new file for receiving appends)
8
+ # - base/object_id._index (copy of existing index -- made on first insertion)
9
+ # - in index, -index indicates object_id.aio file whereas +index indicates original file
10
+ # - .consolidate(rename) resolves changes in index into the object_id file, renaming as needed
11
+ # requires index rewrite as well, to remove negatives
12
+ #
13
+ # If appending, ONLY allow << and all changes get committed to the original file.
14
+ #
15
+ # This should allow returning of new arrayio objects under read/write conditions
16
+ # By default read-only. No insertions. New ExternalArchive objects inherit parent mode.
17
+ #
18
+ # Independent modes:
19
+ # - r
20
+ # - r+
21
+ # - For safety, w/w+ will by default act as r/r+, simply creating new .aio and .index files
22
+ # changes to the originals will NOT be made unless .consolidate(rename) is used. Allow option io_w => true
23
+ # - b ALWAYS on with Windows
24
+ #++
25
+
26
+ # ExternalArchive provides array-like access to archival data stored on disk.
27
+ # ExternalArchives consist of an IO object and an index of [start, length]
28
+ # pairs which indicate the start position and length of entries in the IO.
29
+ #
30
+ class ExternalArchive < External::Base
31
+ class << self
32
+
33
+ # Array-like constructor for an ExternalArchive.
34
+ def [](*args)
35
+ extarc = new
36
+ extarc.concat(args)
37
+ extarc
38
+ end
39
+
40
+ # Returns the default io index filepath for path:
41
+ #
42
+ # ExternalArchive.index_path("/path/to/file.txt") # => "/path/to/file.index"
43
+ #
44
+ def index_path(path)
45
+ path ? path.chomp(File.extname(path)) + '.index' : nil
46
+ end
47
+
48
+ # Initializes an instance of self with File.open(path, mode) as an io.
49
+ # As with File.open, the instance will be passed to the block and
50
+ # closed when the block returns. If no block is given, open returns
51
+ # the new instance.
52
+ #
53
+ # By default the instance will be initialized with an ExternalIndex
54
+ # io_index, linked to index_path(path). The instance will be
55
+ # automatically reindexed if it is empty but it's io is not.
56
+ #
57
+ # Options (specify using symbols):
58
+ # io_index:: Specifies the io_index manually. A filepath may be
59
+ # provided and it will be used instead of index_path(path).
60
+ # Array and ExternalIndex values are used directly.
61
+ # reindex:: Forces a call to reindex; using auto reindexing, reindex
62
+ # is normally only called when the instance is empty
63
+ # and the instance io is not. (default false)
64
+ # auto_reindex:: Turns on or off auto reindexing (default true)
65
+ #
66
+ def open(path, mode="rb", options={})
67
+ options = {
68
+ :io_index => nil,
69
+ :reindex => false,
70
+ :auto_reindex => true
71
+ }.merge(options)
72
+
73
+ index = options[:io_index]
74
+ if index == nil
75
+ index = index_path(path)
76
+ FileUtils.touch(index) unless File.exists?(index)
77
+ end
78
+
79
+ io_index = case index
80
+ when Array, ExternalIndex then index
81
+ else ExternalIndex.open(index, 'r+', :format => 'II')
82
+ end
83
+
84
+ io = path == nil ? nil : File.open(path, mode)
85
+ extarc = new(io, io_index)
86
+
87
+ # reindex if necessary
88
+ if options[:reindex] || (options[:auto_reindex] && extarc.empty? && extarc.io.length > 0)
89
+ extarc.reindex
90
+ end
91
+
92
+ if block_given?
93
+ begin
94
+ yield(extarc)
95
+ ensure
96
+ extarc.close
97
+ end
98
+ else
99
+ extarc
100
+ end
101
+ end
102
+ end
103
+
104
+ # The underlying index of [position, length] arrays
105
+ # indicating where entries in the io are located.
106
+ attr_reader :io_index
107
+
108
+ def initialize(io=nil, io_index=nil)
109
+ super(io)
110
+ @io_index = io_index || []
111
+ end
112
+
113
+ # Returns true if io_index is an Array.
114
+ def cached?
115
+ io_index.kind_of?(Array)
116
+ end
117
+
118
+ # Turns on or off caching by converting io_index
119
+ # to an Array (cache=true) or to an ExternalIndex
120
+ # (cache=false).
121
+ def cache=(input)
122
+ case
123
+ when input && !cached?
124
+ cache = io_index.to_a
125
+ io_index.close
126
+ @io_index = cache
127
+
128
+ when !input && cached?
129
+ io_index << {:format => 'II'}
130
+ @io_index = ExternalIndex[*io_index]
131
+
132
+ end
133
+ end
134
+
135
+ # Closes self as in External::Base#close. An io_path may be
136
+ # be specified to close io_index as well; when io_index is
137
+ # not an ExternalIndex, one is temporarily created with the
138
+ # current io_index content to 'close' and save the index.
139
+ def close(path=nil, index_path=self.class.index_path(path), overwrite=false)
140
+ case
141
+ when io_index.kind_of?(ExternalIndex)
142
+ io_index.close(index_path, overwrite)
143
+ when index_path != nil
144
+ ExternalIndex[*io_index].close(index_path, overwrite)
145
+ end
146
+
147
+ super(path, overwrite)
148
+ end
149
+
150
+ # Returns another instance of self.class; the new instance will
151
+ # be cached if self is cached.
152
+ def another
153
+ self.class.new(nil, cached? ? [] : io_index.another)
154
+ end
155
+
156
+ public
157
+
158
+ # Converts an string read from io into an entry. By default
159
+ # the string is simply returned.
160
+ def str_to_entry(str)
161
+ str
162
+ end
163
+
164
+ # Converts an entry into a string. By default this method
165
+ # returns entry.to_s.
166
+ def entry_to_str(entry)
167
+ entry.to_s
168
+ end
169
+
170
+ # Clears the io_index, and yields io and the io_index to the
171
+ # block for reindexing. The io is flushed and rewound before
172
+ # being yielded to the block. Returns self
173
+ def reset_index
174
+ io_index.clear
175
+ io.flush
176
+ io.rewind
177
+ yield(io, io_index) if block_given?
178
+ self
179
+ end
180
+
181
+ alias reindex reset_index
182
+
183
+ # The speed of reindex_by_regexp is dictated by how fast the underlying
184
+ # code can match the pattern. Under ideal conditions (ie a very simple
185
+ # regexp), it will be as fast as reindex_by_sep.
186
+ def reindex_by_regexp(pattern=/\r?\n/, options={})
187
+ options = {
188
+ :range_or_span => nil,
189
+ :blksize => 8388608,
190
+ :carryover_limit => 8388608
191
+ }.merge(options)
192
+
193
+ reset_index do |io, index|
194
+ span = options[:range_or_span] || io.default_span
195
+ blksize = options[:blksize]
196
+ carryover_limit = options[:carryover_limit]
197
+
198
+ io.scan(span, blksize, carryover_limit) do |scan_pos, string|
199
+ scanner = StringScanner.new(string)
200
+ while advanced = scanner.search_full(pattern, true, false)
201
+ break unless advanced > 0
202
+
203
+ index << [scan_pos, advanced]
204
+ scan_pos += advanced
205
+ end
206
+
207
+ # allow a blockfor monitoring
208
+ yield if block_given?
209
+ scanner.rest_size
210
+ end
211
+ end
212
+ end
213
+
214
+ def reindex_by_sep(sep_str=$/, options={})
215
+ sep_str = sep_str.to_s
216
+ options = {
217
+ :sep_regexp => Regexp.new(sep_str),
218
+ :sep_length => sep_str.length,
219
+ :entry_follows_sep => false,
220
+ :exclude_sep => false,
221
+ :range_or_span => nil,
222
+ :blksize => 8388608,
223
+ :carryover_limit => 8388608
224
+ }.merge(options)
225
+
226
+ regexp = options[:sep_regexp]
227
+ sep_length = options[:sep_length]
228
+ entry_follows_sep = options[:entry_follows_sep]
229
+ exclude_sep = options[:exclude_sep]
230
+
231
+ mode = case
232
+ when !entry_follows_sep && !exclude_sep then 0
233
+ when entry_follows_sep && exclude_sep then 1
234
+ when entry_follows_sep && !exclude_sep then 2
235
+ when !entry_follows_sep && exclude_sep then 3
236
+ end
237
+
238
+ reset_index do |io, index|
239
+ # calculate default span after resetio_index in case any flush needs to happen
240
+ span = options[:range_or_span] || io.default_span
241
+ blksize = options[:blksize]
242
+ carryover_limit = options[:carryover_limit]
243
+
244
+ remainder = io.scan(span, blksize, carryover_limit) do |scan_pos, string|
245
+ scanner = StringScanner.new(string)
246
+
247
+ # When the entry follows the separator, the scanner must
248
+ # be set right after the separator for the first entry, so
249
+ # that the search will find the beginning of the next entry.
250
+ if scan_pos == 0 && entry_follows_sep
251
+ scanner.pos = sep_length
252
+ scan_pos = sep_length
253
+ end
254
+
255
+ # Scan for entries documents by looking for the beginning
256
+ # of the next entry, signaling the end of the current entry.
257
+ while advanced = scanner.skip_until(regexp)
258
+
259
+ # adjust indicies as needed...
260
+ io_index << case mode
261
+ when 0 then [scan_pos, advanced]
262
+ when 2 then [scan_pos-sep_length, advanced]
263
+ else [scan_pos, advanced-sep_length]
264
+ end
265
+
266
+ scan_pos += advanced
267
+ end
268
+
269
+ # allow a blockfor monitoring
270
+ yield if block_given?
271
+ scanner.rest_size
272
+ end
273
+
274
+ # Unless the io is empty, there will be a remaining entry that
275
+ # doesn't get scanned when the entry follows the separator.
276
+ # Add the entry here.
277
+ if entry_follows_sep && io.length != 0
278
+ io_index << if exclude_sep
279
+ [io.length - remainder, remainder]
280
+ else
281
+ [io.length - remainder - sep_length, remainder + sep_length]
282
+ end
283
+ end
284
+ end
285
+ end
286
+
287
+ ###########################
288
+ # Array methods
289
+ ###########################
290
+
291
+ # def &(another)
292
+ # not_implemented
293
+ # end
294
+
295
+ # def *(arg)
296
+ # not_implemented
297
+ # end
298
+
299
+ def +(another)
300
+ self.concat(another)
301
+ end
302
+
303
+ # def -(another)
304
+ # not_implemented
305
+ # end
306
+
307
+ def <<(obj)
308
+ self[length] = obj
309
+ self
310
+ end
311
+
312
+ def <=>(another)
313
+ case another
314
+ when Array
315
+ if another.length < self.length
316
+ # if another is equal to the matching subset of self,
317
+ # then self is obviously the longer array and wins.
318
+ result = (self.to_a(another.length) <=> another)
319
+ result == 0 ? 1 : result
320
+ else
321
+ self.to_a <=> another
322
+ end
323
+ when ExternalArray
324
+ # if indexes are equal, additional
325
+ # 'quick' comparisons are allowed
326
+ if self.io_index == another.io_index
327
+
328
+ # equal in comparison if the ios are equal
329
+ return 0 if self.io.quick_compare(another.io)
330
+ end
331
+
332
+ self.io.flush
333
+ another.io.flush
334
+
335
+ # should chunk compare
336
+ if another.length > self.length
337
+ result = (self.to_a <=> another.to_a(self.length))
338
+ result == 0 ? -1 : result
339
+ elsif another.length < self.length
340
+ result = (self.to_a(another.length) <=> another.to_a)
341
+ result == 0 ? 1 : result
342
+ else
343
+ self.to_a <=> another.to_a
344
+ end
345
+ else
346
+ raise TypeError.new("can't convert from #{another.class} to ExternalArchive or Array")
347
+ end
348
+ end
349
+
350
+ def ==(another)
351
+ case another
352
+ when Array
353
+ # test simply based on length
354
+ return false unless self.length == another.length
355
+
356
+ # compare arrays
357
+ self.to_a == another
358
+
359
+ when ExternalArchive
360
+ # test simply based on length
361
+ return false unless self.length == another.length
362
+
363
+ # if indexes are equal, additional
364
+ # 'quick' comparisons are allowed
365
+ if self.io_index == another.io_index
366
+
367
+ # equal in comparison if the ios are equal
368
+ #, (self.io_index.buffer_size/2).ceil) ??
369
+ return true if self.io.sort_compare(another.io) == 0
370
+ end
371
+
372
+ # compare arrays
373
+ self.to_a == another.to_a
374
+ else
375
+ false
376
+ end
377
+ end
378
+
379
+ # Element Reference — Returns the entry at index, or returns an array starting
380
+ # at start and continuing for length entries, or returns an array specified
381
+ # by range. Negative indices count backward from the end of self (-1 is the last
382
+ # element). Returns nil if the index (or starting index) is out of range.
383
+ #
384
+ # a = ExternalArchive[ "a", "b", "c", "d", "e" ]
385
+ # a[2] + a[0] + a[1] #=> "cab"
386
+ # a[6] #=> nil
387
+ # a[1, 2] #=> [ "b", "c" ]
388
+ # a[1..3] #=> [ "b", "c", "d" ]
389
+ # a[4..7] #=> [ "e" ]
390
+ # a[6..10] #=> nil
391
+ # a[-3, 3] #=> [ "c", "d", "e" ]
392
+ # # special cases
393
+ # a[5] #=> nil
394
+ # a[5, 1] #=> []
395
+ # a[5..10] #=> []
396
+ #
397
+ def [](input, length=nil)
398
+ # two call types are required because while ExternalIndex can take
399
+ # a nil length, Array cannot and index can be either
400
+ entry_indicies = (length == nil ? io_index[input] : io_index[input, length])
401
+
402
+ case
403
+ when entry_indicies == nil || entry_indicies.empty?
404
+ # for conformance with array range retrieval,
405
+ # simply return nil and [] indicies
406
+ entry_indicies
407
+
408
+ when length == nil && !input.kind_of?(Range)
409
+ # a single entry was specified, read it
410
+ entry_start, entry_length = entry_indicies
411
+ io.pos = entry_start
412
+ str_to_entry( io.read(entry_length) )
413
+
414
+ else
415
+ # multiple entries were specified, collect each
416
+ pos = nil
417
+ entry_indicies.collect do |(entry_start, entry_length)|
418
+ next if entry_start == nil
419
+
420
+ # only set io position if necessary
421
+ unless pos == entry_start
422
+ pos = entry_start
423
+ io.pos = pos
424
+ end
425
+
426
+ pos += entry_length
427
+
428
+ # read entry
429
+ str_to_entry( io.read(entry_length) )
430
+ end
431
+ end
432
+ end
433
+
434
+ # Element Assignment — Sets the entry at index, or replaces a subset starting at start
435
+ # and continuing for length entries, or replaces a subset specified by range.
436
+ # A negative indices will count backward from the end of self. Inserts elements if
437
+ # length is zero. If nil is used in the second and third form, deletes elements from
438
+ # self. An IndexError is raised if a negative index points past the beginning of self.
439
+ # See also push, and unshift.
440
+ #
441
+ # a = ExternalArchive.new
442
+ # a[4] = "4"; a #=> [nil, nil, nil, nil, "4"]
443
+ # a[0, 3] = [ 'a', 'b', 'c' ]; a #=> ["a", "b", "c", nil, "4"]
444
+ # a[1..2] = [ '1', '2' ]; a #=> ["a", '1', '2', nil, "4"]
445
+ # a[0, 2] = "?"; a #=> ["?", '2', nil, "4"]
446
+ # a[0..2] = "A"; a #=> ["A", "4"]
447
+ # a[-1] = "Z"; a #=> ["A", "Z"]
448
+ # a[1..-1] = nil; a #=> ["A"]
449
+ #
450
+ def []=(*args)
451
+ raise ArgumentError, "wrong number of arguments (1 for 2)" if args.length < 2
452
+
453
+ one, two, value = args
454
+ if args.length == 2
455
+ value = two
456
+ two = nil
457
+ end
458
+
459
+ one = convert_to_int(one)
460
+ case one
461
+ when Fixnum
462
+ if one < 0
463
+ one += length
464
+ raise IndexError, "index #{one} out of range" if one < 0
465
+ end
466
+
467
+ entry_start = io.length
468
+ io.pos = entry_start
469
+
470
+ if two == nil
471
+ # simple insertion
472
+ # (note it is important to write the entry to io
473
+ # first as a check that io is open for writing)
474
+
475
+ entry_length = io.write( entry_to_str(value) )
476
+ io.length += entry_length
477
+ io_index[one] = [entry_start, entry_length]
478
+
479
+ else
480
+ values = case value
481
+ when Array then value
482
+ when ExternalArchive
483
+ # special case, self will be reading and
484
+ # writing from the same io, producing
485
+ # incorrect results
486
+
487
+ # potential to load a huge amount of data
488
+ value == self ? value.to_a : value
489
+ else convert_to_ary(value)
490
+ end
491
+
492
+ # write each value to self, collecting the indicies
493
+ indicies = []
494
+ values.each do |value|
495
+ entry_length = io.write( entry_to_str(value) )
496
+ indicies << [entry_start, entry_length]
497
+
498
+ io.length += entry_length
499
+ entry_start += entry_length
500
+ end
501
+
502
+ # register the indicies
503
+ io_index[one, two] = indicies
504
+ end
505
+
506
+ when Range
507
+ raise TypeError, "can't convert Range into Integer" unless two == nil
508
+ start, length, total = split_range(one)
509
+
510
+ raise RangeError, "#{one} out of range" if start < 0
511
+ self[start, length < 0 ? 0 : length + 1] = value
512
+
513
+ when nil
514
+ raise TypeError, "no implicit conversion from nil to integer"
515
+ else
516
+ raise TypeError, "can't convert #{one.class} into Integer"
517
+ end
518
+ end
519
+
520
+ # def abbrev(pattern=nil)
521
+ # not_implemented
522
+ # end
523
+
524
+ # def assoc(obj)
525
+ # not_implemented
526
+ # end
527
+
528
+ # Returns entry at index
529
+ def at(index)
530
+ self[index]
531
+ end
532
+
533
+ # Removes all elements from _self_.
534
+ def clear
535
+ io.truncate(0)
536
+ io_index.clear
537
+ self
538
+ end
539
+
540
+ def compact
541
+ # TODO - optimize?
542
+ another = self.another
543
+ each do |item|
544
+ another << item unless item == nil
545
+ end
546
+ another
547
+ end
548
+
549
+ # def compact!
550
+ # not_implemented
551
+ # end
552
+
553
+ def concat(another)
554
+ case another
555
+ when Array, ExternalArchive
556
+ self[length, another.length] = another
557
+ else
558
+ raise TypeError.new("can't convert #{another.class} into ExternalArchive or Array")
559
+ end
560
+ self
561
+ end
562
+
563
+ # def dclone
564
+ # not_implemented
565
+ # end
566
+
567
+ # def delete(obj)
568
+ # not_implemented
569
+ # end
570
+
571
+ # def delete_at(index)
572
+ # not_implemented
573
+ # end
574
+
575
+ # def delete_if # :yield: item
576
+ # not_implemented
577
+ # end
578
+
579
+ # Calls block once for each element string in self, passing that string as a parameter.
580
+ def each_str(&block) # :yield: string
581
+ # tracking the position using a local variable
582
+ # is faster than calling io.pos.
583
+ pos = nil
584
+ io_index.each do |(start, length)|
585
+ if start == nil
586
+ yield("")
587
+ next
588
+ end
589
+
590
+ # only set io position if necessary
591
+ unless pos == start
592
+ pos = start
593
+ io.pos = pos
594
+ end
595
+
596
+ # advance position
597
+ pos += length
598
+
599
+ # yield entry string
600
+ yield io.read(length)
601
+ end
602
+ self
603
+ end
604
+
605
+ # Calls block once for each element in self, passing that element as a parameter.
606
+ def each(&block) # :yield: item
607
+ each_str do |str|
608
+ # yield entry
609
+ yield str_to_entry(str)
610
+ end
611
+ end
612
+
613
+ # Same as each, but passes the index of the element instead of the element itself.
614
+ def eachio_index(&block) # :yield: index
615
+ 0.upto(length-1, &block)
616
+ self
617
+ end
618
+
619
+ # def fetch(index, default=nil, &block)
620
+ # index += index_length if index < 0
621
+ # val = (index >= length ? default : self[index])
622
+ # block_given? ? yield(val) : val
623
+ # end
624
+ #
625
+ # def fill(*args)
626
+ # not_implemented
627
+ # end
628
+
629
+ # def flatten
630
+ # not_implemented
631
+ # end
632
+
633
+ # def flatten!
634
+ # not_implemented
635
+ # end
636
+
637
+ # def frozen?
638
+ # not_implemented
639
+ # end
640
+
641
+ # def hash
642
+ # not_implemented
643
+ # end
644
+
645
+ # def include?(obj)
646
+ # not_implemented
647
+ # end
648
+
649
+ # def index(obj)
650
+ # not_implemented
651
+ # end
652
+ #
653
+ # def indexes(*args)
654
+ # values_at(*args)
655
+ # end
656
+ #
657
+ # def indicies(*args)
658
+ # values_at(*args)
659
+ # end
660
+
661
+ # def replace(other)
662
+ # not_implemented
663
+ # end
664
+
665
+ # def insert(index, *obj)
666
+ # self[index] = obj
667
+ # end
668
+
669
+ # def inspect
670
+ # not_implemented
671
+ # end
672
+
673
+ # def join(sep=$,)
674
+ # not_implemented
675
+ # end
676
+
677
+ # Returns the last n entries (default 1)
678
+ def last(n=nil)
679
+ return self[-1] if n.nil?
680
+
681
+ start = length-n
682
+ start = 0 if start < 0
683
+ self[start, n]
684
+ end
685
+
686
+ # Returns the number of entries in self
687
+ def length
688
+ io_index.length
689
+ end
690
+
691
+ # Returns the number of non-nil elements in self. May be zero.
692
+ # def nitems
693
+ # count = self.length
694
+ # io_index.each do |(start, length)|
695
+ # # the logic of this search is that nil,
696
+ # # (and only nil ?) can have an entry
697
+ # # length of 5: nil.to_yaml == "--- \n"
698
+ # count -= 1 if length == nil || length == 5
699
+ # end
700
+ # count
701
+ # end
702
+
703
+ # def pack(aTemplateString)
704
+ # not_implemented
705
+ # end
706
+
707
+ # def pop
708
+ # not_implemented
709
+ # end
710
+
711
+ # def pretty_print(q)
712
+ # not_implemented
713
+ # end
714
+
715
+ # def pretty_print_cycle(q)
716
+ # not_implemented
717
+ # end
718
+
719
+ def push(*obj)
720
+ obj.each {|obj| self << obj }
721
+ self
722
+ end
723
+
724
+ # def quote
725
+ # not_implemented
726
+ # end
727
+
728
+ # def rassoc(key)
729
+ # not_implemented
730
+ # end
731
+
732
+ # def replace(another)
733
+ # not_implemented
734
+ # end
735
+
736
+ # def reverse
737
+ # not_implemented
738
+ # end
739
+
740
+ # def reverse!
741
+ # not_implemented
742
+ # end
743
+
744
+ def reverse_each_str(&block) # :yield: string
745
+ io_index.reverse_each do |(start,length)|
746
+ next if start == nil
747
+
748
+ # A more optimized approach would
749
+ # read in a chunk of entries and
750
+ # iterate over them?
751
+ io.pos = start
752
+
753
+ # yield entry string
754
+ yield io.read(length)
755
+ end
756
+ self
757
+ end
758
+
759
+ def reverse_each # :yield: item
760
+ reverse_each_str do |str|
761
+ yield( str_to_entry(str) )
762
+ end
763
+ end
764
+
765
+ # def rindex(obj)
766
+ # not_implemented
767
+ # end
768
+
769
+ # def select # :yield: item
770
+ # not_implemented
771
+ # end
772
+
773
+ # def shift
774
+ # not_implemented
775
+ # end
776
+
777
+ # Alias for length
778
+ def size
779
+ length
780
+ end
781
+
782
+ # def slice(*args)
783
+ # self.call(:[], *args)
784
+ # end
785
+
786
+ # def slice!(*args)
787
+ # not_implemented
788
+ # end
789
+
790
+ def to_a(length=self.length)
791
+ length == 0 ? [] : self[0, length]
792
+ end
793
+
794
+ # def to_ary
795
+ # not_implemented
796
+ # end
797
+
798
+ # Returns _self_.join.
799
+ # def to_s
800
+ # self.join
801
+ # end
802
+
803
+ # def to_yaml(opts={})
804
+ # self[0, self.length].to_yaml(opts)
805
+ # end
806
+
807
+ # def transpose
808
+ # not_implemented
809
+ # end
810
+
811
+ # def uniq
812
+ # not_implemented
813
+ # end
814
+
815
+ # def uniq!
816
+ # not_implemented
817
+ # end
818
+
819
+ # def unshift(*obj)
820
+ # not_implemented
821
+ # end
822
+
823
+ # Returns an array containing the chars in io corresponding to the given
824
+ # selector(s). The selectors may be either integer indices or ranges
825
+ def values_at(*selectors)
826
+ another = self.another
827
+ selectors.each do |s|
828
+ another << self[s]
829
+ end
830
+ another
831
+ end
832
+
833
+ # def yaml_initialize(tag, val)
834
+ # not_implemented
835
+ # end
836
+
837
+ # def |(another)
838
+ # not_implemented
839
+ # end
840
+ end