external 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. data/History +7 -0
  2. data/MIT-LICENSE +1 -3
  3. data/README +162 -127
  4. data/lib/external.rb +2 -3
  5. data/lib/external/base.rb +174 -47
  6. data/lib/external/chunkable.rb +131 -105
  7. data/lib/external/enumerable.rb +78 -33
  8. data/lib/external/io.rb +163 -398
  9. data/lib/external/patches/ruby_1_8_io.rb +31 -0
  10. data/lib/external/patches/windows_io.rb +53 -0
  11. data/lib/external/patches/windows_utils.rb +27 -0
  12. data/lib/external/utils.rb +148 -0
  13. data/lib/external_archive.rb +840 -0
  14. data/lib/external_array.rb +57 -0
  15. data/lib/external_index.rb +1053 -0
  16. metadata +42 -58
  17. data/lib/ext_arc.rb +0 -108
  18. data/lib/ext_arr.rb +0 -727
  19. data/lib/ext_ind.rb +0 -1120
  20. data/test/benchmarks/benchmarks_20070918.txt +0 -45
  21. data/test/benchmarks/benchmarks_20070921.txt +0 -91
  22. data/test/benchmarks/benchmarks_20071006.txt +0 -147
  23. data/test/benchmarks/test_copy_file.rb +0 -80
  24. data/test/benchmarks/test_pos_speed.rb +0 -47
  25. data/test/benchmarks/test_read_time.rb +0 -55
  26. data/test/cached_ext_ind_test.rb +0 -219
  27. data/test/check/benchmark_check.rb +0 -441
  28. data/test/check/namespace_conflicts_check.rb +0 -23
  29. data/test/check/pack_check.rb +0 -90
  30. data/test/ext_arc_test.rb +0 -286
  31. data/test/ext_arr/alt_sep.txt +0 -3
  32. data/test/ext_arr/cr_lf_input.txt +0 -3
  33. data/test/ext_arr/input.index +0 -0
  34. data/test/ext_arr/input.txt +0 -1
  35. data/test/ext_arr/inputb.index +0 -0
  36. data/test/ext_arr/inputb.txt +0 -1
  37. data/test/ext_arr/lf_input.txt +0 -3
  38. data/test/ext_arr/lines.txt +0 -19
  39. data/test/ext_arr/without_index.txt +0 -1
  40. data/test/ext_arr_test.rb +0 -534
  41. data/test/ext_ind_test.rb +0 -1472
  42. data/test/external/base_test.rb +0 -74
  43. data/test/external/chunkable_test.rb +0 -182
  44. data/test/external/index/input.index +0 -0
  45. data/test/external/index/inputb.index +0 -0
  46. data/test/external/io_test.rb +0 -414
  47. data/test/external_test_helper.rb +0 -31
  48. data/test/external_test_suite.rb +0 -4
  49. data/test/test_array.rb +0 -1192
@@ -0,0 +1,31 @@
1
+ module External
2
+ module Patches
3
+ module Ruby18Io # IO patch: remembers the generic mode of the extended IO and guards flush/fsync with it
4
+ attr_reader :generic_mode # set once, in self.extended, from Utils.mode(base)
5
+
6
+ def self.extended(base) # hook invoked when an IO is extended with this module
7
+ base.instance_variable_set(:@generic_mode, Utils.mode(base))
8
+ end
9
+
10
+ def flush
11
+ super unless generic_mode == "r" # not forwarded when the generic mode is "r"
12
+ end
13
+
14
+ def fsync
15
+ super unless generic_mode == "r" # not forwarded when the generic mode is "r"
16
+ end
17
+
18
+ # Quick comparison with another IO. Returns true if
19
+ # another == self, or if both are file-type IOs and
20
+ # their paths are equal.
21
+ def quick_compare(another)
22
+ self == another || (
23
+ (self.kind_of?(File) || self.kind_of?(Tempfile)) &&
24
+ (another.kind_of?(File) || another.kind_of?(Tempfile)) &&
25
+ self.path == another.path) # only the paths are compared, never the contents
26
+ end
27
+ end
28
+ end
29
+
30
+ Io::PATCHES << Patches::Ruby18Io # registers the patch; Io::PATCHES is defined outside this file
31
+ end
@@ -0,0 +1,53 @@
1
+ module External
2
+ module Patches
3
+
4
+ # Ruby on Windows has problems with files larger than ~2 gigabytes.
5
+ # Sizes return as negative, and positions cannot be set beyond the max
6
+ # size of a long (2147483647 ~ 2GB = 2475636895). WindowsIo corrects
7
+ # both of these issues thanks in large part to a bit of code taken from
8
+ # 'win32/file/stat' (http://rubyforge.org/projects/win32utils/).
9
+ #
10
+ module WindowsIo
11
+ POSITION_MAX = 2147483647 # maximum size of long (2**31 - 1)
12
+
13
+ def self.extended(base)
14
+ base.instance_variable_set("@pos", nil) # nil means "defer to the inherited pos"
15
+ end
16
+
17
+ # Modified to handle positions past the 2GB limit
18
+ def pos # :nodoc:
19
+ @pos || super
20
+ end
21
+
22
+ # Positions larger than the max value of a long cannot be directly given
23
+ # to the default +pos=+. This version incrementally seeks to positions
24
+ # beyond the maximum, if necessary.
25
+ #
26
+ # Note: setting the position beyond the 2GB limit requires the use of a
27
+ # sysseek statement. As such, errors will arise if you try to position
28
+ # an IO object that does not support this method (for example StringIO...
29
+ # but then what are you doing with a 2GB StringIO anyhow?)
30
+ def pos=(pos)
31
+ if pos < POSITION_MAX
32
+ super(pos)
33
+ @pos = nil # below the limit the inherited pos is used again
34
+ elsif @pos != pos # nothing to do when already recorded at this position
35
+ # note sysseek appears to be necessary here, rather than io.seek
36
+ @pos = pos # NOTE(review): nothing in this module updates @pos after a read/write - confirm pos stays accurate
37
+
38
+ super(POSITION_MAX)
39
+ pos -= POSITION_MAX # from here on pos is the distance still to travel
40
+
41
+ while pos > POSITION_MAX
42
+ pos -= POSITION_MAX
43
+ self.sysseek(POSITION_MAX, ::IO::SEEK_CUR)
44
+ end
45
+
46
+ self.sysseek(pos, ::IO::SEEK_CUR) # final relative hop of at most POSITION_MAX
47
+ end
48
+ end
49
+ end
50
+ end
51
+
52
+ Io::PATCHES << Patches::WindowsIo # registers the patch; Io::PATCHES is defined outside this file
53
+ end
@@ -0,0 +1,27 @@
1
+ # This code block modifies IO only if running on windows
2
+ require 'Win32API'
3
+
4
+ module External
5
+ module Utils
6
+ module_function
7
+
8
+ # Modfied to properly determine file lengths on Windows. Uses code
9
+ # from 'win32/file/stat' (http://rubyforge.org/projects/win32utils/)
10
+ def file_length(io)
11
+ io.fsync
12
+
13
+ # I would have liked to use win32/file/stat to do this... however, some issue
14
+ # arose involving FileUtils.cp, File.stat, and File::Stat.mode. cp raised an
15
+ # error because the mode would be nil for files. I wasn't sure how to fix it,
16
+ # so I've lifted the relevant code for pulling the large file size.
17
+
18
+ # Note this is a simplified version... if you base.path point to a chardev,
19
+ # this may need to be changed, because apparently the call to the Win32API
20
+ # may fail
21
+
22
+ stat_buf = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].pack('ISSssssIILILILIL')
23
+ Win32API.new('msvcrt', '_stat64', 'PP', 'I').call(io.path, stat_buf)
24
+ stat_buf[24, 4].unpack('L').first # Size of file in bytes
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,148 @@
1
+ module External
2
+ module Utils
3
+ module_function
4
+
5
+ # try_handle is a forwarding method allowing External::Utils to handle
6
+ # IO objects of different classes. try_handle infers a method
7
+ # name based on the class of the input and tries to forward the
8
+ # input io to that method within External::Utils. For instance:
9
+ #
10
+ # * the _mode method for StringIO is 'stringio_mode'
11
+ # * the _length method for StringIO is 'stringio_length'
12
+ #
13
+ # Nested classes have '::' replaced by '_'. Thus to add support
14
+ # for Some::Unknown::IO, extend External::Utils as below:
15
+ #
16
+ # module External::Utils
17
+ # def some_unknown_io_mode(io)
18
+ # ...
19
+ # end
20
+ #
21
+ # def some_unknown_io_length(io)
22
+ # ...
23
+ # end
24
+ # end
25
+ # (the methods must be callable on Utils itself, as module functions are)
26
+ # See stringio_mode and stringio_length for more details.
27
+ def try_handle(io, method)
28
+ method_name = io.class.to_s.downcase.gsub(/::/, "_") + "_#{method}" # e.g. StringIO + "mode" => "stringio_mode"
29
+ if Utils.respond_to?(method_name)
30
+ Utils.send(method_name, io)
31
+ else
32
+ raise "cannot determine #{method} for '%s'" % io.class # RuntimeError for unsupported io classes
33
+ end
34
+ end
35
+
36
+ # Determines the generic mode of the input io using the _mode
37
+ # method for the input io class. By default Utils provides _mode
38
+ # methods for File, Tempfile, and StringIO. The return string
39
+ # is determined as follows:
40
+ #
41
+ # readable & writable:: r+
42
+ # readable:: r
43
+ # writable:: w
44
+ #
45
+ # The _mode method takes the input io and should return an array
46
+ # specifying whether or not io is readable and writable
47
+ # (ie [readable, writable]).
48
+ #
49
+ # See try_handle for more details.
50
+ def mode(io)
51
+ readable, writable = try_handle(io, "mode")
52
+
53
+ case
54
+ when readable && writable then "r+"
55
+ when readable then "r"
56
+ when writable then "w"
57
+ else # neither flag was reported
58
+ # occurs for r+ mode, for some reason
59
+ "r+"
60
+ end
61
+ end
62
+
63
+ # Determines the length of the input io using the _length method
64
+ # for the input io class. Non-External::Io inputs are extended
65
+ # with Io in this process, and their own length method is used.
66
+ #
67
+ # The _length method takes the input io, and should return the
68
+ # current length of the input io (ie a flush operation may be
69
+ # required).
70
+ #
71
+ # See try_handle for more details.
72
+ def length(io)
73
+ case io
74
+ when Io then try_handle(io, "length")
75
+ else
76
+ io.extend Io # permanently extends the caller's object
77
+ io.length # presumably provided by Io (defined outside this file) - confirm
78
+ end
79
+ end
80
+
81
+ # Returns an array of bools, [readable, writable], determining
82
+ # if the input File is readable and writable.
83
+ def file_mode(io)
84
+ begin
85
+ dup = io.dup # probe a duplicate so that the caller's io is never closed
86
+
87
+ # determine readable/writable by sending close methods
88
+ # to the duplicated Io. If the io cannot be closed for
89
+ # read/write then it will raise an error, indicating that
90
+ # it was not open in the given mode.
91
+ [:close_read, :close_write].collect do |method|
92
+ begin
93
+ dup.send(method)
94
+ true
95
+ rescue(IOError)
96
+ false
97
+ end
98
+ end
99
+ ensure
100
+ # Be sure that the dup is fully closed before proceeding!
101
+ # (Otherwise Tempfiles will not be properly disposed of
102
+ # ... at least on Windows, perhaps on others)
103
+ dup.close if dup && !dup.closed? # dup is nil here when io.dup itself raised
104
+ end
105
+ end
106
+
107
+ # Returns the length of the input File
108
+ def file_length(io)
109
+ io.fsync
110
+ File.size(io.path)
111
+ end
112
+
113
+ # Returns an array of bools, [readable, writable], determining if
114
+ # the File held in the input Tempfile's @tmpfile is readable and writable.
115
+ def tempfile_mode(io)
116
+ file_mode(io.instance_variable_get(:@tmpfile)) # NOTE(review): reaches into Tempfile's private @tmpfile ivar
117
+ end
118
+
119
+ # Returns the length of the input Tempfile
120
+ def tempfile_length(io)
121
+ file_length(io)
122
+ end
123
+
124
+ # Returns an array of bools determining if the input StringIO
125
+ # is readable and writable.
126
+ #
127
+ # s = StringIO.new("abcde", "r+")
128
+ # External::Utils.stringio_mode(s) # => [true, true]
129
+ #
130
+ def stringio_mode(io)
131
+ [!io.closed_read?, !io.closed_write?] # [readable, writable]
132
+ end
133
+
134
+ # Returns the length of the string underlying the input StringIO
135
+ #
136
+ # s = StringIO.new("abcde", "r+")
137
+ # External::Utils.stringio_length(s) # => 5
138
+ #
139
+ def stringio_length(io)
140
+ io.string.length
141
+ end
142
+ end
143
+ end
144
+
145
+ # Apply platform-specific patches
146
+ # case RUBY_PLATFORM
147
+ # when 'java'
148
+ # end
@@ -0,0 +1,840 @@
1
+ require 'external/base'
2
+ require 'external_index'
3
+
4
+ #--
5
+ # later separate out individual objects logically
6
+ # If writing, create new files:
7
+ # - base/object_id.aio (new file for receiving appends)
8
+ # - base/object_id._index (copy of existing index -- made on first insertion)
9
+ # - in index, -index indicates object_id.aio file whereas +index indicates original file
10
+ # - .consolidate(rename) resolves changes in index into the object_id file, renaming as needed
11
+ # requires index rewrite as well, to remove negatives
12
+ #
13
+ # If appending, ONLY allow << and all changes get committed to the original file.
14
+ #
15
+ # This should allow returning of new arrayio objects under read/write conditions
16
+ # By default read-only. No insertions. New ExternalArchive objects inherit parent mode.
17
+ #
18
+ # Independent modes:
19
+ # - r
20
+ # - r+
21
+ # - For safety, w/w+ will by default act as r/r+, simply creating new .aio and .index files
22
+ # changes to the originals will NOT be made unless .consolidate(rename) is used. Allow option io_w => true
23
+ # - b ALWAYS on with Windows
24
+ #++
25
+
26
+ # ExternalArchive provides array-like access to archival data stored on disk.
27
+ # ExternalArchives consist of an IO object and an index of [start, length]
28
+ # pairs which indicate the start position and length of entries in the IO.
29
+ #
30
+ class ExternalArchive < External::Base
31
+ class << self
32
+
33
+ # Array-like constructor for an ExternalArchive.
34
+ def [](*args)
35
+ extarc = new
36
+ extarc.concat(args)
37
+ extarc
38
+ end
39
+
40
+ # Returns the default io index filepath for path:
41
+ #
42
+ # ExternalArchive.index_path("/path/to/file.txt") # => "/path/to/file.index"
43
+ #
44
+ def index_path(path)
45
+ path ? path.chomp(File.extname(path)) + '.index' : nil
46
+ end
47
+
48
+ # Initializes an instance of self with File.open(path, mode) as an io.
49
+ # As with File.open, the instance will be passed to the block and
50
+ # closed when the block returns. If no block is given, open returns
51
+ # the new instance.
52
+ #
53
+ # By default the instance will be initialized with an ExternalIndex
54
+ # io_index, linked to index_path(path). The instance will be
55
+ # automatically reindexed if it is empty but it's io is not.
56
+ #
57
+ # Options (specify using symbols):
58
+ # io_index:: Specifies the io_index manually. A filepath may be
59
+ # provided and it will be used instead of index_path(path).
60
+ # Array and ExternalIndex values are used directly.
61
+ # reindex:: Forces a call to reindex; using auto reindexing, reindex
62
+ # is normally only called when the instance is empty
63
+ # and the instance io is not. (default false)
64
+ # auto_reindex:: Turns on or off auto reindexing (default true)
65
+ #
66
+ def open(path, mode="rb", options={})
67
+ options = {
68
+ :io_index => nil,
69
+ :reindex => false,
70
+ :auto_reindex => true
71
+ }.merge(options)
72
+
73
+ index = options[:io_index]
74
+ if index == nil
75
+ index = index_path(path)
76
+ FileUtils.touch(index) unless File.exists?(index)
77
+ end
78
+
79
+ io_index = case index
80
+ when Array, ExternalIndex then index
81
+ else ExternalIndex.open(index, 'r+', :format => 'II')
82
+ end
83
+
84
+ io = path == nil ? nil : File.open(path, mode)
85
+ extarc = new(io, io_index)
86
+
87
+ # reindex if necessary
88
+ if options[:reindex] || (options[:auto_reindex] && extarc.empty? && extarc.io.length > 0)
89
+ extarc.reindex
90
+ end
91
+
92
+ if block_given?
93
+ begin
94
+ yield(extarc)
95
+ ensure
96
+ extarc.close
97
+ end
98
+ else
99
+ extarc
100
+ end
101
+ end
102
+ end
103
+
104
+ # The underlying index of [position, length] arrays
105
+ # indicating where entries in the io are located.
106
+ attr_reader :io_index
107
+
108
+ def initialize(io=nil, io_index=nil)
109
+ super(io)
110
+ @io_index = io_index || []
111
+ end
112
+
113
+ # Returns true if io_index is an Array.
114
+ def cached?
115
+ io_index.kind_of?(Array)
116
+ end
117
+
118
+ # Turns on or off caching by converting io_index
119
+ # to an Array (cache=true) or to an ExternalIndex
120
+ # (cache=false). Does nothing if already in that state.
121
+ def cache=(input)
122
+ case
123
+ when input && !cached?
124
+ cache = io_index.to_a # read the whole index into memory before closing it
125
+ io_index.close
126
+ @io_index = cache
127
+
128
+ when !input && cached?
129
+ io_index << {:format => 'II'} # presumably read as trailing options by ExternalIndex.[] - confirm
130
+ @io_index = ExternalIndex[*io_index]
131
+
132
+ end
133
+ end
134
+
135
+ # Closes self as in External::Base#close. An ExternalIndex io_index
136
+ # is closed as well, using index_path. When io_index is not an
137
+ # ExternalIndex and index_path is given, one is temporarily created
138
+ # with the current io_index content to 'close' and save the index.
139
+ def close(path=nil, index_path=self.class.index_path(path), overwrite=false)
140
+ case
141
+ when io_index.kind_of?(ExternalIndex)
142
+ io_index.close(index_path, overwrite)
143
+ when index_path != nil # cached (Array) index: only written out when a destination is known
144
+ ExternalIndex[*io_index].close(index_path, overwrite)
145
+ end
146
+
147
+ super(path, overwrite)
148
+ end
149
+
150
+ # Returns another instance of self.class; the new instance will
151
+ # be cached if self is cached.
152
+ def another
153
+ self.class.new(nil, cached? ? [] : io_index.another)
154
+ end
155
+
156
+ public
157
+
158
+ # Converts an string read from io into an entry. By default
159
+ # the string is simply returned.
160
+ def str_to_entry(str)
161
+ str
162
+ end
163
+
164
+ # Converts an entry into a string. By default this method
165
+ # returns entry.to_s.
166
+ def entry_to_str(entry)
167
+ entry.to_s
168
+ end
169
+
170
+ # Clears the io_index, and yields io and the io_index to the
171
+ # block for reindexing. The io is flushed and rewound before
172
+ # being yielded to the block. Returns self
173
+ def reset_index
174
+ io_index.clear
175
+ io.flush
176
+ io.rewind
177
+ yield(io, io_index) if block_given?
178
+ self
179
+ end
180
+
181
+ alias reindex reset_index
182
+
183
+ # The speed of reindex_by_regexp is dictated by how fast the underlying
184
+ # code can match the pattern. Under ideal conditions (ie a very simple
185
+ # regexp), it will be as fast as reindex_by_sep.
186
+ def reindex_by_regexp(pattern=/\r?\n/, options={})
187
+ options = {
188
+ :range_or_span => nil,
189
+ :blksize => 8388608, # 8 * 1024 * 1024
190
+ :carryover_limit => 8388608
191
+ }.merge(options)
192
+
193
+ reset_index do |io, index|
194
+ span = options[:range_or_span] || io.default_span
195
+ blksize = options[:blksize]
196
+ carryover_limit = options[:carryover_limit]
197
+
198
+ io.scan(span, blksize, carryover_limit) do |scan_pos, string|
199
+ scanner = StringScanner.new(string)
200
+ while advanced = scanner.search_full(pattern, true, false)
201
+ break unless advanced > 0 # stop on a zero-length advance, which would never move the scanner
202
+
203
+ index << [scan_pos, advanced] # entry runs from scan_pos through the end of the match
204
+ scan_pos += advanced
205
+ end
206
+
207
+ # allow a block for monitoring
208
+ yield if block_given?
209
+ scanner.rest_size # block result: the unscanned remainder, presumably carried over by io.scan - confirm
210
+ end
211
+ end
212
+ end
213
+
214
+ def reindex_by_sep(sep_str=$/, options={})
215
+ sep_str = sep_str.to_s
216
+ options = {
217
+ :sep_regexp => Regexp.new(sep_str),
218
+ :sep_length => sep_str.length,
219
+ :entry_follows_sep => false,
220
+ :exclude_sep => false,
221
+ :range_or_span => nil,
222
+ :blksize => 8388608,
223
+ :carryover_limit => 8388608
224
+ }.merge(options)
225
+
226
+ regexp = options[:sep_regexp]
227
+ sep_length = options[:sep_length]
228
+ entry_follows_sep = options[:entry_follows_sep]
229
+ exclude_sep = options[:exclude_sep]
230
+
231
+ mode = case
232
+ when !entry_follows_sep && !exclude_sep then 0
233
+ when entry_follows_sep && exclude_sep then 1
234
+ when entry_follows_sep && !exclude_sep then 2
235
+ when !entry_follows_sep && exclude_sep then 3
236
+ end
237
+
238
+ reset_index do |io, index|
239
+ # calculate default span after resetio_index in case any flush needs to happen
240
+ span = options[:range_or_span] || io.default_span
241
+ blksize = options[:blksize]
242
+ carryover_limit = options[:carryover_limit]
243
+
244
+ remainder = io.scan(span, blksize, carryover_limit) do |scan_pos, string|
245
+ scanner = StringScanner.new(string)
246
+
247
+ # When the entry follows the separator, the scanner must
248
+ # be set right after the separator for the first entry, so
249
+ # that the search will find the beginning of the next entry.
250
+ if scan_pos == 0 && entry_follows_sep
251
+ scanner.pos = sep_length
252
+ scan_pos = sep_length
253
+ end
254
+
255
+ # Scan for entries documents by looking for the beginning
256
+ # of the next entry, signaling the end of the current entry.
257
+ while advanced = scanner.skip_until(regexp)
258
+
259
+ # adjust indicies as needed...
260
+ io_index << case mode
261
+ when 0 then [scan_pos, advanced]
262
+ when 2 then [scan_pos-sep_length, advanced]
263
+ else [scan_pos, advanced-sep_length]
264
+ end
265
+
266
+ scan_pos += advanced
267
+ end
268
+
269
+ # allow a blockfor monitoring
270
+ yield if block_given?
271
+ scanner.rest_size
272
+ end
273
+
274
+ # Unless the io is empty, there will be a remaining entry that
275
+ # doesn't get scanned when the entry follows the separator.
276
+ # Add the entry here.
277
+ if entry_follows_sep && io.length != 0
278
+ io_index << if exclude_sep
279
+ [io.length - remainder, remainder]
280
+ else
281
+ [io.length - remainder - sep_length, remainder + sep_length]
282
+ end
283
+ end
284
+ end
285
+ end
286
+
287
+ ###########################
288
+ # Array methods
289
+ ###########################
290
+
291
+ # def &(another)
292
+ # not_implemented
293
+ # end
294
+
295
+ # def *(arg)
296
+ # not_implemented
297
+ # end
298
+
299
+ def +(another) # NOTE: unlike Array#+, this appends to self in place (via concat) and returns self
300
+ self.concat(another)
301
+ end
302
+
303
+ # def -(another)
304
+ # not_implemented
305
+ # end
306
+
307
+ def <<(obj)
308
+ self[length] = obj
309
+ self
310
+ end
311
+
312
+ def <=>(another)
313
+ case another
314
+ when Array
315
+ if another.length < self.length
316
+ # if another is equal to the matching subset of self,
317
+ # then self is obviously the longer array and wins.
318
+ result = (self.to_a(another.length) <=> another)
319
+ result == 0 ? 1 : result
320
+ else
321
+ self.to_a <=> another
322
+ end
323
+ when ExternalArray
324
+ # if indexes are equal, additional
325
+ # 'quick' comparisons are allowed
326
+ if self.io_index == another.io_index
327
+
328
+ # equal in comparison if the ios are equal
329
+ return 0 if self.io.quick_compare(another.io)
330
+ end
331
+
332
+ self.io.flush
333
+ another.io.flush
334
+
335
+ # should chunk compare
336
+ if another.length > self.length
337
+ result = (self.to_a <=> another.to_a(self.length))
338
+ result == 0 ? -1 : result
339
+ elsif another.length < self.length
340
+ result = (self.to_a(another.length) <=> another.to_a)
341
+ result == 0 ? 1 : result
342
+ else
343
+ self.to_a <=> another.to_a
344
+ end
345
+ else
346
+ raise TypeError.new("can't convert from #{another.class} to ExternalArchive or Array")
347
+ end
348
+ end
349
+
350
+ def ==(another)
351
+ case another
352
+ when Array
353
+ # test simply based on length
354
+ return false unless self.length == another.length
355
+
356
+ # compare arrays
357
+ self.to_a == another
358
+
359
+ when ExternalArchive
360
+ # test simply based on length
361
+ return false unless self.length == another.length
362
+
363
+ # if indexes are equal, additional
364
+ # 'quick' comparisons are allowed
365
+ if self.io_index == another.io_index
366
+
367
+ # equal in comparison if the ios are equal
368
+ #, (self.io_index.buffer_size/2).ceil) ??
369
+ return true if self.io.sort_compare(another.io) == 0
370
+ end
371
+
372
+ # compare arrays
373
+ self.to_a == another.to_a
374
+ else
375
+ false
376
+ end
377
+ end
378
+
379
+ # Element Reference — Returns the entry at index, or returns an array starting
380
+ # at start and continuing for length entries, or returns an array specified
381
+ # by range. Negative indices count backward from the end of self (-1 is the last
382
+ # element). Returns nil if the index (or starting index) is out of range.
383
+ #
384
+ # a = ExternalArchive[ "a", "b", "c", "d", "e" ]
385
+ # a[2] + a[0] + a[1] #=> "cab"
386
+ # a[6] #=> nil
387
+ # a[1, 2] #=> [ "b", "c" ]
388
+ # a[1..3] #=> [ "b", "c", "d" ]
389
+ # a[4..7] #=> [ "e" ]
390
+ # a[6..10] #=> nil
391
+ # a[-3, 3] #=> [ "c", "d", "e" ]
392
+ # # special cases
393
+ # a[5] #=> nil
394
+ # a[5, 1] #=> []
395
+ # a[5..10] #=> []
396
+ #
397
+ def [](input, length=nil)
398
+ # two call types are required because while ExternalIndex can take
399
+ # a nil length, Array cannot and index can be either
400
+ entry_indicies = (length == nil ? io_index[input] : io_index[input, length])
401
+
402
+ case
403
+ when entry_indicies == nil || entry_indicies.empty?
404
+ # for conformance with array range retrieval,
405
+ # simply return nil and [] indicies
406
+ entry_indicies
407
+
408
+ when length == nil && !input.kind_of?(Range)
409
+ # a single entry was specified, read it
410
+ entry_start, entry_length = entry_indicies
411
+ io.pos = entry_start
412
+ str_to_entry( io.read(entry_length) )
413
+
414
+ else
415
+ # multiple entries were specified, collect each
416
+ pos = nil
417
+ entry_indicies.collect do |(entry_start, entry_length)|
418
+ next if entry_start == nil
419
+
420
+ # only set io position if necessary
421
+ unless pos == entry_start
422
+ pos = entry_start
423
+ io.pos = pos
424
+ end
425
+
426
+ pos += entry_length
427
+
428
+ # read entry
429
+ str_to_entry( io.read(entry_length) )
430
+ end
431
+ end
432
+ end
433
+
434
+ # Element Assignment — Sets the entry at index, or replaces a subset starting at start
435
+ # and continuing for length entries, or replaces a subset specified by range.
436
+ # A negative indices will count backward from the end of self. Inserts elements if
437
+ # length is zero. If nil is used in the second and third form, deletes elements from
438
+ # self. An IndexError is raised if a negative index points past the beginning of self.
439
+ # See also push, and unshift.
440
+ #
441
+ # a = ExternalArchive.new
442
+ # a[4] = "4"; a #=> [nil, nil, nil, nil, "4"]
443
+ # a[0, 3] = [ 'a', 'b', 'c' ]; a #=> ["a", "b", "c", nil, "4"]
444
+ # a[1..2] = [ '1', '2' ]; a #=> ["a", '1', '2', nil, "4"]
445
+ # a[0, 2] = "?"; a #=> ["?", '2', nil, "4"]
446
+ # a[0..2] = "A"; a #=> ["A", "4"]
447
+ # a[-1] = "Z"; a #=> ["A", "Z"]
448
+ # a[1..-1] = nil; a #=> ["A"]
449
+ #
450
+ def []=(*args)
451
+ raise ArgumentError, "wrong number of arguments (1 for 2)" if args.length < 2
452
+
453
+ one, two, value = args
454
+ if args.length == 2
455
+ value = two
456
+ two = nil
457
+ end
458
+
459
+ one = convert_to_int(one)
460
+ case one
461
+ when Fixnum
462
+ if one < 0
463
+ one += length
464
+ raise IndexError, "index #{one} out of range" if one < 0
465
+ end
466
+
467
+ entry_start = io.length
468
+ io.pos = entry_start
469
+
470
+ if two == nil
471
+ # simple insertion
472
+ # (note it is important to write the entry to io
473
+ # first as a check that io is open for writing)
474
+
475
+ entry_length = io.write( entry_to_str(value) )
476
+ io.length += entry_length
477
+ io_index[one] = [entry_start, entry_length]
478
+
479
+ else
480
+ values = case value
481
+ when Array then value
482
+ when ExternalArchive
483
+ # special case, self will be reading and
484
+ # writing from the same io, producing
485
+ # incorrect results
486
+
487
+ # potential to load a huge amount of data
488
+ value == self ? value.to_a : value
489
+ else convert_to_ary(value)
490
+ end
491
+
492
+ # write each value to self, collecting the indicies
493
+ indicies = []
494
+ values.each do |value|
495
+ entry_length = io.write( entry_to_str(value) )
496
+ indicies << [entry_start, entry_length]
497
+
498
+ io.length += entry_length
499
+ entry_start += entry_length
500
+ end
501
+
502
+ # register the indicies
503
+ io_index[one, two] = indicies
504
+ end
505
+
506
+ when Range
507
+ raise TypeError, "can't convert Range into Integer" unless two == nil
508
+ start, length, total = split_range(one)
509
+
510
+ raise RangeError, "#{one} out of range" if start < 0
511
+ self[start, length < 0 ? 0 : length + 1] = value
512
+
513
+ when nil
514
+ raise TypeError, "no implicit conversion from nil to integer"
515
+ else
516
+ raise TypeError, "can't convert #{one.class} into Integer"
517
+ end
518
+ end
519
+
520
+ # def abbrev(pattern=nil)
521
+ # not_implemented
522
+ # end
523
+
524
+ # def assoc(obj)
525
+ # not_implemented
526
+ # end
527
+
528
+ # Returns entry at index
529
+ def at(index)
530
+ self[index]
531
+ end
532
+
533
+ # Removes all elements from _self_.
534
+ def clear
535
+ io.truncate(0)
536
+ io_index.clear
537
+ self
538
+ end
539
+
540
+ def compact
541
+ # TODO - optimize?
542
+ another = self.another
543
+ each do |item|
544
+ another << item unless item == nil
545
+ end
546
+ another
547
+ end
548
+
549
+ # def compact!
550
+ # not_implemented
551
+ # end
552
+
553
+ def concat(another)
554
+ case another
555
+ when Array, ExternalArchive
556
+ self[length, another.length] = another
557
+ else
558
+ raise TypeError.new("can't convert #{another.class} into ExternalArchive or Array")
559
+ end
560
+ self
561
+ end
562
+
563
+ # def dclone
564
+ # not_implemented
565
+ # end
566
+
567
+ # def delete(obj)
568
+ # not_implemented
569
+ # end
570
+
571
+ # def delete_at(index)
572
+ # not_implemented
573
+ # end
574
+
575
+ # def delete_if # :yield: item
576
+ # not_implemented
577
+ # end
578
+
579
+ # Calls block once for each element string in self, passing that string as a parameter.
580
+ def each_str(&block) # :yield: string
581
+ # tracking the position using a local variable
582
+ # is faster than calling io.pos.
583
+ pos = nil
584
+ io_index.each do |(start, length)|
585
+ if start == nil
586
+ yield("")
587
+ next
588
+ end
589
+
590
+ # only set io position if necessary
591
+ unless pos == start
592
+ pos = start
593
+ io.pos = pos
594
+ end
595
+
596
+ # advance position
597
+ pos += length
598
+
599
+ # yield entry string
600
+ yield io.read(length)
601
+ end
602
+ self
603
+ end
604
+
605
+ # Calls block once for each element in self, passing that element as a parameter.
606
+ def each(&block) # :yield: item
607
+ each_str do |str|
608
+ # yield entry
609
+ yield str_to_entry(str)
610
+ end
611
+ end
612
+
613
+ # Same as each, but passes the index of each entry instead of the entry. NOTE(review): name looks like a mangled each_index - confirm.
614
+ def eachio_index(&block) # :yield: index
615
+ 0.upto(length-1, &block)
616
+ self
617
+ end
618
+
619
+ # def fetch(index, default=nil, &block)
620
+ # index += index_length if index < 0
621
+ # val = (index >= length ? default : self[index])
622
+ # block_given? ? yield(val) : val
623
+ # end
624
+ #
625
+ # def fill(*args)
626
+ # not_implemented
627
+ # end
628
+
629
+ # def flatten
630
+ # not_implemented
631
+ # end
632
+
633
+ # def flatten!
634
+ # not_implemented
635
+ # end
636
+
637
+ # def frozen?
638
+ # not_implemented
639
+ # end
640
+
641
+ # def hash
642
+ # not_implemented
643
+ # end
644
+
645
+ # def include?(obj)
646
+ # not_implemented
647
+ # end
648
+
649
+ # def index(obj)
650
+ # not_implemented
651
+ # end
652
+ #
653
+ # def indexes(*args)
654
+ # values_at(*args)
655
+ # end
656
+ #
657
+ # def indicies(*args)
658
+ # values_at(*args)
659
+ # end
660
+
661
+ # def replace(other)
662
+ # not_implemented
663
+ # end
664
+
665
+ # def insert(index, *obj)
666
+ # self[index] = obj
667
+ # end
668
+
669
+ # def inspect
670
+ # not_implemented
671
+ # end
672
+
673
+ # def join(sep=$,)
674
+ # not_implemented
675
+ # end
676
+
677
+ # Returns the last entry (self[-1]) when n is nil, otherwise self[start, n] over the last n entries
678
+ def last(n=nil)
679
+ return self[-1] if n.nil?
680
+
681
+ start = length-n
682
+ start = 0 if start < 0 # n larger than length: begin at the first entry
683
+ self[start, n]
684
+ end
685
+
686
+ # Returns the number of entries in self
687
+ def length
688
+ io_index.length
689
+ end
690
+
691
+ # Returns the number of non-nil elements in self. May be zero.
692
+ # def nitems
693
+ # count = self.length
694
+ # io_index.each do |(start, length)|
695
+ # # the logic of this search is that nil,
696
+ # # (and only nil ?) can have an entry
697
+ # # length of 5: nil.to_yaml == "--- \n"
698
+ # count -= 1 if length == nil || length == 5
699
+ # end
700
+ # count
701
+ # end
702
+
703
+ # def pack(aTemplateString)
704
+ # not_implemented
705
+ # end
706
+
707
+ # def pop
708
+ # not_implemented
709
+ # end
710
+
711
+ # def pretty_print(q)
712
+ # not_implemented
713
+ # end
714
+
715
+ # def pretty_print_cycle(q)
716
+ # not_implemented
717
+ # end
718
+
719
+ def push(*obj)
720
+ obj.each {|obj| self << obj }
721
+ self
722
+ end
723
+
724
+ # def quote
725
+ # not_implemented
726
+ # end
727
+
728
+ # def rassoc(key)
729
+ # not_implemented
730
+ # end
731
+
732
+ # def replace(another)
733
+ # not_implemented
734
+ # end
735
+
736
+ # def reverse
737
+ # not_implemented
738
+ # end
739
+
740
+ # def reverse!
741
+ # not_implemented
742
+ # end
743
+
744
+ def reverse_each_str(&block) # :yield: string
745
+ io_index.reverse_each do |(start,length)|
746
+ next if start == nil
747
+
748
+ # A more optimized approach would
749
+ # read in a chunk of entries and
750
+ # iterate over them?
751
+ io.pos = start
752
+
753
+ # yield entry string
754
+ yield io.read(length)
755
+ end
756
+ self
757
+ end
758
+
759
+ def reverse_each # :yield: item
760
+ reverse_each_str do |str|
761
+ yield( str_to_entry(str) )
762
+ end
763
+ end
764
+
765
+ # def rindex(obj)
766
+ # not_implemented
767
+ # end
768
+
769
+ # def select # :yield: item
770
+ # not_implemented
771
+ # end
772
+
773
+ # def shift
774
+ # not_implemented
775
+ # end
776
+
777
+ # Alias for length
778
+ def size
779
+ length
780
+ end
781
+
782
+ # def slice(*args)
783
+ # self.call(:[], *args)
784
+ # end
785
+
786
+ # def slice!(*args)
787
+ # not_implemented
788
+ # end
789
+
790
+ def to_a(length=self.length)
791
+ length == 0 ? [] : self[0, length]
792
+ end
793
+
794
+ # def to_ary
795
+ # not_implemented
796
+ # end
797
+
798
+ # Returns _self_.join.
799
+ # def to_s
800
+ # self.join
801
+ # end
802
+
803
+ # def to_yaml(opts={})
804
+ # self[0, self.length].to_yaml(opts)
805
+ # end
806
+
807
+ # def transpose
808
+ # not_implemented
809
+ # end
810
+
811
+ # def uniq
812
+ # not_implemented
813
+ # end
814
+
815
+ # def uniq!
816
+ # not_implemented
817
+ # end
818
+
819
+ # def unshift(*obj)
820
+ # not_implemented
821
+ # end
822
+
823
+ # Returns a new instance (see another) filled with the results of self[selector]
824
+ # for each of the given selector(s); each result is appended as a single entry.
825
+ def values_at(*selectors)
826
+ another = self.another
827
+ selectors.each do |s|
828
+ another << self[s] # NOTE(review): a Range selector yields an Array here, stored as one entry - confirm intent
829
+ end
830
+ another
831
+ end
832
+
833
+ # def yaml_initialize(tag, val)
834
+ # not_implemented
835
+ # end
836
+
837
+ # def |(another)
838
+ # not_implemented
839
+ # end
840
+ end