external 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History +7 -0
- data/MIT-LICENSE +1 -3
- data/README +162 -127
- data/lib/external.rb +2 -3
- data/lib/external/base.rb +174 -47
- data/lib/external/chunkable.rb +131 -105
- data/lib/external/enumerable.rb +78 -33
- data/lib/external/io.rb +163 -398
- data/lib/external/patches/ruby_1_8_io.rb +31 -0
- data/lib/external/patches/windows_io.rb +53 -0
- data/lib/external/patches/windows_utils.rb +27 -0
- data/lib/external/utils.rb +148 -0
- data/lib/external_archive.rb +840 -0
- data/lib/external_array.rb +57 -0
- data/lib/external_index.rb +1053 -0
- metadata +42 -58
- data/lib/ext_arc.rb +0 -108
- data/lib/ext_arr.rb +0 -727
- data/lib/ext_ind.rb +0 -1120
- data/test/benchmarks/benchmarks_20070918.txt +0 -45
- data/test/benchmarks/benchmarks_20070921.txt +0 -91
- data/test/benchmarks/benchmarks_20071006.txt +0 -147
- data/test/benchmarks/test_copy_file.rb +0 -80
- data/test/benchmarks/test_pos_speed.rb +0 -47
- data/test/benchmarks/test_read_time.rb +0 -55
- data/test/cached_ext_ind_test.rb +0 -219
- data/test/check/benchmark_check.rb +0 -441
- data/test/check/namespace_conflicts_check.rb +0 -23
- data/test/check/pack_check.rb +0 -90
- data/test/ext_arc_test.rb +0 -286
- data/test/ext_arr/alt_sep.txt +0 -3
- data/test/ext_arr/cr_lf_input.txt +0 -3
- data/test/ext_arr/input.index +0 -0
- data/test/ext_arr/input.txt +0 -1
- data/test/ext_arr/inputb.index +0 -0
- data/test/ext_arr/inputb.txt +0 -1
- data/test/ext_arr/lf_input.txt +0 -3
- data/test/ext_arr/lines.txt +0 -19
- data/test/ext_arr/without_index.txt +0 -1
- data/test/ext_arr_test.rb +0 -534
- data/test/ext_ind_test.rb +0 -1472
- data/test/external/base_test.rb +0 -74
- data/test/external/chunkable_test.rb +0 -182
- data/test/external/index/input.index +0 -0
- data/test/external/index/inputb.index +0 -0
- data/test/external/io_test.rb +0 -414
- data/test/external_test_helper.rb +0 -31
- data/test/external_test_suite.rb +0 -4
- data/test/test_array.rb +0 -1192
@@ -0,0 +1,31 @@
|
|
1
|
+
module External
  module Patches
    # IO patch registered in Io::PATCHES. Skips flush/fsync for IOs whose
    # generic mode is "r" and adds a cheap IO comparison.
    module Ruby18Io
      # Generic mode string captured when the module is extended onto an IO.
      attr_reader :generic_mode

      # Stores Utils.mode(base) on the extended object as @generic_mode.
      def self.extended(base)
        base.instance_variable_set(:@generic_mode, Utils.mode(base))
      end

      # Flushes via the underlying implementation unless the generic mode is "r".
      def flush
        return if generic_mode == "r"
        super
      end

      # Syncs via the underlying implementation unless the generic mode is "r".
      def fsync
        return if generic_mode == "r"
        super
      end

      # Quick comparison with another IO. Returns true if
      # another == self, or if both are file-type IOs (File
      # or Tempfile) and their paths are equal.
      def quick_compare(another)
        return true if self == another

        file_like = lambda { |io| io.kind_of?(File) || io.kind_of?(Tempfile) }
        file_like.call(self) && file_like.call(another) && self.path == another.path
      end
    end
  end

  Io::PATCHES << Patches::Ruby18Io
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module External
  module Patches

    # Ruby on Windows has problems with files larger than ~2 gigabytes.
    # Sizes return as negative, and positions cannot be set beyond the
    # maximum value of a long (2147483647, ~2GB). WindowsIo works around
    # the positioning issue using code adapted from
    # 'win32/file/stat' (http://rubyforge.org/projects/win32utils/).
    #
    module WindowsIo
      POSITION_MAX = 2147483647 # maximum size of long

      # Starts every extended IO with no cached large position.
      def self.extended(base)
        base.instance_variable_set("@pos", nil)
      end

      # Modified to handle positions past the 2Gb limit: the position
      # cached by +pos=+ wins over the native value when present.
      def pos # :nodoc:
        @pos || super
      end

      # Positions larger than the max value of a long cannot be directly given
      # to the default +pos=+. This version incrementally seeks to positions
      # beyond the maximum, if necessary.
      #
      # Note: setting the position beyond the 2Gb limit requires the use of a
      # sysseek statement. As such, errors will arise if you try to position
      # an IO object that does not support this method (for example StringIO).
      def pos=(pos)
        if pos < POSITION_MAX
          # small positions go straight through; drop any cached large position
          super(pos)
          @pos = nil
        elsif @pos != pos
          # note sysseek appears to be necessary here, rather than io.seek
          @pos = pos

          # jump to the limit, then walk forward in steps of at most POSITION_MAX
          super(POSITION_MAX)
          remaining = pos - POSITION_MAX

          while remaining > POSITION_MAX
            remaining -= POSITION_MAX
            self.sysseek(POSITION_MAX, ::IO::SEEK_CUR)
          end

          self.sysseek(remaining, ::IO::SEEK_CUR)
        end
      end
    end
  end

  Io::PATCHES << Patches::WindowsIo
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# This code block modifies IO only if running on windows
|
2
|
+
require 'Win32API'
|
3
|
+
|
4
|
+
module External
  module Utils
    module_function

    # Modified to properly determine file lengths on Windows. Uses code
    # from 'win32/file/stat' (http://rubyforge.org/projects/win32utils/)
    #
    # Syncs io, calls msvcrt's _stat64 on io.path, and returns the file
    # size in bytes read from the resulting stat buffer.
    def file_length(io)
      io.fsync

      # I would have liked to use win32/file/stat to do this... however, some issue
      # arose involving FileUtils.cp, File.stat, and File::Stat.mode. cp raised an
      # error because the mode would be nil for files. I wasn't sure how to fix it,
      # so I've lifted the relevant code for pulling the large file size.

      # Note this is a simplified version... if io.path points to a chardev,
      # this may need to be changed, because apparently the call to the Win32API
      # may fail

      # NOTE(review): the packed buffer below is 52 bytes; confirm it is large
      # enough for the struct _stat64 writes on the target runtime.
      stat_buf = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].pack('ISSssssIILILILIL')
      Win32API.new('msvcrt', '_stat64', 'PP', 'I').call(io.path, stat_buf)

      # st_size is a 64-bit field starting at byte 24; read all 8 bytes so
      # sizes of 4GB and above are not truncated to their low 32 bits.
      stat_buf[24, 8].unpack('Q').first # Size of file in bytes
    end
  end
end
|
@@ -0,0 +1,148 @@
|
|
1
|
+
module External
  module Utils
    module_function

    # try_handle is a forwarding method allowing non-File, non-Tempfile
    # IO objects to be handled. It infers a handler name from the class
    # of the input and forwards io to that method on Utils. For instance:
    #
    # * the _mode method for StringIO is 'stringio_mode'
    # * the _length method for StringIO is 'stringio_length'
    #
    # Nested classes have '::' replaced by '_'. Thus to add support
    # for Some::Unknown::IO, define module methods on Utils named:
    #
    #   some_unknown_io_mode(io)
    #   some_unknown_io_length(io)
    #
    # Raises a RuntimeError when no such handler exists.
    # See stringio_mode and stringio_length for more details.
    def try_handle(io, method)
      handler = io.class.to_s.downcase.gsub(/::/, "_") + "_#{method}"
      unless Utils.respond_to?(handler)
        raise "cannot determine #{method} for '%s'" % io.class
      end

      Utils.send(handler, io)
    end

    # Determines the generic mode of the input io using the _mode
    # method for the input io class. _mode methods are provided here
    # for File, Tempfile, and StringIO. The return string is
    # determined as follows:
    #
    # readable & writable:: r+
    # readable::            r
    # writable::            w
    #
    # The _mode method takes the input io and should return an array
    # specifying whether or not io is readable and writable
    # (ie [readable, writable]).
    #
    # See try_handle for more details.
    def mode(io)
      readable, writable = try_handle(io, "mode")

      case
      when readable && writable then "r+"
      when readable then "r"
      when writable then "w"
      else
        # file_mode reports [false, false] for r+ Files (neither
        # half can be closed on its own), so treat it as r+
        "r+"
      end
    end

    # Determines the length of the input io using the _length method
    # for the input io class. Non-External::Io inputs are extended
    # with Io in this process and asked for their length directly.
    #
    # The _length method takes the input io, and should return the
    # current length of the input io (ie a flush operation may be
    # required).
    #
    # See try_handle for more details.
    def length(io)
      case io
      when Io then try_handle(io, "length")
      else
        io.extend Io
        io.length
      end
    end

    # Returns an array of bools ([readable, writable]) for the input File.
    #
    # Readable/writable are probed by sending close_read/close_write to a
    # duplicate of io: a close that raises IOError means the io was not
    # open in that mode.
    def file_mode(io)
      copy = io.dup

      [:close_read, :close_write].collect do |closer|
        begin
          if copy.closed?
            # A successful close_read on a read-only File closes it completely.
            # Newer Rubies silently ignore close_write on a closed IO instead of
            # raising, so report false explicitly rather than a spurious true.
            false
          else
            copy.send(closer)
            true
          end
        rescue IOError
          false
        end
      end
    ensure
      # Be sure that the dup is fully closed before proceeding!
      # (Otherwise Tempfiles will not be properly disposed of
      # ... at least on Windows, perhaps on others)
      copy.close if copy && !copy.closed?
    end

    # Returns the length of the input File (syncs io, then sizes its path)
    def file_length(io)
      io.fsync
      File.size(io.path)
    end

    # Returns an array of bools determining if the input Tempfile
    # is readable and writable, based on its @tmpfile instance variable.
    def tempfile_mode(io)
      file_mode(io.instance_variable_get(:@tmpfile))
    end

    # Returns the length of the input Tempfile
    def tempfile_length(io)
      file_length(io)
    end

    # Returns an array of bools determining if the input StringIO
    # is readable and writable.
    #
    #   s = StringIO.new("abcde", "r+")
    #   External::Utils.stringio_mode(s)  # => [true, true]
    #
    def stringio_mode(io)
      [!io.closed_read?, !io.closed_write?]
    end

    # Returns the length of the input StringIO
    #
    #   s = StringIO.new("abcde", "r+")
    #   External::Utils.stringio_length(s)  # => 5
    #
    def stringio_length(io)
      io.string.length
    end
  end
end
|
144
|
+
|
145
|
+
# Apply platform-specific patches
|
146
|
+
# case RUBY_PLATFORM
|
147
|
+
# when 'java'
|
148
|
+
# end
|
@@ -0,0 +1,840 @@
|
|
1
|
+
require 'external/base'
|
2
|
+
require 'external_index'
|
3
|
+
|
4
|
+
#--
|
5
|
+
# later separate out individual objects logically
|
6
|
+
# If writing, create new files:
|
7
|
+
#   - base/object_id.aio (new file for receiving appends)
|
8
|
+
# - base/object_id._index (copy of existing index -- made on first insertion)
|
9
|
+
# - in index, -index indicates object_id.aio file whereas +index indicates original file
|
10
|
+
# - .consolidate(rename) resolves changes in index into the object_id file, renaming as needed
|
11
|
+
# requires index rewrite as well, to remove negatives
|
12
|
+
#
|
13
|
+
# If appending, ONLY allow << and all changes get committed to the original file.
|
14
|
+
#
|
15
|
+
# This should allow returning of new arrayio objects under read/write conditions
|
16
|
+
# By default read-only. No insertions. New ExternalArchive objects inherit parent mode.
|
17
|
+
#
|
18
|
+
# Independent modes:
|
19
|
+
# - r
|
20
|
+
# - r+
|
21
|
+
# - For safety, w/w+ will by default act as r/r+, simply creating new .aio and .index files
|
22
|
+
# changes to the originals will NOT be made unless .consolidate(rename) is used. Allow option io_w => true
|
23
|
+
# - b ALWAYS on with Windows
|
24
|
+
#++
|
25
|
+
|
26
|
+
# ExternalArchive provides array-like access to archival data stored on disk.
|
27
|
+
# ExternalArchives consist of an IO object and an index of [start, length]
|
28
|
+
# pairs which indicate the start position and length of entries in the IO.
|
29
|
+
#
|
30
|
+
class ExternalArchive < External::Base
|
31
|
+
class << self
|
32
|
+
|
33
|
+
# Array-like constructor for an ExternalArchive.
|
34
|
+
def [](*args)
  # build an empty instance, then load the arguments into it as entries
  archive = new
  archive.concat(args)
  archive
end
|
39
|
+
|
40
|
+
# Returns the default io index filepath for path:
|
41
|
+
#
|
42
|
+
# ExternalArchive.index_path("/path/to/file.txt") # => "/path/to/file.index"
|
43
|
+
#
|
44
|
+
def index_path(path)
  # no path, no index path
  return nil unless path

  # swap the extension (if any) for '.index'
  path.chomp(File.extname(path)) + '.index'
end
|
47
|
+
|
48
|
+
# Initializes an instance of self with File.open(path, mode) as an io.
|
49
|
+
# As with File.open, the instance will be passed to the block and
|
50
|
+
# closed when the block returns. If no block is given, open returns
|
51
|
+
# the new instance.
|
52
|
+
#
|
53
|
+
# By default the instance will be initialized with an ExternalIndex
|
54
|
+
# io_index, linked to index_path(path). The instance will be
|
55
|
+
# automatically reindexed if it is empty but its io is not.
|
56
|
+
#
|
57
|
+
# Options (specify using symbols):
|
58
|
+
# io_index:: Specifies the io_index manually. A filepath may be
|
59
|
+
# provided and it will be used instead of index_path(path).
|
60
|
+
# Array and ExternalIndex values are used directly.
|
61
|
+
# reindex:: Forces a call to reindex; using auto reindexing, reindex
|
62
|
+
# is normally only called when the instance is empty
|
63
|
+
# and the instance io is not. (default false)
|
64
|
+
# auto_reindex:: Turns on or off auto reindexing (default true)
|
65
|
+
#
|
66
|
+
def open(path, mode="rb", options={})
  options = {
    :io_index => nil,
    :reindex => false,
    :auto_reindex => true
  }.merge(options)

  # fall back to the default index file beside path, creating it if missing
  index = options[:io_index]
  if index == nil
    index = index_path(path)
    # File.exist? replaces the deprecated File.exists?, which newer Rubies no longer define
    FileUtils.touch(index) unless File.exist?(index)
  end

  # Array and ExternalIndex values are used directly; anything else is
  # treated as a filepath and opened as an ExternalIndex
  io_index = case index
  when Array, ExternalIndex then index
  else ExternalIndex.open(index, 'r+', :format => 'II')
  end

  io = path == nil ? nil : File.open(path, mode)
  extarc = new(io, io_index)

  # reindex if necessary
  if options[:reindex] || (options[:auto_reindex] && extarc.empty? && extarc.io.length > 0)
    extarc.reindex
  end

  # with a block: yield the instance and always close it afterwards;
  # without a block: hand the open instance back to the caller
  if block_given?
    begin
      yield(extarc)
    ensure
      extarc.close
    end
  else
    extarc
  end
end
|
102
|
+
end
|
103
|
+
|
104
|
+
# The underlying index of [position, length] arrays
|
105
|
+
# indicating where entries in the io are located.
|
106
|
+
attr_reader :io_index
|
107
|
+
|
108
|
+
def initialize(io=nil, io_index=nil)
  super(io)
  # default to an in-memory (Array) index when none is given
  @io_index = io_index ? io_index : []
end
|
112
|
+
|
113
|
+
# Returns true if io_index is an Array.
|
114
|
+
def cached?
  # an Array index means the index is held in memory
  index = io_index
  index.kind_of?(Array)
end
|
117
|
+
|
118
|
+
# Turns on or off caching by converting io_index
|
119
|
+
# to an Array (cache=true) or to an ExternalIndex
|
120
|
+
# (cache=false).
|
121
|
+
def cache=(input)
  if input && !cached?
    # pull the index contents into memory, then release the external index
    snapshot = io_index.to_a
    io_index.close
    @io_index = snapshot

  elsif !input && cached?
    # the format hash is appended so it is passed to ExternalIndex[]
    # along with the cached entries
    io_index << {:format => 'II'}
    @io_index = ExternalIndex[*io_index]

  end
end
|
134
|
+
|
135
|
+
# Closes self as in External::Base#close. An io_path may be
|
136
|
+
# specified to close io_index as well; when io_index is
|
137
|
+
# not an ExternalIndex, one is temporarily created with the
|
138
|
+
# current io_index content to 'close' and save the index.
|
139
|
+
def close(path=nil, index_path=self.class.index_path(path), overwrite=false)
  if io_index.kind_of?(ExternalIndex)
    # an external index closes itself
    io_index.close(index_path, overwrite)
  elsif index_path != nil
    # wrap the in-memory index in a temporary ExternalIndex to save it
    ExternalIndex[*io_index].close(index_path, overwrite)
  end

  super(path, overwrite)
end
|
149
|
+
|
150
|
+
# Returns another instance of self.class; the new instance will
|
151
|
+
# be cached if self is cached.
|
152
|
+
def another
  # cached instances get a fresh Array index; otherwise ask the index for a sibling
  fresh_index = cached? ? [] : io_index.another
  self.class.new(nil, fresh_index)
end
|
155
|
+
|
156
|
+
public
|
157
|
+
|
158
|
+
# Converts a string read from io into an entry.  By default
|
159
|
+
# the string is simply returned.
|
160
|
+
def str_to_entry(str)
  # identity conversion: the entry is the string itself
  str
end
|
163
|
+
|
164
|
+
# Converts an entry into a string. By default this method
|
165
|
+
# returns entry.to_s.
|
166
|
+
def entry_to_str(entry)
  # rely on the entry's own string representation
  entry.to_s
end
|
169
|
+
|
170
|
+
# Clears the io_index, and yields io and the io_index to the
|
171
|
+
# block for reindexing. The io is flushed and rewound before
|
172
|
+
# being yielded to the block. Returns self
|
173
|
+
def reset_index
  # start from an empty index
  io_index.clear

  # make pending writes visible and start reading from the beginning
  io.flush
  io.rewind

  yield(io, io_index) if block_given?
  self
end
|
180
|
+
|
181
|
+
alias reindex reset_index
|
182
|
+
|
183
|
+
# The speed of reindex_by_regexp is dictated by how fast the underlying
|
184
|
+
# code can match the pattern. Under ideal conditions (ie a very simple
|
185
|
+
# regexp), it will be as fast as reindex_by_sep.
|
186
|
+
def reindex_by_regexp(pattern=/\r?\n/, options={})
  defaults = {
    :range_or_span => nil,
    :blksize => 8388608,
    :carryover_limit => 8388608
  }
  options = defaults.merge(options)

  reset_index do |io, index|
    span = options[:range_or_span] || io.default_span

    io.scan(span, options[:blksize], options[:carryover_limit]) do |scan_pos, string|
      scanner = StringScanner.new(string)

      # each match ends an entry; stop when nothing matches or the scanner does not advance
      while (advanced = scanner.search_full(pattern, true, false)) && advanced > 0
        index << [scan_pos, advanced]
        scan_pos += advanced
      end

      # allow a block for monitoring
      yield if block_given?

      # the unscanned tail is reported back to io.scan
      scanner.rest_size
    end
  end
end
|
213
|
+
|
214
|
+
# Rebuilds io_index by scanning io for the separator string.
#
# Options:
# sep_regexp::         regexp used to find the separator (default: a literal match of sep_str)
# sep_length::         length of the separator (default: sep_str.length)
# entry_follows_sep::  true if each entry comes after its separator (default false)
# exclude_sep::        true to leave the separator out of each entry (default false)
# range_or_span, blksize, carryover_limit:: passed through to io.scan
#
# A block, if given, is called once per scanned chunk (for monitoring).
def reindex_by_sep(sep_str=$/, options={})
  sep_str = sep_str.to_s
  options = {
    # escape the separator so characters like '.', '|' or '*' match literally
    # rather than being read as regexp syntax
    :sep_regexp => Regexp.new(Regexp.escape(sep_str)),
    :sep_length => sep_str.length,
    :entry_follows_sep => false,
    :exclude_sep => false,
    :range_or_span => nil,
    :blksize => 8388608,
    :carryover_limit => 8388608
  }.merge(options)

  regexp = options[:sep_regexp]
  sep_length = options[:sep_length]
  entry_follows_sep = options[:entry_follows_sep]
  exclude_sep = options[:exclude_sep]

  reset_index do |io, index|
    # calculate default span after reset_index in case any flush needs to happen
    span = options[:range_or_span] || io.default_span
    blksize = options[:blksize]
    carryover_limit = options[:carryover_limit]

    remainder = io.scan(span, blksize, carryover_limit) do |scan_pos, string|
      scanner = StringScanner.new(string)

      # When the entry follows the separator, the scanner must
      # be set right after the separator for the first entry, so
      # that the search will find the beginning of the next entry.
      if scan_pos == 0 && entry_follows_sep
        scanner.pos = sep_length
        scan_pos = sep_length
      end

      # Scan for entries by looking for the beginning of the
      # next entry, signaling the end of the current entry.
      while advanced = scanner.skip_until(regexp)

        # adjust indices as needed...
        io_index << if exclude_sep
          # drop the separator from the entry
          [scan_pos, advanced - sep_length]
        elsif entry_follows_sep
          # pull the preceding separator into the entry
          [scan_pos - sep_length, advanced]
        else
          [scan_pos, advanced]
        end

        scan_pos += advanced
      end

      # allow a block for monitoring
      yield if block_given?
      scanner.rest_size
    end

    # Unless the io is empty, there will be a remaining entry that
    # doesn't get scanned when the entry follows the separator.
    # Add the entry here.
    if entry_follows_sep && io.length != 0
      io_index << if exclude_sep
        [io.length - remainder, remainder]
      else
        [io.length - remainder - sep_length, remainder + sep_length]
      end
    end
  end
end
|
286
|
+
|
287
|
+
###########################
|
288
|
+
# Array methods
|
289
|
+
###########################
|
290
|
+
|
291
|
+
# def &(another)
|
292
|
+
# not_implemented
|
293
|
+
# end
|
294
|
+
|
295
|
+
# def *(arg)
|
296
|
+
# not_implemented
|
297
|
+
# end
|
298
|
+
|
299
|
+
# Appends the entries of another to self via concat and returns the
# result of concat (note: self is modified).
def +(another)
  concat(another)
end
|
302
|
+
|
303
|
+
# def -(another)
|
304
|
+
# not_implemented
|
305
|
+
# end
|
306
|
+
|
307
|
+
# Appends obj as a new last entry and returns self, so calls may be chained.
def <<(obj)
  # assigning at index == length appends
  self[length] = obj
  self
end
|
311
|
+
|
312
|
+
# Comparison with an Array or another ExternalArchive. Entries are
# compared as arrays; when one side is a prefix of the other, the
# longer side is greater. Raises TypeError for any other input.
def <=>(another)
  case another
  when Array
    if another.length < self.length
      # if another is equal to the matching subset of self,
      # then self is obviously the longer array and wins.
      result = (self.to_a(another.length) <=> another)
      result == 0 ? 1 : result
    else
      self.to_a <=> another
    end
  when ExternalArchive
    # (matches the class named in the error message below and used by ==)

    # if indexes are equal, additional
    # 'quick' comparisons are allowed
    if self.io_index == another.io_index

      # equal in comparison if the ios are equal
      return 0 if self.io.quick_compare(another.io)
    end

    self.io.flush
    another.io.flush

    # should chunk compare
    if another.length > self.length
      result = (self.to_a <=> another.to_a(self.length))
      result == 0 ? -1 : result
    elsif another.length < self.length
      result = (self.to_a(another.length) <=> another.to_a)
      result == 0 ? 1 : result
    else
      self.to_a <=> another.to_a
    end
  else
    raise TypeError.new("can't convert from #{another.class} to ExternalArchive or Array")
  end
end
|
349
|
+
|
350
|
+
# Equality with an Array or another ExternalArchive: lengths must match
# and the entries must be equal. Anything else is never equal.
def ==(another)
  unless another.kind_of?(Array) || another.kind_of?(ExternalArchive)
    return false
  end

  # test simply based on length
  return false unless self.length == another.length

  # compare arrays
  return self.to_a == another if another.kind_of?(Array)

  # if indexes are equal, additional
  # 'quick' comparisons are allowed
  if self.io_index == another.io_index
    # equal in comparison if the ios are equal
    return true if self.io.sort_compare(another.io) == 0
  end

  # compare arrays
  self.to_a == another.to_a
end
|
378
|
+
|
379
|
+
# Element Reference — Returns the entry at index, or returns an array starting
|
380
|
+
# at start and continuing for length entries, or returns an array specified
|
381
|
+
# by range. Negative indices count backward from the end of self (-1 is the last
|
382
|
+
# element). Returns nil if the index (or starting index) is out of range.
|
383
|
+
#
|
384
|
+
# a = ExternalArchive[ "a", "b", "c", "d", "e" ]
|
385
|
+
# a[2] + a[0] + a[1] #=> "cab"
|
386
|
+
# a[6] #=> nil
|
387
|
+
# a[1, 2] #=> [ "b", "c" ]
|
388
|
+
# a[1..3] #=> [ "b", "c", "d" ]
|
389
|
+
# a[4..7] #=> [ "e" ]
|
390
|
+
# a[6..10] #=> nil
|
391
|
+
# a[-3, 3] #=> [ "c", "d", "e" ]
|
392
|
+
# # special cases
|
393
|
+
# a[5] #=> nil
|
394
|
+
# a[5, 1] #=> []
|
395
|
+
# a[5..10] #=> []
|
396
|
+
#
|
397
|
+
def [](input, length=nil)
  # two call types are required because while ExternalIndex can take
  # a nil length, Array cannot and index can be either
  entry_indicies = if length == nil
    io_index[input]
  else
    io_index[input, length]
  end

  # for conformance with array range retrieval,
  # simply return nil and [] indicies
  return entry_indicies if entry_indicies == nil || entry_indicies.empty?

  if length == nil && !input.kind_of?(Range)
    # a single entry was specified, read it
    entry_start, entry_length = entry_indicies
    io.pos = entry_start
    return str_to_entry( io.read(entry_length) )
  end

  # multiple entries were specified, collect each
  cursor = nil
  entry_indicies.collect do |(entry_start, entry_length)|
    next if entry_start == nil

    # only set io position if necessary
    if cursor != entry_start
      cursor = entry_start
      io.pos = cursor
    end

    # track where the read below leaves the io
    cursor += entry_length

    # read entry
    str_to_entry( io.read(entry_length) )
  end
end
|
433
|
+
|
434
|
+
# Element Assignment — Sets the entry at index, or replaces a subset starting at start
|
435
|
+
# and continuing for length entries, or replaces a subset specified by range.
|
436
|
+
# Negative indices will count backward from the end of self. Inserts elements if
|
437
|
+
# length is zero. If nil is used in the second and third form, deletes elements from
|
438
|
+
# self. An IndexError is raised if a negative index points past the beginning of self.
|
439
|
+
# See also push, and unshift.
|
440
|
+
#
|
441
|
+
# a = ExternalArchive.new
|
442
|
+
# a[4] = "4"; a #=> [nil, nil, nil, nil, "4"]
|
443
|
+
# a[0, 3] = [ 'a', 'b', 'c' ]; a #=> ["a", "b", "c", nil, "4"]
|
444
|
+
# a[1..2] = [ '1', '2' ]; a #=> ["a", '1', '2', nil, "4"]
|
445
|
+
# a[0, 2] = "?"; a #=> ["?", '2', nil, "4"]
|
446
|
+
# a[0..2] = "A"; a #=> ["A", "4"]
|
447
|
+
# a[-1] = "Z"; a #=> ["A", "Z"]
|
448
|
+
# a[1..-1] = nil; a #=> ["A"]
|
449
|
+
#
|
450
|
+
def []=(*args)
  raise ArgumentError, "wrong number of arguments (1 for 2)" if args.length < 2

  # accept both self[index] = value and self[start, length] = value
  one, two, value = args
  if args.length == 2
    value = two
    two = nil
  end

  one = convert_to_int(one)
  case one
  when Integer
    # (Integer rather than Fixnum: the Fixnum constant no longer exists on newer Rubies)
    if one < 0
      one += length
      raise IndexError, "index #{one} out of range" if one < 0
    end

    # new entries are always written at the end of io;
    # only the index decides where they appear in self
    entry_start = io.length
    io.pos = entry_start

    if two == nil
      # simple insertion
      # (note it is important to write the entry to io
      # first as a check that io is open for writing)

      entry_length = io.write( entry_to_str(value) )
      io.length += entry_length
      io_index[one] = [entry_start, entry_length]

    else
      values = case value
      when Array then value
      when ExternalArchive
        # special case, self will be reading and
        # writing from the same io, producing
        # incorrect results

        # potential to load a huge amount of data
        value == self ? value.to_a : value
      else convert_to_ary(value)
      end

      # write each value to self, collecting the indicies
      indicies = []
      values.each do |item|
        entry_length = io.write( entry_to_str(item) )
        indicies << [entry_start, entry_length]

        io.length += entry_length
        entry_start += entry_length
      end

      # register the indicies
      io_index[one, two] = indicies
    end

  when Range
    raise TypeError, "can't convert Range into Integer" unless two == nil
    start, span = split_range(one)

    raise RangeError, "#{one} out of range" if start < 0
    # re-dispatch as a [start, length] assignment
    self[start, span < 0 ? 0 : span + 1] = value

  when nil
    raise TypeError, "no implicit conversion from nil to integer"
  else
    raise TypeError, "can't convert #{one.class} into Integer"
  end
end
|
519
|
+
|
520
|
+
# def abbrev(pattern=nil)
|
521
|
+
# not_implemented
|
522
|
+
# end
|
523
|
+
|
524
|
+
# def assoc(obj)
|
525
|
+
# not_implemented
|
526
|
+
# end
|
527
|
+
|
528
|
+
# Returns entry at index
|
529
|
+
def at(index)
  # plain single-index lookup
  self[index]
end
|
532
|
+
|
533
|
+
# Removes all elements from _self_.
|
534
|
+
def clear
  # drop the stored data first, then the entries pointing into it
  io.truncate(0)
  io_index.clear
  self
end
|
539
|
+
|
540
|
+
# Returns a new instance (from another) holding the non-nil entries of self.
def compact
  # TODO - optimize?
  result = self.another
  each { |item| result << item unless item == nil }
  result
end
|
548
|
+
|
549
|
+
# def compact!
|
550
|
+
# not_implemented
|
551
|
+
# end
|
552
|
+
|
553
|
+
# Appends the entries of another (an Array or ExternalArchive) to self.
# Returns self; raises TypeError for any other input.
def concat(another)
  unless another.kind_of?(Array) || another.kind_of?(ExternalArchive)
    raise TypeError.new("can't convert #{another.class} into ExternalArchive or Array")
  end

  # assigning past the last entry appends
  self[length, another.length] = another
  self
end
|
562
|
+
|
563
|
+
# def dclone
|
564
|
+
# not_implemented
|
565
|
+
# end
|
566
|
+
|
567
|
+
# def delete(obj)
|
568
|
+
# not_implemented
|
569
|
+
# end
|
570
|
+
|
571
|
+
# def delete_at(index)
|
572
|
+
# not_implemented
|
573
|
+
# end
|
574
|
+
|
575
|
+
# def delete_if # :yield: item
|
576
|
+
# not_implemented
|
577
|
+
# end
|
578
|
+
|
579
|
+
# Calls block once for each element string in self, passing that string as a parameter.
|
580
|
+
def each_str(&block) # :yield: string
  # tracking the position using a local variable
  # is faster than calling io.pos.
  cursor = nil
  io_index.each do |(start, length)|
    if start == nil
      # entries without a start position yield an empty string
      yield("")
    else
      # only set io position if necessary
      if cursor != start
        cursor = start
        io.pos = cursor
      end

      # advance position to where the read below will leave the io
      cursor += length

      # yield entry string
      yield io.read(length)
    end
  end
  self
end
|
604
|
+
|
605
|
+
# Calls block once for each element in self, passing that element as a parameter.
|
606
|
+
def each(&block) # :yield: item
  # convert each stored string into an entry before yielding it
  each_str { |str| yield str_to_entry(str) }
end
|
612
|
+
|
613
|
+
# Same as each, but passes the index of the element instead of the element itself.
|
614
|
+
def each_index(&block) # :yield: index
  # Yields every valid index of self, from 0 up to length - 1, in
  # ascending order. Nothing is yielded when self is empty.
  # Returns self.
  length.times(&block)
  self
end

# The method was originally published under the name +eachio_index+, which
# looks like the result of a mistaken search-and-replace. The old name is
# kept so existing callers continue to work.
alias eachio_index each_index
|
618
|
+
|
619
|
+
# def fetch(index, default=nil, &block)
|
620
|
+
# index += index_length if index < 0
|
621
|
+
# val = (index >= length ? default : self[index])
|
622
|
+
# block_given? ? yield(val) : val
|
623
|
+
# end
|
624
|
+
#
|
625
|
+
# def fill(*args)
|
626
|
+
# not_implemented
|
627
|
+
# end
|
628
|
+
|
629
|
+
# def flatten
|
630
|
+
# not_implemented
|
631
|
+
# end
|
632
|
+
|
633
|
+
# def flatten!
|
634
|
+
# not_implemented
|
635
|
+
# end
|
636
|
+
|
637
|
+
# def frozen?
|
638
|
+
# not_implemented
|
639
|
+
# end
|
640
|
+
|
641
|
+
# def hash
|
642
|
+
# not_implemented
|
643
|
+
# end
|
644
|
+
|
645
|
+
# def include?(obj)
|
646
|
+
# not_implemented
|
647
|
+
# end
|
648
|
+
|
649
|
+
# def index(obj)
|
650
|
+
# not_implemented
|
651
|
+
# end
|
652
|
+
#
|
653
|
+
# def indexes(*args)
|
654
|
+
# values_at(*args)
|
655
|
+
# end
|
656
|
+
#
|
657
|
+
# def indicies(*args)
|
658
|
+
# values_at(*args)
|
659
|
+
# end
|
660
|
+
|
661
|
+
# def replace(other)
|
662
|
+
# not_implemented
|
663
|
+
# end
|
664
|
+
|
665
|
+
# def insert(index, *obj)
|
666
|
+
# self[index] = obj
|
667
|
+
# end
|
668
|
+
|
669
|
+
# def inspect
|
670
|
+
# not_implemented
|
671
|
+
# end
|
672
|
+
|
673
|
+
# def join(sep=$,)
|
674
|
+
# not_implemented
|
675
|
+
# end
|
676
|
+
|
677
|
+
# Returns the last n entries (default 1)
|
678
|
+
def last(n=nil)
  # Without an argument, returns the single entry at index -1.
  # With n, returns the slice self[start, n], where start is length - n
  # clamped so that it never drops below zero.
  if n.nil?
    self[-1]
  else
    offset = [length - n, 0].max
    self[offset, n]
  end
end
|
685
|
+
|
686
|
+
# Returns the number of entries in self
|
687
|
+
def length
  # The entry count is whatever io_index reports as its own length.
  index = io_index
  index.length
end
|
690
|
+
|
691
|
+
# Returns the number of non-nil elements in self. May be zero.
|
692
|
+
# def nitems
|
693
|
+
# count = self.length
|
694
|
+
# io_index.each do |(start, length)|
|
695
|
+
# # the logic of this search is that nil,
|
696
|
+
# # (and only nil ?) can have an entry
|
697
|
+
# # length of 5: nil.to_yaml == "--- \n"
|
698
|
+
# count -= 1 if length == nil || length == 5
|
699
|
+
# end
|
700
|
+
# count
|
701
|
+
# end
|
702
|
+
|
703
|
+
# def pack(aTemplateString)
|
704
|
+
# not_implemented
|
705
|
+
# end
|
706
|
+
|
707
|
+
# def pop
|
708
|
+
# not_implemented
|
709
|
+
# end
|
710
|
+
|
711
|
+
# def pretty_print(q)
|
712
|
+
# not_implemented
|
713
|
+
# end
|
714
|
+
|
715
|
+
# def pretty_print_cycle(q)
|
716
|
+
# not_implemented
|
717
|
+
# end
|
718
|
+
|
719
|
+
# Appends each of the given objects to self, in argument order, using <<.
# Calling it with no arguments is a no-op. Returns self.
def push(*obj)
  # The block parameter has its own name: reusing +obj+ shadows the splat
  # argument (and, on Ruby 1.8, rebinds it while iterating).
  obj.each { |item| self << item }
  self
end
|
723
|
+
|
724
|
+
# def quote
|
725
|
+
# not_implemented
|
726
|
+
# end
|
727
|
+
|
728
|
+
# def rassoc(key)
|
729
|
+
# not_implemented
|
730
|
+
# end
|
731
|
+
|
732
|
+
# def replace(another)
|
733
|
+
# not_implemented
|
734
|
+
# end
|
735
|
+
|
736
|
+
# def reverse
|
737
|
+
# not_implemented
|
738
|
+
# end
|
739
|
+
|
740
|
+
# def reverse!
|
741
|
+
# not_implemented
|
742
|
+
# end
|
743
|
+
|
744
|
+
# Walks io_index from the last entry to the first and yields the raw
# string of each entry. Returns self.
#
# NOTE(review): entries whose start is nil are skipped here, whereas
# each_str yields "" for them -- confirm the difference is intentional.
def reverse_each_str(&block) # :yield: string
  io_index.reverse_each do |(offset, size)|
    unless offset == nil
      # A more optimized approach would read in a chunk of entries and
      # iterate over them?
      io.pos = offset

      # hand the entry string to the caller
      yield io.read(size)
    end
  end
  self
end
|
758
|
+
|
759
|
+
# Same traversal as reverse_each_str, but every raw string is converted
# with str_to_entry before being yielded. The return value is whatever
# reverse_each_str returns.
def reverse_each # :yield: item
  reverse_each_str do |raw|
    entry = str_to_entry(raw)
    yield(entry)
  end
end
|
764
|
+
|
765
|
+
# def rindex(obj)
|
766
|
+
# not_implemented
|
767
|
+
# end
|
768
|
+
|
769
|
+
# def select # :yield: item
|
770
|
+
# not_implemented
|
771
|
+
# end
|
772
|
+
|
773
|
+
# def shift
|
774
|
+
# not_implemented
|
775
|
+
# end
|
776
|
+
|
777
|
+
# Alias for length
|
778
|
+
def size
  # Synonym for length: simply forwards to it.
  self.length
end
|
781
|
+
|
782
|
+
# def slice(*args)
|
783
|
+
# self.call(:[], *args)
|
784
|
+
# end
|
785
|
+
|
786
|
+
# def slice!(*args)
|
787
|
+
# not_implemented
|
788
|
+
# end
|
789
|
+
|
790
|
+
# Returns the first +length+ entries of self as produced by self[0, length];
# +length+ defaults to the full length of self. A length of zero
# short-circuits to an empty Array without reading anything.
def to_a(length=self.length)
  return [] if length == 0

  self[0, length]
end
|
793
|
+
|
794
|
+
# def to_ary
|
795
|
+
# not_implemented
|
796
|
+
# end
|
797
|
+
|
798
|
+
# Returns _self_.join.
|
799
|
+
# def to_s
|
800
|
+
# self.join
|
801
|
+
# end
|
802
|
+
|
803
|
+
# def to_yaml(opts={})
|
804
|
+
# self[0, self.length].to_yaml(opts)
|
805
|
+
# end
|
806
|
+
|
807
|
+
# def transpose
|
808
|
+
# not_implemented
|
809
|
+
# end
|
810
|
+
|
811
|
+
# def uniq
|
812
|
+
# not_implemented
|
813
|
+
# end
|
814
|
+
|
815
|
+
# def uniq!
|
816
|
+
# not_implemented
|
817
|
+
# end
|
818
|
+
|
819
|
+
# def unshift(*obj)
|
820
|
+
# not_implemented
|
821
|
+
# end
|
822
|
+
|
823
|
+
# Returns a collection containing the entries of self corresponding to the given
|
824
|
+
# selector(s). The selectors may be either integer indices or ranges
|
825
|
+
def values_at(*selectors)
  # For each selector, in order, the result of self[selector] is appended
  # as one element to a fresh collection obtained from +another+; that
  # collection is returned.
  picked = another
  selectors.each { |selector| picked << self[selector] }
  picked
end
|
832
|
+
|
833
|
+
# def yaml_initialize(tag, val)
|
834
|
+
# not_implemented
|
835
|
+
# end
|
836
|
+
|
837
|
+
# def |(another)
|
838
|
+
# not_implemented
|
839
|
+
# end
|
840
|
+
end
|