external 0.1.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History +7 -0
- data/MIT-LICENSE +1 -3
- data/README +162 -127
- data/lib/external.rb +2 -3
- data/lib/external/base.rb +174 -47
- data/lib/external/chunkable.rb +131 -105
- data/lib/external/enumerable.rb +78 -33
- data/lib/external/io.rb +163 -398
- data/lib/external/patches/ruby_1_8_io.rb +31 -0
- data/lib/external/patches/windows_io.rb +53 -0
- data/lib/external/patches/windows_utils.rb +27 -0
- data/lib/external/utils.rb +148 -0
- data/lib/external_archive.rb +840 -0
- data/lib/external_array.rb +57 -0
- data/lib/external_index.rb +1053 -0
- metadata +42 -58
- data/lib/ext_arc.rb +0 -108
- data/lib/ext_arr.rb +0 -727
- data/lib/ext_ind.rb +0 -1120
- data/test/benchmarks/benchmarks_20070918.txt +0 -45
- data/test/benchmarks/benchmarks_20070921.txt +0 -91
- data/test/benchmarks/benchmarks_20071006.txt +0 -147
- data/test/benchmarks/test_copy_file.rb +0 -80
- data/test/benchmarks/test_pos_speed.rb +0 -47
- data/test/benchmarks/test_read_time.rb +0 -55
- data/test/cached_ext_ind_test.rb +0 -219
- data/test/check/benchmark_check.rb +0 -441
- data/test/check/namespace_conflicts_check.rb +0 -23
- data/test/check/pack_check.rb +0 -90
- data/test/ext_arc_test.rb +0 -286
- data/test/ext_arr/alt_sep.txt +0 -3
- data/test/ext_arr/cr_lf_input.txt +0 -3
- data/test/ext_arr/input.index +0 -0
- data/test/ext_arr/input.txt +0 -1
- data/test/ext_arr/inputb.index +0 -0
- data/test/ext_arr/inputb.txt +0 -1
- data/test/ext_arr/lf_input.txt +0 -3
- data/test/ext_arr/lines.txt +0 -19
- data/test/ext_arr/without_index.txt +0 -1
- data/test/ext_arr_test.rb +0 -534
- data/test/ext_ind_test.rb +0 -1472
- data/test/external/base_test.rb +0 -74
- data/test/external/chunkable_test.rb +0 -182
- data/test/external/index/input.index +0 -0
- data/test/external/index/inputb.index +0 -0
- data/test/external/io_test.rb +0 -414
- data/test/external_test_helper.rb +0 -31
- data/test/external_test_suite.rb +0 -4
- data/test/test_array.rb +0 -1192
@@ -0,0 +1,31 @@
|
|
1
|
+
module External
  module Patches

    # Patch module registered in Io::PATCHES. When an io is extended with
    # this module its generic mode (as reported by Utils.mode) is recorded,
    # and flush/fsync become no-ops for ios whose generic mode is "r".
    #
    # NOTE(review): presumably flush/fsync raise on read-only streams under
    # Ruby 1.8, which is why they are skipped -- confirm.
    module Ruby18Io
      # The generic mode string ("r", "r+" or "w") captured on extension.
      attr_reader :generic_mode

      # Records the generic mode of base at the moment it is extended.
      def self.extended(base)
        mode = Utils.mode(base)
        base.instance_variable_set(:@generic_mode, mode)
      end

      # Flushes unless the io is read-only; returns nil when skipped.
      def flush
        return if generic_mode == "r"
        super
      end

      # Syncs to disk unless the io is read-only; returns nil when skipped.
      def fsync
        return if generic_mode == "r"
        super
      end

      # Quick comparison with another IO. Returns true if
      # another == self, or if both are file-type IOs (File or
      # Tempfile) and their paths are equal.
      def quick_compare(another)
        identical = (self == another)
        return identical if identical

        both_files = [self, another].all? do |candidate|
          candidate.kind_of?(File) || candidate.kind_of?(Tempfile)
        end
        both_files && self.path == another.path
      end
    end
  end

  Io::PATCHES << Patches::Ruby18Io
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module External
  module Patches

    # Ruby on Windows has problems with files larger than ~2 gigabytes.
    # Sizes return as negative, and positions cannot be set beyond the
    # maximum value of a long (2147483647, i.e. just under 2GB). WindowsIo
    # works around the positioning limit, thanks in large part to a bit of
    # code taken from 'win32/file/stat'
    # (http://rubyforge.org/projects/win32utils/).
    #
    module WindowsIo
      POSITION_MAX = 2147483647 # maximum size of long

      # Starts every extended io with no cached large position.
      def self.extended(base)
        base.instance_variable_set("@pos", nil)
      end

      # Modified to handle positions past the 2Gb limit: returns the
      # position cached by pos= when one is set, otherwise defers to
      # the default implementation.
      #
      # NOTE(review): the cached value is only updated by pos=, so it
      # looks like it will not follow subsequent reads -- confirm.
      def pos # :nodoc:
        @pos || super
      end

      # Positions larger than the max value of a long cannot be directly
      # given to the default +pos=+. This version incrementally seeks to
      # positions beyond the maximum, if necessary.
      #
      # Note: setting the position beyond the 2Gb limit requires the use of
      # sysseek. As such, errors will arise if you try to position an IO
      # object that does not support this method (for example StringIO...
      # but then what are you doing with a 2Gb StringIO anyhow?)
      def pos=(pos)
        if pos < POSITION_MAX
          # small positions go straight through and clear the cache
          super(pos)
          @pos = nil
          return
        end

        # already positioned at the requested large offset
        return if @pos == pos

        # note sysseek appears to be necessary here, rather than io.seek
        @pos = pos

        super(POSITION_MAX)
        remaining = pos - POSITION_MAX

        # step forward in POSITION_MAX-sized hops, then cover the rest
        while remaining > POSITION_MAX
          remaining -= POSITION_MAX
          sysseek(POSITION_MAX, ::IO::SEEK_CUR)
        end

        sysseek(remaining, ::IO::SEEK_CUR)
      end
    end
  end

  Io::PATCHES << Patches::WindowsIo
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# This code block modifies IO only if running on windows
|
2
|
+
require 'Win32API'
|
3
|
+
|
4
|
+
module External
  module Utils
    module_function

    # Modified to properly determine file lengths on Windows. Uses code
    # from 'win32/file/stat' (http://rubyforge.org/projects/win32utils/)
    #
    # The io is fsynced first, then msvcrt's _stat64 is called on io.path
    # and the file size is read out of the returned stat buffer.
    def file_length(io)
      io.fsync

      # I would have liked to use win32/file/stat to do this... however, some issue
      # arose involving FileUtils.cp, File.stat, and File::Stat.mode. cp raised an
      # error because the mode would be nil for files. I wasn't sure how to fix it,
      # so I've lifted the relevant code for pulling the large file size.

      # Note this is a simplified version... if io.path points to a chardev,
      # this may need to be changed, because apparently the call to the Win32API
      # may fail.
      #
      # NOTE(review): the return code of _stat64 is ignored, so a failed call
      # reports a length of 0 -- confirm whether callers rely on that.

      stat_buf = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].pack('ISSssssIILILILIL')
      Win32API.new('msvcrt', '_stat64', 'PP', 'I').call(io.path, stat_buf)

      # st_size is a 64-bit field at offset 24 of struct _stat64; read all
      # eight bytes so that sizes of 4GB and beyond are not truncated.
      stat_buf[24, 8].unpack('Q').first # Size of file in bytes
    end
  end
end
|
@@ -0,0 +1,148 @@
|
|
1
|
+
module External
  module Utils
    module_function

    # try_handle is a forwarding method allowing non-File, non-Tempfile
    # IO objects to be handled. A method name is inferred from the class
    # of the input and the input io is forwarded to that method on
    # External::Utils. For instance:
    #
    # * the _mode method for StringIO is 'stringio_mode'
    # * the _length method for StringIO is 'stringio_length'
    #
    # Class names are downcased and nested classes have '::' replaced
    # by '_'. Thus to add support for Some::Unknown::IO, extend
    # External::Utils as below:
    #
    #   module External::Utils
    #     module_function
    #
    #     def some_unknown_io_mode(io)
    #       ...
    #     end
    #
    #     def some_unknown_io_length(io)
    #       ...
    #     end
    #   end
    #
    # Raises a RuntimeError when no handler is defined. See stringio_mode
    # and stringio_length for more details.
    def try_handle(io, method)
      class_prefix = io.class.to_s.downcase.gsub(/::/, "_")
      handler = class_prefix + "_#{method}"

      unless Utils.respond_to?(handler)
        raise "cannot determine #{method} for '%s'" % io.class
      end

      Utils.send(handler, io)
    end

    # Determines the generic mode of the input io using the _mode
    # method for the input io class. _mode methods are provided here
    # for File, Tempfile, and StringIO. The return string is
    # determined as follows:
    #
    # readable & writable:: r+
    # readable:: r
    # writable:: w
    #
    # The _mode method takes the input io and should return an array
    # specifying whether or not io is readable and writable
    # (ie [readable, writable]).
    #
    # See try_handle for more details.
    def mode(io)
      readable, writable = try_handle(io, "mode")

      return "r+" if readable && writable
      return "r" if readable
      return "w" if writable

      # neither flag set: occurs for r+ mode, for some reason
      "r+"
    end

    # Determines the length of the input io using the _length method
    # for the input io class. Inputs that are not already an Io are
    # extended with Io and asked for their length directly.
    #
    # The _length method takes the input io, and should return the
    # current length of the input io (ie a flush operation may be
    # required).
    #
    # See try_handle for more details.
    def length(io)
      return try_handle(io, "length") if io.kind_of?(Io)

      io.extend Io
      io.length
    end

    # Returns an array of bools determining if the input File
    # is readable and writable.
    def file_mode(io)
      dup = io.dup

      # Determine readable/writable by sending close methods to the
      # duplicated io. A close that raises IOError indicates the io
      # was not open in the corresponding mode.
      [:close_read, :close_write].map do |closer|
        begin
          dup.send(closer)
          true
        rescue IOError
          false
        end
      end
    ensure
      # Be sure that the dup is fully closed before proceeding!
      # (Otherwise Tempfiles will not be properly disposed of
      # ... at least on Windows, perhaps on others)
      dup.close if dup && !dup.closed?
    end

    # Returns the length of the input File; the io is fsynced first so
    # that the size on disk is current.
    def file_length(io)
      io.fsync
      File.size(io.path)
    end

    # Returns an array of bools determining if the input Tempfile
    # is readable and writable (checked via its underlying @tmpfile).
    def tempfile_mode(io)
      underlying = io.instance_variable_get(:@tmpfile)
      file_mode(underlying)
    end

    # Returns the length of the input Tempfile
    def tempfile_length(io)
      file_length(io)
    end

    # Returns an array of bools determining if the input StringIO
    # is readable and writable.
    #
    #   s = StringIO.new("abcde", "r+")
    #   External::Utils.stringio_mode(s)     # => [true, true]
    #
    def stringio_mode(io)
      readable = !io.closed_read?
      writable = !io.closed_write?
      [readable, writable]
    end

    # Returns the length of the string backing the input StringIO
    #
    #   s = StringIO.new("abcde", "r+")
    #   External::Utils.stringio_length(s)   # => 5
    #
    def stringio_length(io)
      io.string.length
    end
  end
end
|
144
|
+
|
145
|
+
# Apply platform-specific patches
|
146
|
+
# case RUBY_PLATFORM
|
147
|
+
# when 'java'
|
148
|
+
# end
|
@@ -0,0 +1,840 @@
|
|
1
|
+
require 'external/base'
|
2
|
+
require 'external_index'
|
3
|
+
|
4
|
+
#--
|
5
|
+
# later separate out individual objects logically
|
6
|
+
# If writing, create new files:
|
7
|
+
# - base/object_id.aio (new file for receiving appends)
|
8
|
+
# - base/object_id._index (copy of existing index -- made on first insertion)
|
9
|
+
# - in index, -index indicates object_id.aio file whereas +index indicates original file
|
10
|
+
# - .consolidate(rename) resolves changes in index into the object_id file, renaming as needed
|
11
|
+
# requires index rewrite as well, to remove negatives
|
12
|
+
#
|
13
|
+
# If appending, ONLY allow << and all changes get committed to the original file.
|
14
|
+
#
|
15
|
+
# This should allow returning of new arrayio objects under read/write conditions
|
16
|
+
# By default read-only. No insertions. New ExternalArchive objects inherit parent mode.
|
17
|
+
#
|
18
|
+
# Independent modes:
|
19
|
+
# - r
|
20
|
+
# - r+
|
21
|
+
# - For safety, w/w+ will by default act as r/r+, simply creating new .aio and .index files
|
22
|
+
# changes to the originals will NOT be made unless .consolidate(rename) is used. Allow option io_w => true
|
23
|
+
# - b ALWAYS on with Windows
|
24
|
+
#++
|
25
|
+
|
26
|
+
# ExternalArchive provides array-like access to archival data stored on disk.
|
27
|
+
# ExternalArchives consist of an IO object and an index of [start, length]
|
28
|
+
# pairs which indicate the start position and length of entries in the IO.
|
29
|
+
#
|
30
|
+
class ExternalArchive < External::Base
|
31
|
+
class << self
|
32
|
+
|
33
|
+
# Array-like constructor: builds a new, empty instance and
# concatenates the given entries onto it.
def [](*args)
  archive = new
  archive.concat(args)
  archive
end
|
39
|
+
|
40
|
+
# Returns the default io index filepath for path, formed by swapping
# the extension of path for '.index'. Returns nil when no path is given.
#
#   ExternalArchive.index_path("/path/to/file.txt")   # => "/path/to/file.index"
#
def index_path(path)
  return nil unless path

  basename = path.chomp(File.extname(path))
  basename + '.index'
end
|
47
|
+
|
48
|
+
# Initializes an instance of self with File.open(path, mode) as an io.
# As with File.open, the instance will be passed to the block and
# closed when the block returns. If no block is given, open returns
# the new instance.
#
# By default the instance will be initialized with an ExternalIndex
# io_index, linked to index_path(path); the index file is created if
# it does not exist yet. The instance will be automatically reindexed
# if it is empty but its io is not.
#
# Options (specify using symbols):
# io_index:: Specifies the io_index manually. A filepath may be
#            provided and it will be used instead of index_path(path).
#            Array and ExternalIndex values are used directly.
# reindex:: Forces a call to reindex; using auto reindexing, reindex
#           is normally only called when the instance is empty
#           and the instance io is not. (default false)
# auto_reindex:: Turns on or off auto reindexing (default true)
#
def open(path, mode="rb", options={})
  options = {
    :io_index => nil,
    :reindex => false,
    :auto_reindex => true
  }.merge(options)

  index = options[:io_index]
  if index == nil
    index = index_path(path)
    # File.exist? rather than File.exists?, which was deprecated and
    # then removed in Ruby 3.2
    FileUtils.touch(index) unless File.exist?(index)
  end

  io_index = case index
  when Array, ExternalIndex then index
  else ExternalIndex.open(index, 'r+', :format => 'II')
  end

  io = path == nil ? nil : File.open(path, mode)
  extarc = new(io, io_index)

  # reindex if necessary
  if options[:reindex] || (options[:auto_reindex] && extarc.empty? && extarc.io.length > 0)
    extarc.reindex
  end

  if block_given?
    begin
      yield(extarc)
    ensure
      extarc.close
    end
  else
    extarc
  end
end
|
102
|
+
end
|
103
|
+
|
104
|
+
# The underlying index of [position, length] arrays
# indicating where entries in the io are located.
attr_reader :io_index

# Creates an archive over io. When no io_index is given an empty
# Array is used as the index.
def initialize(io=nil, io_index=nil)
  super(io)

  io_index = [] unless io_index
  @io_index = io_index
end
|
112
|
+
|
113
|
+
# Returns true if io_index is an Array (ie the index is held
# in memory), false otherwise.
def cached?
  Array === io_index
end
|
117
|
+
|
118
|
+
# Turns on or off caching by converting io_index
# to an Array (cache=true) or to an ExternalIndex
# (cache=false). Nothing happens when the index is
# already in the requested form.
def cache=(input)
  if input && !cached?
    # pull the index into memory, then release the external index
    entries = io_index.to_a
    io_index.close
    @io_index = entries
  elsif !input && cached?
    # the trailing hash is passed along to ExternalIndex[] with the entries
    io_index << {:format => 'II'}
    @io_index = ExternalIndex[*io_index]
  end
end
|
134
|
+
|
135
|
+
# Closes self as in External::Base#close. An index_path may be
# specified to close io_index as well; when io_index is not an
# ExternalIndex, one is temporarily created with the current
# io_index content to 'close' and save the index.
def close(path=nil, index_path=self.class.index_path(path), overwrite=false)
  if io_index.kind_of?(ExternalIndex)
    io_index.close(index_path, overwrite)
  elsif index_path != nil
    ExternalIndex[*io_index].close(index_path, overwrite)
  end

  super(path, overwrite)
end
|
149
|
+
|
150
|
+
# Returns another instance of self.class with no io; the new
# instance gets an empty Array index if self is cached, and
# io_index.another otherwise.
def another
  fresh_index = cached? ? [] : io_index.another
  self.class.new(nil, fresh_index)
end
|
155
|
+
|
156
|
+
public
|
157
|
+
|
158
|
+
# Converts a string read from io into an entry. This
# implementation returns the string unchanged.
def str_to_entry(str)
  str
end
|
163
|
+
|
164
|
+
# Converts an entry into the string written to io. This
# implementation returns entry.to_s.
def entry_to_str(entry)
  entry.to_s
end
|
169
|
+
|
170
|
+
# Clears the io_index, then flushes and rewinds the io. When a
# block is given, io and io_index are yielded to it so that the
# index may be rebuilt. Returns self. Also available as reindex.
def reset_index
  io_index.clear

  io.flush
  io.rewind

  if block_given?
    yield(io, io_index)
  end

  self
end

alias reindex reset_index
|
182
|
+
|
183
|
+
# Rebuilds the index by scanning the io for pattern; each match
# that advances the scanner is recorded as a [position, length] entry.
#
# The speed of reindex_by_regexp is dictated by how fast the underlying
# code can match the pattern. Under ideal conditions (ie a very simple
# regexp), it will be as fast as reindex_by_sep.
#
# Options (:range_or_span, :blksize, :carryover_limit) are passed to
# io.scan; the span defaults to io.default_span. If a block is given it
# is called once per scanned chunk, to allow monitoring.
def reindex_by_regexp(pattern=/\r?\n/, options={})
  defaults = {
    :range_or_span => nil,
    :blksize => 8388608,
    :carryover_limit => 8388608
  }
  options = defaults.merge(options)

  reset_index do |io, index|
    span = options[:range_or_span] || io.default_span

    io.scan(span, options[:blksize], options[:carryover_limit]) do |scan_pos, string|
      scanner = StringScanner.new(string)

      loop do
        advanced = scanner.search_full(pattern, true, false)
        break unless advanced && advanced > 0

        index << [scan_pos, advanced]
        scan_pos += advanced
      end

      # allow a block for monitoring
      yield if block_given?

      # whatever was not consumed is carried over by the scan
      scanner.rest_size
    end
  end
end
|
213
|
+
|
214
|
+
# Rebuilds the index by scanning the io for a separator string.
#
# Options (specify using symbols):
# sep_regexp:: The regexp used to find separators. Defaults to a
#              regexp matching sep_str literally.
# sep_length:: The length of the separator (default sep_str.length)
# entry_follows_sep:: When true the separator precedes each entry
#                     rather than terminating it (default false)
# exclude_sep:: When true the separator is left out of the indexed
#               entries (default false)
# range_or_span:: The span of io to scan (default io.default_span)
# blksize:: passed to io.scan (default 8388608)
# carryover_limit:: passed to io.scan (default 8388608)
#
# If a block is given it is called once per scanned chunk, to allow
# monitoring.
def reindex_by_sep(sep_str=$/, options={})
  sep_str = sep_str.to_s
  options = {
    # escape the separator so that metacharacters such as '|' or '.'
    # are matched literally (an unescaped '|' matches the empty string
    # and would never advance the scanner)
    :sep_regexp => Regexp.new(Regexp.escape(sep_str)),
    :sep_length => sep_str.length,
    :entry_follows_sep => false,
    :exclude_sep => false,
    :range_or_span => nil,
    :blksize => 8388608,
    :carryover_limit => 8388608
  }.merge(options)

  regexp = options[:sep_regexp]
  sep_length = options[:sep_length]
  entry_follows_sep = options[:entry_follows_sep]
  exclude_sep = options[:exclude_sep]

  reset_index do |io, index|
    # calculate default span after reset_index in case any flush needs to happen
    span = options[:range_or_span] || io.default_span
    blksize = options[:blksize]
    carryover_limit = options[:carryover_limit]

    remainder = io.scan(span, blksize, carryover_limit) do |scan_pos, string|
      scanner = StringScanner.new(string)

      # When the entry follows the separator, the scanner must
      # be set right after the separator for the first entry, so
      # that the search will find the beginning of the next entry.
      if scan_pos == 0 && entry_follows_sep
        scanner.pos = sep_length
        scan_pos = sep_length
      end

      # Scan for entries by looking for the beginning of the
      # next entry, signaling the end of the current entry.
      while advanced = scanner.skip_until(regexp)

        # adjust indicies as needed...
        index << if exclude_sep
          [scan_pos, advanced - sep_length]
        elsif entry_follows_sep
          [scan_pos - sep_length, advanced]
        else
          [scan_pos, advanced]
        end

        scan_pos += advanced
      end

      # allow a block for monitoring
      yield if block_given?
      scanner.rest_size
    end

    # Unless the io is empty, there will be a remaining entry that
    # doesn't get scanned when the entry follows the separator.
    # Add the entry here.
    if entry_follows_sep && io.length != 0
      index << if exclude_sep
        [io.length - remainder, remainder]
      else
        [io.length - remainder - sep_length, remainder + sep_length]
      end
    end
  end
end
|
286
|
+
|
287
|
+
###########################
|
288
|
+
# Array methods
|
289
|
+
###########################
|
290
|
+
|
291
|
+
# def &(another)
|
292
|
+
# not_implemented
|
293
|
+
# end
|
294
|
+
|
295
|
+
# def *(arg)
|
296
|
+
# not_implemented
|
297
|
+
# end
|
298
|
+
|
299
|
+
# Concatenates another onto self (see concat) and returns the result
# of concat. Note that self is modified in the process.
def +(another)
  concat(another)
end
|
302
|
+
|
303
|
+
# def -(another)
|
304
|
+
# not_implemented
|
305
|
+
# end
|
306
|
+
|
307
|
+
# Appends obj as a new entry at the end of self. Returns self.
def <<(obj)
  next_index = length
  self[next_index] = obj
  self
end
|
311
|
+
|
312
|
+
# Comparison -- compares the entries of self with those of an Array
# or another ExternalArchive, as Array#<=> would. When one side is a
# prefix of the other, the longer side is the greater.
#
# Raises a TypeError for any other kind of input.
def <=>(another)
  case another
  when Array
    if another.length < self.length
      # if another is equal to the matching subset of self,
      # then self is obviously the longer array and wins.
      result = (self.to_a(another.length) <=> another)
      result == 0 ? 1 : result
    else
      self.to_a <=> another
    end
  when ExternalArchive
    # (matches ExternalArchive, in line with == and the error message
    # below, so that any two archives may be compared)

    # if indexes are equal, additional
    # 'quick' comparisons are allowed
    if self.io_index == another.io_index

      # equal in comparison if the ios are equal
      return 0 if self.io.quick_compare(another.io)
    end

    self.io.flush
    another.io.flush

    # should chunk compare
    if another.length > self.length
      result = (self.to_a <=> another.to_a(self.length))
      result == 0 ? -1 : result
    elsif another.length < self.length
      result = (self.to_a(another.length) <=> another.to_a)
      result == 0 ? 1 : result
    else
      self.to_a <=> another.to_a
    end
  else
    raise TypeError.new("can't convert from #{another.class} to ExternalArchive or Array")
  end
end
|
349
|
+
|
350
|
+
# Equality -- true if another is an Array or ExternalArchive holding
# the same number of entries as self, with equal entries. Returns
# false for any other kind of input.
def ==(another)
  is_array = another.kind_of?(Array)
  return false unless is_array || another.kind_of?(ExternalArchive)

  # test simply based on length
  return false unless self.length == another.length

  # compare arrays
  return self.to_a == another if is_array

  # if indexes are equal, additional
  # 'quick' comparisons are allowed
  if self.io_index == another.io_index

    # equal in comparison if the ios are equal
    #, (self.io_index.buffer_size/2).ceil) ??
    return true if self.io.sort_compare(another.io) == 0
  end

  # compare arrays
  self.to_a == another.to_a
end
|
378
|
+
|
379
|
+
# Element Reference -- Returns the entry at index, or returns an array starting
# at start and continuing for length entries, or returns an array specified
# by range. Negative indices count backward from the end of self (-1 is the last
# element). Returns nil if the index (or starting index) is out of range.
#
#   a = ExternalArchive[ "a", "b", "c", "d", "e" ]
#   a[2] + a[0] + a[1]       #=> "cab"
#   a[6]                     #=> nil
#   a[1, 2]                  #=> [ "b", "c" ]
#   a[1..3]                  #=> [ "b", "c", "d" ]
#   a[4..7]                  #=> [ "e" ]
#   a[6..10]                 #=> nil
#   a[-3, 3]                 #=> [ "c", "d", "e" ]
#   # special cases
#   a[5]                     #=> nil
#   a[5, 1]                  #=> []
#   a[5..10]                 #=> []
#
def [](input, length=nil)
  # two call types are required because while ExternalIndex can take
  # a nil length, Array cannot and index can be either
  entry_indicies = if length == nil
    io_index[input]
  else
    io_index[input, length]
  end

  # for conformance with array range retrieval,
  # simply return nil and [] indicies
  return entry_indicies if entry_indicies == nil || entry_indicies.empty?

  if length == nil && !input.kind_of?(Range)
    # a single entry was specified, read it
    entry_start, entry_length = entry_indicies
    io.pos = entry_start
    return str_to_entry( io.read(entry_length) )
  end

  # multiple entries were specified, collect each
  cursor = nil
  entry_indicies.map do |(entry_start, entry_length)|
    next if entry_start == nil

    # only set io position if necessary
    if cursor != entry_start
      cursor = entry_start
      io.pos = cursor
    end

    # the read below leaves the io just past this entry
    cursor += entry_length

    # read entry
    str_to_entry( io.read(entry_length) )
  end
end
|
433
|
+
|
434
|
+
# Element Assignment -- Sets the entry at index, or replaces a subset starting at start
# and continuing for length entries, or replaces a subset specified by range.
# Negative indices count backward from the end of self. Inserts elements if
# length is zero. If nil is used in the second and third form, deletes elements from
# self. An IndexError is raised if a negative index points past the beginning of self.
# See also push, and unshift.
#
#   a = ExternalArchive.new
#   a[4] = "4";                 a #=> [nil, nil, nil, nil, "4"]
#   a[0, 3] = [ 'a', 'b', 'c' ]; a #=> ["a", "b", "c", nil, "4"]
#   a[1..2] = [ '1', '2' ];       a #=> ["a", '1', '2', nil, "4"]
#   a[0, 2] = "?";              a #=> ["?", '2', nil, "4"]
#   a[0..2] = "A";              a #=> ["A", "4"]
#   a[-1]   = "Z";              a #=> ["A", "Z"]
#   a[1..-1] = nil;             a #=> ["A"]
#
# New entries are always written at the end of the io; only the index
# is rearranged to reflect the assignment.
def []=(*args)
  raise ArgumentError, "wrong number of arguments (1 for 2)" if args.length < 2

  one, two, value = args
  if args.length == 2
    value = two
    two = nil
  end

  one = convert_to_int(one)
  case one
  when Integer
    # (Integer rather than Fixnum; the Fixnum constant was removed in Ruby 3.2)
    if one < 0
      one += length
      raise IndexError, "index #{one} out of range" if one < 0
    end

    entry_start = io.length
    io.pos = entry_start

    if two == nil
      # simple insertion
      # (note it is important to write the entry to io
      # first as a check that io is open for writing)

      entry_length = io.write( entry_to_str(value) )
      io.length += entry_length
      io_index[one] = [entry_start, entry_length]

    else
      values = case value
      when Array then value
      when ExternalArchive
        # special case, self will be reading and
        # writing from the same io, producing
        # incorrect results

        # potential to load a huge amount of data
        value == self ? value.to_a : value
      else convert_to_ary(value)
      end

      # write each value to self, collecting the indicies
      indicies = []
      values.each do |entry|
        entry_length = io.write( entry_to_str(entry) )
        indicies << [entry_start, entry_length]

        io.length += entry_length
        entry_start += entry_length
      end

      # register the indicies
      io_index[one, two] = indicies
    end

  when Range
    raise TypeError, "can't convert Range into Integer" unless two == nil
    start, span = split_range(one)

    raise RangeError, "#{one} out of range" if start < 0
    self[start, span < 0 ? 0 : span + 1] = value

  when nil
    raise TypeError, "no implicit conversion from nil to integer"
  else
    raise TypeError, "can't convert #{one.class} into Integer"
  end
end
|
519
|
+
|
520
|
+
# def abbrev(pattern=nil)
|
521
|
+
# not_implemented
|
522
|
+
# end
|
523
|
+
|
524
|
+
# def assoc(obj)
|
525
|
+
# not_implemented
|
526
|
+
# end
|
527
|
+
|
528
|
+
# Returns the entry at index; equivalent to self[index].
def at(index)
  self[index]
end
|
532
|
+
|
533
|
+
# Removes all elements from _self_ by truncating the io to
# zero length and clearing the index. Returns self.
def clear
  io.truncate(0)
  io_index.clear

  self
end
|
539
|
+
|
540
|
+
# Returns another instance (see another) holding the non-nil
# entries of self, in order.
def compact
  # TODO - optimize?
  compacted = self.another

  each do |item|
    next if item == nil
    compacted << item
  end

  compacted
end
|
548
|
+
|
549
|
+
# def compact!
|
550
|
+
# not_implemented
|
551
|
+
# end
|
552
|
+
|
553
|
+
# Appends the entries of another (an Array or ExternalArchive) to
# the end of self. Returns self; raises a TypeError for any other
# kind of input.
def concat(another)
  unless another.kind_of?(Array) || another.kind_of?(ExternalArchive)
    raise TypeError.new("can't convert #{another.class} into ExternalArchive or Array")
  end

  self[length, another.length] = another
  self
end
|
562
|
+
|
563
|
+
# def dclone
|
564
|
+
# not_implemented
|
565
|
+
# end
|
566
|
+
|
567
|
+
# def delete(obj)
|
568
|
+
# not_implemented
|
569
|
+
# end
|
570
|
+
|
571
|
+
# def delete_at(index)
|
572
|
+
# not_implemented
|
573
|
+
# end
|
574
|
+
|
575
|
+
# def delete_if # :yield: item
|
576
|
+
# not_implemented
|
577
|
+
# end
|
578
|
+
|
579
|
+
# Calls block once for each element string in self, passing that string as a
# parameter. Index entries without a start position yield an empty string.
# Returns self.
def each_str(&block) # :yield: string
  # tracking the position using a local variable
  # is faster than calling io.pos.
  cursor = nil

  io_index.each do |(start, length)|
    if start == nil
      yield("")
    else
      # only set io position if necessary
      if cursor != start
        cursor = start
        io.pos = cursor
      end

      # advance position past the entry about to be read
      cursor += length

      # yield entry string
      yield io.read(length)
    end
  end

  self
end
|
604
|
+
|
605
|
+
# Calls block once for each element in self, passing that element as a
# parameter. Each element is the result of str_to_entry on the entry string.
def each(&block) # :yield: item
  each_str { |str| yield str_to_entry(str) }
end
|
612
|
+
|
613
|
+
# Same as each, but passes the index of the element instead of the element itself.
|
614
|
+
def each_index(&block) # :yield: index
  # Yields 0, 1, ... length-1 in ascending order and returns self.
  # Nothing is yielded when self is empty.
  0.upto(length - 1, &block)
  self
end

# The method was previously only available under the misspelled name
# eachio_index (apparently the by-product of an index -> io_index rename).
# The old name is kept as an alias so existing callers keep working.
alias eachio_index each_index
|
618
|
+
|
619
|
+
# def fetch(index, default=nil, &block)
|
620
|
+
# index += index_length if index < 0
|
621
|
+
# val = (index >= length ? default : self[index])
|
622
|
+
# block_given? ? yield(val) : val
|
623
|
+
# end
|
624
|
+
#
|
625
|
+
# def fill(*args)
|
626
|
+
# not_implemented
|
627
|
+
# end
|
628
|
+
|
629
|
+
# def flatten
|
630
|
+
# not_implemented
|
631
|
+
# end
|
632
|
+
|
633
|
+
# def flatten!
|
634
|
+
# not_implemented
|
635
|
+
# end
|
636
|
+
|
637
|
+
# def frozen?
|
638
|
+
# not_implemented
|
639
|
+
# end
|
640
|
+
|
641
|
+
# def hash
|
642
|
+
# not_implemented
|
643
|
+
# end
|
644
|
+
|
645
|
+
# def include?(obj)
|
646
|
+
# not_implemented
|
647
|
+
# end
|
648
|
+
|
649
|
+
# def index(obj)
|
650
|
+
# not_implemented
|
651
|
+
# end
|
652
|
+
#
|
653
|
+
# def indexes(*args)
|
654
|
+
# values_at(*args)
|
655
|
+
# end
|
656
|
+
#
|
657
|
+
# def indices(*args)
|
658
|
+
# values_at(*args)
|
659
|
+
# end
|
660
|
+
|
661
|
+
# def replace(other)
|
662
|
+
# not_implemented
|
663
|
+
# end
|
664
|
+
|
665
|
+
# def insert(index, *obj)
|
666
|
+
# self[index] = obj
|
667
|
+
# end
|
668
|
+
|
669
|
+
# def inspect
|
670
|
+
# not_implemented
|
671
|
+
# end
|
672
|
+
|
673
|
+
# def join(sep=$,)
|
674
|
+
# not_implemented
|
675
|
+
# end
|
676
|
+
|
677
|
+
# Returns the last n entries (default 1)
|
678
|
+
def last(n=nil)
  # Without an argument the single final entry is returned (self[-1]).
  return self[-1] if n.nil?

  # Otherwise return the slice of up to n trailing entries; the starting
  # offset is clamped to 0 when n exceeds the number of entries.
  offset = length - n
  self[offset < 0 ? 0 : offset, n]
end
|
685
|
+
|
686
|
+
# Returns the number of entries in self
|
687
|
+
def length
  # One index entry exists per archive entry, so the count is delegated
  # to io_index.
  io_index.length
end
|
690
|
+
|
691
|
+
# Returns the number of non-nil elements in self. May be zero.
|
692
|
+
# def nitems
|
693
|
+
# count = self.length
|
694
|
+
# io_index.each do |(start, length)|
|
695
|
+
# # the logic of this search is that nil,
|
696
|
+
# # (and only nil ?) can have an entry
|
697
|
+
# # length of 5: nil.to_yaml == "--- \n"
|
698
|
+
# count -= 1 if length == nil || length == 5
|
699
|
+
# end
|
700
|
+
# count
|
701
|
+
# end
|
702
|
+
|
703
|
+
# def pack(aTemplateString)
|
704
|
+
# not_implemented
|
705
|
+
# end
|
706
|
+
|
707
|
+
# def pop
|
708
|
+
# not_implemented
|
709
|
+
# end
|
710
|
+
|
711
|
+
# def pretty_print(q)
|
712
|
+
# not_implemented
|
713
|
+
# end
|
714
|
+
|
715
|
+
# def pretty_print_cycle(q)
|
716
|
+
# not_implemented
|
717
|
+
# end
|
718
|
+
|
719
|
+
# Appends each of the given objects to self, in argument order, using <<.
# Returns self so calls can be chained; with no arguments nothing is
# appended.
def push(*obj)
  obj.each { |entry| self << entry }
  self
end
|
723
|
+
|
724
|
+
# def quote
|
725
|
+
# not_implemented
|
726
|
+
# end
|
727
|
+
|
728
|
+
# def rassoc(key)
|
729
|
+
# not_implemented
|
730
|
+
# end
|
731
|
+
|
732
|
+
# def replace(another)
|
733
|
+
# not_implemented
|
734
|
+
# end
|
735
|
+
|
736
|
+
# def reverse
|
737
|
+
# not_implemented
|
738
|
+
# end
|
739
|
+
|
740
|
+
# def reverse!
|
741
|
+
# not_implemented
|
742
|
+
# end
|
743
|
+
|
744
|
+
# Calls block once for each element string in self, walking io_index in
# reverse order. Returns self.
#
# An index entry with a nil start yields an empty string, exactly as
# each_str does, so forward and reverse iteration visit the same number
# of elements. (Previously such entries were silently skipped here.)
def reverse_each_str(&block) # :yield: string
  io_index.reverse_each do |(start, length)|
    if start.nil?
      yield("")
      next
    end

    # A more optimized approach would
    # read in a chunk of entries and
    # iterate over them?
    io.pos = start

    # yield entry string
    yield io.read(length)
  end
  self
end
|
758
|
+
|
759
|
+
# Calls the block once per element, in the order produced by
# reverse_each_str, passing each raw string through str_to_entry first.
# Returns whatever reverse_each_str returns.
def reverse_each # :yield: item
  reverse_each_str { |raw| yield str_to_entry(raw) }
end
|
764
|
+
|
765
|
+
# def rindex(obj)
|
766
|
+
# not_implemented
|
767
|
+
# end
|
768
|
+
|
769
|
+
# def select # :yield: item
|
770
|
+
# not_implemented
|
771
|
+
# end
|
772
|
+
|
773
|
+
# def shift
|
774
|
+
# not_implemented
|
775
|
+
# end
|
776
|
+
|
777
|
+
# Alias for length
|
778
|
+
def size
  # Pure synonym: the answer always comes from length.
  length
end
|
781
|
+
|
782
|
+
# def slice(*args)
|
783
|
+
# self.call(:[], *args)
|
784
|
+
# end
|
785
|
+
|
786
|
+
# def slice!(*args)
|
787
|
+
# not_implemented
|
788
|
+
# end
|
789
|
+
|
790
|
+
# Returns the first length entries of self via self[0, length]; length
# defaults to the full length of self. A length of 0 short-circuits to
# an empty Array without slicing.
def to_a(length=self.length)
  return [] if length == 0

  self[0, length]
end
|
793
|
+
|
794
|
+
# def to_ary
|
795
|
+
# not_implemented
|
796
|
+
# end
|
797
|
+
|
798
|
+
# Returns _self_.join.
|
799
|
+
# def to_s
|
800
|
+
# self.join
|
801
|
+
# end
|
802
|
+
|
803
|
+
# def to_yaml(opts={})
|
804
|
+
# self[0, self.length].to_yaml(opts)
|
805
|
+
# end
|
806
|
+
|
807
|
+
# def transpose
|
808
|
+
# not_implemented
|
809
|
+
# end
|
810
|
+
|
811
|
+
# def uniq
|
812
|
+
# not_implemented
|
813
|
+
# end
|
814
|
+
|
815
|
+
# def uniq!
|
816
|
+
# not_implemented
|
817
|
+
# end
|
818
|
+
|
819
|
+
# def unshift(*obj)
|
820
|
+
# not_implemented
|
821
|
+
# end
|
822
|
+
|
823
|
+
# Returns a collection containing the entries of self corresponding to the given
|
824
|
+
# selector(s). The selectors may be either integer indices or ranges.
|
825
|
+
def values_at(*selectors)
  # Results are accumulated in the object returned by self.another.
  # Every selector is looked up with self[selector] and whatever that
  # returns is appended with << as a single element, in argument order.
  picked = self.another
  selectors.each { |selector| picked << self[selector] }
  picked
end
|
832
|
+
|
833
|
+
# def yaml_initialize(tag, val)
|
834
|
+
# not_implemented
|
835
|
+
# end
|
836
|
+
|
837
|
+
# def |(another)
|
838
|
+
# not_implemented
|
839
|
+
# end
|
840
|
+
end
|