external 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History +5 -0
- data/MIT-LICENSE +21 -0
- data/README +168 -0
- data/lib/ext_arc.rb +108 -0
- data/lib/ext_arr.rb +727 -0
- data/lib/ext_ind.rb +1120 -0
- data/lib/external/base.rb +85 -0
- data/lib/external/chunkable.rb +105 -0
- data/lib/external/enumerable.rb +137 -0
- data/lib/external/io.rb +398 -0
- data/lib/external.rb +3 -0
- data/test/benchmarks/benchmarks_20070918.txt +45 -0
- data/test/benchmarks/benchmarks_20070921.txt +91 -0
- data/test/benchmarks/benchmarks_20071006.txt +147 -0
- data/test/benchmarks/test_copy_file.rb +80 -0
- data/test/benchmarks/test_pos_speed.rb +47 -0
- data/test/benchmarks/test_read_time.rb +55 -0
- data/test/cached_ext_ind_test.rb +219 -0
- data/test/check/benchmark_check.rb +441 -0
- data/test/check/namespace_conflicts_check.rb +23 -0
- data/test/check/pack_check.rb +90 -0
- data/test/ext_arc_test.rb +286 -0
- data/test/ext_arr/alt_sep.txt +3 -0
- data/test/ext_arr/cr_lf_input.txt +3 -0
- data/test/ext_arr/input.index +0 -0
- data/test/ext_arr/input.txt +1 -0
- data/test/ext_arr/inputb.index +0 -0
- data/test/ext_arr/inputb.txt +1 -0
- data/test/ext_arr/lf_input.txt +3 -0
- data/test/ext_arr/lines.txt +19 -0
- data/test/ext_arr/without_index.txt +1 -0
- data/test/ext_arr_test.rb +534 -0
- data/test/ext_ind_test.rb +1472 -0
- data/test/external/base_test.rb +74 -0
- data/test/external/chunkable_test.rb +182 -0
- data/test/external/index/input.index +0 -0
- data/test/external/index/inputb.index +0 -0
- data/test/external/io_test.rb +414 -0
- data/test/external_test_helper.rb +31 -0
- data/test/external_test_suite.rb +4 -0
- data/test/test_array.rb +1192 -0
- metadata +104 -0
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'external/io'
|
2
|
+
require 'external/chunkable'
|
3
|
+
require 'external/enumerable'
|
4
|
+
require 'tempfile'
|
5
|
+
|
6
|
+
module External

  #--
  # Base provides the basic array functionality shared by ExtArr and Index,
  # essentially wrapping the IO functions required to access and utilize external
  # array data with the standard array functions.  Bases can be opened in any
  # of the IO modes; the capabilities of Base will be reduced accordingly
  # (ie read-only Bases cannot write values using []=, for instance).
  #
  # It is VERY IMPORTANT to realize that the underlying IO will be opened using the
  # given mode.  The 'w' mode will overwrite all existing data; 'r+' is a safer mode
  # for full read-write functionality.  Note that since Base actively scans over
  # the IO, append mode essentially behaves like write, but does not overwrite
  # existing data.
  #
  # To work properly, Base must be subclassed with methods:
  # * length
  # * io_fetch
  #++
  #
  #
  class Base
    class << self
      # Opens the file at path fd using mode and wraps it in a new instance
      # of the receiving class, passing options through to new.  When fd is
      # nil no file is opened and the instance picks its own io.
      #
      # If a block is given the instance is yielded to it and closed when
      # the block exits (normally or by exception); the result of the block
      # is returned.  Without a block the open instance is returned and the
      # caller is responsible for closing it.
      def open(fd=nil, mode="r", options={})
        fd = File.open(fd, mode) unless fd.nil?

        begin
          ab = new(fd, options)
        rescue
          # do not leak the file handle opened above if construction fails
          fd.close if fd && !fd.closed?
          raise
        end

        return ab unless block_given?

        begin
          yield(ab)
        ensure
          ab.close
        end
      end
    end

    include External::Enumerable
    include External::Chunkable

    attr_reader :io

    # Initializes a new Base wrapping the given io.  When io is nil a
    # Tempfile is created and used instead.
    #
    # The options hash is accepted so that Base.open, which calls
    # new(fd, options), works against Base itself.  Base does not interpret
    # any options.
    #
    # NOTE(review): earlier documentation listed nil_value, max_gap and
    # max_chunk_size as standard options and described a StringIO mode;
    # nothing in this class implements them -- presumably they belong to
    # subclasses.  Confirm against ExtArr/Index.
    def initialize(io=nil, options={})
      self.io = (io.nil? ? Tempfile.new("array_base") : io)
    end

    # True if io is closed.
    def closed?
      io.closed?
    end

    # Closes io, unless it is already closed.
    def close
      io.close unless io.closed?
    end

    protected

    # Sets io, extending the input io with External::IO if it is not
    # already a kind of External::IO.
    def io=(io)
      io.extend External::IO unless io.kind_of?(External::IO)
      @io = io
    end

  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
module External

  # The Chunkable mixin provides methods for organizing a span or range
  # into chunks no larger than a specified block size.
  #
  # A span is an array of [offset, length].  Including classes supply
  # length (the total number of items) and default_blksize (the chunk size
  # used when none is specified) through the accessors below.
  module Chunkable
    attr_accessor :length, :default_blksize

    # Returns the default span: [0, length]
    def default_span
      [0, length]
    end

    # Breaks the input range or span into chunks of blksize or less.
    # The offset and length of each chunk will be provided to the
    # block, if given.  Without a block, the chunks are collected and
    # returned as an array of [offset, length] pairs.
    #
    #   default_blksize       # => 100
    #   chunk(0..250)         # => [[0,100],[100,100],[200,50]]
    #   chunk(0..250, 125)    # => [[0,125],[125,125]]
    #
    #   results = []
    #   chunk([10,190]) {|offset, length| results << [offset, length]}
    #   results               # => [[10,100],[110,90]]
    #
    # Raises an ArgumentError if the offset resolves to a negative value,
    # or if a non-empty interval is chunked with a non-positive blksize.
    def chunk(range_or_span=default_span, blksize=default_blksize)
      # forward blksize too, so the block-less form honors the caller's size
      return collect_results(:chunk, range_or_span, blksize) unless block_given?

      rbegin, rend = range_begin_and_end(range_or_span)
      check_blksize(rend - rbegin, blksize)

      # chunk the final range to make sure that no chunks
      # greater than blksize are returned
      while rend - rbegin > blksize
        yield(rbegin, blksize)
        rbegin += blksize
      end
      yield(rbegin, rend - rbegin) if rend - rbegin > 0
    end

    # Breaks the input range or span into chunks of blksize or less,
    # beginning from the end of the interval.  The offset and length
    # of each chunk will be provided to the block, if given.  Without a
    # block, the chunks are collected and returned as an array of
    # [offset, length] pairs.
    #
    #   default_blksize         # => 100
    #   reverse_chunk(0..250)   # => [[150,100],[50,100],[0,50]]
    #
    #   results = []
    #   reverse_chunk([10,190]) {|offset, length| results << [offset, length]}
    #   results                 # => [[100,100],[10,90]]
    #
    # Raises an ArgumentError under the same conditions as chunk.
    def reverse_chunk(range_or_span=default_span, blksize=default_blksize)
      return collect_results(:reverse_chunk, range_or_span, blksize) unless block_given?

      rbegin, rend = range_begin_and_end(range_or_span)
      check_blksize(rend - rbegin, blksize)

      # chunk the final range to make sure that no chunks
      # greater than blksize are returned
      while rend - rbegin > blksize
        rend -= blksize
        yield(rend, blksize)
      end
      yield(rbegin, rend - rbegin) if rend - rbegin > 0
    end

    protected

    # Converts a range into an offset and length.  Negative values are
    # counted back from self.length.  The length is computed as
    # (end - begin), less one when the range excludes its end.
    #
    #   length                # => 10
    #   split_range(0..9)     # => [0,9]
    #   split_range(0...9)    # => [0,8]
    #
    #   split_range(-1..9)    # => [9,0]
    #   split_range(0..-1)    # => [0,9]
    #
    # NOTE(review): an inclusive range therefore yields one fewer item than
    # Array#[] would for the same range.  The chunk/reverse_chunk examples
    # rely on this arithmetic, so it is documented rather than changed --
    # confirm the intent with callers.
    def split_range(range)
      begin_range = range.begin + (range.begin < 0 ? self.length : 0)
      end_range = range.end + (range.end < 0 ? self.length : 0)
      length = end_range - begin_range - (range.exclude_end? ? 1 : 0)

      [begin_range, length]
    end

    # Resolves a negative span offset by counting back from self.length.
    # The input span is returned as-is when its offset is non-negative;
    # otherwise an adjusted copy is returned so the caller's array is
    # never modified.
    def split_span(span)
      return span unless span[0] < 0

      adjusted = span.dup
      adjusted[0] += self.length
      adjusted
    end

    # Converts a range or span into [begin, end] offsets, where end is
    # begin plus the length of the interval.  Raises an ArgumentError if
    # the resolved begin offset is still negative.
    def range_begin_and_end(range_or_span)
      rbegin, rlength = range_or_span.kind_of?(Range) ? split_range(range_or_span) : split_span(range_or_span)
      # inspect is used rather than PP so no extra library needs loading
      raise ArgumentError, "negative offset specified: #{range_or_span.inspect}" if rbegin < 0

      [rbegin, rbegin + rlength]
    end

    private

    # Calls method with args and a block that gathers each set of yielded
    # values into an array, which is returned.
    def collect_results(method, *args) # :nodoc:
      results = []
      send(method, *args) do |*result|
        results << result
      end
      results
    end

    # Guards against the endless loop that a zero or negative blksize would
    # cause when there is something to chunk.  Non-numeric sizes (ex nil)
    # are left to fail in the size comparison, as before.
    def check_blksize(total, blksize) # :nodoc:
      return unless total > 0 && blksize.kind_of?(Numeric) && blksize <= 0
      raise ArgumentError, "blksize must be positive: #{blksize.inspect}"
    end
  end
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
require 'enumerator'
|
2
|
+
|
3
|
+
module External
  # Enumerable methods for externally stored arrays.  Only each_with_index
  # is implemented here; the remaining methods are retained below as
  # commented-out placeholders.
  #
  # The including class must provide chunk (yielding offset, length pairs)
  # and [] (taking an offset and length and returning an object that
  # responds to each_with_index).
  module Enumerable
    # def all? # :yield: obj
    #   not_implemented
    # end

    # def any? # :yield: obj
    #   not_implemented
    # end

    # def collect # :yield: item
    #   not_implemented
    # end

    # def collect! # :yield: item
    #   not_implemented
    # end

    # def detect(ifnone=nil) # :yield: obj
    #   not_implemented
    # end

    # def each_cons(n) # :yield:
    #   not_implemented
    # end

    # def each_slice(n) # :yield:
    #   not_implemented
    # end

    # Yields each item together with its absolute index.  Items are read
    # one chunk at a time (see chunk), and the index local to each chunk
    # is shifted by the chunk offset.
    #
    # Returns an enumerator when no block is given.
    def each_with_index(&block)
      return enum_for(:each_with_index) unless block_given?

      chunk do |offset, length|
        self[offset, length].each_with_index do |item, i|
          yield(item, i + offset)
        end
      end
    end

    # def entries
    #   to_a
    # end

    # def enum_cons(n)
    #   not_implemented
    # end

    # def enum_slice(n)
    #   not_implemented
    # end

    # def enum_with_index
    #   not_implemented
    # end

    # def find(ifnone=nil, &block) # :yield: obj
    #   detect(ifnone, &block)
    # end

    # def find_all # :yield: obj
    #   not_implemented
    # end

    # def grep(pattern) # :yield: obj
    #   not_implemented
    # end

    # def include?(obj)
    #   not_implemented
    # end

    # def inject(init) # :yield: memo, obj
    #   not_implemented
    # end

    # def map(&block) # :yield: item
    #   collect(&block)
    # end

    # def map!(&block) # :yield: item
    #   collect!(&block)
    # end

    # def max # :yield: a,b
    #   not_implemented
    # end

    # def member?(obj)
    #   include?(obj)
    # end

    # def min # :yield: a,b
    #   not_implemented
    # end

    # def partition # :yield: obj
    #   not_implemented
    # end

    # def reject # :yield: item
    #   not_implemented
    # end

    # def reject! # :yield: item
    #   not_implemented
    # end

    # def select(&block) # :yield: obj
    #   find_all(&block)
    # end

    # def sort # :yield: a,b
    #   not_implemented
    # end

    # def sort! # :yield: a,b
    #   not_implemented
    # end

    # def sort_by # :yield: obj
    #   not_implemented
    # end

    # def to_a
    #   not_implemented
    # end

    # def to_set(klass=Set, *args, &block)
    #   not_implemented
    # end

    # def zip(*arg) # :yield: arr
    #   not_implemented
    # end
  end
end
|