external 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History +5 -0
- data/MIT-LICENSE +21 -0
- data/README +168 -0
- data/lib/ext_arc.rb +108 -0
- data/lib/ext_arr.rb +727 -0
- data/lib/ext_ind.rb +1120 -0
- data/lib/external/base.rb +85 -0
- data/lib/external/chunkable.rb +105 -0
- data/lib/external/enumerable.rb +137 -0
- data/lib/external/io.rb +398 -0
- data/lib/external.rb +3 -0
- data/test/benchmarks/benchmarks_20070918.txt +45 -0
- data/test/benchmarks/benchmarks_20070921.txt +91 -0
- data/test/benchmarks/benchmarks_20071006.txt +147 -0
- data/test/benchmarks/test_copy_file.rb +80 -0
- data/test/benchmarks/test_pos_speed.rb +47 -0
- data/test/benchmarks/test_read_time.rb +55 -0
- data/test/cached_ext_ind_test.rb +219 -0
- data/test/check/benchmark_check.rb +441 -0
- data/test/check/namespace_conflicts_check.rb +23 -0
- data/test/check/pack_check.rb +90 -0
- data/test/ext_arc_test.rb +286 -0
- data/test/ext_arr/alt_sep.txt +3 -0
- data/test/ext_arr/cr_lf_input.txt +3 -0
- data/test/ext_arr/input.index +0 -0
- data/test/ext_arr/input.txt +1 -0
- data/test/ext_arr/inputb.index +0 -0
- data/test/ext_arr/inputb.txt +1 -0
- data/test/ext_arr/lf_input.txt +3 -0
- data/test/ext_arr/lines.txt +19 -0
- data/test/ext_arr/without_index.txt +1 -0
- data/test/ext_arr_test.rb +534 -0
- data/test/ext_ind_test.rb +1472 -0
- data/test/external/base_test.rb +74 -0
- data/test/external/chunkable_test.rb +182 -0
- data/test/external/index/input.index +0 -0
- data/test/external/index/inputb.index +0 -0
- data/test/external/io_test.rb +414 -0
- data/test/external_test_helper.rb +31 -0
- data/test/external_test_suite.rb +4 -0
- data/test/test_array.rb +1192 -0
- metadata +104 -0
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'external/io'
|
2
|
+
require 'external/chunkable'
|
3
|
+
require 'external/enumerable'
|
4
|
+
require 'tempfile'
|
5
|
+
|
6
|
+
module External

  #--
  # Base provides the basic array functionality shared by ExtArr and Index,
  # essentially wrapping the IO functions required to access and utilize external
  # array data with the standard array functions.  Bases can be opened in
  # any of the IO modes; the capabilities of Base will be reduced accordingly
  # (ie read-only Bases cannot write values using []=, for instance).
  #
  # It is VERY IMPORTANT to realize that the underlying IO will be opened using the
  # given mode.  The 'w' mode will overwrite all existing data; 'r+' is a safer mode
  # for full read-write functionality.  Note that since Base actively scans over
  # the IO, append modes essentially behave like write, but do not overwrite existing
  # data.
  #
  # To work properly, Base must be subclassed with methods:
  # * length
  # * io_fetch
  #++
  #
  #
  class Base
    class << self
      # Opens the file at fd with File.open(fd, mode) and wraps it in a new
      # instance (new(file, options)).  When fd is nil no file is opened and
      # the instance falls back to its default io (see initialize).
      #
      # With a block, the instance is yielded, closed when the block exits
      # (even on error), and the block's value is returned.  Without a
      # block the open instance is returned and the caller must close it.
      def open(fd=nil, mode="r", options={})
        fd = File.open(fd, mode) unless fd.nil?

        begin
          ab = new(fd, options)
        rescue StandardError
          # the file was opened here, so do not leak it if construction fails
          fd.close unless fd.nil? || fd.closed?
          raise
        end

        return ab unless block_given?

        begin
          yield(ab)
        ensure
          ab.close
        end
      end
    end

    include External::Enumerable
    include External::Chunkable

    attr_reader :io

    # Initializes a new Base wrapping io.  When io is nil a
    # Tempfile ("array_base") is created and used instead.
    #
    # options is accepted so that Base.open, which always passes an options
    # hash to new, works on Base itself; Base does not read it.
    #
    # NOTE(review): earlier documentation for this method described a
    # StringIO ('s') mode and the options nil_value, max_gap and
    # max_chunk_size; nothing in this class implements them -- presumably
    # they belong to subclasses, confirm there.
    def initialize(io=nil, options={})
      self.io = (io.nil? ? Tempfile.new("array_base") : io)
    end

    # True if io is closed.
    def closed?
      io.closed?
    end

    # Closes io, unless it is already closed.
    def close
      io.close unless io.closed?
    end

    protected

    # Sets io, extending the input io with External::IO
    # unless it already is one.
    def io=(io)
      io.extend External::IO unless io.kind_of?(External::IO)
      @io = io
    end

  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
module External

  # The Chunkable mixin provides methods for organizing a span or range
  # into chunks no larger than a specified block size.
  #
  # A span is an [offset, length] array.  A range is converted into a span
  # by split_range, so an inclusive range counts its end index (0..9
  # covers 10 items) while an exclusive range does not.
  module Chunkable
    attr_accessor :length, :default_blksize

    # Returns the default span: [0, length]
    def default_span
      [0, length]
    end

    # Breaks the input range or span into chunks of blksize or less.
    # The offset and length of each chunk will be provided to the
    # block, if given.  Without a block, the chunks are collected and
    # returned as an array of [offset, length] pairs.
    #
    #   blksize                    # => 100
    #   chunk(0..250)              # => [[0,100],[100,100],[200,51]]
    #
    #   results = []
    #   chunk([10,190]) {|offset, length| results << [offset, length]}
    #   results                    # => [[10,100],[110,90]]
    #
    # Raises ArgumentError if blksize is not a positive number, or if
    # the offset is still negative after counting back from length.
    def chunk(range_or_span=default_span, blksize=default_blksize)
      # forward blksize as well, otherwise the block-less form would
      # silently fall back to default_blksize
      return collect_results(:chunk, range_or_span, blksize) unless block_given?

      check_blksize(blksize)
      rbegin, rend = range_begin_and_end(range_or_span)

      # chunk the final range to make sure that no chunks
      # greater than blksize are returned
      while rend - rbegin > blksize
        yield(rbegin, blksize)
        rbegin += blksize
      end
      yield(rbegin, rend - rbegin) if rend - rbegin > 0
    end

    # Breaks the input range or span into chunks of blksize or less,
    # beginning from the end of the interval.  The offset and length
    # of each chunk will be provided to the block, if given.  Without
    # a block, the chunks are collected and returned as an array of
    # [offset, length] pairs.
    #
    #   blksize                    # => 100
    #   reverse_chunk(0..250)      # => [[151,100],[51,100],[0,51]]
    #
    #   results = []
    #   reverse_chunk([10,190]) {|offset, length| results << [offset, length]}
    #   results                    # => [[100,100],[10,90]]
    #
    # Raises ArgumentError under the same conditions as chunk.
    def reverse_chunk(range_or_span=default_span, blksize=default_blksize)
      return collect_results(:reverse_chunk, range_or_span, blksize) unless block_given?

      check_blksize(blksize)
      rbegin, rend = range_begin_and_end(range_or_span)

      # chunk the final range to make sure that no chunks
      # greater than blksize are returned
      while rend - rbegin > blksize
        rend -= blksize
        yield(rend, blksize)
      end
      yield(rbegin, rend - rbegin) if rend - rbegin > 0
    end

    protected

    # Converts a range into an offset and length.  Negative values are
    # counted back from self.length
    #
    #   length                 # => 10
    #   split_range(0..9)      # => [0,10]
    #   split_range(0...9)     # => [0,9]
    #
    #   split_range(-1..9)     # => [9,1]
    #   split_range(0..-1)     # => [0,10]
    def split_range(range)
      first = range.begin
      last = range.end

      first += self.length if first < 0
      last += self.length if last < 0

      # an inclusive range covers its end index, so the exclusive
      # bound lies one past it
      last += 1 unless range.exclude_end?

      [first, last - first]
    end

    # Normalizes an [offset, length] span, counting a negative offset
    # back from self.length.  Returns a new array; the input span is
    # left untouched.
    def split_span(span)
      offset, span_length = span
      offset += self.length if offset < 0
      [offset, span_length]
    end

    # Converts a range or span into [begin, end] offsets, where end is
    # exclusive.  Raises ArgumentError if the resolved begin offset is
    # negative.
    def range_begin_and_end(range_or_span)
      rbegin, span_length = range_or_span.kind_of?(Range) ? split_range(range_or_span) : split_span(range_or_span)
      raise ArgumentError, "negative offset specified: #{range_or_span.inspect}" if rbegin < 0

      [rbegin, rbegin + span_length]
    end

    private

    # Calls method with args and collects everything it yields.
    def collect_results(method, *args) # :nodoc:
      results = []
      send(method, *args) do |*result|
        results << result
      end
      results
    end

    # A zero or negative blksize would never shrink the interval and
    # would loop forever, so it is rejected up front.
    def check_blksize(blksize) # :nodoc:
      return if blksize.kind_of?(Numeric) && blksize > 0
      raise ArgumentError, "blksize must be a positive number: #{blksize.inspect}"
    end
  end
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
require 'enumerator'
|
2
|
+
|
3
|
+
module External
  # Enumerable-style iteration for objects that expose their contents
  # through chunk (see Chunkable) and self[offset, length].
  #
  # Only each_with_index is implemented.  The commented-out definitions
  # are unimplemented placeholders and are kept for reference.
  module Enumerable
    # def all? # :yield: obj
    #   not_implemented
    # end

    # def any? # :yield: obj
    #   not_implemented
    # end

    # def collect # :yield: item
    #   not_implemented
    # end

    # def collect! # :yield: item
    #   not_implemented
    # end

    # def detect(ifnone=nil) # :yield: obj
    #   not_implemented
    # end

    # def each_cons(n) # :yield:
    #   not_implemented
    # end

    # def each_slice(n) # :yield:
    #   not_implemented
    # end

    # Yields each item together with its absolute index.  Items are read
    # one chunk at a time: chunk supplies each (offset, length) pair and
    # self[offset, length] fetches the items of that chunk, so the index
    # within the chunk is shifted by the chunk offset.
    #
    # Returns self.  When no block is given an enumerator over
    # each_with_index is returned instead.
    def each_with_index(&block)
      return to_enum(:each_with_index) unless block

      chunk do |offset, length|
        self[offset, length].each_with_index do |item, i|
          yield(item, i + offset)
        end
      end

      self
    end

    # def entries
    #   to_a
    # end

    # def enum_cons(n)
    #   not_implemented
    # end

    # def enum_slice(n)
    #   not_implemented
    # end

    # def enum_with_index
    #   not_implemented
    # end

    # def find(ifnone=nil, &block) # :yield: obj
    #   detect(ifnone, &block)
    # end

    # def find_all # :yield: obj
    #   not_implemented
    # end

    # def grep(pattern) # :yield: obj
    #   not_implemented
    # end

    # def include?(obj)
    #   not_implemented
    # end

    # def inject(init) # :yield: memo, obj
    #   not_implemented
    # end

    # def map(&block) # :yield: item
    #   collect(&block)
    # end

    # def map!(&block) # :yield: item
    #   collect!(&block)
    # end

    # def max # :yield: a,b
    #   not_implemented
    # end

    # def member?(obj)
    #   include?(obj)
    # end

    # def min # :yield: a,b
    #   not_implemented
    # end

    # def partition # :yield: obj
    #   not_implemented
    # end

    # def reject # :yield: item
    #   not_implemented
    # end

    # def reject! # :yield: item
    #   not_implemented
    # end

    # def select(&block) # :yield: obj
    #   find_all(&block)
    # end

    # def sort # :yield: a,b
    #   not_implemented
    # end

    # def sort! # :yield: a,b
    #   not_implemented
    # end

    # def sort_by # :yield: obj
    #   not_implemented
    # end

    # def to_a
    #   not_implemented
    # end

    # def to_set(klass=Set, *args, &block)
    #   not_implemented
    # end

    # def zip(*arg) # :yield: arr
    #   not_implemented
    # end
  end
end
|