bahuvrihi-external 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History +17 -0
- data/MIT-LICENSE +19 -0
- data/README +203 -0
- data/lib/external.rb +2 -0
- data/lib/external/base.rb +217 -0
- data/lib/external/chunkable.rb +131 -0
- data/lib/external/enumerable.rb +182 -0
- data/lib/external/io.rb +163 -0
- data/lib/external/patches/ruby_1_8_io.rb +31 -0
- data/lib/external/patches/windows_io.rb +53 -0
- data/lib/external/patches/windows_utils.rb +27 -0
- data/lib/external/utils.rb +156 -0
- data/lib/external_archive.rb +846 -0
- data/lib/external_array.rb +57 -0
- data/lib/external_index.rb +1053 -0
- metadata +88 -0
@@ -0,0 +1,131 @@
|
|
1
|
+
module External

  # The Chunkable mixin provides methods for organizing a span or range
  # into chunks no larger than a specified block size.  For reference:
  #
  #   span    an array like: [start, length]
  #   range   a Range like: start..end or start...end
  #
  # Ranges are not given Ruby's usual inclusive meaning here: start..end
  # is converted to a span of (end - start) items and start...end to a
  # span of one item fewer (see split_range).
  #
  module Chunkable

    # The length of the chunkable object;
    # length must be set by the object.
    attr_accessor :length

    # The default block size for chunking a chunkable
    # object; default_blksize must be set by the object.
    attr_accessor :default_blksize

    # Returns the default span: [0, length]
    def default_span
      [0, length]
    end

    # Breaks the input range or span into chunks of blksize or less.
    # The offset and length of each chunk are yielded to the block;
    # without a block the [offset, length] pairs are collected and
    # returned as an array.
    #
    #   blksize     # => 100
    #   chunk(0..250)     # => [[0,100],[100,100],[200,50]]
    #
    #   results = []
    #   chunk([10,190]) {|offset, length| results << [offset, length]}
    #   results     # => [[10,100],[110,90]]
    #
    # Raises an ArgumentError if the interval resolves to a negative
    # offset, or if blksize is not positive while there is something
    # to chunk.
    def chunk(range_or_span=default_span, blksize=default_blksize)
      # blksize is forwarded so the block-less form honors it too
      return collect_results(:chunk, range_or_span, blksize) unless block_given?

      rbegin, rend = range_begin_and_end(range_or_span)
      validate_blksize(rend - rbegin, blksize)

      # peel full-size chunks off the front of the interval so
      # that no chunk greater than blksize is yielded
      while rend - rbegin > blksize
        yield(rbegin, blksize)
        rbegin += blksize
      end

      # whatever remains (if anything) is the final, short chunk
      remainder = rend - rbegin
      yield(rbegin, remainder) if remainder > 0
    end

    # Breaks the input range or span into chunks of blksize or less,
    # beginning from the end of the interval.  The offset and length
    # of each chunk are yielded to the block; without a block the
    # [offset, length] pairs are collected and returned as an array.
    #
    #   blksize     # => 100
    #   reverse_chunk(0..250)     # => [[150,100],[50,100],[0,50]]
    #
    #   results = []
    #   reverse_chunk([10,190]) {|offset, length| results << [offset, length]}
    #   results     # => [[100,100],[10,90]]
    #
    # Raises an ArgumentError under the same conditions as chunk.
    def reverse_chunk(range_or_span=default_span, blksize=default_blksize)
      # blksize is forwarded so the block-less form honors it too
      return collect_results(:reverse_chunk, range_or_span, blksize) unless block_given?

      rbegin, rend = range_begin_and_end(range_or_span)
      validate_blksize(rend - rbegin, blksize)

      # peel full-size chunks off the back of the interval so
      # that no chunk greater than blksize is yielded
      while rend - rbegin > blksize
        rend -= blksize
        yield(rend, blksize)
      end

      # whatever remains (if anything) is the final, short chunk
      remainder = rend - rbegin
      yield(rbegin, remainder) if remainder > 0
    end

    module_function

    # Converts a range into an offset and length.  Negative values are
    # counted back from self.length.  The length is (end - start) for a
    # start..end range, and one less for a start...end range.
    #
    #   length     # => 10
    #   split_range(0..9)      # => [0,9]
    #   split_range(0...9)     # => [0,8]
    #
    #   split_range(-1..9)     # => [9,0]
    #   split_range(0..-1)     # => [0,9]
    #
    # NOTE(review): earlier documentation listed inclusive results here
    # (e.g. 0..9 => [0,10]), which this arithmetic does not produce; the
    # chunk examples above match the arithmetic, so the code was kept.
    def split_range(range)
      start, finish = range.begin, range.end
      start += length if start < 0
      finish += length if finish < 0

      [start, finish - start - (range.exclude_end? ? 1 : 0)]
    end

    # The complement to split_range; returns the span with a negative
    # start index counted back from self.length.  The input span is
    # not modified; a new array is returned when adjustment is needed.
    #
    #   length     # => 10
    #   split_span([0, 10])     # => [0,10]
    #   split_span([-1, 1])     # => [9,1]
    #
    def split_span(span)
      return span unless span[0] < 0
      [span[0] + self.length, *span[1..-1]]
    end

    # Returns the beginning and end of a range or span.
    #
    #   range_begin_and_end(0..10)      # => [0, 10]
    #   range_begin_and_end(0...10)     # => [0, 9]
    #   range_begin_and_end([0, 10])    # => [0, 10]
    #
    # Raises an ArgumentError if the offset is still negative after
    # being counted back from length.
    def range_begin_and_end(range_or_span)
      rbegin, rend = range_or_span.kind_of?(Range) ? split_range(range_or_span) : split_span(range_or_span)

      # inspect is used (rather than PP) so the error path has no
      # dependency on the pp library being loaded
      raise ArgumentError, "negative offset specified: #{range_or_span.inspect}" if rbegin < 0

      # split_* return [offset, length]; convert length to an end index
      rend += rbegin

      [rbegin, rend]
    end

    private

    # a utility method to collect the results of a method
    # that requires a block.  All args are forwarded to method.
    def collect_results(method, *args) # :nodoc:
      results = []
      send(method, *args) do |*result|
        results << result
      end
      results
    end

    # Guards the chunking loops: a non-positive blksize can never
    # shrink the remaining interval, so the loop would not terminate.
    # The size comparison comes first so a nil blksize fails in the
    # comparison itself, as it does in the loop condition.
    def validate_blksize(remaining, blksize) # :nodoc:
      if remaining > blksize && blksize <= 0
        raise ArgumentError, "blksize must be greater than zero: #{blksize.inspect}"
      end
    end
  end
end
|
@@ -0,0 +1,182 @@
|
|
1
|
+
require 'enumerator'
|
2
|
+
|
3
|
+
module External

  # An externalized implementation of Enumerable.  External::Enumerable
  # requires several methods with the following functionality:
  #
  # each::     iterates over items in self
  # another::  provide another instance of self
  # to_a::     converts self to an Array
  #
  # In addition each_with_index calls chunk and self[offset, length],
  # and the collecting methods append to their result with <<.
  #
  # The following Enumerable/Array methods are not implemented:
  # collect!, each_cons, each_slice, enum_cons, enum_slice,
  # enum_with_index, grep, inject, map!, max, min, partition, reject,
  # reject!, sort, sort!, sort_by, to_set, zip.
  #
  module Enumerable
    # Flag indicating whether to enumerate (ie collect,
    # select, etc) into an array or into an instance
    # of self.  In most cases enumerating to an array
    # performs better, but enumerating to another
    # instance of self may be desired for especially
    # large collections.
    attr_accessor :enumerate_to_a

    # Returns true if the block is truthy for every item;
    # stops at the first item for which it is not.
    def all? # :yield: obj
      # WARN -- no tests for this in test_array
      each do |obj|
        return false unless yield(obj)
      end
      true
    end

    # Returns true if the block is truthy for at least one
    # item; stops at the first such item.
    def any? # :yield: obj
      # WARN -- no tests for this in test_array
      each do |obj|
        return true if yield(obj)
      end
      false
    end

    # Appends the block result for each item to an array (when
    # enumerate_to_a is set) or to another instance of self, and
    # returns that collection.  Without a block, returns an
    # enumerator over each.
    def collect # :yield: item
      # enum_for(:each) is the portable spelling of the former
      # Enumerable::Enumerator.new(self), which only exists on ruby 1.8.
      # Not sure if Enumerator works right for large externals...
      return enum_for(:each) unless block_given?

      results = enumerate_to_a ? [] : self.another
      each do |item|
        results << yield(item)
      end
      results
    end

    # Returns the first item for which the block is truthy.  If no
    # item matches, returns the result of calling ifnone when it is
    # given, or nil otherwise.
    def detect(ifnone=nil) # :yield: obj
      # WARN -- no tests for this in test_array
      each do |obj|
        return obj if yield(obj)
      end
      ifnone && ifnone.call
    end

    # Yields each item with its index in self.  Items are read one
    # chunk at a time using chunk and self[offset, length]; the index
    # within a chunk is shifted by the chunk offset.
    def each_with_index(&block)
      chunk do |offset, blk_length|
        self[offset, blk_length].each_with_index do |item, i|
          yield(item, i + offset)
        end
      end
    end

    # Alias for to_a.
    def entries
      to_a
    end

    # Alias for detect.
    def find(ifnone=nil, &block) # :yield: obj
      # WARN -- no tests for this in test_array
      detect(ifnone, &block)
    end

    # Appends each item for which the block is truthy to an array
    # (when enumerate_to_a is set) or to another instance of self,
    # and returns that collection.
    def find_all # :yield: obj
      results = enumerate_to_a ? [] : self.another
      each do |item|
        results << item if yield(item)
      end
      results
    end

    # Returns true if any item == obj.
    def include?(obj)
      each do |current|
        return true if current == obj
      end
      false
    end

    # Alias for collect.
    def map(&block) # :yield: item
      collect(&block)
    end

    # Alias for include?.
    def member?(obj)
      include?(obj)
    end

    # Alias for find_all.
    def select(&block) # :yield: obj
      find_all(&block)
    end
  end
end
|
data/lib/external/io.rb
ADDED
@@ -0,0 +1,163 @@
|
|
1
|
+
require 'external/chunkable'
|
2
|
+
require 'external/utils'
|
3
|
+
|
4
|
+
autoload(:StringIO, 'stringio')
|
5
|
+
autoload(:Tempfile, 'tempfile')
|
6
|
+
autoload(:FileUtils, 'fileutils')
|
7
|
+
|
8
|
+
module External

  # Adds functionality to an IO required by External.
  #
  # IO adds/overrides the length accessor for getting the size of the IO contents.
  # Note that length is not automatically adjusted by write, for performance
  # reasons.  length must be managed manually, or reset after writes using
  # reset_length.
  #
  module Io
    include Chunkable

    # Modules that are extended onto every IO that extends Io.
    PATCHES = []

    # Add version-specific patches
    case RUBY_VERSION
    when /\A1\.8/ then require "external/patches/ruby_1_8_io"
    end

    # Add platform-specific patches
    # case RUBY_PLATFORM
    # when 'java'
    # end

    # Hook run when an IO extends Io: applies PATCHES, initializes
    # length, sets the default block size to 1024 and switches the
    # IO to binmode.
    def self.extended(base)
      PATCHES.each { |patch| base.extend patch }
      base.reset_length
      base.default_blksize = 1024
      base.binmode
    end

    # Resets length to the length returned by Utils.length
    def reset_length
      self.length = Utils.length(self)
    end

    # Modified truncate that adjusts length, and pulls pos back
    # to n when it would otherwise lie beyond the new end.
    def truncate(n)
      super
      self.pos = n if self.pos > n
      self.length = n
    end

    # Reads the range or span in chunks of blksize, yielding the read
    # position and the string read there to the block.  The block must
    # return a carryover: the number of trailing bytes to re-read at
    # the start of the next chunk.  Returns the final carryover.
    # Raises an error if a carryover exceeds carryover_limit.
    def scan(range_or_span=default_span, blksize=default_blksize, carryover_limit=default_blksize)
      carryover = 0
      chunk(range_or_span, blksize) do |offset, chunk_length|
        raise "carryover exceeds limit: #{carryover} (#{carryover_limit})" if carryover > carryover_limit

        # back up so the carried-over bytes are read again
        scan_begin = offset - carryover
        self.pos = scan_begin
        string = self.read(chunk_length + carryover)
        carryover = yield(scan_begin, string)
      end
      carryover
    end

    # Writes the specified range of src into self, starting at pos
    # (or at the current position if pos is nil).  src is read in
    # chunks using src.chunk.  length is grown if the write extends
    # past it.  Returns the number of bytes written.
    def insert(src, range=0..src.length, pos=nil)
      self.pos = pos unless pos.nil?

      start_pos = self.pos
      length_written = 0

      src.flush
      src.pos = range.begin
      # only the chunk sizes are needed; src is read sequentially
      src.chunk(range) do |_offset, chunk_length|
        length_written += write(src.read(chunk_length))
      end

      end_pos = start_pos + length_written
      self.length = end_pos if end_pos > self.length
      length_written
    end

    # Inserts the specified range of src at the end of self
    # (ie at length).  Returns the number of bytes written.
    def concat(src, range=0..src.length)
      insert(src, range, length)
    end

    # Copies the specified range of self into a tempfile and reopens
    # that file in the specified mode as an Io.  With a block the copy
    # is yielded, then closed and removed, and the block result is
    # returned; without a block the open copy is returned.
    #
    #--
    # it appears that as long as the io opening t.path closes,
    # the tempfile will be deleted at the exit of the ruby
    # instance... otherwise it WILL NOT BE DELETED
    # Make note of this in the documentation to be sure to close
    # files if you start inserting because it may make tempfiles
    #++
    def copy(mode="r", range=0..length)
      self.flush

      temp = Tempfile.new("copy")
      temp.extend Io
      temp.insert(self, range)
      temp.close

      cp = File.open(temp.path, mode)
      cp.extend Io

      return cp unless block_given?

      begin
        yield(cp)
      ensure
        cp.close unless cp.closed?
        # File.exist? works on every ruby; File.exists? was removed in 3.2
        FileUtils.rm(cp.path) if File.exist?(cp.path)
      end
    end

    # Quick comparison with another IO.  Returns true if
    # another == self, or if both are file-type IOs and
    # their paths are equal.
    def quick_compare(another)
      self == another || (self.kind_of?(File) && another.kind_of?(File) && self.path == another.path)
    end

    # Sort compare (ie <=>) with another IO.  Returns 0 if
    # quick_compare is true.  Otherwise both IOs are flushed and
    # their lengths reset; the shorter IO sorts first (-1 if self
    # is shorter, 1 if self is longer).  IOs of equal length are
    # read from the start in blocks of blksize and ordered by the
    # first differing block.  This obviously can be a long operation
    # if it requires the full read of two large IO objects.
    #
    # another must respond to reset_length (ie it must extend Io).
    def sort_compare(another, blksize=default_blksize)
      # equal in comparison if the ios are equal
      return 0 if quick_compare(another)

      self.flush
      self.reset_length

      another.flush
      another.reset_length

      return -1 if self.length < another.length
      return 1 if self.length > another.length

      self.pos = 0
      another.pos = 0

      # read in lockstep until a block differs or both reach EOF
      sa = sb = nil
      while sa == sb
        sa = self.read(blksize)
        sb = another.read(blksize)
        break if sa.nil? || sb.nil?
      end

      sa.to_s <=> sb.to_s
    end

    # Alias for sort_compare.
    def <=>(another)
      sort_compare(another)
    end
  end
end
|