external 0.1.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History +7 -0
- data/MIT-LICENSE +1 -3
- data/README +162 -127
- data/lib/external.rb +2 -3
- data/lib/external/base.rb +174 -47
- data/lib/external/chunkable.rb +131 -105
- data/lib/external/enumerable.rb +78 -33
- data/lib/external/io.rb +163 -398
- data/lib/external/patches/ruby_1_8_io.rb +31 -0
- data/lib/external/patches/windows_io.rb +53 -0
- data/lib/external/patches/windows_utils.rb +27 -0
- data/lib/external/utils.rb +148 -0
- data/lib/external_archive.rb +840 -0
- data/lib/external_array.rb +57 -0
- data/lib/external_index.rb +1053 -0
- metadata +42 -58
- data/lib/ext_arc.rb +0 -108
- data/lib/ext_arr.rb +0 -727
- data/lib/ext_ind.rb +0 -1120
- data/test/benchmarks/benchmarks_20070918.txt +0 -45
- data/test/benchmarks/benchmarks_20070921.txt +0 -91
- data/test/benchmarks/benchmarks_20071006.txt +0 -147
- data/test/benchmarks/test_copy_file.rb +0 -80
- data/test/benchmarks/test_pos_speed.rb +0 -47
- data/test/benchmarks/test_read_time.rb +0 -55
- data/test/cached_ext_ind_test.rb +0 -219
- data/test/check/benchmark_check.rb +0 -441
- data/test/check/namespace_conflicts_check.rb +0 -23
- data/test/check/pack_check.rb +0 -90
- data/test/ext_arc_test.rb +0 -286
- data/test/ext_arr/alt_sep.txt +0 -3
- data/test/ext_arr/cr_lf_input.txt +0 -3
- data/test/ext_arr/input.index +0 -0
- data/test/ext_arr/input.txt +0 -1
- data/test/ext_arr/inputb.index +0 -0
- data/test/ext_arr/inputb.txt +0 -1
- data/test/ext_arr/lf_input.txt +0 -3
- data/test/ext_arr/lines.txt +0 -19
- data/test/ext_arr/without_index.txt +0 -1
- data/test/ext_arr_test.rb +0 -534
- data/test/ext_ind_test.rb +0 -1472
- data/test/external/base_test.rb +0 -74
- data/test/external/chunkable_test.rb +0 -182
- data/test/external/index/input.index +0 -0
- data/test/external/index/inputb.index +0 -0
- data/test/external/io_test.rb +0 -414
- data/test/external_test_helper.rb +0 -31
- data/test/external_test_suite.rb +0 -4
- data/test/test_array.rb +0 -1192
data/lib/external/chunkable.rb
CHANGED
@@ -1,105 +1,131 @@
|
|
1
|
-
module External
|
2
|
-
|
3
|
-
# The Chunkable mixin provides methods for organizing a span or range
|
4
|
-
# into chunks no larger than a specified block size.
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
# block
|
16
|
-
#
|
17
|
-
|
18
|
-
|
19
|
-
#
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
end
|
104
|
-
|
105
|
-
end
|
1
|
+
module External

  # The Chunkable mixin provides methods for organizing a span or range
  # into chunks no larger than a specified block size.  For reference:
  #
  # span::   an array like: [start, length]
  # range::  a Range whose begin is the start offset and whose end is the
  #          offset at which the interval stops; start..finish yields a
  #          length of (finish - start) and start...finish a length one
  #          less than that (see split_range)
  #
  # Negative offsets are counted back from length.
  module Chunkable

    # The length of the chunkable object;
    # length must be set by the object.
    attr_accessor :length

    # The default block size for chunking a chunkable
    # object; default_blksize must be set by the object.
    attr_accessor :default_blksize

    # Returns the default span: [0, length]
    def default_span
      [0, length]
    end

    # Breaks the input range or span into chunks of blksize or less.
    # The offset and length of each chunk will be provided to the
    # block, if given.  Without a block the [offset, length] pairs are
    # collected into an array and returned.
    #
    #   default_blksize    # => 100
    #   chunk(0..250)      # => [[0,100],[100,100],[200,50]]
    #
    #   results = []
    #   chunk([10,190]) {|offset, length| results << [offset, length]}
    #   results            # => [[10,100],[110,90]]
    #
    # Raises an ArgumentError if blksize is not a positive number, or if
    # the start offset is negative after adjusting by length.
    def chunk(range_or_span=default_span, blksize=default_blksize)
      # forward blksize too, so an explicit block size is honored
      # when the results are collected
      return collect_results(:chunk, range_or_span, blksize) unless block_given?

      check_blksize(blksize)
      rbegin, rend = range_begin_and_end(range_or_span)

      # chunk the final range to make sure that no chunks
      # greater than blksize are returned
      while rend - rbegin > blksize
        yield(rbegin, blksize)
        rbegin += blksize
      end
      yield(rbegin, rend - rbegin) if rend - rbegin > 0
    end

    # Breaks the input range or span into chunks of blksize or less,
    # beginning from the end of the interval.  The offset and length
    # of each chunk will be provided to the block, if given.  Without
    # a block the [offset, length] pairs are collected into an array
    # and returned.
    #
    #   default_blksize          # => 100
    #   reverse_chunk(0..250)    # => [[150,100],[50,100],[0,50]]
    #
    #   results = []
    #   reverse_chunk([10,190]) {|offset, length| results << [offset, length]}
    #   results                  # => [[100,100],[10,90]]
    #
    # Raises an ArgumentError if blksize is not a positive number, or if
    # the start offset is negative after adjusting by length.
    def reverse_chunk(range_or_span=default_span, blksize=default_blksize)
      return collect_results(:reverse_chunk, range_or_span, blksize) unless block_given?

      check_blksize(blksize)
      rbegin, rend = range_begin_and_end(range_or_span)

      # chunk the final range to make sure that no chunks
      # greater than blksize are returned
      while rend - rbegin > blksize
        rend -= blksize
        yield(rend, blksize)
      end
      yield(rbegin, rend - rbegin) if rend - rbegin > 0
    end

    module_function

    # Converts a range into an offset and length.  Negative values are
    # counted back from self.length.  The length is (end - begin) for
    # an inclusive range, and one less for an exclusive range.
    #
    #   length                 # => 10
    #   split_range(0..9)      # => [0,9]
    #   split_range(0...9)     # => [0,8]
    #
    #   split_range(-1..9)     # => [9,0]
    #   split_range(0..-1)     # => [0,9]
    def split_range(range)
      start, finish = range.begin, range.end
      start += length if start < 0
      finish += length if finish < 0

      [start, finish - start - (range.exclude_end? ? 1 : 0)]
    end

    # The complement to split_range; returns a new span with a negative
    # start index counted back from self.length.  The input span is
    # not modified.
    #
    #   length                 # => 10
    #   split_span([0, 10])    # => [0,10]
    #   split_span([-1, 1])    # => [9,1]
    #
    def split_span(span)
      offset, size = span
      offset += length if offset < 0
      [offset, size]
    end

    # Returns the beginning and end of a range or span.
    #
    #   range_begin_and_end(0..10)     # => [0, 10]
    #   range_begin_and_end(0...10)    # => [0, 9]
    #   range_begin_and_end([0, 10])   # => [0, 10]
    #
    # Raises an ArgumentError if the start offset is still negative
    # after being counted back from length.
    def range_begin_and_end(range_or_span)
      rbegin, rlength = range_or_span.kind_of?(Range) ? split_range(range_or_span) : split_span(range_or_span)
      raise ArgumentError, "negative offset specified: #{range_or_span.inspect}" if rbegin < 0

      [rbegin, rbegin + rlength]
    end

    private

    # a utility method to collect the results of a method
    # that requires a block.
    def collect_results(method, *args) # :nodoc:
      results = []
      send(method, *args) do |*result|
        results << result
      end
      results
    end

    # guards the chunking loops, which never terminate
    # for a zero or negative block size.
    def check_blksize(blksize) # :nodoc:
      return if blksize.kind_of?(Numeric) && blksize > 0
      raise ArgumentError, "blksize must be a positive number: #{blksize.inspect}"
    end
  end
end
|
data/lib/external/enumerable.rb
CHANGED
@@ -1,26 +1,63 @@
|
|
1
1
|
require 'enumerator'
|
2
2
|
|
3
3
|
module External
|
4
|
+
|
5
|
+
# An externalized implementation of Enumerable. External::Enumerable
|
6
|
+
# requires several methods with the following functionality:
|
7
|
+
#
|
8
|
+
# each:: iterates over items in self
|
9
|
+
# another:: provide a another instance of self
|
10
|
+
# to_a:: converts self to an Array
|
11
|
+
#
|
4
12
|
module Enumerable
|
5
|
-
#
|
6
|
-
#
|
7
|
-
#
|
13
|
+
# Flag indicating whether to enumerate (ie collect,
|
14
|
+
# select, etc) into an array or into an instance
|
15
|
+
# of self. In most cases enumerating to an array
|
16
|
+
# performs better, but enumerating to another
|
17
|
+
# instance of self may be desired for especially
|
18
|
+
# large collections.
|
19
|
+
attr_accessor :enumerate_to_a
|
20
|
+
|
21
|
+
# Returns true if the block returns a true value for every item
# yielded by each, and false as soon as one item fails.  Without a
# block the items themselves are tested, as in the core Enumerable.
def all? # :yield: obj
  # WARN -- no tests for this in test_array
  each do |obj|
    return false unless block_given? ? yield(obj) : obj
  end
  true
end
|
8
28
|
|
9
|
-
|
10
|
-
|
11
|
-
|
29
|
+
# Returns true as soon as the block returns a true value for an item
# yielded by each, and false if it never does.  Without a block the
# items themselves are tested, as in the core Enumerable.
def any? # :yield: obj
  # WARN -- no tests for this in test_array
  each do |obj|
    return true if block_given? ? yield(obj) : obj
  end
  false
end
|
12
36
|
|
13
|
-
|
14
|
-
|
15
|
-
|
37
|
+
# Yields each item to the block and appends the block results to a
# new collection, which is returned.  The collection is an Array when
# enumerate_to_a is set, and otherwise the object returned by another.
#
# Without a block an enumerator over each is returned.
def collect # :yield: item
  # Not sure if Enumerator works right for large externals...
  # enum_for is used instead of Enumerable::Enumerator.new because the
  # latter constant only exists on ruby 1.8.
  return enum_for(:each) unless block_given?

  results = enumerate_to_a ? [] : self.another
  each do |item|
    results << yield(item)
  end
  results
end
|
16
49
|
|
17
50
|
# def collect! # :yield: item
|
18
51
|
# not_implemented
|
19
52
|
# end
|
20
53
|
|
21
|
-
|
22
|
-
|
23
|
-
|
54
|
+
# Returns the first item yielded by each for which the block returns
# a true value.  If no item matches, the result of calling ifnone is
# returned when ifnone is given, and nil otherwise.
def detect(ifnone=nil) # :yield: obj
  # WARN -- no tests for this in test_array
  each do |obj|
    return obj if yield(obj)
  end

  # honor the fallback, as the core Enumerable#detect does
  ifnone ? ifnone.call : nil
end
|
24
61
|
|
25
62
|
# def each_cons(n) # :yield:
|
26
63
|
# not_implemented
|
@@ -38,9 +75,9 @@ module External
|
|
38
75
|
end
|
39
76
|
end
|
40
77
|
|
41
|
-
|
42
|
-
|
43
|
-
|
78
|
+
# Returns the items of self as an Array; simply delegates to to_a.
def entries
|
79
|
+
to_a
|
80
|
+
end
|
44
81
|
|
45
82
|
# def enum_cons(n)
|
46
83
|
# not_implemented
|
@@ -54,29 +91,37 @@ module External
|
|
54
91
|
# not_implemented
|
55
92
|
# end
|
56
93
|
|
57
|
-
|
58
|
-
|
59
|
-
|
94
|
+
# Alias for detect; ifnone and the block are forwarded unchanged.
def find(ifnone=nil, &block) # :yield: obj
|
95
|
+
# WARN -- no tests for this in test_array
|
96
|
+
detect(ifnone, &block)
|
97
|
+
end
|
60
98
|
|
61
|
-
|
62
|
-
|
63
|
-
|
99
|
+
# Returns a new collection holding the items for which the block
# returns a true value.  The collection is an Array when
# enumerate_to_a is set, and otherwise the object returned by another.
#
# Without a block an enumerator is returned (consistent with collect)
# rather than raising a LocalJumpError on the first item.
def find_all # :yield: obj
  return enum_for(:find_all) unless block_given?

  selected = enumerate_to_a ? [] : self.another
  each do |item|
    selected << item if yield(item)
  end
  selected
end
|
64
106
|
|
65
107
|
# def grep(pattern) # :yield: obj
|
66
108
|
# not_implemented
|
67
109
|
# end
|
68
110
|
|
69
|
-
|
70
|
-
|
71
|
-
|
111
|
+
# Returns true if any item yielded by each is == to obj,
# and false otherwise.
def include?(obj)
  each { |candidate| return true if candidate == obj }
  false
end
|
72
117
|
|
73
118
|
# def inject(init) # :yield: memo, obj
|
74
119
|
# not_implemented
|
75
120
|
# end
|
76
121
|
|
77
|
-
|
78
|
-
|
79
|
-
|
122
|
+
# Alias for collect; the block is forwarded unchanged.
def map(&block) # :yield: item
|
123
|
+
collect(&block)
|
124
|
+
end
|
80
125
|
|
81
126
|
# def map!(&block) # :yield: item
|
82
127
|
# collect!(&block)
|
@@ -86,9 +131,9 @@ module External
|
|
86
131
|
# not_implemented
|
87
132
|
# end
|
88
133
|
|
89
|
-
|
90
|
-
|
91
|
-
|
134
|
+
# Alias for include?
def member?(obj)
|
135
|
+
include?(obj)
|
136
|
+
end
|
92
137
|
|
93
138
|
# def min # :yield: a,b
|
94
139
|
# not_implemented
|
@@ -106,9 +151,9 @@ module External
|
|
106
151
|
# not_implemented
|
107
152
|
# end
|
108
153
|
|
109
|
-
|
110
|
-
|
111
|
-
|
154
|
+
# Alias for find_all; the block is forwarded unchanged.
def select(&block) # :yield: obj
|
155
|
+
find_all(&block)
|
156
|
+
end
|
112
157
|
|
113
158
|
# def sort # :yield: a,b
|
114
159
|
# not_implemented
|
data/lib/external/io.rb
CHANGED
@@ -1,398 +1,163 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
#
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
#
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
end
|
87
|
-
|
88
|
-
|
89
|
-
#
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
#
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
#
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
# * the _mode method for StringIO is 'stringio_mode'
|
165
|
-
# * the _length method for StringIO is 'stringio_length'
|
166
|
-
#
|
167
|
-
# Nested classes have '::' replaced by '_'. Thus to add support
|
168
|
-
# for Some::Unknown::IO, extend External::IO as below:
|
169
|
-
#
|
170
|
-
# module External::IO
|
171
|
-
# def some_unknown_io_mode(io)
|
172
|
-
# ...
|
173
|
-
# end
|
174
|
-
#
|
175
|
-
# def some_unknown_io_length(io)
|
176
|
-
# ...
|
177
|
-
# end
|
178
|
-
# end
|
179
|
-
#
|
180
|
-
# See stringio_mode and stringio_length for more details.
|
181
|
-
def self.try_handle(io, method)
|
182
|
-
method_name = io.class.to_s.downcase.gsub(/::/, "_") + "_#{method}"
|
183
|
-
if self.respond_to?(method_name)
|
184
|
-
External::IO.send(method_name, io)
|
185
|
-
else
|
186
|
-
raise "cannot determine #{method} for '%s'" % io.class
|
187
|
-
end
|
188
|
-
end
|
189
|
-
|
190
|
-
public
|
191
|
-
|
192
|
-
include Chunkable
|
193
|
-
attr_reader :generic_mode
|
194
|
-
|
195
|
-
# True if self is a File or Tempfile
|
196
|
-
def file?
|
197
|
-
self.kind_of?(File) || self.kind_of?(Tempfile)
|
198
|
-
end
|
199
|
-
|
200
|
-
# Modified truncate that adjusts length
|
201
|
-
def truncate(n)
|
202
|
-
super
|
203
|
-
self.pos = n if self.pos > n
|
204
|
-
self.length = n
|
205
|
-
end
|
206
|
-
|
207
|
-
# Resets length to the length returned by External::IO.length
|
208
|
-
def reset_length
|
209
|
-
self.length = External::IO.length(self)
|
210
|
-
end
|
211
|
-
|
212
|
-
#
|
213
|
-
# comparison
|
214
|
-
#
|
215
|
-
|
216
|
-
# Quick comparision with another IO. Returns true if
|
217
|
-
# another == self, or if both are file-type IOs and
|
218
|
-
# their paths are equal.
|
219
|
-
def quick_compare(another)
|
220
|
-
self == another || (self.file? && another.file? && self.path == another.path)
|
221
|
-
end
|
222
|
-
|
223
|
-
# Sort compare with another IO, behaving like a comparison between
|
224
|
-
# the full string contents of self and another. Can be a long
|
225
|
-
# operation if it requires the full read of two large IO objects.
|
226
|
-
def sort_compare(another, blksize=default_blksize)
|
227
|
-
# equal in comparison if the ios are equal
|
228
|
-
return 0 if quick_compare(another)
|
229
|
-
|
230
|
-
if another.length > self.length
|
231
|
-
return -1
|
232
|
-
elsif self.length < another.length
|
233
|
-
return 1
|
234
|
-
else
|
235
|
-
self.flush unless self.generic_mode == 'r'
|
236
|
-
self.pos = 0
|
237
|
-
another.flush unless another.generic_mode == 'r'
|
238
|
-
another.pos = 0
|
239
|
-
|
240
|
-
sa = sb = nil
|
241
|
-
while sa == sb
|
242
|
-
sa = self.read(blksize)
|
243
|
-
sb = another.read(blksize)
|
244
|
-
break if sa.nil? || sb.nil?
|
245
|
-
end
|
246
|
-
|
247
|
-
sa.to_s <=> sb.to_s
|
248
|
-
end
|
249
|
-
end
|
250
|
-
|
251
|
-
# Sort compare with another IO, behaving like a comparison between
|
252
|
-
# the full string contents of self and another. Can be a long
|
253
|
-
# operation if it requires the full read of two large IO objects.
|
254
|
-
def <=>(another)
|
255
|
-
sort_compare(another)
|
256
|
-
end
|
257
|
-
|
258
|
-
#
|
259
|
-
# reading
|
260
|
-
#
|
261
|
-
|
262
|
-
def scan(range_or_span=default_span, blksize=default_blksize, carryover_limit=default_blksize)
|
263
|
-
carryover = 0
|
264
|
-
chunk(range_or_span, blksize) do |offset, length|
|
265
|
-
raise "carryover exceeds limit: #{carryover} (#{carryover_limit})" if carryover > carryover_limit
|
266
|
-
|
267
|
-
scan_begin = offset - carryover
|
268
|
-
self.pos = scan_begin
|
269
|
-
string = self.read(length + carryover)
|
270
|
-
carryover = yield(scan_begin, string)
|
271
|
-
end
|
272
|
-
carryover
|
273
|
-
end
|
274
|
-
|
275
|
-
#
|
276
|
-
# writing
|
277
|
-
#
|
278
|
-
|
279
|
-
#
|
280
|
-
def insert(src, range=0..src.length, pos=nil)
|
281
|
-
self.pos = pos unless pos == nil
|
282
|
-
|
283
|
-
start_pos = self.pos
|
284
|
-
length_written = 0
|
285
|
-
|
286
|
-
src.flush unless src.generic_mode == 'r'
|
287
|
-
src.pos = range.begin
|
288
|
-
src.chunk(range) do |offset, length|
|
289
|
-
length_written += write(src.read(length))
|
290
|
-
end
|
291
|
-
|
292
|
-
end_pos = start_pos + length_written
|
293
|
-
self.length = end_pos if end_pos > self.length
|
294
|
-
length_written
|
295
|
-
end
|
296
|
-
|
297
|
-
#
|
298
|
-
def concat(src, range=0..src.length)
|
299
|
-
insert(src, range, length)
|
300
|
-
end
|
301
|
-
|
302
|
-
#--
|
303
|
-
# it appears that as long as the io opening t.path closes,
|
304
|
-
# the tempfile will be deleted at the exit of the ruby
|
305
|
-
# instance... otherwise it WILL NOT BE DELETED
|
306
|
-
# Make note of this in the documentation to be sure to close
|
307
|
-
# files if you start inserting because it may make tempfiles
|
308
|
-
#++
|
309
|
-
def copy(mode="r", range=0..length)
|
310
|
-
self.flush
|
311
|
-
|
312
|
-
temp = Tempfile.new("copy")
|
313
|
-
temp.extend IO
|
314
|
-
temp.insert(self, range)
|
315
|
-
temp.close
|
316
|
-
|
317
|
-
cp = File.open(temp.path, mode)
|
318
|
-
cp.extend IO
|
319
|
-
|
320
|
-
if block_given?
|
321
|
-
begin
|
322
|
-
yield(cp)
|
323
|
-
ensure
|
324
|
-
cp.close unless cp.closed?
|
325
|
-
FileUtils.rm(cp.path) if File.exists?(cp.path)
|
326
|
-
end
|
327
|
-
else
|
328
|
-
cp
|
329
|
-
end
|
330
|
-
end
|
331
|
-
|
332
|
-
end
|
333
|
-
end
|
334
|
-
|
335
|
-
# This code block modifies IO only if running on windows
|
336
|
-
unless RUBY_PLATFORM.index('mswin').nil?
|
337
|
-
require 'Win32API'
|
338
|
-
|
339
|
-
module External
|
340
|
-
module IO
|
341
|
-
# Modfied to properly determine file lengths on Windows. Uses code
|
342
|
-
# from 'win32/file/stat' (http://rubyforge.org/projects/win32utils/)
|
343
|
-
def self.file_length(io) # :nodoc:
|
344
|
-
io.fsync unless io.generic_mode == 'r'
|
345
|
-
|
346
|
-
# I would have liked to use win32/file/stat to do this... however, some issue
|
347
|
-
# arose involving FileUtils.cp, File.stat, and File::Stat.mode. cp raised an
|
348
|
-
# error because the mode would be nil for files. I wasn't sure how to fix it,
|
349
|
-
# so I've lifted the relevant code for pulling the large file size.
|
350
|
-
|
351
|
-
# Note this is a simplified version... if you base.path point to a chardev,
|
352
|
-
# this may need to be changed, because apparently the call to the Win32API
|
353
|
-
# may fail
|
354
|
-
|
355
|
-
stat_buf = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].pack('ISSssssIILILILIL')
|
356
|
-
Win32API.new('msvcrt', '_stat64', 'PP', 'I').call(io.path, stat_buf)
|
357
|
-
stat_buf[24, 4].unpack('L').first # Size of file in bytes
|
358
|
-
end
|
359
|
-
|
360
|
-
POSITION_MAX = 2147483647 # maximum size of long
|
361
|
-
|
362
|
-
# Modified to handle positions past the 2Gb limit
|
363
|
-
def pos # :nodoc:
|
364
|
-
@pos || super
|
365
|
-
end
|
366
|
-
|
367
|
-
# Positions larger than the max value of a long cannot be directly given
|
368
|
-
# to the default +pos=+. This version incrementally seeks to positions
|
369
|
-
# beyond the maximum, if necessary.
|
370
|
-
#
|
371
|
-
# Note: setting the position beyond the 2Gb limit requires the use of a
|
372
|
-
# sysseek statement. As such, errors will arise if you try to position
|
373
|
-
# an IO object that does not support this method (for example StringIO...
|
374
|
-
# but then what are you doing with a 2Gb StringIO anyhow?)
|
375
|
-
def pos=(pos)
|
376
|
-
if pos < POSITION_MAX
|
377
|
-
super(pos)
|
378
|
-
@pos = nil
|
379
|
-
elsif @pos != pos
|
380
|
-
# note sysseek appears to be necessary here, rather than io.seek
|
381
|
-
@pos = pos
|
382
|
-
|
383
|
-
super(POSITION_MAX)
|
384
|
-
pos -= POSITION_MAX
|
385
|
-
|
386
|
-
while pos > POSITION_MAX
|
387
|
-
pos -= POSITION_MAX
|
388
|
-
self.sysseek(POSITION_MAX, Object::IO::SEEK_CUR)
|
389
|
-
end
|
390
|
-
|
391
|
-
self.sysseek(pos, Object::IO::SEEK_CUR)
|
392
|
-
end
|
393
|
-
end
|
394
|
-
|
395
|
-
end
|
396
|
-
end
|
397
|
-
|
398
|
-
end # end the windows-specific code
|
1
|
+
require 'external/chunkable'
|
2
|
+
require 'external/utils'
|
3
|
+
|
4
|
+
autoload(:StringIO, 'stringio')
|
5
|
+
autoload(:Tempfile, 'tempfile')
|
6
|
+
autoload(:FileUtils, 'fileutils')
|
7
|
+
|
8
|
+
module External

  # Adds functionality to an IO required by External.
  #
  # IO adds/overrides the length accessor for getting the size of the IO contents.
  # Note that length is not automatically adjusted by write, for performance
  # reasons.  length must be managed manually, or reset after writes using
  # reset_length.
  #
  module Io
    include Chunkable

    # Modules in this array are extended onto every object that extends
    # Io (see Io.extended).  Presumably populated by the patch files
    # required below -- TODO confirm.
    PATCHES = []

    # Add version-specific patches
    case RUBY_VERSION
    when /\A1\.8/ then require "external/patches/ruby_1_8_io"
    end

    # Add platform-specific patches
    # case RUBY_PLATFORM
    # when 'java'
    # end

    # Hook run when an object extends Io: applies the PATCHES, initializes
    # length via reset_length, sets default_blksize to 1024 and puts the
    # object into binmode.
    def self.extended(base)
      PATCHES.each {|patch| base.extend patch }
      base.reset_length
      base.default_blksize = 1024
      base.binmode
    end

    # Resets length to the length returned by Utils.length
    def reset_length
      self.length = Utils.length(self)
    end

    # Modified truncate that adjusts length, and moves pos back to n
    # if pos lies beyond the new end.
    def truncate(n)
      super
      self.pos = n if self.pos > n
      self.length = n
    end

    # Reads the range or span in chunks of blksize.  For each chunk the
    # block receives the position where reading began and the string
    # read, and must return the carryover: the number of bytes to back
    # up and re-read at the start of the next chunk.  Returns the last
    # carryover.
    #
    # Raises an error if a carryover exceeds carryover_limit.
    def scan(range_or_span=default_span, blksize=default_blksize, carryover_limit=default_blksize)
      carryover = 0
      chunk(range_or_span, blksize) do |offset, length|
        raise "carryover exceeds limit: #{carryover} (#{carryover_limit})" if carryover > carryover_limit

        scan_begin = offset - carryover
        self.pos = scan_begin
        string = self.read(length + carryover)
        carryover = yield(scan_begin, string)
      end
      carryover
    end

    # Writes the range of src into self, chunk by chunk, beginning at pos
    # (or at the current position when pos is nil).  length is increased
    # if the write ends beyond the current length.  Returns the total of
    # the values returned by write.
    def insert(src, range=0..src.length, pos=nil)
      self.pos = pos unless pos.nil?

      start_pos = self.pos
      length_written = 0

      src.flush
      src.pos = range.begin
      src.chunk(range) do |offset, length|
        length_written += write(src.read(length))
      end

      end_pos = start_pos + length_written
      self.length = end_pos if end_pos > self.length
      length_written
    end

    # Inserts the range of src at the end of self (ie at length).
    def concat(src, range=0..src.length)
      insert(src, range, length)
    end

    # Copies the range of self into a new temporary file and reopens that
    # file in the specified mode as an Io.  With a block the copy is
    # yielded, then closed and removed; the block result is returned.
    # Without a block the open copy is returned, and the caller must
    # close it.
    #--
    # it appears that as long as the io opening t.path closes,
    # the tempfile will be deleted at the exit of the ruby
    # instance... otherwise it WILL NOT BE DELETED
    # Make note of this in the documentation to be sure to close
    # files if you start inserting because it may make tempfiles
    #++
    def copy(mode="r", range=0..length)
      self.flush

      temp = Tempfile.new("copy")
      temp.extend Io
      temp.insert(self, range)
      temp.close

      cp = File.open(temp.path, mode)
      cp.extend Io

      if block_given?
        begin
          yield(cp)
        ensure
          cp.close unless cp.closed?
          # File.exist? is used because File.exists? was removed in ruby 3.2
          FileUtils.rm(cp.path) if File.exist?(cp.path)
        end
      else
        cp
      end
    end

    # Quick comparison with another IO.  Returns true if
    # another == self, or if both are file-type IOs and
    # their paths are equal.
    def quick_compare(another)
      self == another || (self.kind_of?(File) && another.kind_of?(File) && self.path == another.path)
    end

    # Sort compare (ie <=>) with another IO.  Returns 0 if the IOs
    # quick_compare.  Otherwise both IOs are flushed and their lengths
    # reset; the shorter IO sorts first, and IOs of equal length are
    # ordered by comparing their contents blksize bytes at a time.  This
    # obviously can be a long operation if it requires the full read of
    # two large IO objects.
    def sort_compare(another, blksize=default_blksize)
      # equal in comparison if the ios are equal
      return 0 if quick_compare(another)

      self.flush
      self.reset_length

      another.flush
      another.reset_length

      # order by length first; only equal-length ios are read
      return -1 if self.length < another.length
      return 1 if self.length > another.length

      self.pos = 0
      another.pos = 0

      sa = sb = nil
      while sa == sb
        sa = self.read(blksize)
        sb = another.read(blksize)
        break if sa.nil? || sb.nil?
      end

      sa.to_s <=> sb.to_s
    end

    # Alias for sort_compare.
    def <=>(another)
      sort_compare(another)
    end
  end
end
|