external 0.1.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. data/History +7 -0
  2. data/MIT-LICENSE +1 -3
  3. data/README +162 -127
  4. data/lib/external.rb +2 -3
  5. data/lib/external/base.rb +174 -47
  6. data/lib/external/chunkable.rb +131 -105
  7. data/lib/external/enumerable.rb +78 -33
  8. data/lib/external/io.rb +163 -398
  9. data/lib/external/patches/ruby_1_8_io.rb +31 -0
  10. data/lib/external/patches/windows_io.rb +53 -0
  11. data/lib/external/patches/windows_utils.rb +27 -0
  12. data/lib/external/utils.rb +148 -0
  13. data/lib/external_archive.rb +840 -0
  14. data/lib/external_array.rb +57 -0
  15. data/lib/external_index.rb +1053 -0
  16. metadata +42 -58
  17. data/lib/ext_arc.rb +0 -108
  18. data/lib/ext_arr.rb +0 -727
  19. data/lib/ext_ind.rb +0 -1120
  20. data/test/benchmarks/benchmarks_20070918.txt +0 -45
  21. data/test/benchmarks/benchmarks_20070921.txt +0 -91
  22. data/test/benchmarks/benchmarks_20071006.txt +0 -147
  23. data/test/benchmarks/test_copy_file.rb +0 -80
  24. data/test/benchmarks/test_pos_speed.rb +0 -47
  25. data/test/benchmarks/test_read_time.rb +0 -55
  26. data/test/cached_ext_ind_test.rb +0 -219
  27. data/test/check/benchmark_check.rb +0 -441
  28. data/test/check/namespace_conflicts_check.rb +0 -23
  29. data/test/check/pack_check.rb +0 -90
  30. data/test/ext_arc_test.rb +0 -286
  31. data/test/ext_arr/alt_sep.txt +0 -3
  32. data/test/ext_arr/cr_lf_input.txt +0 -3
  33. data/test/ext_arr/input.index +0 -0
  34. data/test/ext_arr/input.txt +0 -1
  35. data/test/ext_arr/inputb.index +0 -0
  36. data/test/ext_arr/inputb.txt +0 -1
  37. data/test/ext_arr/lf_input.txt +0 -3
  38. data/test/ext_arr/lines.txt +0 -19
  39. data/test/ext_arr/without_index.txt +0 -1
  40. data/test/ext_arr_test.rb +0 -534
  41. data/test/ext_ind_test.rb +0 -1472
  42. data/test/external/base_test.rb +0 -74
  43. data/test/external/chunkable_test.rb +0 -182
  44. data/test/external/index/input.index +0 -0
  45. data/test/external/index/inputb.index +0 -0
  46. data/test/external/io_test.rb +0 -414
  47. data/test/external_test_helper.rb +0 -31
  48. data/test/external_test_suite.rb +0 -4
  49. data/test/test_array.rb +0 -1192
@@ -1,105 +1,131 @@
1
- module External
2
-
3
- # The Chunkable mixin provides methods for organizing a span or range
4
- # into chunks no larger than a specified block size.
5
- module Chunkable
6
- attr_accessor :length, :default_blksize
7
-
8
- # Returns the default span: [0, length]
9
- def default_span
10
- [0, length]
11
- end
12
-
13
- # Breaks the input range or span into chunks of blksize or less.
14
- # The offset and length of each chunk will be provided to the
15
- # block, if given.
16
- #
17
- # blksize # => 100
18
- # chunk(0..250) # => [[0,100],[100,100],[200,50]]
19
- #
20
- # results = []
21
- # chunk([10,190]) {|offset, length| results << [offset, length]}
22
- # results # => [[10,100],[110,90]]
23
- #
24
- def chunk(range_or_span=default_span, blksize=default_blksize)
25
- return collect_results(:chunk, range_or_span) unless block_given?
26
-
27
- rbegin, rend = range_begin_and_end(range_or_span)
28
-
29
- # chunk the final range to make sure that no chunks
30
- # greater than blksize are returned
31
- while rend - rbegin > blksize
32
- yield(rbegin, blksize)
33
- rbegin += blksize
34
- end
35
- yield(rbegin, rend - rbegin) if rend - rbegin > 0
36
- end
37
-
38
- # Breaks the input range or span into chunks of blksize or less,
39
- # beginning from the end of the interval. The offset and length
40
- # of each chunk will be provided to the block, if given.
41
- #
42
- # blksize # => 100
43
- # reverse_chunk(0..250) # => [[150,100],[50,100],[0,50]]
44
- #
45
- # results = []
46
- # reverse_chunk([10,190]) {|offset, length| results << [offset, length]}
47
- # results # => [[100,100],[10,90]]
48
- #
49
- def reverse_chunk(range_or_span=default_span, blksize=default_blksize)
50
- return collect_results(:reverse_chunk, range_or_span) unless block_given?
51
-
52
- rbegin, rend = range_begin_and_end(range_or_span)
53
-
54
- # chunk the final range to make sure that no chunks
55
- # greater than blksize are returned
56
- while rend - rbegin > blksize
57
- rend -= blksize
58
- yield(rend, blksize)
59
- end
60
- yield(rbegin, rend - rbegin) if rend - rbegin > 0
61
- end
62
-
63
- protected
64
-
65
- # Converts a range into an offset and length. Negative values are
66
- # counted back from self.length
67
- #
68
- # length # => 10
69
- # split_range(0..9) # => [0,10]
70
- # split_range(0...9) # => [0,9]
71
- #
72
- # split_range(-1..9) # => [9,1]
73
- # split_range(0..-1) # => [0,10]
74
- def split_range(range)
75
- begin_range = range.begin + (range.begin < 0 ? self.length : 0)
76
- end_range = range.end + (range.end < 0 ? self.length : 0)
77
- length = end_range - begin_range - (range.exclude_end? ? 1 : 0)
78
-
79
- [begin_range, length]
80
- end
81
-
82
- def split_span(span)
83
- span[0] += self.length if span[0] < 0
84
- span
85
- end
86
-
87
- def range_begin_and_end(range_or_span)
88
- rbegin, rend = range_or_span.kind_of?(Range) ? split_range(range_or_span) : split_span(range_or_span)
89
- raise ArgumentError.new("negative offset specified: #{PP.singleline_pp(range_or_span,'')}") if rbegin < 0
90
- rend += rbegin
91
-
92
- [rbegin, rend]
93
- end
94
-
95
- private
96
-
97
- def collect_results(method, args) # :nodoc:
98
- results = []
99
- send(method, args) do |*result|
100
- results << result
101
- end
102
- results
103
- end
104
- end
105
- end
1
+ module External
2
+
3
+ # The Chunkable mixin provides methods for organizing a span or range
4
+ # into chunks no larger than a specified block size. For reference:
5
+ #
6
+ # span an array like: [start, length]
7
+ # range a Range like: start..end or start...(end - 1)
8
+ #
9
+ module Chunkable
10
+
11
+ # The length of the chunkable object;
12
+ # length must be set by the object.
13
+ attr_accessor :length
14
+
15
+ # The default block size for chunking a chunkable
16
+ # object; default_blksize must be set by the object.
17
+ attr_accessor :default_blksize
18
+
19
+ # Returns the default span: [0, length]
20
+ def default_span
21
+ [0, length]
22
+ end
23
+
24
+ # Breaks the input range or span into chunks of blksize or less.
25
+ # The offset and length of each chunk will be provided to the
26
+ # block, if given.
27
+ #
28
+ # blksize # => 100
29
+ # chunk(0..250) # => [[0,100],[100,100],[200,50]]
30
+ #
31
+ # results = []
32
+ # chunk([10,190]) {|offset, length| results << [offset, length]}
33
+ # results # => [[10,100],[110,90]]
34
+ #
35
+ def chunk(range_or_span=default_span, blksize=default_blksize)
36
+ return collect_results(:chunk, range_or_span) unless block_given?
37
+
38
+ rbegin, rend = range_begin_and_end(range_or_span)
39
+
40
+ # chunk the final range to make sure that no chunks
41
+ # greater than blksize are returned
42
+ while rend - rbegin > blksize
43
+ yield(rbegin, blksize)
44
+ rbegin += blksize
45
+ end
46
+ yield(rbegin, rend - rbegin) if rend - rbegin > 0
47
+ end
48
+
49
+ # Breaks the input range or span into chunks of blksize or less,
50
+ # beginning from the end of the interval. The offset and length
51
+ # of each chunk will be provided to the block, if given.
52
+ #
53
+ # blksize # => 100
54
+ # reverse_chunk(0..250) # => [[150,100],[50,100],[0,50]]
55
+ #
56
+ # results = []
57
+ # reverse_chunk([10,190]) {|offset, length| results << [offset, length]}
58
+ # results # => [[100,100],[10,90]]
59
+ #
60
+ def reverse_chunk(range_or_span=default_span, blksize=default_blksize)
61
+ return collect_results(:reverse_chunk, range_or_span) unless block_given?
62
+
63
+ rbegin, rend = range_begin_and_end(range_or_span)
64
+
65
+ # chunk the final range to make sure that no chunks
66
+ # greater than blksize are returned
67
+ while rend - rbegin > blksize
68
+ rend -= blksize
69
+ yield(rend, blksize)
70
+ end
71
+ yield(rbegin, rend - rbegin) if rend - rbegin > 0
72
+ end
73
+
74
+ module_function
75
+
76
+ # Converts a range into an offset and length. Negative values are
77
+ # counted back from self.length
78
+ #
79
+ # length # => 10
80
+ # split_range(0..9) # => [0,10]
81
+ # split_range(0...9) # => [0,9]
82
+ #
83
+ # split_range(-1..9) # => [9,1]
84
+ # split_range(0..-1) # => [0,10]
85
+ def split_range(range)
86
+ start, finish = range.begin, range.end
87
+ start += length if start < 0
88
+ finish += length if finish < 0
89
+
90
+ [start, finish - start - (range.exclude_end? ? 1 : 0)]
91
+ end
92
+
93
+ # The complement to split_range; returns the span with a negative
94
+ # start index counted back from self.length.
95
+ #
96
+ # length # => 10
97
+ # split_span([0, 10]) # => [0,10]
98
+ # split_span([-1, 1]) # => [9,1]
99
+ #
100
+ def split_span(span)
101
+ span[0] += self.length if span[0] < 0
102
+ span
103
+ end
104
+
105
+ # Returns the beginning and end of a range or span.
106
+ #
107
+ # range_begin_and_end(0..10) # => [0, 10]
108
+ # range_begin_and_end(0...10) # => [0, 9]
109
+ # range_begin_and_end([0, 10]) # => [0, 10]
110
+ #
111
+ def range_begin_and_end(range_or_span)
112
+ rbegin, rend = range_or_span.kind_of?(Range) ? split_range(range_or_span) : split_span(range_or_span)
113
+ raise ArgumentError.new("negative offset specified: #{PP.singleline_pp(range_or_span,'')}") if rbegin < 0
114
+ rend += rbegin
115
+
116
+ [rbegin, rend]
117
+ end
118
+
119
+ private
120
+
121
+ # a utility method to collect the results of a method
122
+ # that requires a block.
123
+ def collect_results(method, args) # :nodoc:
124
+ results = []
125
+ send(method, args) do |*result|
126
+ results << result
127
+ end
128
+ results
129
+ end
130
+ end
131
+ end
@@ -1,26 +1,63 @@
1
1
  require 'enumerator'
2
2
 
3
3
  module External
4
+
5
+ # An externalized implementation of Enumerable. External::Enumerable
6
+ # requires several methods with the following functionality:
7
+ #
8
+ # each:: iterates over items in self
9
+ # another:: provides another instance of self
10
+ # to_a:: converts self to an Array
11
+ #
4
12
  module Enumerable
5
- # def all? # :yield: obj
6
- # not_implemented
7
- # end
13
+ # Flag indicating whether to enumerate (ie collect,
14
+ # select, etc) into an array or into an instance
15
+ # of self. In most cases enumerating to an array
16
+ # performs better, but enumerating to another
17
+ # instance of self may be desired for especially
18
+ # large collections.
19
+ attr_accessor :enumerate_to_a
20
+
21
+ def all? # :yield: obj
22
+ # WARN -- no tests for this in test_array
23
+ each do |obj|
24
+ return false unless yield(obj)
25
+ end
26
+ true
27
+ end
8
28
 
9
- # def any? # :yield: obj
10
- # not_implemented
11
- # end
29
+ def any? # :yield: obj
30
+ # WARN -- no tests for this in test_array
31
+ each do |obj|
32
+ return true if yield(obj)
33
+ end
34
+ false
35
+ end
12
36
 
13
- # def collect # :yield: item
14
- # not_implemented
15
- # end
37
+ def collect # :yield: item
38
+ if block_given?
39
+ another = enumerate_to_a ? [] : self.another
40
+ each do |item|
41
+ another << yield(item)
42
+ end
43
+ another
44
+ else
45
+ # Not sure if Enumerator works right for large externals...
46
+ Object::Enumerable::Enumerator.new(self)
47
+ end
48
+ end
16
49
 
17
50
  # def collect! # :yield: item
18
51
  # not_implemented
19
52
  # end
20
53
 
21
- # def detect(ifnone=nil) # :yield: obj
22
- # not_implemented
23
- # end
54
+ def detect(ifnone=nil) # :yield: obj
55
+ # WARN -- no tests for this in test_array
56
+ each do |obj|
57
+ return obj if yield(obj)
58
+ end
59
+ nil
60
+ end
24
61
 
25
62
  # def each_cons(n) # :yield:
26
63
  # not_implemented
@@ -38,9 +75,9 @@ module External
38
75
  end
39
76
  end
40
77
 
41
- # def entries
42
- # to_a
43
- # end
78
+ def entries
79
+ to_a
80
+ end
44
81
 
45
82
  # def enum_cons(n)
46
83
  # not_implemented
@@ -54,29 +91,37 @@ module External
54
91
  # not_implemented
55
92
  # end
56
93
 
57
- # def find(ifnone=nil, &block) # :yield: obj
58
- # detect(ifnone, &block)
59
- # end
94
+ def find(ifnone=nil, &block) # :yield: obj
95
+ # WARN -- no tests for this in test_array
96
+ detect(ifnone, &block)
97
+ end
60
98
 
61
- # def find_all # :yield: obj
62
- # not_implemented
63
- # end
99
+ def find_all # :yield: obj
100
+ another = enumerate_to_a ? [] : self.another
101
+ each do |item|
102
+ another << item if yield(item)
103
+ end
104
+ another
105
+ end
64
106
 
65
107
  # def grep(pattern) # :yield: obj
66
108
  # not_implemented
67
109
  # end
68
110
 
69
- # def include?(obj)
70
- # not_implemented
71
- # end
111
+ def include?(obj)
112
+ each do |current|
113
+ return true if current == obj
114
+ end
115
+ false
116
+ end
72
117
 
73
118
  # def inject(init) # :yield: memo, obj
74
119
  # not_implemented
75
120
  # end
76
121
 
77
- # def map(&block) # :yield: item
78
- # collect(&block)
79
- # end
122
+ def map(&block) # :yield: item
123
+ collect(&block)
124
+ end
80
125
 
81
126
  # def map!(&block) # :yield: item
82
127
  # collect!(&block)
@@ -86,9 +131,9 @@ module External
86
131
  # not_implemented
87
132
  # end
88
133
 
89
- # def member?(obj)
90
- # include?(obj)
91
- # end
134
+ def member?(obj)
135
+ include?(obj)
136
+ end
92
137
 
93
138
  # def min # :yield: a,b
94
139
  # not_implemented
@@ -106,9 +151,9 @@ module External
106
151
  # not_implemented
107
152
  # end
108
153
 
109
- # def select(&block) # :yield: obj
110
- # find_all(&block)
111
- # end
154
+ def select(&block) # :yield: obj
155
+ find_all(&block)
156
+ end
112
157
 
113
158
  # def sort # :yield: a,b
114
159
  # not_implemented
data/lib/external/io.rb CHANGED
@@ -1,398 +1,163 @@
1
- require 'stringio'
2
- require 'tempfile'
3
- require 'external/chunkable'
4
-
5
- module External
6
- # Position gets IO objects to work properly for large files. Additionally,
7
- # IO adds a length accessor for getting the size of the IO contents. Note
8
- # that length is not automatically adjusted by write, for performance
9
- # reasons. length must be managed manually, or reset after writes using
10
- # reset_length.
11
- #
12
- # A variety of bugs needed to be addressed per-platform:
13
- #
14
- # == Mac OS X Tiger
15
- #
16
- # Using the default (broken) installation of Ruby, StringIO does not correctly
17
- # position itself when a pos= statement is issued.
18
- #
19
- # s = StringIO.new "abc"
20
- # s.read # => "abc"
21
- # s.pos = 0
22
- # s.read # => nil
23
- #
24
- # For regular IO objects, as expected, the second read statement returns
25
- # "abc". Install the a fixed version of Ruby, perhaps with the one-click
26
- # installer: http://rubyosx.rubyforge.org/
27
- #
28
- # == Windows
29
- #
30
- # Ruby on Windows has problems with files larger than ~2 gigabytes.
31
- # Sizes return as negative, and positions cannot be set beyond the max
32
- # size of a long (2147483647 ~ 2GB = 2475636895). IO corrects both of
33
- # these issues thanks in large part to a bit of code taken from
34
- # 'win32/file/stat' (http://rubyforge.org/projects/win32utils/).
35
- #
36
- # == Others
37
- #
38
- # I haven't found errors on Fedora and haven't tested on any other platforms.
39
- # If you find and solve some wierd positioning errors, please let me know.
40
- module IO
41
-
42
- # Determines the generic mode of the input io using the _mode
43
- # method for the input io class. By default IO provides _mode
44
- # methods for File, Tempfile, and StringIO. The return string
45
- # is determined as follows:
46
- #
47
- # readable & writable:: r+
48
- # readable:: r
49
- # writable:: w
50
- #
51
- # The _mode method takes the input io and should return an array
52
- # specifying whether or not io is readable and writable
53
- # (ie [readable, writable]).
54
- #
55
- # See try_handle for more details.
56
- def self.mode(io)
57
- readable, writable = try_handle(io, "mode")
58
-
59
- case
60
- when readable && writable then "r+"
61
- when readable then "r"
62
- when writable then "w"
63
- else
64
- # occurs for r+ mode, for some reason
65
- "r+"
66
- end
67
- end
68
-
69
- # Determines the length of the input io using the _length method
70
- # for the input io class. Non-External::IO inputs are extended
71
- # in this process.
72
- #
73
- # The _length method takes the input io, and should return the
74
- # current length of the input io (ie a flush operation may be
75
- # required).
76
- #
77
- # See try_handle for more details.
78
- def self.length(io)
79
- case io
80
- when External::IO
81
- try_handle(io, "length")
82
- else
83
- io.extend External::IO
84
- io.length
85
- end
86
- end
87
-
88
- # Returns an array of bools determining if the input File
89
- # is readable and writable.
90
- def self.file_mode(io)
91
- begin
92
- dup = io.dup
93
-
94
- # determine readable/writable by sending close methods
95
- # to the duplicated IO. If the io cannot be closed for
96
- # read/write then it will raise an error, indicating that
97
- # it was not open in the given mode.
98
- [:close_read, :close_write].collect do |method|
99
- begin
100
- dup.send(method)
101
- true
102
- rescue(IOError)
103
- false
104
- end
105
- end
106
- ensure
107
- # Be sure that the io is fully closed before proceeding!
108
- # (Otherwise Tempfiles will not be properly disposed of
109
- # ... at least on Windows, perhaps on others)
110
- dup.close if dup && !dup.closed?
111
- end
112
- end
113
-
114
- # Returns the length of the input File
115
- def self.file_length(io)
116
- io.fsync unless io.generic_mode == 'r'
117
- File.size(io.path)
118
- end
119
-
120
- # Returns an array of bools determining if the input Tempfile
121
- # is readable and writable.
122
- def self.tempfile_mode(io)
123
- file_mode(io.instance_variable_get("@tmpfile"))
124
- end
125
-
126
- # Returns the length of the input Tempfile
127
- def self.tempfile_length(io)
128
- file_length(io)
129
- end
130
-
131
- # Returns an array of bools determining if the input StringIO
132
- # is readable and writable.
133
- #
134
- # s = StringIO.new("abcde", "r+")
135
- # External::IO.stringio_mode(s) # => [true, true]
136
- #
137
- def self.stringio_mode(io)
138
- [!io.closed_read?, !io.closed_write?]
139
- end
140
-
141
- # Returns the length of the input StringIO
142
- #
143
- # s = StringIO.new("abcde", "r+")
144
- # External::IO.length(s) # => 5
145
- #
146
- def self.stringio_length(io)
147
- io.string.length
148
- end
149
-
150
- def self.extended(base) # :nodoc:
151
- base.instance_variable_set("@generic_mode", mode(base))
152
- base.reset_length
153
- base.default_blksize = 1024
154
- base.binmode
155
- end
156
-
157
- protected
158
-
159
- # try_handle is a forwarding method allowing External::IO to handle
160
- # non-File, non-Tempfile IO objects. try_handle infers a method
161
- # name based on the class of the input and trys to forward the
162
- # input io to that method within External::IO. For instance:
163
- #
164
- # * the _mode method for StringIO is 'stringio_mode'
165
- # * the _length method for StringIO is 'stringio_length'
166
- #
167
- # Nested classes have '::' replaced by '_'. Thus to add support
168
- # for Some::Unknown::IO, extend External::IO as below:
169
- #
170
- # module External::IO
171
- # def some_unknown_io_mode(io)
172
- # ...
173
- # end
174
- #
175
- # def some_unknown_io_length(io)
176
- # ...
177
- # end
178
- # end
179
- #
180
- # See stringio_mode and stringio_length for more details.
181
- def self.try_handle(io, method)
182
- method_name = io.class.to_s.downcase.gsub(/::/, "_") + "_#{method}"
183
- if self.respond_to?(method_name)
184
- External::IO.send(method_name, io)
185
- else
186
- raise "cannot determine #{method} for '%s'" % io.class
187
- end
188
- end
189
-
190
- public
191
-
192
- include Chunkable
193
- attr_reader :generic_mode
194
-
195
- # True if self is a File or Tempfile
196
- def file?
197
- self.kind_of?(File) || self.kind_of?(Tempfile)
198
- end
199
-
200
- # Modified truncate that adjusts length
201
- def truncate(n)
202
- super
203
- self.pos = n if self.pos > n
204
- self.length = n
205
- end
206
-
207
- # Resets length to the length returned by External::IO.length
208
- def reset_length
209
- self.length = External::IO.length(self)
210
- end
211
-
212
- #
213
- # comparison
214
- #
215
-
216
- # Quick comparision with another IO. Returns true if
217
- # another == self, or if both are file-type IOs and
218
- # their paths are equal.
219
- def quick_compare(another)
220
- self == another || (self.file? && another.file? && self.path == another.path)
221
- end
222
-
223
- # Sort compare with another IO, behaving like a comparison between
224
- # the full string contents of self and another. Can be a long
225
- # operation if it requires the full read of two large IO objects.
226
- def sort_compare(another, blksize=default_blksize)
227
- # equal in comparison if the ios are equal
228
- return 0 if quick_compare(another)
229
-
230
- if another.length > self.length
231
- return -1
232
- elsif self.length < another.length
233
- return 1
234
- else
235
- self.flush unless self.generic_mode == 'r'
236
- self.pos = 0
237
- another.flush unless another.generic_mode == 'r'
238
- another.pos = 0
239
-
240
- sa = sb = nil
241
- while sa == sb
242
- sa = self.read(blksize)
243
- sb = another.read(blksize)
244
- break if sa.nil? || sb.nil?
245
- end
246
-
247
- sa.to_s <=> sb.to_s
248
- end
249
- end
250
-
251
- # Sort compare with another IO, behaving like a comparison between
252
- # the full string contents of self and another. Can be a long
253
- # operation if it requires the full read of two large IO objects.
254
- def <=>(another)
255
- sort_compare(another)
256
- end
257
-
258
- #
259
- # reading
260
- #
261
-
262
- def scan(range_or_span=default_span, blksize=default_blksize, carryover_limit=default_blksize)
263
- carryover = 0
264
- chunk(range_or_span, blksize) do |offset, length|
265
- raise "carryover exceeds limit: #{carryover} (#{carryover_limit})" if carryover > carryover_limit
266
-
267
- scan_begin = offset - carryover
268
- self.pos = scan_begin
269
- string = self.read(length + carryover)
270
- carryover = yield(scan_begin, string)
271
- end
272
- carryover
273
- end
274
-
275
- #
276
- # writing
277
- #
278
-
279
- #
280
- def insert(src, range=0..src.length, pos=nil)
281
- self.pos = pos unless pos == nil
282
-
283
- start_pos = self.pos
284
- length_written = 0
285
-
286
- src.flush unless src.generic_mode == 'r'
287
- src.pos = range.begin
288
- src.chunk(range) do |offset, length|
289
- length_written += write(src.read(length))
290
- end
291
-
292
- end_pos = start_pos + length_written
293
- self.length = end_pos if end_pos > self.length
294
- length_written
295
- end
296
-
297
- #
298
- def concat(src, range=0..src.length)
299
- insert(src, range, length)
300
- end
301
-
302
- #--
303
- # it appears that as long as the io opening t.path closes,
304
- # the tempfile will be deleted at the exit of the ruby
305
- # instance... otherwise it WILL NOT BE DELETED
306
- # Make note of this in the documentation to be sure to close
307
- # files if you start inserting because it may make tempfiles
308
- #++
309
- def copy(mode="r", range=0..length)
310
- self.flush
311
-
312
- temp = Tempfile.new("copy")
313
- temp.extend IO
314
- temp.insert(self, range)
315
- temp.close
316
-
317
- cp = File.open(temp.path, mode)
318
- cp.extend IO
319
-
320
- if block_given?
321
- begin
322
- yield(cp)
323
- ensure
324
- cp.close unless cp.closed?
325
- FileUtils.rm(cp.path) if File.exists?(cp.path)
326
- end
327
- else
328
- cp
329
- end
330
- end
331
-
332
- end
333
- end
334
-
335
- # This code block modifies IO only if running on windows
336
- unless RUBY_PLATFORM.index('mswin').nil?
337
- require 'Win32API'
338
-
339
- module External
340
- module IO
341
- # Modfied to properly determine file lengths on Windows. Uses code
342
- # from 'win32/file/stat' (http://rubyforge.org/projects/win32utils/)
343
- def self.file_length(io) # :nodoc:
344
- io.fsync unless io.generic_mode == 'r'
345
-
346
- # I would have liked to use win32/file/stat to do this... however, some issue
347
- # arose involving FileUtils.cp, File.stat, and File::Stat.mode. cp raised an
348
- # error because the mode would be nil for files. I wasn't sure how to fix it,
349
- # so I've lifted the relevant code for pulling the large file size.
350
-
351
- # Note this is a simplified version... if you base.path point to a chardev,
352
- # this may need to be changed, because apparently the call to the Win32API
353
- # may fail
354
-
355
- stat_buf = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].pack('ISSssssIILILILIL')
356
- Win32API.new('msvcrt', '_stat64', 'PP', 'I').call(io.path, stat_buf)
357
- stat_buf[24, 4].unpack('L').first # Size of file in bytes
358
- end
359
-
360
- POSITION_MAX = 2147483647 # maximum size of long
361
-
362
- # Modified to handle positions past the 2Gb limit
363
- def pos # :nodoc:
364
- @pos || super
365
- end
366
-
367
- # Positions larger than the max value of a long cannot be directly given
368
- # to the default +pos=+. This version incrementally seeks to positions
369
- # beyond the maximum, if necessary.
370
- #
371
- # Note: setting the position beyond the 2Gb limit requires the use of a
372
- # sysseek statement. As such, errors will arise if you try to position
373
- # an IO object that does not support this method (for example StringIO...
374
- # but then what are you doing with a 2Gb StringIO anyhow?)
375
- def pos=(pos)
376
- if pos < POSITION_MAX
377
- super(pos)
378
- @pos = nil
379
- elsif @pos != pos
380
- # note sysseek appears to be necessary here, rather than io.seek
381
- @pos = pos
382
-
383
- super(POSITION_MAX)
384
- pos -= POSITION_MAX
385
-
386
- while pos > POSITION_MAX
387
- pos -= POSITION_MAX
388
- self.sysseek(POSITION_MAX, Object::IO::SEEK_CUR)
389
- end
390
-
391
- self.sysseek(pos, Object::IO::SEEK_CUR)
392
- end
393
- end
394
-
395
- end
396
- end
397
-
398
- end # end the windows-specific code
1
+ require 'external/chunkable'
2
+ require 'external/utils'
3
+
4
+ autoload(:StringIO, 'stringio')
5
+ autoload(:Tempfile, 'tempfile')
6
+ autoload(:FileUtils, 'fileutils')
7
+
8
+ module External
9
+
10
+ # Adds functionality to an IO required by External.
11
+ #
12
+ # IO adds/overrides the length accessor for getting the size of the IO contents.
13
+ # Note that length is not automatically adjusted by write, for performance
14
+ # reasons. length must be managed manually, or reset after writes using
15
+ # reset_length.
16
+ #
17
+ module Io
18
+ include Chunkable
19
+
20
+ PATCHES = []
21
+
22
+ # Add version-specific patches
23
+ case RUBY_VERSION
24
+ when /^1.8/ then require "external/patches/ruby_1_8_io"
25
+ end
26
+
27
+ # Add platform-specific patches
28
+ # case RUBY_PLATFORM
29
+ # when 'java'
30
+ # end
31
+
32
+ def self.extended(base)
33
+ PATCHES.each {|patch| base.extend patch }
34
+ base.reset_length
35
+ base.default_blksize = 1024
36
+ base.binmode
37
+ end
38
+
39
+ # Resets length to the length returned by Utils.length
40
+ def reset_length
41
+ self.length = Utils.length(self)
42
+ end
43
+
44
+ # Modified truncate that adjusts length
45
+ def truncate(n)
46
+ super
47
+ self.pos = n if self.pos > n
48
+ self.length = n
49
+ end
50
+
51
+ #
52
+ def scan(range_or_span=default_span, blksize=default_blksize, carryover_limit=default_blksize)
53
+ carryover = 0
54
+ chunk(range_or_span, blksize) do |offset, length|
55
+ raise "carryover exceeds limit: #{carryover} (#{carryover_limit})" if carryover > carryover_limit
56
+
57
+ scan_begin = offset - carryover
58
+ self.pos = scan_begin
59
+ string = self.read(length + carryover)
60
+ carryover = yield(scan_begin, string)
61
+ end
62
+ carryover
63
+ end
64
+
65
+ #
66
+ def insert(src, range=0..src.length, pos=nil)
67
+ self.pos = pos unless pos == nil
68
+
69
+ start_pos = self.pos
70
+ length_written = 0
71
+
72
+ src.flush
73
+ src.pos = range.begin
74
+ src.chunk(range) do |offset, length|
75
+ length_written += write(src.read(length))
76
+ end
77
+
78
+ end_pos = start_pos + length_written
79
+ self.length = end_pos if end_pos > self.length
80
+ length_written
81
+ end
82
+
83
+ #
84
+ def concat(src, range=0..src.length)
85
+ insert(src, range, length)
86
+ end
87
+
88
+ #--
89
+ # it appears that as long as the io opening t.path closes,
90
+ # the tempfile will be deleted at the exit of the ruby
91
+ # instance... otherwise it WILL NOT BE DELETED
92
+ # Make note of this in the documentation to be sure to close
93
+ # files if you start inserting because it may make tempfiles
94
+ #++
95
+ def copy(mode="r", range=0..length)
96
+ self.flush
97
+
98
+ temp = Tempfile.new("copy")
99
+ temp.extend Io
100
+ temp.insert(self, range)
101
+ temp.close
102
+
103
+ cp = File.open(temp.path, mode)
104
+ cp.extend Io
105
+
106
+ if block_given?
107
+ begin
108
+ yield(cp)
109
+ ensure
110
+ cp.close unless cp.closed?
111
+ FileUtils.rm(cp.path) if File.exists?(cp.path)
112
+ end
113
+ else
114
+ cp
115
+ end
116
+ end
117
+
118
+ # Quick comparison with another IO. Returns true if
119
+ # another == self, or if both are file-type IOs and
120
+ # their paths are equal.
121
+ def quick_compare(another)
122
+ self == another || (self.kind_of?(File) && another.kind_of?(File) && self.path == another.path)
123
+ end
124
+
125
+ # Sort compare (ie <=>) with another IO, behaving like
126
+ # a comparison between the full string contents of self
127
+ # and another. This obviously can be a long operation
128
+ # if it requires the full read of two large IO objects.
129
+ def sort_compare(another, blksize=default_blksize)
130
+ # equal in comparison if the ios are equal
131
+ return 0 if quick_compare(another)
132
+
133
+ self.flush
134
+ self.reset_length
135
+
136
+ another.flush
137
+ another.reset_length
138
+
139
+ if another.length > self.length
140
+ return -1
141
+ elsif self.length < another.length
142
+ return 1
143
+ else
144
+ self.pos = 0
145
+ another.pos = 0
146
+
147
+ sa = sb = nil
148
+ while sa == sb
149
+ sa = self.read(blksize)
150
+ sb = another.read(blksize)
151
+ break if sa.nil? || sb.nil?
152
+ end
153
+
154
+ sa.to_s <=> sb.to_s
155
+ end
156
+ end
157
+
158
+ # Alias for sort_compare.
159
+ def <=>(another)
160
+ sort_compare(another)
161
+ end
162
+ end
163
+ end