external 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. data/History +7 -0
  2. data/MIT-LICENSE +1 -3
  3. data/README +162 -127
  4. data/lib/external.rb +2 -3
  5. data/lib/external/base.rb +174 -47
  6. data/lib/external/chunkable.rb +131 -105
  7. data/lib/external/enumerable.rb +78 -33
  8. data/lib/external/io.rb +163 -398
  9. data/lib/external/patches/ruby_1_8_io.rb +31 -0
  10. data/lib/external/patches/windows_io.rb +53 -0
  11. data/lib/external/patches/windows_utils.rb +27 -0
  12. data/lib/external/utils.rb +148 -0
  13. data/lib/external_archive.rb +840 -0
  14. data/lib/external_array.rb +57 -0
  15. data/lib/external_index.rb +1053 -0
  16. metadata +42 -58
  17. data/lib/ext_arc.rb +0 -108
  18. data/lib/ext_arr.rb +0 -727
  19. data/lib/ext_ind.rb +0 -1120
  20. data/test/benchmarks/benchmarks_20070918.txt +0 -45
  21. data/test/benchmarks/benchmarks_20070921.txt +0 -91
  22. data/test/benchmarks/benchmarks_20071006.txt +0 -147
  23. data/test/benchmarks/test_copy_file.rb +0 -80
  24. data/test/benchmarks/test_pos_speed.rb +0 -47
  25. data/test/benchmarks/test_read_time.rb +0 -55
  26. data/test/cached_ext_ind_test.rb +0 -219
  27. data/test/check/benchmark_check.rb +0 -441
  28. data/test/check/namespace_conflicts_check.rb +0 -23
  29. data/test/check/pack_check.rb +0 -90
  30. data/test/ext_arc_test.rb +0 -286
  31. data/test/ext_arr/alt_sep.txt +0 -3
  32. data/test/ext_arr/cr_lf_input.txt +0 -3
  33. data/test/ext_arr/input.index +0 -0
  34. data/test/ext_arr/input.txt +0 -1
  35. data/test/ext_arr/inputb.index +0 -0
  36. data/test/ext_arr/inputb.txt +0 -1
  37. data/test/ext_arr/lf_input.txt +0 -3
  38. data/test/ext_arr/lines.txt +0 -19
  39. data/test/ext_arr/without_index.txt +0 -1
  40. data/test/ext_arr_test.rb +0 -534
  41. data/test/ext_ind_test.rb +0 -1472
  42. data/test/external/base_test.rb +0 -74
  43. data/test/external/chunkable_test.rb +0 -182
  44. data/test/external/index/input.index +0 -0
  45. data/test/external/index/inputb.index +0 -0
  46. data/test/external/io_test.rb +0 -414
  47. data/test/external_test_helper.rb +0 -31
  48. data/test/external_test_suite.rb +0 -4
  49. data/test/test_array.rb +0 -1192
@@ -1,105 +1,131 @@
1
- module External
2
-
3
- # The Chunkable mixin provides methods for organizing a span or range
4
- # into chunks no larger than a specified block size.
5
- module Chunkable
6
- attr_accessor :length, :default_blksize
7
-
8
- # Returns the default span: [0, length]
9
- def default_span
10
- [0, length]
11
- end
12
-
13
- # Breaks the input range or span into chunks of blksize or less.
14
- # The offset and length of each chunk will be provided to the
15
- # block, if given.
16
- #
17
- # blksize # => 100
18
- # chunk(0..250) # => [[0,100],[100,100],[200,50]]
19
- #
20
- # results = []
21
- # chunk([10,190]) {|offset, length| results << [offset, length]}
22
- # results # => [[10,100],[110,90]]
23
- #
24
- def chunk(range_or_span=default_span, blksize=default_blksize)
25
- return collect_results(:chunk, range_or_span) unless block_given?
26
-
27
- rbegin, rend = range_begin_and_end(range_or_span)
28
-
29
- # chunk the final range to make sure that no chunks
30
- # greater than blksize are returned
31
- while rend - rbegin > blksize
32
- yield(rbegin, blksize)
33
- rbegin += blksize
34
- end
35
- yield(rbegin, rend - rbegin) if rend - rbegin > 0
36
- end
37
-
38
- # Breaks the input range or span into chunks of blksize or less,
39
- # beginning from the end of the interval. The offset and length
40
- # of each chunk will be provided to the block, if given.
41
- #
42
- # blksize # => 100
43
- # reverse_chunk(0..250) # => [[150,100],[50,100],[0,50]]
44
- #
45
- # results = []
46
- # reverse_chunk([10,190]) {|offset, length| results << [offset, length]}
47
- # results # => [[100,100],[10,90]]
48
- #
49
- def reverse_chunk(range_or_span=default_span, blksize=default_blksize)
50
- return collect_results(:reverse_chunk, range_or_span) unless block_given?
51
-
52
- rbegin, rend = range_begin_and_end(range_or_span)
53
-
54
- # chunk the final range to make sure that no chunks
55
- # greater than blksize are returned
56
- while rend - rbegin > blksize
57
- rend -= blksize
58
- yield(rend, blksize)
59
- end
60
- yield(rbegin, rend - rbegin) if rend - rbegin > 0
61
- end
62
-
63
- protected
64
-
65
- # Converts a range into an offset and length. Negative values are
66
- # counted back from self.length
67
- #
68
- # length # => 10
69
- # split_range(0..9) # => [0,10]
70
- # split_range(0...9) # => [0,9]
71
- #
72
- # split_range(-1..9) # => [9,1]
73
- # split_range(0..-1) # => [0,10]
74
- def split_range(range)
75
- begin_range = range.begin + (range.begin < 0 ? self.length : 0)
76
- end_range = range.end + (range.end < 0 ? self.length : 0)
77
- length = end_range - begin_range - (range.exclude_end? ? 1 : 0)
78
-
79
- [begin_range, length]
80
- end
81
-
82
- def split_span(span)
83
- span[0] += self.length if span[0] < 0
84
- span
85
- end
86
-
87
- def range_begin_and_end(range_or_span)
88
- rbegin, rend = range_or_span.kind_of?(Range) ? split_range(range_or_span) : split_span(range_or_span)
89
- raise ArgumentError.new("negative offset specified: #{PP.singleline_pp(range_or_span,'')}") if rbegin < 0
90
- rend += rbegin
91
-
92
- [rbegin, rend]
93
- end
94
-
95
- private
96
-
97
- def collect_results(method, args) # :nodoc:
98
- results = []
99
- send(method, args) do |*result|
100
- results << result
101
- end
102
- results
103
- end
104
- end
105
- end
1
+ module External
2
+
3
+ # The Chunkable mixin provides methods for organizing a span or range
4
+ # into chunks no larger than a specified block size. For reference:
5
+ #
6
+ # span an array like: [start, length]
7
+ # range a Range like: start..end or start...(end - 1)
8
+ #
9
+ module Chunkable
10
+
11
+ # The length of the chunkable object;
12
+ # length must be set by the object.
13
+ attr_accessor :length
14
+
15
+ # The default block size for chunking a chunkable
16
+ # object; default_blksize must be set by the object.
17
+ attr_accessor :default_blksize
18
+
19
+ # Returns the default span: [0, length]
20
+ def default_span
21
+ [0, length]
22
+ end
23
+
24
+ # Breaks the input range or span into chunks of blksize or less.
25
+ # The offset and length of each chunk will be provided to the
26
+ # block, if given.
27
+ #
28
+ # blksize # => 100
29
+ # chunk(0..250) # => [[0,100],[100,100],[200,50]]
30
+ #
31
+ # results = []
32
+ # chunk([10,190]) {|offset, length| results << [offset, length]}
33
+ # results # => [[10,100],[110,90]]
34
+ #
35
+ def chunk(range_or_span=default_span, blksize=default_blksize)
36
+ return collect_results(:chunk, range_or_span) unless block_given?
37
+
38
+ rbegin, rend = range_begin_and_end(range_or_span)
39
+
40
+ # chunk the final range to make sure that no chunks
41
+ # greater than blksize are returned
42
+ while rend - rbegin > blksize
43
+ yield(rbegin, blksize)
44
+ rbegin += blksize
45
+ end
46
+ yield(rbegin, rend - rbegin) if rend - rbegin > 0
47
+ end
48
+
49
+ # Breaks the input range or span into chunks of blksize or less,
50
+ # beginning from the end of the interval. The offset and length
51
+ # of each chunk will be provided to the block, if given.
52
+ #
53
+ # blksize # => 100
54
+ # reverse_chunk(0..250) # => [[150,100],[50,100],[0,50]]
55
+ #
56
+ # results = []
57
+ # reverse_chunk([10,190]) {|offset, length| results << [offset, length]}
58
+ # results # => [[100,100],[10,90]]
59
+ #
60
+ def reverse_chunk(range_or_span=default_span, blksize=default_blksize)
61
+ return collect_results(:reverse_chunk, range_or_span) unless block_given?
62
+
63
+ rbegin, rend = range_begin_and_end(range_or_span)
64
+
65
+ # chunk the final range to make sure that no chunks
66
+ # greater than blksize are returned
67
+ while rend - rbegin > blksize
68
+ rend -= blksize
69
+ yield(rend, blksize)
70
+ end
71
+ yield(rbegin, rend - rbegin) if rend - rbegin > 0
72
+ end
73
+
74
+ module_function
75
+
76
+ # Converts a range into an offset and length. Negative values are
77
+ # counted back from self.length
78
+ #
79
+ # length # => 10
80
+ # split_range(0..9) # => [0,10]
81
+ # split_range(0...9) # => [0,9]
82
+ #
83
+ # split_range(-1..9) # => [9,1]
84
+ # split_range(0..-1) # => [0,10]
85
+ def split_range(range)
86
+ start, finish = range.begin, range.end
87
+ start += length if start < 0
88
+ finish += length if finish < 0
89
+
90
+ [start, finish - start - (range.exclude_end? ? 1 : 0)]
91
+ end
92
+
93
+ # The complement to split_range; returns the span with a negative
94
+ # start index counted back from self.length.
95
+ #
96
+ # length # => 10
97
+ # split_span([0, 10]) # => [0,10]
98
+ # split_span([-1, 1]) # => [9,1]
99
+ #
100
+ def split_span(span)
101
+ span[0] += self.length if span[0] < 0
102
+ span
103
+ end
104
+
105
+ # Returns the beginning and end of a range or span.
106
+ #
107
+ # range_begin_and_end(0..10) # => [0, 10]
108
+ # range_begin_and_end(0...10) # => [0, 9]
109
+ # range_begin_and_end([0, 10]) # => [0, 10]
110
+ #
111
+ def range_begin_and_end(range_or_span)
112
+ rbegin, rend = range_or_span.kind_of?(Range) ? split_range(range_or_span) : split_span(range_or_span)
113
+ raise ArgumentError.new("negative offset specified: #{PP.singleline_pp(range_or_span,'')}") if rbegin < 0
114
+ rend += rbegin
115
+
116
+ [rbegin, rend]
117
+ end
118
+
119
+ private
120
+
121
+ # a utility method to collect the results of a method
122
+ # that requires a block.
123
+ def collect_results(method, args) # :nodoc:
124
+ results = []
125
+ send(method, args) do |*result|
126
+ results << result
127
+ end
128
+ results
129
+ end
130
+ end
131
+ end
@@ -1,26 +1,63 @@
1
1
  require 'enumerator'
2
2
 
3
3
  module External
4
+
5
+ # An externalized implementation of Enumerable. External::Enumerable
6
+ # requires several methods with the following functionality:
7
+ #
8
+ # each:: iterates over items in self
9
+ # another:: provides another instance of self
10
+ # to_a:: converts self to an Array
11
+ #
4
12
  module Enumerable
5
- # def all? # :yield: obj
6
- # not_implemented
7
- # end
13
+ # Flag indicating whether to enumerate (ie collect,
14
+ # select, etc) into an array or into an instance
15
+ # of self. In most cases enumerating to an array
16
+ # performs better, but enumerating to another
17
+ # instance of self may be desired for especially
18
+ # large collections.
19
+ attr_accessor :enumerate_to_a
20
+
21
+ def all? # :yield: obj
22
+ # WARN -- no tests for this in test_array
23
+ each do |obj|
24
+ return false unless yield(obj)
25
+ end
26
+ true
27
+ end
8
28
 
9
- # def any? # :yield: obj
10
- # not_implemented
11
- # end
29
+ def any? # :yield: obj
30
+ # WARN -- no tests for this in test_array
31
+ each do |obj|
32
+ return true if yield(obj)
33
+ end
34
+ false
35
+ end
12
36
 
13
- # def collect # :yield: item
14
- # not_implemented
15
- # end
37
+ def collect # :yield: item
38
+ if block_given?
39
+ another = enumerate_to_a ? [] : self.another
40
+ each do |item|
41
+ another << yield(item)
42
+ end
43
+ another
44
+ else
45
+ # Not sure if Enumerator works right for large externals...
46
+ Object::Enumerable::Enumerator.new(self)
47
+ end
48
+ end
16
49
 
17
50
  # def collect! # :yield: item
18
51
  # not_implemented
19
52
  # end
20
53
 
21
- # def detect(ifnone=nil) # :yield: obj
22
- # not_implemented
23
- # end
54
+ def detect(ifnone=nil) # :yield: obj
55
+ # WARN -- no tests for this in test_array
56
+ each do |obj|
57
+ return obj if yield(obj)
58
+ end
59
+ nil
60
+ end
24
61
 
25
62
  # def each_cons(n) # :yield:
26
63
  # not_implemented
@@ -38,9 +75,9 @@ module External
38
75
  end
39
76
  end
40
77
 
41
- # def entries
42
- # to_a
43
- # end
78
+ def entries
79
+ to_a
80
+ end
44
81
 
45
82
  # def enum_cons(n)
46
83
  # not_implemented
@@ -54,29 +91,37 @@ module External
54
91
  # not_implemented
55
92
  # end
56
93
 
57
- # def find(ifnone=nil, &block) # :yield: obj
58
- # detect(ifnone, &block)
59
- # end
94
+ def find(ifnone=nil, &block) # :yield: obj
95
+ # WARN -- no tests for this in test_array
96
+ detect(ifnone, &block)
97
+ end
60
98
 
61
- # def find_all # :yield: obj
62
- # not_implemented
63
- # end
99
+ def find_all # :yield: obj
100
+ another = enumerate_to_a ? [] : self.another
101
+ each do |item|
102
+ another << item if yield(item)
103
+ end
104
+ another
105
+ end
64
106
 
65
107
  # def grep(pattern) # :yield: obj
66
108
  # not_implemented
67
109
  # end
68
110
 
69
- # def include?(obj)
70
- # not_implemented
71
- # end
111
+ def include?(obj)
112
+ each do |current|
113
+ return true if current == obj
114
+ end
115
+ false
116
+ end
72
117
 
73
118
  # def inject(init) # :yield: memo, obj
74
119
  # not_implemented
75
120
  # end
76
121
 
77
- # def map(&block) # :yield: item
78
- # collect(&block)
79
- # end
122
+ def map(&block) # :yield: item
123
+ collect(&block)
124
+ end
80
125
 
81
126
  # def map!(&block) # :yield: item
82
127
  # collect!(&block)
@@ -86,9 +131,9 @@ module External
86
131
  # not_implemented
87
132
  # end
88
133
 
89
- # def member?(obj)
90
- # include?(obj)
91
- # end
134
+ def member?(obj)
135
+ include?(obj)
136
+ end
92
137
 
93
138
  # def min # :yield: a,b
94
139
  # not_implemented
@@ -106,9 +151,9 @@ module External
106
151
  # not_implemented
107
152
  # end
108
153
 
109
- # def select(&block) # :yield: obj
110
- # find_all(&block)
111
- # end
154
+ def select(&block) # :yield: obj
155
+ find_all(&block)
156
+ end
112
157
 
113
158
  # def sort # :yield: a,b
114
159
  # not_implemented
data/lib/external/io.rb CHANGED
@@ -1,398 +1,163 @@
1
- require 'stringio'
2
- require 'tempfile'
3
- require 'external/chunkable'
4
-
5
- module External
6
- # Position gets IO objects to work properly for large files. Additionally,
7
- # IO adds a length accessor for getting the size of the IO contents. Note
8
- # that length is not automatically adjusted by write, for performance
9
- # reasons. length must be managed manually, or reset after writes using
10
- # reset_length.
11
- #
12
- # A variety of bugs needed to be addressed per-platform:
13
- #
14
- # == Mac OS X Tiger
15
- #
16
- # Using the default (broken) installation of Ruby, StringIO does not correctly
17
- # position itself when a pos= statement is issued.
18
- #
19
- # s = StringIO.new "abc"
20
- # s.read # => "abc"
21
- # s.pos = 0
22
- # s.read # => nil
23
- #
24
- # For regular IO objects, as expected, the second read statement returns
25
- # "abc". Install the a fixed version of Ruby, perhaps with the one-click
26
- # installer: http://rubyosx.rubyforge.org/
27
- #
28
- # == Windows
29
- #
30
- # Ruby on Windows has problems with files larger than ~2 gigabytes.
31
- # Sizes return as negative, and positions cannot be set beyond the max
32
- # size of a long (2147483647 ~ 2GB = 2475636895). IO corrects both of
33
- # these issues thanks in large part to a bit of code taken from
34
- # 'win32/file/stat' (http://rubyforge.org/projects/win32utils/).
35
- #
36
- # == Others
37
- #
38
- # I haven't found errors on Fedora and haven't tested on any other platforms.
39
- # If you find and solve some wierd positioning errors, please let me know.
40
- module IO
41
-
42
- # Determines the generic mode of the input io using the _mode
43
- # method for the input io class. By default IO provides _mode
44
- # methods for File, Tempfile, and StringIO. The return string
45
- # is determined as follows:
46
- #
47
- # readable & writable:: r+
48
- # readable:: r
49
- # writable:: w
50
- #
51
- # The _mode method takes the input io and should return an array
52
- # specifying whether or not io is readable and writable
53
- # (ie [readable, writable]).
54
- #
55
- # See try_handle for more details.
56
- def self.mode(io)
57
- readable, writable = try_handle(io, "mode")
58
-
59
- case
60
- when readable && writable then "r+"
61
- when readable then "r"
62
- when writable then "w"
63
- else
64
- # occurs for r+ mode, for some reason
65
- "r+"
66
- end
67
- end
68
-
69
- # Determines the length of the input io using the _length method
70
- # for the input io class. Non-External::IO inputs are extended
71
- # in this process.
72
- #
73
- # The _length method takes the input io, and should return the
74
- # current length of the input io (ie a flush operation may be
75
- # required).
76
- #
77
- # See try_handle for more details.
78
- def self.length(io)
79
- case io
80
- when External::IO
81
- try_handle(io, "length")
82
- else
83
- io.extend External::IO
84
- io.length
85
- end
86
- end
87
-
88
- # Returns an array of bools determining if the input File
89
- # is readable and writable.
90
- def self.file_mode(io)
91
- begin
92
- dup = io.dup
93
-
94
- # determine readable/writable by sending close methods
95
- # to the duplicated IO. If the io cannot be closed for
96
- # read/write then it will raise an error, indicating that
97
- # it was not open in the given mode.
98
- [:close_read, :close_write].collect do |method|
99
- begin
100
- dup.send(method)
101
- true
102
- rescue(IOError)
103
- false
104
- end
105
- end
106
- ensure
107
- # Be sure that the io is fully closed before proceeding!
108
- # (Otherwise Tempfiles will not be properly disposed of
109
- # ... at least on Windows, perhaps on others)
110
- dup.close if dup && !dup.closed?
111
- end
112
- end
113
-
114
- # Returns the length of the input File
115
- def self.file_length(io)
116
- io.fsync unless io.generic_mode == 'r'
117
- File.size(io.path)
118
- end
119
-
120
- # Returns an array of bools determining if the input Tempfile
121
- # is readable and writable.
122
- def self.tempfile_mode(io)
123
- file_mode(io.instance_variable_get("@tmpfile"))
124
- end
125
-
126
- # Returns the length of the input Tempfile
127
- def self.tempfile_length(io)
128
- file_length(io)
129
- end
130
-
131
- # Returns an array of bools determining if the input StringIO
132
- # is readable and writable.
133
- #
134
- # s = StringIO.new("abcde", "r+")
135
- # External::IO.stringio_mode(s) # => [true, true]
136
- #
137
- def self.stringio_mode(io)
138
- [!io.closed_read?, !io.closed_write?]
139
- end
140
-
141
- # Returns the length of the input StringIO
142
- #
143
- # s = StringIO.new("abcde", "r+")
144
- # External::IO.length(s) # => 5
145
- #
146
- def self.stringio_length(io)
147
- io.string.length
148
- end
149
-
150
- def self.extended(base) # :nodoc:
151
- base.instance_variable_set("@generic_mode", mode(base))
152
- base.reset_length
153
- base.default_blksize = 1024
154
- base.binmode
155
- end
156
-
157
- protected
158
-
159
- # try_handle is a forwarding method allowing External::IO to handle
160
- # non-File, non-Tempfile IO objects. try_handle infers a method
161
- # name based on the class of the input and trys to forward the
162
- # input io to that method within External::IO. For instance:
163
- #
164
- # * the _mode method for StringIO is 'stringio_mode'
165
- # * the _length method for StringIO is 'stringio_length'
166
- #
167
- # Nested classes have '::' replaced by '_'. Thus to add support
168
- # for Some::Unknown::IO, extend External::IO as below:
169
- #
170
- # module External::IO
171
- # def some_unknown_io_mode(io)
172
- # ...
173
- # end
174
- #
175
- # def some_unknown_io_length(io)
176
- # ...
177
- # end
178
- # end
179
- #
180
- # See stringio_mode and stringio_length for more details.
181
- def self.try_handle(io, method)
182
- method_name = io.class.to_s.downcase.gsub(/::/, "_") + "_#{method}"
183
- if self.respond_to?(method_name)
184
- External::IO.send(method_name, io)
185
- else
186
- raise "cannot determine #{method} for '%s'" % io.class
187
- end
188
- end
189
-
190
- public
191
-
192
- include Chunkable
193
- attr_reader :generic_mode
194
-
195
- # True if self is a File or Tempfile
196
- def file?
197
- self.kind_of?(File) || self.kind_of?(Tempfile)
198
- end
199
-
200
- # Modified truncate that adjusts length
201
- def truncate(n)
202
- super
203
- self.pos = n if self.pos > n
204
- self.length = n
205
- end
206
-
207
- # Resets length to the length returned by External::IO.length
208
- def reset_length
209
- self.length = External::IO.length(self)
210
- end
211
-
212
- #
213
- # comparison
214
- #
215
-
216
- # Quick comparision with another IO. Returns true if
217
- # another == self, or if both are file-type IOs and
218
- # their paths are equal.
219
- def quick_compare(another)
220
- self == another || (self.file? && another.file? && self.path == another.path)
221
- end
222
-
223
- # Sort compare with another IO, behaving like a comparison between
224
- # the full string contents of self and another. Can be a long
225
- # operation if it requires the full read of two large IO objects.
226
- def sort_compare(another, blksize=default_blksize)
227
- # equal in comparison if the ios are equal
228
- return 0 if quick_compare(another)
229
-
230
- if another.length > self.length
231
- return -1
232
- elsif self.length < another.length
233
- return 1
234
- else
235
- self.flush unless self.generic_mode == 'r'
236
- self.pos = 0
237
- another.flush unless another.generic_mode == 'r'
238
- another.pos = 0
239
-
240
- sa = sb = nil
241
- while sa == sb
242
- sa = self.read(blksize)
243
- sb = another.read(blksize)
244
- break if sa.nil? || sb.nil?
245
- end
246
-
247
- sa.to_s <=> sb.to_s
248
- end
249
- end
250
-
251
- # Sort compare with another IO, behaving like a comparison between
252
- # the full string contents of self and another. Can be a long
253
- # operation if it requires the full read of two large IO objects.
254
- def <=>(another)
255
- sort_compare(another)
256
- end
257
-
258
- #
259
- # reading
260
- #
261
-
262
- def scan(range_or_span=default_span, blksize=default_blksize, carryover_limit=default_blksize)
263
- carryover = 0
264
- chunk(range_or_span, blksize) do |offset, length|
265
- raise "carryover exceeds limit: #{carryover} (#{carryover_limit})" if carryover > carryover_limit
266
-
267
- scan_begin = offset - carryover
268
- self.pos = scan_begin
269
- string = self.read(length + carryover)
270
- carryover = yield(scan_begin, string)
271
- end
272
- carryover
273
- end
274
-
275
- #
276
- # writing
277
- #
278
-
279
- #
280
- def insert(src, range=0..src.length, pos=nil)
281
- self.pos = pos unless pos == nil
282
-
283
- start_pos = self.pos
284
- length_written = 0
285
-
286
- src.flush unless src.generic_mode == 'r'
287
- src.pos = range.begin
288
- src.chunk(range) do |offset, length|
289
- length_written += write(src.read(length))
290
- end
291
-
292
- end_pos = start_pos + length_written
293
- self.length = end_pos if end_pos > self.length
294
- length_written
295
- end
296
-
297
- #
298
- def concat(src, range=0..src.length)
299
- insert(src, range, length)
300
- end
301
-
302
- #--
303
- # it appears that as long as the io opening t.path closes,
304
- # the tempfile will be deleted at the exit of the ruby
305
- # instance... otherwise it WILL NOT BE DELETED
306
- # Make note of this in the documentation to be sure to close
307
- # files if you start inserting because it may make tempfiles
308
- #++
309
- def copy(mode="r", range=0..length)
310
- self.flush
311
-
312
- temp = Tempfile.new("copy")
313
- temp.extend IO
314
- temp.insert(self, range)
315
- temp.close
316
-
317
- cp = File.open(temp.path, mode)
318
- cp.extend IO
319
-
320
- if block_given?
321
- begin
322
- yield(cp)
323
- ensure
324
- cp.close unless cp.closed?
325
- FileUtils.rm(cp.path) if File.exists?(cp.path)
326
- end
327
- else
328
- cp
329
- end
330
- end
331
-
332
- end
333
- end
334
-
335
- # This code block modifies IO only if running on windows
336
- unless RUBY_PLATFORM.index('mswin').nil?
337
- require 'Win32API'
338
-
339
- module External
340
- module IO
341
- # Modfied to properly determine file lengths on Windows. Uses code
342
- # from 'win32/file/stat' (http://rubyforge.org/projects/win32utils/)
343
- def self.file_length(io) # :nodoc:
344
- io.fsync unless io.generic_mode == 'r'
345
-
346
- # I would have liked to use win32/file/stat to do this... however, some issue
347
- # arose involving FileUtils.cp, File.stat, and File::Stat.mode. cp raised an
348
- # error because the mode would be nil for files. I wasn't sure how to fix it,
349
- # so I've lifted the relevant code for pulling the large file size.
350
-
351
- # Note this is a simplified version... if you base.path point to a chardev,
352
- # this may need to be changed, because apparently the call to the Win32API
353
- # may fail
354
-
355
- stat_buf = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].pack('ISSssssIILILILIL')
356
- Win32API.new('msvcrt', '_stat64', 'PP', 'I').call(io.path, stat_buf)
357
- stat_buf[24, 4].unpack('L').first # Size of file in bytes
358
- end
359
-
360
- POSITION_MAX = 2147483647 # maximum size of long
361
-
362
- # Modified to handle positions past the 2Gb limit
363
- def pos # :nodoc:
364
- @pos || super
365
- end
366
-
367
- # Positions larger than the max value of a long cannot be directly given
368
- # to the default +pos=+. This version incrementally seeks to positions
369
- # beyond the maximum, if necessary.
370
- #
371
- # Note: setting the position beyond the 2Gb limit requires the use of a
372
- # sysseek statement. As such, errors will arise if you try to position
373
- # an IO object that does not support this method (for example StringIO...
374
- # but then what are you doing with a 2Gb StringIO anyhow?)
375
- def pos=(pos)
376
- if pos < POSITION_MAX
377
- super(pos)
378
- @pos = nil
379
- elsif @pos != pos
380
- # note sysseek appears to be necessary here, rather than io.seek
381
- @pos = pos
382
-
383
- super(POSITION_MAX)
384
- pos -= POSITION_MAX
385
-
386
- while pos > POSITION_MAX
387
- pos -= POSITION_MAX
388
- self.sysseek(POSITION_MAX, Object::IO::SEEK_CUR)
389
- end
390
-
391
- self.sysseek(pos, Object::IO::SEEK_CUR)
392
- end
393
- end
394
-
395
- end
396
- end
397
-
398
- end # end the windows-specific code
1
+ require 'external/chunkable'
2
+ require 'external/utils'
3
+
4
+ autoload(:StringIO, 'stringio')
5
+ autoload(:Tempfile, 'tempfile')
6
+ autoload(:FileUtils, 'fileutils')
7
+
8
+ module External
9
+
10
+ # Adds functionality to an IO required by External.
11
+ #
12
+ # IO adds/overrides the length accessor for getting the size of the IO contents.
13
+ # Note that length is not automatically adjusted by write, for performance
14
+ # reasons. length must be managed manually, or reset after writes using
15
+ # reset_length.
16
+ #
17
+ module Io
18
+ include Chunkable
19
+
20
+ PATCHES = []
21
+
22
+ # Add version-specific patches
23
+ case RUBY_VERSION
24
+ when /^1.8/ then require "external/patches/ruby_1_8_io"
25
+ end
26
+
27
+ # Add platform-specific patches
28
+ # case RUBY_PLATFORM
29
+ # when 'java'
30
+ # end
31
+
32
+ def self.extended(base)
33
+ PATCHES.each {|patch| base.extend patch }
34
+ base.reset_length
35
+ base.default_blksize = 1024
36
+ base.binmode
37
+ end
38
+
39
+ # Resets length to the length returned by Utils.length
40
+ def reset_length
41
+ self.length = Utils.length(self)
42
+ end
43
+
44
+ # Modified truncate that adjusts length
45
+ def truncate(n)
46
+ super
47
+ self.pos = n if self.pos > n
48
+ self.length = n
49
+ end
50
+
51
+ #
52
+ def scan(range_or_span=default_span, blksize=default_blksize, carryover_limit=default_blksize)
53
+ carryover = 0
54
+ chunk(range_or_span, blksize) do |offset, length|
55
+ raise "carryover exceeds limit: #{carryover} (#{carryover_limit})" if carryover > carryover_limit
56
+
57
+ scan_begin = offset - carryover
58
+ self.pos = scan_begin
59
+ string = self.read(length + carryover)
60
+ carryover = yield(scan_begin, string)
61
+ end
62
+ carryover
63
+ end
64
+
65
+ #
66
+ def insert(src, range=0..src.length, pos=nil)
67
+ self.pos = pos unless pos == nil
68
+
69
+ start_pos = self.pos
70
+ length_written = 0
71
+
72
+ src.flush
73
+ src.pos = range.begin
74
+ src.chunk(range) do |offset, length|
75
+ length_written += write(src.read(length))
76
+ end
77
+
78
+ end_pos = start_pos + length_written
79
+ self.length = end_pos if end_pos > self.length
80
+ length_written
81
+ end
82
+
83
+ #
84
+ def concat(src, range=0..src.length)
85
+ insert(src, range, length)
86
+ end
87
+
88
+ #--
89
+ # it appears that as long as the io opening t.path closes,
90
+ # the tempfile will be deleted at the exit of the ruby
91
+ # instance... otherwise it WILL NOT BE DELETED
92
+ # Make note of this in the documentation to be sure to close
93
+ # files if you start inserting because it may make tempfiles
94
+ #++
95
+ def copy(mode="r", range=0..length)
96
+ self.flush
97
+
98
+ temp = Tempfile.new("copy")
99
+ temp.extend Io
100
+ temp.insert(self, range)
101
+ temp.close
102
+
103
+ cp = File.open(temp.path, mode)
104
+ cp.extend Io
105
+
106
+ if block_given?
107
+ begin
108
+ yield(cp)
109
+ ensure
110
+ cp.close unless cp.closed?
111
+ FileUtils.rm(cp.path) if File.exists?(cp.path)
112
+ end
113
+ else
114
+ cp
115
+ end
116
+ end
117
+
118
+ # Quick comparison with another IO. Returns true if
119
+ # another == self, or if both are file-type IOs and
120
+ # their paths are equal.
121
+ def quick_compare(another)
122
+ self == another || (self.kind_of?(File) && another.kind_of?(File) && self.path == another.path)
123
+ end
124
+
125
+ # Sort compare (ie <=>) with another IO, behaving like
126
+ # a comparison between the full string contents of self
127
+ # and another. This obviously can be a long operation
128
+ # if it requires the full read of two large IO objects.
129
+ def sort_compare(another, blksize=default_blksize)
130
+ # equal in comparison if the ios are equal
131
+ return 0 if quick_compare(another)
132
+
133
+ self.flush
134
+ self.reset_length
135
+
136
+ another.flush
137
+ another.reset_length
138
+
139
+ if another.length > self.length
140
+ return -1
141
+ elsif self.length < another.length
142
+ return 1
143
+ else
144
+ self.pos = 0
145
+ another.pos = 0
146
+
147
+ sa = sb = nil
148
+ while sa == sb
149
+ sa = self.read(blksize)
150
+ sb = another.read(blksize)
151
+ break if sa.nil? || sb.nil?
152
+ end
153
+
154
+ sa.to_s <=> sb.to_s
155
+ end
156
+ end
157
+
158
+ # Alias for sort_compare.
159
+ def <=>(another)
160
+ sort_compare(another)
161
+ end
162
+ end
163
+ end