bahuvrihi-external 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,131 @@
1
module External

  # The Chunkable mixin provides methods for organizing a span or range
  # into chunks no larger than a specified block size.  For reference:
  #
  # span::   an array like: [start, length]
  # range::  a Range like: start..end or start...(end + 1)
  #
  # Note that ranges are NOT interpreted like Array slices: the range
  # start..end covers (end - start) items, ie 'end' acts as the boundary
  # just past the last item, and an exclusive range covers one item less.
  #
  module Chunkable

    # The length of the chunkable object;
    # length must be set by the object.
    attr_accessor :length

    # The default block size for chunking a chunkable
    # object; default_blksize must be set by the object.
    attr_accessor :default_blksize

    # Returns the default span: [0, length]
    def default_span
      [0, length]
    end

    # Breaks the input range or span into chunks of blksize or less.
    # The offset and length of each chunk will be provided to the
    # block, if given.  Without a block the [offset, length] pairs
    # are collected and returned as an array.
    #
    #   blksize                # => 100
    #   chunk(0..250)          # => [[0,100],[100,100],[200,50]]
    #   chunk(0..250, 125)     # => [[0,125],[125,125]]
    #
    #   results = []
    #   chunk([10,190]) {|offset, length| results << [offset, length]}
    #   results                # => [[10,100],[110,90]]
    #
    def chunk(range_or_span=default_span, blksize=default_blksize)
      # forward blksize as well, so the block-less form honors it
      return collect_results(:chunk, range_or_span, blksize) unless block_given?

      rbegin, rend = range_begin_and_end(range_or_span)

      # emit full-sized chunks while more than blksize remains,
      # then whatever is left over (if anything)
      while rend - rbegin > blksize
        yield(rbegin, blksize)
        rbegin += blksize
      end
      yield(rbegin, rend - rbegin) if rend - rbegin > 0
    end

    # Breaks the input range or span into chunks of blksize or less,
    # beginning from the end of the interval.  The offset and length
    # of each chunk will be provided to the block, if given.  Without
    # a block the [offset, length] pairs are collected and returned
    # as an array.
    #
    #   blksize                      # => 100
    #   reverse_chunk(0..250)        # => [[150,100],[50,100],[0,50]]
    #
    #   results = []
    #   reverse_chunk([10,190]) {|offset, length| results << [offset, length]}
    #   results                      # => [[100,100],[10,90]]
    #
    def reverse_chunk(range_or_span=default_span, blksize=default_blksize)
      # forward blksize as well, so the block-less form honors it
      return collect_results(:reverse_chunk, range_or_span, blksize) unless block_given?

      rbegin, rend = range_begin_and_end(range_or_span)

      # emit full-sized chunks from the end while more than blksize
      # remains, then whatever is left at the beginning (if anything)
      while rend - rbegin > blksize
        rend -= blksize
        yield(rend, blksize)
      end
      yield(rbegin, rend - rbegin) if rend - rbegin > 0
    end

    module_function

    # Converts a range into an offset and length.  Negative values are
    # counted back from self.length.  The length is (end - start), less
    # one for ranges that exclude their end.
    #
    #   length                 # => 10
    #   split_range(0..9)      # => [0,9]
    #   split_range(0...9)     # => [0,8]
    #
    #   split_range(-1..9)     # => [9,0]
    #   split_range(0..-1)     # => [0,9]
    #
    # NOTE(review): this differs from Array slicing, where 0..-1 would
    # cover every item -- confirm the convention is intended.
    def split_range(range)
      start, finish = range.begin, range.end
      start += length if start < 0
      finish += length if finish < 0

      [start, finish - start - (range.exclude_end? ? 1 : 0)]
    end

    # The complement to split_range; returns the span with a negative
    # start index counted back from self.length.  The input span is
    # never modified; a span with a non-negative start is returned as-is.
    #
    #   length                 # => 10
    #   split_span([0, 10])    # => [0,10]
    #   split_span([-1, 1])    # => [9,1]
    #
    def split_span(span)
      return span unless span[0] < 0

      # work on a copy so the caller's array is left untouched
      adjusted = span.dup
      adjusted[0] += self.length
      adjusted
    end

    # Returns the beginning and end of a range or span.  Raises an
    # ArgumentError if the offset is still negative after being
    # counted back from length.
    #
    #   range_begin_and_end(0..10)      # => [0, 10]
    #   range_begin_and_end(0...10)     # => [0, 9]
    #   range_begin_and_end([0, 10])    # => [0, 10]
    #
    def range_begin_and_end(range_or_span)
      rbegin, rlength = range_or_span.kind_of?(Range) ? split_range(range_or_span) : split_span(range_or_span)
      raise ArgumentError, "negative offset specified: #{range_or_span.inspect}" if rbegin < 0

      [rbegin, rbegin + rlength]
    end

    private

    # a utility method to collect the results of a method
    # that requires a block.  All args are forwarded to
    # the method.
    def collect_results(method, *args) # :nodoc:
      results = []
      send(method, *args) do |*result|
        results << result
      end
      results
    end
  end
end
@@ -0,0 +1,182 @@
1
+ require 'enumerator'
2
+
3
module External

  # An externalized implementation of Enumerable.  External::Enumerable
  # requires several methods with the following functionality:
  #
  # each::     iterates over items in self
  # another::  provides another instance of self
  # to_a::     converts self to an Array
  #
  # each_with_index additionally calls chunk and self[offset, length]
  # on the including object.
  #
  module Enumerable
    # Flag indicating whether to enumerate (ie collect,
    # select, etc) into an array or into an instance
    # of self.  In most cases enumerating to an array
    # performs better, but enumerating to another
    # instance of self may be desired for especially
    # large collections.
    attr_accessor :enumerate_to_a

    # Returns true unless the block returns a false value for
    # some item.  Without a block the items themselves are tested.
    def all? # :yield: obj
      # WARN -- no tests for this in test_array
      each do |obj|
        return false unless (block_given? ? yield(obj) : obj)
      end
      true
    end

    # Returns true if the block returns a true value for some
    # item.  Without a block the items themselves are tested.
    def any? # :yield: obj
      # WARN -- no tests for this in test_array
      each do |obj|
        return true if (block_given? ? yield(obj) : obj)
      end
      false
    end

    # Collects the block result for each item into an array, or
    # into another instance of self (see enumerate_to_a).  Without
    # a block, returns an enumerator over each.
    def collect # :yield: item
      if block_given?
        results = enumerate_to_a ? [] : self.another
        each do |item|
          results << yield(item)
        end
        results
      else
        # Not sure if Enumerator works right for large externals...
        # (to_enum is used because the Enumerable::Enumerator constant
        # only exists on ruby 1.8)
        to_enum(:each)
      end
    end

    # def collect! # :yield: item
    #   not_implemented
    # end

    # Returns the first item for which the block returns a true
    # value.  If no item matches, returns the result of calling
    # ifnone when it is provided, or nil otherwise.
    def detect(ifnone=nil) # :yield: obj
      # WARN -- no tests for this in test_array
      each do |obj|
        return obj if yield(obj)
      end
      ifnone ? ifnone.call : nil
    end

    # def each_cons(n) # :yield:
    #   not_implemented
    # end

    # def each_slice(n) # :yield:
    #   not_implemented
    # end

    # Yields each item with its index.  Items are read one chunk
    # at a time via self[offset, length], so the index of an item
    # is its position in the chunk plus the chunk offset.
    def each_with_index(&block)
      chunk do |offset, length|
        self[offset, length].each_with_index do |item, i|
          yield(item, i + offset)
        end
      end
    end

    # Alias for to_a.
    def entries
      to_a
    end

    # def enum_cons(n)
    #   not_implemented
    # end

    # def enum_slice(n)
    #   not_implemented
    # end

    # def enum_with_index
    #   not_implemented
    # end

    # Alias for detect.
    def find(ifnone=nil, &block) # :yield: obj
      # WARN -- no tests for this in test_array
      detect(ifnone, &block)
    end

    # Collects the items for which the block returns a true value
    # into an array, or into another instance of self (see
    # enumerate_to_a).
    def find_all # :yield: obj
      results = enumerate_to_a ? [] : self.another
      each do |item|
        results << item if yield(item)
      end
      results
    end

    # def grep(pattern) # :yield: obj
    #   not_implemented
    # end

    # Returns true if some item == obj.
    def include?(obj)
      each do |current|
        return true if current == obj
      end
      false
    end

    # def inject(init) # :yield: memo, obj
    #   not_implemented
    # end

    # Alias for collect.
    def map(&block) # :yield: item
      collect(&block)
    end

    # def map!(&block) # :yield: item
    #   collect!(&block)
    # end

    # def max # :yield: a,b
    #   not_implemented
    # end

    # Alias for include?
    def member?(obj)
      include?(obj)
    end

    # def min # :yield: a,b
    #   not_implemented
    # end

    # def partition # :yield: obj
    #   not_implemented
    # end

    # def reject # :yield: item
    #   not_implemented
    # end

    # def reject! # :yield: item
    #   not_implemented
    # end

    # Alias for find_all.
    def select(&block) # :yield: obj
      find_all(&block)
    end

    # def sort # :yield: a,b
    #   not_implemented
    # end

    # def sort! # :yield: a,b
    #   not_implemented
    # end

    # def sort_by # :yield: obj
    #   not_implemented
    # end

    # def to_a
    #   not_implemented
    # end

    # def to_set(klass=Set, *args, &block)
    #   not_implemented
    # end

    # def zip(*arg) # :yield: arr
    #   not_implemented
    # end
  end
end
@@ -0,0 +1,163 @@
1
+ require 'external/chunkable'
2
+ require 'external/utils'
3
+
4
+ autoload(:StringIO, 'stringio')
5
+ autoload(:Tempfile, 'tempfile')
6
+ autoload(:FileUtils, 'fileutils')
7
+
8
module External

  # Adds functionality to an IO required by External.
  #
  # IO adds/overrides the length accessor for getting the size of the IO contents.
  # Note that length is not automatically adjusted by write, for performance
  # reasons.  length must be managed manually, or reset after writes using
  # reset_length.
  #
  module Io
    include Chunkable

    # Modules used to extend an IO, ahead of the other setup
    # performed when the IO is extended by Io.
    PATCHES = []

    # Add version-specific patches
    case RUBY_VERSION
    when /^1\.8/ then require "external/patches/ruby_1_8_io"
    end

    # Add platform-specific patches
    # case RUBY_PLATFORM
    # when 'java'
    # end

    # Extends base with each of the PATCHES, then resets the length
    # of base, sets the default_blksize to 1024, and puts base into
    # binmode.
    def self.extended(base)
      PATCHES.each {|patch| base.extend patch }
      base.reset_length
      base.default_blksize = 1024
      base.binmode
    end

    # Resets length to the length returned by Utils.length
    def reset_length
      self.length = Utils.length(self)
    end

    # Modified truncate that adjusts length.  The position of self
    # is moved back to n if it lies beyond the truncation point.
    def truncate(n)
      super
      self.pos = n if self.pos > n
      self.length = n
    end

    # Reads the range or span in chunks of blksize, yielding the start
    # position and string read for each chunk to the block.  The block
    # must return the carryover; the number of bytes at the end of the
    # current chunk that should be re-read at the start of the next.
    # Raises an error if the carryover exceeds carryover_limit.  Returns
    # the carryover from the final chunk.
    def scan(range_or_span=default_span, blksize=default_blksize, carryover_limit=default_blksize)
      carryover = 0
      chunk(range_or_span, blksize) do |offset, length|
        raise "carryover exceeds limit: #{carryover} (#{carryover_limit})" if carryover > carryover_limit

        # back up by the carryover so the trailing bytes
        # of the previous chunk are read again
        scan_begin = offset - carryover
        self.pos = scan_begin
        string = self.read(length + carryover)
        carryover = yield(scan_begin, string)
      end
      carryover
    end

    # Writes the specified range of src into self, chunk by chunk,
    # starting at pos (or at the current position when pos is nil).
    # The length of self is increased if the write extends past the
    # current length.  Returns the total of the values returned by
    # write.
    def insert(src, range=0..src.length, pos=nil)
      self.pos = pos unless pos.nil?

      start_pos = self.pos
      length_written = 0

      src.flush
      src.pos = range.begin
      src.chunk(range) do |offset, length|
        length_written += write(src.read(length))
      end

      end_pos = start_pos + length_written
      self.length = end_pos if end_pos > self.length
      length_written
    end

    # Inserts the specified range of src at the end of self (ie at
    # length).  Returns the same value as insert.
    def concat(src, range=0..src.length)
      insert(src, range, length)
    end

    # Copies the specified range of self into a Tempfile, then reopens
    # the tempfile path in the specified mode and extends the new file
    # with Io.  If a block is given the copy is yielded to the block,
    # then closed and removed, and the block result is returned.
    # Otherwise the open copy is returned.
    #
    #--
    # it appears that as long as the io opening t.path closes,
    # the tempfile will be deleted at the exit of the ruby
    # instance... otherwise it WILL NOT BE DELETED
    # Make note of this in the documentation to be sure to close
    # files if you start inserting because it may make tempfiles
    #++
    def copy(mode="r", range=0..length)
      self.flush

      temp = Tempfile.new("copy")
      temp.extend Io
      temp.insert(self, range)
      temp.close

      cp = File.open(temp.path, mode)
      cp.extend Io

      if block_given?
        begin
          yield(cp)
        ensure
          cp.close unless cp.closed?
          # File.exist? is used because File.exists? was removed in ruby 3.2
          FileUtils.rm(cp.path) if File.exist?(cp.path)
        end
      else
        cp
      end
    end

    # Quick comparison with another IO.  Returns true if
    # another == self, or if both are file-type IOs and
    # their paths are equal.
    def quick_compare(another)
      self == another || (self.kind_of?(File) && another.kind_of?(File) && self.path == another.path)
    end

    # Sort compare (ie <=>) with another IO.  IOs are ordered first
    # by length (a shorter IO sorts before a longer one); IOs of
    # equal length are ordered by their contents, which are read
    # and compared blksize bytes at a time.  This obviously can be
    # a long operation if it requires the full read of two large
    # IO objects.
    #
    # Both IOs are flushed and have their length reset, and when
    # the contents are compared the position of both is changed.
    def sort_compare(another, blksize=default_blksize)
      # equal in comparison if the ios are equal
      return 0 if quick_compare(another)

      self.flush
      self.reset_length

      another.flush
      another.reset_length

      if self.length < another.length
        return -1
      elsif self.length > another.length
        return 1
      else
        self.pos = 0
        another.pos = 0

        # read until a block differs, or until the end
        # of either io (where read returns nil)
        sa = sb = nil
        while sa == sb
          sa = self.read(blksize)
          sb = another.read(blksize)
          break if sa.nil? || sb.nil?
        end

        sa.to_s <=> sb.to_s
      end
    end

    # Alias for sort_compare.
    def <=>(another)
      sort_compare(another)
    end
  end
end