external 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. data/History +5 -0
  2. data/MIT-LICENSE +21 -0
  3. data/README +168 -0
  4. data/lib/ext_arc.rb +108 -0
  5. data/lib/ext_arr.rb +727 -0
  6. data/lib/ext_ind.rb +1120 -0
  7. data/lib/external/base.rb +85 -0
  8. data/lib/external/chunkable.rb +105 -0
  9. data/lib/external/enumerable.rb +137 -0
  10. data/lib/external/io.rb +398 -0
  11. data/lib/external.rb +3 -0
  12. data/test/benchmarks/benchmarks_20070918.txt +45 -0
  13. data/test/benchmarks/benchmarks_20070921.txt +91 -0
  14. data/test/benchmarks/benchmarks_20071006.txt +147 -0
  15. data/test/benchmarks/test_copy_file.rb +80 -0
  16. data/test/benchmarks/test_pos_speed.rb +47 -0
  17. data/test/benchmarks/test_read_time.rb +55 -0
  18. data/test/cached_ext_ind_test.rb +219 -0
  19. data/test/check/benchmark_check.rb +441 -0
  20. data/test/check/namespace_conflicts_check.rb +23 -0
  21. data/test/check/pack_check.rb +90 -0
  22. data/test/ext_arc_test.rb +286 -0
  23. data/test/ext_arr/alt_sep.txt +3 -0
  24. data/test/ext_arr/cr_lf_input.txt +3 -0
  25. data/test/ext_arr/input.index +0 -0
  26. data/test/ext_arr/input.txt +1 -0
  27. data/test/ext_arr/inputb.index +0 -0
  28. data/test/ext_arr/inputb.txt +1 -0
  29. data/test/ext_arr/lf_input.txt +3 -0
  30. data/test/ext_arr/lines.txt +19 -0
  31. data/test/ext_arr/without_index.txt +1 -0
  32. data/test/ext_arr_test.rb +534 -0
  33. data/test/ext_ind_test.rb +1472 -0
  34. data/test/external/base_test.rb +74 -0
  35. data/test/external/chunkable_test.rb +182 -0
  36. data/test/external/index/input.index +0 -0
  37. data/test/external/index/inputb.index +0 -0
  38. data/test/external/io_test.rb +414 -0
  39. data/test/external_test_helper.rb +31 -0
  40. data/test/external_test_suite.rb +4 -0
  41. data/test/test_array.rb +1192 -0
  42. metadata +104 -0
@@ -0,0 +1,85 @@
1
+ require 'external/io'
2
+ require 'external/chunkable'
3
+ require 'external/enumerable'
4
+ require 'tempfile'
5
+
6
+ module External
7
+
8
+ #--
9
+ # Base provides the basic array functionality shared by ExtArr and Index,
10
+ # essentially wrapping the IO functions required to access and utilize external
11
+ # array data with the standard array functions. Bases can be opened
12
+ # in any of the IO modes; the capabilities of Base will be reduced accordingly
13
+ # (ie read-only Bases cannot write values using []=, for instance).
14
+ #
15
+ # It is VERY IMPORTANT to realize that the underlying IO will be opened using the
16
+ # given mode. The 'w' mode will overwrite all existing data; 'r+' is a safer mode
17
+ # for full read-write functionality. Note that since Base actively scans over
18
+ # the IO, append modes essentially behave like write, but do not overwrite existing
19
+ # data.
20
+ #
21
+ # To work properly, Base must be subclassed with methods:
22
+ # * length
23
+ # * io_fetch
24
+ #++
25
+ #
26
+ #
27
# Shared base class that wraps an IO for the external array classes.
# Subclasses are expected to provide +length+ and +io_fetch+.
class Base
  class << self
    # Opens a new instance on the file at +fd+ using the given IO +mode+.
    #
    # fd::      path handed to File.open; when nil no file is opened and
    #           nil is passed on to new (which then falls back to a Tempfile)
    # mode::    the File.open mode (ignored when fd is nil)
    # options:: passed through unchanged as the second argument to new
    #
    # With a block, the instance is yielded, closed when the block exits
    # (even on error), and the block's value is returned. Without a block
    # the open instance is returned and the caller must close it.
    def open(fd=nil, mode="r", options={})
      fd = File.open(fd, mode) unless fd.nil?

      begin
        ab = new(fd, options)
      rescue
        # Don't leak the handle opened above when construction fails.
        fd.close unless fd.nil? || fd.closed?
        raise
      end

      return ab unless block_given?

      begin
        yield(ab)
      ensure
        ab.close
      end
    end
  end

  include External::Enumerable
  include External::Chunkable

  # The underlying IO, extended with External::IO.
  attr_reader :io

  # Initializes a new Base wrapping +io+. When +io+ is nil, a Tempfile
  # (basename "array_base") is created and used instead.
  #
  # +options+ is accepted so that Base.open, which calls new(fd, options),
  # can construct a plain Base; nothing in this class reads it.
  # NOTE(review): earlier documentation listed nil_value, max_gap and
  # max_chunk_size as standard options -- presumably these are consumed
  # by subclasses; confirm against ExtArr/ExtInd.
  def initialize(io=nil, options={})
    self.io = (io.nil? ? Tempfile.new("array_base") : io)
  end

  # True if io is closed.
  def closed?
    io.closed?
  end

  # Closes io. Safe to call more than once.
  def close
    io.close unless io.closed?
  end

  protected

  # Sets io, extending the input io with External::IO unless it already
  # is one.
  def io=(io)
    io.extend External::IO unless io.kind_of?(External::IO)
    @io = io
  end

end
85
+ end
@@ -0,0 +1,105 @@
1
+ module External
2
+
3
+ # The Chunkable mixin provides methods for organizing a span or range
4
+ # into chunks no larger than a specified block size.
5
module Chunkable
  # The length used as the end of the default span and as the base for
  # resolving negative offsets.
  attr_accessor :length

  # The chunk size used when no blksize is passed to chunk/reverse_chunk.
  attr_accessor :default_blksize

  # Returns the default span: [0, length]
  def default_span
    [0, length]
  end

  # Breaks the input range or span into chunks of blksize or less.
  # The offset and length of each chunk are yielded to the block; when
  # no block is given, the [offset, length] pairs are collected and
  # returned as an array.
  #
  # Ranges follow the usual Ruby semantics (0..250 covers 251 entries);
  # spans are [offset, length] pairs.
  #
  #   default_blksize     # => 100
  #   chunk(0..250)       # => [[0,100],[100,100],[200,51]]
  #
  #   results = []
  #   chunk([10,190]) {|offset, length| results << [offset, length]}
  #   results             # => [[10,100],[110,90]]
  #
  # Raises ArgumentError if blksize is nil or not positive, or if the
  # resolved offset is negative.
  def chunk(range_or_span=default_span, blksize=default_blksize)
    return collect_results(:chunk, range_or_span, blksize) unless block_given?

    # a zero or negative blksize would never advance the loop below
    raise ArgumentError, "blksize must be positive: #{blksize.inspect}" unless blksize && blksize > 0

    rbegin, rend = range_begin_and_end(range_or_span)

    # emit full-sized chunks from the front, then whatever remains
    while rend - rbegin > blksize
      yield(rbegin, blksize)
      rbegin += blksize
    end
    yield(rbegin, rend - rbegin) if rend - rbegin > 0
  end

  # Breaks the input range or span into chunks of blksize or less,
  # beginning from the end of the interval. The offset and length of
  # each chunk are yielded to the block; when no block is given, the
  # [offset, length] pairs are collected and returned as an array.
  #
  #   default_blksize         # => 100
  #   reverse_chunk(0..250)   # => [[151,100],[51,100],[0,51]]
  #
  #   results = []
  #   reverse_chunk([10,190]) {|offset, length| results << [offset, length]}
  #   results                 # => [[100,100],[10,90]]
  #
  # Raises ArgumentError if blksize is nil or not positive, or if the
  # resolved offset is negative.
  def reverse_chunk(range_or_span=default_span, blksize=default_blksize)
    return collect_results(:reverse_chunk, range_or_span, blksize) unless block_given?

    # a zero or negative blksize would never advance the loop below
    raise ArgumentError, "blksize must be positive: #{blksize.inspect}" unless blksize && blksize > 0

    rbegin, rend = range_begin_and_end(range_or_span)

    # emit full-sized chunks from the back, then whatever remains
    while rend - rbegin > blksize
      rend -= blksize
      yield(rend, blksize)
    end
    yield(rbegin, rend - rbegin) if rend - rbegin > 0
  end

  protected

  # Converts a range into an offset and length. Negative values are
  # counted back from self.length
  #
  #   length                # => 10
  #   split_range(0..9)     # => [0,10]
  #   split_range(0...9)    # => [0,9]
  #
  #   split_range(-1..9)    # => [9,1]
  #   split_range(0..-1)    # => [0,10]
  def split_range(range)
    start, finish = range.begin, range.end
    start += length if start < 0
    finish += length if finish < 0

    # an inclusive range covers one more entry than finish - start
    [start, finish - start + (range.exclude_end? ? 0 : 1)]
  end

  # Resolves a negative span offset against self.length and returns a
  # new [offset, length] pair; the input span is left untouched.
  def split_span(span)
    offset, span_length = span
    offset += length if offset < 0
    [offset, span_length]
  end

  # Converts a range or span into [begin, end] offsets, where end is
  # exclusive. Raises ArgumentError if the offset is still negative
  # after being counted back from self.length.
  def range_begin_and_end(range_or_span)
    rbegin, rlength = range_or_span.kind_of?(Range) ? split_range(range_or_span) : split_span(range_or_span)
    raise ArgumentError, "negative offset specified: #{range_or_span.inspect}" if rbegin < 0

    [rbegin, rbegin + rlength]
  end

  private

  # Calls method with args and a collecting block, returning everything
  # that was yielded as an array of arrays.
  def collect_results(method, *args) # :nodoc:
    results = []
    send(method, *args) { |*result| results << result }
    results
  end
end
105
+ end
@@ -0,0 +1,137 @@
1
+ require 'enumerator'
2
+
3
+ module External
4
+ module Enumerable
5
+ # def all? # :yield: obj
6
+ # not_implemented
7
+ # end
8
+
9
+ # def any? # :yield: obj
10
+ # not_implemented
11
+ # end
12
+
13
+ # def collect # :yield: item
14
+ # not_implemented
15
+ # end
16
+
17
+ # def collect! # :yield: item
18
+ # not_implemented
19
+ # end
20
+
21
+ # def detect(ifnone=nil) # :yield: obj
22
+ # not_implemented
23
+ # end
24
+
25
+ # def each_cons(n) # :yield:
26
+ # not_implemented
27
+ # end
28
+
29
+ # def each_slice(n) # :yield:
30
+ # not_implemented
31
+ # end
32
+
33
# Yields each item together with its absolute index, reading the data
# one chunk at a time (via chunk and self[offset, length]) rather than
# all at once. Returns self; without a block, returns an enumerator
# over the same (item, index) pairs.
def each_with_index
  return enum_for(:each_with_index) unless block_given?

  chunk do |offset, length|
    # i is relative to the chunk; shift by offset for the absolute index
    self[offset, length].each_with_index do |item, i|
      yield(item, i + offset)
    end
  end
  self
end
40
+
41
+ # def entries
42
+ # to_a
43
+ # end
44
+
45
+ # def enum_cons(n)
46
+ # not_implemented
47
+ # end
48
+
49
+ # def enum_slice(n)
50
+ # not_implemented
51
+ # end
52
+
53
+ # def enum_with_index
54
+ # not_implemented
55
+ # end
56
+
57
+ # def find(ifnone=nil, &block) # :yield: obj
58
+ # detect(ifnone, &block)
59
+ # end
60
+
61
+ # def find_all # :yield: obj
62
+ # not_implemented
63
+ # end
64
+
65
+ # def grep(pattern) # :yield: obj
66
+ # not_implemented
67
+ # end
68
+
69
+ # def include?(obj)
70
+ # not_implemented
71
+ # end
72
+
73
+ # def inject(init) # :yield: memo, obj
74
+ # not_implemented
75
+ # end
76
+
77
+ # def map(&block) # :yield: item
78
+ # collect(&block)
79
+ # end
80
+
81
+ # def map!(&block) # :yield: item
82
+ # collect!(&block)
83
+ # end
84
+
85
+ # def max # :yield: a,b
86
+ # not_implemented
87
+ # end
88
+
89
+ # def member?(obj)
90
+ # include?(obj)
91
+ # end
92
+
93
+ # def min # :yield: a,b
94
+ # not_implemented
95
+ # end
96
+
97
+ # def partition # :yield: obj
98
+ # not_implemented
99
+ # end
100
+
101
+ # def reject # :yield: item
102
+ # not_implemented
103
+ # end
104
+
105
+ # def reject! # :yield: item
106
+ # not_implemented
107
+ # end
108
+
109
+ # def select(&block) # :yield: obj
110
+ # find_all(&block)
111
+ # end
112
+
113
+ # def sort # :yield: a,b
114
+ # not_implemented
115
+ # end
116
+
117
+ # def sort! # :yield: a,b
118
+ # not_implemented
119
+ # end
120
+
121
+ # def sort_by # :yield: obj
122
+ # not_implemented
123
+ # end
124
+
125
+ # def to_a
126
+ # not_implemented
127
+ # end
128
+
129
+ # def to_set(klass=Set, *args, &block)
130
+ # not_implemented
131
+ # end
132
+
133
+ # def zip(*arg) # :yield: arr
134
+ # not_implemented
135
+ # end
136
+ end
137
+ end