external 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. data/History +5 -0
  2. data/MIT-LICENSE +21 -0
  3. data/README +168 -0
  4. data/lib/ext_arc.rb +108 -0
  5. data/lib/ext_arr.rb +727 -0
  6. data/lib/ext_ind.rb +1120 -0
  7. data/lib/external/base.rb +85 -0
  8. data/lib/external/chunkable.rb +105 -0
  9. data/lib/external/enumerable.rb +137 -0
  10. data/lib/external/io.rb +398 -0
  11. data/lib/external.rb +3 -0
  12. data/test/benchmarks/benchmarks_20070918.txt +45 -0
  13. data/test/benchmarks/benchmarks_20070921.txt +91 -0
  14. data/test/benchmarks/benchmarks_20071006.txt +147 -0
  15. data/test/benchmarks/test_copy_file.rb +80 -0
  16. data/test/benchmarks/test_pos_speed.rb +47 -0
  17. data/test/benchmarks/test_read_time.rb +55 -0
  18. data/test/cached_ext_ind_test.rb +219 -0
  19. data/test/check/benchmark_check.rb +441 -0
  20. data/test/check/namespace_conflicts_check.rb +23 -0
  21. data/test/check/pack_check.rb +90 -0
  22. data/test/ext_arc_test.rb +286 -0
  23. data/test/ext_arr/alt_sep.txt +3 -0
  24. data/test/ext_arr/cr_lf_input.txt +3 -0
  25. data/test/ext_arr/input.index +0 -0
  26. data/test/ext_arr/input.txt +1 -0
  27. data/test/ext_arr/inputb.index +0 -0
  28. data/test/ext_arr/inputb.txt +1 -0
  29. data/test/ext_arr/lf_input.txt +3 -0
  30. data/test/ext_arr/lines.txt +19 -0
  31. data/test/ext_arr/without_index.txt +1 -0
  32. data/test/ext_arr_test.rb +534 -0
  33. data/test/ext_ind_test.rb +1472 -0
  34. data/test/external/base_test.rb +74 -0
  35. data/test/external/chunkable_test.rb +182 -0
  36. data/test/external/index/input.index +0 -0
  37. data/test/external/index/inputb.index +0 -0
  38. data/test/external/io_test.rb +414 -0
  39. data/test/external_test_helper.rb +31 -0
  40. data/test/external_test_suite.rb +4 -0
  41. data/test/test_array.rb +1192 -0
  42. metadata +104 -0
@@ -0,0 +1,85 @@
1
+ require 'external/io'
2
+ require 'external/chunkable'
3
+ require 'external/enumerable'
4
+ require 'tempfile'
5
+
6
+ module External
7
+
8
+ #--
9
+ # Base provides the basic array functionality shared by ExtArr and Index,
10
+ # essentially wrapping the IO functions required to access and utilize external
11
+ # array data with the standard array functions. Bases can be opened
12
+ # in any of the IO modes; the capabilities of Base will be reduced accordingly
13
+ # (ie read-only Bases cannot write values using []=, for instance).
14
+ #
15
+ # It is VERY IMPORTANT to realize that the underlying IO will be opened using the
16
+ # given mode. The 'w' mode will overwrite all existing data; 'r+' is a safer mode
17
+ # for full read-write functionality. Note that since Base actively scans over
18
+ # the IO, append modes essentially behave like write, but do not overwrite existing
19
+ # data.
20
+ #
21
+ # To work properly, Base must be subclassed with methods:
22
+ # * length
23
+ # * io_fetch
24
+ #++
25
+ #
26
+ #
27
class Base
  class << self
    # Opens a new instance of the receiving class on the given file.
    #
    # fd::      a path handed to File.open together with mode, or nil. When
    #           nil, no file is opened and nil is passed to new (Base then
    #           falls back to a Tempfile, see initialize).
    # mode::    the mode string passed to File.open (default "r").
    # options:: passed through unchanged as the second argument to new.
    #
    # When a block is given the new instance is yielded, closed once the
    # block finishes (even if it raises), and the block's value is returned.
    # Without a block the new instance is returned and the caller is
    # responsible for closing it.
    def open(fd=nil, mode="r", options={})
      fd = File.open(fd, mode) unless fd.nil?

      begin
        ab = new(fd, options)
      rescue
        # Construction failed, so nothing owns the file we just opened;
        # close it here rather than leaking the handle, then re-raise.
        fd.close if fd && !fd.closed?
        raise
      end

      return ab unless block_given?

      begin
        yield(ab)
      ensure
        ab.close
      end
    end
  end

  include External::Enumerable
  include External::Chunkable

  # The underlying io, extended with External::IO.
  attr_reader :io

  # Initializes a new Base wrapping io. When io is nil a Tempfile named
  # "array_base" is created and used instead.
  #
  # options is accepted so that Base.open, which always passes an options
  # hash to new, can construct a plain Base. Base itself does not read it;
  # subclasses may define their own initialize to interpret options.
  def initialize(io=nil, options={})
    self.io = (io.nil? ? Tempfile.new("array_base") : io)
  end

  # True if io is closed.
  def closed?
    io.closed?
  end

  # Closes io. Safe to call more than once: an already-closed io is left
  # alone.
  def close
    io.close unless io.closed?
  end

  protected

  # Sets io, extending it with External::IO unless it already is one.
  def io=(io)
    io.extend External::IO unless io.kind_of?(External::IO)
    @io = io
  end

end
85
+ end
@@ -0,0 +1,105 @@
1
+ module External
2
+
3
+ # The Chunkable mixin provides methods for organizing a span or range
4
+ # into chunks no larger than a specified block size.
5
module Chunkable
  # length is the total number of entries that negative offsets are
  # counted back from; default_blksize is the chunk size used when the
  # chunking methods are not given one explicitly.
  attr_accessor :length, :default_blksize

  # Returns the default span: [0, length]
  def default_span
    [0, length]
  end

  # Breaks the input range or span into chunks of blksize or less.
  # The offset and length of each chunk will be provided to the
  # block, if given.  Without a block, the chunks are collected and
  # returned as an array of [offset, length] pairs.
  #
  #   blksize            # => 100
  #   chunk(0..250)      # => [[0,100],[100,100],[200,50]]
  #
  #   results = []
  #   chunk([10,190]) {|offset, length| results << [offset, length]}
  #   results            # => [[10,100],[110,90]]
  #
  # Raises ArgumentError if blksize is not positive, or if the offset is
  # still negative after being counted back from length.
  def chunk(range_or_span=default_span, blksize=default_blksize)
    # forward blksize as well, so an explicit chunk size is honored in
    # the blockless form too
    return collect_results(:chunk, range_or_span, blksize) unless block_given?

    # a zero or negative blksize would never advance and loop forever
    raise ArgumentError, "invalid blksize: #{blksize.inspect}" unless blksize && blksize > 0

    rbegin, rend = range_begin_and_end(range_or_span)

    # chunk the final range to make sure that no chunks
    # greater than blksize are returned
    while rend - rbegin > blksize
      yield(rbegin, blksize)
      rbegin += blksize
    end
    yield(rbegin, rend - rbegin) if rend - rbegin > 0
  end

  # Breaks the input range or span into chunks of blksize or less,
  # beginning from the end of the interval.  The offset and length
  # of each chunk will be provided to the block, if given.  Without a
  # block, the chunks are collected and returned as an array of
  # [offset, length] pairs.
  #
  #   blksize                 # => 100
  #   reverse_chunk(0..250)   # => [[150,100],[50,100],[0,50]]
  #
  #   results = []
  #   reverse_chunk([10,190]) {|offset, length| results << [offset, length]}
  #   results                 # => [[100,100],[10,90]]
  #
  # Raises ArgumentError under the same conditions as chunk.
  def reverse_chunk(range_or_span=default_span, blksize=default_blksize)
    return collect_results(:reverse_chunk, range_or_span, blksize) unless block_given?

    # a zero or negative blksize would never advance and loop forever
    raise ArgumentError, "invalid blksize: #{blksize.inspect}" unless blksize && blksize > 0

    rbegin, rend = range_begin_and_end(range_or_span)

    # chunk the final range to make sure that no chunks
    # greater than blksize are returned
    while rend - rbegin > blksize
      rend -= blksize
      yield(rend, blksize)
    end
    yield(rbegin, rend - rbegin) if rend - rbegin > 0
  end

  protected

  # Converts a range into an offset and length.  Negative values are
  # counted back from self.length.  The length is computed as
  # (end - begin), less one when the range excludes its end:
  #
  #   length                # => 10
  #   split_range(0..9)     # => [0,9]
  #   split_range(0...9)    # => [0,8]
  #
  #   split_range(-1..9)    # => [9,0]
  #   split_range(0..-1)    # => [0,9]
  #
  # NOTE(review): this differs from Array-style range semantics, where
  # 0..9 covers 10 entries; confirm which convention callers expect
  # before changing the arithmetic.
  def split_range(range)
    begin_range = range.begin + (range.begin < 0 ? self.length : 0)
    end_range = range.end + (range.end < 0 ? self.length : 0)
    span_length = end_range - begin_range - (range.exclude_end? ? 1 : 0)

    [begin_range, span_length]
  end

  # Resolves a negative span offset by counting back from self.length.
  # Returns a new array; the input span is not modified.
  def split_span(span)
    resolved = span.dup
    resolved[0] += self.length if resolved[0] < 0
    resolved
  end

  # Converts a range or an [offset, length] span into [begin, end]
  # positions, where end is begin plus the length.  Raises ArgumentError
  # if the resolved offset is negative.
  def range_begin_and_end(range_or_span)
    rbegin, rend = range_or_span.kind_of?(Range) ? split_range(range_or_span) : split_span(range_or_span)
    # inspect is used for the message so no extra library needs loading
    raise ArgumentError, "negative offset specified: #{range_or_span.inspect}" if rbegin < 0
    rend += rbegin

    [rbegin, rend]
  end

  private

  # Calls method with args and a block that gathers every yielded
  # argument list into an array, which is returned.
  def collect_results(method, *args) # :nodoc:
    results = []
    send(method, *args) do |*result|
      results << result
    end
    results
  end
end
105
+ end
@@ -0,0 +1,137 @@
1
+ require 'enumerator'
2
+
3
+ module External
4
+ module Enumerable
5
+ # def all? # :yield: obj
6
+ # not_implemented
7
+ # end
8
+
9
+ # def any? # :yield: obj
10
+ # not_implemented
11
+ # end
12
+
13
+ # def collect # :yield: item
14
+ # not_implemented
15
+ # end
16
+
17
+ # def collect! # :yield: item
18
+ # not_implemented
19
+ # end
20
+
21
+ # def detect(ifnone=nil) # :yield: obj
22
+ # not_implemented
23
+ # end
24
+
25
+ # def each_cons(n) # :yield:
26
+ # not_implemented
27
+ # end
28
+
29
+ # def each_slice(n) # :yield:
30
+ # not_implemented
31
+ # end
32
+
33
# Yields each item together with its absolute index.  Items are read
# one chunk at a time via chunk and self[offset, length], so the index
# passed to the block is the item's position within the chunk plus the
# chunk's offset.
def each_with_index(&block)
  chunk do |start, count|
    position = start
    self[start, count].each do |entry|
      yield(entry, position)
      position += 1
    end
  end
end
40
+
41
+ # def entries
42
+ # to_a
43
+ # end
44
+
45
+ # def enum_cons(n)
46
+ # not_implemented
47
+ # end
48
+
49
+ # def enum_slice(n)
50
+ # not_implemented
51
+ # end
52
+
53
+ # def enum_with_index
54
+ # not_implemented
55
+ # end
56
+
57
+ # def find(ifnone=nil, &block) # :yield: obj
58
+ # detect(ifnone, &block)
59
+ # end
60
+
61
+ # def find_all # :yield: obj
62
+ # not_implemented
63
+ # end
64
+
65
+ # def grep(pattern) # :yield: obj
66
+ # not_implemented
67
+ # end
68
+
69
+ # def include?(obj)
70
+ # not_implemented
71
+ # end
72
+
73
+ # def inject(init) # :yield: memo, obj
74
+ # not_implemented
75
+ # end
76
+
77
+ # def map(&block) # :yield: item
78
+ # collect(&block)
79
+ # end
80
+
81
+ # def map!(&block) # :yield: item
82
+ # collect!(&block)
83
+ # end
84
+
85
+ # def max # :yield: a,b
86
+ # not_implemented
87
+ # end
88
+
89
+ # def member?(obj)
90
+ # include?(obj)
91
+ # end
92
+
93
+ # def min # :yield: a,b
94
+ # not_implemented
95
+ # end
96
+
97
+ # def partition # :yield: obj
98
+ # not_implemented
99
+ # end
100
+
101
+ # def reject # :yield: item
102
+ # not_implemented
103
+ # end
104
+
105
+ # def reject! # :yield: item
106
+ # not_implemented
107
+ # end
108
+
109
+ # def select(&block) # :yield: obj
110
+ # find_all(&block)
111
+ # end
112
+
113
+ # def sort # :yield: a,b
114
+ # not_implemented
115
+ # end
116
+
117
+ # def sort! # :yield: a,b
118
+ # not_implemented
119
+ # end
120
+
121
+ # def sort_by # :yield: obj
122
+ # not_implemented
123
+ # end
124
+
125
+ # def to_a
126
+ # not_implemented
127
+ # end
128
+
129
+ # def to_set(klass=Set, *args, &block)
130
+ # not_implemented
131
+ # end
132
+
133
+ # def zip(*arg) # :yield: arr
134
+ # not_implemented
135
+ # end
136
+ end
137
+ end