arrayio 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,76 @@
1
+ # = ArrayIO
2
+ # Array-like behavior for archival files.
3
+ #
4
+ # == Introduction
5
+ # Archival files contain many entries following a regular pattern. These files often grow very large, making them
6
+ # inconvenient or impossible to parse directly into memory. ArrayIO provides an easy way to index these files
7
+ # so that array-like calls can retrieve entries on the fly.
8
+ #
9
+ # Internally ArrayIO keeps an IO object for the archive, and an index of ranges recording where each entry
10
+ # begins and ends. The index is an array, unless ArrayIO operates in 'uncached' mode. In this case the index is
11
+ # a separate file from which the ranges can be looked up. Uncached mode is useful when dealing with an extremely
12
+ # large number of ranges (entries) that would chew up lots of memory if kept in an array.
13
+ #
14
+ # When operating in a writable mode, entries can be added, inserted, and deleted. All changes are recorded in the index
15
+ # and will not reflect the actual order of entries in the archive unless consolidated.
16
+ #
17
+ # Example: If you add an entry at index 1000, the index records the range at index 1000, but the actual entry is appended
18
+ # to the end of the archive. If you delete an entry, it remains in the archive, but the range is removed from the index.
19
+ # Consolidation re-writes entries in their proper order and removes deleted entries.
20
+ #
21
+ # Notes:
22
+ # - BE CAREFUL to specify the correct mode when you open up an ArrayIO - as with File, a 'w' mode will overwrite
23
+ # the ENTIRE archive file. It is safer to use the append mode 'a'.
24
+ #
25
+ # Copyright (c) 2007 Simon Chiang
26
+ # Version: 0.1
27
+ # Licence: MIT-Style
28
+ #
29
+ # == Usage
30
+ #
31
+ # # Open, autoindex and work with 'archive.txt'
32
+ # ArrayIO.open('archive.txt', 'r+') do |aio|
33
+ # aio[100] # -> entry 100
34
+ # aio[100] = "new entry" # -> reassigns entry 100
35
+ # aio.each do |entry|
36
+ # # do something
37
+ # end
38
+ # end
39
+ #
40
+ # # Open 'archive.txt' in uncached mode. This creates a file 'archive.index'
41
+ # # that will be filled with the entry ranges. You can specify where entries
42
+ # # begin and end using options and a block in +reindex+.
43
+ # # If the block returns true, the line is considered the beginning of
44
+ # # an entry. This block looks for entries delimited by '>' like:
45
+ # # > entry 0
46
+ # # still entry 0
47
+ # # > entry 1
48
+ # #
49
+ # aio = ArrayIO.new('archive.txt', 'ru')
50
+ # aio.reindex do |line|
51
+ # line =~ /^>/
52
+ # end
53
+ # aio.close
54
+ #
55
+ # # Subclass ArrayIO by overriding +str_to_entry+, +entry_to_str+, and +reindex+
56
+ # # EntryIO parses entries as above, and functions like:
57
+ # # entryio[0] # => [0, "\nstill entry 0"]
58
+ # # entryio[1] # => [1, ""]
59
+ # # entryio[1] = [100, " is the new entry"] # => writes "> entry 100 is the new entry"
60
+ # # entryio[1] # => [100, " is the new entry"]
61
+ # class EntryIO < ArrayIO
62
+ # def str_to_entry(str)
63
+ # str =~ /^> entry (\d+)(.*)$/m
64
+ # [$1.to_i, $2]
65
+ # end
66
+ #
67
+ # def entry_to_str(entry)
68
+ # "> entry #{entry[0]}#{entry[1]}"
69
+ # end
70
+ #
71
+ # def reindex(options={}, &block)
72
+ # super(options) do |line|
73
+ # block_given? ? yield(line) : line =~ /^>/
74
+ # end
75
+ # end
76
+ # end
@@ -0,0 +1,381 @@
1
+ require 'stringio'
2
+
3
# ArrayIO provides Array-like, index-based access to the entries of an archive
# held in a File or, in 's' mode, in a String.
#
# Two objects are kept per instance:
# io::       the archive itself (a File or StringIO)
# io_index:: where each entry begins and ends. In cached mode this is an Array
#            of exclusive Ranges of byte positions. In uncached ('u') mode it
#            is a File or StringIO holding the same positions packed with
#            FORMAT, RANGE_SIZE bytes per entry.
#
# Assigning an entry appends its string to the end of the archive and points
# the index at the appended bytes; the bytes of the replaced entry are left in
# the archive.
#
# Subclasses customise entries by overriding +str_to_entry+, +entry_to_str+
# and, if needed, +reindex+.
class ArrayIO
  include Enumerable

  # pack/unpack directive for index data: a sequence of native unsigned ints.
  # Index data is therefore platform specific, and positions must fit in a
  # native unsigned int.
  FORMAT = 'I*'

  # Number of bytes one packed [begin, end] pair occupies in index data.
  RANGE_SIZE = [0, 0].pack(FORMAT).bytesize

  # Largest position handed directly to +pos=+ by +set_pos+.
  POSITION_MAX = 2100000000

  attr_reader :io, :io_index

  class << self
    # Creates a new ArrayIO. Without a block the instance is returned and the
    # caller is responsible for closing it. With a block the archive is
    # reindexed first (when +auto_index+ is true and the index is empty), the
    # instance is yielded, and it is closed when the block exits, even if the
    # block raises. The block form returns the value of the block.
    def open(fd, mode='r', index_file=nil, auto_index=true)
      aio = new(fd, mode, index_file)
      return aio unless block_given?

      begin
        aio.reindex if auto_index && aio.empty?
        yield(aio)
      ensure
        aio.close
      end
    end
  end

  # fd::       path of the archive, or the archive content itself in 's' mode
  # mode::     a File/StringIO mode string, optionally containing the flags
  #            'u' (uncached index) and/or 's' (string archive). The flags are
  #            recognised in either case and stripped before the mode is
  #            passed on.
  # io_index:: an Array of ranges, an already open File/StringIO, or a String.
  #            In 's' mode a String is packed index data; otherwise it is the
  #            path of the index file. Defaults to empty index data in 's'
  #            mode and to +default_index_file+ otherwise.
  def initialize(fd, mode='r', io_index=nil)
    @cached = (mode =~ /u/i).nil?
    @strio = !(mode =~ /s/i).nil?
    mode = mode.delete('uUsS')

    if strio?
      @io = StringIO.open(fd, mode)
      io_index = String.new if io_index.nil?
    else
      @io = File.open(fd, mode)
      set_binmode(io)
      io_index = default_index_file if io_index.nil?
    end

    self.io_index = io_index
  end

  # Path of the archive with its extension replaced by '.index'. Requires a
  # File archive; a StringIO archive has no path.
  def default_index_file
    archive_path = io.path
    archive_path.chomp(File.extname(archive_path)) + '.index'
  end

  #
  # array functionality
  #

  # Alias for +length+.
  def size
    length
  end

  # Number of indexed entries.
  def length
    cached? ? io_index.length : index_length / RANGE_SIZE
  end

  # Entry at +index+; same as <tt>self[index]</tt>.
  def at(index)
    self[index]
  end

  # Reads entries the way Array#[] reads elements: an integer index returns a
  # single entry (nil when out of range); a Range or a start/length pair
  # returns an Array of entries (nil when the start is out of range).
  def [](input, length=nil)
    located = range(input, length)
    if located.kind_of?(Array)
      located.collect { |r| read_entry(r) }
    else
      read_entry(located)
    end
  end

  # Appends +entry+ to the end of the archive and records its position at
  # index +input+. In uncached mode +input+ must be an integer index that
  # refers to an existing entry or to the position just past the last one;
  # anything else raises IndexError before the archive is touched.
  def []=(input, entry)
    string = entry_to_str(entry)

    # Positions are byte offsets, so measure the archive and the string in bytes.
    range_begin = io.size
    range = format_range(range_begin, range_begin + string.bytesize)

    index_position = nil
    unless cached?
      index_position = index_pos(input)
      if index_position < 0 || index_position > index_length
        raise IndexError, "index #{input} out of range"
      end
    end

    # Write the entry before touching the index, in case io is not open for
    # writing. Flushing makes the next call see the new archive size.
    set_pos(io, range_begin)
    io << string
    io.flush

    if cached?
      io_index[input] = range
    else
      set_pos(io_index, index_position)
      io_index.write(range)
      io_index.flush
    end
  end

  # Returns the entry at +index+ (negative indexes count from the end), or
  # +default+ when the index is out of range. When a block is given the
  # result is yielded to it and the value of the block is returned.
  def fetch(index, default=nil)
    count = length
    index += count if index < 0
    val = (index < 0 || index >= count) ? default : self[index]
    block_given? ? yield(val) : val
  end

  # The first entry, or an Array of the first +n+ entries.
  def first(n=nil)
    n.nil? ? self[0] : self[0, n]
  end

  # Yields each index from 0 to length-1. Raises LocalJumpError without a block.
  def each_index(&block)
    raise LocalJumpError, "no block given" unless block_given?

    0.upto(length - 1, &block)
    self
  end

  # Yields each entry in index order. Raises LocalJumpError without a block.
  def each
    raise LocalJumpError, "no block given" unless block_given?

    if cached?
      io_index.each { |range| yield(read_entry(range)) }
    else
      # Look each range up by position rather than streaming the index, so
      # the block may itself read from this object without disturbing the
      # iteration.
      each_index { |index| yield(read_entry(range(index))) }
    end
    self
  end

  # True when no entries are indexed.
  def empty?
    length == 0
  end

  # The last entry, or an Array of the last +n+ entries.
  def last(n=nil)
    return self[-1] if n.nil?

    start = length - n
    start = 0 if start < 0
    self[start, n]
  end

  # Returns the entries for the given selectors (integer indexes or Ranges)
  # as one Array. Range results are spliced in; the entries themselves are
  # never flattened, so entries that are Arrays stay intact. Out-of-range
  # integer selectors contribute nil; Range selectors contribute only the
  # entries that exist.
  def values_at(*selectors)
    selectors.inject([]) do |values, selector|
      result = self[selector]
      if selector.kind_of?(Range)
        values.concat(result || [])
      else
        values << result
      end
    end
  end

  #
  # arrayio functionality
  #

  # True when the index is held in memory as an Array of ranges.
  def cached?
    @cached
  end

  # True when the archive is a String wrapped in a StringIO.
  def strio?
    @strio
  end

  # Parses packed index data into an Array of ranges and makes it the index.
  # In 's' mode +index_file+ is the packed data itself; otherwise it is the
  # path of a file to read, and a missing file yields an empty index.
  def load_index(index_file)
    input = if strio?
      index_file
    elsif File.exist?(index_file)
      File.open(index_file, 'rb') { |file| file.read }
    else
      ''
    end
    @io_index = parse_index(input)
  end

  # Writes the index, packed with FORMAT, to +index_file+ and returns the
  # path. When +index_file+ is the file already backing an uncached index,
  # the handle is only flushed. The default path needs a File archive, so in
  # 's' mode a path must be given.
  def dump_index(index_file=default_index_file)
    if io_index.kind_of?(File) &&
       File.expand_path(io_index.path) == File.expand_path(index_file)
      # Reopening this file for writing would truncate the data being dumped.
      io_index.flush
      return index_file
    end

    File.open(index_file, 'wb') do |file|
      if cached?
        io_index.each do |range|
          file << [range.begin, range.end].pack(FORMAT)
        end
      else
        set_pos(io_index, 0)
        file << io_index.read
      end
    end

    index_file
  end

  # Closes the archive and, in uncached mode, the index.
  def close
    io.close
    io_index.close unless cached?
  end

  # Converts the string read from the archive into an entry. Returns the
  # string itself; override to parse entries.
  def str_to_entry(string)
    string
  end

  # Converts an entry into the string written to the archive. Returns
  # <tt>entry.to_s</tt>; override to format entries.
  def entry_to_str(entry)
    entry.to_s
  end

  # Looks up index data the way Array#[] looks up elements. An integer
  # +input+ returns one exclusive Range of archive positions, or nil when out
  # of range. A Range, or a start with a +length+, returns an Array of such
  # ranges: nil when the start lies outside the index, and [] when it is
  # exactly one past the last entry.
  def range(input, length=nil)
    if cached?
      return length.nil? ? io_index[input] : io_index[input, length]
    end

    total = index_length

    unless length.nil?
      return nil if length < 0

      begin_pos = index_pos(input)
      return nil if begin_pos < 0 || begin_pos > total

      return read_ranges(begin_pos, length * RANGE_SIZE)
    end

    if input.kind_of?(Range)
      begin_pos = index_pos(input.begin || 0)
      return nil if begin_pos < 0 || begin_pos > total

      end_pos = if input.end.nil?
        total
      else
        index_pos(input.end) + (input.exclude_end? ? 0 : RANGE_SIZE)
      end
      read_ranges(begin_pos, end_pos - begin_pos)
    else
      begin_pos = index_pos(input)
      return nil if begin_pos < 0 || begin_pos >= total

      set_pos(io_index, begin_pos)
      range_begin, range_end = io_index.read(RANGE_SIZE).unpack(FORMAT)
      range_begin...range_end
    end
  end

  # Rebuilds the index from the archive, replacing any existing index data,
  # and returns self. The archive is read in pieces separated by
  # <tt>:sep_string</tt>. Each piece is yielded to the block, and a true
  # result marks the piece as a break between entries; without a block every
  # piece is a break.
  #
  # Options:
  # :sep_string::    separator passed to +each_line+ (default <tt>$/</tt>)
  # :break_before::  the breaking piece starts the next entry instead of
  #                  ending the current one
  # :exclude_break:: the breaking piece belongs to no entry
  # :limit::         stop once this many entries have been indexed
  def reindex(options={}, &block)
    io.rewind

    options = {
      :sep_string => $/,
      :break_before => false,
      :exclude_break => false,
      :limit => nil
    }.merge(options)

    sep_string = options[:sep_string]
    break_before = options[:break_before]
    exclude_break = options[:exclude_break]
    limit = options[:limit]
    total_count = 0

    if cached?
      self.io_index = []
    else
      # Start from empty index data; ranges are written sequentially below.
      io_index.truncate(0)
      io_index.rewind
    end

    last_pos = 0
    current_pos = 0
    range_begin = 0
    io.each_line(sep_string) do |line|
      # Positions are accumulated from the byte size of each piece rather
      # than read back from io.pos.
      last_pos = current_pos
      current_pos += line.bytesize

      if (block_given? ? yield(line) : true)
        range_end = (break_before || exclude_break) ? last_pos : current_pos
        unless range_end == range_begin
          io_index << format_range(range_begin, range_end)
          total_count += 1
        end
        range_begin = (break_before && !exclude_break) ? last_pos : current_pos

        break unless limit.nil? || total_count < limit
      end
    end

    # Index whatever follows the final break.
    if limit.nil? || total_count < limit
      range_end = current_pos
      unless range_end == range_begin
        io_index << format_range(range_begin, range_end)
      end
    end

    # Make the written ranges visible to subsequent size checks.
    io_index.flush unless cached?

    self
  end

  protected

  # Reads the bytes covered by +range+ from the archive and converts them
  # with +str_to_entry+. Returns nil for a nil range.
  def read_entry(range)
    return nil if range.nil?

    set_pos(io, range.begin)
    str_to_entry(io.read(range.end - range.begin))
  end

  # Reads up to +byte_count+ bytes of index data starting at +begin_pos+ and
  # returns them as an Array of ranges. Returns [] when there is nothing to
  # read.
  def read_ranges(begin_pos, byte_count)
    return [] if byte_count <= 0 || begin_pos >= index_length

    set_pos(io_index, begin_pos)
    parse_index(io_index.read(byte_count))
  end

  # Moves +io+ to +pos+. Positions of POSITION_MAX or more are reached by
  # rewinding and seeking forward in steps of at most POSITION_MAX, for IO
  # objects that support +sysseek+; other objects are positioned directly.
  def set_pos(io, pos)
    if pos < POSITION_MAX || !io.respond_to?(:sysseek)
      io.pos = pos
    else
      # note sysseek appears to be necessary here, rather than io.seek
      io.pos = 0
      while pos > POSITION_MAX
        pos -= POSITION_MAX
        io.sysseek(POSITION_MAX, IO::SEEK_CUR)
      end
      io.sysseek(pos, IO::SEEK_CUR)
    end
  end

  # Represents a range in the form the current index stores: an exclusive
  # Range in cached mode, a packed String otherwise.
  def format_range(range_begin, range_end)
    cached? ? range_begin...range_end : [range_begin, range_end].pack(FORMAT)
  end

  # Size in bytes of the uncached index data.
  def index_length
    io_index.size
  end

  # Byte position of entry +index+ within uncached index data; negative
  # indexes count back from the end.
  def index_pos(index)
    RANGE_SIZE * index + (index < 0 ? index_length : 0)
  end

  # Unpacks index data into an Array of exclusive ranges. A trailing
  # unpaired value is ignored.
  def parse_index(input)
    results = []
    input.unpack(FORMAT).each_slice(2) do |range_begin, range_end|
      results << (range_begin...range_end) unless range_end.nil?
    end
    results
  end

  # Sets the index. Arrays, StringIOs and Files are used as given. Any other
  # input is loaded with +load_index+ in cached mode; in uncached mode it is
  # wrapped in a read/write StringIO over a binary copy ('s' mode) or opened,
  # and created if missing, as a read/write binary File.
  def io_index=(input)
    case input
    when Array, StringIO, File
      @io_index = input
    else
      if cached?
        load_index(input)
      elsif strio?
        @io_index = StringIO.new(input.b, 'r+')
      else
        # Read/write without append semantics, so that ranges can be
        # overwritten in place.
        @io_index = File.open(input, File::RDWR | File::CREAT)
        @io_index.binmode
      end
    end
  end

  # ArrayIO requires Files to operate in binmode on Windows. Otherwise ranges get
  # improperly shifted, and additionally the unpacking frame gets shifted due to improper
  # handling of cr (\r) characters.
  def set_binmode(file)
    file.binmode if RUBY_PLATFORM =~ /mswin|mingw/
  end
end
@@ -0,0 +1,28 @@
1
+ require 'array_io'
2
+ require 'pp'
3
+
4
# Debugging aid that prints a summary of an ArrayIO to standard output.
class InspectArrayIO
  class << self
    # Prints the number of indexed entries, the leading and trailing bytes of
    # the archive, and the first and last entries of +aio+.
    #
    # Options:
    # :from_start:: number of leading bytes to show (default 100)
    # :from_end::   number of trailing bytes to show (default 100)
    #
    # Called without arguments this behaves as the regular Class#inspect, so
    # that printing the class itself keeps working.
    def inspect(aio=nil, options={})
      return super() if aio.nil?

      options = {
        :from_start => 100,
        :from_end => 100}.merge(options)

      puts "N indexed entries: #{aio.length}"

      puts "****************************"
      puts "First #{options[:from_start]} bytes:"
      aio.io.pos = 0
      pp aio.io.read(options[:from_start])
      puts "First entry:"
      puts aio[0]

      puts "****************************"
      puts "Last #{options[:from_end]} bytes:"
      # Never seek back past the start of an archive shorter than :from_end.
      tail_length = [options[:from_end], aio.io.size].min
      aio.io.seek(-tail_length, IO::SEEK_END)
      pp aio.io.read
      puts "Last entry:"
      puts aio[-1]
    end
  end
end
@@ -0,0 +1,3 @@
1
+ >abc
2
+ >def
3
+ >gh
@@ -0,0 +1,3 @@
1
+ 012
2
+ 56
3
+ 9
Binary file
@@ -0,0 +1 @@
1
+ abcdefgh
@@ -0,0 +1 @@
1
+ abcdefgh
@@ -0,0 +1,3 @@
1
+ 012
2
+ 56
3
+ 9
@@ -0,0 +1,19 @@
1
+ line 1
2
+
3
+ line 2
4
+
5
+ line 3
6
+
7
+ line 4
8
+
9
+ line 5
10
+
11
+ line 6
12
+
13
+ line 7
14
+
15
+ line 8
16
+
17
+ line 9
18
+
19
+ line 10
@@ -0,0 +1 @@
1
+ abcdefgh
@@ -0,0 +1,569 @@
1
+ require File.join(File.dirname(__FILE__), 'arrayio_test_helper.rb')
2
+
3
+ class ArrayIOTest < Test::Unit::TestCase
4
+ include Benchmark
5
+
6
+
7
+ def aio_filepath(path)
8
+ File.join(File.dirname(__FILE__), 'array_io', path)
9
+ end
10
+
11
+ #
12
+ # file initialize tests
13
+ #
14
+
15
+ def test_initialize_with_index
16
+ aio = ArrayIO.new(aio_filepath('input.txt'))
17
+
18
+ assert_equal 'input.txt', File.basename(aio.io.path)
19
+ assert_equal "abcdefgh", aio.io.read
20
+
21
+ assert_equal [1...1, 2...4, 6...10], aio.io_index
22
+ aio.close
23
+ end
24
+
25
+ def test_initialize_uncached_with_index
26
+ # non-windows only
27
+ if RUBY_PLATFORM.index('mswin').nil?
28
+ aio = ArrayIO.new(aio_filepath('input.txt'), 'ru')
29
+
30
+ assert aio.io_index.kind_of?(File)
31
+ assert_equal 'input.index', File.basename(aio.io_index.path)
32
+ assert_equal [1,1,2,4,6,10], aio.io_index.read.unpack('I*')
33
+
34
+ aio.close
35
+ end
36
+ end
37
+
38
+ def test_initialize_uncached_with_index_binary_mode
39
+ # windows only
40
+ unless RUBY_PLATFORM.index('mswin').nil?
41
+ aio = ArrayIO.new(aio_filepath('inputb.txt'), 'ru')
42
+
43
+ assert aio.io_index.kind_of?(File)
44
+ assert_equal 'inputb.index', File.basename(aio.io_index.path)
45
+ assert_equal [1,1,2,4,6,10], aio.io_index.read.unpack('I*')
46
+
47
+ aio.close
48
+ end
49
+ end
50
+
51
+ def test_initialize_without_index
52
+ aio = ArrayIO.new(aio_filepath('without_index.txt'))
53
+
54
+ assert_equal [], aio.io_index
55
+ aio.close
56
+ end
57
+
58
+ def test_initialize_with_alternate_index
59
+ aio = ArrayIO.new(aio_filepath('without_index.txt'), 'r', aio_filepath('input.index'))
60
+
61
+ assert_equal [1...1, 2...4, 6...10], aio.io_index
62
+ aio.close
63
+ end
64
+
65
# An Array of ranges passed as the index is used as the cached index as given.
def test_initialize_with_array_index
  array = [0...1, 2...3]
  aio = ArrayIO.new(aio_filepath('input.txt'), 'r', array)
  assert_equal array, aio.io_index
ensure
  # Release the archive handle, as the other initialize tests do.
  aio.close if aio
end
70
+
71
+ #
72
+ # string initialize tests
73
+ #
74
+
75
+ def test_initialize_string
76
+ aio = ArrayIO.new('abcdefgh', 'rs')
77
+
78
+ assert aio.io.kind_of?(StringIO)
79
+ assert_equal "abcdefgh", aio.io.read
80
+
81
+ assert_equal [], aio.io_index
82
+ aio.close
83
+ end
84
+
85
+ def test_initialize_string_uncached
86
+ aio = ArrayIO.new('abcdefgh', 'rsu')
87
+
88
+ assert aio.io_index.kind_of?(StringIO)
89
+ assert_equal '', aio.io_index.string
90
+ aio.close
91
+ end
92
+
93
+ def setup_aio
94
+ ArrayIO.new("abcdefgh", 'r+s', [0...3, 3...5, 5...8])
95
+ end
96
+
97
+ def test_initialize_string_with_array
98
+ aio = setup_aio
99
+
100
+ assert aio.io.kind_of?(StringIO)
101
+ assert_equal "abcdefgh", aio.io.read
102
+
103
+ assert_equal [0...3, 3...5, 5...8], aio.io_index
104
+ aio.close
105
+ end
106
+
107
+ def setup_uaio
108
+ ArrayIO.new("abcdefgh", 'r+su', [0,3,3,5,5,8].pack('I*'))
109
+ end
110
+
111
+ def test_initialize_string_uncached_with_index
112
+ uaio = setup_uaio
113
+
114
+ assert uaio.io.kind_of?(StringIO)
115
+ assert_equal "abcdefgh", uaio.io.read
116
+
117
+ assert uaio.io_index.kind_of?(StringIO)
118
+ assert_equal [0,3,3,5,5,8], uaio.io_index.read.unpack('I*')
119
+ uaio.close
120
+ end
121
+
122
+ #
123
+ # range tests
124
+ #
125
+
126
+ def test_range
127
+ [setup_aio, setup_uaio].each do |aio|
128
+ assert_equal 0...3, aio.range(0)
129
+ assert_equal 3...5, aio.range(1)
130
+ assert_equal 5...8, aio.range(2)
131
+ assert_nil aio.range(3)
132
+
133
+ assert_equal 5...8, aio.range(-1)
134
+ assert_equal 3...5, aio.range(-2)
135
+ assert_equal 0...3, aio.range(-3)
136
+ assert_nil aio.range(-4)
137
+ end
138
+ end
139
+
140
+ def test_range_when_input_is_start_length
141
+ array = [0...3, 3...5, 5...8]
142
+
143
+ [setup_aio, setup_uaio].each do |aio|
144
+ [0..0, 0...0, 0..1, 0..2, 0..-1, 0..3, 0...3, -4..1, 5..10, -3..-5].each do |r|
145
+ start = r.begin
146
+ length = r.end - r.begin
147
+
148
+ assert_equal array[start, length], aio.range(start, length), r.to_s
149
+ end
150
+ end
151
+ end
152
+
153
+ def test__range_when_input_is_range
154
+ array = [0...3, 3...5, 5...8]
155
+
156
+ [setup_aio, setup_uaio].each do |aio|
157
+ [0..0, 0...0, 0..1, 0..2, 0..-1, 0..3, 0...3, -4..1, 5..10, -3..-5].each do |r|
158
+ assert_equal array[r], aio.range(r), r.to_s
159
+ end
160
+ end
161
+ end
162
+
163
+ def test_range_speed
164
+ benchmark_test do
165
+ aio = setup_aio
166
+ uaio = setup_uaio
167
+ bm(12) do |x|
168
+ x.report("10k index") { 10000.times { aio.range(1) } }
169
+ x.report("10k range") { 10000.times { aio.range(1..1) } }
170
+ x.report("10k s,l") { 10000.times { aio.range(1, 1) } }
171
+
172
+ x.report("10k uindex") { 10000.times { uaio.range(1) } }
173
+ x.report("10k urange") { 10000.times { uaio.range(1..1) } }
174
+ x.report("10k us,l") { 10000.times { uaio.range(1, 1) } }
175
+ end
176
+ end
177
+ end
178
+
179
+ #
180
+ # get tests
181
+ #
182
+
183
+ def test_get_when_input_is_index
184
+ [setup_aio, setup_uaio].each do |aio|
185
+ assert_equal "abc", aio[0]
186
+ assert_equal "de", aio[1]
187
+ assert_equal "fgh", aio[2]
188
+ assert_nil aio[3]
189
+
190
+ assert_equal "fgh", aio[-1]
191
+ assert_equal "de", aio[-2]
192
+ assert_equal "abc", aio[-3]
193
+ assert_nil aio[-4]
194
+ end
195
+ end
196
+
197
+ def test_get_when_input_is_start_length
198
+ array = ["abc", "de", "fgh"]
199
+
200
+ [setup_aio, setup_uaio].each do |aio|
201
+ [0..0, 0...0, 0..1, 0..2, 0..-1, 0..3, 0...3, -4..1, 5..10, -3..-5, -1..5].each do |r|
202
+ start = r.begin
203
+ length = r.end - r.begin
204
+
205
+ assert_equal array[start, length], aio[start, length], r.to_s
206
+ end
207
+ end
208
+ end
209
+
210
+ def test_get_when_input_is_range
211
+ array = ["abc", "de", "fgh"]
212
+
213
+ [setup_aio, setup_uaio].each do |aio|
214
+ [0..0, 0...0, 0..1, 0..2, 0..-1, 0..3, 0...3, -4..1, 5..10, -3..-5].each do |r|
215
+ assert_equal array[r], aio[r], r.to_s
216
+ end
217
+ end
218
+ end
219
+
220
# Benchmarks entry lookup by index, by range and by start/length, for a
# cached and an uncached string archive of 10k lines.
def test_get_speed
  benchmark_test do
    array = []
    0.upto(10000-1) {|i| array << "line #{i}\n" }

    aio = ArrayIO.new(array.join(''), 'rs')
    aio.reindex

    uaio = ArrayIO.new(array.join(''), 'rsu')
    uaio.reindex

    assert_equal "line 1000\n", aio[1000]
    bm(12) do |x|
      x.report("10k index") { 10000.times { aio[1000] } }
      # Inclusive range, so that one entry is read as in the uncached case.
      x.report("10k range") { 10000.times { aio[1000..1000] } }
      x.report("10k s,l") { 10000.times { aio[1000, 1] } }

      x.report("10k uindex") { 10000.times { uaio[1000] } }
      x.report("10k urange") { 10000.times { uaio[1000..1000] } }
      x.report("10k us,l") { 10000.times { uaio[1000, 1] } }
    end
  end
end
243
+
244
+ #
245
+ # test insert
246
+ #
247
+
248
+ def test_insert
249
+ [setup_aio, setup_uaio].each do |aio|
250
+ assert_equal "abc", aio[0]
251
+ assert_equal "fgh", aio[-1]
252
+ assert_equal "de", aio[1]
253
+ assert_equal 3, aio.length
254
+
255
+ aio[0] = 'xyz'
256
+ aio[-1] = 'pq'
257
+ aio[1] = 'mno'
258
+
259
+ assert_equal "xyz",aio[0]
260
+ assert_equal "pq", aio[-1]
261
+ assert_equal "mno", aio[1]
262
+ assert_equal 3, aio.length
263
+
264
+ assert_equal 'abcdefghxyzpqmno', aio.io.string
265
+ end
266
+ end
267
+
268
+ def test_insert_raises_error_if_io_is_not_writable
269
+ aio = ArrayIO.new('', 'rs')
270
+ assert_raise(IOError) { aio[0] = 'abc'}
271
+ end
272
+
273
+ def test_insert_speed
274
+ benchmark_test do
275
+ array = []
276
+ 0.upto(10000-1) {|i| array << "line #{i}\n" }
277
+
278
+ aio = ArrayIO.new(array.join(''), 'r+s')
279
+ aio.reindex
280
+
281
+ uaio = ArrayIO.new(array.join(''), 'r+su')
282
+ uaio.reindex
283
+
284
+ bm(12) do |x|
285
+ x.report("10k index=") { 10000.times { aio[1000] = "line 1000\n" } }
286
+ #x.report("10k range=") { 10000.times { aio[1000..1000] = "line 1000\n" } }
287
+ #x.report("10k s,l=") { 10000.times { aio[1000, 1] = "line 1000\n" } }
288
+
289
+ x.report("10k uindex=") { 10000.times { uaio[1000] = "line 1000\n" } }
290
+ #x.report("10k crange=") { 10000.times { caio[1000..1000] = "line 1000\n" } }
291
+ #x.report("10k cs,l=") { 10000.times { caio[1000, 1] = "line 1000\n" } }
292
+ end
293
+ end
294
+ end
295
+
296
+ #
297
+ # indexing tests
298
+ #
299
+
300
+ def reindex_test(expected, options={}, &block)
301
+ cases = {
302
+ :end_midline => "012\n\n56\n\n9",
303
+ :end_on_line => "012\n\n56\n\n9\n",
304
+ :end_on_break => "012\n\n56\n\n9\n\n",
305
+ :no_break => "0123456789",
306
+ :backing_breaks => "012\n\n\n\n\n\n9",
307
+ :cr_lf => "012\r\n\r\n56\r\n\r\n9"}
308
+
309
+ cases.each_pair do |key, string|
310
+ next unless expected.has_key?(key)
311
+
312
+ ['rs', 'rsu'].each do |mode|
313
+ aio = ArrayIO.new(string, mode)
314
+ aio.reindex(options, &block)
315
+
316
+ assert_equal expected[key].length, aio.length, "#{mode} #{key}"
317
+ assert_equal expected[key], aio[0..-1], "#{mode} #{key}"
318
+ end
319
+ end
320
+ end
321
+
322
+ def test_reindex_treats_each_line_as_break_by_default
323
+ reindex_test(
324
+ :end_midline => ["012\n", "\n", "56\n", "\n", "9"],
325
+ :end_on_line => ["012\n", "\n", "56\n", "\n", "9\n"],
326
+ :end_on_break => ["012\n", "\n", "56\n", "\n", "9\n", "\n"],
327
+ :no_break => ["0123456789"],
328
+ :backing_breaks => ["012\n", "\n", "\n", "\n", "\n", "\n", "9"],
329
+ :cr_lf => ["012\r\n", "\r\n", "56\r\n", "\r\n", "9"])
330
+ end
331
+
332
+ def test_reindex_block_determines_if_line_is_a_break
333
+ reindex_test(
334
+ :end_midline => ["012\n\n", "56\n\n", "9"],
335
+ :end_on_line => ["012\n\n", "56\n\n", "9\n"],
336
+ :end_on_break => ["012\n\n", "56\n\n", "9\n\n"],
337
+ :no_break => ["0123456789"],
338
+ :backing_breaks => ["012\n\n", "\n", "\n", "\n", "\n", "9"],
339
+ :cr_lf => ["012\r\n\r\n", "56\r\n\r\n", "9"]) do |line|
340
+ line.strip.empty?
341
+ end
342
+ end
343
+
344
+ def test_reindex_breaking_before
345
+ reindex_test({
346
+ :end_midline => ["012\n", "\n56\n", "\n9"],
347
+ :end_on_line => ["012\n", "\n56\n", "\n9\n"],
348
+ :end_on_break => ["012\n", "\n56\n", "\n9\n", "\n"],
349
+ :no_break => ["0123456789"],
350
+ :backing_breaks => ["012\n", "\n", "\n", "\n", "\n", "\n9"],
351
+ :cr_lf => ["012\r\n", "\r\n56\r\n", "\r\n9"]},
352
+ :break_before => true) do |line|
353
+ line.strip.empty?
354
+ end
355
+ end
356
+
357
+ def test_reindex_excluding_break
358
+ reindex_test({
359
+ :end_midline => ["012\n", "56\n", "9"],
360
+ :end_on_line => ["012\n", "56\n", "9\n"],
361
+ :end_on_break => ["012\n", "56\n", "9\n"],
362
+ :no_break => ["0123456789"],
363
+ :backing_breaks => ["012\n", "9"],
364
+ :cr_lf => ["012\r\n", "56\r\n", "9"]},
365
+ :exclude_break => true) do |line|
366
+ line.strip.empty?
367
+ end
368
+ end
369
+
370
+ def test_reindex_breaking_before_and_excluding_break
371
+ # note this is the same as simply excluding the break
372
+ reindex_test({
373
+ :end_midline => ["012\n", "56\n", "9"],
374
+ :end_on_line => ["012\n", "56\n", "9\n"],
375
+ :end_on_break => ["012\n", "56\n", "9\n"],
376
+ :no_break => ["0123456789"],
377
+ :backing_breaks => ["012\n", "9"],
378
+ :cr_lf => ["012\r\n", "56\r\n", "9"]},
379
+ :exclude_break => true,
380
+ :break_before => true) do |line|
381
+ line.strip.empty?
382
+ end
383
+ end
384
+
385
+ def test_reindex_with_alt_sep_string
386
+ reindex_test({
387
+ :end_midline => ["012\n\n", "56\n\n", "9"],
388
+ :end_on_line => ["012\n\n", "56\n\n", "9\n"],
389
+ :end_on_break => ["012\n\n", "56\n\n", "9\n\n"],
390
+ :no_break => ["0123456789"],
391
+ :backing_breaks => ["012\n\n", "\n\n", "\n\n", "9"],
392
+ :cr_lf => ["012\r\n\r\n56\r\n\r\n9"]},
393
+ :sep_string => "\n\n")
394
+
395
+ reindex_test({
396
+ :end_midline => ["012\n\n56", "\n\n9"],
397
+ :end_on_line => ["012\n\n56", "\n\n9\n"],
398
+ :end_on_break => ["012\n\n56", "\n\n9\n\n"],
399
+ :no_break => ["0123456", "789"],
400
+ :backing_breaks => ["012\n\n\n\n\n\n9"],
401
+ :cr_lf => ["012\r\n\r\n56", "\r\n\r\n9"]},
402
+ :sep_string => "56")
403
+ end
404
+
405
+ #
406
+ # file tests
407
+ #
408
+
409
# Opens the fixture at +path+ with an uncached index, yields the ArrayIO to
# the block, then closes it and deletes the index file that was created.
def index_test(path, &block)
  aio = ArrayIO.open(aio_filepath(path), 'ru')

  begin
    yield(aio)
  ensure
    aio.close
    index_path = aio.io_index.path
    File.delete(index_path) if index_path && File.exist?(index_path)
  end
end
420
+
421
+ def test_cr_lf_file
422
+ index_test('cr_lf_input.txt') do |aio|
423
+ aio.reindex
424
+
425
+ assert_equal "012\r\n", aio[0]
426
+ assert_equal "56\r\n", aio[1]
427
+ assert_equal "9", aio[2]
428
+ end
429
+ end
430
+
431
+ def test_parse_from_lf_file
432
+ index_test('lf_input.txt') do |aio|
433
+ aio.reindex
434
+
435
+ assert_equal "012\n", aio[0]
436
+ assert_equal "56\n", aio[1]
437
+ assert_equal "9", aio[2]
438
+ end
439
+ end
440
+
441
+ def test_parse_from_alt_sep
442
+ index_test('alt_sep.txt') do |aio|
443
+ aio.reindex do |line|
444
+ line =~ /^>/
445
+ end
446
+
447
+ assert_equal ">abc\r\n", aio[0]
448
+ assert_equal ">def\r\n", aio[1]
449
+ assert_equal ">gh", aio[2]
450
+ end
451
+ end
452
+
453
# Benchmarks reindexing of a generated file of n lines, with a cached and an
# uncached index, and checks the result of each.
def test_reindex_speed
  benchmark_test do
    n = 10000
    filepath = aio_filepath("reindex_speed.txt")

    bm(12) do |x|
      x.report("building file") do
        # Written in binary mode so the line ending is "\n" on every platform.
        File.open(filepath, 'wb') do |file|
          0.upto(n-1) {|i| file << "line #{i}\n" }
        end
      end
      puts " #{n} lines"

      aio = nil
      uaio = nil
      begin
        aio = ArrayIO.new(filepath, 'r')
        uaio = ArrayIO.new(filepath, 'ru')

        x.report("reindex") { aio.reindex }
        assert_equal n, aio.length, 'aio'
        assert_equal "line 1000\n", aio[1000], 'aio'

        x.report("ureindex") { uaio.reindex}
        assert_equal n, uaio.length, 'uaio'
        assert_equal "line 1000\n", uaio[1000], 'uaio'
      ensure
        aio.close if aio
        if uaio
          uaio.close
          index_path = uaio.io_index.path
          File.delete(index_path) if index_path && File.exist?(index_path)
        end
        File.delete(filepath) if File.exist?(filepath)
      end
    end
  end
end
487
+
488
+ #
489
+ # array behavior tests
490
+ #
491
+
492
+ def array_test_setup
493
+ [setup_aio, ["abc", "de", "fgh"]]
494
+ end
495
+
496
+ def test_length_and_size
497
+ aio, array = array_test_setup
498
+
499
+ assert_equal array.length, aio.length
500
+ assert_equal array.size, aio.size
501
+ end
502
+
503
# fetch returns the entry at an index, or the default when out of range, and
# passes its result through a block when one is given.
def test_fetch
  aio, array = array_test_setup

  assert_equal array.fetch(0), aio.fetch(0)
  assert_equal array.fetch(0, "default"), aio.fetch(0, "default")
  assert_equal array.fetch(3, "default"), aio.fetch(3, "default")

  # Array#fetch only calls its block for a missing index, so the assertion
  # is made on the value returned through the ArrayIO block instead.
  assert_equal array.fetch(1), aio.fetch(1) { |entry| entry }
end
516
+
517
+ def test_first
518
+ aio, array = array_test_setup
519
+
520
+ assert_equal array.first, aio.first
521
+ assert_equal array.first(2), aio.first(2)
522
+ assert_equal array.first(10), aio.first(10)
523
+ end
524
+
525
+ def test_each_index
526
+ aio, array = array_test_setup
527
+
528
+ aio_result = []
529
+ aio.each_index {|i| aio_result << i}
530
+ array_result = []
531
+ array.each_index {|i| array_result << i}
532
+
533
+ assert_equal array_result, aio_result
534
+ end
535
+
536
+ def test_each
537
+ aio, array = array_test_setup
538
+
539
+ aio_result = []
540
+ aio.each {|i| aio_result << i}
541
+ array_result = []
542
+ array.each {|i| array_result << i}
543
+
544
+ assert_equal array_result, aio_result
545
+ end
546
+
547
+ def test_empty
548
+ aio, array = array_test_setup
549
+
550
+ assert_equal array.empty?, aio.empty?
551
+ assert_equal [].empty?, ArrayIO.new('', 'rs', []).empty?
552
+ end
553
+
554
+ def test_last
555
+ aio, array = array_test_setup
556
+
557
+ assert_equal array.last, aio.last
558
+ assert_equal array.last(2), aio.last(2)
559
+ assert_equal array.last(10), aio.last(10)
560
+ end
561
+
562
+ def btest_values_at
563
+ aio, array = array_test_setup
564
+
565
+ assert_equal array.values_at, aio.values_at
566
+ assert_equal array.values_at(2), aio.values_at(2)
567
+ assert_equal array.values_at(1, 10, 3..3, 1..2), aio.values_at(1, 10, 3..3, 1..2)
568
+ end
569
+ end
@@ -0,0 +1,72 @@
1
+ require 'test/unit'
2
+ require 'array_io'
3
+ require 'benchmark'
4
+ require 'pp'
5
+
6
+ # The testing subset code here is taken from an as-yet unreleased gem 'prosperity'
7
+ #
8
+ # These subsets facilitate testing by using the ENV variables specified on the command line
9
+ # to indicate which tests to run. The ENV variables are set by rake, so this code implicitly
10
+ # assumes that you're running your tests through rake.
11
+ #
12
# Helpers for running subsets of tests, selected through ENV variables.
class Test::Unit::TestCase
  # True when ENV[type] or ENV["ALL"] is the string "true".
  def run_subset?(type)
    ENV[type] == "true" || ENV["ALL"] == "true"
  end

  # Matches the single-line pretty-print of +obj+ against the regexp source
  # in ENV[type]. Returns +default+ when ENV["ALL"] is "true" or ENV[type] is
  # unset; otherwise the match position, or nil when there is no match.
  def match_regexp?(type, obj, default=true)
    return default if ENV["ALL"] == "true"
    return default unless ENV[type]

    str = String.new
    PP.singleline_pp(obj, str)
    str =~ Regexp.new(ENV[type])
  end

  # Runs the block as part of the EXTENDED subset; prints 'x' when skipped.
  def extended_test(&block)
    subset_test("EXTENDED", "x", &block)
  end

  # Runs the block as part of the BENCHMARK subset, printing the name of the
  # calling test first; prints 'b' when skipped.
  def benchmark_test(&block)
    subset_test("BENCHMARK", "b") do
      puts calling_method
      block.call
    end
  end

  # Yields each testcase/expected pair of +hash+ whose key matches
  # ENV["CASE"], provided the calling test matches ENV["CASE_TEST"].
  def case_test(hash, &block)
    if match_regexp?("CASE_TEST", calling_method)
      hash.each_pair do |testcase, expected|
        yield(testcase, expected) if match_regexp?("CASE", testcase)
      end
    end
  end

  protected

  # Calling method iterates over the call stack, and returns the first calling
  # method name that matches the input pattern (by default /^test/). Returns
  # '' when no frame matches. Both backtrace label styles are understood: the
  # older "in `name'" and the newer "in 'Class#name'".
  def calling_method(pattern=/^test/)
    caller.each do |frame|
      next unless frame =~ /:in [`'](?:.*[#.])?(.*)'$/

      method_name = $1
      return method_name if method_name =~ pattern
    end

    ''
  end

  # Calls the block when the subset +type+ is enabled (through ENV[type],
  # ENV["ALL"] or ENV["<type>_TEST"]) and the calling test matches
  # ENV["<type>_TEST"]; otherwise prints +skip+.
  def subset_test(type, skip, &block)
    type = type.upcase
    type_test = "#{type}_TEST"
    if (run_subset?(type) || ENV[type_test]) && match_regexp?(type_test, calling_method)
      block.call
    else
      print skip
    end
  end
end
@@ -0,0 +1,4 @@
1
# Test suite entry point: puts the gem's lib directory on the load path,
# enables every test subset, and loads all *_test.rb files.
$:.unshift File.join(File.dirname(__FILE__), '../lib')

ENV["ALL"] = 'true'

# Look for tests next to this file rather than under the current working
# directory, so the suite can be started from anywhere.
Dir.glob(File.join(File.dirname(__FILE__), '**/*_test.rb')).each do |test|
  require File.expand_path(test)
end
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.0
3
+ specification_version: 1
4
+ name: arrayio
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.1.0
7
+ date: 2007-03-15 00:00:00 -06:00
8
+ summary: Array-like behavior for archival files.
9
+ require_paths:
10
+ - lib
11
+ email: simon.chiang@uchsc.edu
12
+ homepage: http://rubyforge.org/projects/arrayio/
13
+ rubyforge_project:
14
+ description:
15
+ autorequire: arrayio
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Simon Chiang
31
+ files:
32
+ - test/arrayio_test_helper.rb
33
+ - test/arrayio_test_suite.rb
34
+ - test/array_io
35
+ - test/array_io_test.rb
36
+ - test/array_io/alt_sep.txt
37
+ - test/array_io/cr_lf_input.txt
38
+ - test/array_io/input.index
39
+ - test/array_io/input.txt
40
+ - test/array_io/inputb.index
41
+ - test/array_io/inputb.txt
42
+ - test/array_io/lf_input.txt
43
+ - test/array_io/lines.txt
44
+ - test/array_io/without_index.txt
45
+ - lib/array_io.rb
46
+ - lib/inspect_array_io.rb
47
+ - README
48
+ test_files:
49
+ - test/arrayio_test_suite.rb
50
+ rdoc_options: []
51
+
52
+ extra_rdoc_files:
53
+ - README
54
+ executables: []
55
+
56
+ extensions: []
57
+
58
+ requirements: []
59
+
60
+ dependencies: []
61
+