arrayio 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,76 @@
1
+ # = ArrayIO
2
+ # Array-like behavior for archival files.
3
+ #
4
+ # == Introduction
5
+ # Archival files contain many entries following a regular pattern. These files often grow very large, making them
6
+ # inconvenient or impossible to parse directly into memory. ArrayIO provides an easy way to index these files
7
+ # so that array-like calls can retrieve entries on the fly.
8
+ #
9
+ # Internally ArrayIO keeps an IO object for the archive, and an index of ranges recording where each entry
10
+ # begins and ends. The index is an array, unless ArrayIO operates in 'uncached' mode. In this case the index is
11
+ # a separate file from which the ranges can be looked up. Uncached mode is useful when dealing with an extremely
12
+ # large number of ranges (entries) that would chew up lots of memory if kept in an array.
13
+ #
14
+ # When operating in a writable mode, entries can be added, inserted, and deleted. All changes are recorded in the index
15
+ # and will not reflect the actual order of entries in the archive unless consolidated.
16
+ #
17
+ # Example: If you add an entry at index 1000, the index records the range at index 1000, but the actual entry is appended
18
+ # to the end of the archive. If you delete an entry, it remains in the archive, but the range is removed from the index.
19
+ # Consolidation re-writes entries in their proper order and removes deleted entries.
20
+ #
21
+ # Notes:
22
+ # - BE CAREFUL to specify the correct mode when you open up an ArrayIO - as with File, a 'w' mode will overwrite
23
+ # the ENTIRE archive file. It is safer to use the append mode 'a'.
24
+ #
25
+ # Copyright (c) 2007 Simon Chiang
26
+ # Version: 0.1
27
+ # Licence: MIT-Style
28
+ #
29
+ # == Usage
30
+ #
31
+ # # Open, autoindex and work with 'archive.txt'
32
+ # ArrayIO.open('archive.txt', 'r+') do |aio|
33
+ # aio[100] # -> entry 100
34
+ # aio[100] = "new entry" # -> reassigns entry 100
35
+ # aio.each do |entry|
36
+ # # do something
37
+ # end
38
+ # end
39
+ #
40
+ # # Open 'archive.txt' in uncached mode. This creates a file 'archive.index'
41
+ # # that will be filled with the entry ranges. You can specify where entries
42
+ # # begin and end using options and a block in +reindex+.
43
+ # # If the block returns true, the line is considered the beginning of
44
+ # # an entry. This block looks for entries delimited by '>' like:
45
+ # # > entry 0
46
+ # # still entry 0
47
+ # # > entry 1
48
+ # #
49
+ # aio = ArrayIO.new('archive.txt', 'ru')
50
+ # aio.reindex do |line|
51
+ # line =~ /^>/
52
+ # end
53
+ # aio.close
54
+ #
55
+ # # Subclass ArrayIO by overwriting +str_to_entry+, +entry_to_str+, and +reindex+
56
+ # # EntryIO parses entries as above, and functions like:
57
+ # # entryio[0] # => [0, "\nstill entry 0"]
58
+ # # entryio[1] # => [1, ""]
59
+ # # entryio[1] = [100, " is the new entry"] # => writes "> entry 100 is the new entry"
60
+ # # entryio[1] # => [100, " is the new entry"]
61
+ # class EntryIO < ArrayIO
62
+ # def str_to_entry(str)
63
+ # str =~ /\A> entry (\d+)(.*)\z/m
64
+ # [$1.to_i, $2]
65
+ # end
66
+ #
67
+ # def entry_to_str(entry)
68
+ # "> entry #{entry[0]}#{entry[1]}"
69
+ # end
70
+ #
71
+ # def reindex(options={}, &block)
72
+ # super(options) do |line|
73
+ # block_given? ? yield(line) : line =~ /^>/
74
+ # end
75
+ # end
76
+ # end
@@ -0,0 +1,381 @@
1
+ require 'stringio'
2
+
3
+ class ArrayIO
4
+ attr_reader :io, :io_index
5
+
6
+ include Enumerable
7
+
8
+ class << self
9
# Creates an ArrayIO for +fd+ using +mode+ and the optional +index_file+.
#
# Without a block the new instance is returned and the caller is
# responsible for closing it (no automatic reindexing happens).
#
# With a block the instance is reindexed first when +auto_index+ is true
# and the index is empty, then yielded. The instance is always closed
# afterwards, even if the block raises, and the value of the block is
# returned (mirroring File.open).
def open(fd, mode='r', index_file=nil, auto_index=true, &block)
  aio = self.new(fd, mode, index_file)
  return aio unless block_given?

  begin
    aio.reindex if auto_index && aio.empty?
    yield(aio)
  ensure
    # release the archive (and index) handles no matter how the block exits
    aio.close
  end
end
20
+ end
21
+
22
# Opens the archive +fd+ and attaches an index.
#
# +mode+ is a regular IO mode string that may additionally contain:
#   u:: operate in uncached mode (the index is kept in an IO, not an Array)
#   s:: treat +fd+ as the archive string itself and wrap it in a StringIO
# Both flags are recognized case-insensitively and are stripped before the
# mode is handed to File.open / StringIO.open.
#
# +io_index+ may be an Array, StringIO or File used directly, or a
# string handed to the +io_index=+ writer. When nil it defaults to an
# empty string for string archives and to +default_index_file+ for files.
def initialize(fd, mode='r', io_index=nil)
  @cached = (mode =~ /u/i).nil?
  @strio = !(mode =~ /s/i).nil?

  # strip both cases; the flags above were matched case-insensitively
  mode = mode.delete('uUsS')

  if strio?
    @io = StringIO.open(fd, mode)
    io_index = '' if io_index.nil?
  else
    @io = File.open(fd, mode)
    set_binmode(io)
    io_index = default_index_file if io_index.nil?
  end

  self.io_index = io_index
end
49
+
50
+ def default_index_file
51
+ io.path.chomp(File.extname(io.path)) + '.index'
52
+ end
53
+
54
+ #
55
+ # array functionality
56
+ #
57
+
58
+ def size
59
+ length
60
+ end
61
+
62
+ def length
63
+ if cached?
64
+ io_index.length
65
+ else
66
+ index_length/RANGE_SIZE
67
+ end
68
+ end
69
+
70
+ def at(index)
71
+ self[index]
72
+ end
73
+
74
+ def [](input, length=nil)
75
+ range = range(input, length)
76
+ case range
77
+ when Array
78
+ range.collect { |r| read_entry(r) }
79
+ else
80
+ read_entry(range)
81
+ end
82
+ end
83
+
84
# Assigns +entry+ to the index position +input+.
#
# The entry is converted with +entry_to_str+ and appended to the end of
# the archive; only the index is changed to point at the new location.
# Offsets are measured in bytes (the archive is addressed through
# IO#pos), so multibyte entries are recorded with their byte size.
#
# Raises whatever the underlying io raises when it is not writable; the
# write happens before the index is touched so a failed write leaves the
# index unchanged.
def []=(input, entry)
  string = entry_to_str(entry)

  range_begin = strio? ? io.string.bytesize : io.stat.size
  range_end = range_begin + string.bytesize
  range = format_range(range_begin, range_end)

  # do this first in case io is not open for writing.
  set_pos(io, range_begin)
  io << string

  if cached?
    io_index[input] = range
  elsif strio?
    pos = index_pos(input)
    io_index.string[pos...pos+RANGE_SIZE] = range
  else
    # NOTE(review): the original leaves this branch empty, so with an
    # uncached File index the entry is appended to the archive but the
    # index file is not updated. Left as-is here; needs an implementation.
  end
end
106
+
107
# Returns the entry at +index+, or +default+ when +index+ lies outside
# the indexed entries. Negative indexes count back from the end.
# When a block is given the resulting value is yielded and the value of
# the block is returned instead.
def fetch(index, default=nil, &block)
  # offset by the number of entries (not the byte length of the index)
  index += length if index < 0
  val = (index < 0 || index >= length) ? default : self[index]
  block_given? ? yield(val) : val
end
112
+
113
+ def first(n=nil)
114
+ n.nil? ? self[0] : self[0,n]
115
+ end
116
+
117
# Yields each entry index, from 0 up to length-1.
# Raises LocalJumpError when called without a block.
def each_index(&block)
  raise LocalJumpError, "no block given" unless block_given?

  0.upto(length-1, &block)
end
122
+
123
# Yields every entry in index order.
#
# In cached mode the ranges come from the in-memory index. In uncached
# mode the index io is read sequentially from its start, RANGE_SIZE bytes
# at a time, and each unpacked range is used to read the entry.
# Raises LocalJumpError when called without a block.
def each(&block)
  raise LocalJumpError, "no block given" unless block_given?

  if cached?
    io_index.each do |range|
      yield( read_entry(range) )
    end
  else
    io_index.pos = 0
    while !io_index.eof?
      begin_index, end_index = io_index.read(RANGE_SIZE).unpack(FORMAT)
      yield( read_entry(begin_index..end_index) )
    end
  end
end
138
+
139
+ def empty?
140
+ length == 0
141
+ end
142
+
143
+ def last(n=nil)
144
+ return self[-1] if n.nil?
145
+
146
+ start = length-n
147
+ start = 0 if start < 0
148
+ self[start, n]
149
+ end
150
+
151
# Returns the entries picked out by +selectors+ (indexes and/or ranges).
#
# Results for Range selectors are spliced into the output one level deep;
# everything else is appended as-is. Entries that are themselves arrays
# (as produced by subclasses overriding +str_to_entry+) are therefore
# kept intact rather than being flattened away.
def values_at(*selectors)
  selectors.inject([]) do |values, selector|
    result = self[selector]
    if selector.kind_of?(Range) && result.kind_of?(Array)
      values.concat(result)
    else
      values << result
    end
  end
end
154
+
155
+ #
156
+ # arrayio functionality
157
+ #
158
+
159
+ def cached?
160
+ @cached
161
+ end
162
+
163
+ def strio?
164
+ @strio
165
+ end
166
+
167
# Parses an index into the in-memory array of ranges stored in @io_index.
#
# For string archives +index_file+ is the packed index data itself.
# Otherwise it is a path: the file is read in binary mode (the index is
# packed binary data) or treated as empty when it does not exist.
# Returns the parsed index.
def load_index(index_file)
  input =
    if strio?
      index_file
    elsif File.exist?(index_file)
      File.open(index_file, 'rb') { |file| file.read }
    else
      ''
    end

  @io_index = parse_index(input)
end
171
+
172
# Writes the current index to +index_file+ in packed binary form and
# returns +index_file+.
#
# In cached mode every range is packed with FORMAT. In uncached mode the
# whole index io is copied from its beginning. Nothing is written when
# +index_file+ is the very file backing the index io: it already holds
# the data, and opening it for writing would truncate it.
def dump_index(index_file=default_index_file)
  index_path = io_index.respond_to?(:path) ? io_index.path : nil
  dumping_to_io = !index_path.nil? &&
    File.expand_path(index_path) == File.expand_path(index_file)

  unless dumping_to_io
    File.open(index_file, 'w') do |file|
      set_binmode(file)
      if cached?
        io_index.each do |range|
          file << [range.begin, range.end].pack(FORMAT)
        end
      else
        # copy the complete index, regardless of the current read position
        io_index.rewind
        file << io_index.read
      end
    end
  end

  index_file
end
190
+
191
+ def close
192
+ io.close
193
+ io_index.close unless cached?
194
+ end
195
+
196
+ def str_to_entry(string)
197
+ string
198
+ end
199
+
200
+ def entry_to_str(entry)
201
+ entry.to_s
202
+ end
203
+
204
+ def range(input, length=nil)
205
+ if cached?
206
+ if length.nil?
207
+ return io_index[input]
208
+ else
209
+ return io_index[input, length]
210
+ end
211
+ end
212
+
213
+ unless length.nil?
214
+ return nil if length < 0
215
+ input = input...(input + length)
216
+ end
217
+
218
+ if input.kind_of?(Range)
219
+ begin_pos = index_pos(input.begin)
220
+ return nil if begin_pos < 0 || begin_pos >= index_length
221
+
222
+ end_pos = index_pos(input.end) + (input.exclude_end? ? 0 : RANGE_SIZE)
223
+ read_length = end_pos-begin_pos
224
+ return [] if read_length <= 0
225
+
226
+ set_pos(io_index, begin_pos)
227
+ parse_index(io_index.read(read_length))
228
+ else
229
+ begin_pos = index_pos(input)
230
+ return nil if begin_pos < 0 || begin_pos >= index_length
231
+
232
+ set_pos(io_index, begin_pos)
233
+ array = io_index.read(RANGE_SIZE).unpack(FORMAT)
234
+ array.first...array.last
235
+ end
236
+ end
237
+
238
# Rebuilds the index by scanning the archive from its beginning.
#
# The archive is read with each_line(sep_string). Without a block every
# line ends an entry; with a block, a line is a break only when the block
# returns a true value for it.
#
# Options:
#   :sep_string::    separator handed to each_line (default $/)
#   :break_before::  the breaking line starts the next entry instead of
#                    ending the current one
#   :exclude_break:: the breaking line belongs to no entry
#   :limit::         stop once this many entries have been indexed
#
# Zero-length ranges are never recorded. Any existing index is discarded
# first. Returns self.
def reindex(options={}, &block)
  io.rewind

  options = {
    :sep_string => $/,
    :break_before => false,
    :exclude_break => false,
    :limit => nil
  }.merge(options)

  sep_string = options[:sep_string]
  break_before = options[:break_before]
  exclude_break = options[:exclude_break]

  limit = options[:limit]
  total_count = 0

  if cached?
    self.io_index = []
  else
    io_index.close
    if strio?
      self.io_index = ''
    else
      # The index file is reopened in append mode; empty it first so that
      # ranges from a previous indexing run do not survive, matching the
      # reset performed for the cached and string indexes above.
      index_path = io_index.path
      File.open(index_path, 'w') { |file| }
      self.io_index = index_path
    end
  end

  last_pos = 0
  current_pos = 0
  range_begin = 0
  io.each_line(sep_string) do |line|
    # Note positions MUST be built up from the size of each line in bytes;
    # io.pos cannot return positions greater than ~2.1e9
    last_pos = current_pos
    current_pos += line.bytesize

    if (block_given? ? yield(line) : true)
      range_end = (break_before || exclude_break) ? last_pos : current_pos
      unless range_end == range_begin
        io_index << format_range(range_begin, range_end)
        total_count += 1
      end
      range_begin = (break_before && !exclude_break) ? last_pos : current_pos

      break unless limit.nil? || total_count < limit
    end
  end

  # record whatever trails the last break, unless the limit was reached
  if limit.nil? || total_count < limit
    range_end = current_pos
    unless range_end == range_begin
      io_index << format_range(range_begin, range_end)
    end
  end

  # this must be done to re-stat Files, assuring index_length is the length of the io_index file.
  unless cached? || strio?
    io_index.close
    self.io_index = io_index.path
  end

  self
end
301
+
302
+ protected
303
+
304
+ def read_entry(range)
305
+ return nil if range.nil?
306
+
307
+ set_pos(io, range.begin)
308
+ str_to_entry( io.read(range.end-range.begin) )
309
+ end
310
+
311
+ POSITION_MAX = 2100000000
312
+ # Positions larger than ~2.1e9 cannot be directly given to +pos+ for File objects.
313
+ # +set_pos+ incrementally seeks to positions beyond the maximum, if necessary.
314
+ def set_pos(io, pos)
315
+ if pos < POSITION_MAX
316
+ io.pos = pos
317
+ else
318
+ # note sysseek appears to be necessary here, rather than io.seek
319
+ io.pos = 0
320
+ while pos > POSITION_MAX
321
+ pos -= POSITION_MAX
322
+ io.sysseek(POSITION_MAX, IO::SEEK_CUR)
323
+ end
324
+ io.sysseek(pos, IO::SEEK_CUR)
325
+ end
326
+ end
327
+
328
+ FORMAT = 'I*'
329
+ RANGE_SIZE = 8
330
+
331
+ def format_range(range_begin, range_end)
332
+ cached? ? range_begin...range_end : [range_begin, range_end].pack(FORMAT)
333
+ end
334
+
335
+ def index_length
336
+ strio? ? io_index.string.length : io_index.stat.size
337
+ end
338
+
339
+ def index_pos(index)
340
+ RANGE_SIZE*index + (index < 0 ? index_length : 0)
341
+ end
342
+
343
+ def parse_index(input)
344
+ last = nil
345
+ results = []
346
+ input.unpack(FORMAT).each do |current|
347
+ if last.nil?
348
+ last = current
349
+ else
350
+ results << (last...current)
351
+ last = nil
352
+ end
353
+ end
354
+ results
355
+ end
356
+
357
+ def io_index=(input)
358
+ if input.kind_of?(Array) || input.kind_of?(StringIO) || input.kind_of?(File)
359
+ @io_index = input
360
+ else
361
+ case
362
+ when strio? && cached?
363
+ load_index(input)
364
+ when strio?
365
+ @io_index = StringIO.new(input, 'a+')
366
+ when cached?
367
+ load_index(input)
368
+ else
369
+ @io_index = File.open(input, 'a+')
370
+ set_binmode(io_index)
371
+ end
372
+ end
373
+ end
374
+
375
+ # ArrayIO requires Files to operate in binmode on Windows. Otherwise ranges get
376
+ # improperly shifted, and additionally the unpacking frame gets shifted due to improper
377
+ # handling of cr (\r) characters.
378
+ def set_binmode(file)
379
+ file.binmode unless RUBY_PLATFORM.index('mswin').nil?
380
+ end
381
+ end
@@ -0,0 +1,28 @@
1
+ require 'array_io'
2
+ require 'pp'
3
+
4
# Debugging aid that prints a short summary of an ArrayIO to standard out.
class InspectArrayIO
  class << self
    # Prints the number of indexed entries, the leading and trailing bytes
    # of the archive, and the first and last entries of +aio+.
    #
    # Options:
    #   :from_start:: number of bytes shown from the start (default 100)
    #   :from_end::   number of bytes shown from the end (default 100)
    #
    # Archives shorter than :from_end are shown in full.
    def inspect(aio, options={})
      options = {
        :from_start => 100,
        :from_end => 100}.merge(options)

      from_start = options[:from_start]
      from_end = options[:from_end]
      io = aio.io

      puts "N indexed entries: #{aio.length}"

      puts "****************************"
      puts "First #{from_start} bytes:"
      io.pos = 0
      pp io.read(from_start)
      puts "First entry:"
      puts aio[0]

      puts "****************************"
      puts "Last #{from_end} bytes:"
      begin
        io.seek(-from_end, IO::SEEK_END)
      rescue Errno::EINVAL
        # the archive is shorter than from_end; show all of it
        io.rewind
      end
      pp io.read(from_end)
      puts "Last entry:"
      puts aio[-1]
    end
  end
end
@@ -0,0 +1,3 @@
1
+ >abc
2
+ >def
3
+ >gh
@@ -0,0 +1,3 @@
1
+ 012
2
+ 56
3
+ 9
Binary file
@@ -0,0 +1 @@
1
+ abcdefgh
@@ -0,0 +1 @@
1
+ abcdefgh
@@ -0,0 +1,3 @@
1
+ 012
2
+ 56
3
+ 9
@@ -0,0 +1,19 @@
1
+ line 1
2
+
3
+ line 2
4
+
5
+ line 3
6
+
7
+ line 4
8
+
9
+ line 5
10
+
11
+ line 6
12
+
13
+ line 7
14
+
15
+ line 8
16
+
17
+ line 9
18
+
19
+ line 10
@@ -0,0 +1 @@
1
+ abcdefgh
@@ -0,0 +1,569 @@
1
+ require File.join(File.dirname(__FILE__), 'arrayio_test_helper.rb')
2
+
3
+ class ArrayIOTest < Test::Unit::TestCase
4
+ include Benchmark
5
+
6
+
7
+ def aio_filepath(path)
8
+ File.join(File.dirname(__FILE__), 'array_io', path)
9
+ end
10
+
11
+ #
12
+ # file initialize tests
13
+ #
14
+
15
+ def test_initialize_with_index
16
+ aio = ArrayIO.new(aio_filepath('input.txt'))
17
+
18
+ assert_equal 'input.txt', File.basename(aio.io.path)
19
+ assert_equal "abcdefgh", aio.io.read
20
+
21
+ assert_equal [1...1, 2...4, 6...10], aio.io_index
22
+ aio.close
23
+ end
24
+
25
+ def test_initialize_uncached_with_index
26
+ # non-windows only
27
+ if RUBY_PLATFORM.index('mswin').nil?
28
+ aio = ArrayIO.new(aio_filepath('input.txt'), 'ru')
29
+
30
+ assert aio.io_index.kind_of?(File)
31
+ assert_equal 'input.index', File.basename(aio.io_index.path)
32
+ assert_equal [1,1,2,4,6,10], aio.io_index.read.unpack('I*')
33
+
34
+ aio.close
35
+ end
36
+ end
37
+
38
+ def test_initialize_uncached_with_index_binary_mode
39
+ # windows only
40
+ unless RUBY_PLATFORM.index('mswin').nil?
41
+ aio = ArrayIO.new(aio_filepath('inputb.txt'), 'ru')
42
+
43
+ assert aio.io_index.kind_of?(File)
44
+ assert_equal 'inputb.index', File.basename(aio.io_index.path)
45
+ assert_equal [1,1,2,4,6,10], aio.io_index.read.unpack('I*')
46
+
47
+ aio.close
48
+ end
49
+ end
50
+
51
+ def test_initialize_without_index
52
+ aio = ArrayIO.new(aio_filepath('without_index.txt'))
53
+
54
+ assert_equal [], aio.io_index
55
+ aio.close
56
+ end
57
+
58
+ def test_initialize_with_alternate_index
59
+ aio = ArrayIO.new(aio_filepath('without_index.txt'), 'r', aio_filepath('input.index'))
60
+
61
+ assert_equal [1...1, 2...4, 6...10], aio.io_index
62
+ aio.close
63
+ end
64
+
65
+ def test_initialize_with_array_index
66
+ array = [0...1,2...3]
67
+ aio = ArrayIO.new(aio_filepath('input.txt'), 'r', array)
68
+ assert_equal array, aio.io_index
69
+ end
70
+
71
+ #
72
+ # string initialize tests
73
+ #
74
+
75
+ def test_initialize_string
76
+ aio = ArrayIO.new('abcdefgh', 'rs')
77
+
78
+ assert aio.io.kind_of?(StringIO)
79
+ assert_equal "abcdefgh", aio.io.read
80
+
81
+ assert_equal [], aio.io_index
82
+ aio.close
83
+ end
84
+
85
+ def test_initialize_string_uncached
86
+ aio = ArrayIO.new('abcdefgh', 'rsu')
87
+
88
+ assert aio.io_index.kind_of?(StringIO)
89
+ assert_equal '', aio.io_index.string
90
+ aio.close
91
+ end
92
+
93
+ def setup_aio
94
+ ArrayIO.new("abcdefgh", 'r+s', [0...3, 3...5, 5...8])
95
+ end
96
+
97
+ def test_initialize_string_with_array
98
+ aio = setup_aio
99
+
100
+ assert aio.io.kind_of?(StringIO)
101
+ assert_equal "abcdefgh", aio.io.read
102
+
103
+ assert_equal [0...3, 3...5, 5...8], aio.io_index
104
+ aio.close
105
+ end
106
+
107
+ def setup_uaio
108
+ ArrayIO.new("abcdefgh", 'r+su', [0,3,3,5,5,8].pack('I*'))
109
+ end
110
+
111
+ def test_initialize_string_uncached_with_index
112
+ uaio = setup_uaio
113
+
114
+ assert uaio.io.kind_of?(StringIO)
115
+ assert_equal "abcdefgh", uaio.io.read
116
+
117
+ assert uaio.io_index.kind_of?(StringIO)
118
+ assert_equal [0,3,3,5,5,8], uaio.io_index.read.unpack('I*')
119
+ uaio.close
120
+ end
121
+
122
+ #
123
+ # range tests
124
+ #
125
+
126
+ def test_range
127
+ [setup_aio, setup_uaio].each do |aio|
128
+ assert_equal 0...3, aio.range(0)
129
+ assert_equal 3...5, aio.range(1)
130
+ assert_equal 5...8, aio.range(2)
131
+ assert_nil aio.range(3)
132
+
133
+ assert_equal 5...8, aio.range(-1)
134
+ assert_equal 3...5, aio.range(-2)
135
+ assert_equal 0...3, aio.range(-3)
136
+ assert_nil aio.range(-4)
137
+ end
138
+ end
139
+
140
+ def test_range_when_input_is_start_length
141
+ array = [0...3, 3...5, 5...8]
142
+
143
+ [setup_aio, setup_uaio].each do |aio|
144
+ [0..0, 0...0, 0..1, 0..2, 0..-1, 0..3, 0...3, -4..1, 5..10, -3..-5].each do |r|
145
+ start = r.begin
146
+ length = r.end - r.begin
147
+
148
+ assert_equal array[start, length], aio.range(start, length), r.to_s
149
+ end
150
+ end
151
+ end
152
+
153
+ def test__range_when_input_is_range
154
+ array = [0...3, 3...5, 5...8]
155
+
156
+ [setup_aio, setup_uaio].each do |aio|
157
+ [0..0, 0...0, 0..1, 0..2, 0..-1, 0..3, 0...3, -4..1, 5..10, -3..-5].each do |r|
158
+ assert_equal array[r], aio.range(r), r.to_s
159
+ end
160
+ end
161
+ end
162
+
163
+ def test_range_speed
164
+ benchmark_test do
165
+ aio = setup_aio
166
+ uaio = setup_uaio
167
+ bm(12) do |x|
168
+ x.report("10k index") { 10000.times { aio.range(1) } }
169
+ x.report("10k range") { 10000.times { aio.range(1..1) } }
170
+ x.report("10k s,l") { 10000.times { aio.range(1, 1) } }
171
+
172
+ x.report("10k uindex") { 10000.times { uaio.range(1) } }
173
+ x.report("10k urange") { 10000.times { uaio.range(1..1) } }
174
+ x.report("10k us,l") { 10000.times { uaio.range(1, 1) } }
175
+ end
176
+ end
177
+ end
178
+
179
+ #
180
+ # get tests
181
+ #
182
+
183
+ def test_get_when_input_is_index
184
+ [setup_aio, setup_uaio].each do |aio|
185
+ assert_equal "abc", aio[0]
186
+ assert_equal "de", aio[1]
187
+ assert_equal "fgh", aio[2]
188
+ assert_nil aio[3]
189
+
190
+ assert_equal "fgh", aio[-1]
191
+ assert_equal "de", aio[-2]
192
+ assert_equal "abc", aio[-3]
193
+ assert_nil aio[-4]
194
+ end
195
+ end
196
+
197
+ def test_get_when_input_is_start_length
198
+ array = ["abc", "de", "fgh"]
199
+
200
+ [setup_aio, setup_uaio].each do |aio|
201
+ [0..0, 0...0, 0..1, 0..2, 0..-1, 0..3, 0...3, -4..1, 5..10, -3..-5, -1..5].each do |r|
202
+ start = r.begin
203
+ length = r.end - r.begin
204
+
205
+ assert_equal array[start, length], aio[start, length], r.to_s
206
+ end
207
+ end
208
+ end
209
+
210
+ def test_get_when_input_is_range
211
+ array = ["abc", "de", "fgh"]
212
+
213
+ [setup_aio, setup_uaio].each do |aio|
214
+ [0..0, 0...0, 0..1, 0..2, 0..-1, 0..3, 0...3, -4..1, 5..10, -3..-5].each do |r|
215
+ assert_equal array[r], aio[r], r.to_s
216
+ end
217
+ end
218
+ end
219
+
220
+ def test_get_speed
221
+ benchmark_test do
222
+ array = []
223
+ 0.upto(10000-1) {|i| array << "line #{i}\n" }
224
+
225
+ aio = ArrayIO.new(array.join(''), 'rs')
226
+ aio.reindex
227
+
228
+ uaio = ArrayIO.new(array.join(''), 'rsu')
229
+ uaio.reindex
230
+
231
+ assert_equal "line 1000\n", aio[1000]
232
+ bm(12) do |x|
233
+ x.report("10k index") { 10000.times { aio[1000] } }
234
+ x.report("10k range") { 10000.times { aio[1000...1000] } }
235
+ x.report("10k s,l") { 10000.times { aio[1000, 1] } }
236
+
237
+ x.report("10k uindex") { 10000.times { uaio[1000] } }
238
+ x.report("10k urange") { 10000.times { uaio[1000..1000] } }
239
+ x.report("10k us,l") { 10000.times { uaio[1000, 1] } }
240
+ end
241
+ end
242
+ end
243
+
244
+ #
245
+ # test insert
246
+ #
247
+
248
+ def test_insert
249
+ [setup_aio, setup_uaio].each do |aio|
250
+ assert_equal "abc", aio[0]
251
+ assert_equal "fgh", aio[-1]
252
+ assert_equal "de", aio[1]
253
+ assert_equal 3, aio.length
254
+
255
+ aio[0] = 'xyz'
256
+ aio[-1] = 'pq'
257
+ aio[1] = 'mno'
258
+
259
+ assert_equal "xyz",aio[0]
260
+ assert_equal "pq", aio[-1]
261
+ assert_equal "mno", aio[1]
262
+ assert_equal 3, aio.length
263
+
264
+ assert_equal 'abcdefghxyzpqmno', aio.io.string
265
+ end
266
+ end
267
+
268
+ def test_insert_raises_error_if_io_is_not_writable
269
+ aio = ArrayIO.new('', 'rs')
270
+ assert_raise(IOError) { aio[0] = 'abc'}
271
+ end
272
+
273
+ def test_insert_speed
274
+ benchmark_test do
275
+ array = []
276
+ 0.upto(10000-1) {|i| array << "line #{i}\n" }
277
+
278
+ aio = ArrayIO.new(array.join(''), 'r+s')
279
+ aio.reindex
280
+
281
+ uaio = ArrayIO.new(array.join(''), 'r+su')
282
+ uaio.reindex
283
+
284
+ bm(12) do |x|
285
+ x.report("10k index=") { 10000.times { aio[1000] = "line 1000\n" } }
286
+ #x.report("10k range=") { 10000.times { aio[1000..1000] = "line 1000\n" } }
287
+ #x.report("10k s,l=") { 10000.times { aio[1000, 1] = "line 1000\n" } }
288
+
289
+ x.report("10k uindex=") { 10000.times { uaio[1000] = "line 1000\n" } }
290
+ #x.report("10k crange=") { 10000.times { caio[1000..1000] = "line 1000\n" } }
291
+ #x.report("10k cs,l=") { 10000.times { caio[1000, 1] = "line 1000\n" } }
292
+ end
293
+ end
294
+ end
295
+
296
+ #
297
+ # indexing tests
298
+ #
299
+
300
+ def reindex_test(expected, options={}, &block)
301
+ cases = {
302
+ :end_midline => "012\n\n56\n\n9",
303
+ :end_on_line => "012\n\n56\n\n9\n",
304
+ :end_on_break => "012\n\n56\n\n9\n\n",
305
+ :no_break => "0123456789",
306
+ :backing_breaks => "012\n\n\n\n\n\n9",
307
+ :cr_lf => "012\r\n\r\n56\r\n\r\n9"}
308
+
309
+ cases.each_pair do |key, string|
310
+ next unless expected.has_key?(key)
311
+
312
+ ['rs', 'rsu'].each do |mode|
313
+ aio = ArrayIO.new(string, mode)
314
+ aio.reindex(options, &block)
315
+
316
+ assert_equal expected[key].length, aio.length, "#{mode} #{key}"
317
+ assert_equal expected[key], aio[0..-1], "#{mode} #{key}"
318
+ end
319
+ end
320
+ end
321
+
322
+ def test_reindex_treats_each_line_as_break_by_default
323
+ reindex_test(
324
+ :end_midline => ["012\n", "\n", "56\n", "\n", "9"],
325
+ :end_on_line => ["012\n", "\n", "56\n", "\n", "9\n"],
326
+ :end_on_break => ["012\n", "\n", "56\n", "\n", "9\n", "\n"],
327
+ :no_break => ["0123456789"],
328
+ :backing_breaks => ["012\n", "\n", "\n", "\n", "\n", "\n", "9"],
329
+ :cr_lf => ["012\r\n", "\r\n", "56\r\n", "\r\n", "9"])
330
+ end
331
+
332
+ def test_reindex_block_determines_if_line_is_a_break
333
+ reindex_test(
334
+ :end_midline => ["012\n\n", "56\n\n", "9"],
335
+ :end_on_line => ["012\n\n", "56\n\n", "9\n"],
336
+ :end_on_break => ["012\n\n", "56\n\n", "9\n\n"],
337
+ :no_break => ["0123456789"],
338
+ :backing_breaks => ["012\n\n", "\n", "\n", "\n", "\n", "9"],
339
+ :cr_lf => ["012\r\n\r\n", "56\r\n\r\n", "9"]) do |line|
340
+ line.strip.empty?
341
+ end
342
+ end
343
+
344
+ def test_reindex_breaking_before
345
+ reindex_test({
346
+ :end_midline => ["012\n", "\n56\n", "\n9"],
347
+ :end_on_line => ["012\n", "\n56\n", "\n9\n"],
348
+ :end_on_break => ["012\n", "\n56\n", "\n9\n", "\n"],
349
+ :no_break => ["0123456789"],
350
+ :backing_breaks => ["012\n", "\n", "\n", "\n", "\n", "\n9"],
351
+ :cr_lf => ["012\r\n", "\r\n56\r\n", "\r\n9"]},
352
+ :break_before => true) do |line|
353
+ line.strip.empty?
354
+ end
355
+ end
356
+
357
+ def test_reindex_excluding_break
358
+ reindex_test({
359
+ :end_midline => ["012\n", "56\n", "9"],
360
+ :end_on_line => ["012\n", "56\n", "9\n"],
361
+ :end_on_break => ["012\n", "56\n", "9\n"],
362
+ :no_break => ["0123456789"],
363
+ :backing_breaks => ["012\n", "9"],
364
+ :cr_lf => ["012\r\n", "56\r\n", "9"]},
365
+ :exclude_break => true) do |line|
366
+ line.strip.empty?
367
+ end
368
+ end
369
+
370
+ def test_reindex_breaking_before_and_excluding_break
371
+ # note this is the same as simply excluding the break
372
+ reindex_test({
373
+ :end_midline => ["012\n", "56\n", "9"],
374
+ :end_on_line => ["012\n", "56\n", "9\n"],
375
+ :end_on_break => ["012\n", "56\n", "9\n"],
376
+ :no_break => ["0123456789"],
377
+ :backing_breaks => ["012\n", "9"],
378
+ :cr_lf => ["012\r\n", "56\r\n", "9"]},
379
+ :exclude_break => true,
380
+ :break_before => true) do |line|
381
+ line.strip.empty?
382
+ end
383
+ end
384
+
385
+ def test_reindex_with_alt_sep_string
386
+ reindex_test({
387
+ :end_midline => ["012\n\n", "56\n\n", "9"],
388
+ :end_on_line => ["012\n\n", "56\n\n", "9\n"],
389
+ :end_on_break => ["012\n\n", "56\n\n", "9\n\n"],
390
+ :no_break => ["0123456789"],
391
+ :backing_breaks => ["012\n\n", "\n\n", "\n\n", "9"],
392
+ :cr_lf => ["012\r\n\r\n56\r\n\r\n9"]},
393
+ :sep_string => "\n\n")
394
+
395
+ reindex_test({
396
+ :end_midline => ["012\n\n56", "\n\n9"],
397
+ :end_on_line => ["012\n\n56", "\n\n9\n"],
398
+ :end_on_break => ["012\n\n56", "\n\n9\n\n"],
399
+ :no_break => ["0123456", "789"],
400
+ :backing_breaks => ["012\n\n\n\n\n\n9"],
401
+ :cr_lf => ["012\r\n\r\n56", "\r\n\r\n9"]},
402
+ :sep_string => "56")
403
+ end
404
+
405
+ #
406
+ # file tests
407
+ #
408
+
409
# Opens the fixture +path+ as an uncached ArrayIO and yields it.
# Afterwards the ArrayIO is closed and the index file it was using is
# deleted (when it exists), even if the block fails.
def index_test(path, &block)
  aio = ArrayIO.open(aio_filepath(path), 'ru')

  begin
    yield(aio)
  ensure
    aio.close
    index_path = aio.io_index.path
    File.delete(index_path) if index_path && File.exist?(index_path)
  end
end
420
+
421
+ def test_cr_lf_file
422
+ index_test('cr_lf_input.txt') do |aio|
423
+ aio.reindex
424
+
425
+ assert_equal "012\r\n", aio[0]
426
+ assert_equal "56\r\n", aio[1]
427
+ assert_equal "9", aio[2]
428
+ end
429
+ end
430
+
431
+ def test_parse_from_lf_file
432
+ index_test('lf_input.txt') do |aio|
433
+ aio.reindex
434
+
435
+ assert_equal "012\n", aio[0]
436
+ assert_equal "56\n", aio[1]
437
+ assert_equal "9", aio[2]
438
+ end
439
+ end
440
+
441
+ def test_parse_from_alt_sep
442
+ index_test('alt_sep.txt') do |aio|
443
+ aio.reindex do |line|
444
+ line =~ /^>/
445
+ end
446
+
447
+ assert_equal ">abc\r\n", aio[0]
448
+ assert_equal ">def\r\n", aio[1]
449
+ assert_equal ">gh", aio[2]
450
+ end
451
+ end
452
+
453
+ def test_reindex_speed
454
+ benchmark_test do
455
+ n = 10000
456
+ filepath = aio_filepath("reindex_speed.txt")
457
+
458
+ bm(12) do |x|
459
+ x.report("building file") do
460
+ File.open(filepath, 'w') do |file|
461
+ 0.upto(n-1) {|i| file << "line #{i}\n" }
462
+ end
463
+ end
464
+ puts " #{n} lines"
465
+
466
+ begin
467
+ aio = ArrayIO.new(filepath, 'r')
468
+ uaio = ArrayIO.new(filepath, 'ru')
469
+
470
+ x.report("reindex") { aio.reindex }
471
+ assert_equal n, aio.length, 'aio'
472
+ assert_equal "line 1000\r\n", aio[1000], 'aio'
473
+
474
+ x.report("ureindex") { uaio.reindex}
475
+ assert_equal n, uaio.length, 'uaio'
476
+ assert_equal "line 1000\r\n", uaio[1000], 'uaio'
477
+ ensure
478
+ aio.close
479
+ uaio.close
480
+ index_path = uaio.io_index.path
481
+ File.delete(index_path) if index_path && File.exists?(index_path)
482
+ File.delete(filepath)
483
+ end
484
+ end
485
+ end
486
+ end
487
+
488
+ #
489
+ # array behavior tests
490
+ #
491
+
492
+ def array_test_setup
493
+ [setup_aio, ["abc", "de", "fgh"]]
494
+ end
495
+
496
+ def test_length_and_size
497
+ aio, array = array_test_setup
498
+
499
+ assert_equal array.length, aio.length
500
+ assert_equal array.size, aio.size
501
+ end
502
+
503
+ def test_fetch
504
+ aio, array = array_test_setup
505
+
506
+ assert_equal array.fetch(0), aio.fetch(0)
507
+ assert_equal array.fetch(0, "default"), aio.fetch(0, "default")
508
+ assert_equal array.fetch(3, "default"), aio.fetch(3, "default")
509
+
510
+ array.fetch(1) do |arr|
511
+ aio.fetch(1) do |a|
512
+ assert_equal arr, a
513
+ end
514
+ end
515
+ end
516
+
517
+ def test_first
518
+ aio, array = array_test_setup
519
+
520
+ assert_equal array.first, aio.first
521
+ assert_equal array.first(2), aio.first(2)
522
+ assert_equal array.first(10), aio.first(10)
523
+ end
524
+
525
+ def test_each_index
526
+ aio, array = array_test_setup
527
+
528
+ aio_result = []
529
+ aio.each_index {|i| aio_result << i}
530
+ array_result = []
531
+ array.each_index {|i| array_result << i}
532
+
533
+ assert_equal array_result, aio_result
534
+ end
535
+
536
+ def test_each
537
+ aio, array = array_test_setup
538
+
539
+ aio_result = []
540
+ aio.each {|i| aio_result << i}
541
+ array_result = []
542
+ array.each {|i| array_result << i}
543
+
544
+ assert_equal array_result, aio_result
545
+ end
546
+
547
+ def test_empty
548
+ aio, array = array_test_setup
549
+
550
+ assert_equal array.empty?, aio.empty?
551
+ assert_equal [].empty?, ArrayIO.new('', 'rs', []).empty?
552
+ end
553
+
554
+ def test_last
555
+ aio, array = array_test_setup
556
+
557
+ assert_equal array.last, aio.last
558
+ assert_equal array.last(2), aio.last(2)
559
+ assert_equal array.last(10), aio.last(10)
560
+ end
561
+
562
+ def btest_values_at
563
+ aio, array = array_test_setup
564
+
565
+ assert_equal array.values_at, aio.values_at
566
+ assert_equal array.values_at(2), aio.values_at(2)
567
+ assert_equal array.values_at(1, 10, 3..3, 1..2), aio.values_at(1, 10, 3..3, 1..2)
568
+ end
569
+ end
@@ -0,0 +1,72 @@
1
+ require 'test/unit'
2
+ require 'array_io'
3
+ require 'benchmark'
4
+ require 'pp'
5
+
6
+ # The testing subset code here is taken from an as-yet unreleased gem 'prosperity'
7
+ #
8
+ # These subsets facilitate testing by using the ENV variables specified on the command line
9
+ # to indicate which tests to run. The ENV variables are set by rake, so this code implicitly
10
+ # assumes that you're running your tests through rake.
11
+ #
12
+ class Test::Unit::TestCase
13
+ def run_subset?(type)
14
+ ENV[type] == "true" || ENV["ALL"] == "true"
15
+ end
16
+
17
+ def match_regexp?(type, obj, default=true)
18
+ return default if ENV["ALL"] == "true"
19
+ return default unless ENV[type]
20
+
21
+ str = ""
22
+ PP.singleline_pp(obj, str)
23
+ str =~ Regexp.new(ENV[type])
24
+ end
25
+
26
+ def extended_test(&block)
27
+ subset_test("EXTENDED", "x", &block)
28
+ end
29
+
30
+ def benchmark_test(&block)
31
+ subset_test("BENCHMARK", "b") do
32
+ puts calling_method
33
+ block.call
34
+ end
35
+ end
36
+
37
+ def case_test(hash, &block)
38
+ if match_regexp?("CASE_TEST", calling_method)
39
+ hash.each_pair do |testcase, expected|
40
+ yield(testcase, expected) if match_regexp?("CASE", testcase)
41
+ end
42
+ end
43
+ end
44
+
45
+ protected
46
+
47
# Walks the call stack and returns the name of the first calling method
# that matches +pattern+ (by default /^test/), or an empty string when no
# frame matches.
#
# Backtrace labels are accepted both in the `name' form and in the
# 'Class#name' form used by newer Rubies; any class qualifier is removed
# before the name is matched.
def calling_method(pattern=/^test/)
  caller.each do |frame|
    next unless frame =~ /:in [`'](.*)'$/

    method_name = $1.sub(/\A.*[#.]/, '')
    return method_name if method_name =~ pattern
  end

  ''
end
58
+
59
+ def subset_test(type, skip, &block)
60
+ type = type.upcase
61
+ type_test = "#{type}_TEST"
62
+ if run_subset?(type) || ENV[type_test]
63
+ if match_regexp?(type_test, calling_method)
64
+ block.call
65
+ else
66
+ print skip
67
+ end
68
+ else
69
+ print skip
70
+ end
71
+ end
72
+ end
@@ -0,0 +1,4 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), '../lib')
2
+
3
+ ENV["ALL"] = 'true'
4
+ Dir.glob("./**/*_test.rb").each {|test| require test}
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.0
3
+ specification_version: 1
4
+ name: arrayio
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.1.0
7
+ date: 2007-03-15 00:00:00 -06:00
8
+ summary: Array-like behavior for archival files.
9
+ require_paths:
10
+ - lib
11
+ email: simon.chiang@uchsc.edu
12
+ homepage: http://rubyforge.org/projects/arrayio/
13
+ rubyforge_project:
14
+ description:
15
+ autorequire: arrayio
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Simon Chiang
31
+ files:
32
+ - test/arrayio_test_helper.rb
33
+ - test/arrayio_test_suite.rb
34
+ - test/array_io
35
+ - test/array_io_test.rb
36
+ - test/array_io/alt_sep.txt
37
+ - test/array_io/cr_lf_input.txt
38
+ - test/array_io/input.index
39
+ - test/array_io/input.txt
40
+ - test/array_io/inputb.index
41
+ - test/array_io/inputb.txt
42
+ - test/array_io/lf_input.txt
43
+ - test/array_io/lines.txt
44
+ - test/array_io/without_index.txt
45
+ - lib/array_io.rb
46
+ - lib/inspect_array_io.rb
47
+ - README
48
+ test_files:
49
+ - test/arrayio_test_suite.rb
50
+ rdoc_options: []
51
+
52
+ extra_rdoc_files:
53
+ - README
54
+ executables: []
55
+
56
+ extensions: []
57
+
58
+ requirements: []
59
+
60
+ dependencies: []
61
+