external 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. data/History +5 -0
  2. data/MIT-LICENSE +21 -0
  3. data/README +168 -0
  4. data/lib/ext_arc.rb +108 -0
  5. data/lib/ext_arr.rb +727 -0
  6. data/lib/ext_ind.rb +1120 -0
  7. data/lib/external/base.rb +85 -0
  8. data/lib/external/chunkable.rb +105 -0
  9. data/lib/external/enumerable.rb +137 -0
  10. data/lib/external/io.rb +398 -0
  11. data/lib/external.rb +3 -0
  12. data/test/benchmarks/benchmarks_20070918.txt +45 -0
  13. data/test/benchmarks/benchmarks_20070921.txt +91 -0
  14. data/test/benchmarks/benchmarks_20071006.txt +147 -0
  15. data/test/benchmarks/test_copy_file.rb +80 -0
  16. data/test/benchmarks/test_pos_speed.rb +47 -0
  17. data/test/benchmarks/test_read_time.rb +55 -0
  18. data/test/cached_ext_ind_test.rb +219 -0
  19. data/test/check/benchmark_check.rb +441 -0
  20. data/test/check/namespace_conflicts_check.rb +23 -0
  21. data/test/check/pack_check.rb +90 -0
  22. data/test/ext_arc_test.rb +286 -0
  23. data/test/ext_arr/alt_sep.txt +3 -0
  24. data/test/ext_arr/cr_lf_input.txt +3 -0
  25. data/test/ext_arr/input.index +0 -0
  26. data/test/ext_arr/input.txt +1 -0
  27. data/test/ext_arr/inputb.index +0 -0
  28. data/test/ext_arr/inputb.txt +1 -0
  29. data/test/ext_arr/lf_input.txt +3 -0
  30. data/test/ext_arr/lines.txt +19 -0
  31. data/test/ext_arr/without_index.txt +1 -0
  32. data/test/ext_arr_test.rb +534 -0
  33. data/test/ext_ind_test.rb +1472 -0
  34. data/test/external/base_test.rb +74 -0
  35. data/test/external/chunkable_test.rb +182 -0
  36. data/test/external/index/input.index +0 -0
  37. data/test/external/index/inputb.index +0 -0
  38. data/test/external/io_test.rb +414 -0
  39. data/test/external_test_helper.rb +31 -0
  40. data/test/external_test_suite.rb +4 -0
  41. data/test/test_array.rb +1192 -0
  42. metadata +104 -0
data/lib/ext_arr.rb ADDED
@@ -0,0 +1,727 @@
1
+ require 'external/base'
2
+ require 'ext_ind'
3
+ require 'yaml'
4
+
5
+ #--
6
+ # later separate out individual objects logically
7
+ # If writing, create new files:
8
+ # - base/object_id.aio (new file for receiving appends)
9
+ # - base/object_id.index (copy of existing index -- made on first insertion)
10
+ # - in index, -index indicates object_id.aio file whereas +index indicates original file
11
+ # - .consolidate(rename) resolves changes in index into the object_id file, renaming as needed
12
+ # requires index rewrite as well, to remove negatives
13
+ #
14
+ # If appending, ONLY allow << and all changes get committed to the original file.
15
+ #
16
+ # This should allow returning of new arrayio objects under read/write conditions
17
+ # By default read-only. No insertions. New ExtArr objects inherit parent mode.
18
+ #
19
+ # Independent modes:
20
+ # - r
21
+ # - r+
22
+ # - For safety, w/w+ will by default act as r/r+, simply creating new .aio and .index files
23
+ # changes to the originals will NOT be made unless .consolidate(rename) is used. Allow option io_w => true
24
+ # - b ALWAYS on with Windows
25
+ #++
26
+
27
+ class ExtArr < External::Base
28
# Class-level constructor shortcut and index defaults.
class << self
  # Builds a new instance and stores each of +args+ in it, in order,
  # in the manner of Array.[]. Returns the new instance.
  def [](*args)
    args.each_with_object(new) do |item, arr|
      arr[arr.length] = item
    end
  end

  # Options passed to ExtInd.open when the index is created.
  def default_index_options
    { :format => 'II', :nil_value => [0, 0], :cached => true }
  end

  # Derives the index path for +filepath+ by replacing its extension
  # (if any) with '.index'.
  def default_index_filepath(filepath)
    extension = File.extname(filepath)
    "#{filepath.chomp(extension)}.index"
  end
end
45
+
46
+ attr_reader :index, :pos_index, :length_index
47
+
48
# Wraps +io+ (handed to the superclass) and opens the index that records
# where each entry lives.
#
# Recognized options:
#   :pos_index    - slot within an index record holding the entry
#                   position (default 0)
#   :length_index - slot within an index record holding the entry
#                   length (default 1)
#   :index_file   - path of the index file. When the key is absent and
#                   io is a File, the path is derived with
#                   default_index_filepath; otherwise it stays nil.
def initialize(io=nil, options={})
  super(io)

  @max_gap = 10000
  @max_chunk_size = 1000000

  @pos_index = options[:pos_index].nil? ? 0 : options[:pos_index]
  @length_index = options[:length_index].nil? ? 1 : options[:length_index]

  # TODO -- merge in specified index options
  index_options = self.class.default_index_options

  # determine the index file. if a file is specified, use it, otherwise
  # infer the index filepath from the io. Note a nil index_file is ok --
  # this simply means the index file will be a Tempfile
  index_file =
    if options.has_key?(:index_file)
      options[:index_file]
    elsif io.kind_of?(File)
      self.class.default_index_filepath(io.path)
    end

  # ensure the file exists before trying to open it in "r+" mode.
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2;
  # a nil index_file is skipped because there is no path to touch.
  if io.kind_of?(File) && index_file && !File.exist?(index_file)
    FileUtils.touch(index_file)
  end
  @index = ExtInd.open(index_file, "r+", index_options)
end
78
+
79
# Reports whether both the io (as judged by super) and the index are
# closed. An index that does not respond to :close counts as closed.
def closed?
  base_closed = super
  return base_closed unless base_closed

  index_closable = index.respond_to?(:close)
  !index_closable || index.closed?
end
82
+
83
# Closes the io through super, then closes the index when it can be
# closed and is not closed already.
def close
  super
  return unless index.respond_to?(:close)

  index.close unless index.closed?
end
87
+
88
# Clears the index and rebuilds it from io. The io is flushed first
# unless its generic_mode is "r", then rewound.
#
# With a block, the cleared index is yielded and the caller is
# responsible for filling it. Without one, io is split on the '---'
# separator and one [position, length] record is written per non-empty
# span. Returns self.
def reindex
  index.clear
  io.flush unless io.generic_mode == "r"
  io.rewind

  if block_given?
    yield(index)
    return self
  end

  consumed = 0
  span_start = 0
  total = io.length

  io.each_line('---') do |chunk|
    # Note positions MUST be built up using chunk.length
    # io.pos cannot return positions greater than ~2.1e9
    consumed += chunk.length

    # every chunk except the one reaching the end of io finishes with
    # the 3-character separator, which belongs to the next entry
    span_stop = (consumed == total) ? consumed : consumed - 3
    next if span_start == span_stop

    index.unframed_write [span_start, span_stop - span_start]
    span_start = span_stop
  end

  self
end
114
+
115
# Deserializes the YAML text +str+ into an entry; nil passes through
# as nil.
#
# NOTE(review): YAML.load is applied to whatever was read from io --
# confirm the io contents are trusted.
def str_to_entry(str)
  return nil if str.nil?

  YAML.load(str)
end
118
+
119
# Serializes +entry+ to the YAML text that is written to io.
#
# The leading "--- " and trailing newline could be stripped to save
# space, but that would cost time on every read and write -- notably
# for Numerics, which need no such trimming to round-trip.
def entry_to_str(entry)
  entry.to_yaml
end
126
+
127
# Extracts the io position from an index record, i.e. the value stored
# at slot pos_index of +index+.
def entry_pos(index)
  slot = pos_index
  index[slot]
end
130
+
131
# Extracts the entry length from an index record, i.e. the value stored
# at slot length_index of +index+.
def entry_length(index)
  slot = length_index
  index[slot]
end
134
+
135
+ ###########################
136
+ # Array methods
137
+ ###########################
138
+
139
+ # def &(another)
140
+ # not_implemented
141
+ # end
142
+
143
+ # def *(arg)
144
+ # not_implemented
145
+ # end
146
+
147
# Appends the items of +another+ by delegating to concat and returns
# whatever concat returns.
#
# NOTE(review): because this delegates to concat, the receiver itself
# is modified, unlike Array#+ which builds a new array.
def +(another)
  concat(another)
end
150
+
151
+ # def -(another)
152
+ # not_implemented
153
+ # end
154
+
155
# Stores +obj+ just past the current last entry and returns self so
# that appends can be chained.
def <<(obj)
  tail = length
  self[tail] = obj
  self
end
159
+
160
# Three-way comparison against an Array or another ExtArr.
#
# Only as many entries of the longer side as the shorter side holds are
# loaded; when that common prefix compares equal, the longer side wins.
# Two ExtArr with equal indexes whose ios pass quick_compare are
# reported equal without loading any entries. Raises TypeError for any
# other kind of argument.
def <=>(another)
  case another
  when Array
    return to_a <=> another unless another.length < length

    # self is longer: equal over the shared prefix means self wins
    prefix_order = (to_a(another.length) <=> another)
    prefix_order == 0 ? 1 : prefix_order
  when ExtArr
    # equal indexes permit the 'quick' io comparison
    return 0 if index == another.index && io.quick_compare(another.io)

    io.flush
    another.io.flush

    # should chunk compare
    mine = length
    theirs = another.length

    if theirs > mine
      prefix_order = (to_a <=> another.to_a(mine))
      prefix_order == 0 ? -1 : prefix_order
    elsif theirs < mine
      prefix_order = (to_a(theirs) <=> another.to_a)
      prefix_order == 0 ? 1 : prefix_order
    else
      to_a <=> another.to_a
    end
  else
    raise TypeError.new("can't convert from #{another.class} to ExtArr or Array")
  end
end
197
+
198
# Equality against an Array or another ExtArr; anything else is never
# equal.
#
# Differing lengths settle the answer immediately. Two ExtArr whose
# indexes are equal are reported equal when their ios pass sort_compare;
# otherwise the entries themselves are loaded and compared.
def ==(another)
  case another
  when Array
    # cheap length test first, then compare the loaded entries
    length == another.length && to_a == another
  when ExtArr
    return false if length != another.length

    # equal indexes permit the 'quick' io comparison
    if index == another.index
      half_buffer = (index.buffer_size/2).ceil
      return true if io.sort_compare(another.io, half_buffer) == 0
    end

    to_a == another.to_a
  else
    false
  end
end
225
+
226
# Element reference, following Array#[] call forms.
#
#   self[i]          # the entry at i
#   self[range]      # an array of entries
#   self[start, len] # an array of entries
#
# The index is consulted first; whatever it returns for a miss (nil or
# an empty collection) is returned unchanged. Otherwise the matching
# spans are read from io and deserialized with str_to_entry.
def [](input, length=nil)
  # two call types are required because while ExtInd can take
  # a nil length, Array cannot and index can be either
  entries = length.nil? ? index[input] : index[input, length]

  # for conformance with array range retrieval
  return entries if entries.nil? || entries.empty?

  if length.nil? && !input.kind_of?(Range)
    # single entry, just read it. (A dangling "pos, length =" used to
    # precede this statement and swallowed it as its right-hand side;
    # the io positioning it performed is kept.)
    io.pos = entry_pos(entries)
    str_to_entry(io.read(entry_length(entries)))
  else
    pos = nil
    entries.collect do |entry|
      # only set io position if necessary: after a read, io already
      # sits where a contiguous next entry begins
      epos = entry_pos(entry)
      unless pos == epos
        pos = epos
        io.pos = pos
      end

      elen = entry_length(entry)
      pos += elen

      # read entry
      str_to_entry(io.read(elen))
    end
  end
end
259
+
260
# Element assignment, following Array#[]= call forms.
#
#   self[i] = value
#   self[range] = value_or_values
#   self[start, len] = value_or_values
#
# New data is always appended to the end of io; only the index is
# rewritten to point at it. Raises ArgumentError unless two or three
# arguments are given, TypeError for a Range combined with a length, and
# IndexError when a negative index reaches before the first entry.
def []=(*args)
  # extra arguments used to be dropped silently and the message always
  # claimed one argument; report the real count instead
  unless args.length == 2 || args.length == 3
    raise ArgumentError.new("wrong number of arguments (#{args.length} for 2)")
  end

  # locals are named so they do not shadow the index, entry_pos and
  # entry_length methods
  slot, count, value = args
  value = count if args.length == 2

  if slot.kind_of?(Range)
    raise TypeError.new("can't convert Range into Integer") if args.length == 3
    # for conformance with setting a range with nil (truncates)
    value = [] if value.nil?
    offset, count = split_range(slot)
    return (self[offset, count + 1] = value)
  end

  slot += length if slot < 0
  raise IndexError.new("index #{slot} out of range") if slot < 0

  write_pos = io.length
  io.pos = write_pos

  if args.length == 2
    # write entry to io first as a check
    # that io is open for writing.
    written = io.write(entry_to_str(value))
    io.length += written

    index[slot] = [write_pos, written]
  else
    items =
      case value
      when Array then value
      when ExtArr
        # special case: when value is self, reading and writing would
        # go through the same io and produce incorrect results, so load
        # the entries up front (potentially a huge amount of data)
        value.equal?(self) ? to_a : value
      else
        [value]
      end

    records = []
    items.each do |item|
      written = io.write(entry_to_str(item))
      records << [write_pos, written]

      io.length += written
      write_pos += written
    end

    index[slot, count] = records
  end
end
321
+
322
+ # def abbrev(pattern=nil)
323
+ # not_implemented
324
+ # end
325
+
326
+ # def assoc(obj)
327
+ # not_implemented
328
+ # end
329
+
330
# Returns the entry stored at +index+; shorthand for self[index].
def at(index)
  self[index]
end
334
+
335
# Empties _self_: io is truncated to zero length and the index is
# cleared. Returns _self_.
def clear
  io.truncate(0)
  index.clear

  self
end
341
+
342
+ # def compact
343
+ # not_implemented
344
+ # end
345
+
346
+ # def compact!
347
+ # not_implemented
348
+ # end
349
+
350
# Appends every item of +another+ (an Array or ExtArr) to the end of
# self, one at a time, and returns self. Any other argument raises
# TypeError.
def concat(another)
  unless another.kind_of?(Array) || another.kind_of?(ExtArr)
    raise TypeError.new("can't convert #{another.class} into ExtArr or Array")
  end

  another.each { |item| self[length] = item }
  self
end
359
+
360
+ # def dclone
361
+ # not_implemented
362
+ # end
363
+
364
+ # def delete(obj)
365
+ # not_implemented
366
+ # end
367
+
368
+ # def delete_at(index)
369
+ # not_implemented
370
+ # end
371
+
372
+ # def delete_if # :yield: item
373
+ # not_implemented
374
+ # end
375
+
376
# Yields every entry in index order and returns self.
#
# Index records are decoded through entry_pos and entry_length, the same
# helpers the [] reader uses, so the two read paths agree on how a
# record is laid out.
def each(&block) # :yield: item
  pos = nil
  index.each do |entry|
    epos = entry_pos(entry)
    elen = entry_length(entry)

    # only set io position if necessary: after a read, io already sits
    # where a contiguous next entry begins
    unless pos == epos
      pos = epos
      io.pos = pos
    end

    pos += elen

    # yield entry
    yield str_to_entry(io.read(elen))
  end
  self
end
392
+
393
# Yields each valid entry index, from 0 up to length-1, and returns
# self.
def each_index(&block) # :yield: index
  length.times(&block)
  self
end
398
+
399
# Returns true if _self_ holds no entries, i.e. its length is zero.
def empty?
  count = length
  count == 0
end
403
+
404
# Same test as ==: returns the result of comparing self with +another+
# using ==.
def eql?(another)
  self.==(another)
end
407
+
408
+ # def fetch(index, default=nil, &block)
409
+ # index += index_length if index < 0
410
+ # val = (index >= length ? default : self[index])
411
+ # block_given? ? yield(val) : val
412
+ # end
413
+ #
414
+ # def fill(*args)
415
+ # not_implemented
416
+ # end
417
+
418
# Without an argument, returns the first entry (self[0]). With +n+,
# returns self[0, n], i.e. the leading n entries.
def first(n=nil)
  return self[0] if n.nil?

  self[0, n]
end
422
+
423
+ # def flatten
424
+ # not_implemented
425
+ # end
426
+
427
+ # def flatten!
428
+ # not_implemented
429
+ # end
430
+
431
+ # def frozen?
432
+ # not_implemented
433
+ # end
434
+
435
+ # def hash
436
+ # not_implemented
437
+ # end
438
+
439
+ # def include?(obj)
440
+ # not_implemented
441
+ # end
442
+
443
+ # def index(obj)
444
+ # not_implemented
445
+ # end
446
+ #
447
+ # def indexes(*args)
448
+ # values_at(*args)
449
+ # end
450
+ #
451
+ # def indicies(*args)
452
+ # values_at(*args)
453
+ # end
454
+
455
+ # def replace(other)
456
+ # not_implemented
457
+ # end
458
+
459
+ # def insert(index, *obj)
460
+ # self[index] = obj
461
+ # end
462
+
463
+ # def inspect
464
+ # not_implemented
465
+ # end
466
+
467
+ # def join(sep=$,)
468
+ # not_implemented
469
+ # end
470
+
471
# Without an argument, returns the last entry (self[-1]). With +n+,
# returns the trailing n entries; asking for more than are stored
# starts the slice at the first entry.
def last(n=nil)
  return self[-1] if n.nil?

  start = [length - n, 0].max
  self[start, n]
end
479
+
480
# Returns the number of entries in self, as reported by the index.
def length
  index.length
end
484
+
485
+ # def nitems
486
+ # not_implemented
487
+ # end
488
+
489
+ # def pack(aTemplateString)
490
+ # not_implemented
491
+ # end
492
+
493
+ # def pop
494
+ # not_implemented
495
+ # end
496
+
497
+ # def pretty_print(q)
498
+ # not_implemented
499
+ # end
500
+
501
+ # def pretty_print_cycle(q)
502
+ # not_implemented
503
+ # end
504
+
505
+ # def push(*obj)
506
+ # not_implemented
507
+ # end
508
+
509
+ # def quote
510
+ # not_implemented
511
+ # end
512
+
513
+ # def rassoc(key)
514
+ # not_implemented
515
+ # end
516
+
517
+ # def replace(another)
518
+ # not_implemented
519
+ # end
520
+
521
+ # def reverse
522
+ # not_implemented
523
+ # end
524
+
525
+ # def reverse!
526
+ # not_implemented
527
+ # end
528
+
529
+ # def reverse_each(&block)
530
+ # reverse_chunk do |offset, length|
531
+ # self[offset, length].reverse_each(&block)
532
+ # end
533
+ # end
534
+
535
+ # def rindex(obj)
536
+ # not_implemented
537
+ # end
538
+
539
+ # def select # :yield: item
540
+ # not_implemented
541
+ # end
542
+
543
+ # def shift
544
+ # not_implemented
545
+ # end
546
+
547
# Synonym for length: returns the number of entries in self.
def size
  length
end
551
+
552
+ # def slice(*args)
553
+ # self.call(:[], *args)
554
+ # end
555
+
556
+ # def slice!(*args)
557
+ # not_implemented
558
+ # end
559
+
560
# Loads the first +length+ entries (all of them by default) into an
# Array. A length of zero short-circuits to an empty Array without
# touching the index or io.
def to_a(length=self.length)
  return [] if length == 0

  self[0, length]
end
563
+
564
+ # def to_ary
565
+ # not_implemented
566
+ # end
567
+
568
+ # Returns _self_.join.
569
+ # def to_s
570
+ # self.join
571
+ # end
572
+
573
+ # def to_yaml(opts={})
574
+ # self[0, self.length].to_yaml(opts)
575
+ # end
576
+
577
+ # def transpose
578
+ # not_implemented
579
+ # end
580
+
581
+ # def uniq
582
+ # not_implemented
583
+ # end
584
+
585
+ # def uniq!
586
+ # not_implemented
587
+ # end
588
+
589
+ # def unshift(*obj)
590
+ # not_implemented
591
+ # end
592
+
593
+ # Returns an array containing the chars in io corresponding to the given
594
+ # selector(s). The selectors may be either integer indices or ranges
595
+ # def values_at(*selectors)
596
+ # selectors.collect {|s| self[s]}.flatten
597
+ # end
598
+
599
+ # def yaml_initialize(tag, val)
600
+ # not_implemented
601
+ # end
602
+
603
+ # def |(another)
604
+ # not_implemented
605
+ # end
606
+ end
607
+
608
+ # class Hold # :nodoc:
609
+ #
610
+ #
611
+ # #
612
+ # # def each_with_slice(&block)
613
+ # # index.each do |slice|
614
+ # # yield( read(slice), slice )
615
+ # # end
616
+ # # end
617
+ #
618
+ #
619
+ # def scan_collect(indicies=index, &block)
620
+ # return indicies if indicies.nil? || indicies.empty?
621
+ #
622
+ # collection = []
623
+ # span(*indicies) do |offset, length, spans|
624
+ # io.pos = offset
625
+ # scanner = StringScanner.new(io.read(length))
626
+ #
627
+ # spans.collect {|i| indicies[i]}.each do |span|
628
+ # soffset, slength = span
629
+ # sbegin = soffset - offset
630
+ # send = sbegin + slength
631
+ #
632
+ # scanner.pos = sbegin
633
+ # collection << yield(scanner, offset, span)
634
+ # raise "Oops! Scanned beyond end [begin, end, index]: #{[sbegin, send, span]}" if scanner.pos > send
635
+ # end
636
+ # end
637
+ # collection
638
+ # end
639
+ #
640
+ #
641
+ #
642
+ #
643
+ #
644
+ # # def set_modes(mode)
645
+ # # @uncached = parse_mode(mode, 'u')
646
+ # # end
647
+ # #
648
+ # # def parse_mode(mode, let)
649
+ # # if mode =~ Regexp.new(let, Regexp::IGNORECASE)
650
+ # # mode.delete!(let)
651
+ # # true
652
+ # # else
653
+ # # false
654
+ # # end
655
+ # # end
656
+ #
657
+ # #def default_span
658
+ # # use this rather than io.length because for very large io, io.length is prone to errors
659
+ # # last = index.last
660
+ # # [0, last.first + last.last]
661
+ # #end
662
+ #
663
+ # # def io_fetch(index, length)
664
+ # # io.pos = index unless io.pos = index
665
+ # #
666
+ # # if length.nil?
667
+ # # io.read(1)
668
+ # # else
669
+ # # results = []
670
+ # # str = io.read(length)
671
+ # # str.each_char {|c| results << c} if str
672
+ # # results
673
+ # # end
674
+ # # end
675
+ # #
676
+ # # # MUST increment io length to the end position if it writes past io.length
677
+ # # def io_store(index, value)
678
+ # # io.pos = index unless io.pos = index
679
+ # # end_pos = index + io.write(value)
680
+ # # io.length = end_pos if end_pos > io.length
681
+ # # end
682
+ #
683
+ # def index=(index)
684
+ # @index.close if @index.respond_to?(:close)
685
+ #
686
+ # # cache the index if specified
687
+ # @index = if cached?
688
+ # # if in cached mode, transform
689
+ #
690
+ # case index
691
+ # when Array then index
692
+ # when ExtInd
693
+ # cached_index = index.read(nil, 0)
694
+ # index.close
695
+ # cached_index
696
+ # when nil
697
+ # if io.respond_to?(:path)
698
+ # index_file = self.class.default_index_filepath(io.path)
699
+ # ExtInd.read(index_file, index_options)
700
+ # else
701
+ # []
702
+ # end
703
+ # else
704
+ # raise "unknown index type: #{index}"
705
+ # end
706
+ # else
707
+ # case index
708
+ # when Array
709
+ # uncached_index = ExtInd.new(nil, index_options)
710
+ # uncached_index.write(index)
711
+ # uncached_index
712
+ # when ExtInd then index
713
+ # when nil
714
+ # if io.respond_to?(:path)
715
+ # index_file = self.class.default_index_filepath(io.path)
716
+ # ExtInd.open(index_file, index_options)
717
+ # else
718
+ # ExtInd.new(nil, index_options)
719
+ # end
720
+ # else
721
+ # raise "unknown index type: #{index}"
722
+ # end
723
+ # end
724
+ #
725
+ #
726
+ # end
727
+ # end