external 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. data/History +5 -0
  2. data/MIT-LICENSE +21 -0
  3. data/README +168 -0
  4. data/lib/ext_arc.rb +108 -0
  5. data/lib/ext_arr.rb +727 -0
  6. data/lib/ext_ind.rb +1120 -0
  7. data/lib/external/base.rb +85 -0
  8. data/lib/external/chunkable.rb +105 -0
  9. data/lib/external/enumerable.rb +137 -0
  10. data/lib/external/io.rb +398 -0
  11. data/lib/external.rb +3 -0
  12. data/test/benchmarks/benchmarks_20070918.txt +45 -0
  13. data/test/benchmarks/benchmarks_20070921.txt +91 -0
  14. data/test/benchmarks/benchmarks_20071006.txt +147 -0
  15. data/test/benchmarks/test_copy_file.rb +80 -0
  16. data/test/benchmarks/test_pos_speed.rb +47 -0
  17. data/test/benchmarks/test_read_time.rb +55 -0
  18. data/test/cached_ext_ind_test.rb +219 -0
  19. data/test/check/benchmark_check.rb +441 -0
  20. data/test/check/namespace_conflicts_check.rb +23 -0
  21. data/test/check/pack_check.rb +90 -0
  22. data/test/ext_arc_test.rb +286 -0
  23. data/test/ext_arr/alt_sep.txt +3 -0
  24. data/test/ext_arr/cr_lf_input.txt +3 -0
  25. data/test/ext_arr/input.index +0 -0
  26. data/test/ext_arr/input.txt +1 -0
  27. data/test/ext_arr/inputb.index +0 -0
  28. data/test/ext_arr/inputb.txt +1 -0
  29. data/test/ext_arr/lf_input.txt +3 -0
  30. data/test/ext_arr/lines.txt +19 -0
  31. data/test/ext_arr/without_index.txt +1 -0
  32. data/test/ext_arr_test.rb +534 -0
  33. data/test/ext_ind_test.rb +1472 -0
  34. data/test/external/base_test.rb +74 -0
  35. data/test/external/chunkable_test.rb +182 -0
  36. data/test/external/index/input.index +0 -0
  37. data/test/external/index/inputb.index +0 -0
  38. data/test/external/io_test.rb +414 -0
  39. data/test/external_test_helper.rb +31 -0
  40. data/test/external_test_suite.rb +4 -0
  41. data/test/test_array.rb +1192 -0
  42. metadata +104 -0
data/lib/ext_arr.rb ADDED
@@ -0,0 +1,727 @@
1
+ require 'external/base'
2
+ require 'ext_ind'
3
+ require 'yaml'
4
+
5
+ #--
6
+ # later separate out individual objects logically
7
+ # If writing, create new files:
8
+ # - base/object_id.aio (new file for receiving appends)
9
+ # - base/object_id.index (copy of existing index -- made on first insertion)
10
+ # - in index, -index indicates object_id.aio file whereas +index indicates original file
11
+ # - .consolidate(rename) resolves changes in index into the object_id file, renaming as needed
12
+ # requires index rewrite as well, to remove negatives
13
+ #
14
+ # If appending, ONLY allow << and all changes get committed to the original file.
15
+ #
16
+ # This should allow returning of new arrayio objects under read/write conditions
17
+ # By default read-only. No insertions. New ExtArr objects inherit parent mode.
18
+ #
19
+ # Independent modes:
20
+ # - r
21
+ # - r+
22
+ # - For safety, w/w+ will by default act as r/r+, simply creating new .aio and .index files
23
+ # changes to the originals will NOT be made unless .consolidate(rename) is used. Allow option io_w => true
24
+ # - b ALWAYS on with Windows
25
+ #++
26
+
27
+ class ExtArr < External::Base
28
+ class << self
29
# Builds a new instance and stores each of +args+ into it, in the order
# given, always writing at the current end of the collection.
# Returns the populated instance.
def [](*args)
  args.inject(self.new) do |collection, item|
    collection[collection.length] = item
    collection
  end
end
36
+
37
# Options passed to ExtInd.open when the index is created in #initialize.
# A new Hash is built on every call, so callers may modify the result.
def default_index_options
  defaults = {}
  defaults[:format] = 'II'
  defaults[:nil_value] = [0,0]
  defaults[:cached] = true
  defaults
end
40
+
41
# Derives the index filepath for +filepath+ by swapping its extension
# (as reported by File.extname) for '.index'.  A path without an
# extension simply gets '.index' appended.
def default_index_filepath(filepath)
  extension = File.extname(filepath)
  stem = filepath.chomp(extension)
  stem + '.index'
end
44
+ end
45
+
46
+ attr_reader :index, :pos_index, :length_index
47
+
48
# Sets up the array on top of +io+ and opens the ExtInd used as its index.
#
# Recognized options:
# :pos_index::    slot of an index entry holding the entry position
#                 (default 0)
# :length_index:: slot of an index entry holding the entry length
#                 (default 1)
# :index_file::   path of the index file; when the key is absent and
#                 +io+ is a File, the path is inferred with
#                 default_index_filepath
#
# A nil index file is passed straight through to ExtInd.open; per the
# original author's note this results in a Tempfile-backed index
# (not verifiable from this file).
def initialize(io=nil, options={})
  super(io)

  @max_gap = 10000
  @max_chunk_size = 1000000

  @pos_index = options[:pos_index].nil? ? 0 : options[:pos_index]
  @length_index = options[:length_index].nil? ? 1 : options[:length_index]

  # TODO -- merge in specified index options
  index_options = self.class.default_index_options

  # an explicitly supplied :index_file (even nil) wins over inference
  index_file = if options.has_key?(:index_file)
    options[:index_file]
  elsif io.kind_of?(File)
    self.class.default_index_filepath(io.path)
  end

  # Ensure the file exists before trying to open it.  The nil guard
  # keeps an explicit :index_file => nil from raising inside the
  # existence check; File.exist? replaces File.exists?, which was
  # removed in Ruby 3.2.
  if io.kind_of?(File) && index_file && !File.exist?(index_file)
    FileUtils.touch(index_file)
  end

  @index = ExtInd.open(index_file, "r+", index_options)
end
78
+
79
# True only when the superclass reports closed AND the index is either
# not closable (does not respond to close) or is itself closed.
def closed?
  base_closed = super
  return base_closed unless base_closed
  return true unless index.respond_to?(:close)
  index.closed?
end
82
+
83
# Closes via the superclass, then closes the index when it supports
# close and is not already closed.
def close
  super
  index.close if index.respond_to?(:close) && !index.closed?
end
87
+
88
# Clears the index and rebuilds it from the contents of io.
#
# With a block, the cleared index is yielded and the caller is
# responsible for populating it.  Without a block, io is scanned using
# '---' as the line separator and one [position, length] pair is
# written to the index per entry.
#
# Positions are accumulated from the size of each chunk read rather
# than from io.pos (the original author notes io.pos cannot return
# positions greater than ~2.1e9).  Sizes are measured in BYTES
# (bytesize), because the stored offsets are later used with io.pos=
# and io.read; counting characters would misplace every entry after
# the first multibyte character.  Assumes io.length is also in bytes
# -- TODO confirm against External::Io.
#
# Returns self.
def reindex
  index.clear
  io.flush unless io.generic_mode == "r"
  io.rewind

  if block_given?
    yield(index)
  else
    current_pos = 0
    entry_begin = 0
    io_length = io.length

    io.each_line('---') do |line|
      current_pos += line.bytesize

      # every chunk but the last ends with the 3-byte separator, which
      # belongs to the NEXT entry
      entry_end = current_pos - (current_pos == io_length ? 0 : 3)

      # a leading separator produces an empty span; skip it
      next if entry_begin == entry_end

      index.unframed_write [entry_begin, entry_end - entry_begin]
      entry_begin = entry_end
    end
  end

  self
end
114
+
115
# Converts the string read from io back into an entry by loading it as
# YAML.  A nil string yields nil.
def str_to_entry(str)
  return nil if str == nil
  YAML.load(str)
end
118
+
119
# Serializes +entry+ to the YAML string that gets written to io.
#
# The full document (including the leading '--- ') is kept as is;
# trimming it would save space at the cost of extra work on every
# read and write.
def entry_to_str(entry)
  entry.to_yaml
end
126
+
127
# Extracts the io position from a single index entry, i.e. the value
# stored in slot pos_index of +index+.
# Note: the parameter is one index ENTRY, not the index itself.
def entry_pos(index)
  index[self.pos_index]
end
130
+
131
# Extracts the entry length from a single index entry, i.e. the value
# stored in slot length_index of +index+.
# Note: the parameter is one index ENTRY, not the index itself.
def entry_length(index)
  index[self.length_index]
end
134
+
135
+ ###########################
136
+ # Array methods
137
+ ###########################
138
+
139
+ # def &(another)
140
+ # not_implemented
141
+ # end
142
+
143
+ # def *(arg)
144
+ # not_implemented
145
+ # end
146
+
147
# Appends the items of +another+ to self by delegating to concat.
# Unlike Array#+, this modifies the receiver; the value returned is
# whatever concat returns.
def +(another)
  concat(another)
end
150
+
151
+ # def -(another)
152
+ # not_implemented
153
+ # end
154
+
155
# Appends +obj+ as a new last entry and returns self so that calls
# can be chained.
def <<(obj)
  next_slot = length
  self[next_slot] = obj
  self
end
159
+
160
# Compares self with an Array or another ExtArr.
#
# Only as many entries of the longer collection as the shorter one
# holds are loaded; when those compare equal the longer collection
# wins.  For two ExtArr with equal indexes, 0 is returned early when
# io.quick_compare reports the ios as equal.
#
# Raises TypeError for any other kind of argument.
def <=>(another)
  case another
  when Array
    # same or greater length on the other side: plain array comparison
    return self.to_a <=> another unless another.length < self.length

    # another is shorter: compare against the matching prefix of self;
    # a tie means self is the longer array and wins
    prefix_order = (self.to_a(another.length) <=> another)
    prefix_order == 0 ? 1 : prefix_order

  when ExtArr
    # equal indexes permit the 'quick' comparison of the ios
    return 0 if self.index == another.index && self.io.quick_compare(another.io)

    self.io.flush
    another.io.flush

    # should chunk compare
    own_count = self.length
    other_count = another.length

    if other_count > own_count
      order = (self.to_a <=> another.to_a(own_count))
      order == 0 ? -1 : order
    elsif other_count < own_count
      order = (self.to_a(other_count) <=> another.to_a)
      order == 0 ? 1 : order
    else
      self.to_a <=> another.to_a
    end

  else
    raise TypeError.new("can't convert from #{another.class} to ExtArr or Array")
  end
end
197
+
198
# Equality against an Array or another ExtArr; anything else is false.
#
# Lengths are checked first.  Against an Array the loaded entries are
# compared directly.  Against an ExtArr with an equal index, a zero
# result from io.sort_compare short-circuits to true; otherwise both
# sides are loaded and compared as arrays.
def ==(another)
  other_is_array = another.kind_of?(Array)
  return false unless other_is_array || another.kind_of?(ExtArr)

  # cheap rejection based on length alone
  return false unless self.length == another.length

  return self.to_a == another if other_is_array

  # equal indexes permit the 'quick' comparison of the ios
  if self.index == another.index
    return true if (self.io.sort_compare(another.io, (self.index.buffer_size/2).ceil)) == 0
  end

  self.to_a == another.to_a
end
225
+
226
# Element reference, following the calling conventions of Array#[]:
#
#   self[i]          -> the entry at i (nil when the index has none)
#   self[range]      -> array of entries
#   self[start, len] -> array of entries
#
# The index is consulted first; whatever it returns for an out-of-range
# or empty request (nil or an empty collection) is returned unchanged.
# Otherwise each index entry is resolved by positioning io, reading the
# entry's length and converting the string with str_to_entry.
def [](input, length=nil)
  # two call types are required because while ExtInd can take
  # a nil length, Array cannot and index can be either
  entries = (length.nil? ? index[input] : index[input, length])

  # for conformance with array range retrieval
  return entries if entries.nil? || entries.empty?

  if length.nil? && !input.kind_of?(Range)
    # single entry: +entries+ is one index entry, just read it.
    # (A dangling 'pos, length =' used to precede this statement and
    # silently absorbed it as its right-hand side; it had no effect on
    # the result and has been removed.)
    io.pos = entry_pos(entries)
    str_to_entry( io.read(entry_length(entries)) )
  else
    pos = nil
    entries.collect do |entry|
      # only set io position if necessary -- consecutive entries are
      # read without repositioning
      epos = entry_pos(entry)
      unless pos == epos
        pos = epos
        io.pos = pos
      end

      # track where io will be after the read below
      elen = entry_length(entry)
      pos += elen

      str_to_entry( io.read(elen) )
    end
  end
end
259
+
260
# Element assignment, following the calling conventions of Array#[]=:
#
#   self[i] = value
#   self[range] = value        (delegates to the start/length form)
#   self[start, len] = value
#
# New data is always appended to the end of io; only the index is
# updated to point at it, so replaced entries are not removed from io.
#
# Raises ArgumentError unless 2 or 3 arguments are given, TypeError
# when a Range is combined with a length, and IndexError when a
# negative position reaches before the first entry.
def []=(*args)
  # report the real argument count, and reject surplus arguments
  # instead of silently ignoring them
  unless args.length == 2 || args.length == 3
    raise ArgumentError.new("wrong number of arguments (#{args.length} for 2)")
  end

  # locals are deliberately NOT named index/length so they cannot be
  # confused with the methods of the same name
  if args.length == 2
    position, value = args
    span = nil
  else
    position, span, value = args
  end

  if position.kind_of?(Range)
    raise TypeError.new("can't convert Range into Integer") if args.length == 3
    # for conformance with setting a range with nil (truncates)
    value = [] if value.nil?
    offset, span = split_range(position)
    return (self[offset, span + 1] = value)
  end

  position += self.length if position < 0
  raise IndexError.new("index #{position} out of range") if position < 0

  # all writes go to the current end of io
  write_pos = self.io.length
  io.pos = write_pos

  if args.length == 2
    # write entry to io first as a check
    # that io is open for writing.
    written = io.write( entry_to_str(value) )
    io.length += written

    self.index[position] = [write_pos, written]
  else
    items = case value
    when Array then value
    when ExtArr
      # special case: assigning self to a slice of itself would read
      # and write the same io, producing incorrect results, so the
      # entries are loaded up front (potentially a huge amount of data)
      value.equal?(self) ? self.to_a : value
    else
      [value]
    end

    slots = []
    items.each do |item|
      written = io.write( entry_to_str(item) )
      slots << [write_pos, written]

      io.length += written
      write_pos += written
    end

    self.index[position, span] = slots
  end
end
321
+
322
+ # def abbrev(pattern=nil)
323
+ # not_implemented
324
+ # end
325
+
326
+ # def assoc(obj)
327
+ # not_implemented
328
+ # end
329
+
330
# Returns the entry stored at +index+; shorthand for self[index].
def at(index)
  self[index]
end
334
+
335
# Empties _self_: io is truncated to zero length and the index is
# cleared.  Returns _self_.
def clear
  io.truncate(0)
  index.clear

  self
end
341
+
342
+ # def compact
343
+ # not_implemented
344
+ # end
345
+
346
+ # def compact!
347
+ # not_implemented
348
+ # end
349
+
350
# Appends every item of +another+ (an Array or ExtArr) to the end of
# self, one entry at a time.  Returns self.
#
# Raises TypeError for any other kind of argument.
def concat(another)
  unless another.kind_of?(Array) || another.kind_of?(ExtArr)
    raise TypeError.new("can't convert #{another.class} into ExtArr or Array")
  end

  another.each do |item|
    self[length] = item
  end

  self
end
359
+
360
+ # def dclone
361
+ # not_implemented
362
+ # end
363
+
364
+ # def delete(obj)
365
+ # not_implemented
366
+ # end
367
+
368
+ # def delete_at(index)
369
+ # not_implemented
370
+ # end
371
+
372
+ # def delete_if # :yield: item
373
+ # not_implemented
374
+ # end
375
+
376
# Walks the index in order, reads each entry from io and yields the
# converted entry.  Returns self.
#
# io is only repositioned when an entry does not start where the
# previous read ended.
def each(&block) # :yield: item
  cursor = nil
  index.each do |entry|
    start = entry[pos_index]
    span = entry[length_index]

    # only set io position if necessary
    if cursor != start
      cursor = start
      io.pos = cursor
    end

    # where io will be once this entry has been read
    cursor += span

    yield str_to_entry( io.read(span) )
  end
  self
end
392
+
393
# Passes each position, from 0 up to length-1, to the block.
# Returns self.
def each_index(&block) # :yield: index
  length.times(&block)
  self
end
398
+
399
# True when _self_ holds no entries (length is zero).
def empty?
  length.zero?
end
403
+
404
# Same test as ==; no stricter type check is applied.
def eql?(another)
  self == another
end
407
+
408
+ # def fetch(index, default=nil, &block)
409
+ # index += index_length if index < 0
410
+ # val = (index >= length ? default : self[index])
411
+ # block_given? ? yield(val) : val
412
+ # end
413
+ #
414
+ # def fill(*args)
415
+ # not_implemented
416
+ # end
417
+
418
# Without an argument, returns the first entry.  With +n+, returns the
# slice of up to n entries starting at position 0.
def first(n=nil)
  return self[0] if n.nil?
  self[0, n]
end
422
+
423
+ # def flatten
424
+ # not_implemented
425
+ # end
426
+
427
+ # def flatten!
428
+ # not_implemented
429
+ # end
430
+
431
+ # def frozen?
432
+ # not_implemented
433
+ # end
434
+
435
+ # def hash
436
+ # not_implemented
437
+ # end
438
+
439
+ # def include?(obj)
440
+ # not_implemented
441
+ # end
442
+
443
+ # def index(obj)
444
+ # not_implemented
445
+ # end
446
+ #
447
+ # def indexes(*args)
448
+ # values_at(*args)
449
+ # end
450
+ #
451
+ # def indicies(*args)
452
+ # values_at(*args)
453
+ # end
454
+
455
+ # def replace(other)
456
+ # not_implemented
457
+ # end
458
+
459
+ # def insert(index, *obj)
460
+ # self[index] = obj
461
+ # end
462
+
463
+ # def inspect
464
+ # not_implemented
465
+ # end
466
+
467
+ # def join(sep=$,)
468
+ # not_implemented
469
+ # end
470
+
471
# Without an argument, returns the last entry.  With +n+, returns the
# slice of up to n entries taken from the end; when n exceeds the
# length the slice starts at position 0.
def last(n=nil)
  return self[-1] if n.nil?

  gap = length-n
  offset = gap < 0 ? 0 : gap
  self[offset, n]
end
479
+
480
# Number of entries in self, as reported by the index.
def length
  index.length
end
484
+
485
+ # def nitems
486
+ # not_implemented
487
+ # end
488
+
489
+ # def pack(aTemplateString)
490
+ # not_implemented
491
+ # end
492
+
493
+ # def pop
494
+ # not_implemented
495
+ # end
496
+
497
+ # def pretty_print(q)
498
+ # not_implemented
499
+ # end
500
+
501
+ # def pretty_print_cycle(q)
502
+ # not_implemented
503
+ # end
504
+
505
+ # def push(*obj)
506
+ # not_implemented
507
+ # end
508
+
509
+ # def quote
510
+ # not_implemented
511
+ # end
512
+
513
+ # def rassoc(key)
514
+ # not_implemented
515
+ # end
516
+
517
+ # def replace(another)
518
+ # not_implemented
519
+ # end
520
+
521
+ # def reverse
522
+ # not_implemented
523
+ # end
524
+
525
+ # def reverse!
526
+ # not_implemented
527
+ # end
528
+
529
+ # def reverse_each(&block)
530
+ # reverse_chunk do |offset, length|
531
+ # self[offset, length].reverse_each(&block)
532
+ # end
533
+ # end
534
+
535
+ # def rindex(obj)
536
+ # not_implemented
537
+ # end
538
+
539
+ # def select # :yield: item
540
+ # not_implemented
541
+ # end
542
+
543
+ # def shift
544
+ # not_implemented
545
+ # end
546
+
547
# Number of entries in self; identical to length.
def size
  length
end
551
+
552
+ # def slice(*args)
553
+ # self.call(:[], *args)
554
+ # end
555
+
556
+ # def slice!(*args)
557
+ # not_implemented
558
+ # end
559
+
560
# Loads the first +length+ entries (all of them by default) into an
# Array.  A length of 0 returns an empty Array without touching the
# index or io.
def to_a(length=self.length)
  return [] if length == 0
  self[0, length]
end
563
+
564
+ # def to_ary
565
+ # not_implemented
566
+ # end
567
+
568
+ # Returns _self_.join.
569
+ # def to_s
570
+ # self.join
571
+ # end
572
+
573
+ # def to_yaml(opts={})
574
+ # self[0, self.length].to_yaml(opts)
575
+ # end
576
+
577
+ # def transpose
578
+ # not_implemented
579
+ # end
580
+
581
+ # def uniq
582
+ # not_implemented
583
+ # end
584
+
585
+ # def uniq!
586
+ # not_implemented
587
+ # end
588
+
589
+ # def unshift(*obj)
590
+ # not_implemented
591
+ # end
592
+
593
+ # Returns an array containing the chars in io corresponding to the given
594
+ # selector(s). The selectors may be either integer indices or ranges
595
+ # def values_at(*selectors)
596
+ # selectors.collect {|s| self[s]}.flatten
597
+ # end
598
+
599
+ # def yaml_initialize(tag, val)
600
+ # not_implemented
601
+ # end
602
+
603
+ # def |(another)
604
+ # not_implemented
605
+ # end
606
+ end
607
+
608
+ # class Hold # :nodoc:
609
+ #
610
+ #
611
+ # #
612
+ # # def each_with_slice(&block)
613
+ # # index.each do |slice|
614
+ # # yield( read(slice), slice )
615
+ # # end
616
+ # # end
617
+ #
618
+ #
619
+ # def scan_collect(indicies=index, &block)
620
+ # return indicies if indicies.nil? || indicies.empty?
621
+ #
622
+ # collection = []
623
+ # span(*indicies) do |offset, length, spans|
624
+ # io.pos = offset
625
+ # scanner = StringScanner.new(io.read(length))
626
+ #
627
+ # spans.collect {|i| indicies[i]}.each do |span|
628
+ # soffset, slength = span
629
+ # sbegin = soffset - offset
630
+ # send = sbegin + slength
631
+ #
632
+ # scanner.pos = sbegin
633
+ # collection << yield(scanner, offset, span)
634
+ # raise "Oops! Scanned beyond end [begin, end, index]: #{[sbegin, send, span]}" if scanner.pos > send
635
+ # end
636
+ # end
637
+ # collection
638
+ # end
639
+ #
640
+ #
641
+ #
642
+ #
643
+ #
644
+ # # def set_modes(mode)
645
+ # # @uncached = parse_mode(mode, 'u')
646
+ # # end
647
+ # #
648
+ # # def parse_mode(mode, let)
649
+ # # if mode =~ Regexp.new(let, Regexp::IGNORECASE)
650
+ # # mode.delete!(let)
651
+ # # true
652
+ # # else
653
+ # # false
654
+ # # end
655
+ # # end
656
+ #
657
+ # #def default_span
658
+ # # use this rather than io.length because for very large io, io.length is prone to errors
659
+ # # last = index.last
660
+ # # [0, last.first + last.last]
661
+ # #end
662
+ #
663
+ # # def io_fetch(index, length)
664
+ # # io.pos = index unless io.pos = index
665
+ # #
666
+ # # if length.nil?
667
+ # # io.read(1)
668
+ # # else
669
+ # # results = []
670
+ # # str = io.read(length)
671
+ # # str.each_char {|c| results << c} if str
672
+ # # results
673
+ # # end
674
+ # # end
675
+ # #
676
+ # # # MUST increment io length to the end position if it writes past io.length
677
+ # # def io_store(index, value)
678
+ # # io.pos = index unless io.pos = index
679
+ # # end_pos = index + io.write(value)
680
+ # # io.length = end_pos if end_pos > io.length
681
+ # # end
682
+ #
683
+ # def index=(index)
684
+ # @index.close if @index.respond_to?(:close)
685
+ #
686
+ # # cache the index if specified
687
+ # @index = if cached?
688
+ # # if in cached mode, transform
689
+ #
690
+ # case index
691
+ # when Array then index
692
+ # when ExtInd
693
+ # cached_index = index.read(nil, 0)
694
+ # index.close
695
+ # cached_index
696
+ # when nil
697
+ # if io.respond_to?(:path)
698
+ # index_file = self.class.default_index_filepath(io.path)
699
+ # ExtInd.read(index_file, index_options)
700
+ # else
701
+ # []
702
+ # end
703
+ # else
704
+ # raise "unknown index type: #{index}"
705
+ # end
706
+ # else
707
+ # case index
708
+ # when Array
709
+ # uncached_index = ExtInd.new(nil, index_options)
710
+ # uncached_index.write(index)
711
+ # uncached_index
712
+ # when ExtInd then index
713
+ # when nil
714
+ # if io.respond_to?(:path)
715
+ # index_file = self.class.default_index_filepath(io.path)
716
+ # ExtInd.open(index_file, index_options)
717
+ # else
718
+ # ExtInd.new(nil, index_options)
719
+ # end
720
+ # else
721
+ # raise "unknown index type: #{index}"
722
+ # end
723
+ # end
724
+ #
725
+ #
726
+ # end
727
+ # end