zip_tricks 4.5.2 → 4.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -53,7 +53,7 @@ class ZipDownload
53
53
 
54
54
  # Add a Content-Disposition so that the download has a .zip extension
55
55
  # (this will not work well with UTF-8 filenames on Windows, but hey!)
56
- content_disposition = format('attachment; filename=%s.zip', filename)
56
+ content_disposition = 'attachment; filename=%<filename>s.zip' % {filename: filename}
57
57
 
58
58
  # and return the response, adding the Content-Length we have computed earlier
59
59
  [
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # rubocop:disable Naming/ConstantName
4
-
5
3
  require 'stringio'
6
4
 
7
5
  # A very barebones ZIP file reader. Is made for maximum interoperability, but at the same
@@ -146,8 +144,8 @@ class ZipTricks::FileReader
146
144
  when 0
147
145
  StoredReader.new(from_io, compressed_size)
148
146
  else
149
- raise UnsupportedFeature, format('Unsupported storage mode for reading - %d',
150
- storage_mode)
147
+ raise UnsupportedFeature, 'Unsupported storage mode for reading - %<storage_mode>d' %
148
+ {storage_mode: storage_mode}
151
149
  end
152
150
  end
153
151
 
@@ -208,7 +206,10 @@ class ZipTricks::FileReader
208
206
  num_files_and_central_directory_offset(io, eocd_offset)
209
207
  end
210
208
 
211
- log { format('Located the central directory start at %d', cdir_location) }
209
+ log do
210
+ 'Located the central directory start at %<location>d' %
211
+ {location: cdir_location}
212
+ end
212
213
  seek(io, cdir_location)
213
214
 
214
215
  # Read the entire central directory AND anything behind it, in one fell swoop.
@@ -227,15 +228,15 @@ class ZipTricks::FileReader
227
228
  central_directory_str = io.read # and not read_n(io, cdir_size), see above
228
229
  central_directory_io = StringIO.new(central_directory_str)
229
230
  log do
230
- format('Read %d bytes with central directory + EOCD record and locator',
231
- central_directory_str.bytesize)
231
+ 'Read %<byte_size>d bytes with central directory + EOCD record and locator' %
232
+ {byte_size: central_directory_str.bytesize}
232
233
  end
233
234
 
234
235
  entries = (0...num_files).map do |entry_n|
235
236
  offset_location = cdir_location + central_directory_io.tell
236
237
  log do
237
- format('Reading the central directory entry %d starting at offset %d',
238
- entry_n, offset_location)
238
+ 'Reading the central directory entry %<entry_n>d starting at offset %<offset>d' %
239
+ {entry_n: entry_n, offset: offset_location}
239
240
  end
240
241
  read_cdir_entry(central_directory_io)
241
242
  end
@@ -272,16 +273,16 @@ class ZipTricks::FileReader
272
273
  entries << entry
273
274
  next_local_header_offset = entry.compressed_data_offset + entry.compressed_size
274
275
  log do
275
- format('Recovered a local file file header at offset %d, seeking to the next at %d',
276
- cur_offset, next_local_header_offset)
276
+ 'Recovered a local file file header at offset %<cur_offset>d, seeking to the next at %<header_offset>d' %
277
+ {cur_offset: cur_offset, header_offset: next_local_header_offset}
277
278
  end
278
279
  seek(io, next_local_header_offset) # Seek to the next entry, and raise if seek is impossible
279
280
  end
280
281
  entries
281
282
  rescue ReadError
282
283
  log do
283
- format('Got a read/seek error after reaching %d, no more entries can be recovered',
284
- cur_offset)
284
+ 'Got a read/seek error after reaching %<cur_offset>d, no more entries can be recovered' %
285
+ {cur_offset: cur_offset}
285
286
  end
286
287
  entries
287
288
  end
@@ -332,8 +333,8 @@ class ZipTricks::FileReader
332
333
  # the values fetched from the conventional header
333
334
  zip64_extra = StringIO.new(zip64_extra_contents)
334
335
  log do
335
- format('Will read Zip64 extra data from local header field for %s, %d bytes',
336
- e.filename, zip64_extra.size)
336
+ 'Will read Zip64 extra data from local header field for %<filename>s, %<size>d bytes' %
337
+ {filename: e.filename, size: zip64_extra.size}
337
338
  end
338
339
  # Now here be dragons. The APPNOTE specifies that
339
340
  #
@@ -396,8 +397,8 @@ class ZipTricks::FileReader
396
397
  def read_local_headers(entries, io)
397
398
  entries.each_with_index do |entry, i|
398
399
  log do
399
- format('Reading the local header for entry %d at offset %d',
400
- i, entry.local_file_header_offset)
400
+ 'Reading the local header for entry %<index>d at offset %<offset>d' %
401
+ {index: i, offset: entry.local_file_header_offset}
401
402
  end
402
403
  off = get_compressed_data_offset(io: io,
403
404
  local_file_header_offset: entry.local_file_header_offset)
@@ -429,7 +430,6 @@ class ZipTricks::FileReader
429
430
 
430
431
  def assert_signature(io, signature_magic_number)
431
432
  readback = read_4b(io)
432
- # Rubocop: Use a guard clause instead of wrapping the code inside a conditional expression
433
433
  if readback != signature_magic_number
434
434
  expected = '0x0' + signature_magic_number.to_s(16)
435
435
  actual = '0x0' + readback.to_s(16)
@@ -460,24 +460,21 @@ class ZipTricks::FileReader
460
460
  end
461
461
 
462
462
  def read_2b(io)
463
- read_n(io, 2).unpack(C_v).shift
463
+ read_n(io, 2).unpack(C_UINT2).shift
464
464
  end
465
465
 
466
466
  def read_4b(io)
467
- read_n(io, 4).unpack(C_V).shift
467
+ read_n(io, 4).unpack(C_UINT4).shift
468
468
  end
469
469
 
470
470
  def read_8b(io)
471
- read_n(io, 8).unpack(C_Qe).shift
471
+ read_n(io, 8).unpack(C_UINT8).shift
472
472
  end
473
473
 
474
474
  def read_cdir_entry(io)
475
- # Rubocop: convention: Assignment Branch Condition size for
476
475
  # read_cdir_entry is too high. [45.66/15]
477
- # Rubocop: convention: Method has too many lines. [30/10]
478
476
  assert_signature(io, 0x02014b50)
479
477
  ZipEntry.new.tap do |e|
480
- # Rubocop: convention: Block has too many lines. [27/25]
481
478
  e.made_by = read_2b(io)
482
479
  e.version_needed_to_extract = read_2b(io)
483
480
  e.gp_flags = read_2b(io)
@@ -510,8 +507,8 @@ class ZipTricks::FileReader
510
507
  # the values fetched from the conventional header
511
508
  zip64_extra = StringIO.new(zip64_extra_contents)
512
509
  log do
513
- format('Will read Zip64 extra data for %s, %d bytes',
514
- e.filename, zip64_extra.size)
510
+ 'Will read Zip64 extra data for %<filename>s, %<size>d bytes' %
511
+ {filename: e.filename, size: zip64_extra.size}
515
512
  end
516
513
  # Now here be dragons. The APPNOTE specifies that
517
514
  #
@@ -522,16 +519,9 @@ class ZipTricks::FileReader
522
519
  #
523
520
  # It means that before we read this stuff we need to check if the previously-read
524
521
  # values are at overflow, and only _then_ proceed to read them. Bah.
525
- # Rubocop: convention: Line is too long.
526
- if e.uncompressed_size == 0xFFFFFFFF
527
- e.uncompressed_size = read_8b(zip64_extra)
528
- end
529
- if e.compressed_size == 0xFFFFFFFF
530
- e.compressed_size = read_8b(zip64_extra)
531
- end
532
- if e.local_file_header_offset == 0xFFFFFFFF
533
- e.local_file_header_offset = read_8b(zip64_extra)
534
- end
522
+ e.uncompressed_size = read_8b(zip64_extra) if e.uncompressed_size == 0xFFFFFFFF
523
+ e.compressed_size = read_8b(zip64_extra) if e.compressed_size == 0xFFFFFFFF
524
+ e.local_file_header_offset = read_8b(zip64_extra) if e.local_file_header_offset == 0xFFFFFFFF
535
525
  # Disk number comes last and we can skip it anyway, since we do
536
526
  # not support multi-disk archives
537
527
  end
@@ -553,54 +543,56 @@ class ZipTricks::FileReader
553
543
  raise MissingEOCD unless eocd_idx_in_buf
554
544
 
555
545
  eocd_offset = implied_position_of_eocd_record + eocd_idx_in_buf
556
- log { format('Found EOCD signature at offset %d', eocd_offset) }
546
+ log do
547
+ 'Found EOCD signature at offset %<offset>d' % {offset: eocd_offset}
548
+ end
557
549
 
558
550
  eocd_offset
559
551
  end
560
552
 
561
- # This is tricky. Essentially, we have to scan the maximum possible number
562
- # of bytes (that the EOCD can theoretically occupy including the comment),
553
+ def all_indices_of_substr_in_str(of_substring, in_string)
554
+ last_i = 0
555
+ found_at_indices = []
556
+ while last_i = in_string.index(of_substring, last_i)
557
+ found_at_indices << last_i
558
+ last_i += of_substring.bytesize
559
+ end
560
+ found_at_indices
561
+ end
562
+
563
+ # We have to scan the maximum possible number
564
+ # of bytes that the EOCD can theoretically occupy including the comment after it,
563
565
  # and we have to find a combination of:
564
- # [EOCD signature, <some ZIP metadata>, comment byte size, the comment of
565
- # that size, eof].
566
- # The only way I could find to do this was with a sliding window, but
567
- # there probably is a better way.
568
- # Rubocop: convention: Assignment Branch Condition size for
569
- # locate_eocd_signature is too high. [17.49/15]
570
- # Rubocop: convention: Method has too many lines. [14/10]
566
+ # [EOCD signature, <some ZIP metadata>, comment byte size, comment of that size]
567
+ # at the end. To do so, we first find all indices of the signature in the trailer
568
+ # string, and then check whether the bytestring starting at the signature and
569
+ # ending at the end of string satisfies that given pattern.
571
570
  def locate_eocd_signature(in_str)
572
- # We have to scan from the _very_ tail. We read the very minimum size
573
- # the EOCD record can have (up to and including the comment size), using
574
- # a sliding window. Once our end offset matches the comment size we found our
575
- # EOCD marker.
571
+ eocd_signature = 0x06054b50
572
+ eocd_signature_str = [eocd_signature].pack('V')
576
573
  unpack_pattern = 'VvvvvVVv'
577
574
  minimum_record_size = 22
578
- end_location = minimum_record_size * -1
579
- loop do
580
- # If the window is nil, we have rolled off the start of the string, nothing to do here.
581
- # We use negative values because if we used positive slice indices
582
- # we would have to detect the rollover ourselves
583
- break unless window = in_str[end_location, minimum_record_size]
584
-
585
- window_location = in_str.bytesize + end_location
586
- unpacked = window.unpack(unpack_pattern)
587
- # If we found the signature, pick up the comment size, and check if the size of the window
588
- # plus that comment size is where we are in the string. If we are - bingo.
589
- if unpacked[0] == 0x06054b50 && comment_size = unpacked[-1]
590
- assumed_eocd_location = in_str.bytesize - comment_size - minimum_record_size
591
- # if the comment size is where we should be at - we found our EOCD
592
- return assumed_eocd_location if assumed_eocd_location == window_location
575
+ str_size = in_str.bytesize
576
+ indices = all_indices_of_substr_in_str(eocd_signature_str, in_str)
577
+ indices.each do |check_at|
578
+ maybe_record = in_str[check_at..str_size]
579
+ # If the record is smaller than the minimum - we will never recover anything
580
+ break if maybe_record.bytesize < minimum_record_size
581
+ # Now we check if the record ends with the combination
582
+ # of the comment size and an arbitrary byte string of that size.
583
+ # If it does - we found our match
584
+ *_unused, comment_size = maybe_record.unpack(unpack_pattern)
585
+ if (maybe_record.bytesize - minimum_record_size) == comment_size
586
+ return check_at # Found the EOCD marker location
593
587
  end
594
-
595
- end_location -= 1 # Shift the window back, by one byte, and try again.
596
588
  end
589
+ # If we haven't caught anything, return nil deliberately instead of returning the last statement
590
+ nil
597
591
  end
598
592
 
599
593
  # Find the Zip64 EOCD locator segment offset. Do this by seeking backwards from the
600
594
  # EOCD record in the archive by fixed offsets
601
- # Rubocop: convention: Assignment Branch Condition size for
602
595
  # get_zip64_eocd_location is too high. [15.17/15]
603
- # Rubocop: convention: Method has too many lines. [15/10]
604
596
  def get_zip64_eocd_location(file_io, eocd_offset)
605
597
  zip64_eocd_loc_offset = eocd_offset
606
598
  zip64_eocd_loc_offset -= 4 # The signature
@@ -609,8 +601,8 @@ class ZipTricks::FileReader
609
601
  zip64_eocd_loc_offset -= 4 # Total number of disks
610
602
 
611
603
  log do
612
- format('Will look for the Zip64 EOCD locator signature at offset %d',
613
- zip64_eocd_loc_offset)
604
+ 'Will look for the Zip64 EOCD locator signature at offset %<offset>d' %
605
+ {offset: zip64_eocd_loc_offset}
614
606
  end
615
607
 
616
608
  # If the offset is negative there is certainly no Zip64 EOCD locator here
@@ -619,7 +611,9 @@ class ZipTricks::FileReader
619
611
  file_io.seek(zip64_eocd_loc_offset, IO::SEEK_SET)
620
612
  assert_signature(file_io, 0x07064b50)
621
613
 
622
- log { format('Found Zip64 EOCD locator at offset %d', zip64_eocd_loc_offset) }
614
+ log do
615
+ 'Found Zip64 EOCD locator at offset %<offset>d' % {offset: zip64_eocd_loc_offset}
616
+ end
623
617
 
624
618
  disk_num = read_4b(file_io) # number of the disk
625
619
  raise UnsupportedFeature, 'The archive spans multiple disks' if disk_num != 0
@@ -628,9 +622,7 @@ class ZipTricks::FileReader
628
622
  nil
629
623
  end
630
624
 
631
- # Rubocop: convention: Assignment Branch Condition size for
632
625
  # num_files_and_central_directory_offset_zip64 is too high. [21.12/15]
633
- # Rubocop: convention: Method has too many lines. [17/10]
634
626
  def num_files_and_central_directory_offset_zip64(io, zip64_end_of_cdir_location)
635
627
  seek(io, zip64_end_of_cdir_location)
636
628
 
@@ -644,20 +636,16 @@ class ZipTricks::FileReader
644
636
 
645
637
  disk_n = read_4b(zip64_eocdr) # number of this disk
646
638
  disk_n_with_eocdr = read_4b(zip64_eocdr) # number of the disk with the EOCDR
647
- if disk_n != disk_n_with_eocdr
648
- raise UnsupportedFeature, 'The archive spans multiple disks'
649
- end
639
+ raise UnsupportedFeature, 'The archive spans multiple disks' if disk_n != disk_n_with_eocdr
650
640
 
651
641
  num_files_this_disk = read_8b(zip64_eocdr) # number of files on this disk
652
642
  num_files_total = read_8b(zip64_eocdr) # files total in the central directory
653
643
 
654
- if num_files_this_disk != num_files_total
655
- raise UnsupportedFeature, 'The archive spans multiple disks'
656
- end
644
+ raise UnsupportedFeature, 'The archive spans multiple disks' if num_files_this_disk != num_files_total
657
645
 
658
646
  log do
659
- format('Zip64 EOCD record states there are %d files in the archive',
660
- num_files_total)
647
+ 'Zip64 EOCD record states there are %<amount>d files in the archive' %
648
+ {amount: num_files_total}
661
649
  end
662
650
 
663
651
  central_dir_size = read_8b(zip64_eocdr) # Size of the central directory
@@ -666,58 +654,48 @@ class ZipTricks::FileReader
666
654
  [num_files_total, central_dir_offset, central_dir_size]
667
655
  end
668
656
 
669
- C_V = 'V'
670
- C_v = 'v'
671
- C_Qe = 'Q<'
657
+ C_UINT4 = 'V'
658
+ C_UINT2 = 'v'
659
+ C_UINT8 = 'Q<'
672
660
 
673
661
  # To prevent too many tiny reads, read the maximum possible size of end of
674
662
  # central directory record upfront (all the fixed fields + at most 0xFFFF
675
663
  # bytes of the archive comment)
676
- MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE =
677
- begin
678
- 4 + # Offset of the start of central directory
679
- 4 + # Size of the central directory
680
- 2 + # Number of files in the cdir
681
- 4 + # End-of-central-directory signature
682
- 2 + # Number of this disk
683
- 2 + # Number of disk with the start of cdir
684
- 2 + # Number of files in the cdir of this disk
685
- 2 + # The comment size
686
- 0xFFFF # Maximum comment size
687
- end
664
+ MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE = 4 + # Offset of the start of central directory
665
+ 4 + # Size of the central directory
666
+ 2 + # Number of files in the cdir
667
+ 4 + # End-of-central-directory signature
668
+ 2 + # Number of this disk
669
+ 2 + # Number of disk with the start of cdir
670
+ 2 + # Number of files in the cdir of this disk
671
+ 2 + # The comment size
672
+ 0xFFFF # Maximum comment size
688
673
 
689
674
  # To prevent too many tiny reads, read the maximum possible size of the local file header upfront.
690
675
  # The maximum size is all the usual items, plus the maximum size
691
676
  # of the filename (0xFFFF bytes) and the maximum size of the extras (0xFFFF bytes)
692
- MAX_LOCAL_HEADER_SIZE =
693
- begin
694
- 4 + # signature
695
- 2 + # Version needed to extract
696
- 2 + # gp flags
697
- 2 + # storage mode
698
- 2 + # dos time
699
- 2 + # dos date
700
- 4 + # CRC32
701
- 4 + # Comp size
702
- 4 + # Uncomp size
703
- 2 + # Filename size
704
- 2 + # Extra fields size
705
- 0xFFFF + # Maximum filename size
706
- 0xFFFF # Maximum extra fields size
707
- end
708
-
709
- SIZE_OF_USABLE_EOCD_RECORD =
710
- begin
711
- 4 + # Signature
712
- 2 + # Number of this disk
713
- 2 + # Number of the disk with the EOCD record
714
- 2 + # Number of entries in the central directory of this disk
715
- 2 + # Number of entries in the central directory total
716
- 4 + # Size of the central directory
717
- 4 # Start of the central directory offset
718
- end
677
+ MAX_LOCAL_HEADER_SIZE = 4 + # signature
678
+ 2 + # Version needed to extract
679
+ 2 + # gp flags
680
+ 2 + # storage mode
681
+ 2 + # dos time
682
+ 2 + # dos date
683
+ 4 + # CRC32
684
+ 4 + # Comp size
685
+ 4 + # Uncomp size
686
+ 2 + # Filename size
687
+ 2 + # Extra fields size
688
+ 0xFFFF + # Maximum filename size
689
+ 0xFFFF # Maximum extra fields size
690
+
691
+ SIZE_OF_USABLE_EOCD_RECORD = 4 + # Signature
692
+ 2 + # Number of this disk
693
+ 2 + # Number of the disk with the EOCD record
694
+ 2 + # Number of entries in the central directory of this disk
695
+ 2 + # Number of entries in the central directory total
696
+ 4 + # Size of the central directory
697
+ 4 # Start of the central directory offset
719
698
 
720
- # Rubocop: convention: Method has too many lines. [11/10]
721
699
  def num_files_and_central_directory_offset(file_io, eocd_offset)
722
700
  seek(file_io, eocd_offset)
723
701
 
@@ -735,7 +713,7 @@ class ZipTricks::FileReader
735
713
  [num_files, cdir_offset, cdir_size]
736
714
  end
737
715
 
738
- private_constant :C_V, :C_v, :C_Qe, :MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE,
716
+ private_constant :C_UINT4, :C_UINT2, :C_UINT8, :MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE,
739
717
  :MAX_LOCAL_HEADER_SIZE, :SIZE_OF_USABLE_EOCD_RECORD
740
718
 
741
719
  # Is provided as a stub to be overridden in a subclass if you need it. Will report
@@ -38,13 +38,11 @@ class ZipTricks::RemoteIO
38
38
  #
39
39
  # @param n_bytes[Fixnum, nil] how many bytes to read, or `nil` to read all the way to the end
40
40
  # @return [String] the read bytes
41
- # Rubocop: convention: Assignment Branch Condition size for read is too high. [17.92/15]
42
- # Rubocop: convention: Method has too many lines. [13/10]
43
41
  def read(n_bytes = nil)
44
42
  @remote_size ||= request_object_size
45
43
 
46
44
  # If the resource is empty there is nothing to read
47
- return nil if @remote_size.zero?
45
+ return if @remote_size.zero?
48
46
 
49
47
  maximum_avaialable = @remote_size - @pos
50
48
  n_bytes ||= maximum_avaialable # nil == read to the end of file
@@ -54,9 +52,7 @@ class ZipTricks::RemoteIO
54
52
  n_bytes = clamp(0, n_bytes, maximum_avaialable)
55
53
 
56
54
  read_n_bytes_from_remote(@pos, n_bytes).tap do |data|
57
- if data.bytesize != n_bytes
58
- raise "Remote read returned #{data.bytesize} bytes instead of #{n_bytes} as requested"
59
- end
55
+ raise "Remote read returned #{data.bytesize} bytes instead of #{n_bytes} as requested" if data.bytesize != n_bytes
60
56
  @pos = clamp(0, @pos + data.bytesize, @remote_size)
61
57
  end
62
58
  end
@@ -2,21 +2,18 @@
2
2
 
3
3
  # A simple stateful class for keeping track of a CRC32 value through multiple writes
4
4
  class ZipTricks::StreamCRC32
5
- BUFFER_SIZE = 1024 * 1024 * 5
6
-
7
5
  # Compute a CRC32 value from an IO object. The object should respond to `read` and `eof?`
8
6
  #
9
7
  # @param io[IO] the IO to read the data from
10
8
  # @return [Fixnum] the computed CRC32 value
11
9
  def self.from_io(io)
12
10
  crc = new
13
- crc << io.read(BUFFER_SIZE) until io.eof?
11
+ crc << io.read(1024 * 512) until io.eof?
14
12
  crc.to_i
15
13
  end
16
14
 
17
15
  # Creates a new streaming CRC32 calculator
18
16
  def initialize
19
- @buf = StringIO.new
20
17
  @crc = Zlib.crc32('')
21
18
  end
22
19
 
@@ -25,8 +22,7 @@ class ZipTricks::StreamCRC32
25
22
  # @param blob[String] the string to compute the CRC32 from
26
23
  # @return [self]
27
24
  def <<(blob)
28
- @buf << blob
29
- buf_flush if @buf.size > BUFFER_SIZE
25
+ @crc = Zlib.crc32_combine(@crc, Zlib.crc32(blob), blob.bytesize)
30
26
  self
31
27
  end
32
28
 
@@ -34,7 +30,6 @@ class ZipTricks::StreamCRC32
34
30
  #
35
31
  # @return [Fixnum] the updated CRC32 value for all the blobs so far
36
32
  def to_i
37
- buf_flush if @buf.size > 0
38
33
  @crc
39
34
  end
40
35
 
@@ -45,15 +40,6 @@ class ZipTricks::StreamCRC32
45
40
  # @param blob_size[Fixnum] the size of the data the `crc32` is computed from
46
41
  # @return [Fixnum] the updated CRC32 value for all the blobs so far
47
42
  def append(crc32, blob_size)
48
- buf_flush if @buf.size > 0
49
43
  @crc = Zlib.crc32_combine(@crc, crc32, blob_size)
50
44
  end
51
-
52
- private
53
-
54
- def buf_flush
55
- @crc = Zlib.crc32_combine(@crc, Zlib.crc32(@buf.string), @buf.size)
56
- @buf.truncate(0)
57
- @buf.rewind
58
- end
59
45
  end