zip_tricks 4.5.2 → 4.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -53,7 +53,7 @@ class ZipDownload
53
53
 
54
54
  # Add a Content-Disposition so that the download has a .zip extension
55
55
  # (this will not work well with UTF-8 filenames on Windows, but hey!)
56
- content_disposition = format('attachment; filename=%s.zip', filename)
56
+ content_disposition = 'attachment; filename=%<filename>s.zip' % {filename: filename}
57
57
 
58
58
  # and return the response, adding the Content-Length we have computed earlier
59
59
  [
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # rubocop:disable Naming/ConstantName
4
-
5
3
  require 'stringio'
6
4
 
7
5
  # A very barebones ZIP file reader. Is made for maximum interoperability, but at the same
@@ -146,8 +144,8 @@ class ZipTricks::FileReader
146
144
  when 0
147
145
  StoredReader.new(from_io, compressed_size)
148
146
  else
149
- raise UnsupportedFeature, format('Unsupported storage mode for reading - %d',
150
- storage_mode)
147
+ raise UnsupportedFeature, 'Unsupported storage mode for reading - %<storage_mode>d' %
148
+ {storage_mode: storage_mode}
151
149
  end
152
150
  end
153
151
 
@@ -208,7 +206,10 @@ class ZipTricks::FileReader
208
206
  num_files_and_central_directory_offset(io, eocd_offset)
209
207
  end
210
208
 
211
- log { format('Located the central directory start at %d', cdir_location) }
209
+ log do
210
+ 'Located the central directory start at %<location>d' %
211
+ {location: cdir_location}
212
+ end
212
213
  seek(io, cdir_location)
213
214
 
214
215
  # Read the entire central directory AND anything behind it, in one fell swoop.
@@ -227,15 +228,15 @@ class ZipTricks::FileReader
227
228
  central_directory_str = io.read # and not read_n(io, cdir_size), see above
228
229
  central_directory_io = StringIO.new(central_directory_str)
229
230
  log do
230
- format('Read %d bytes with central directory + EOCD record and locator',
231
- central_directory_str.bytesize)
231
+ 'Read %<byte_size>d bytes with central directory + EOCD record and locator' %
232
+ {byte_size: central_directory_str.bytesize}
232
233
  end
233
234
 
234
235
  entries = (0...num_files).map do |entry_n|
235
236
  offset_location = cdir_location + central_directory_io.tell
236
237
  log do
237
- format('Reading the central directory entry %d starting at offset %d',
238
- entry_n, offset_location)
238
+ 'Reading the central directory entry %<entry_n>d starting at offset %<offset>d' %
239
+ {entry_n: entry_n, offset: offset_location}
239
240
  end
240
241
  read_cdir_entry(central_directory_io)
241
242
  end
@@ -272,16 +273,16 @@ class ZipTricks::FileReader
272
273
  entries << entry
273
274
  next_local_header_offset = entry.compressed_data_offset + entry.compressed_size
274
275
  log do
275
- format('Recovered a local file file header at offset %d, seeking to the next at %d',
276
- cur_offset, next_local_header_offset)
276
+ 'Recovered a local file file header at offset %<cur_offset>d, seeking to the next at %<header_offset>d' %
277
+ {cur_offset: cur_offset, header_offset: next_local_header_offset}
277
278
  end
278
279
  seek(io, next_local_header_offset) # Seek to the next entry, and raise if seek is impossible
279
280
  end
280
281
  entries
281
282
  rescue ReadError
282
283
  log do
283
- format('Got a read/seek error after reaching %d, no more entries can be recovered',
284
- cur_offset)
284
+ 'Got a read/seek error after reaching %<cur_offset>d, no more entries can be recovered' %
285
+ {cur_offset: cur_offset}
285
286
  end
286
287
  entries
287
288
  end
@@ -332,8 +333,8 @@ class ZipTricks::FileReader
332
333
  # the values fetched from the conventional header
333
334
  zip64_extra = StringIO.new(zip64_extra_contents)
334
335
  log do
335
- format('Will read Zip64 extra data from local header field for %s, %d bytes',
336
- e.filename, zip64_extra.size)
336
+ 'Will read Zip64 extra data from local header field for %<filename>s, %<size>d bytes' %
337
+ {filename: e.filename, size: zip64_extra.size}
337
338
  end
338
339
  # Now here be dragons. The APPNOTE specifies that
339
340
  #
@@ -396,8 +397,8 @@ class ZipTricks::FileReader
396
397
  def read_local_headers(entries, io)
397
398
  entries.each_with_index do |entry, i|
398
399
  log do
399
- format('Reading the local header for entry %d at offset %d',
400
- i, entry.local_file_header_offset)
400
+ 'Reading the local header for entry %<index>d at offset %<offset>d' %
401
+ {index: i, offset: entry.local_file_header_offset}
401
402
  end
402
403
  off = get_compressed_data_offset(io: io,
403
404
  local_file_header_offset: entry.local_file_header_offset)
@@ -429,7 +430,6 @@ class ZipTricks::FileReader
429
430
 
430
431
  def assert_signature(io, signature_magic_number)
431
432
  readback = read_4b(io)
432
- # Rubocop: Use a guard clause instead of wrapping the code inside a conditional expression
433
433
  if readback != signature_magic_number
434
434
  expected = '0x0' + signature_magic_number.to_s(16)
435
435
  actual = '0x0' + readback.to_s(16)
@@ -460,24 +460,21 @@ class ZipTricks::FileReader
460
460
  end
461
461
 
462
462
  def read_2b(io)
463
- read_n(io, 2).unpack(C_v).shift
463
+ read_n(io, 2).unpack(C_UINT2).shift
464
464
  end
465
465
 
466
466
  def read_4b(io)
467
- read_n(io, 4).unpack(C_V).shift
467
+ read_n(io, 4).unpack(C_UINT4).shift
468
468
  end
469
469
 
470
470
  def read_8b(io)
471
- read_n(io, 8).unpack(C_Qe).shift
471
+ read_n(io, 8).unpack(C_UINT8).shift
472
472
  end
473
473
 
474
474
  def read_cdir_entry(io)
475
- # Rubocop: convention: Assignment Branch Condition size for
476
475
  # read_cdir_entry is too high. [45.66/15]
477
- # Rubocop: convention: Method has too many lines. [30/10]
478
476
  assert_signature(io, 0x02014b50)
479
477
  ZipEntry.new.tap do |e|
480
- # Rubocop: convention: Block has too many lines. [27/25]
481
478
  e.made_by = read_2b(io)
482
479
  e.version_needed_to_extract = read_2b(io)
483
480
  e.gp_flags = read_2b(io)
@@ -510,8 +507,8 @@ class ZipTricks::FileReader
510
507
  # the values fetched from the conventional header
511
508
  zip64_extra = StringIO.new(zip64_extra_contents)
512
509
  log do
513
- format('Will read Zip64 extra data for %s, %d bytes',
514
- e.filename, zip64_extra.size)
510
+ 'Will read Zip64 extra data for %<filename>s, %<size>d bytes' %
511
+ {filename: e.filename, size: zip64_extra.size}
515
512
  end
516
513
  # Now here be dragons. The APPNOTE specifies that
517
514
  #
@@ -522,16 +519,9 @@ class ZipTricks::FileReader
522
519
  #
523
520
  # It means that before we read this stuff we need to check if the previously-read
524
521
  # values are at overflow, and only _then_ proceed to read them. Bah.
525
- # Rubocop: convention: Line is too long.
526
- if e.uncompressed_size == 0xFFFFFFFF
527
- e.uncompressed_size = read_8b(zip64_extra)
528
- end
529
- if e.compressed_size == 0xFFFFFFFF
530
- e.compressed_size = read_8b(zip64_extra)
531
- end
532
- if e.local_file_header_offset == 0xFFFFFFFF
533
- e.local_file_header_offset = read_8b(zip64_extra)
534
- end
522
+ e.uncompressed_size = read_8b(zip64_extra) if e.uncompressed_size == 0xFFFFFFFF
523
+ e.compressed_size = read_8b(zip64_extra) if e.compressed_size == 0xFFFFFFFF
524
+ e.local_file_header_offset = read_8b(zip64_extra) if e.local_file_header_offset == 0xFFFFFFFF
535
525
  # Disk number comes last and we can skip it anyway, since we do
536
526
  # not support multi-disk archives
537
527
  end
@@ -553,54 +543,56 @@ class ZipTricks::FileReader
553
543
  raise MissingEOCD unless eocd_idx_in_buf
554
544
 
555
545
  eocd_offset = implied_position_of_eocd_record + eocd_idx_in_buf
556
- log { format('Found EOCD signature at offset %d', eocd_offset) }
546
+ log do
547
+ 'Found EOCD signature at offset %<offset>d' % {offset: eocd_offset}
548
+ end
557
549
 
558
550
  eocd_offset
559
551
  end
560
552
 
561
- # This is tricky. Essentially, we have to scan the maximum possible number
562
- # of bytes (that the EOCD can theoretically occupy including the comment),
553
+ def all_indices_of_substr_in_str(of_substring, in_string)
554
+ last_i = 0
555
+ found_at_indices = []
556
+ while last_i = in_string.index(of_substring, last_i)
557
+ found_at_indices << last_i
558
+ last_i += of_substring.bytesize
559
+ end
560
+ found_at_indices
561
+ end
562
+
563
+ # We have to scan the maximum possible number
564
+ # of bytes that the EOCD can theoretically occupy including the comment after it,
563
565
  # and we have to find a combination of:
564
- # [EOCD signature, <some ZIP medatata>, comment byte size, the comment of
565
- # that size, eof].
566
- # The only way I could find to do this was with a sliding window, but
567
- # there probably is a better way.
568
- # Rubocop: convention: Assignment Branch Condition size for
569
- # locate_eocd_signature is too high. [17.49/15]
570
- # Rubocop: convention: Method has too many lines. [14/10]
566
+ # [EOCD signature, <some ZIP metadata>, comment byte size, comment of that size]
567
+ # at the end. To do so, we first find all indices of the signature in the trailer
568
+ # string, and then check whether the bytestring starting at the signature and
569
+ # ending at the end of string satisfies that given pattern.
571
570
  def locate_eocd_signature(in_str)
572
- # We have to scan from the _very_ tail. We read the very minimum size
573
- # the EOCD record can have (up to and including the comment size), using
574
- # a sliding window. Once our end offset matches the comment size we found our
575
- # EOCD marker.
571
+ eocd_signature = 0x06054b50
572
+ eocd_signature_str = [eocd_signature].pack('V')
576
573
  unpack_pattern = 'VvvvvVVv'
577
574
  minimum_record_size = 22
578
- end_location = minimum_record_size * -1
579
- loop do
580
- # If the window is nil, we have rolled off the start of the string, nothing to do here.
581
- # We use negative values because if we used positive slice indices
582
- # we would have to detect the rollover ourselves
583
- break unless window = in_str[end_location, minimum_record_size]
584
-
585
- window_location = in_str.bytesize + end_location
586
- unpacked = window.unpack(unpack_pattern)
587
- # If we found the signarue, pick up the comment size, and check if the size of the window
588
- # plus that comment size is where we are in the string. If we are - bingo.
589
- if unpacked[0] == 0x06054b50 && comment_size = unpacked[-1]
590
- assumed_eocd_location = in_str.bytesize - comment_size - minimum_record_size
591
- # if the comment size is where we should be at - we found our EOCD
592
- return assumed_eocd_location if assumed_eocd_location == window_location
575
+ str_size = in_str.bytesize
576
+ indices = all_indices_of_substr_in_str(eocd_signature_str, in_str)
577
+ indices.each do |check_at|
578
+ maybe_record = in_str[check_at..str_size]
579
+ # If the record is smaller than the minimum - we will never recover anything
580
+ break if maybe_record.bytesize < minimum_record_size
581
+ # Now we check if the record ends with the combination
582
+ # of the comment size and an arbitrary byte string of that size.
583
+ # If it does - we found our match
584
+ *_unused, comment_size = maybe_record.unpack(unpack_pattern)
585
+ if (maybe_record.bytesize - minimum_record_size) == comment_size
586
+ return check_at # Found the EOCD marker location
593
587
  end
594
-
595
- end_location -= 1 # Shift the window back, by one byte, and try again.
596
588
  end
589
+ # If we haven't caught anything, return nil deliberately instead of returning the last statement
590
+ nil
597
591
  end
598
592
 
599
593
  # Find the Zip64 EOCD locator segment offset. Do this by seeking backwards from the
600
594
  # EOCD record in the archive by fixed offsets
601
- # Rubocop: convention: Assignment Branch Condition size for
602
595
  # get_zip64_eocd_location is too high. [15.17/15]
603
- # Rubocop: convention: Method has too many lines. [15/10]
604
596
  def get_zip64_eocd_location(file_io, eocd_offset)
605
597
  zip64_eocd_loc_offset = eocd_offset
606
598
  zip64_eocd_loc_offset -= 4 # The signature
@@ -609,8 +601,8 @@ class ZipTricks::FileReader
609
601
  zip64_eocd_loc_offset -= 4 # Total number of disks
610
602
 
611
603
  log do
612
- format('Will look for the Zip64 EOCD locator signature at offset %d',
613
- zip64_eocd_loc_offset)
604
+ 'Will look for the Zip64 EOCD locator signature at offset %<offset>d' %
605
+ {offset: zip64_eocd_loc_offset}
614
606
  end
615
607
 
616
608
  # If the offset is negative there is certainly no Zip64 EOCD locator here
@@ -619,7 +611,9 @@ class ZipTricks::FileReader
619
611
  file_io.seek(zip64_eocd_loc_offset, IO::SEEK_SET)
620
612
  assert_signature(file_io, 0x07064b50)
621
613
 
622
- log { format('Found Zip64 EOCD locator at offset %d', zip64_eocd_loc_offset) }
614
+ log do
615
+ 'Found Zip64 EOCD locator at offset %<offset>d' % {offset: zip64_eocd_loc_offset}
616
+ end
623
617
 
624
618
  disk_num = read_4b(file_io) # number of the disk
625
619
  raise UnsupportedFeature, 'The archive spans multiple disks' if disk_num != 0
@@ -628,9 +622,7 @@ class ZipTricks::FileReader
628
622
  nil
629
623
  end
630
624
 
631
- # Rubocop: convention: Assignment Branch Condition size for
632
625
  # num_files_and_central_directory_offset_zip64 is too high. [21.12/15]
633
- # Rubocop: convention: Method has too many lines. [17/10]
634
626
  def num_files_and_central_directory_offset_zip64(io, zip64_end_of_cdir_location)
635
627
  seek(io, zip64_end_of_cdir_location)
636
628
 
@@ -644,20 +636,16 @@ class ZipTricks::FileReader
644
636
 
645
637
  disk_n = read_4b(zip64_eocdr) # number of this disk
646
638
  disk_n_with_eocdr = read_4b(zip64_eocdr) # number of the disk with the EOCDR
647
- if disk_n != disk_n_with_eocdr
648
- raise UnsupportedFeature, 'The archive spans multiple disks'
649
- end
639
+ raise UnsupportedFeature, 'The archive spans multiple disks' if disk_n != disk_n_with_eocdr
650
640
 
651
641
  num_files_this_disk = read_8b(zip64_eocdr) # number of files on this disk
652
642
  num_files_total = read_8b(zip64_eocdr) # files total in the central directory
653
643
 
654
- if num_files_this_disk != num_files_total
655
- raise UnsupportedFeature, 'The archive spans multiple disks'
656
- end
644
+ raise UnsupportedFeature, 'The archive spans multiple disks' if num_files_this_disk != num_files_total
657
645
 
658
646
  log do
659
- format('Zip64 EOCD record states there are %d files in the archive',
660
- num_files_total)
647
+ 'Zip64 EOCD record states there are %<amount>d files in the archive' %
648
+ {amount: num_files_total}
661
649
  end
662
650
 
663
651
  central_dir_size = read_8b(zip64_eocdr) # Size of the central directory
@@ -666,58 +654,48 @@ class ZipTricks::FileReader
666
654
  [num_files_total, central_dir_offset, central_dir_size]
667
655
  end
668
656
 
669
- C_V = 'V'
670
- C_v = 'v'
671
- C_Qe = 'Q<'
657
+ C_UINT4 = 'V'
658
+ C_UINT2 = 'v'
659
+ C_UINT8 = 'Q<'
672
660
 
673
661
  # To prevent too many tiny reads, read the maximum possible size of end of
674
662
  # central directory record upfront (all the fixed fields + at most 0xFFFF
675
663
  # bytes of the archive comment)
676
- MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE =
677
- begin
678
- 4 + # Offset of the start of central directory
679
- 4 + # Size of the central directory
680
- 2 + # Number of files in the cdir
681
- 4 + # End-of-central-directory signature
682
- 2 + # Number of this disk
683
- 2 + # Number of disk with the start of cdir
684
- 2 + # Number of files in the cdir of this disk
685
- 2 + # The comment size
686
- 0xFFFF # Maximum comment size
687
- end
664
+ MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE = 4 + # Offset of the start of central directory
665
+ 4 + # Size of the central directory
666
+ 2 + # Number of files in the cdir
667
+ 4 + # End-of-central-directory signature
668
+ 2 + # Number of this disk
669
+ 2 + # Number of disk with the start of cdir
670
+ 2 + # Number of files in the cdir of this disk
671
+ 2 + # The comment size
672
+ 0xFFFF # Maximum comment size
688
673
 
689
674
  # To prevent too many tiny reads, read the maximum possible size of the local file header upfront.
690
675
  # The maximum size is all the usual items, plus the maximum size
691
676
  # of the filename (0xFFFF bytes) and the maximum size of the extras (0xFFFF bytes)
692
- MAX_LOCAL_HEADER_SIZE =
693
- begin
694
- 4 + # signature
695
- 2 + # Version needed to extract
696
- 2 + # gp flags
697
- 2 + # storage mode
698
- 2 + # dos time
699
- 2 + # dos date
700
- 4 + # CRC32
701
- 4 + # Comp size
702
- 4 + # Uncomp size
703
- 2 + # Filename size
704
- 2 + # Extra fields size
705
- 0xFFFF + # Maximum filename size
706
- 0xFFFF # Maximum extra fields size
707
- end
708
-
709
- SIZE_OF_USABLE_EOCD_RECORD =
710
- begin
711
- 4 + # Signature
712
- 2 + # Number of this disk
713
- 2 + # Number of the disk with the EOCD record
714
- 2 + # Number of entries in the central directory of this disk
715
- 2 + # Number of entries in the central directory total
716
- 4 + # Size of the central directory
717
- 4 # Start of the central directory offset
718
- end
677
+ MAX_LOCAL_HEADER_SIZE = 4 + # signature
678
+ 2 + # Version needed to extract
679
+ 2 + # gp flags
680
+ 2 + # storage mode
681
+ 2 + # dos time
682
+ 2 + # dos date
683
+ 4 + # CRC32
684
+ 4 + # Comp size
685
+ 4 + # Uncomp size
686
+ 2 + # Filename size
687
+ 2 + # Extra fields size
688
+ 0xFFFF + # Maximum filename size
689
+ 0xFFFF # Maximum extra fields size
690
+
691
+ SIZE_OF_USABLE_EOCD_RECORD = 4 + # Signature
692
+ 2 + # Number of this disk
693
+ 2 + # Number of the disk with the EOCD record
694
+ 2 + # Number of entries in the central directory of this disk
695
+ 2 + # Number of entries in the central directory total
696
+ 4 + # Size of the central directory
697
+ 4 # Start of the central directory offset
719
698
 
720
- # Rubocop: convention: Method has too many lines. [11/10]
721
699
  def num_files_and_central_directory_offset(file_io, eocd_offset)
722
700
  seek(file_io, eocd_offset)
723
701
 
@@ -735,7 +713,7 @@ class ZipTricks::FileReader
735
713
  [num_files, cdir_offset, cdir_size]
736
714
  end
737
715
 
738
- private_constant :C_V, :C_v, :C_Qe, :MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE,
716
+ private_constant :C_UINT4, :C_UINT2, :C_UINT8, :MAX_END_OF_CENTRAL_DIRECTORY_RECORD_SIZE,
739
717
  :MAX_LOCAL_HEADER_SIZE, :SIZE_OF_USABLE_EOCD_RECORD
740
718
 
741
719
  # Is provided as a stub to be overridden in a subclass if you need it. Will report
@@ -38,13 +38,11 @@ class ZipTricks::RemoteIO
38
38
  #
39
39
  # @param n_bytes[Fixnum, nil] how many bytes to read, or `nil` to read all the way to the end
40
40
  # @return [String] the read bytes
41
- # Rubocop: convention: Assignment Branch Condition size for read is too high. [17.92/15]
42
- # Rubocop: convention: Method has too many lines. [13/10]
43
41
  def read(n_bytes = nil)
44
42
  @remote_size ||= request_object_size
45
43
 
46
44
  # If the resource is empty there is nothing to read
47
- return nil if @remote_size.zero?
45
+ return if @remote_size.zero?
48
46
 
49
47
  maximum_avaialable = @remote_size - @pos
50
48
  n_bytes ||= maximum_avaialable # nil == read to the end of file
@@ -54,9 +52,7 @@ class ZipTricks::RemoteIO
54
52
  n_bytes = clamp(0, n_bytes, maximum_avaialable)
55
53
 
56
54
  read_n_bytes_from_remote(@pos, n_bytes).tap do |data|
57
- if data.bytesize != n_bytes
58
- raise "Remote read returned #{data.bytesize} bytes instead of #{n_bytes} as requested"
59
- end
55
+ raise "Remote read returned #{data.bytesize} bytes instead of #{n_bytes} as requested" if data.bytesize != n_bytes
60
56
  @pos = clamp(0, @pos + data.bytesize, @remote_size)
61
57
  end
62
58
  end
@@ -2,21 +2,18 @@
2
2
 
3
3
  # A simple stateful class for keeping track of a CRC32 value through multiple writes
4
4
  class ZipTricks::StreamCRC32
5
- BUFFER_SIZE = 1024 * 1024 * 5
6
-
7
5
  # Compute a CRC32 value from an IO object. The object should respond to `read` and `eof?`
8
6
  #
9
7
  # @param io[IO] the IO to read the data from
10
8
  # @return [Fixnum] the computed CRC32 value
11
9
  def self.from_io(io)
12
10
  crc = new
13
- crc << io.read(BUFFER_SIZE) until io.eof?
11
+ crc << io.read(1024 * 512) until io.eof?
14
12
  crc.to_i
15
13
  end
16
14
 
17
15
  # Creates a new streaming CRC32 calculator
18
16
  def initialize
19
- @buf = StringIO.new
20
17
  @crc = Zlib.crc32('')
21
18
  end
22
19
 
@@ -25,8 +22,7 @@ class ZipTricks::StreamCRC32
25
22
  # @param blob[String] the string to compute the CRC32 from
26
23
  # @return [self]
27
24
  def <<(blob)
28
- @buf << blob
29
- buf_flush if @buf.size > BUFFER_SIZE
25
+ @crc = Zlib.crc32_combine(@crc, Zlib.crc32(blob), blob.bytesize)
30
26
  self
31
27
  end
32
28
 
@@ -34,7 +30,6 @@ class ZipTricks::StreamCRC32
34
30
  #
35
31
  # @return [Fixnum] the updated CRC32 value for all the blobs so far
36
32
  def to_i
37
- buf_flush if @buf.size > 0
38
33
  @crc
39
34
  end
40
35
 
@@ -45,15 +40,6 @@ class ZipTricks::StreamCRC32
45
40
  # @param blob_size[Fixnum] the size of the data the `crc32` is computed from
46
41
  # @return [Fixnum] the updated CRC32 value for all the blobs so far
47
42
  def append(crc32, blob_size)
48
- buf_flush if @buf.size > 0
49
43
  @crc = Zlib.crc32_combine(@crc, crc32, blob_size)
50
44
  end
51
-
52
- private
53
-
54
- def buf_flush
55
- @crc = Zlib.crc32_combine(@crc, Zlib.crc32(@buf.string), @buf.size)
56
- @buf.truncate(0)
57
- @buf.rewind
58
- end
59
45
  end