zip_tricks 4.7.4 → 4.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d1119a96243d5da6b423a2a3c992efc6fbb580fa
4
- data.tar.gz: 63a1d0b2c93e5b231568e83efb60220862c07b7e
3
+ metadata.gz: 5c7e14be1038151588e016705af5249cacbfea8a
4
+ data.tar.gz: fc4e68ce38ea2091f7a718a8dcfff09d3e40fbb6
5
5
  SHA512:
6
- metadata.gz: 89c7300a0c3af8ceb0408d2c4315fc62e4abf5fecc53ca93219bf9ce30c146d9cc8bca193d35908f1d24516bc1298c12332850ff0352dd561ed67a56a6fe7211
7
- data.tar.gz: 20cdfca60a6ac3cc5beaa57bf74dba06bd09ef18931931ddead388de68e6316650e4aad14fe96b7838b415babaafe9f8aa72ca5a9ccd547eb535f3e6f27901f4
6
+ metadata.gz: 05d776ff8a5c0cea81f66aba9ac37ad2400c0abea38660ca94365f15649c5d9cc8c06f6d768d2f657b70fae823fe6704bcd335611ff9693e9550246cc04ba7e6
7
+ data.tar.gz: de2264703c398fba2d9a0c44b485206f2acdfb119c9aff76ab8855e2d84cdc2be4da6802af7fbe967f780229d8582d256439ee923c59aa0782d4b1d07b9104f0
@@ -1,5 +1,7 @@
1
1
  inherit_gem:
2
2
  wetransfer_style: ruby/default.yml
3
+ AllCops:
4
+ TargetRubyVersion: 2.1
3
5
  Layout/FirstMethodArgumentLineBreak:
4
6
  Enabled: false
5
7
  Layout/FirstMethodParameterLineBreak:
@@ -1,3 +1,13 @@
1
+ ## 4.8.0
2
+
3
+ * Make sure that when directories clobber files and vice versa we raise a clear error. Add `PathSet` which keeps track of entries
4
+ and all the directories needed to create them, document `PathSet`
5
+ * Move the `uniquify_filenames` function into a module for easier removal later
6
+ * Add the `auto_rename_duplicate_filenames` parameter to `Streamer` constructor. We need to make this optional
7
+ because making filenames unique can be very tricky when subdirectories are involved, and strictly
8
+ speaking we should not be applying this transformation at all - there should be no output of
9
+ duplicate filenames by the caller. So making the filenames unique should be available, but optional.
10
+
1
11
  ## 4.7.4
2
12
 
3
13
  * Use a single fixed capacity string in StreamCRC32.from_io to avoid unnecessary allocations
@@ -0,0 +1,148 @@
1
+ # rubocop:disable Layout/IndentHeredoc
2
+
3
+ # A ZIP archive contains a flat list of entries. These entries can implicitly
4
+ # create directories when the archive is expanded. For example, an entry with
5
+ # the filename of "some folder/file.docx" will make the unarchiving application
6
+ # create a directory called "some folder" automatically, and then deposit the
7
+ # file "file.docx" in that directory. These "implicit" directories can be
8
+ # arbitrarily nested, and create a tree structure of directories. That structure
9
+ # however is implicit as the archive contains a flat list.
10
+ #
11
+ # This creates opportunities for conflicts. For example, imagine the following
12
+ # structure:
13
+ #
14
+ # * `something/` - specifies an empty directory with the name "something"
15
+ # * `something` - specifies a file, creates a conflict
16
+ #
17
+ # This can be prevented with filename uniqueness checks. It does get funkier however
18
+ # as the rabbit hole goes down:
19
+ #
20
+ # * `dir/subdir/another_subdir/yet_another_subdir/file.bin` - declares a file and directories
21
+ # * `dir/subdir/another_subdir/yet_another_subdir` - declares a file at one of the levels, creates a conflict
22
+ #
23
+ # The results of this ZIP structure aren't very easy to predict as they depend on the
24
+ # application that opens the archive. For example, BOMArchiveHelper on macOS will expand files
25
+ # as they are declared in the ZIP, but once a conflict occurs it will fail with "error -21". It
26
+ # is not very transparent to the user why unarchiving fails, and it has to - and can reliably - only
27
+ # be prevented when the archive gets created.
28
+ #
29
+ # Unfortunately that conflicts with another "magical" feature of ZipTricks which automatically
30
+ # "fixes" duplicate filenames - filenames (paths) which have already been added to the archive.
31
+ # This fix is performed by appending (1), then (2) and so forth to the filename so that the
32
+ # conflict is avoided. This is not possible to apply to directories, because when one of the
33
+ # path components is reused in multiple filenames it means those entities should end up in
34
+ # the same directory (subdirectory) once the archive is opened.
35
+ class ZipTricks::PathSet
36
+ class Conflict < StandardError
37
+ end
38
+
39
+ class FileClobbersDirectory < Conflict
40
+ end
41
+
42
+ class DirectoryClobbersFile < Conflict
43
+ end
44
+
45
+ def initialize
46
+ @known_directories = Set.new
47
+ @known_files = Set.new
48
+ end
49
+
50
+ # Adds a directory path to the set of known paths, including
51
+ # all the directories that contain it. So, calling
52
+ # add_directory_path("dir/dir2/dir3")
53
+ # will add "dir", "dir/dir2", "dir/dir2/dir3".
54
+ #
55
+ # @param path[String] the path to the directory to add
56
+ # @return [void]
57
+ def add_directory_path(path)
58
+ path_and_ancestors(path).each do |parent_directory_path|
59
+ if @known_files.include?(parent_directory_path)
60
+ # Have to use the old-fashioned heredocs because ZipTricks
61
+ # aims to be compatible with MRI 2.1+ syntax, and squiggly
62
+ # heredoc is only available starting 2.3+
63
+ error_message = <<ERR
64
+ The path #{parent_directory_path.inspect} which has to be added
65
+ as a directory is already used for a file.
66
+
67
+ The directory at this path would get created implicitly
68
+ to produce #{path.inspect} during decompresison.
69
+
70
+ This would make some archive utilities refuse to open
71
+ the ZIP.
72
+ ERR
73
+ raise DirectoryClobbersFile, error_message
74
+ end
75
+ @known_directories << parent_directory_path
76
+ end
77
+ end
78
+
79
+ # Adds a file path to the set of known paths, including
80
+ # all the directories that contain it. Once a file has been added,
81
+ # it is no longer possible to add a directory having the same path
82
+ # as this would cause conflict.
83
+ #
84
+ # The operation also adds all the containing directories for the file, so
85
+ # add_file_path("dir/dir2/file.doc")
86
+ # will add "dir" and "dir/dir2" as directories, and "dir/dir2/file.doc" as a file.
87
+ #
88
+ # @param file_path[String] the path to the file to add
89
+ # @return [void]
90
+ def add_file_path(file_path)
91
+ if @known_files.include?(file_path)
92
+ error_message = <<ERR
93
+ The file at #{file_path.inspect} has already been included
94
+ in the archive. Adding it the second time would cause
95
+ the first file to be overwritten during unarchiving, and
96
+ could also get the archive flagged as invalid.
97
+ ERR
98
+ raise Conflict, error_message
99
+ end
100
+
101
+ if @known_directories.include?(file_path)
102
+ error_message = <<ERR
103
+ The path #{file_path.inspect} is already used for
104
+ a directory, but you are trying to add it as a file.
105
+
106
+ This would make some archive utilities refuse
107
+ to open the ZIP.
108
+ ERR
109
+ raise FileClobbersDirectory, error_message
110
+ end
111
+
112
+ # Add all the directories which this file is contained in
113
+ *dir_components, _file_name = non_empty_path_components(file_path)
114
+ add_directory_path(dir_components.join('/'))
115
+
116
+ # ...and then the file itself
117
+ @known_files << file_path
118
+ end
119
+
120
+ # Tells whether a specific full path is already known to the PathSet.
121
+ # Can be a path for a directory or for a file.
122
+ #
123
+ # @param path_in_archive[String] the path to check for inclusion
124
+ # @return [Boolean]
125
+ def include?(path_in_archive)
126
+ @known_files.include?(path_in_archive) || @known_directories.include?(path_in_archive)
127
+ end
128
+
129
+ # Clears the contained sets
130
+ # @return [void]
131
+ def clear
132
+ @known_files.clear
133
+ @known_directories.clear
134
+ end
135
+
136
+ private
137
+
138
+ def non_empty_path_components(path)
139
+ path.split('/').reject(&:empty?)
140
+ end
141
+
142
+ def path_and_ancestors(path)
143
+ path_components = non_empty_path_components(path)
144
+ path_components.each_with_object([]) do |component, seen|
145
+ seen << [seen.last, component].compact.join('/')
146
+ end
147
+ end
148
+ end
@@ -20,10 +20,11 @@ class ZipTricks::SizeEstimator
20
20
  # uncompressed_size: 89281911, compressed_size: 121908)
21
21
  # end
22
22
  #
23
+ # @param kwargs_for_streamer_new Any options to pass to Streamer, see {Streamer#initialize}
23
24
  # @return [Integer] the size of the resulting archive, in bytes
24
25
  # @yield [SizeEstimator] the estimator
25
- def self.estimate
26
- streamer = ZipTricks::Streamer.new(ZipTricks::NullWriter)
26
+ def self.estimate(**kwargs_for_streamer_new)
27
+ streamer = ZipTricks::Streamer.new(ZipTricks::NullWriter, **kwargs_for_streamer_new)
27
28
  estimator = new(streamer)
28
29
  yield(estimator)
29
30
  streamer.close # Returns the .tell of the contained IO
@@ -140,13 +140,19 @@ class ZipTricks::Streamer
140
140
  # @param stream[IO] the destination IO for the ZIP. Anything that responds to `<<` can be used.
141
141
  # @param writer[ZipTricks::ZipWriter] the object to be used as the writer.
142
142
  # Defaults to an instance of ZipTricks::ZipWriter, normally you won't need to override it
143
- def initialize(stream, writer: create_writer)
143
+ # @param auto_rename_duplicate_filenames[Boolean] whether duplicate filenames, when encountered,
144
+ # should be suffixed with (1), (2) etc. Default value is `true` since it
145
+ # used to be the default behavior.
146
+ #
147
+ # **DEPRECATION NOTICE** In ZipTricks version 5 `auto_rename_duplicate_filenames` will default to `false`
148
+ def initialize(stream, writer: create_writer, auto_rename_duplicate_filenames: true)
144
149
  raise InvalidOutput, 'The stream must respond to #<<' unless stream.respond_to?(:<<)
145
150
 
151
+ @dedupe_filenames = auto_rename_duplicate_filenames
146
152
  @out = ZipTricks::WriteAndTell.new(stream)
147
153
  @files = []
148
154
  @local_header_offsets = []
149
- @filenames_set = Set.new
155
+ @path_set = ZipTricks::PathSet.new
150
156
  @writer = writer
151
157
  end
152
158
 
@@ -387,7 +393,7 @@ class ZipTricks::Streamer
387
393
 
388
394
  # Clear the files so that GC will not have to trace all the way to here to deallocate them
389
395
  @files.clear
390
- @filenames_set.clear
396
+ @path_set.clear
391
397
 
392
398
  # and return the final offset
393
399
  @out.tell
@@ -429,22 +435,31 @@ class ZipTricks::Streamer
429
435
  private
430
436
 
431
437
  def add_file_and_write_local_header(
432
- filename:,
433
- modification_time:,
434
- crc32:,
435
- storage_mode:,
436
- compressed_size:,
437
- uncompressed_size:,
438
- use_data_descriptor:)
439
-
440
- # Clean backslashes and uniqify filenames if there are duplicates
438
+ filename:,
439
+ modification_time:,
440
+ crc32:,
441
+ storage_mode:,
442
+ compressed_size:,
443
+ uncompressed_size:,
444
+ use_data_descriptor:)
445
+
446
+ # Clean backslashes
441
447
  filename = remove_backslash(filename)
442
- filename = uniquify_name(filename) if @filenames_set.include?(filename)
443
-
444
448
  raise UnknownMode, "Unknown compression mode #{storage_mode}" unless [STORED, DEFLATED].include?(storage_mode)
445
-
446
449
  raise Overflow, 'Filename is too long' if filename.bytesize > 0xFFFF
447
450
 
451
+ # If we need to massage filenames to enforce uniqueness,
452
+ # do so before we check for file/directory conflicts
453
+ filename = ZipTricks::UniquifyFilename.call(filename, @path_set) if @dedupe_filenames
454
+
455
+ # Make sure there is no file/directory clobbering (conflicts), or - if deduping is disabled -
456
+ # no duplicate filenames/paths
457
+ if filename.end_with?('/')
458
+ @path_set.add_directory_path(filename)
459
+ else
460
+ @path_set.add_file_path(filename)
461
+ end
462
+
448
463
  if use_data_descriptor
449
464
  crc32 = 0
450
465
  compressed_size = 0
@@ -460,7 +475,6 @@ use_data_descriptor:)
460
475
  use_data_descriptor)
461
476
 
462
477
  @files << e
463
- @filenames_set << e.filename
464
478
  @local_header_offsets << @out.tell
465
479
 
466
480
  @writer.write_local_file_header(io: @out,
@@ -476,28 +490,4 @@ use_data_descriptor:)
476
490
  def remove_backslash(filename)
477
491
  filename.tr('\\', '_')
478
492
  end
479
-
480
- def uniquify_name(filename)
481
- # we add (1), (2), (n) at the end of a filename if there is a duplicate
482
- copy_pattern = /\((\d+)\)$/
483
- parts = filename.split('.')
484
- ext = if parts.last =~ /gz|zip/ && parts.size > 2
485
- parts.pop(2)
486
- elsif parts.size > 1
487
- parts.pop
488
- end
489
- fn_last_part = parts.pop
490
-
491
- duplicate_counter = 1
492
- loop do
493
- fn_last_part = if fn_last_part =~ copy_pattern
494
- fn_last_part.sub(copy_pattern, "(#{duplicate_counter})")
495
- else
496
- "#{fn_last_part} (#{duplicate_counter})"
497
- end
498
- new_filename = (parts + [fn_last_part, ext]).compact.join('.')
499
- return new_filename unless @filenames_set.include?(new_filename)
500
- duplicate_counter += 1
501
- end
502
- end
503
493
  end
@@ -0,0 +1,38 @@
1
+ module ZipTricks::UniquifyFilename
2
+
3
+ # Makes a given filename unique by appending a (n) suffix
4
+ # just before the filename extension. So "file.txt" gets
5
+ # transformed into "file (1).txt". The transformation is applied
6
+ # repeatedly as long as the generated filename is present
7
+ # in the `while_included_in` object
8
+ #
9
+ # @param path[String] the path to make unique
10
+ # @param while_included_in[#include?] an object that stores the list of already used paths
11
+ # @return [String] the path as is, or with the suffix required to make it unique
12
+ def self.call(path, while_included_in)
13
+ return path unless while_included_in.include?(path)
14
+
15
+ # we add (1), (2), (n) at the end of a filename before the filename extension,
16
+ # but only if there is a duplicate
17
+ copy_pattern = /\((\d+)\)$/
18
+ parts = path.split('.')
19
+ ext = if parts.last =~ /gz|zip/ && parts.size > 2
20
+ parts.pop(2)
21
+ elsif parts.size > 1
22
+ parts.pop
23
+ end
24
+ fn_last_part = parts.pop
25
+
26
+ duplicate_counter = 1
27
+ loop do
28
+ fn_last_part = if fn_last_part =~ copy_pattern
29
+ fn_last_part.sub(copy_pattern, "(#{duplicate_counter})")
30
+ else
31
+ "#{fn_last_part} (#{duplicate_counter})"
32
+ end
33
+ new_path = (parts + [fn_last_part, ext]).compact.join('.')
34
+ return new_path unless while_included_in.include?(new_path)
35
+ duplicate_counter += 1
36
+ end
37
+ end
38
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ZipTricks
4
- VERSION = '4.7.4'
4
+ VERSION = '4.8.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zip_tricks
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.7.4
4
+ version: 4.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Julik Tarkhanov
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-06-16 00:00:00.000000000 Z
11
+ date: 2019-08-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -228,6 +228,7 @@ files:
228
228
  - lib/zip_tricks/file_reader/stored_reader.rb
229
229
  - lib/zip_tricks/null_writer.rb
230
230
  - lib/zip_tricks/output_enumerator.rb
231
+ - lib/zip_tricks/path_set.rb
231
232
  - lib/zip_tricks/rack_body.rb
232
233
  - lib/zip_tricks/rails_streaming.rb
233
234
  - lib/zip_tricks/remote_io.rb
@@ -239,6 +240,7 @@ files:
239
240
  - lib/zip_tricks/streamer/entry.rb
240
241
  - lib/zip_tricks/streamer/stored_writer.rb
241
242
  - lib/zip_tricks/streamer/writable.rb
243
+ - lib/zip_tricks/uniquify_filename.rb
242
244
  - lib/zip_tricks/version.rb
243
245
  - lib/zip_tricks/write_and_tell.rb
244
246
  - lib/zip_tricks/write_buffer.rb