zip_tricks 4.7.4 → 4.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d1119a96243d5da6b423a2a3c992efc6fbb580fa
4
- data.tar.gz: 63a1d0b2c93e5b231568e83efb60220862c07b7e
3
+ metadata.gz: 5c7e14be1038151588e016705af5249cacbfea8a
4
+ data.tar.gz: fc4e68ce38ea2091f7a718a8dcfff09d3e40fbb6
5
5
  SHA512:
6
- metadata.gz: 89c7300a0c3af8ceb0408d2c4315fc62e4abf5fecc53ca93219bf9ce30c146d9cc8bca193d35908f1d24516bc1298c12332850ff0352dd561ed67a56a6fe7211
7
- data.tar.gz: 20cdfca60a6ac3cc5beaa57bf74dba06bd09ef18931931ddead388de68e6316650e4aad14fe96b7838b415babaafe9f8aa72ca5a9ccd547eb535f3e6f27901f4
6
+ metadata.gz: 05d776ff8a5c0cea81f66aba9ac37ad2400c0abea38660ca94365f15649c5d9cc8c06f6d768d2f657b70fae823fe6704bcd335611ff9693e9550246cc04ba7e6
7
+ data.tar.gz: de2264703c398fba2d9a0c44b485206f2acdfb119c9aff76ab8855e2d84cdc2be4da6802af7fbe967f780229d8582d256439ee923c59aa0782d4b1d07b9104f0
@@ -1,5 +1,7 @@
1
1
  inherit_gem:
2
2
  wetransfer_style: ruby/default.yml
3
+ AllCops:
4
+ TargetRubyVersion: 2.1
3
5
  Layout/FirstMethodArgumentLineBreak:
4
6
  Enabled: false
5
7
  Layout/FirstMethodParameterLineBreak:
@@ -1,3 +1,13 @@
1
+ ## 4.8.0
2
+
3
+ * Make sure that when directories clobber files and vice versa we raise a clear error. Add `PathSet` which keeps track of entries
4
+ and all the directories needed to create them, document `PathSet`
5
+ * Move the `uniquify_filenames` function into a module for easier removal later
6
+ * Add the `auto_rename_duplicate_filenames` parameter to the `Streamer` constructor. We need to make this optional
7
+ because making filenames unique can be very tricky when subdirectories are involved, and strictly
8
+ speaking we should not be applying this transformation at all - there should be no output of
9
+ duplicate filenames by the caller. So making the filenames unique should be available, but optional.
10
+
1
11
  ## 4.7.4
2
12
 
3
13
  * Use a single fixed capacity string in StreamCRC32.from_io to avoid unnecessary allocations
@@ -0,0 +1,148 @@
1
+ # rubocop:disable Layout/IndentHeredoc
2
+
3
require 'set'

module ZipTricks
  # A ZIP archive contains a flat list of entries. These entries can implicitly
  # create directories when the archive is expanded. For example, an entry with
  # the filename of "some folder/file.docx" will make the unarchiving application
  # create a directory called "some folder" automatically, and then deposit the
  # file "file.docx" in that directory. These "implicit" directories can be
  # arbitrarily nested, and create a tree structure of directories. That structure
  # however is implicit as the archive contains a flat list.
  #
  # This creates opportunities for conflicts. For example, imagine the following
  # structure:
  #
  # * `something/` - specifies an empty directory with the name "something"
  # * `something` - specifies a file, creates a conflict
  #
  # This can be prevented with filename uniqueness checks. It does get funkier however
  # as the rabbit hole goes down:
  #
  # * `dir/subdir/another_subdir/yet_another_subdir/file.bin` - declares a file and directories
  # * `dir/subdir/another_subdir/yet_another_subdir` - declares a file at one of the levels, creates a conflict
  #
  # The results of this ZIP structure aren't very easy to predict as they depend on the
  # application that opens the archive. For example, BOMArchiveHelper on macOS will expand files
  # as they are declared in the ZIP, but once a conflict occurs it will fail with "error -21". It
  # is not very transparent to the user why unarchiving fails, and it can only be prevented
  # reliably at the moment the archive gets created.
  #
  # Unfortunately that conflicts with another "magical" feature of ZipTricks which automatically
  # "fixes" duplicate filenames - filenames (paths) which have already been added to the archive.
  # This fix is performed by appending (1), then (2) and so forth to the filename so that the
  # conflict is avoided. This is not possible to apply to directories, because when one of the
  # path components is reused in multiple filenames it means those entities should end up in
  # the same directory (subdirectory) once the archive is opened.
  class PathSet
    # Base class for every conflict the set can detect. Raised directly when
    # exactly the same file path gets added twice.
    class Conflict < StandardError
    end

    # Raised when a path gets added as a file while a directory
    # (explicit or implicit) already occupies that path.
    class FileClobbersDirectory < Conflict
    end

    # Raised when a path has to become a directory (explicit or implicit)
    # while a file already occupies that path.
    class DirectoryClobbersFile < Conflict
    end

    def initialize
      # Directory paths, always stored normalized (see #non_empty_path_components)
      @known_directories = Set.new
      # File paths exactly as they were given to #add_file_path
      @known_files = Set.new
      # File paths in the same normalized form as the directory paths. Only used
      # for clobber detection, so that "/a/b" or "a//b" added as a file still
      # collide with the directory "a/b".
      @normalized_file_paths = Set.new
    end

    # Adds a directory path to the set of known paths, including
    # all the directories that contain it. So, calling
    #    add_directory_path("dir/dir2/dir3")
    # will add "dir", "dir/dir2", "dir/dir2/dir3".
    #
    # Directories registered before a conflict is found stay registered.
    #
    # @param path[String] the path to the directory to add
    # @raise [DirectoryClobbersFile] if the path or one of its ancestors is already used for a file
    # @return [void]
    def add_directory_path(path)
      path_and_ancestors(path).each do |parent_directory_path|
        if @normalized_file_paths.include?(parent_directory_path)
          # Have to use the old-fashioned heredocs because ZipTricks
          # aims to be compatible with MRI 2.1+ syntax, and squiggly
          # heredoc is only available starting 2.3+
          error_message = <<ERR
The path #{parent_directory_path.inspect} which has to be added
as a directory is already used for a file.

The directory at this path would get created implicitly
to produce #{path.inspect} during decompression.

This would make some archive utilities refuse to open
the ZIP.
ERR
          raise DirectoryClobbersFile, error_message
        end
        @known_directories << parent_directory_path
      end
    end

    # Adds a file path to the set of known paths, including
    # all the directories that contain it. Once a file has been added,
    # it is no longer possible to add a directory having the same path
    # as this would cause conflict.
    #
    # The operation also adds all the containing directories for the file, so
    #    add_file_path("dir/dir2/file.doc")
    # will add "dir" and "dir/dir2" as directories, and "dir/dir2/file.doc" as a file.
    #
    # Duplicate detection compares the path exactly as given, while the
    # file/directory clobber checks ignore leading, trailing and repeated slashes.
    #
    # @param file_path[String] the path to the file to add
    # @raise [Conflict] if exactly this file path has been added before
    # @raise [FileClobbersDirectory] if the path is already used for a directory
    # @raise [DirectoryClobbersFile] if one of the containing directories is already used for a file
    # @return [void]
    def add_file_path(file_path)
      if @known_files.include?(file_path)
        error_message = <<ERR
The file at #{file_path.inspect} has already been included
in the archive. Adding it the second time would cause
the first file to be overwritten during unarchiving, and
could also get the archive flagged as invalid.
ERR
        raise Conflict, error_message
      end

      path_components = non_empty_path_components(file_path)
      normalized_file_path = path_components.join('/')

      # Directories are stored normalized, so compare the normalized form
      if @known_directories.include?(normalized_file_path)
        error_message = <<ERR
The path #{file_path.inspect} is already used for
a directory, but you are trying to add it as a file.

This would make some archive utilities refuse
to open the ZIP.
ERR
        raise FileClobbersDirectory, error_message
      end

      # Add all the directories which this file is contained in
      *dir_components, _file_name = path_components
      add_directory_path(dir_components.join('/'))

      # ...and then the file itself
      @normalized_file_paths << normalized_file_path
      @known_files << file_path
    end

    # Tells whether a specific full path is already known to the PathSet.
    # Can be a path for a directory or for a file. The path is compared
    # exactly as given, no normalization is applied.
    #
    # @param path_in_archive[String] the path to check for inclusion
    # @return [Boolean]
    def include?(path_in_archive)
      @known_files.include?(path_in_archive) || @known_directories.include?(path_in_archive)
    end

    # Clears the contained sets
    # @return [void]
    def clear
      @known_files.clear
      @normalized_file_paths.clear
      @known_directories.clear
    end

    private

    # Splits a path on "/" and drops the empty components produced by
    # leading, trailing or repeated slashes.
    def non_empty_path_components(path)
      path.split('/').reject(&:empty?)
    end

    # Returns the normalized path preceded by all of its ancestors, shallowest
    # first: "a/b/c" gives ["a", "a/b", "a/b/c"]. An empty path gives [].
    def path_and_ancestors(path)
      path_components = non_empty_path_components(path)
      (1..path_components.length).map do |depth|
        path_components.first(depth).join('/')
      end
    end
  end
end
@@ -20,10 +20,11 @@ class ZipTricks::SizeEstimator
20
20
  # uncompressed_size: 89281911, compressed_size: 121908)
21
21
  # end
22
22
  #
23
+ # @param kwargs_for_streamer_new Any options to pass to Streamer, see {Streamer#initialize}
23
24
  # @return [Integer] the size of the resulting archive, in bytes
24
25
  # @yield [SizeEstimator] the estimator
25
- def self.estimate
26
- streamer = ZipTricks::Streamer.new(ZipTricks::NullWriter)
26
+ def self.estimate(**kwargs_for_streamer_new)
27
+ streamer = ZipTricks::Streamer.new(ZipTricks::NullWriter, **kwargs_for_streamer_new)
27
28
  estimator = new(streamer)
28
29
  yield(estimator)
29
30
  streamer.close # Returns the .tell of the contained IO
@@ -140,13 +140,19 @@ class ZipTricks::Streamer
140
140
  # @param stream[IO] the destination IO for the ZIP. Anything that responds to `<<` can be used.
141
141
  # @param writer[ZipTricks::ZipWriter] the object to be used as the writer.
142
142
  # Defaults to an instance of ZipTricks::ZipWriter, normally you won't need to override it
143
- def initialize(stream, writer: create_writer)
143
+ # @param auto_rename_duplicate_filenames[Boolean] whether duplicate filenames, when encountered,
144
+ # should be suffixed with (1), (2) etc. Default value is `true` since it
145
+ # used to be the default behavior.
146
+ #
147
+ # **DEPRECATION NOTICE** In ZipTricks version 5 `auto_rename_duplicate_filenames` will default to `false`
148
+ def initialize(stream, writer: create_writer, auto_rename_duplicate_filenames: true)
144
149
  raise InvalidOutput, 'The stream must respond to #<<' unless stream.respond_to?(:<<)
145
150
 
151
+ @dedupe_filenames = auto_rename_duplicate_filenames
146
152
  @out = ZipTricks::WriteAndTell.new(stream)
147
153
  @files = []
148
154
  @local_header_offsets = []
149
- @filenames_set = Set.new
155
+ @path_set = ZipTricks::PathSet.new
150
156
  @writer = writer
151
157
  end
152
158
 
@@ -387,7 +393,7 @@ class ZipTricks::Streamer
387
393
 
388
394
  # Clear the files so that GC will not have to trace all the way to here to deallocate them
389
395
  @files.clear
390
- @filenames_set.clear
396
+ @path_set.clear
391
397
 
392
398
  # and return the final offset
393
399
  @out.tell
@@ -429,22 +435,31 @@ class ZipTricks::Streamer
429
435
  private
430
436
 
431
437
  def add_file_and_write_local_header(
432
- filename:,
433
- modification_time:,
434
- crc32:,
435
- storage_mode:,
436
- compressed_size:,
437
- uncompressed_size:,
438
- use_data_descriptor:)
439
-
440
- # Clean backslashes and uniqify filenames if there are duplicates
438
+ filename:,
439
+ modification_time:,
440
+ crc32:,
441
+ storage_mode:,
442
+ compressed_size:,
443
+ uncompressed_size:,
444
+ use_data_descriptor:)
445
+
446
+ # Clean backslashes
441
447
  filename = remove_backslash(filename)
442
- filename = uniquify_name(filename) if @filenames_set.include?(filename)
443
-
444
448
  raise UnknownMode, "Unknown compression mode #{storage_mode}" unless [STORED, DEFLATED].include?(storage_mode)
445
-
446
449
  raise Overflow, 'Filename is too long' if filename.bytesize > 0xFFFF
447
450
 
451
+ # If we need to massage filenames to enforce uniqueness,
452
+ # do so before we check for file/directory conflicts
453
+ filename = ZipTricks::UniquifyFilename.call(filename, @path_set) if @dedupe_filenames
454
+
455
+ # Make sure there is no file/directory clobbering (conflicts), or - if deduping is disabled -
456
+ # no duplicate filenames/paths
457
+ if filename.end_with?('/')
458
+ @path_set.add_directory_path(filename)
459
+ else
460
+ @path_set.add_file_path(filename)
461
+ end
462
+
448
463
  if use_data_descriptor
449
464
  crc32 = 0
450
465
  compressed_size = 0
@@ -460,7 +475,6 @@ use_data_descriptor:)
460
475
  use_data_descriptor)
461
476
 
462
477
  @files << e
463
- @filenames_set << e.filename
464
478
  @local_header_offsets << @out.tell
465
479
 
466
480
  @writer.write_local_file_header(io: @out,
@@ -476,28 +490,4 @@ use_data_descriptor:)
476
490
  def remove_backslash(filename)
477
491
  filename.tr('\\', '_')
478
492
  end
479
-
480
- def uniquify_name(filename)
481
- # we add (1), (2), (n) at the end of a filename if there is a duplicate
482
- copy_pattern = /\((\d+)\)$/
483
- parts = filename.split('.')
484
- ext = if parts.last =~ /gz|zip/ && parts.size > 2
485
- parts.pop(2)
486
- elsif parts.size > 1
487
- parts.pop
488
- end
489
- fn_last_part = parts.pop
490
-
491
- duplicate_counter = 1
492
- loop do
493
- fn_last_part = if fn_last_part =~ copy_pattern
494
- fn_last_part.sub(copy_pattern, "(#{duplicate_counter})")
495
- else
496
- "#{fn_last_part} (#{duplicate_counter})"
497
- end
498
- new_filename = (parts + [fn_last_part, ext]).compact.join('.')
499
- return new_filename unless @filenames_set.include?(new_filename)
500
- duplicate_counter += 1
501
- end
502
- end
503
493
  end
@@ -0,0 +1,38 @@
1
module ZipTricks
  module UniquifyFilename
    # Makes a given filename unique by appending a (n) suffix
    # just before the filename extension. So "file.txt" gets
    # transformed into "file (1).txt". The transformation is applied
    # repeatedly as long as the generated filename is present
    # in `while_included_in` object.
    #
    # Only the last path component is altered: the directories leading
    # to the file are left untouched even when their names contain dots, so
    # "v1.0/readme" becomes "v1.0/readme (1)".
    #
    # @param path[String] the path to make unique
    # @param while_included_in[#include?] an object that stores the list of already used paths
    # @return [String] the path as is, or with the suffix required to make it unique
    def self.call(path, while_included_in)
      return path unless while_included_in.include?(path)

      # Set the directories aside, so that a dot in a directory name is never
      # mistaken for the start of the extension. For a path without "/" both
      # the directory and the separator are empty strings.
      directory, separator, basename = path.rpartition('/')

      # we add (1), (2), (n) at the end of a filename before the filename extension,
      # but only if there is a duplicate
      copy_pattern = /\((\d+)\)$/
      parts = basename.split('.')
      # NOTE(review): /gz|zip/ is a substring match, so extensions such as "tgz"
      # also count as compound. Left as it was to keep existing names stable.
      ext = if parts.last =~ /gz|zip/ && parts.size > 2
        parts.pop(2) # compound extension, such as "tar.gz"
      elsif parts.size > 1
        parts.pop
      end
      fn_last_part = parts.pop

      duplicate_counter = 1
      loop do
        # An existing "(n)" suffix gets its counter replaced instead of a second suffix added
        fn_last_part = if fn_last_part =~ copy_pattern
          fn_last_part.sub(copy_pattern, "(#{duplicate_counter})")
        else
          "#{fn_last_part} (#{duplicate_counter})"
        end
        new_basename = (parts + [fn_last_part, ext]).compact.join('.')
        new_path = directory + separator + new_basename
        return new_path unless while_included_in.include?(new_path)
        duplicate_counter += 1
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ZipTricks
4
- VERSION = '4.7.4'
4
+ VERSION = '4.8.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zip_tricks
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.7.4
4
+ version: 4.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Julik Tarkhanov
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-06-16 00:00:00.000000000 Z
11
+ date: 2019-08-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -228,6 +228,7 @@ files:
228
228
  - lib/zip_tricks/file_reader/stored_reader.rb
229
229
  - lib/zip_tricks/null_writer.rb
230
230
  - lib/zip_tricks/output_enumerator.rb
231
+ - lib/zip_tricks/path_set.rb
231
232
  - lib/zip_tricks/rack_body.rb
232
233
  - lib/zip_tricks/rails_streaming.rb
233
234
  - lib/zip_tricks/remote_io.rb
@@ -239,6 +240,7 @@ files:
239
240
  - lib/zip_tricks/streamer/entry.rb
240
241
  - lib/zip_tricks/streamer/stored_writer.rb
241
242
  - lib/zip_tricks/streamer/writable.rb
243
+ - lib/zip_tricks/uniquify_filename.rb
242
244
  - lib/zip_tricks/version.rb
243
245
  - lib/zip_tricks/write_and_tell.rb
244
246
  - lib/zip_tricks/write_buffer.rb