zip_tricks 4.7.4 → 4.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/CHANGELOG.md +10 -0
- data/lib/zip_tricks/path_set.rb +148 -0
- data/lib/zip_tricks/size_estimator.rb +3 -2
- data/lib/zip_tricks/streamer.rb +30 -40
- data/lib/zip_tricks/uniquify_filename.rb +38 -0
- data/lib/zip_tricks/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c7e14be1038151588e016705af5249cacbfea8a
|
4
|
+
data.tar.gz: fc4e68ce38ea2091f7a718a8dcfff09d3e40fbb6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 05d776ff8a5c0cea81f66aba9ac37ad2400c0abea38660ca94365f15649c5d9cc8c06f6d768d2f657b70fae823fe6704bcd335611ff9693e9550246cc04ba7e6
|
7
|
+
data.tar.gz: de2264703c398fba2d9a0c44b485206f2acdfb119c9aff76ab8855e2d84cdc2be4da6802af7fbe967f780229d8582d256439ee923c59aa0782d4b1d07b9104f0
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
## 4.8.0
|
2
|
+
|
3
|
+
* Make sure that when directories clobber files and vice versa we raise a clear error. Add `PathSet` which keeps track of entries
|
4
|
+
and all the directories needed to create them, document `PathSet`
|
5
|
+
* Move the `uniquify_filenames` function into a module for easier removal later
|
6
|
+
* Add the `auto_rename_duplicate_filenames` parameter to `Streamer` constructor. We need to make this optional
|
7
|
+
because making filenames unique can be very tricky when subdirectories are involved, and strictly
|
8
|
+
speaking we should not be applying this transformation at all - there should be no output of
|
9
|
+
duplicate filenames by the caller. So making the filenames unique should be available, but optional.
|
10
|
+
|
1
11
|
## 4.7.4
|
2
12
|
|
3
13
|
* Use a single fixed capacity string in StreamCRC32.from_io to avoid unnecessary allocations
|
@@ -0,0 +1,148 @@
|
|
1
|
+
# rubocop:disable Layout/IndentHeredoc

require 'set'

module ZipTricks
  # A ZIP archive contains a flat list of entries. These entries can implicitly
  # create directories when the archive is expanded. For example, an entry with
  # the filename of "some folder/file.docx" will make the unarchiving application
  # create a directory called "some folder" automatically, and then deposit the
  # file "file.docx" in that directory. These "implicit" directories can be
  # arbitrarily nested, and create a tree structure of directories. That structure
  # however is implicit as the archive contains a flat list.
  #
  # This creates opportunities for conflicts. For example, imagine the following
  # structure:
  #
  # * `something/` - specifies an empty directory with the name "something"
  # * `something` - specifies a file, creates a conflict
  #
  # This can be prevented with filename uniqueness checks. It does get funkier however
  # as the rabbit hole goes down:
  #
  # * `dir/subdir/another_subdir/yet_another_subdir/file.bin` - declares a file and directories
  # * `dir/subdir/another_subdir/yet_another_subdir` - declares a file at one of the levels, creates a conflict
  #
  # The results of this ZIP structure aren't very easy to predict as they depend on the
  # application that opens the archive. For example, BOMArchiveHelper on macOS will expand files
  # as they are declared in the ZIP, but once a conflict occurs it will fail with "error -21". It
  # is not very transparent to the user why unarchiving fails, and it has to - and can reliably - only
  # be prevented when the archive gets created.
  #
  # Unfortunately that conflicts with another "magical" feature of ZipTricks which automatically
  # "fixes" duplicate filenames - filenames (paths) which have already been added to the archive.
  # This fix is performed by appending (1), then (2) and so forth to the filename so that the
  # conflict is avoided. This is not possible to apply to directories, because when one of the
  # path components is reused in multiple filenames it means those entities should end up in
  # the same directory (subdirectory) once the archive is opened.
  #
  # Both files and directories are tracked by their normalized path, which is the
  # path with all empty components dropped. This way "dir//file", "/dir/file" and
  # "dir/file" are treated as the same location, and a redundant slash cannot be
  # used to sneak a conflicting entry past the checks.
  class PathSet
    # Base class of all the conflicts detected by the PathSet. Raised as-is
    # when the same file path gets added twice.
    class Conflict < StandardError
    end

    # Raised when a file is added at a path which is already used for a directory
    class FileClobbersDirectory < Conflict
    end

    # Raised when a directory has to be created at a path which is already used for a file
    class DirectoryClobbersFile < Conflict
    end

    def initialize
      @known_directories = Set.new
      @known_files = Set.new
    end

    # Adds a directory path to the set of known paths, including
    # all the directories that contain it. So, calling
    #    add_directory_path("dir/dir2/dir3")
    # will add "dir", "dir/dir2", "dir/dir2/dir3".
    #
    # Directories which precede the conflicting one in the path stay
    # registered if the call raises.
    #
    # @param path[String] the path to the directory to add
    # @raise [DirectoryClobbersFile] if the path or one of its ancestors is already used for a file
    # @return [void]
    def add_directory_path(path)
      path_and_ancestors(path).each do |parent_directory_path|
        if @known_files.include?(parent_directory_path)
          raise DirectoryClobbersFile, directory_clobbers_file_message(parent_directory_path, path)
        end
        @known_directories << parent_directory_path
      end
    end

    # Adds a file path to the set of known paths, including
    # all the directories that contain it. Once a file has been added,
    # it is no longer possible to add a directory having the same path
    # as this would cause conflict.
    #
    # The operation also adds all the containing directories for the file, so
    #    add_file_path("dir/dir2/file.doc")
    # will add "dir" and "dir/dir2" as directories, and "dir/dir2/file.doc" as a file.
    #
    # @param file_path[String] the path to the file to add
    # @raise [Conflict] if a file has already been added at this path
    # @raise [FileClobbersDirectory] if the path is already used for a directory
    # @raise [DirectoryClobbersFile] if one of the containing directories is already used for a file
    # @return [void]
    def add_file_path(file_path)
      components = non_empty_path_components(file_path)
      # Directories are stored in normalized form, so the file has to be compared
      # (and stored) in the same form - otherwise "dir//file" would not be seen
      # as clashing with the directory "dir/file" and vice versa.
      normalized_path = components.join('/')

      raise Conflict, duplicate_file_message(file_path) if @known_files.include?(normalized_path)

      if @known_directories.include?(normalized_path)
        raise FileClobbersDirectory, file_clobbers_directory_message(file_path)
      end

      # Add all the directories which this file is contained in
      *dir_components, _file_name = components
      add_directory_path(dir_components.join('/'))

      # ...and then the file itself
      @known_files << normalized_path
    end

    # Tells whether a specific full path is already known to the PathSet.
    # Can be a path for a directory or for a file.
    #
    # File paths are matched in normalized form (empty path components are ignored).
    # Directory paths are matched verbatim, so a path with a trailing slash
    # such as "dir/" is not reported as included just because the directory "dir" is known.
    #
    # @param path_in_archive[String] the path to check for inclusion
    # @return [Boolean]
    def include?(path_in_archive)
      normalized_path = non_empty_path_components(path_in_archive).join('/')
      @known_files.include?(normalized_path) || @known_directories.include?(path_in_archive)
    end

    # Clears the contained sets
    # @return [void]
    def clear
      @known_files.clear
      @known_directories.clear
    end

    private

    # Splits the path on "/" and drops the empty components produced by
    # leading, trailing or repeated slashes
    def non_empty_path_components(path)
      path.split('/').reject(&:empty?)
    end

    # Returns the normalized path of every ancestor directory followed by the
    # normalized path itself, outermost first ("a/b/c" gives "a", "a/b", "a/b/c")
    def path_and_ancestors(path)
      non_empty_path_components(path).each_with_object([]) do |component, seen|
        seen << [seen.last, component].compact.join('/')
      end
    end

    # The message builders below have to use the old-fashioned heredocs because
    # ZipTricks aims to be compatible with MRI 2.1+ syntax, and squiggly
    # heredoc is only available starting 2.3+

    def directory_clobbers_file_message(clobbered_path, requested_path)
      <<ERR
The path #{clobbered_path.inspect} which has to be added
as a directory is already used for a file.

The directory at this path would get created implicitly
to produce #{requested_path.inspect} during decompression.

This would make some archive utilities refuse to open
the ZIP.
ERR
    end

    def duplicate_file_message(file_path)
      <<ERR
The file at #{file_path.inspect} has already been included
in the archive. Adding it the second time would cause
the first file to be overwritten during unarchiving, and
could also get the archive flagged as invalid.
ERR
    end

    def file_clobbers_directory_message(file_path)
      <<ERR
The path #{file_path.inspect} is already used for
a directory, but you are trying to add it as a file.

This would make some archive utilities refuse
to open the ZIP.
ERR
    end
  end
end
|
@@ -20,10 +20,11 @@ class ZipTricks::SizeEstimator
|
|
20
20
|
# uncompressed_size: 89281911, compressed_size: 121908)
|
21
21
|
# end
|
22
22
|
#
|
23
|
+
# @param kwargs_for_streamer_new Any options to pass to Streamer, see {Streamer#initialize}
|
23
24
|
# @return [Integer] the size of the resulting archive, in bytes
|
24
25
|
# @yield [SizeEstimator] the estimator
|
25
|
-
def self.estimate
|
26
|
-
streamer = ZipTricks::Streamer.new(ZipTricks::NullWriter)
|
26
|
+
def self.estimate(**kwargs_for_streamer_new)
|
27
|
+
streamer = ZipTricks::Streamer.new(ZipTricks::NullWriter, **kwargs_for_streamer_new)
|
27
28
|
estimator = new(streamer)
|
28
29
|
yield(estimator)
|
29
30
|
streamer.close # Returns the .tell of the contained IO
|
data/lib/zip_tricks/streamer.rb
CHANGED
@@ -140,13 +140,19 @@ class ZipTricks::Streamer
|
|
140
140
|
# @param stream[IO] the destination IO for the ZIP. Anything that responds to `<<` can be used.
|
141
141
|
# @param writer[ZipTricks::ZipWriter] the object to be used as the writer.
|
142
142
|
# Defaults to an instance of ZipTricks::ZipWriter, normally you won't need to override it
|
143
|
-
|
143
|
+
# @param auto_rename_duplicate_filenames[Boolean] whether duplicate filenames, when encountered,
|
144
|
+
# should be suffixed with (1), (2) etc. Default value is `true` since it
|
145
|
+
# used to be the default behavior.
|
146
|
+
#
|
147
|
+
# **DEPRECATION NOTICE** In ZipTricks version 5 `auto_rename_duplicate_filenames` will default to `false`
|
148
|
+
def initialize(stream, writer: create_writer, auto_rename_duplicate_filenames: true)
|
144
149
|
raise InvalidOutput, 'The stream must respond to #<<' unless stream.respond_to?(:<<)
|
145
150
|
|
151
|
+
@dedupe_filenames = auto_rename_duplicate_filenames
|
146
152
|
@out = ZipTricks::WriteAndTell.new(stream)
|
147
153
|
@files = []
|
148
154
|
@local_header_offsets = []
|
149
|
-
@
|
155
|
+
@path_set = ZipTricks::PathSet.new
|
150
156
|
@writer = writer
|
151
157
|
end
|
152
158
|
|
@@ -387,7 +393,7 @@ class ZipTricks::Streamer
|
|
387
393
|
|
388
394
|
# Clear the files so that GC will not have to trace all the way to here to deallocate them
|
389
395
|
@files.clear
|
390
|
-
@
|
396
|
+
@path_set.clear
|
391
397
|
|
392
398
|
# and return the final offset
|
393
399
|
@out.tell
|
@@ -429,22 +435,31 @@ class ZipTricks::Streamer
|
|
429
435
|
private
|
430
436
|
|
431
437
|
def add_file_and_write_local_header(
|
432
|
-
filename:,
|
433
|
-
modification_time:,
|
434
|
-
crc32:,
|
435
|
-
storage_mode:,
|
436
|
-
compressed_size:,
|
437
|
-
uncompressed_size:,
|
438
|
-
use_data_descriptor:)
|
439
|
-
|
440
|
-
# Clean backslashes
|
438
|
+
filename:,
|
439
|
+
modification_time:,
|
440
|
+
crc32:,
|
441
|
+
storage_mode:,
|
442
|
+
compressed_size:,
|
443
|
+
uncompressed_size:,
|
444
|
+
use_data_descriptor:)
|
445
|
+
|
446
|
+
# Clean backslashes
|
441
447
|
filename = remove_backslash(filename)
|
442
|
-
filename = uniquify_name(filename) if @filenames_set.include?(filename)
|
443
|
-
|
444
448
|
raise UnknownMode, "Unknown compression mode #{storage_mode}" unless [STORED, DEFLATED].include?(storage_mode)
|
445
|
-
|
446
449
|
raise Overflow, 'Filename is too long' if filename.bytesize > 0xFFFF
|
447
450
|
|
451
|
+
# If we need to massage filenames to enforce uniqueness,
|
452
|
+
# do so before we check for file/directory conflicts
|
453
|
+
filename = ZipTricks::UniquifyFilename.call(filename, @path_set) if @dedupe_filenames
|
454
|
+
|
455
|
+
# Make sure there is no file/directory clobbering (conflicts), or - if deduping is disabled -
|
456
|
+
# no duplicate filenames/paths
|
457
|
+
if filename.end_with?('/')
|
458
|
+
@path_set.add_directory_path(filename)
|
459
|
+
else
|
460
|
+
@path_set.add_file_path(filename)
|
461
|
+
end
|
462
|
+
|
448
463
|
if use_data_descriptor
|
449
464
|
crc32 = 0
|
450
465
|
compressed_size = 0
|
@@ -460,7 +475,6 @@ use_data_descriptor:)
|
|
460
475
|
use_data_descriptor)
|
461
476
|
|
462
477
|
@files << e
|
463
|
-
@filenames_set << e.filename
|
464
478
|
@local_header_offsets << @out.tell
|
465
479
|
|
466
480
|
@writer.write_local_file_header(io: @out,
|
@@ -476,28 +490,4 @@ use_data_descriptor:)
|
|
476
490
|
# Replaces every backslash in the given filename with an underscore.
#
# @param filename[String] the filename to clean
# @return [String] a new string with each `\` replaced by `_`
def remove_backslash(filename)
  filename.gsub('\\', '_')
end
|
479
|
-
|
480
|
-
def uniquify_name(filename)
|
481
|
-
# we add (1), (2), (n) at the end of a filename if there is a duplicate
|
482
|
-
copy_pattern = /\((\d+)\)$/
|
483
|
-
parts = filename.split('.')
|
484
|
-
ext = if parts.last =~ /gz|zip/ && parts.size > 2
|
485
|
-
parts.pop(2)
|
486
|
-
elsif parts.size > 1
|
487
|
-
parts.pop
|
488
|
-
end
|
489
|
-
fn_last_part = parts.pop
|
490
|
-
|
491
|
-
duplicate_counter = 1
|
492
|
-
loop do
|
493
|
-
fn_last_part = if fn_last_part =~ copy_pattern
|
494
|
-
fn_last_part.sub(copy_pattern, "(#{duplicate_counter})")
|
495
|
-
else
|
496
|
-
"#{fn_last_part} (#{duplicate_counter})"
|
497
|
-
end
|
498
|
-
new_filename = (parts + [fn_last_part, ext]).compact.join('.')
|
499
|
-
return new_filename unless @filenames_set.include?(new_filename)
|
500
|
-
duplicate_counter += 1
|
501
|
-
end
|
502
|
-
end
|
503
493
|
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module ZipTricks
  # Generates paths which do not clash with the paths already present
  # in a collection, by adding a numeric suffix to the filename.
  module UniquifyFilename
    # Makes a given filename unique by appending a (n) suffix
    # just before the filename extension. So "file.txt" gets
    # transformed into "file (1).txt". The transformation is applied
    # repeatedly as long as the generated filename is present
    # in `while_included_in` object.
    #
    # Only the last component of the path (the filename itself) gets changed,
    # the directories leading to it are kept as they are - even when they
    # contain dots. So "some.dir/file" becomes "some.dir/file (1)" and the file
    # stays in the directory it was destined for.
    #
    # @param path[String] the path to make unique
    # @param while_included_in[#include?] an object that stores the list of already used paths
    # @return [String] the path as is, or with the suffix required to make it unique
    def self.call(path, while_included_in)
      return path unless while_included_in.include?(path)

      # we add (1), (2), (n) at the end of a filename before the filename extension,
      # but only if there is a duplicate
      copy_pattern = /\((\d+)\)$/

      # Split off the directories first, otherwise a dot in a directory name
      # would be taken for the start of the filename extension
      directory, separator, basename = path.rpartition('/')

      parts = basename.split('.')
      # Double extensions such as ".tar.gz" are kept together
      ext = if parts.size > 2 && parts.last =~ /gz|zip/
        parts.pop(2).join('.')
      elsif parts.size > 1
        parts.pop
      end
      fn_last_part = parts.pop

      duplicate_counter = 1
      loop do
        fn_last_part = if fn_last_part =~ copy_pattern
          fn_last_part.sub(copy_pattern, "(#{duplicate_counter})")
        else
          "#{fn_last_part} (#{duplicate_counter})"
        end
        new_basename = (parts + [fn_last_part, ext]).compact.join('.')
        new_path = "#{directory}#{separator}#{new_basename}"
        return new_path unless while_included_in.include?(new_path)
        duplicate_counter += 1
      end
    end
  end
end
|
data/lib/zip_tricks/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zip_tricks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.7.4
|
4
|
+
version: 4.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Julik Tarkhanov
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-08-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -228,6 +228,7 @@ files:
|
|
228
228
|
- lib/zip_tricks/file_reader/stored_reader.rb
|
229
229
|
- lib/zip_tricks/null_writer.rb
|
230
230
|
- lib/zip_tricks/output_enumerator.rb
|
231
|
+
- lib/zip_tricks/path_set.rb
|
231
232
|
- lib/zip_tricks/rack_body.rb
|
232
233
|
- lib/zip_tricks/rails_streaming.rb
|
233
234
|
- lib/zip_tricks/remote_io.rb
|
@@ -239,6 +240,7 @@ files:
|
|
239
240
|
- lib/zip_tricks/streamer/entry.rb
|
240
241
|
- lib/zip_tricks/streamer/stored_writer.rb
|
241
242
|
- lib/zip_tricks/streamer/writable.rb
|
243
|
+
- lib/zip_tricks/uniquify_filename.rb
|
242
244
|
- lib/zip_tricks/version.rb
|
243
245
|
- lib/zip_tricks/write_and_tell.rb
|
244
246
|
- lib/zip_tricks/write_buffer.rb
|