zip_tricks 4.7.4 → 4.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/CHANGELOG.md +10 -0
- data/lib/zip_tricks/path_set.rb +148 -0
- data/lib/zip_tricks/size_estimator.rb +3 -2
- data/lib/zip_tricks/streamer.rb +30 -40
- data/lib/zip_tricks/uniquify_filename.rb +38 -0
- data/lib/zip_tricks/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c7e14be1038151588e016705af5249cacbfea8a
|
4
|
+
data.tar.gz: fc4e68ce38ea2091f7a718a8dcfff09d3e40fbb6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 05d776ff8a5c0cea81f66aba9ac37ad2400c0abea38660ca94365f15649c5d9cc8c06f6d768d2f657b70fae823fe6704bcd335611ff9693e9550246cc04ba7e6
|
7
|
+
data.tar.gz: de2264703c398fba2d9a0c44b485206f2acdfb119c9aff76ab8855e2d84cdc2be4da6802af7fbe967f780229d8582d256439ee923c59aa0782d4b1d07b9104f0
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
## 4.8.0
|
2
|
+
|
3
|
+
* Make sure that when directories clobber files and vice versa we raise a clear error. Add `PathSet` which keeps track of entries
|
4
|
+
and all the directories needed to create them, document `PathSet`
|
5
|
+
* Move the `uniquify_filenames` function into a module for easier removal later
|
6
|
+
* Add the `auto_rename_duplicate_filenames` parameter to `Streamer` constructor. We need to make this optional
|
7
|
+
because making filenames unique can be very tricky when subdirectories are involved, and strictly
|
8
|
+
speaking we should not be applying this transformation at all - there should be no output of
|
9
|
+
duplicate filenames by the caller. So making the filenames unique should be available, but optional.
|
10
|
+
|
1
11
|
## 4.7.4
|
2
12
|
|
3
13
|
* Use a single fixed capacity string in StreamCRC32.from_io to avoid unnecessary allocations
|
@@ -0,0 +1,148 @@
|
|
1
|
+
# rubocop:disable Layout/IndentHeredoc
|
2
|
+
|
3
|
+
# A ZIP archive contains a flat list of entries. These entries can implicitly
|
4
|
+
# create directories when the archive is expanded. For example, an entry with
|
5
|
+
# the filename of "some folder/file.docx" will make the unarchiving application
|
6
|
+
# create a directory called "some folder" automatically, and then deposit the
|
7
|
+
# file "file.docx" in that directory. These "implicit" directories can be
|
8
|
+
# arbitrarily nested, and create a tree structure of directories. That structure
|
9
|
+
# however is implicit as the archive contains a flat list.
|
10
|
+
#
|
11
|
+
# This creates opportunities for conflicts. For example, imagine the following
|
12
|
+
# structure:
|
13
|
+
#
|
14
|
+
# * `something/` - specifies an empty directory with the name "something"
|
15
|
+
# * `something` - specifies a file, creates a conflict
|
16
|
+
#
|
17
|
+
# This can be prevented with filename uniqueness checks. It does get funkier however
|
18
|
+
# as the rabbit hole goes down:
|
19
|
+
#
|
20
|
+
# * `dir/subdir/another_subdir/yet_another_subdir/file.bin` - declares a file and directories
|
21
|
+
# * `dir/subdir/another_subdir/yet_another_subdir` - declares a file at one of the levels, creates a conflict
|
22
|
+
#
|
23
|
+
# The results of this ZIP structure aren't very easy to predict as they depend on the
|
24
|
+
# application that opens the archive. For example, BOMArchiveHelper on macOS will expand files
|
25
|
+
# as they are declared in the ZIP, but once a conflict occurs it will fail with "error -21". It
|
26
|
+
# is not very transparent to the user why unarchiving fails, and it has to - and can reliably - only
|
27
|
+
# be prevented when the archive gets created.
|
28
|
+
#
|
29
|
+
# Unfortunately that conflicts with another "magical" feature of ZipTricks which automatically
|
30
|
+
# "fixes" duplicate filenames - filenames (paths) which have already been added to the archive.
|
31
|
+
# This fix is performed by appending (1), then (2) and so forth to the filename so that the
|
32
|
+
# conflict is avoided. This is not possible to apply to directories, because when one of the
|
33
|
+
# path components is reused in multiple filenames it means those entities should end up in
|
34
|
+
# the same directory (subdirectory) once the archive is opened.
|
35
|
+
class ZipTricks::PathSet
|
36
|
+
class Conflict < StandardError
|
37
|
+
end
|
38
|
+
|
39
|
+
class FileClobbersDirectory < Conflict
|
40
|
+
end
|
41
|
+
|
42
|
+
class DirectoryClobbersFile < Conflict
|
43
|
+
end
|
44
|
+
|
45
|
+
def initialize
|
46
|
+
@known_directories = Set.new
|
47
|
+
@known_files = Set.new
|
48
|
+
end
|
49
|
+
|
50
|
+
# Adds a directory path to the set of known paths, including
|
51
|
+
# all the directories that contain it. So, calling
|
52
|
+
# add_directory_path("dir/dir2/dir3")
|
53
|
+
# will add "dir", "dir/dir2", "dir/dir2/dir3".
|
54
|
+
#
|
55
|
+
# @param path[String] the path to the directory to add
|
56
|
+
# @return [void]
|
57
|
+
def add_directory_path(path)
|
58
|
+
path_and_ancestors(path).each do |parent_directory_path|
|
59
|
+
if @known_files.include?(parent_directory_path)
|
60
|
+
# Have to use the old-fashioned heredocs because ZipTricks
|
61
|
+
# aims to be compatible with MRI 2.1+ syntax, and squiggly
|
62
|
+
# heredoc is only available starting 2.3+
|
63
|
+
error_message = <<ERR
|
64
|
+
The path #{parent_directory_path.inspect} which has to be added
|
65
|
+
as a directory is already used for a file.
|
66
|
+
|
67
|
+
The directory at this path would get created implicitly
|
68
|
+
to produce #{path.inspect} during decompresison.
|
69
|
+
|
70
|
+
This would make some archive utilities refuse to open
|
71
|
+
the ZIP.
|
72
|
+
ERR
|
73
|
+
raise DirectoryClobbersFile, error_message
|
74
|
+
end
|
75
|
+
@known_directories << parent_directory_path
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
# Adds a file path to the set of known paths, including
|
80
|
+
# all the directories that contain it. Once a file has been added,
|
81
|
+
# it is no longer possible to add a directory having the same path
|
82
|
+
# as this would cause conflict.
|
83
|
+
#
|
84
|
+
# The operation also adds all the containing directories for the file, so
|
85
|
+
# add_file_path("dir/dir2/file.doc")
|
86
|
+
# will add "dir" and "dir/dir2" as directories, and "dir/dir2/file.doc" as a file.
|
87
|
+
#
|
88
|
+
# @param file_path[String] the path to the file to add
|
89
|
+
# @return [void]
|
90
|
+
def add_file_path(file_path)
|
91
|
+
if @known_files.include?(file_path)
|
92
|
+
error_message = <<ERR
|
93
|
+
The file at #{file_path.inspect} has already been included
|
94
|
+
in the archive. Adding it the second time would cause
|
95
|
+
the first file to be overwritten during unarchiving, and
|
96
|
+
could also get the archive flagged as invalid.
|
97
|
+
ERR
|
98
|
+
raise Conflict, error_message
|
99
|
+
end
|
100
|
+
|
101
|
+
if @known_directories.include?(file_path)
|
102
|
+
error_message = <<ERR
|
103
|
+
The path #{file_path.inspect} is already used for
|
104
|
+
a directory, but you are trying to add it as a file.
|
105
|
+
|
106
|
+
This would make some archive utilities refuse
|
107
|
+
to open the ZIP.
|
108
|
+
ERR
|
109
|
+
raise FileClobbersDirectory, error_message
|
110
|
+
end
|
111
|
+
|
112
|
+
# Add all the directories which this file is contained in
|
113
|
+
*dir_components, _file_name = non_empty_path_components(file_path)
|
114
|
+
add_directory_path(dir_components.join('/'))
|
115
|
+
|
116
|
+
# ...and then the file itself
|
117
|
+
@known_files << file_path
|
118
|
+
end
|
119
|
+
|
120
|
+
# Tells whether a specific full path is already known to the PathSet.
|
121
|
+
# Can be a path for a directory or for a file.
|
122
|
+
#
|
123
|
+
# @param path_in_archive[String] the path to check for inclusion
|
124
|
+
# @return [Boolean]
|
125
|
+
def include?(path_in_archive)
|
126
|
+
@known_files.include?(path_in_archive) || @known_directories.include?(path_in_archive)
|
127
|
+
end
|
128
|
+
|
129
|
+
# Clears the contained sets
|
130
|
+
# @return [void]
|
131
|
+
def clear
|
132
|
+
@known_files.clear
|
133
|
+
@known_directories.clear
|
134
|
+
end
|
135
|
+
|
136
|
+
private
|
137
|
+
|
138
|
+
def non_empty_path_components(path)
|
139
|
+
path.split('/').reject(&:empty?)
|
140
|
+
end
|
141
|
+
|
142
|
+
def path_and_ancestors(path)
|
143
|
+
path_components = non_empty_path_components(path)
|
144
|
+
path_components.each_with_object([]) do |component, seen|
|
145
|
+
seen << [seen.last, component].compact.join('/')
|
146
|
+
end
|
147
|
+
end
|
148
|
+
end
|
@@ -20,10 +20,11 @@ class ZipTricks::SizeEstimator
|
|
20
20
|
# uncompressed_size: 89281911, compressed_size: 121908)
|
21
21
|
# end
|
22
22
|
#
|
23
|
+
# @param kwargs_for_streamer_new Any options to pass to Streamer, see {Streamer#initialize}
|
23
24
|
# @return [Integer] the size of the resulting archive, in bytes
|
24
25
|
# @yield [SizeEstimator] the estimator
|
25
|
-
def self.estimate
|
26
|
-
streamer = ZipTricks::Streamer.new(ZipTricks::NullWriter)
|
26
|
+
def self.estimate(**kwargs_for_streamer_new)
|
27
|
+
streamer = ZipTricks::Streamer.new(ZipTricks::NullWriter, **kwargs_for_streamer_new)
|
27
28
|
estimator = new(streamer)
|
28
29
|
yield(estimator)
|
29
30
|
streamer.close # Returns the .tell of the contained IO
|
data/lib/zip_tricks/streamer.rb
CHANGED
@@ -140,13 +140,19 @@ class ZipTricks::Streamer
|
|
140
140
|
# @param stream[IO] the destination IO for the ZIP. Anything that responds to `<<` can be used.
|
141
141
|
# @param writer[ZipTricks::ZipWriter] the object to be used as the writer.
|
142
142
|
# Defaults to an instance of ZipTricks::ZipWriter, normally you won't need to override it
|
143
|
-
|
143
|
+
# @param auto_rename_duplicate_filenames[Boolean] whether duplicate filenames, when encountered,
|
144
|
+
# should be suffixed with (1), (2) etc. Default value is `true` since it
|
145
|
+
# used to be the default behavior.
|
146
|
+
#
|
147
|
+
# **DEPRECATION NOTICE** In ZipTricks version 5 `auto_rename_duplicate_filenames` will default to `false`
|
148
|
+
def initialize(stream, writer: create_writer, auto_rename_duplicate_filenames: true)
|
144
149
|
raise InvalidOutput, 'The stream must respond to #<<' unless stream.respond_to?(:<<)
|
145
150
|
|
151
|
+
@dedupe_filenames = auto_rename_duplicate_filenames
|
146
152
|
@out = ZipTricks::WriteAndTell.new(stream)
|
147
153
|
@files = []
|
148
154
|
@local_header_offsets = []
|
149
|
-
@
|
155
|
+
@path_set = ZipTricks::PathSet.new
|
150
156
|
@writer = writer
|
151
157
|
end
|
152
158
|
|
@@ -387,7 +393,7 @@ class ZipTricks::Streamer
|
|
387
393
|
|
388
394
|
# Clear the files so that GC will not have to trace all the way to here to deallocate them
|
389
395
|
@files.clear
|
390
|
-
@
|
396
|
+
@path_set.clear
|
391
397
|
|
392
398
|
# and return the final offset
|
393
399
|
@out.tell
|
@@ -429,22 +435,31 @@ class ZipTricks::Streamer
|
|
429
435
|
private
|
430
436
|
|
431
437
|
def add_file_and_write_local_header(
|
432
|
-
filename:,
|
433
|
-
modification_time:,
|
434
|
-
crc32:,
|
435
|
-
storage_mode:,
|
436
|
-
compressed_size:,
|
437
|
-
uncompressed_size:,
|
438
|
-
use_data_descriptor:)
|
439
|
-
|
440
|
-
# Clean backslashes
|
438
|
+
filename:,
|
439
|
+
modification_time:,
|
440
|
+
crc32:,
|
441
|
+
storage_mode:,
|
442
|
+
compressed_size:,
|
443
|
+
uncompressed_size:,
|
444
|
+
use_data_descriptor:)
|
445
|
+
|
446
|
+
# Clean backslashes
|
441
447
|
filename = remove_backslash(filename)
|
442
|
-
filename = uniquify_name(filename) if @filenames_set.include?(filename)
|
443
|
-
|
444
448
|
raise UnknownMode, "Unknown compression mode #{storage_mode}" unless [STORED, DEFLATED].include?(storage_mode)
|
445
|
-
|
446
449
|
raise Overflow, 'Filename is too long' if filename.bytesize > 0xFFFF
|
447
450
|
|
451
|
+
# If we need to massage filenames to enforce uniqueness,
|
452
|
+
# do so before we check for file/directory conflicts
|
453
|
+
filename = ZipTricks::UniquifyFilename.call(filename, @path_set) if @dedupe_filenames
|
454
|
+
|
455
|
+
# Make sure there is no file/directory clobbering (conflicts), or - if deduping is disabled -
|
456
|
+
# no duplicate filenames/paths
|
457
|
+
if filename.end_with?('/')
|
458
|
+
@path_set.add_directory_path(filename)
|
459
|
+
else
|
460
|
+
@path_set.add_file_path(filename)
|
461
|
+
end
|
462
|
+
|
448
463
|
if use_data_descriptor
|
449
464
|
crc32 = 0
|
450
465
|
compressed_size = 0
|
@@ -460,7 +475,6 @@ use_data_descriptor:)
|
|
460
475
|
use_data_descriptor)
|
461
476
|
|
462
477
|
@files << e
|
463
|
-
@filenames_set << e.filename
|
464
478
|
@local_header_offsets << @out.tell
|
465
479
|
|
466
480
|
@writer.write_local_file_header(io: @out,
|
@@ -476,28 +490,4 @@ use_data_descriptor:)
|
|
476
490
|
def remove_backslash(filename)
|
477
491
|
filename.tr('\\', '_')
|
478
492
|
end
|
479
|
-
|
480
|
-
def uniquify_name(filename)
|
481
|
-
# we add (1), (2), (n) at the end of a filename if there is a duplicate
|
482
|
-
copy_pattern = /\((\d+)\)$/
|
483
|
-
parts = filename.split('.')
|
484
|
-
ext = if parts.last =~ /gz|zip/ && parts.size > 2
|
485
|
-
parts.pop(2)
|
486
|
-
elsif parts.size > 1
|
487
|
-
parts.pop
|
488
|
-
end
|
489
|
-
fn_last_part = parts.pop
|
490
|
-
|
491
|
-
duplicate_counter = 1
|
492
|
-
loop do
|
493
|
-
fn_last_part = if fn_last_part =~ copy_pattern
|
494
|
-
fn_last_part.sub(copy_pattern, "(#{duplicate_counter})")
|
495
|
-
else
|
496
|
-
"#{fn_last_part} (#{duplicate_counter})"
|
497
|
-
end
|
498
|
-
new_filename = (parts + [fn_last_part, ext]).compact.join('.')
|
499
|
-
return new_filename unless @filenames_set.include?(new_filename)
|
500
|
-
duplicate_counter += 1
|
501
|
-
end
|
502
|
-
end
|
503
493
|
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module ZipTricks::UniquifyFilename
|
2
|
+
|
3
|
+
# Makes a given filename unique by appending a (n) suffix
|
4
|
+
# just before the filename extension. So "file.txt" gets
|
5
|
+
# transformed into "file (1).txt". The transformation is applied
|
6
|
+
# repeatedly as long as the generated filename is present
|
7
|
+
# in the `while_included_in` object
|
8
|
+
#
|
9
|
+
# @param path[String] the path to make unique
|
10
|
+
# @param while_included_in[#include?] an object that stores the list of already used paths
|
11
|
+
# @return [String] the path as is, or with the suffix required to make it unique
|
12
|
+
def self.call(path, while_included_in)
|
13
|
+
return path unless while_included_in.include?(path)
|
14
|
+
|
15
|
+
# we add (1), (2), (n) at the end of a filename before the filename extension,
|
16
|
+
# but only if there is a duplicate
|
17
|
+
copy_pattern = /\((\d+)\)$/
|
18
|
+
parts = path.split('.')
|
19
|
+
ext = if parts.last =~ /gz|zip/ && parts.size > 2
|
20
|
+
parts.pop(2)
|
21
|
+
elsif parts.size > 1
|
22
|
+
parts.pop
|
23
|
+
end
|
24
|
+
fn_last_part = parts.pop
|
25
|
+
|
26
|
+
duplicate_counter = 1
|
27
|
+
loop do
|
28
|
+
fn_last_part = if fn_last_part =~ copy_pattern
|
29
|
+
fn_last_part.sub(copy_pattern, "(#{duplicate_counter})")
|
30
|
+
else
|
31
|
+
"#{fn_last_part} (#{duplicate_counter})"
|
32
|
+
end
|
33
|
+
new_path = (parts + [fn_last_part, ext]).compact.join('.')
|
34
|
+
return new_path unless while_included_in.include?(new_path)
|
35
|
+
duplicate_counter += 1
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
data/lib/zip_tricks/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zip_tricks
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.
|
4
|
+
version: 4.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Julik Tarkhanov
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-08-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -228,6 +228,7 @@ files:
|
|
228
228
|
- lib/zip_tricks/file_reader/stored_reader.rb
|
229
229
|
- lib/zip_tricks/null_writer.rb
|
230
230
|
- lib/zip_tricks/output_enumerator.rb
|
231
|
+
- lib/zip_tricks/path_set.rb
|
231
232
|
- lib/zip_tricks/rack_body.rb
|
232
233
|
- lib/zip_tricks/rails_streaming.rb
|
233
234
|
- lib/zip_tricks/remote_io.rb
|
@@ -239,6 +240,7 @@ files:
|
|
239
240
|
- lib/zip_tricks/streamer/entry.rb
|
240
241
|
- lib/zip_tricks/streamer/stored_writer.rb
|
241
242
|
- lib/zip_tricks/streamer/writable.rb
|
243
|
+
- lib/zip_tricks/uniquify_filename.rb
|
242
244
|
- lib/zip_tricks/version.rb
|
243
245
|
- lib/zip_tricks/write_and_tell.rb
|
244
246
|
- lib/zip_tricks/write_buffer.rb
|