brkraw 0.3.11__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- brkraw/__init__.py +9 -3
- brkraw/apps/__init__.py +12 -0
- brkraw/apps/addon/__init__.py +30 -0
- brkraw/apps/addon/core.py +35 -0
- brkraw/apps/addon/dependencies.py +402 -0
- brkraw/apps/addon/installation.py +500 -0
- brkraw/apps/addon/io.py +21 -0
- brkraw/apps/hook/__init__.py +25 -0
- brkraw/apps/hook/core.py +636 -0
- brkraw/apps/loader/__init__.py +10 -0
- brkraw/apps/loader/core.py +622 -0
- brkraw/apps/loader/formatter.py +288 -0
- brkraw/apps/loader/helper.py +797 -0
- brkraw/apps/loader/info/__init__.py +11 -0
- brkraw/apps/loader/info/scan.py +85 -0
- brkraw/apps/loader/info/scan.yaml +90 -0
- brkraw/apps/loader/info/study.py +69 -0
- brkraw/apps/loader/info/study.yaml +156 -0
- brkraw/apps/loader/info/transform.py +92 -0
- brkraw/apps/loader/types.py +220 -0
- brkraw/cli/__init__.py +5 -0
- brkraw/cli/commands/__init__.py +2 -0
- brkraw/cli/commands/addon.py +327 -0
- brkraw/cli/commands/config.py +205 -0
- brkraw/cli/commands/convert.py +903 -0
- brkraw/cli/commands/hook.py +348 -0
- brkraw/cli/commands/info.py +74 -0
- brkraw/cli/commands/init.py +214 -0
- brkraw/cli/commands/params.py +106 -0
- brkraw/cli/commands/prune.py +288 -0
- brkraw/cli/commands/session.py +371 -0
- brkraw/cli/hook_args.py +80 -0
- brkraw/cli/main.py +83 -0
- brkraw/cli/utils.py +60 -0
- brkraw/core/__init__.py +13 -0
- brkraw/core/config.py +380 -0
- brkraw/core/entrypoints.py +25 -0
- brkraw/core/formatter.py +367 -0
- brkraw/core/fs.py +495 -0
- brkraw/core/jcamp.py +600 -0
- brkraw/core/layout.py +451 -0
- brkraw/core/parameters.py +781 -0
- brkraw/core/zip.py +1121 -0
- brkraw/dataclasses/__init__.py +14 -0
- brkraw/dataclasses/node.py +139 -0
- brkraw/dataclasses/reco.py +33 -0
- brkraw/dataclasses/scan.py +61 -0
- brkraw/dataclasses/study.py +131 -0
- brkraw/default/__init__.py +3 -0
- brkraw/default/pruner_specs/deid4share.yaml +42 -0
- brkraw/default/rules/00_default.yaml +4 -0
- brkraw/default/specs/metadata_dicom.yaml +236 -0
- brkraw/default/specs/metadata_transforms.py +92 -0
- brkraw/resolver/__init__.py +7 -0
- brkraw/resolver/affine.py +539 -0
- brkraw/resolver/datatype.py +69 -0
- brkraw/resolver/fid.py +90 -0
- brkraw/resolver/helpers.py +36 -0
- brkraw/resolver/image.py +188 -0
- brkraw/resolver/nifti.py +370 -0
- brkraw/resolver/shape.py +235 -0
- brkraw/schema/__init__.py +3 -0
- brkraw/schema/context_map.yaml +62 -0
- brkraw/schema/meta.yaml +57 -0
- brkraw/schema/niftiheader.yaml +95 -0
- brkraw/schema/pruner.yaml +55 -0
- brkraw/schema/remapper.yaml +128 -0
- brkraw/schema/rules.yaml +154 -0
- brkraw/specs/__init__.py +10 -0
- brkraw/specs/hook/__init__.py +12 -0
- brkraw/specs/hook/logic.py +31 -0
- brkraw/specs/hook/validator.py +22 -0
- brkraw/specs/meta/__init__.py +5 -0
- brkraw/specs/meta/validator.py +156 -0
- brkraw/specs/pruner/__init__.py +15 -0
- brkraw/specs/pruner/logic.py +361 -0
- brkraw/specs/pruner/validator.py +119 -0
- brkraw/specs/remapper/__init__.py +27 -0
- brkraw/specs/remapper/logic.py +924 -0
- brkraw/specs/remapper/validator.py +314 -0
- brkraw/specs/rules/__init__.py +6 -0
- brkraw/specs/rules/logic.py +263 -0
- brkraw/specs/rules/validator.py +103 -0
- brkraw-0.5.0.dist-info/METADATA +81 -0
- brkraw-0.5.0.dist-info/RECORD +88 -0
- {brkraw-0.3.11.dist-info → brkraw-0.5.0.dist-info}/WHEEL +1 -2
- brkraw-0.5.0.dist-info/entry_points.txt +13 -0
- brkraw/lib/__init__.py +0 -4
- brkraw/lib/backup.py +0 -641
- brkraw/lib/bids.py +0 -0
- brkraw/lib/errors.py +0 -125
- brkraw/lib/loader.py +0 -1220
- brkraw/lib/orient.py +0 -194
- brkraw/lib/parser.py +0 -48
- brkraw/lib/pvobj.py +0 -301
- brkraw/lib/reference.py +0 -245
- brkraw/lib/utils.py +0 -471
- brkraw/scripts/__init__.py +0 -0
- brkraw/scripts/brk_backup.py +0 -106
- brkraw/scripts/brkraw.py +0 -744
- brkraw/ui/__init__.py +0 -0
- brkraw/ui/config.py +0 -17
- brkraw/ui/main_win.py +0 -214
- brkraw/ui/previewer.py +0 -225
- brkraw/ui/scan_info.py +0 -72
- brkraw/ui/scan_list.py +0 -73
- brkraw/ui/subj_info.py +0 -128
- brkraw-0.3.11.dist-info/METADATA +0 -25
- brkraw-0.3.11.dist-info/RECORD +0 -28
- brkraw-0.3.11.dist-info/entry_points.txt +0 -3
- brkraw-0.3.11.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- {brkraw-0.3.11.dist-info → brkraw-0.5.0.dist-info/licenses}/LICENSE +0 -0
brkraw/core/zip.py
ADDED
|
@@ -0,0 +1,1121 @@
|
|
|
1
|
+
"""
|
|
2
|
+
A set of lightweight utilities for working with ZIP archives in-memory and
|
|
3
|
+
providing convenient abstractions for files and directories inside a ZIP.
|
|
4
|
+
|
|
5
|
+
The focus is:
|
|
6
|
+
- Safe, Pythonic dataclasses (FileBuffer, ZippedFile, ZippedDir) wrapping
|
|
7
|
+
raw bytes and zipfile.ZipFile entries.
|
|
8
|
+
- Support for extracting, isolating, and re-packing subtrees of a ZIP archive
|
|
9
|
+
without touching the filesystem unless explicitly requested.
|
|
10
|
+
- Flexible to_filename() dispatcher to persist objects (ZipFile, ZippedDir,
|
|
11
|
+
ZippedFile, BytesIO, or raw bytes) to disk in a normalized way.
|
|
12
|
+
|
|
13
|
+
Key abstractions
|
|
14
|
+
----------------
|
|
15
|
+
- FileBuffer
|
|
16
|
+
A simple wrapper around a binary buffer (an in-memory BytesIO by default, or a spooled temporary file).
|
|
17
|
+
Provides .bytes() to retrieve raw data and .to_filename() to persist
|
|
18
|
+
directly to disk.
|
|
19
|
+
|
|
20
|
+
- ZippedFile
|
|
21
|
+
Represents a single file entry inside a ZIP archive.
|
|
22
|
+
Offers .open(), .read(), .buffer(), .isolate() to access content,
|
|
23
|
+
and .extract_to() to write the raw file to disk.
|
|
24
|
+
|
|
25
|
+
- ZippedDir
|
|
26
|
+
Represents a directory subtree inside a ZIP.
|
|
27
|
+
Provides .isolate() to generate a new ZIP containing only this subtree
|
|
28
|
+
(optionally under a new root directory), .to_filename() to persist it as a
|
|
29
|
+
zip file, and .extract_to() to unpack the subtree to a directory.
|
|
30
|
+
|
|
31
|
+
- walk()
|
|
32
|
+
Like os.walk, but operates over a zipfile.ZipFile.
|
|
33
|
+
Yields (dirpath, dirnames, fileentries) tuples, where fileentries are
|
|
34
|
+
ZippedFile objects with direct access to contents.
|
|
35
|
+
|
|
36
|
+
- fetch_files_in_zip() / fetch_dirs_in_zip()
|
|
37
|
+
Helpers for searching within a ZIP by filename or directory name, supporting
|
|
38
|
+
exact match, wildcards, or regex.
|
|
39
|
+
|
|
40
|
+
- to_filename()
|
|
41
|
+
A generic dispatcher to persist many kinds of in-memory objects. For ZipFile
|
|
42
|
+
and ZippedDir it creates zip archives; for ZippedFile it writes the raw file
|
|
43
|
+
to disk; for bytes/str/BytesIO it writes the raw payload to a file.
|
|
44
|
+
|
|
45
|
+
Typical usage
|
|
46
|
+
-------------
|
|
47
|
+
import zipfile
|
|
48
|
+
from brkraw.core import zip
|
|
49
|
+
|
|
50
|
+
# Load a zip from bytes
|
|
51
|
+
zf = zip.bytes_to_zipfile(zip_bytes)
|
|
52
|
+
|
|
53
|
+
# Walk the archive
|
|
54
|
+
for dirpath, dirnames, files in zip.walk(zf):
|
|
55
|
+
for f in files:
|
|
56
|
+
print(f.name, len(f.read()))
|
|
57
|
+
|
|
58
|
+
# Extract all "config.json" files
|
|
59
|
+
matches = zip.fetch_files_in_zip(zf, "config.json")
|
|
60
|
+
for m in matches:
|
|
61
|
+
buf = m.isolate() # -> FileBuffer
|
|
62
|
+
buf.to_filename("/tmp/config.json")
|
|
63
|
+
|
|
64
|
+
# Isolate a subdirectory into a new in-memory zip
|
|
65
|
+
dirs = zip.fetch_dirs_in_zip(zf, "src")
|
|
66
|
+
if dirs:
|
|
67
|
+
sub = dirs[0] # ZippedDir
|
|
68
|
+
new_zip = sub.isolate(add_root=True, root_name="package-src")
|
|
69
|
+
with new_zip.open("package-src/module.py") as fh:
|
|
70
|
+
print(fh.read().decode("utf-8"))
|
|
71
|
+
|
|
72
|
+
# Optionally persist the isolated zip to disk:
|
|
73
|
+
zip.to_filename(new_zip, "/tmp/package-src.zip")
|
|
74
|
+
|
|
75
|
+
Design notes
|
|
76
|
+
------------
|
|
77
|
+
|
|
78
|
+
- Uses only the stdlib (zipfile, io, shutil) for maximum portability.
|
|
79
|
+
- Preserves timestamps and file permissions (external_attr) where possible.
|
|
80
|
+
- Supports both in-memory workflows (BytesIO) and on-disk workflows
|
|
81
|
+
(via extract_to() or the to_filename() dispatcher).
|
|
82
|
+
- Explicit directory entries are preserved/added so that GUI ZIP browsers
|
|
83
|
+
behave predictably.
|
|
84
|
+
|
|
85
|
+
Exports
|
|
86
|
+
-------
|
|
87
|
+
|
|
88
|
+
- FileBuffer
|
|
89
|
+
- ZippedFile
|
|
90
|
+
- ZippedDir
|
|
91
|
+
- walk
|
|
92
|
+
- bytes_to_zipfile
|
|
93
|
+
- create_from_dir
|
|
94
|
+
- load
|
|
95
|
+
- fetch_files_in_zip
|
|
96
|
+
- fetch_dirs_in_zip
|
|
97
|
+
- to_filename
|
|
98
|
+
"""
|
|
99
|
+
|
|
100
|
+
from __future__ import annotations
|
|
101
|
+
|
|
102
|
+
import fnmatch
|
|
103
|
+
import io
|
|
104
|
+
import os
|
|
105
|
+
import re
|
|
106
|
+
import shutil
|
|
107
|
+
import tempfile
|
|
108
|
+
import zipfile
|
|
109
|
+
from pathlib import Path
|
|
110
|
+
from collections import defaultdict
|
|
111
|
+
from dataclasses import dataclass
|
|
112
|
+
from typing import Any, Dict, IO, Iterable, List, Optional, Tuple, Union, Literal, Set
|
|
113
|
+
|
|
114
|
+
# ---------------------------------------------------------------------------
|
|
115
|
+
# internal helpers
|
|
116
|
+
# ---------------------------------------------------------------------------
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _ensure_parent_dir(path: Union[str, os.PathLike]) -> str:
    """Create the directory that will contain ``path`` and return ``path`` as absolute.

    Parameters
    ----------
    path : str or os.PathLike
        File path whose containing directory must exist.

    Returns
    -------
    str
        Absolute form of ``path``. The path itself is never created.
    """
    target = os.path.abspath(os.fspath(path))
    folder = os.path.dirname(target)
    # Only create the folder when nothing exists at that location yet.
    folder_missing = bool(folder) and not os.path.exists(folder)
    if folder_missing:
        os.makedirs(folder, exist_ok=True)
    return target
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ---------------------------------------------------------------------------
|
|
130
|
+
# FileBuffer
|
|
131
|
+
# ---------------------------------------------------------------------------
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@dataclass
class FileBuffer:
    """Named binary buffer (in-memory or spooled to disk)."""

    name: str  # file name associated with the payload
    buffer: IO[bytes]  # seekable binary stream holding the payload

    def bytes(self) -> bytes:
        """Return the complete payload, leaving the stream position untouched."""
        stream = self.buffer
        saved = stream.tell()
        try:
            stream.seek(0)
            payload = stream.read()
        finally:
            # Restore the caller's position even if reading failed.
            stream.seek(saved)
        return payload

    def to_filename(
        self,
        path: Union[str, os.PathLike],
        *,
        overwrite: bool = True,
        makedirs: bool = True,
    ) -> str:
        """Copy the buffered payload into a file on disk.

        Parameters
        ----------
        path : str or os.PathLike
            Destination file path.
        overwrite : bool, optional
            When False, an existing destination raises FileExistsError.
            Default True.
        makedirs : bool, optional
            When True, missing parent directories are created. Default True.

        Returns
        -------
        str
            Absolute path of the file that was written.

        Raises
        ------
        FileExistsError
            If the destination exists and ``overwrite`` is False.
        """
        abs_path = os.path.abspath(os.fspath(path))

        if not overwrite and os.path.exists(abs_path):
            raise FileExistsError(f"File already exists: {abs_path}")

        if makedirs:
            folder = os.path.dirname(abs_path)
            if folder and not os.path.exists(folder):
                os.makedirs(folder, exist_ok=True)

        stream = self.buffer
        saved = stream.tell()
        try:
            stream.seek(0)
            with open(abs_path, "wb") as sink:
                shutil.copyfileobj(stream, sink)
        finally:
            # The buffer stays usable at its previous position afterwards.
            stream.seek(saved)

        return abs_path
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# ---------------------------------------------------------------------------
|
|
194
|
+
# ZippedFile
|
|
195
|
+
# ---------------------------------------------------------------------------
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
@dataclass
class ZippedFile:
    """A file-like handle to a file inside a ZipFile with convenient accessors."""

    name: str  # basename of the file (e.g., "README.md")
    arcname: str  # archive path inside the zip (e.g., "repo-123/README.md")
    zipobj: zipfile.ZipFile

    def __repr__(self) -> str:
        # repr() must never raise, so any lookup failure degrades to "?".
        try:
            size = self.zipobj.getinfo(self.arcname).file_size
        except Exception:
            size = "?"
        return f"ZippedFile(path='{self.arcname}', size={size})"

    def is_dir(self) -> bool:
        """Return False; a ZippedFile never represents a directory."""
        return False

    def is_file(self) -> bool:
        """Return True; a ZippedFile always represents a file."""
        return True

    def open(self) -> IO[bytes]:
        """Return a readable file-like object (binary). Caller should close it."""
        return self.zipobj.open(self.arcname, "r")

    def read(self) -> bytes:
        """Read entire file content into bytes."""
        return self.zipobj.read(self.arcname)

    def buffer(self) -> io.BytesIO:
        """Return an in-memory BytesIO buffer holding the file content."""
        return io.BytesIO(self.read())

    def isolate(
        self,
        *,
        buffering: Literal["memory", "spooled"] = "memory",
        max_spool_size: int = 10 * 1024 * 1024,
        cache_dir: Optional[Union[str, os.PathLike]] = None,
    ) -> "FileBuffer":
        """Return a FileBuffer for this file content.

        Parameters
        ----------
        buffering : {"memory", "spooled"}, optional
            Use in-memory BytesIO by default. When "spooled", use a
            tempfile.SpooledTemporaryFile that spills to disk past
            max_spool_size to avoid high memory usage.
        max_spool_size : int, optional
            Threshold in bytes before a spooled buffer spills to disk.
        cache_dir : str or os.PathLike, optional
            Directory to place temporary files when buffering="spooled".

        Returns
        -------
        FileBuffer
            Buffer named after this entry, positioned at offset 0.
        """
        if buffering == "spooled":
            spool_dir = os.fspath(cache_dir) if cache_dir is not None else None
            buf = tempfile.SpooledTemporaryFile(max_size=max_spool_size, dir=spool_dir)
            try:
                # Stream the member in chunks: reading it fully first would
                # hold the whole payload in memory and defeat the spooling.
                with self.open() as src:
                    shutil.copyfileobj(src, buf)
                buf.seek(0)
            except BaseException:
                # Do not leak the temporary file when the copy fails.
                buf.close()
                raise
            return FileBuffer(name=self.name, buffer=buf)

        # default: in-memory (a fresh BytesIO already starts at offset 0)
        return FileBuffer(name=self.name, buffer=io.BytesIO(self.read()))

    def extract_to(
        self,
        path: Union[str, os.PathLike],
    ) -> str:
        """Extract this file to a filesystem path.

        If `path` is a directory, the file is written under that directory using
        this entry's name. If `path` is a file path, the content is written
        directly to that path. Use `arcname` in the dispatcher to override the
        output name when calling via to_filename().
        """
        return zippedfile_to_filename(self, path)
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
# ---------------------------------------------------------------------------
|
|
278
|
+
# Create zip from directory
|
|
279
|
+
# ---------------------------------------------------------------------------
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def create_from_dir(
    zip_path: Union[str, os.PathLike],
    source_dir: Union[str, os.PathLike],
    compression: int = zipfile.ZIP_DEFLATED,
) -> str:
    """Create a ZIP archive from the contents of a directory.

    Every sub-directory gets an explicit directory entry and every file is
    stored under its path relative to ``source_dir``. If the output archive
    itself lives inside ``source_dir`` it is left out of the archive.

    Parameters
    ----------
    zip_path : str or os.PathLike
        The path to the output ZIP file. Missing parent directories are created.
    source_dir : str or os.PathLike
        The path to the directory whose contents will be zipped.
    compression : int, optional
        The compression method to use (default: zipfile.ZIP_DEFLATED).

    Returns
    -------
    str
        The absolute path to the created ZIP file.
    """
    zip_path = os.path.abspath(os.fspath(zip_path))
    parent = os.path.dirname(zip_path)
    if parent and not os.path.exists(parent):
        os.makedirs(parent, exist_ok=True)
    source_dir = os.fspath(source_dir)

    with zipfile.ZipFile(zip_path, "w", compression=compression) as zf:
        for root, dirs, files in os.walk(source_dir):
            # Add directory entries
            for d in dirs:
                arcname = os.path.relpath(os.path.join(root, d), source_dir)
                zf.writestr(arcname + "/", b"")
            # Add file entries
            for file in files:
                full_path = os.path.join(root, file)
                # The archive already exists on disk at this point; when it is
                # located inside source_dir it must not be added to itself.
                try:
                    is_output = os.path.samefile(full_path, zip_path)
                except OSError:
                    is_output = False
                if is_output:
                    continue
                zf.write(full_path, os.path.relpath(full_path, source_dir))
    return zip_path
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
# ---------------------------------------------------------------------------
|
|
322
|
+
# ZippedDir
|
|
323
|
+
# ---------------------------------------------------------------------------
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
@dataclass
class ZippedDir:
    """Directory-like node inside a ZipFile. Holds subdirectories and files."""

    name: str  # leaf name of the directory
    path: str  # archive path of the directory
    dirs: List["ZippedDir"]
    files: List["ZippedFile"]

    def __repr__(self) -> str:
        return f"ZippedDir(path='{self.path}', dirs={len(self.dirs)}, files={len(self.files)})"

    def is_dir(self) -> bool:
        """Return True; a ZippedDir always represents a directory."""
        return True

    def is_file(self) -> bool:
        """Return False; a ZippedDir never represents a file."""
        return False

    def as_dict(self) -> Dict[str, Any]:
        """Convert to plain dict (for debugging or serialization)."""
        return {
            "name": self.name,
            "path": self.path,
            "dirs": [d.as_dict() for d in self.dirs],
            "files": [f.name for f in self.files],
        }

    def listdir(self) -> List[str]:
        """List immediate children names (dirs first, then files)."""
        dirnames = sorted(d.name for d in self.dirs)
        filenames = sorted(f.name for f in self.files)
        return dirnames + filenames

    def iterdir(self) -> Iterable[Union["ZippedDir", "ZippedFile"]]:
        """Iterate over children objects (dirs first, then files)."""
        yield from sorted(self.dirs, key=lambda x: x.name)
        yield from sorted(self.files, key=lambda x: x.name)

    def _resolve_zipobj(self) -> zipfile.ZipFile:
        """Resolve the underlying ZipFile from any child file. Raise if not resolvable."""
        stack: List["ZippedDir"] = [self]
        while stack:
            node = stack.pop()
            if node.files:
                return node.files[0].zipobj
            stack.extend(node.dirs)
        raise RuntimeError("Cannot resolve ZipFile for this ZippedDir (no files found).")

    def _archive_prefix(self) -> str:
        """Return self.path as an archive prefix ("" or "some/dir/")."""
        prefix = self.path.strip("/")
        return prefix + "/" if prefix else ""

    @staticmethod
    def _safe_target(base: Path, rel: str) -> Path:
        """Map an archive-relative name onto ``base``, refusing to leave it.

        Raises ValueError when ``rel`` is absolute or uses ".." components so
        that the resulting path would fall outside ``base``.
        """
        base_str = str(base)
        candidate = os.path.normpath(os.path.join(base_str, rel))
        inside = base_str.rstrip(os.sep) + os.sep
        if candidate != base_str and not candidate.startswith(inside):
            raise ValueError(
                f"Unsafe path in archive (escapes {base_str!r}): {rel!r}"
            )
        return Path(candidate)

    def isolate(
        self,
        compression: int = zipfile.ZIP_DEFLATED,
        include_dir_entries: bool = True,
        add_root: bool = False,
        root_name: Union[str, None] = None,
        *,
        buffering: Literal["memory", "spooled"] = "memory",
        max_spool_size: int = 20 * 1024 * 1024,
        cache_dir: Optional[Union[str, os.PathLike]] = None,
    ) -> zipfile.ZipFile:
        """Create a new ZIP containing only this directory subtree.

        By default (add_root=False), the new ZIP root is this directory itself,
        i.e., arcnames are relative to self.path (no extra top-level folder).

        If add_root=True, files are placed under a top-level directory named
        root_name (or self.name if root_name is None). In other words, entries
        will look like "<root_name>/<relative-path-inside-self>".

        Parameters
        ----------
        compression : int, optional
            Zip compression method applied to the copied file members
            (default: ZIP_DEFLATED).
        include_dir_entries : bool, optional
            If True, ensure folder entries (for example "a/", "a/b/") exist.
        add_root : bool, optional
            If True, wrap all contents under a top-level directory.
        root_name : Optional[str], optional
            Name of the top-level directory when add_root is True. If None,
            uses self.name.
        buffering : {"memory", "spooled"}, optional
            Storage for the generated zip. "memory" uses BytesIO; "spooled" uses
            tempfile.SpooledTemporaryFile and spills to disk past max_spool_size.
        max_spool_size : int, optional
            Threshold in bytes for spilling to disk when buffering="spooled".
        cache_dir : str or os.PathLike, optional
            Directory for temporary files when buffering="spooled".

        Returns
        -------
        zipfile.ZipFile
            A ZipFile object containing only this subtree.

        Raises
        ------
        RuntimeError
            If no file below this directory provides the source ZipFile.
        """
        src_zip = self._resolve_zipobj()

        # Normalize to POSIX style used inside zip archives
        prefix = self._archive_prefix()

        # Decide root folder name when requested
        if add_root:
            root = (root_name or (self.name or "root")).strip("/")
            root_prefix = f"{root}/"
        else:
            root_prefix = ""

        out_buf: IO[bytes]
        if buffering == "spooled":
            spool_dir = os.fspath(cache_dir) if cache_dir is not None else None
            out_buf = tempfile.SpooledTemporaryFile(
                max_size=max_spool_size, dir=spool_dir
            )
        else:
            out_buf = io.BytesIO()

        def _write_dir(out_zip: zipfile.ZipFile, arcname: str, date_time=None) -> None:
            # Explicit directory entry with drwxr-xr-x permissions.
            entry = zipfile.ZipInfo(arcname)
            if date_time is not None:
                entry.date_time = date_time
            entry.external_attr = 0o40755 << 16
            out_zip.writestr(entry, b"")

        try:
            with zipfile.ZipFile(out_buf, "w", compression=compression) as out_zip:
                # Optional explicit top-level root
                if add_root and include_dir_entries:
                    _write_dir(out_zip, root_prefix)

                # Copy all entries whose filename starts with the directory prefix
                for info in src_zip.infolist():
                    fn = info.filename
                    if not fn.startswith(prefix):
                        continue
                    rel = fn[len(prefix):]
                    if not rel:
                        continue

                    arcname = root_prefix + rel
                    if arcname.endswith("/"):
                        if include_dir_entries:
                            _write_dir(out_zip, arcname, info.date_time)
                        continue

                    new_info = zipfile.ZipInfo(arcname)
                    new_info.date_time = info.date_time
                    new_info.external_attr = info.external_attr
                    # A fresh ZipInfo defaults to ZIP_STORED and takes
                    # precedence over the archive-level setting, so the
                    # requested method has to be passed explicitly.
                    out_zip.writestr(
                        new_info, src_zip.read(fn), compress_type=compression
                    )

                if include_dir_entries:
                    # Add folder entries the source archive only implied.
                    written = set(out_zip.namelist())
                    need_dirs: Set[str] = set()
                    for name in written:
                        if name.endswith("/"):
                            continue
                        parts = name.split("/")[:-1]
                        for depth in range(1, len(parts) + 1):
                            need_dirs.add("/".join(parts[:depth]) + "/")
                    for d in sorted(need_dirs - written):
                        _write_dir(out_zip, d)
        except BaseException:
            # Do not leak the (possibly on-disk) buffer on failure.
            out_buf.close()
            raise

        out_buf.seek(0)
        return zipfile.ZipFile(out_buf, "r")

    def extract_to(
        self,
        dest: Union[str, os.PathLike],
        *,
        add_root: bool = False,
        root_name: Optional[str] = None,
    ) -> str:
        """Extract this directory subtree to the filesystem.

        All target paths are validated before anything is written, so an
        archive with an unsafe entry leaves the destination untouched.

        Parameters
        ----------
        dest : str or os.PathLike
            Destination directory where contents will be written.
        add_root : bool, optional
            If True, wrap extracted contents under a top-level folder named
            root_name (or this directory's name when None). If False, contents
            are placed directly under dest, preserving internal structure.
        root_name : Optional[str], optional
            Optional explicit root folder name when add_root is True.

        Returns
        -------
        str
            Absolute path to the extraction root (dest or dest/root_name).

        Raises
        ------
        RuntimeError
            If no file below this directory provides the source ZipFile.
        ValueError
            If an entry name (absolute path or ".." components) would be
            written outside the extraction root.
        """
        src_zip = self._resolve_zipobj()
        prefix = self._archive_prefix()

        dest_path = Path(dest)
        if add_root:
            root = (root_name or (self.name or "root")).strip("/")
            base = dest_path / root
        else:
            base = dest_path
        base_abs = Path(os.path.abspath(base))

        # First pass: resolve and validate every target path.
        plan: List[Tuple[zipfile.ZipInfo, Path]] = []
        for info in src_zip.infolist():
            fn = info.filename
            if not fn.startswith(prefix):
                continue
            rel = fn[len(prefix):]
            if not rel:
                continue
            plan.append((info, self._safe_target(base_abs, rel)))

        # Second pass: write to disk.
        base_abs.mkdir(parents=True, exist_ok=True)
        for info, target in plan:
            if info.filename.endswith("/"):
                target.mkdir(parents=True, exist_ok=True)
                continue

            target.parent.mkdir(parents=True, exist_ok=True)
            with src_zip.open(info) as src, open(target, "wb") as f:
                shutil.copyfileobj(src, f)

            # best-effort permission preservation
            mode = (info.external_attr >> 16) & 0o777
            if mode:
                try:
                    os.chmod(target, mode)
                except OSError:
                    pass

        return str(base_abs)

    def to_filename(
        self,
        path: Union[str, os.PathLike],
        *,
        compression: int = zipfile.ZIP_DEFLATED,
        include_dir_entries: bool = True,
        add_root: bool = False,
        root_name: Optional[str] = None,
    ) -> str:
        """Persist this directory subtree as a zip file written to path."""
        return zippeddir_to_filename(
            self,
            path,
            compression=compression,
            include_dir_entries=include_dir_entries,
            add_root=add_root,
            root_name=root_name,
        )
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
# ---------------------------------------------------------------------------
|
|
584
|
+
# walk over ZipFile
|
|
585
|
+
# ---------------------------------------------------------------------------
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
def walk(
    zipobj: zipfile.ZipFile,
    top: str = "",
) -> Iterable[Tuple[str, List["ZippedDir"], List["ZippedFile"]]]:
    """Walk through a ZipFile like os.walk, but with ZippedFile entries.

    Parameters
    ----------
    zipobj : zipfile.ZipFile
        Opened ZipFile object.
    top : str, optional
        Start directory inside the archive (default: root). Use archive-style
        paths (for example "repo-abc/dir"). When top does not correspond to an
        explicit directory entry, the function still yields a subtree rooted at
        top, and dirpath values are archive paths under that prefix. When top
        matches nothing in the archive, nothing is yielded.

    Yields
    ------
    (dirpath, dirnames, fileentries)
        dirpath : str
            Current archive path ("" for root or, for example, "repo-abc/dir").
        dirnames : List[ZippedDir]
            Sorted list of immediate subdirectories as ZippedDir objects.
        fileentries : List[ZippedFile]
            Sorted list of file entries; each has .open(), .read(), .buffer().
    """

    def _new_map() -> Dict[str, Dict[str, Any]]:
        # directory path -> {"dirs": child names, "files": leaf -> ZippedFile}
        return defaultdict(lambda: {"dirs": set(), "files": {}})

    tree_map = _new_map()

    # Normalize and index
    for arcname in zipobj.namelist():
        norm = arcname.rstrip("/")
        parts = norm.split("/")
        parent = "/".join(parts[:-1])  # "" at root
        leaf = parts[-1]

        if arcname.endswith("/"):  # a directory entry
            tree_map[parent]["dirs"].add(leaf)
        else:  # a file entry
            tree_map[parent]["files"][leaf] = ZippedFile(
                name=leaf, arcname=norm, zipobj=zipobj
            )

        # ensure intermediate directories are known
        for i in range(len(parts) - 1):
            tree_map["/".join(parts[:i])]["dirs"].add(parts[i])

    start = top.rstrip("/")

    # A directory only becomes a key of tree_map once something is stored
    # below it, so an empty directory (or a path naming a file) is not found
    # here; build a filtered pseudo-map rooted at top for those cases.
    if start and start not in tree_map:
        pseudo_map = _new_map()
        for arcname in zipobj.namelist():
            if not (arcname.startswith(start + "/") or arcname.rstrip("/") == start):
                continue
            norm = arcname.rstrip("/")
            rel = norm[len(start):].lstrip("/")
            parent = "/".join([start] + (rel.split("/")[:-1] if rel else []))
            leaf = rel.split("/")[-1] if rel else start.split("/")[-1]
            if arcname.endswith("/"):
                if rel:
                    pseudo_map[parent]["dirs"].add(leaf)
                else:
                    # The entry is top itself: register it as an (empty) node
                    # instead of listing it as its own subdirectory.
                    _ = pseudo_map[parent]
            else:
                pseudo_map[parent]["files"][leaf] = ZippedFile(leaf, norm, zipobj)
            prefix_parts = parent.split("/") if parent else []
            for i in range(len(prefix_parts)):
                pseudo_map["/".join(prefix_parts[:i])]["dirs"].add(prefix_parts[i])
        tree_map = pseudo_map

    if start and start not in tree_map:
        return

    built_dirs: Dict[str, "ZippedDir"] = {}

    def _build(path: str) -> "ZippedDir":
        # Build (and memoize) the ZippedDir for path, children first.
        if path in built_dirs:
            return built_dirs[path]
        node = tree_map[path]
        subs: List["ZippedDir"] = [
            _build(f"{path}/{name}" if path else name)
            for name in sorted(node["dirs"])
        ]
        obj = ZippedDir(
            name=path.rsplit("/", 1)[-1] if path else "",
            path=path,
            dirs=subs,
            files=[node["files"][k] for k in sorted(node["files"])],
        )
        built_dirs[path] = obj
        return obj

    def _walk(current_path: str):
        # Pre-order traversal: the directory itself, then each subdirectory.
        cur_dir = _build(current_path)
        yield current_path, cur_dir.dirs, cur_dir.files
        for sub in cur_dir.dirs:
            yield from _walk(sub.path)

    yield from _walk(start)
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
# ---------------------------------------------------------------------------
|
|
690
|
+
# basic helpers
|
|
691
|
+
# ---------------------------------------------------------------------------
|
|
692
|
+
|
|
693
|
+
|
|
694
|
+
def bytes_to_zipfile(zip_bytes: bytes) -> zipfile.ZipFile:
    """Wrap the raw bytes of a zip archive in a readable ZipFile.

    The bytes are placed in an in-memory ``io.BytesIO`` buffer and the
    archive is opened on top of it; nothing is written to disk.

    Parameters
    ----------
    zip_bytes : bytes
        Complete binary content of a zip archive.

    Returns
    -------
    zipfile.ZipFile
        ZipFile opened (default mode) over the in-memory buffer.
    """
    buffer = io.BytesIO(zip_bytes)
    archive = zipfile.ZipFile(buffer)
    return archive
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
# ---------------------------------------------------------------------------
|
|
713
|
+
# search helpers
|
|
714
|
+
# ---------------------------------------------------------------------------
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
def fetch_files_in_zip(
    zipobj: zipfile.ZipFile,
    filename: str,
    top: str = "",
    wildcard: bool = True,
    regex: Optional[str] = None,
) -> List[ZippedFile]:
    """Collect every file under ``top`` whose leaf name matches.

    The matching rule is chosen once, in this priority order:

    1. ``regex`` (when non-empty): the whole leaf name must match the
       pattern (``fullmatch``).
    2. ``wildcard``: shell-style matching of the leaf name against
       ``filename`` via ``fnmatch.fnmatch``.
    3. otherwise: exact string equality with ``filename``.

    Parameters
    ----------
    zipobj : zipfile.ZipFile
        Opened ZipFile object.
    filename : str
        Target filename (exact name or shell-style pattern).
    top : str, optional
        Directory prefix to restrict the search (default: archive root).
    wildcard : bool, optional
        If True, interpret ``filename`` as an fnmatch pattern.
    regex : str, optional
        Regex applied to the leaf name; takes precedence over ``wildcard``.
        An empty string is treated the same as ``None``.

    Returns
    -------
    List[ZippedFile]
        Matching entries in the order ``walk`` yields them.
    """
    compiled = re.compile(regex) if regex else None

    # Pick the predicate up front instead of re-deciding for every entry.
    if compiled is not None:
        def _accept(leaf: str) -> bool:
            return bool(compiled.fullmatch(leaf))
    elif wildcard:
        def _accept(leaf: str) -> bool:
            return fnmatch.fnmatch(leaf, filename)
    else:
        def _accept(leaf: str) -> bool:
            return leaf == filename

    found: List[ZippedFile] = []
    for _dirpath, _subdirs, entries in walk(zipobj, top=top):
        found.extend(entry for entry in entries if _accept(entry.name))
    return found
|
|
758
|
+
|
|
759
|
+
|
|
760
|
+
def fetch_dirs_in_zip(
    zipobj: zipfile.ZipFile,
    dirname: str,
    top: str = "",
    wildcard: bool = True,
    regex: Optional[str] = None,
    match_scope: str = "basename",  # "basename" | "fullpath"
) -> List[ZippedDir]:
    """Find directories in the archive and return a ZippedDir tree for each.

    Every directory yielded by ``walk(zipobj, top=top)`` is tested; for each
    hit a fresh ZippedDir tree rooted at that directory is assembled from the
    walk results.

    Parameters
    ----------
    zipobj : zipfile.ZipFile
        The opened zip file object.
    dirname : str
        Directory name (or fnmatch pattern when ``wildcard`` is True).
    top : str, optional
        Starting directory inside the archive (default: root).
    wildcard : bool, optional
        Use ``fnmatch.fnmatch`` against ``dirname`` (default: True).
        When False, an exact string comparison is used.
    regex : Optional[str], optional
        When not None, ``re.search`` with this pattern decides the match and
        ``dirname``/``wildcard`` are ignored.
    match_scope : {"basename", "fullpath"}, optional
        - "basename": test only the final path component.
        - "fullpath": test the entire directory path.

    Returns
    -------
    List[ZippedDir]
        One ZippedDir per matched directory, in walk order.

    Raises
    ------
    ValueError
        If match_scope is not "basename" or "fullpath".
    """
    if match_scope not in {"basename", "fullpath"}:
        raise ValueError(f"Invalid match_scope: {match_scope!r}")

    # Snapshot the walk so any directory can be looked up by its path.
    listing: Dict[str, Tuple[List[ZippedDir], List[ZippedFile]]] = {
        dirpath: (direntries, fileentries)
        for dirpath, direntries, fileentries in walk(zipobj, top=top)
    }

    def _is_hit(dirpath: str) -> bool:
        if match_scope == "basename":
            candidate = dirpath.rsplit("/", 1)[-1] if dirpath else ""
        else:
            candidate = dirpath
        if regex is not None:
            return re.search(regex, candidate) is not None
        if wildcard:
            return fnmatch.fnmatch(candidate, dirname)
        return candidate == dirname

    def _assemble(path: str) -> ZippedDir:
        # Paths missing from the listing become empty directories.
        children, files = listing.get(path, ([], []))
        return ZippedDir(
            name=path.rsplit("/", 1)[-1] if path else "",
            path=path,
            dirs=[_assemble(child.path) for child in children],
            files=files,
        )

    return [_assemble(dirpath) for dirpath in listing if _is_hit(dirpath)]
|
|
834
|
+
|
|
835
|
+
|
|
836
|
+
# ---------------------------------------------------------------------------
|
|
837
|
+
# low-level copy helpers
|
|
838
|
+
# ---------------------------------------------------------------------------
|
|
839
|
+
|
|
840
|
+
|
|
841
|
+
def _copy_zip(
    zipobj: zipfile.ZipFile,
    dst_path: Union[str, os.PathLike],
    compression: int = zipfile.ZIP_DEFLATED,
    include_dir_entries: bool = True,
) -> None:
    """Copy all entries from an existing ZipFile to a new zip at dst_path.

    File entries are re-compressed with ``compression``; their timestamp and
    external attributes are carried over from the source entry.

    Parameters
    ----------
    zipobj : zipfile.ZipFile
        Source archive, open for reading.
    dst_path : str or os.PathLike
        Destination zip path. It is passed through ``_ensure_parent_dir``
        (defined elsewhere in this module) and the returned path is used.
    compression : int, optional
        ``zipfile`` compression constant applied to every file entry.
    include_dir_entries : bool, optional
        If True, write a directory entry for every explicit directory in the
        source and for every parent directory implied by a file path.
        Directory entries are written first, in sorted order, with mode
        ``0o40755``; explicit source directories keep their timestamp.
        If False, no directory entries are written at all.
    """
    dst_path = _ensure_parent_dir(dst_path)
    entries = zipobj.infolist()
    dir_attr = 0o40755 << 16

    with zipfile.ZipFile(dst_path, "w", compression=compression) as out:
        if include_dir_entries:
            # Explicit directory entries, kept so their timestamps survive.
            explicit = {
                info.filename: info
                for info in entries
                if info.filename.endswith("/")
            }
            dir_names = set(explicit)
            # Add every ancestor directory implied by a file path.
            for info in entries:
                if info.filename.endswith("/"):
                    continue
                parts = info.filename.split("/")[:-1]
                for depth in range(1, len(parts) + 1):
                    dir_names.add("/".join(parts[:depth]) + "/")

            for dir_name in sorted(dir_names):
                di = zipfile.ZipInfo(dir_name)
                source_dir = explicit.get(dir_name)
                if source_dir is not None:
                    di.date_time = source_dir.date_time
                di.external_attr = dir_attr
                out.writestr(di, b"")

        for info in entries:
            if info.filename.endswith("/"):
                # Directories were handled above (or deliberately skipped).
                continue
            ni = zipfile.ZipInfo(info.filename)
            ni.date_time = info.date_time
            ni.external_attr = info.external_attr
            # A fresh ZipInfo defaults to ZIP_STORED and writestr() honours
            # the ZipInfo's own compress_type, so set it explicitly or the
            # requested compression is silently ignored.
            ni.compress_type = compression
            out.writestr(ni, zipobj.read(info))
|
|
887
|
+
|
|
888
|
+
|
|
889
|
+
# ---------------------------------------------------------------------------
|
|
890
|
+
# ZippedFile method implementation
|
|
891
|
+
# ---------------------------------------------------------------------------
|
|
892
|
+
|
|
893
|
+
|
|
894
|
+
def zippedfile_to_filename(
    self: ZippedFile,
    path: Union[str, os.PathLike],
    arcname: Optional[str] = None,
) -> str:
    """Write the content of this archive member to a file on disk.

    How the output location is resolved:

    - ``path`` is an existing directory, or ends with the OS path separator:
      the file is placed inside it, named ``arcname`` if given, otherwise
      the entry's own name.
    - otherwise ``path`` is taken as the output file; if ``arcname`` is
      given it replaces the file name, relative to the parent directory of
      ``path``.

    The resolved location is passed through ``_ensure_parent_dir`` (defined
    elsewhere in this module) and the path it returns is both written to and
    returned.
    """
    destination = os.fspath(path)

    if os.path.isdir(destination) or destination.endswith(os.sep):
        # Directory target: drop the file inside it.
        destination = os.path.join(destination, arcname or self.name)
    elif arcname:
        # File target with an explicit name: swap the leaf, keep the parent.
        destination = os.path.join(os.path.dirname(destination) or ".", arcname)

    written_to = _ensure_parent_dir(destination)
    with open(written_to, "wb") as handle:
        handle.write(self.read())
    return written_to
|
|
924
|
+
|
|
925
|
+
|
|
926
|
+
# ---------------------------------------------------------------------------
|
|
927
|
+
# ZippedDir method implementation
|
|
928
|
+
# ---------------------------------------------------------------------------
|
|
929
|
+
|
|
930
|
+
|
|
931
|
+
def zippeddir_to_filename(
    self: ZippedDir,
    path: Union[str, os.PathLike],
    compression: int = zipfile.ZIP_DEFLATED,
    include_dir_entries: bool = True,
    add_root: bool = False,
    root_name: Optional[str] = None,
) -> str:
    """Save this directory subtree into a new zip file at path.

    Every entry of the source archive located under this directory's path is
    copied, with the directory's own path stripped from the entry name.

    Parameters
    ----------
    path : str or os.PathLike
        Destination zip path. It is passed through ``_ensure_parent_dir``
        (defined elsewhere in this module) and the returned path is used.
    compression : int, optional
        ``zipfile`` compression constant applied to every file entry.
    include_dir_entries : bool, optional
        If True, directory entries are written: explicit ones from the
        source (keeping their timestamp, mode forced to ``0o40755``) plus
        any parent directory implied by a written file that has no entry yet.
    add_root : bool, optional
        If True, all entries are nested under a single top-level folder.
    root_name : str, optional
        Name of that top-level folder; falls back to this directory's name,
        then to ``"root"``.

    Returns
    -------
    str
        The path of the written zip file, as returned by
        ``_ensure_parent_dir``.
    """
    abs_path = _ensure_parent_dir(path)
    src_zip = self._resolve_zipobj()

    prefix = self.path.strip("/")
    if prefix:
        prefix += "/"

    if add_root:
        root = (root_name or (self.name or "root")).strip("/")
        root_prefix = f"{root}/"
    else:
        root_prefix = ""

    dir_attr = 0o40755 << 16

    with zipfile.ZipFile(abs_path, "w", compression=compression) as out_zip:
        if add_root and include_dir_entries:
            ri = zipfile.ZipInfo(root_prefix)
            ri.external_attr = dir_attr
            out_zip.writestr(ri, b"")

        # Copy every source entry that lives below this directory.
        for info in src_zip.infolist():
            fn = info.filename
            if not fn.startswith(prefix):
                continue
            rel = fn[len(prefix):]
            if not rel:
                # The entry for this directory itself; nothing to copy.
                continue
            arcname = root_prefix + rel

            if arcname.endswith("/"):
                if include_dir_entries:
                    di = zipfile.ZipInfo(arcname)
                    di.date_time = info.date_time
                    di.external_attr = dir_attr
                    out_zip.writestr(di, b"")
                continue

            data = src_zip.read(fn)
            ni = zipfile.ZipInfo(arcname)
            ni.date_time = info.date_time
            ni.external_attr = info.external_attr
            # A fresh ZipInfo defaults to ZIP_STORED and writestr() honours
            # the ZipInfo's own compress_type, so set it explicitly or the
            # requested compression is silently ignored.
            ni.compress_type = compression
            out_zip.writestr(ni, data)

        if include_dir_entries:
            # Back-fill parent directories that files imply but that had no
            # explicit entry in the source archive.
            written = set(out_zip.namelist())
            need_dirs: Set[str] = set()
            for name in written:
                if name.endswith("/"):
                    continue
                parts = name.split("/")[:-1]
                for depth in range(1, len(parts) + 1):
                    need_dirs.add("/".join(parts[:depth]) + "/")
            for d in sorted(need_dirs - written):
                di = zipfile.ZipInfo(d)
                di.external_attr = dir_attr
                out_zip.writestr(di, b"")
    return abs_path
|
|
1002
|
+
|
|
1003
|
+
|
|
1004
|
+
# ---------------------------------------------------------------------------
|
|
1005
|
+
# Generic dispatcher
|
|
1006
|
+
# ---------------------------------------------------------------------------
|
|
1007
|
+
|
|
1008
|
+
|
|
1009
|
+
def to_filename(
    obj: Union[
        zipfile.ZipFile,
        ZippedDir,
        ZippedFile,
        str,
        bytes,
        bytearray,
        io.BytesIO,
    ],
    path: Union[str, os.PathLike],
    *,
    compression: int = zipfile.ZIP_DEFLATED,
    include_dir_entries: bool = True,
    add_root: bool = False,
    root_name: Optional[str] = None,
    arcname: Optional[str] = None,
) -> str:
    """Write ``obj`` to disk, dispatching on its type.

    Dispatch table:

    - zipfile.ZipFile: every entry is copied into a new zip at ``path``.
    - ZippedDir: delegated to ``obj.to_filename()`` with ``compression``,
      ``include_dir_entries``, ``add_root`` and ``root_name``.
    - ZippedFile: the member is extracted (``arcname`` may rename it).
    - str: encoded as UTF-8 and written as a raw file.
    - bytes / bytearray: written as a raw file.
    - io.BytesIO: the buffer's full content is written as a raw file.

    For the raw-file cases the output file is ``path``. If ``path`` is an
    existing directory or ends with the OS separator, the file is created
    inside it and named ``arcname``, else the basename of ``path`` without
    its extension, else ``"payload"``. For a non-directory ``path``, a given
    ``arcname`` replaces the file name next to ``path``.

    Returns
    -------
    str
        Path of the file that was written.

    Raises
    ------
    TypeError
        If ``obj`` is none of the supported types.
    """
    abs_path = _ensure_parent_dir(path)

    if isinstance(obj, zipfile.ZipFile):
        _copy_zip(
            obj,
            abs_path,
            compression=compression,
            include_dir_entries=include_dir_entries,
        )
        return abs_path

    if isinstance(obj, ZippedDir):
        return obj.to_filename(
            abs_path,
            compression=compression,
            include_dir_entries=include_dir_entries,
            add_root=add_root,
            root_name=root_name,
        )

    if isinstance(obj, ZippedFile):
        return zippedfile_to_filename(obj, abs_path, arcname=arcname)

    if not isinstance(obj, (str, bytes, bytearray, io.BytesIO)):
        raise TypeError(f"Unsupported type for to_filename: {type(obj)!r}")

    # Raw payload: work out the plain (non-zip) output file.
    if os.path.isdir(abs_path) or str(path).endswith(os.sep):
        stem = os.path.basename(abs_path).rsplit(".", 1)[0]
        out_file = os.path.join(abs_path, arcname or stem or "payload")
    elif arcname:
        out_file = os.path.join(os.path.dirname(abs_path) or ".", arcname)
    else:
        out_file = abs_path
    out_file = _ensure_parent_dir(out_file)

    if isinstance(obj, io.BytesIO):
        payload = obj.getvalue()
    elif isinstance(obj, str):
        payload = obj.encode("utf-8")
    else:
        payload = bytes(obj)

    with open(out_file, "wb") as handle:
        handle.write(payload)
    return out_file
|
|
1082
|
+
|
|
1083
|
+
|
|
1084
|
+
# ---------------------------------------------------------------------------
|
|
1085
|
+
# small helpers
|
|
1086
|
+
# ---------------------------------------------------------------------------
|
|
1087
|
+
|
|
1088
|
+
|
|
1089
|
+
def load(path: Union[str, os.PathLike]) -> zipfile.ZipFile:
    """Open the zip archive stored at ``path`` for reading.

    Equivalent to ``zipfile.ZipFile(path, "r")`` after converting ``path``
    with ``os.fspath``.

    Parameters
    ----------
    path : str or os.PathLike
        Location of the zip archive on disk.

    Returns
    -------
    zipfile.ZipFile
        The archive, opened in read mode.
    """
    archive_path = os.fspath(path)
    return zipfile.ZipFile(archive_path, mode="r")
|
|
1105
|
+
|
|
1106
|
+
|
|
1107
|
+
# Names exported by ``from <module> import *``; the ``__dir__`` function
# defined after this list returns the same names, sorted.
__all__ = [
    "FileBuffer",
    "ZippedFile",
    "ZippedDir",
    "walk",
    "bytes_to_zipfile",
    "create_from_dir",
    "load",
    "fetch_files_in_zip",
    "fetch_dirs_in_zip",
    "to_filename",
]
|
|
1119
|
+
|
|
1120
|
+
def __dir__() -> List[str]:
    """Return the names listed in ``__all__`` as a new, sorted list."""
    exported = list(__all__)
    exported.sort()
    return exported
|