brkraw 0.3.11__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. brkraw/__init__.py +9 -3
  2. brkraw/apps/__init__.py +12 -0
  3. brkraw/apps/addon/__init__.py +30 -0
  4. brkraw/apps/addon/core.py +35 -0
  5. brkraw/apps/addon/dependencies.py +402 -0
  6. brkraw/apps/addon/installation.py +500 -0
  7. brkraw/apps/addon/io.py +21 -0
  8. brkraw/apps/hook/__init__.py +25 -0
  9. brkraw/apps/hook/core.py +636 -0
  10. brkraw/apps/loader/__init__.py +10 -0
  11. brkraw/apps/loader/core.py +622 -0
  12. brkraw/apps/loader/formatter.py +288 -0
  13. brkraw/apps/loader/helper.py +797 -0
  14. brkraw/apps/loader/info/__init__.py +11 -0
  15. brkraw/apps/loader/info/scan.py +85 -0
  16. brkraw/apps/loader/info/scan.yaml +90 -0
  17. brkraw/apps/loader/info/study.py +69 -0
  18. brkraw/apps/loader/info/study.yaml +156 -0
  19. brkraw/apps/loader/info/transform.py +92 -0
  20. brkraw/apps/loader/types.py +220 -0
  21. brkraw/cli/__init__.py +5 -0
  22. brkraw/cli/commands/__init__.py +2 -0
  23. brkraw/cli/commands/addon.py +327 -0
  24. brkraw/cli/commands/config.py +205 -0
  25. brkraw/cli/commands/convert.py +903 -0
  26. brkraw/cli/commands/hook.py +348 -0
  27. brkraw/cli/commands/info.py +74 -0
  28. brkraw/cli/commands/init.py +214 -0
  29. brkraw/cli/commands/params.py +106 -0
  30. brkraw/cli/commands/prune.py +288 -0
  31. brkraw/cli/commands/session.py +371 -0
  32. brkraw/cli/hook_args.py +80 -0
  33. brkraw/cli/main.py +83 -0
  34. brkraw/cli/utils.py +60 -0
  35. brkraw/core/__init__.py +13 -0
  36. brkraw/core/config.py +380 -0
  37. brkraw/core/entrypoints.py +25 -0
  38. brkraw/core/formatter.py +367 -0
  39. brkraw/core/fs.py +495 -0
  40. brkraw/core/jcamp.py +600 -0
  41. brkraw/core/layout.py +451 -0
  42. brkraw/core/parameters.py +781 -0
  43. brkraw/core/zip.py +1121 -0
  44. brkraw/dataclasses/__init__.py +14 -0
  45. brkraw/dataclasses/node.py +139 -0
  46. brkraw/dataclasses/reco.py +33 -0
  47. brkraw/dataclasses/scan.py +61 -0
  48. brkraw/dataclasses/study.py +131 -0
  49. brkraw/default/__init__.py +3 -0
  50. brkraw/default/pruner_specs/deid4share.yaml +42 -0
  51. brkraw/default/rules/00_default.yaml +4 -0
  52. brkraw/default/specs/metadata_dicom.yaml +236 -0
  53. brkraw/default/specs/metadata_transforms.py +92 -0
  54. brkraw/resolver/__init__.py +7 -0
  55. brkraw/resolver/affine.py +539 -0
  56. brkraw/resolver/datatype.py +69 -0
  57. brkraw/resolver/fid.py +90 -0
  58. brkraw/resolver/helpers.py +36 -0
  59. brkraw/resolver/image.py +188 -0
  60. brkraw/resolver/nifti.py +370 -0
  61. brkraw/resolver/shape.py +235 -0
  62. brkraw/schema/__init__.py +3 -0
  63. brkraw/schema/context_map.yaml +62 -0
  64. brkraw/schema/meta.yaml +57 -0
  65. brkraw/schema/niftiheader.yaml +95 -0
  66. brkraw/schema/pruner.yaml +55 -0
  67. brkraw/schema/remapper.yaml +128 -0
  68. brkraw/schema/rules.yaml +154 -0
  69. brkraw/specs/__init__.py +10 -0
  70. brkraw/specs/hook/__init__.py +12 -0
  71. brkraw/specs/hook/logic.py +31 -0
  72. brkraw/specs/hook/validator.py +22 -0
  73. brkraw/specs/meta/__init__.py +5 -0
  74. brkraw/specs/meta/validator.py +156 -0
  75. brkraw/specs/pruner/__init__.py +15 -0
  76. brkraw/specs/pruner/logic.py +361 -0
  77. brkraw/specs/pruner/validator.py +119 -0
  78. brkraw/specs/remapper/__init__.py +27 -0
  79. brkraw/specs/remapper/logic.py +924 -0
  80. brkraw/specs/remapper/validator.py +314 -0
  81. brkraw/specs/rules/__init__.py +6 -0
  82. brkraw/specs/rules/logic.py +263 -0
  83. brkraw/specs/rules/validator.py +103 -0
  84. brkraw-0.5.0.dist-info/METADATA +81 -0
  85. brkraw-0.5.0.dist-info/RECORD +88 -0
  86. {brkraw-0.3.11.dist-info → brkraw-0.5.0.dist-info}/WHEEL +1 -2
  87. brkraw-0.5.0.dist-info/entry_points.txt +13 -0
  88. brkraw/lib/__init__.py +0 -4
  89. brkraw/lib/backup.py +0 -641
  90. brkraw/lib/bids.py +0 -0
  91. brkraw/lib/errors.py +0 -125
  92. brkraw/lib/loader.py +0 -1220
  93. brkraw/lib/orient.py +0 -194
  94. brkraw/lib/parser.py +0 -48
  95. brkraw/lib/pvobj.py +0 -301
  96. brkraw/lib/reference.py +0 -245
  97. brkraw/lib/utils.py +0 -471
  98. brkraw/scripts/__init__.py +0 -0
  99. brkraw/scripts/brk_backup.py +0 -106
  100. brkraw/scripts/brkraw.py +0 -744
  101. brkraw/ui/__init__.py +0 -0
  102. brkraw/ui/config.py +0 -17
  103. brkraw/ui/main_win.py +0 -214
  104. brkraw/ui/previewer.py +0 -225
  105. brkraw/ui/scan_info.py +0 -72
  106. brkraw/ui/scan_list.py +0 -73
  107. brkraw/ui/subj_info.py +0 -128
  108. brkraw-0.3.11.dist-info/METADATA +0 -25
  109. brkraw-0.3.11.dist-info/RECORD +0 -28
  110. brkraw-0.3.11.dist-info/entry_points.txt +0 -3
  111. brkraw-0.3.11.dist-info/top_level.txt +0 -2
  112. tests/__init__.py +0 -0
  113. {brkraw-0.3.11.dist-info → brkraw-0.5.0.dist-info/licenses}/LICENSE +0 -0
brkraw/core/zip.py ADDED
@@ -0,0 +1,1121 @@
1
+ """
2
+ A set of lightweight utilities for working with ZIP archives in-memory and
3
+ providing convenient abstractions for files and directories inside a ZIP.
4
+
5
+ The focus is:
6
+ - Safe, Pythonic dataclasses (FileBuffer, ZippedFile, ZippedDir) wrapping
7
+ raw bytes and zipfile.ZipFile entries.
8
+ - Support for extracting, isolating, and re-packing subtrees of a ZIP archive
9
+ without touching the filesystem unless explicitly requested.
10
+ - Flexible to_filename() dispatcher to persist objects (ZipFile, ZippedDir,
11
+ ZippedFile, BytesIO, or raw bytes) to disk in a normalized way.
12
+
13
+ Key abstractions
14
+ ----------------
15
+ - FileBuffer
16
+ A simple wrapper around an in-memory BytesIO buffer.
17
+ Provides .bytes() to retrieve raw data and .to_filename() to persist
18
+ directly to disk.
19
+
20
+ - ZippedFile
21
+ Represents a single file entry inside a ZIP archive.
22
+ Offers .open(), .read(), .buffer(), .isolate() to access content,
23
+ and .extract_to() to write the raw file to disk.
24
+
25
+ - ZippedDir
26
+ Represents a directory subtree inside a ZIP.
27
+ Provides .isolate() to generate a new ZIP containing only this subtree
28
+ (optionally under a new root directory), .to_filename() to persist it as a
29
+ zip file, and .extract_to() to unpack the subtree to a directory.
30
+
31
+ - walk()
32
+ Like os.walk, but operates over a zipfile.ZipFile.
33
+ Yields (dirpath, dirnames, fileentries) tuples, where fileentries are
34
+ ZippedFile objects with direct access to contents.
35
+
36
+ - fetch_files_in_zip() / fetch_dirs_in_zip()
37
+ Helpers for searching within a ZIP by filename or directory name, supporting
38
+ exact match, wildcards, or regex.
39
+
40
+ - to_filename()
41
+ A generic dispatcher to persist many kinds of in-memory objects. For ZipFile
42
+ and ZippedDir it creates zip archives; for ZippedFile it writes the raw file
43
+ to disk; for bytes/str/BytesIO it writes the raw payload to a file.
44
+
45
+ Typical usage
46
+ -------------
47
+ import zipfile
48
+ from brkraw.core import zip
49
+
50
+ # Load a zip from bytes
51
+ zf = zip.bytes_to_zipfile(zip_bytes)
52
+
53
+ # Walk the archive
54
+ for dirpath, dirnames, files in zip.walk(zf):
55
+ for f in files:
56
+ print(f.name, len(f.read()))
57
+
58
+ # Extract all "config.json" files
59
+ matches = zip.fetch_files_in_zip(zf, "config.json")
60
+ for m in matches:
61
+ buf = m.isolate() # -> FileBuffer
62
+ buf.to_filename("/tmp/config.json")
63
+
64
+ # Isolate a subdirectory into a new in-memory zip
65
+ dirs = zip.fetch_dirs_in_zip(zf, "src")
66
+ if dirs:
67
+ sub = dirs[0] # ZippedDir
68
+ new_zip = sub.isolate(add_root=True, root_name="package-src")
69
+ with new_zip.open("package-src/module.py") as fh:
70
+ print(fh.read().decode("utf-8"))
71
+
72
+ # Optionally persist the isolated zip to disk:
73
+ zip.to_filename(new_zip, "/tmp/package-src.zip")
74
+
75
+ Design notes
76
+ ------------
77
+
78
+ - Uses only the stdlib (zipfile, io, shutil) for maximum portability.
79
+ - Preserves timestamps and file permissions (external_attr) where possible.
80
+ - Supports both in-memory workflows (BytesIO) and on-disk workflows
81
+ (via extract_to() or the to_filename() dispatcher).
82
+ - Explicit directory entries are preserved/added so that GUI ZIP browsers
83
+ behave predictably.
84
+
85
+ Exports
86
+ -------
87
+
88
+ - FileBuffer
89
+ - ZippedFile
90
+ - ZippedDir
91
+ - walk
92
+ - bytes_to_zipfile
93
+ - create_from_dir
94
+ - load
95
+ - fetch_files_in_zip
96
+ - fetch_dirs_in_zip
97
+ - to_filename
98
+ """
99
+
100
+ from __future__ import annotations
101
+
102
+ import fnmatch
103
+ import io
104
+ import os
105
+ import re
106
+ import shutil
107
+ import tempfile
108
+ import zipfile
109
+ from pathlib import Path
110
+ from collections import defaultdict
111
+ from dataclasses import dataclass
112
+ from typing import Any, Dict, IO, Iterable, List, Optional, Tuple, Union, Literal, Set
113
+
114
+ # ---------------------------------------------------------------------------
115
+ # internal helpers
116
+ # ---------------------------------------------------------------------------
117
+
118
+
119
+ def _ensure_parent_dir(path: Union[str, os.PathLike]) -> str:
120
+ """Ensure parent directory exists and return an absolute path."""
121
+ p = os.fspath(path)
122
+ abs_path = os.path.abspath(p)
123
+ parent = os.path.dirname(abs_path)
124
+ if parent and not os.path.exists(parent):
125
+ os.makedirs(parent, exist_ok=True)
126
+ return abs_path
127
+
128
+
129
+ # ---------------------------------------------------------------------------
130
+ # FileBuffer
131
+ # ---------------------------------------------------------------------------
132
+
133
+
134
@dataclass
class FileBuffer:
    """Named binary stream (in-memory or spooled to disk)."""
    name: str
    buffer: IO[bytes]

    def bytes(self) -> bytes:
        """Return the whole payload, leaving the stream position untouched."""
        stream = self.buffer
        saved = stream.tell()
        try:
            stream.seek(0)
            payload = stream.read()
        finally:
            # Restore the caller's cursor even when the read fails.
            stream.seek(saved)
        return payload

    def to_filename(
        self,
        path: Union[str, os.PathLike],
        *,
        overwrite: bool = True,
        makedirs: bool = True,
    ) -> str:
        """Dump the payload into a file located at *path*.

        Parameters
        ----------
        path : str or os.PathLike
            Where the file should be written.
        overwrite : bool, optional
            When False, an existing file triggers FileExistsError.
            Default True.
        makedirs : bool, optional
            When True, missing parent directories are created. Default True.

        Returns
        -------
        str
            Absolute path of the file that was written.
        """
        destination = os.path.abspath(os.fspath(path))

        if not overwrite and os.path.exists(destination):
            raise FileExistsError(f"File already exists: {destination}")

        folder = os.path.dirname(destination)
        if makedirs and folder and not os.path.exists(folder):
            os.makedirs(folder, exist_ok=True)

        stream = self.buffer
        saved = stream.tell()
        try:
            stream.seek(0)
            with open(destination, "wb") as sink:
                shutil.copyfileobj(stream, sink)
        finally:
            # The buffer stays reusable at its previous position.
            stream.seek(saved)

        return destination
191
+
192
+
193
+ # ---------------------------------------------------------------------------
194
+ # ZippedFile
195
+ # ---------------------------------------------------------------------------
196
+
197
+
198
@dataclass
class ZippedFile:
    """A file-like handle to a file inside a ZipFile with convenient accessors."""
    name: str  # basename of the file (e.g., "README.md")
    arcname: str  # archive path inside the zip (e.g., "repo-123/README.md")
    zipobj: zipfile.ZipFile

    def __repr__(self) -> str:
        try:
            info = self.zipobj.getinfo(self.arcname)
            size = info.file_size
        except Exception:
            # repr() must never raise; a missing or unreadable entry shows "?".
            size = "?"
        return f"ZippedFile(path='{self.arcname}', size={size})"

    def is_dir(self) -> bool:
        """Always False: this node represents a file entry."""
        return False

    def is_file(self) -> bool:
        """Always True: this node represents a file entry."""
        return True

    def open(self) -> IO[bytes]:
        """Return a readable file-like object (binary). Caller should close it."""
        return self.zipobj.open(self.arcname, "r")

    def read(self) -> bytes:
        """Read entire file content into bytes."""
        return self.zipobj.read(self.arcname)

    def buffer(self) -> io.BytesIO:
        """Return an in-memory BytesIO buffer holding the file content."""
        return io.BytesIO(self.read())

    def isolate(
        self,
        *,
        buffering: Literal["memory", "spooled"] = "memory",
        max_spool_size: int = 10 * 1024 * 1024,
        cache_dir: Optional[Union[str, os.PathLike]] = None,
    ) -> FileBuffer:
        """Return a FileBuffer for this file content.

        Parameters
        ----------
        buffering : {"memory", "spooled"}, optional
            Use in-memory BytesIO by default. When "spooled", use a
            tempfile.SpooledTemporaryFile that spills to disk past
            max_spool_size to avoid high memory usage.
        max_spool_size : int, optional
            Threshold in bytes before a spooled buffer spills to disk.
        cache_dir : str or os.PathLike, optional
            Directory to place temporary files when buffering="spooled".

        Returns
        -------
        FileBuffer
            Buffer named after this entry and positioned at offset 0.
        """
        if buffering == "spooled":
            spool_dir = os.fspath(cache_dir) if cache_dir is not None else None
            spool = tempfile.SpooledTemporaryFile(max_size=max_spool_size, dir=spool_dir)
            try:
                # Stream in chunks: reading the whole entry first would hold
                # the full payload in memory and defeat the spooling.
                with self.open() as src:
                    shutil.copyfileobj(src, spool)
                spool.seek(0)
            except BaseException:
                # Do not leak the temporary file when the copy fails.
                spool.close()
                raise
            return FileBuffer(name=self.name, buffer=spool)
        # default: in-memory (a fresh BytesIO already sits at offset 0)
        return FileBuffer(name=self.name, buffer=io.BytesIO(self.read()))

    def extract_to(
        self,
        path: Union[str, os.PathLike],
    ) -> str:
        """Extract this file to a filesystem path.

        If `path` is a directory, the file is written under that directory using
        this entry's name. If `path` is a file path, the content is written
        directly to that path. Use `arcname` in the dispatcher to override the
        output name when calling via to_filename().
        """
        return zippedfile_to_filename(self, path)
275
+
276
+
277
+ # ---------------------------------------------------------------------------
278
+ # Create zip from directory
279
+ # ---------------------------------------------------------------------------
280
+
281
+
282
def create_from_dir(
    zip_path: Union[str, os.PathLike],
    source_dir: Union[str, os.PathLike],
    compression: int = zipfile.ZIP_DEFLATED,
) -> str:
    """Create a ZIP archive from the contents of a directory.

    Entries are added in sorted order so the archive layout is reproducible.
    When the output archive itself lives inside ``source_dir`` it is skipped
    rather than being added to itself.

    Parameters
    ----------
    zip_path : str or os.PathLike
        The path to the output ZIP file.
    source_dir : str or os.PathLike
        The path to the directory whose contents will be zipped.
    compression : int, optional
        The compression method to use (default: zipfile.ZIP_DEFLATED).

    Returns
    -------
    str
        The absolute path to the created ZIP file.
    """
    zip_path = _ensure_parent_dir(zip_path)
    source_dir = os.fspath(source_dir)
    # Canonical form of the output path, used to recognise it during the walk.
    own_path = os.path.normcase(os.path.realpath(zip_path))

    with zipfile.ZipFile(zip_path, "w", compression=compression) as zf:
        for root, dirs, files in os.walk(source_dir):
            # In-place sort also fixes the order in which os.walk descends.
            dirs.sort()
            # Add directory entries
            for d in dirs:
                full_path = os.path.join(root, d)
                arcname = os.path.relpath(full_path, source_dir)
                zf.writestr(arcname + "/", b"")
            # Add file entries
            for file in sorted(files):
                full_path = os.path.join(root, file)
                if os.path.normcase(os.path.realpath(full_path)) == own_path:
                    # The archive being written must not swallow itself.
                    continue
                arcname = os.path.relpath(full_path, source_dir)
                zf.write(full_path, arcname)
    return zip_path
319
+
320
+
321
+ # ---------------------------------------------------------------------------
322
+ # ZippedDir
323
+ # ---------------------------------------------------------------------------
324
+
325
+
326
@dataclass
class ZippedDir:
    """Directory-like node inside a ZipFile. Holds subdirectories and files."""
    name: str
    path: str
    dirs: List["ZippedDir"]
    files: List[ZippedFile]

    def __repr__(self) -> str:
        return f"ZippedDir(path='{self.path}', dirs={len(self.dirs)}, files={len(self.files)})"

    def is_dir(self) -> bool:
        """Always True: this node represents a directory."""
        return True

    def is_file(self) -> bool:
        """Always False: this node represents a directory."""
        return False

    def as_dict(self) -> Dict[str, Any]:
        """Convert to plain dict (for debugging or serialization)."""
        return {
            "name": self.name,
            "path": self.path,
            "dirs": [d.as_dict() for d in self.dirs],
            "files": [f.name for f in self.files],
        }

    def listdir(self) -> List[str]:
        """List immediate children names (dirs first, then files)."""
        dirnames = sorted(d.name for d in self.dirs)
        filenames = sorted(f.name for f in self.files)
        return dirnames + filenames

    def iterdir(self) -> Iterable[Union["ZippedDir", ZippedFile]]:
        """Iterate over children objects (dirs first, then files)."""
        yield from sorted(self.dirs, key=lambda x: x.name)
        yield from sorted(self.files, key=lambda x: x.name)

    def _resolve_zipobj(self) -> zipfile.ZipFile:
        """Resolve the underlying ZipFile from any child file. Raise if not resolvable."""
        stack: List["ZippedDir"] = [self]
        while stack:
            node = stack.pop()
            if node.files:
                # Any file of the subtree carries the archive handle.
                return node.files[0].zipobj
            stack.extend(node.dirs)
        raise RuntimeError("Cannot resolve ZipFile for this ZippedDir (no files found).")

    @staticmethod
    def _is_within(base_real: str, candidate_real: str) -> bool:
        """Tell whether candidate_real is base_real or lies underneath it."""
        base_cmp = os.path.normcase(base_real)
        try:
            common = os.path.commonpath([base_cmp, os.path.normcase(candidate_real)])
        except ValueError:
            # commonpath rejects paths that cannot share a root (e.g. drives).
            return False
        return common == base_cmp

    def isolate(
        self,
        compression: int = zipfile.ZIP_DEFLATED,
        include_dir_entries: bool = True,
        add_root: bool = False,
        root_name: Union[str, None] = None,
        *,
        buffering: Literal["memory", "spooled"] = "memory",
        max_spool_size: int = 20 * 1024 * 1024,
        cache_dir: Optional[Union[str, os.PathLike]] = None,
    ) -> zipfile.ZipFile:
        """Create a new ZIP containing only this directory subtree.

        By default (add_root=False), the new ZIP root is this directory itself,
        i.e., arcnames are relative to self.path (no extra top-level folder).

        If add_root=True, files are placed under a top-level directory named
        root_name (or self.name if root_name is None). In other words, entries
        will look like "<root_name>/<relative-path-inside-self>".

        Parameters
        ----------
        compression : int, optional
            Zip compression method applied to every copied file entry
            (default: ZIP_DEFLATED).
        include_dir_entries : bool, optional
            If True, ensure folder entries (for example "a/", "a/b/") exist.
        add_root : bool, optional
            If True, wrap all contents under a top-level directory.
        root_name : Optional[str], optional
            Name of the top-level directory when add_root is True. If None,
            uses self.name.
        buffering : {"memory", "spooled"}, optional
            Storage for the generated zip. "memory" uses BytesIO; "spooled" uses
            tempfile.SpooledTemporaryFile and spills to disk past max_spool_size.
        max_spool_size : int, optional
            Threshold in bytes for spilling to disk when buffering="spooled".
        cache_dir : str or os.PathLike, optional
            Directory for temporary files when buffering="spooled".

        Returns
        -------
        zipfile.ZipFile
            A ZipFile object (read mode) containing only this subtree.

        Raises
        ------
        RuntimeError
            If the subtree holds no file from which to resolve the archive.
        """
        src_zip = self._resolve_zipobj()

        # Normalize to POSIX style used inside zip archives
        prefix = self.path.strip("/")
        if prefix:
            prefix += "/"

        # Decide root folder name when requested
        if add_root:
            root = (root_name or (self.name or "root")).strip("/")
            root_prefix = f"{root}/"
        else:
            root_prefix = ""

        if buffering == "spooled":
            spool_dir = os.fspath(cache_dir) if cache_dir is not None else None
            out_buf: IO[bytes] = tempfile.SpooledTemporaryFile(
                max_size=max_spool_size, dir=spool_dir
            )
        else:
            out_buf = io.BytesIO()

        try:
            with zipfile.ZipFile(out_buf, "w", compression=compression) as out_zip:
                # Optional explicit top-level root
                if add_root and include_dir_entries:
                    ri = zipfile.ZipInfo(root_prefix)
                    ri.external_attr = (0o40755 << 16)
                    out_zip.writestr(ri, b"")

                # Copy all entries whose filename starts with the directory prefix
                for info in src_zip.infolist():
                    fn = info.filename
                    if not fn.startswith(prefix):
                        continue

                    rel = fn[len(prefix):]
                    if not rel:
                        continue

                    # root_prefix is "" when add_root is False.
                    arcname = root_prefix + rel

                    if arcname.endswith("/"):
                        if include_dir_entries:
                            dir_info = zipfile.ZipInfo(arcname)
                            dir_info.date_time = info.date_time
                            dir_info.external_attr = (0o40755 << 16)
                            out_zip.writestr(dir_info, b"")
                        continue

                    new_info = zipfile.ZipInfo(arcname)
                    new_info.date_time = info.date_time
                    new_info.external_attr = info.external_attr
                    # writestr() honours the ZipInfo's own compress_type, which
                    # defaults to ZIP_STORED; set it so `compression` applies.
                    new_info.compress_type = compression
                    out_zip.writestr(new_info, src_zip.read(info.filename))

                if include_dir_entries:
                    # Synthesize folder entries the source archive left implicit.
                    written = set(out_zip.namelist())
                    need_dirs = set()
                    for name in written:
                        if name.endswith("/"):
                            continue
                        parts = name.split("/")[:-1]
                        for depth in range(1, len(parts) + 1):
                            need_dirs.add("/".join(parts[:depth]) + "/")

                    for d in sorted(need_dirs - written):
                        di = zipfile.ZipInfo(d)
                        di.external_attr = (0o40755 << 16)
                        out_zip.writestr(di, b"")
        except BaseException:
            # Do not leak the (possibly on-disk) buffer when building fails.
            out_buf.close()
            raise

        out_buf.seek(0)
        return zipfile.ZipFile(out_buf, "r")

    def extract_to(
        self,
        dest: Union[str, os.PathLike],
        *,
        add_root: bool = False,
        root_name: Optional[str] = None,
    ) -> str:
        """Extract this directory subtree to the filesystem.

        Every target path is validated before anything is written, so an
        archive with unsafe member names leaves the destination untouched.

        Parameters
        ----------
        dest : str or os.PathLike
            Destination directory where contents will be written.
        add_root : bool, optional
            If True, wrap extracted contents under a top-level folder named
            root_name (or this directory's name when None). If False, contents
            are placed directly under dest, preserving internal structure.
        root_name : Optional[str], optional
            Optional explicit root folder name when add_root is True.

        Returns
        -------
        str
            Absolute path to the extraction root (dest or dest/root_name).

        Raises
        ------
        RuntimeError
            If the subtree holds no file from which to resolve the archive.
        ValueError
            If a member name (for example one containing ".." or an absolute
            path) would be written outside the extraction root.
        """
        src_zip = self._resolve_zipobj()
        prefix = self.path.strip("/")
        if prefix:
            prefix += "/"

        dest_path = Path(dest)
        if add_root:
            root = (root_name or (self.name or "root")).strip("/")
            base = dest_path / root
        else:
            base = dest_path

        base_abs = Path(os.path.abspath(base))
        base_real = os.path.realpath(base_abs)

        # Pass 1: plan and validate, so a hostile entry aborts before any write.
        planned = []
        for info in src_zip.infolist():
            fn = info.filename
            if not fn.startswith(prefix):
                continue
            rel = fn[len(prefix):]
            if not rel:
                continue

            target = base_abs / rel
            if not self._is_within(base_real, os.path.realpath(target)):
                raise ValueError(
                    f"Refusing to extract {fn!r}: target escapes {str(base_abs)!r}"
                )
            planned.append((info, target))

        # Pass 2: write.
        base_abs.mkdir(parents=True, exist_ok=True)
        for info, target in planned:
            if info.filename.endswith("/"):
                target.mkdir(parents=True, exist_ok=True)
                continue

            target.parent.mkdir(parents=True, exist_ok=True)
            data = src_zip.read(info.filename)
            with open(target, "wb") as f:
                f.write(data)
            # best-effort permission preservation
            mode = (info.external_attr >> 16) & 0o777
            if mode:
                try:
                    os.chmod(target, mode)
                except OSError:
                    pass

        return str(base_abs)

    def to_filename(
        self,
        path: Union[str, os.PathLike],
        *,
        compression: int = zipfile.ZIP_DEFLATED,
        include_dir_entries: bool = True,
        add_root: bool = False,
        root_name: Optional[str] = None,
    ) -> str:
        """Persist this directory subtree as a zip file written to path."""
        return zippeddir_to_filename(
            self,
            path,
            compression=compression,
            include_dir_entries=include_dir_entries,
            add_root=add_root,
            root_name=root_name,
        )
581
+
582
+
583
+ # ---------------------------------------------------------------------------
584
+ # walk over ZipFile
585
+ # ---------------------------------------------------------------------------
586
+
587
+
588
def walk(
    zipobj: zipfile.ZipFile,
    top: str = "",
) -> Iterable[Tuple[str, List[ZippedDir], List[ZippedFile]]]:
    """Walk through a ZipFile like os.walk, but with ZippedFile entries.

    Parameters
    ----------
    zipobj : zipfile.ZipFile
        Opened ZipFile object.
    top : str, optional
        Start directory inside the archive (default: root). Use archive-style
        paths (for example "repo-abc/dir"). When top does not correspond to a
        known directory, the function still yields a subtree rooted at top
        built from the entries matching that prefix; nothing is yielded when
        no entry matches.

    Yields
    ------
    (dirpath, dirnames, fileentries)
        dirpath : str
            Current archive path ("" for root or, for example, "repo-abc/dir").
        dirnames : List[ZippedDir]
            Sorted list of immediate subdirectories as ZippedDir objects.
        fileentries : List[ZippedFile]
            Sorted list of file entries; each has .open(), .read(), .buffer().
    """
    def _empty_node() -> Dict[str, Any]:
        return {"dirs": set(), "files": {}}

    tree_map: Dict[str, Dict[str, Any]] = defaultdict(_empty_node)

    # Normalize and index
    for arcname in zipobj.namelist():
        norm = arcname.rstrip("/")
        if not norm:
            # An entry such as "/" has no usable name; indexing it would make
            # the root directory its own child and recurse without end.
            continue
        parts = norm.split("/")
        parent = "/".join(parts[:-1])  # "" at root
        leaf = parts[-1]

        if arcname.endswith("/"):  # a directory entry
            tree_map[parent]["dirs"].add(leaf)
            # Register the directory itself as a node so that an explicit but
            # empty directory can be used as `top` without a phantom child.
            tree_map.setdefault(norm, _empty_node())
        else:  # a file entry
            tree_map[parent]["files"][leaf] = ZippedFile(
                name=leaf, arcname=norm, zipobj=zipobj
            )

        # ensure intermediate directories are known
        for i in range(len(parts) - 1):
            tree_map["/".join(parts[:i])]["dirs"].add(parts[i])

    start = top.rstrip("/")

    # When top is not a known directory, build a filtered pseudo-map rooted at top
    if start and start not in tree_map:
        pseudo_map: Dict[str, Dict[str, Any]] = defaultdict(_empty_node)
        for arcname in zipobj.namelist():
            if arcname.startswith(start + "/") or arcname.rstrip("/") == start:
                norm = arcname.rstrip("/")
                rel = norm[len(start):].lstrip("/")
                parent = "/".join([start] + (rel.split("/")[:-1] if rel else []))
                leaf = rel.split("/")[-1] if rel else start.split("/")[-1]
                if arcname.endswith("/"):
                    if rel:
                        pseudo_map[parent]["dirs"].add(leaf)
                    else:
                        # The entry is `top` itself: a node, never its own child.
                        pseudo_map.setdefault(parent, _empty_node())
                else:
                    pseudo_map[parent]["files"][leaf] = ZippedFile(leaf, norm, zipobj)
                prefix_parts = parent.split("/") if parent else []
                for i in range(len(prefix_parts)):
                    pseudo_map["/".join(prefix_parts[:i])]["dirs"].add(prefix_parts[i])
        tree_map = pseudo_map
    if start and start not in tree_map:
        # Nothing in the archive lives at or under top.
        return

    built_dirs: Dict[str, ZippedDir] = {}

    def _build(path: str) -> ZippedDir:
        """Build (once) the ZippedDir tree for path from tree_map."""
        if path in built_dirs:
            return built_dirs[path]
        node = tree_map[path]
        files = [node["files"][k] for k in sorted(node["files"])]
        subs: List[ZippedDir] = []
        for name in sorted(node["dirs"]):
            sub_path = f"{path}/{name}" if path else name
            subs.append(_build(sub_path))
        obj = ZippedDir(
            name=path.rsplit("/", 1)[-1] if path else "",
            path=path,
            dirs=subs,
            files=files,
        )
        built_dirs[path] = obj
        return obj

    def _walk(current_path: str):
        """Yield current_path first, then each subdirectory depth-first."""
        cur_dir = _build(current_path)
        yield current_path, cur_dir.dirs, cur_dir.files
        for sub in cur_dir.dirs:
            yield from _walk(sub.path)

    yield from _walk(start)
687
+
688
+
689
+ # ---------------------------------------------------------------------------
690
+ # basic helpers
691
+ # ---------------------------------------------------------------------------
692
+
693
+
694
def bytes_to_zipfile(zip_bytes: bytes) -> zipfile.ZipFile:
    """Wrap raw zip archive bytes in a ZipFile.

    Shorthand for zipfile.ZipFile(io.BytesIO(zip_bytes)).

    Parameters
    ----------
    zip_bytes : bytes
        Binary content of a zip archive.

    Returns
    -------
    zipfile.ZipFile
        ZipFile opened over an in-memory copy of the data.
    """
    stream = io.BytesIO(zip_bytes)
    return zipfile.ZipFile(stream)
710
+
711
+
712
+ # ---------------------------------------------------------------------------
713
+ # search helpers
714
+ # ---------------------------------------------------------------------------
715
+
716
+
717
def fetch_files_in_zip(
    zipobj: zipfile.ZipFile,
    filename: str,
    top: str = "",
    wildcard: bool = True,
    regex: Optional[str] = None,
) -> List[ZippedFile]:
    """Collect the files of a ZipFile whose leaf name matches.

    Parameters
    ----------
    zipobj : zipfile.ZipFile
        Opened ZipFile object.
    filename : str
        Name to look for (literal or shell-style pattern).
    top : str, optional
        Archive directory the search is limited to (default: root).
    wildcard : bool, optional
        When True, filename is matched with fnmatch; when False, it must
        equal the leaf name exactly.
    regex : str, optional
        When given (and non-empty), the leaf name must fully match this
        pattern; filename and wildcard are then ignored.

    Returns
    -------
    List[ZippedFile]
        Matching entries, in walk order.
    """
    compiled = re.compile(regex) if regex else None

    def _accept(leaf: str) -> bool:
        # Precedence: regex, then wildcard, then exact comparison.
        if compiled is not None:
            return bool(compiled.fullmatch(leaf))
        if wildcard:
            return fnmatch.fnmatch(leaf, filename)
        return leaf == filename

    found: List[ZippedFile] = []
    for _dirpath, _subdirs, entries in walk(zipobj, top=top):
        found.extend(entry for entry in entries if _accept(entry.name))
    return found
758
+
759
+
760
def fetch_dirs_in_zip(
    zipobj: zipfile.ZipFile,
    dirname: str,
    top: str = "",
    wildcard: bool = True,
    regex: Optional[str] = None,
    match_scope: str = "basename",  # "basename" | "fullpath"
) -> List[ZippedDir]:
    """Find directories in a ZipFile and return their subtrees.

    Parameters
    ----------
    zipobj : zipfile.ZipFile
        Opened ZipFile object.
    dirname : str
        Directory name (literal or shell-style pattern) to look for.
    top : str, optional
        Archive directory the search starts from (default: root).
    wildcard : bool, optional
        When True (default), dirname is matched with fnmatch; when False,
        it must equal the compared text exactly.
    regex : Optional[str], optional
        When not None, directories are selected with re.search on this
        pattern and dirname/wildcard are ignored.
    match_scope : {"basename", "fullpath"}, optional
        Text each directory is compared on:
        - "basename": only the last path component.
        - "fullpath": the whole archive path.

    Returns
    -------
    List[ZippedDir]
        One ZippedDir tree per matched directory, in walk order.

    Raises
    ------
    ValueError
        If match_scope is not "basename" or "fullpath".
    """
    if match_scope not in {"basename", "fullpath"}:
        raise ValueError(f"Invalid match_scope: {match_scope!r}")

    # dirpath -> (child directories, files), in walk order
    index: Dict[str, Tuple[List[ZippedDir], List[ZippedFile]]] = {
        dirpath: (direntries, fileentries)
        for dirpath, direntries, fileentries in walk(zipobj, top=top)
    }

    def _leaf(path: str) -> str:
        return path.rsplit("/", 1)[-1] if path else ""

    def _is_hit(dirpath: str) -> bool:
        subject = dirpath if match_scope != "basename" else _leaf(dirpath)
        if regex is not None:
            return re.search(regex, subject) is not None
        return fnmatch.fnmatch(subject, dirname) if wildcard else subject == dirname

    def _subtree(path: str) -> ZippedDir:
        children, members = index.get(path, ([], []))
        return ZippedDir(
            name=_leaf(path),
            path=path,
            dirs=[_subtree(child.path) for child in children],
            files=members,
        )

    return [_subtree(dirpath) for dirpath in index if _is_hit(dirpath)]
834
+
835
+
836
+ # ---------------------------------------------------------------------------
837
+ # low-level copy helpers
838
+ # ---------------------------------------------------------------------------
839
+
840
+
841
def _copy_zip(
    zipobj: zipfile.ZipFile,
    dst_path: Union[str, os.PathLike],
    compression: int = zipfile.ZIP_DEFLATED,
    include_dir_entries: bool = True,
) -> None:
    """Copy all entries from an existing ZipFile to a new zip at dst_path.

    File payloads are re-compressed using ``compression``; each file keeps the
    ``date_time`` and ``external_attr`` of its source entry.

    Parameters
    ----------
    zipobj : zipfile.ZipFile
        The opened source archive.
    dst_path : str or os.PathLike
        Destination of the new archive. It is passed through
        ``_ensure_parent_dir`` and the returned path is the one written to.
    compression : int, optional
        zipfile compression constant applied to every file entry
        (default: zipfile.ZIP_DEFLATED).
    include_dir_entries : bool, optional
        When true, write an explicit entry for every directory: those present
        in the source plus every parent directory implied by a file path.
        Directory entries are emitted first, in sorted order, with mode
        0o40755. When false, no directory entries are written.
    """
    dst_path = _ensure_parent_dir(dst_path)
    infos = zipobj.infolist()
    with zipfile.ZipFile(dst_path, "w", compression=compression) as out:
        if include_dir_entries:
            explicit_dirs: Dict[str, zipfile.ZipInfo] = {}
            dir_names: Set[str] = set()
            for info in infos:
                name = info.filename
                if name.endswith("/"):
                    explicit_dirs.setdefault(name, info)
                    dir_names.add(name)
                    continue
                # Register every ancestor directory of this file.
                parts = name.split("/")[:-1]
                for depth in range(1, len(parts) + 1):
                    dir_names.add("/".join(parts[:depth]) + "/")
            for d in sorted(dir_names):
                di = zipfile.ZipInfo(d)
                src = explicit_dirs.get(d)
                if src is not None:
                    # Keep the timestamp of directories that exist in the
                    # source; implied parents fall back to ZipInfo's default.
                    di.date_time = src.date_time
                di.external_attr = (0o40755 << 16)
                out.writestr(di, b"")

        for info in infos:
            name = info.filename
            if name.endswith("/"):
                # Directory entries were handled above (or are disabled).
                continue
            ni = zipfile.ZipInfo(name)
            ni.date_time = info.date_time
            ni.external_attr = info.external_attr
            # writestr() honours the ZipInfo's own compress_type, which
            # defaults to ZIP_STORED; set it so the payload is really
            # compressed with the requested method.
            ni.compress_type = compression
            out.writestr(ni, zipobj.read(name))
887
+
888
+
889
+ # ---------------------------------------------------------------------------
890
+ # ZippedFile method implementation
891
+ # ---------------------------------------------------------------------------
892
+
893
+
894
def zippedfile_to_filename(
    self: ZippedFile,
    path: Union[str, os.PathLike],
    arcname: Optional[str] = None,
) -> str:
    """Write the content of this archive member to a file on disk.

    How the destination is chosen:
    - `path` is an existing directory or ends with the OS path separator:
      the file is placed inside it, named `arcname` when given and the
      entry's own name otherwise.
    - otherwise `path` is taken as the output file itself; a non-empty
      `arcname` replaces its file name while keeping the parent directory
      of `path` (the current directory when `path` has no parent part).

    The chosen destination is passed through `_ensure_parent_dir`, and the
    path it returns is both written to and returned.
    """
    destination = os.fspath(path)

    if os.path.isdir(destination) or destination.endswith(os.sep):
        # Directory target: pick the file name to create inside it.
        destination = os.path.join(destination, arcname if arcname else self.name)
    elif arcname:
        # File target with a rename: keep the folder, swap the file name.
        folder = os.path.dirname(destination)
        destination = os.path.join(folder if folder else ".", arcname)

    written_to = _ensure_parent_dir(destination)
    with open(written_to, "wb") as sink:
        sink.write(self.read())
    return written_to
924
+
925
+
926
+ # ---------------------------------------------------------------------------
927
+ # ZippedDir method implementation
928
+ # ---------------------------------------------------------------------------
929
+
930
+
931
def zippeddir_to_filename(
    self: ZippedDir,
    path: Union[str, os.PathLike],
    compression: int = zipfile.ZIP_DEFLATED,
    include_dir_entries: bool = True,
    add_root: bool = False,
    root_name: Optional[str] = None,
) -> str:
    """Save this directory subtree into a new zip file at path.

    Every entry of the source archive located under ``self.path`` is copied
    with that prefix removed. File payloads are compressed with
    ``compression`` and keep the ``date_time`` and ``external_attr`` of the
    source entry. (The original docstring states the options mirror
    ZippedDir.isolate(); that method is not visible from here.)

    Parameters
    ----------
    path : str or os.PathLike
        Destination zip file; passed through ``_ensure_parent_dir``.
    compression : int, optional
        zipfile compression constant applied to file entries
        (default: zipfile.ZIP_DEFLATED).
    include_dir_entries : bool, optional
        Write explicit directory entries (mode 0o40755), including any parent
        directory implied by a file path but absent from the source.
    add_root : bool, optional
        Nest all entries under a single top-level folder.
    root_name : Optional[str], optional
        Name of that folder; defaults to this directory's name, or "root"
        when the name is empty.

    Returns
    -------
    str
        The path returned by ``_ensure_parent_dir`` for the written archive.
    """
    abs_path = _ensure_parent_dir(path)
    src_zip = self._resolve_zipobj()
    prefix = self.path.strip("/")
    if prefix:
        prefix += "/"

    if add_root:
        root = (root_name or (self.name or "root")).strip("/")
        root_prefix = f"{root}/"
    else:
        root_prefix = ""

    with zipfile.ZipFile(abs_path, "w", compression=compression) as out_zip:
        if add_root and include_dir_entries:
            ri = zipfile.ZipInfo(root_prefix)
            ri.external_attr = (0o40755 << 16)
            out_zip.writestr(ri, b"")

        # copy matching entries
        for info in src_zip.infolist():
            fn = info.filename
            if not fn.startswith(prefix):
                continue
            rel = fn[len(prefix):]
            if not rel:
                # The entry for the subtree directory itself.
                continue
            arcname = root_prefix + rel

            if arcname.endswith("/"):
                if include_dir_entries:
                    di = zipfile.ZipInfo(arcname)
                    di.date_time = info.date_time
                    di.external_attr = (0o40755 << 16)
                    out_zip.writestr(di, b"")
                continue

            data = src_zip.read(fn)
            ni = zipfile.ZipInfo(arcname)
            ni.date_time = info.date_time
            ni.external_attr = info.external_attr
            # writestr() uses the ZipInfo's compress_type (ZIP_STORED by
            # default) instead of the archive-level setting, so request the
            # caller's compression explicitly.
            ni.compress_type = compression
            out_zip.writestr(ni, data)

        if include_dir_entries:
            # Add parent directories that files imply but the source lacked.
            written = set(out_zip.namelist())
            need_dirs: Set[str] = set()
            for name in written:
                if name.endswith("/"):
                    continue
                parts = name.split("/")[:-1]
                for depth in range(1, len(parts) + 1):
                    need_dirs.add("/".join(parts[:depth]) + "/")
            for d in sorted(need_dirs - written):
                di = zipfile.ZipInfo(d)
                di.external_attr = (0o40755 << 16)
                out_zip.writestr(di, b"")
    return abs_path
1002
+
1003
+
1004
+ # ---------------------------------------------------------------------------
1005
+ # Generic dispatcher
1006
+ # ---------------------------------------------------------------------------
1007
+
1008
+
1009
def to_filename(
    obj: Union[
        zipfile.ZipFile,
        ZippedDir,
        ZippedFile,
        str,
        bytes,
        bytearray,
        io.BytesIO,
    ],
    path: Union[str, os.PathLike],
    *,
    compression: int = zipfile.ZIP_DEFLATED,
    include_dir_entries: bool = True,
    add_root: bool = False,
    root_name: Optional[str] = None,
    arcname: Optional[str] = None,
) -> str:
    """Write `obj` to disk, choosing the strategy from its type.

    Dispatch table:
    - zipfile.ZipFile: all entries are copied into a new zip at `path`
      (`compression` and `include_dir_entries` apply).
    - ZippedDir: delegated to the object's own to_filename() with
      `compression`, `include_dir_entries`, `add_root` and `root_name`.
    - ZippedFile: extracted via zippedfile_to_filename(); `arcname` may
      rename the output.
    - str: encoded as utf-8 and written as a raw file.
    - bytes / bytearray: written as a raw file.
    - io.BytesIO: the buffer content is written as a raw file.

    For the raw-file cases the output is `path` itself, unless `arcname` is
    given, in which case it replaces the file name next to `path`. If `path`
    is an existing directory or ends with the OS separator, the file goes
    inside it and is named `arcname`, else the basename of `path` without its
    extension, else "payload".

    Returns the path that was written. Raises TypeError for any other type.
    """
    abs_path = _ensure_parent_dir(path)

    if isinstance(obj, zipfile.ZipFile):
        _copy_zip(obj, abs_path, compression=compression, include_dir_entries=include_dir_entries)
        return abs_path

    if isinstance(obj, ZippedDir):
        subtree_options = dict(
            compression=compression,
            include_dir_entries=include_dir_entries,
            add_root=add_root,
            root_name=root_name,
        )
        return obj.to_filename(abs_path, **subtree_options)

    if isinstance(obj, ZippedFile):
        # NOTE(review): abs_path may no longer carry a trailing separator that
        # `path` had - confirm directory targets are still detected here.
        return zippedfile_to_filename(obj, abs_path, arcname=arcname)

    if not isinstance(obj, (str, bytes, bytearray, io.BytesIO)):
        raise TypeError(f"Unsupported type for to_filename: {type(obj)!r}")

    # Raw payload: work out the output file (not a zip).
    if os.path.isdir(abs_path) or str(path).endswith(os.sep):
        if arcname:
            file_name = arcname
        else:
            file_name = os.path.basename(abs_path).rsplit(".", 1)[0] or "payload"
        destination = os.path.join(abs_path, file_name)
    elif arcname:
        destination = os.path.join(os.path.dirname(abs_path) or ".", arcname)
    else:
        destination = abs_path
    destination = _ensure_parent_dir(destination)

    if isinstance(obj, io.BytesIO):
        payload = obj.getvalue()
    elif isinstance(obj, str):
        payload = obj.encode("utf-8")
    else:
        payload = bytes(obj)

    with open(destination, "wb") as sink:
        sink.write(payload)
    return destination
1082
+
1083
+
1084
+ # ---------------------------------------------------------------------------
1085
+ # small helpers
1086
+ # ---------------------------------------------------------------------------
1087
+
1088
+
1089
def load(path: Union[str, os.PathLike]) -> zipfile.ZipFile:
    """Return a read-only ZipFile for the archive stored at `path`.

    Shorthand for zipfile.ZipFile(path, "r") that also accepts path-like
    objects.

    Parameters
    ----------
    path : str or os.PathLike
        Location of the zip archive on disk.

    Returns
    -------
    zipfile.ZipFile
        The archive opened in read mode; closing it is up to the caller.
    """
    archive_location = os.fspath(path)
    return zipfile.ZipFile(archive_location, "r")
1105
+
1106
+
1107
# Names exported by this module.
__all__ = [
    "FileBuffer",
    "ZippedFile",
    "ZippedDir",
    "walk",
    "bytes_to_zipfile",
    "create_from_dir",
    "load",
    "fetch_files_in_zip",
    "fetch_dirs_in_zip",
    "to_filename",
]

def __dir__() -> List[str]:
    """Return the exported names in sorted order for dir() on the module."""
    exported = list(__all__)
    exported.sort()
    return exported