brkraw 0.3.11__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- brkraw/__init__.py +9 -3
- brkraw/apps/__init__.py +12 -0
- brkraw/apps/addon/__init__.py +30 -0
- brkraw/apps/addon/core.py +35 -0
- brkraw/apps/addon/dependencies.py +402 -0
- brkraw/apps/addon/installation.py +500 -0
- brkraw/apps/addon/io.py +21 -0
- brkraw/apps/hook/__init__.py +25 -0
- brkraw/apps/hook/core.py +636 -0
- brkraw/apps/loader/__init__.py +10 -0
- brkraw/apps/loader/core.py +622 -0
- brkraw/apps/loader/formatter.py +288 -0
- brkraw/apps/loader/helper.py +797 -0
- brkraw/apps/loader/info/__init__.py +11 -0
- brkraw/apps/loader/info/scan.py +85 -0
- brkraw/apps/loader/info/scan.yaml +90 -0
- brkraw/apps/loader/info/study.py +69 -0
- brkraw/apps/loader/info/study.yaml +156 -0
- brkraw/apps/loader/info/transform.py +92 -0
- brkraw/apps/loader/types.py +220 -0
- brkraw/cli/__init__.py +5 -0
- brkraw/cli/commands/__init__.py +2 -0
- brkraw/cli/commands/addon.py +327 -0
- brkraw/cli/commands/config.py +205 -0
- brkraw/cli/commands/convert.py +903 -0
- brkraw/cli/commands/hook.py +348 -0
- brkraw/cli/commands/info.py +74 -0
- brkraw/cli/commands/init.py +214 -0
- brkraw/cli/commands/params.py +106 -0
- brkraw/cli/commands/prune.py +288 -0
- brkraw/cli/commands/session.py +371 -0
- brkraw/cli/hook_args.py +80 -0
- brkraw/cli/main.py +83 -0
- brkraw/cli/utils.py +60 -0
- brkraw/core/__init__.py +13 -0
- brkraw/core/config.py +380 -0
- brkraw/core/entrypoints.py +25 -0
- brkraw/core/formatter.py +367 -0
- brkraw/core/fs.py +495 -0
- brkraw/core/jcamp.py +600 -0
- brkraw/core/layout.py +451 -0
- brkraw/core/parameters.py +781 -0
- brkraw/core/zip.py +1121 -0
- brkraw/dataclasses/__init__.py +14 -0
- brkraw/dataclasses/node.py +139 -0
- brkraw/dataclasses/reco.py +33 -0
- brkraw/dataclasses/scan.py +61 -0
- brkraw/dataclasses/study.py +131 -0
- brkraw/default/__init__.py +3 -0
- brkraw/default/pruner_specs/deid4share.yaml +42 -0
- brkraw/default/rules/00_default.yaml +4 -0
- brkraw/default/specs/metadata_dicom.yaml +236 -0
- brkraw/default/specs/metadata_transforms.py +92 -0
- brkraw/resolver/__init__.py +7 -0
- brkraw/resolver/affine.py +539 -0
- brkraw/resolver/datatype.py +69 -0
- brkraw/resolver/fid.py +90 -0
- brkraw/resolver/helpers.py +36 -0
- brkraw/resolver/image.py +188 -0
- brkraw/resolver/nifti.py +370 -0
- brkraw/resolver/shape.py +235 -0
- brkraw/schema/__init__.py +3 -0
- brkraw/schema/context_map.yaml +62 -0
- brkraw/schema/meta.yaml +57 -0
- brkraw/schema/niftiheader.yaml +95 -0
- brkraw/schema/pruner.yaml +55 -0
- brkraw/schema/remapper.yaml +128 -0
- brkraw/schema/rules.yaml +154 -0
- brkraw/specs/__init__.py +10 -0
- brkraw/specs/hook/__init__.py +12 -0
- brkraw/specs/hook/logic.py +31 -0
- brkraw/specs/hook/validator.py +22 -0
- brkraw/specs/meta/__init__.py +5 -0
- brkraw/specs/meta/validator.py +156 -0
- brkraw/specs/pruner/__init__.py +15 -0
- brkraw/specs/pruner/logic.py +361 -0
- brkraw/specs/pruner/validator.py +119 -0
- brkraw/specs/remapper/__init__.py +27 -0
- brkraw/specs/remapper/logic.py +924 -0
- brkraw/specs/remapper/validator.py +314 -0
- brkraw/specs/rules/__init__.py +6 -0
- brkraw/specs/rules/logic.py +263 -0
- brkraw/specs/rules/validator.py +103 -0
- brkraw-0.5.0.dist-info/METADATA +81 -0
- brkraw-0.5.0.dist-info/RECORD +88 -0
- {brkraw-0.3.11.dist-info → brkraw-0.5.0.dist-info}/WHEEL +1 -2
- brkraw-0.5.0.dist-info/entry_points.txt +13 -0
- brkraw/lib/__init__.py +0 -4
- brkraw/lib/backup.py +0 -641
- brkraw/lib/bids.py +0 -0
- brkraw/lib/errors.py +0 -125
- brkraw/lib/loader.py +0 -1220
- brkraw/lib/orient.py +0 -194
- brkraw/lib/parser.py +0 -48
- brkraw/lib/pvobj.py +0 -301
- brkraw/lib/reference.py +0 -245
- brkraw/lib/utils.py +0 -471
- brkraw/scripts/__init__.py +0 -0
- brkraw/scripts/brk_backup.py +0 -106
- brkraw/scripts/brkraw.py +0 -744
- brkraw/ui/__init__.py +0 -0
- brkraw/ui/config.py +0 -17
- brkraw/ui/main_win.py +0 -214
- brkraw/ui/previewer.py +0 -225
- brkraw/ui/scan_info.py +0 -72
- brkraw/ui/scan_list.py +0 -73
- brkraw/ui/subj_info.py +0 -128
- brkraw-0.3.11.dist-info/METADATA +0 -25
- brkraw-0.3.11.dist-info/RECORD +0 -28
- brkraw-0.3.11.dist-info/entry_points.txt +0 -3
- brkraw-0.3.11.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- {brkraw-0.3.11.dist-info → brkraw-0.5.0.dist-info/licenses}/LICENSE +0 -0
brkraw/core/fs.py
ADDED
|
@@ -0,0 +1,495 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unified filesystem view for Paravision-like datasets stored as directories or zip
|
|
3
|
+
archives.
|
|
4
|
+
|
|
5
|
+
This module provides `DatasetFS`, a lightweight abstraction that presents the same
|
|
6
|
+
API whether the dataset lives on disk or inside a zip file. It handles:
|
|
7
|
+
- Anchor detection so zip members can be referenced with stable, anchor-stripped
|
|
8
|
+
relative paths.
|
|
9
|
+
- Directory and zip traversal via a zipfile-like `walk` that mirrors `os.walk`.
|
|
10
|
+
- Opening files by archive-relative path, yielding file-like objects or concrete
|
|
11
|
+
temp files for consumers that require real paths.
|
|
12
|
+
- Repacking subtrees back into zip files with optional root folder control.
|
|
13
|
+
|
|
14
|
+
`DatasetFS` is intentionally small and side-effect free so it can be reused
|
|
15
|
+
outside Paravision-specific contexts.
|
|
16
|
+
"""
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
import io
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import IO, Iterable, Literal, Optional, Tuple, List, Union, TYPE_CHECKING
|
|
22
|
+
|
|
23
|
+
import os
|
|
24
|
+
import zipfile
|
|
25
|
+
import shutil
|
|
26
|
+
from tempfile import TemporaryDirectory
|
|
27
|
+
from . import zip as zipcore
|
|
28
|
+
|
|
29
|
+
if TYPE_CHECKING:
|
|
30
|
+
from .zip import ZippedDir, ZippedFile
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class DatasetFile:
    """Filesystem-backed file entry mirroring zip.ZippedFile API.

    Attributes:
        name: Leaf name of the file.
        path: Archive-style path of the file (anchor-aware).
        fs: Owning `DatasetFS`; all reads are delegated to it.
    """

    name: str
    path: str  # archive-style path (anchor-aware)
    fs: "DatasetFS"

    def __repr__(self) -> str:
        # The size is purely informational, so any failure to resolve or stat
        # the file deliberately degrades to "?" instead of breaking repr().
        try:
            full = self.fs.root / self.fs._normalize_relpath(self.path)
            size = full.stat().st_size
        except Exception:
            size = "?"
        return f"DatasetFile(path='{self.path}', size={size})"

    def is_dir(self) -> bool:
        """Return False; this entry always represents a file."""
        return False

    def is_file(self) -> bool:
        """Return True; this entry always represents a file."""
        return True

    def open(self) -> IO[bytes]:
        """Open the file for reading in binary mode."""
        return self.fs.open_binary(self.path)

    def read(self) -> bytes:
        """Return the full content of the file as bytes."""
        with self.open() as f:
            return f.read()

    def buffer(self) -> io.BytesIO:
        """Return the file content as an in-memory buffer positioned at 0."""
        # A freshly constructed BytesIO already starts at offset 0.
        return io.BytesIO(self.read())

    def isolate(self) -> "zipcore.FileBuffer":
        """Return a FileBuffer wrapping this file's content."""
        return zipcore.FileBuffer(name=self.name, buffer=self.buffer())

    def extract_to(self, dest: Union[Path, str]) -> Path:
        """Write this file to a directory or file path.

        Args:
            dest: Target location. An existing directory, or a string ending
                with a path separator, is treated as a directory and the file
                is written inside it under `self.name`; anything else is
                treated as the destination file path.

        Returns:
            Path of the written file.
        """
        # Inspect the raw argument: Path() discards trailing separators, so
        # the "ends with a separator" hint is lost after conversion.
        raw = str(dest)
        separators = tuple(sep for sep in (os.sep, os.altsep) if sep)
        dest_path = Path(dest)
        if dest_path.is_dir() or raw.endswith(separators):
            dest_path = dest_path / self.name
        dest_path.parent.mkdir(parents=True, exist_ok=True)
        dest_path.write_bytes(self.read())
        return dest_path
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass
class DatasetDir:
    """Filesystem-backed directory entry mirroring zip.ZippedDir API.

    Attributes:
        name: Leaf name of the directory.
        path: Archive-style path of the directory (anchor-aware).
        fs: Owning `DatasetFS`; all listings are delegated to it.
    """

    name: str
    path: str  # archive-style path (anchor-aware)
    fs: "DatasetFS"

    def __repr__(self) -> str:
        # Count both kinds in a single pass so the directory is listed once.
        n_dirs = 0
        n_files = 0
        for entry in self.iterdir():
            if entry.is_dir():
                n_dirs += 1
            if entry.is_file():
                n_files += 1
        return f"DatasetDir(path='{self.path}', dirs={n_dirs}, files={n_files})"

    def is_dir(self) -> bool:
        """Return True; this entry always represents a directory."""
        return True

    def is_file(self) -> bool:
        """Return False; this entry always represents a directory."""
        return False

    def listdir(self) -> List[str]:
        """List immediate children names (dirs first, then files)."""
        return self.fs.listdir(self.path)

    def iterdir(self) -> Iterable[Union["DatasetDir", "DatasetFile", "ZippedDir", "ZippedFile"]]:
        """Iterate over children as objects (dirs first, then files)."""
        yield from self.fs.iterdir(self.path)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@dataclass
class DatasetFS:
    """Unified view over a dataset rooted in a directory or zipfile.

    Instances can be used as context managers; leaving the `with` block closes
    the underlying zip handle (a no-op in "dir" mode).

    Attributes:
        root: Dataset root (directory or zipfile path).
        _mode: Backing mode, either "dir" or "zip".
        _zip: ZipFile handle when `_mode` is "zip", else None.
        _anchor: Optional top-level directory name inside the archive.
    """

    root: Path
    _mode: Literal["dir", "zip"]
    _zip: Optional[zipfile.ZipFile]
    _anchor: str = field(init=False)

    def __post_init__(self) -> None:
        self._anchor = self._detect_anchor()

    @classmethod
    def from_path(cls, path: Union[Path, str]) -> "DatasetFS":
        """Create a DatasetFS from a directory or zip path.

        Args:
            path: Filesystem path pointing to a dataset root.

        Returns:
            DatasetFS bound to the given path.

        Raises:
            ValueError: If the path is neither a directory nor a valid zip file.
        """
        path = Path(path)
        if path.is_dir():
            return cls(root=path, _mode="dir", _zip=None)
        if path.is_file() and zipfile.is_zipfile(path):
            return cls(root=path, _mode="zip", _zip=zipcore.load(path))
        raise ValueError(f"Invalid dataset root: {path}")

    # -- lifecycle
    def close(self) -> None:
        """Close the backing zip handle, if any."""
        if self._zip is not None:
            self._zip.close()

    def __enter__(self) -> "DatasetFS":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    # -- helpers
    def _detect_anchor(self) -> str:
        """Infer the top-level archive directory name.

        In "dir" mode the anchor is the root directory's own name. In "zip"
        mode it is the first path component shared by every member, if any.

        Returns:
            Anchor name when identifiable, else an empty string.
        """
        if self._mode == "dir":
            return self.root.name

        assert self._zip is not None
        names = [n.strip("/") for n in self._zip.namelist() if n.strip("/")]
        if not names:
            return ""
        first = names[0].split("/")[0]
        prefix = first + "/"
        if all(n == first or n.startswith(prefix) for n in names[1:]):
            return first
        return ""

    def _normalize_relpath(self, relpath: str) -> str:
        """Remove anchor prefix if present.

        Args:
            relpath: Archive-relative path that may include the anchor.

        Returns:
            Anchor-stripped relative path.
        """
        relpath = relpath.strip("/")
        if not self._anchor:
            return relpath
        if relpath == self._anchor:
            return ""
        prefix = f"{self._anchor}/"
        if relpath.startswith(prefix):
            return relpath[len(prefix):]
        return relpath

    def _ensure_anchor(self, relpath: str) -> str:
        """Add anchor prefix if missing.

        Args:
            relpath: Archive-relative path without guaranteed anchor.

        Returns:
            Path guaranteed to include the anchor when one exists.
        """
        relpath = relpath.strip("/")
        if not self._anchor:
            return relpath
        if relpath == self._anchor or relpath.startswith(f"{self._anchor}/"):
            return relpath
        return f"{self._anchor}/{relpath}" if relpath else self._anchor

    def _join_anchor(self, relpath: str) -> str:
        """Prefix the anchor onto a path already known to be anchor-stripped.

        Unlike `_ensure_anchor`, this never guesses whether the anchor is
        already present, so a child that shares the anchor's name (e.g.
        `<anchor>/<anchor>/file`) keeps its full path.
        """
        relpath = relpath.strip("/")
        if not self._anchor:
            return relpath
        return f"{self._anchor}/{relpath}" if relpath else self._anchor

    def _list_fs_children(self, relpath: str) -> Tuple[List[str], List[str]]:
        """Return sorted `(dirnames, filenames)` under an anchor-stripped path.

        Used in "dir" mode only. A path that is missing or is not a directory
        yields two empty lists.
        """
        base_path = self.root / relpath if relpath else self.root
        if not base_path.is_dir():
            return [], []
        dirnames: List[str] = []
        filenames: List[str] = []
        for entry in base_path.iterdir():
            (dirnames if entry.is_dir() else filenames).append(entry.name)
        dirnames.sort()
        filenames.sort()
        return dirnames, filenames

    def _list_zip_children(self, relpath: str) -> Tuple[List, List]:
        """Return name-sorted `(dir objects, file objects)` for a zip directory.

        Used in "zip" mode only. The listing is taken from the `zipcore.walk`
        record whose dirpath equals the anchored target; two empty lists are
        returned when no such record is produced.
        """
        assert self._zip is not None
        target = self._join_anchor(relpath)
        for dirpath, direntries, files in zipcore.walk(self._zip, top=target):
            if dirpath != target:
                continue
            return (
                sorted(direntries, key=lambda d: d.name),
                sorted(files, key=lambda f: f.name),
            )
        return [], []

    @property
    def anchor(self) -> str:
        """Top-level directory name of the dataset ("" when there is none)."""
        return self._anchor

    def strip_anchor(self, relpath: str) -> str:
        """Remove anchor prefix if present."""
        return self._normalize_relpath(relpath)

    def add_anchor(self, relpath: str) -> str:
        """Ensure anchor prefix is present."""
        return self._ensure_anchor(relpath)

    # -- public API
    def walk(
        self,
        top: str = "",
        *,
        as_objects: bool = False,
    ) -> Iterable[Tuple[str, List, List]]:
        """Yield (dirpath, direntries, fileentries) with archive-style paths.

        Args:
            top: Optional subdirectory to start from (anchor-aware).
            as_objects: When True, return DatasetDir/ZippedDir and
                DatasetFile/ZippedFile entries; otherwise return name strings.

        Yields:
            Tuples of `(dirpath, direntries, fileentries)` using posix-style paths.
        """
        norm_top = top.strip("/")
        if norm_top:
            # Component-aware check: "dsx" must not count as anchored by "ds".
            norm_top = self._ensure_anchor(norm_top)

        if self._mode == "dir":
            base = self.root
            rel_top = self._normalize_relpath(norm_top)
            start = base / rel_top if rel_top else base
            if not start.exists():
                return

            if not norm_top and self._anchor:
                # mirror zip.walk: expose the anchor as the top-level directory
                if as_objects:
                    yield "", [DatasetDir(name=self._anchor, path=self._anchor, fs=self)], []
                else:
                    yield "", [self._anchor], []

            for dirpath, dirnames, filenames in os.walk(start):
                rel = os.path.relpath(dirpath, base)
                rel = "" if rel == "." else rel.replace(os.sep, "/")
                # `rel` is relative to the root, so it never carries the anchor.
                rel = self._join_anchor(rel)
                # Sort in place so os.walk also *descends* in sorted order.
                dirnames.sort()
                filenames.sort()
                if as_objects:
                    dir_objs = [
                        DatasetDir(name=d, path=f"{rel}/{d}".strip("/"), fs=self)
                        for d in dirnames
                    ]
                    file_objs = [
                        DatasetFile(name=f, path=f"{rel}/{f}".strip("/"), fs=self)
                        for f in filenames
                    ]
                    yield rel, dir_objs, file_objs
                else:
                    # Hand out copies: callers editing the lists must not
                    # change which directories os.walk visits.
                    yield rel, list(dirnames), list(filenames)
        else:
            assert self._zip is not None
            for dirpath, direntries, files in zipcore.walk(self._zip, top=norm_top):
                if as_objects:
                    yield dirpath, direntries, files
                else:
                    yield dirpath, [d.name for d in direntries], [f.name for f in files]

    def open_binary(self, relpath: str) -> IO[bytes]:
        """Open a file by archive-relative path.

        Args:
            relpath: Path relative to the dataset root (posix separators).

        Returns:
            File-like object in binary mode.

        Raises:
            FileNotFoundError: If the file does not exist.
        """
        relpath = self._normalize_relpath(relpath)

        if self._mode == "dir":
            return open(self.root / relpath, "rb")

        assert self._zip is not None
        arcname = self._join_anchor(relpath)
        # Split on the last "/" (archive paths are posix-style).
        top, _, leaf = arcname.rpartition("/")
        matches = zipcore.fetch_files_in_zip(
            self._zip,
            leaf,
            top=top,
            wildcard=False,
        )
        if not matches:
            raise FileNotFoundError(arcname)
        return matches[0].open()

    def listdir(self, relpath: str = "") -> List[str]:
        """Return entry names under a relative path (dirs first, then files).

        A path that does not exist, or that is not a directory in "dir" mode,
        yields an empty list.
        """
        relpath = self._normalize_relpath(relpath)
        if self._mode == "dir":
            dirnames, filenames = self._list_fs_children(relpath)
            return dirnames + filenames

        dirobjs, fileobjs = self._list_zip_children(relpath)
        return [d.name for d in dirobjs] + [f.name for f in fileobjs]

    def iterdir(
        self,
        relpath: str = "",
    ) -> Iterable[Union["DatasetDir", "DatasetFile", "zipcore.ZippedDir", "zipcore.ZippedFile"]]:
        """Iterate entries under a relative path as objects (dirs first).

        A path that does not exist, or that is not a directory in "dir" mode,
        yields nothing.
        """
        relpath = self._normalize_relpath(relpath)

        if self._mode == "dir":
            dirnames, filenames = self._list_fs_children(relpath)
            # Children of an anchor-stripped path get the anchor re-attached
            # unconditionally so the resulting paths round-trip.
            parent = self._join_anchor(relpath)
            dir_entries = [
                DatasetDir(name=d, path=f"{parent}/{d}".strip("/"), fs=self)
                for d in dirnames
            ]
            file_entries = [
                DatasetFile(name=f, path=f"{parent}/{f}".strip("/"), fs=self)
                for f in filenames
            ]
            return iter([*dir_entries, *file_entries])

        dirobjs, fileobjs = self._list_zip_children(relpath)
        return iter([*dirobjs, *fileobjs])

    def exists(self, relpath: str) -> bool:
        """Check existence of a dataset-relative path.

        In "zip" mode a path exists when it names a member exactly, or when it
        is a directory, i.e. at least one member lives underneath it. The
        archive root itself always exists.
        """
        relpath = self._normalize_relpath(relpath)
        if self._mode == "dir":
            return (self.root / relpath).exists()

        assert self._zip is not None
        arcname = self._join_anchor(relpath)
        if not arcname:
            return True
        try:
            self._zip.getinfo(arcname)
            return True
        except KeyError:
            pass
        # Directories are stored with a trailing "/" or are only implied by
        # their members, so getinfo() alone cannot see them.
        prefix = f"{arcname}/"
        return any(name.startswith(prefix) for name in self._zip.namelist())

    def compress_to(
        self,
        dest: Path,
        *,
        relpath: str = "",
        add_root: bool = True,
        root_name: Optional[str] = None,
    ) -> Path:
        """Persist the whole dataset or a subtree as a zip file.

        Args:
            dest: Destination zip path.
            relpath: Optional subtree to pack relative to the dataset root.
            add_root: Whether to include a top-level root folder in the zip.
            root_name: Optional name for the root folder when `add_root` is True.

        Returns:
            Path to the created zip file.

        Raises:
            FileNotFoundError: When the requested subtree does not exist.
            RuntimeError: When root detection inside a temporary zip fails.
        """
        dest = Path(dest)
        relpath = self._normalize_relpath(relpath)
        root_name = root_name or self.anchor or (
            Path(relpath).parts[0] if relpath else self.root.name
        )

        with TemporaryDirectory() as tmp:
            workdir = Path(tmp)
            # The staged tree and the staged zip live in separate locations
            # inside the temp dir so they can never collide, and nothing is
            # ever written next to `dest` except `dest` itself.
            tmp_root = workdir / "tree" / root_name
            extract_into = tmp_root / relpath if relpath else tmp_root

            if self._mode == "dir":
                src_dir = self.root / relpath
                if not src_dir.exists():
                    raise FileNotFoundError(src_dir)
                extract_into.parent.mkdir(parents=True, exist_ok=True)
                shutil.copytree(src_dir, extract_into, dirs_exist_ok=True)
            else:
                assert self._zip is not None
                if not relpath:
                    # whole zip; just copy
                    # NOTE(review): this shortcut ignores `add_root` and
                    # `root_name`; confirm that is intended for callers.
                    shutil.copyfile(self.root, dest)
                    return dest

                arcdir = self._join_anchor(relpath)
                dirs = zipcore.fetch_dirs_in_zip(
                    self._zip,
                    dirname=arcdir,
                    match_scope="fullpath",
                    wildcard=False,
                )
                if not dirs:
                    raise FileNotFoundError(arcdir)
                with dirs[0].isolate() as subzip:
                    extract_into.parent.mkdir(parents=True, exist_ok=True)
                    subzip.extractall(extract_into)

            if add_root:
                tmp_zip = workdir / "staged.zip"
                zipcore.create_from_dir(tmp_zip, tmp_root)
                with zipcore.load(tmp_zip) as zf:
                    roots = zipcore.fetch_dirs_in_zip(
                        zf, dirname="", match_scope="fullpath", wildcard=True
                    )
                    if not roots:
                        raise RuntimeError("Failed to locate root dir while zipping.")
                    roots[0].to_filename(
                        dest,
                        add_root=True,
                        root_name=root_name,
                        include_dir_entries=True,
                    )
            else:
                zipcore.create_from_dir(dest, extract_into)

        return dest
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
__all__ = [
|
|
489
|
+
"DatasetFS",
|
|
490
|
+
"DatasetDir",
|
|
491
|
+
"DatasetFile",
|
|
492
|
+
]
|
|
493
|
+
|
|
494
|
+
def __dir__() -> List[str]:
    """Return the module's public names, i.e. the entries of `__all__`, sorted."""
    public_names = list(__all__)
    public_names.sort()
    return public_names
|