brkraw 0.5.2__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- brkraw/__init__.py +1 -1
- brkraw/api/__init__.py +122 -0
- brkraw/api/types.py +39 -0
- brkraw/apps/loader/__init__.py +3 -6
- brkraw/apps/loader/core.py +128 -132
- brkraw/apps/loader/formatter.py +0 -2
- brkraw/apps/loader/helper.py +334 -114
- brkraw/apps/loader/info/scan.py +2 -2
- brkraw/apps/loader/info/transform.py +0 -1
- brkraw/apps/loader/types.py +56 -59
- brkraw/cli/commands/addon.py +1 -1
- brkraw/cli/commands/cache.py +82 -0
- brkraw/cli/commands/config.py +2 -2
- brkraw/cli/commands/convert.py +61 -38
- brkraw/cli/commands/hook.py +1 -3
- brkraw/cli/commands/info.py +1 -1
- brkraw/cli/commands/init.py +1 -1
- brkraw/cli/commands/params.py +1 -1
- brkraw/cli/commands/prune.py +2 -2
- brkraw/cli/commands/session.py +1 -11
- brkraw/cli/main.py +51 -1
- brkraw/cli/utils.py +1 -1
- brkraw/core/cache.py +87 -0
- brkraw/core/config.py +18 -2
- brkraw/core/fs.py +26 -9
- brkraw/core/zip.py +46 -32
- brkraw/dataclasses/__init__.py +3 -2
- brkraw/dataclasses/study.py +73 -23
- brkraw/resolver/datatype.py +10 -2
- brkraw/resolver/image.py +140 -21
- brkraw/resolver/nifti.py +4 -12
- brkraw/schema/niftiheader.yaml +0 -2
- brkraw/specs/meta/validator.py +0 -1
- brkraw/specs/rules/logic.py +1 -3
- {brkraw-0.5.2.dist-info → brkraw-0.5.5.dist-info}/METADATA +8 -9
- {brkraw-0.5.2.dist-info → brkraw-0.5.5.dist-info}/RECORD +39 -35
- {brkraw-0.5.2.dist-info → brkraw-0.5.5.dist-info}/entry_points.txt +1 -0
- {brkraw-0.5.2.dist-info → brkraw-0.5.5.dist-info}/WHEEL +0 -0
- {brkraw-0.5.2.dist-info → brkraw-0.5.5.dist-info}/licenses/LICENSE +0 -0
brkraw/cli/commands/session.py
CHANGED
|
@@ -98,14 +98,12 @@ def cmd_unset(args: argparse.Namespace) -> int:
|
|
|
98
98
|
"BRKRAW_CONVERT_CONTEXT_MAP",
|
|
99
99
|
"BRKRAW_CONVERT_SPACE",
|
|
100
100
|
"BRKRAW_CONVERT_COMPRESS",
|
|
101
|
-
"BRKRAW_CONVERT_FLIP_X",
|
|
102
101
|
"BRKRAW_CONVERT_FLATTEN_FG",
|
|
103
102
|
"BRKRAW_CONVERT_OVERRIDE_SUBJECT_TYPE",
|
|
104
103
|
"BRKRAW_CONVERT_OVERRIDE_SUBJECT_POSE",
|
|
105
104
|
"BRKRAW_CONVERT_XYZ_UNITS",
|
|
106
105
|
"BRKRAW_CONVERT_T_UNITS",
|
|
107
106
|
"BRKRAW_CONVERT_HEADER",
|
|
108
|
-
"BRKRAW_CONVERT_FORMAT",
|
|
109
107
|
]
|
|
110
108
|
targets: List[str] = []
|
|
111
109
|
if args.path:
|
|
@@ -153,14 +151,12 @@ def cmd_env(_: argparse.Namespace) -> int:
|
|
|
153
151
|
convert_context_map = os.environ.get("BRKRAW_CONVERT_CONTEXT_MAP")
|
|
154
152
|
convert_compress = os.environ.get("BRKRAW_CONVERT_COMPRESS")
|
|
155
153
|
convert_space = os.environ.get("BRKRAW_CONVERT_SPACE")
|
|
156
|
-
convert_flip_x = os.environ.get("BRKRAW_CONVERT_FLIP_X")
|
|
157
154
|
convert_flatten_fg = os.environ.get("BRKRAW_CONVERT_FLATTEN_FG")
|
|
158
155
|
convert_subject_type = os.environ.get("BRKRAW_CONVERT_OVERRIDE_SUBJECT_TYPE")
|
|
159
156
|
convert_subject_pose = os.environ.get("BRKRAW_CONVERT_OVERRIDE_SUBJECT_POSE")
|
|
160
157
|
convert_xyz_units = os.environ.get("BRKRAW_CONVERT_XYZ_UNITS")
|
|
161
158
|
convert_t_units = os.environ.get("BRKRAW_CONVERT_T_UNITS")
|
|
162
159
|
convert_header = os.environ.get("BRKRAW_CONVERT_HEADER")
|
|
163
|
-
convert_format = os.environ.get("BRKRAW_CONVERT_FORMAT")
|
|
164
160
|
if (
|
|
165
161
|
path is None
|
|
166
162
|
and scan_id is None
|
|
@@ -175,14 +171,12 @@ def cmd_env(_: argparse.Namespace) -> int:
|
|
|
175
171
|
and convert_context_map is None
|
|
176
172
|
and convert_compress is None
|
|
177
173
|
and convert_space is None
|
|
178
|
-
and convert_flip_x is None
|
|
179
174
|
and convert_flatten_fg is None
|
|
180
175
|
and convert_subject_type is None
|
|
181
176
|
and convert_subject_pose is None
|
|
182
177
|
and convert_xyz_units is None
|
|
183
178
|
and convert_t_units is None
|
|
184
179
|
and convert_header is None
|
|
185
|
-
and convert_format is None
|
|
186
180
|
):
|
|
187
181
|
print("(none)")
|
|
188
182
|
return 0
|
|
@@ -212,8 +206,6 @@ def cmd_env(_: argparse.Namespace) -> int:
|
|
|
212
206
|
print(f"BRKRAW_CONVERT_SPACE={convert_space}")
|
|
213
207
|
if convert_compress is not None:
|
|
214
208
|
print(f"BRKRAW_CONVERT_COMPRESS={convert_compress}")
|
|
215
|
-
if convert_flip_x is not None:
|
|
216
|
-
print(f"BRKRAW_CONVERT_FLIP_X={convert_flip_x}")
|
|
217
209
|
if convert_flatten_fg is not None:
|
|
218
210
|
print(f"BRKRAW_CONVERT_FLATTEN_FG={convert_flatten_fg}")
|
|
219
211
|
if convert_subject_type is not None:
|
|
@@ -226,8 +218,6 @@ def cmd_env(_: argparse.Namespace) -> int:
|
|
|
226
218
|
print(f"BRKRAW_CONVERT_T_UNITS={convert_t_units}")
|
|
227
219
|
if convert_header is not None:
|
|
228
220
|
print(f"BRKRAW_CONVERT_HEADER={convert_header}")
|
|
229
|
-
if convert_format is not None:
|
|
230
|
-
print(f"BRKRAW_CONVERT_FORMAT={convert_format}")
|
|
231
221
|
return 0
|
|
232
222
|
|
|
233
223
|
|
|
@@ -311,7 +301,7 @@ def register(subparsers: argparse._SubParsersAction) -> None: # type: ignore[na
|
|
|
311
301
|
help=(
|
|
312
302
|
"Set BRKRAW_CONVERT_<OPTION> as KEY=VALUE (repeatable). "
|
|
313
303
|
"Keys: OUTPUT, PREFIX, SCAN_ID, RECO_ID, SIDECAR, CONTEXT_MAP, "
|
|
314
|
-
"COMPRESS, SPACE,
|
|
304
|
+
"COMPRESS, SPACE, FLATTEN_FG, OVERRIDE_SUBJECT_TYPE, "
|
|
315
305
|
"OVERRIDE_SUBJECT_POSE, XYZ_UNITS, T_UNITS, HEADER, FORMAT."
|
|
316
306
|
),
|
|
317
307
|
)
|
brkraw/cli/main.py
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import re
|
|
4
|
+
import os
|
|
5
|
+
import shutil
|
|
6
|
+
import subprocess
|
|
3
7
|
import argparse
|
|
4
8
|
from typing import Callable, List, Optional
|
|
5
9
|
from ..core.entrypoints import list_entry_points as _iter_entry_points
|
|
@@ -10,6 +14,50 @@ from brkraw.core import config as config_core
|
|
|
10
14
|
PLUGIN_GROUP = "brkraw.cli"
|
|
11
15
|
|
|
12
16
|
|
|
17
|
+
def _run_capture(cmd: list[str]) -> str:
    """Run *cmd* and return its standard output as text.

    Standard error is captured (not echoed) and a non-zero exit status
    raises ``subprocess.CalledProcessError``.
    """
    # check_output() is run(..., stdout=PIPE, check=True).stdout; piping
    # stderr as well reproduces capture_output=True.
    return subprocess.check_output(cmd, text=True, stderr=subprocess.PIPE)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _pv_autoset_env() -> None:
    """Set BRKRAW_PATH / BRKRAW_SCAN_ID / BRKRAW_RECO_ID from ``pvcmd`` output.

    The function is a no-op when the ``pvcmd`` executable is not on PATH or
    when ``pvcmd -e ParxServer`` exits non-zero (presumably meaning no
    ParxServer is reachable -- TODO confirm against ParaVision docs).

    Otherwise the ``ListPs`` CSV listing is queried, the single line that
    contains ``REQUEST_ATTR`` is selected, and the first ``;``-separated
    field shaped like ``<exp_path>/<scan_id>/pdata/<reco_id>`` is split into
    the three environment variables. Existing values of those variables are
    overwritten.

    Raises:
        SystemExit: If the ListPs query fails, if zero or several lines
            contain ``REQUEST_ATTR``, or if no field has the expected
            dataset-path shape.
    """
    if shutil.which("pvcmd") is None:
        return

    probe = subprocess.run(
        ["pvcmd", "-e", "ParxServer"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    if probe.returncode != 0:
        return

    try:
        out = _run_capture(["pvcmd", "-a", "ParxServer", "-r", "ListPs", "-csv"])
    except subprocess.CalledProcessError as exc:
        # Report like every other failure here instead of leaking a traceback.
        raise SystemExit(
            f"ERROR: pvcmd ListPs query failed with exit status {exc.returncode}"
        ) from exc

    matches = [line for line in out.splitlines() if "REQUEST_ATTR" in line]
    if not matches:
        raise SystemExit("ERROR: No ps entry with REQUEST_ATTR found")
    if len(matches) > 1:
        msg = "ERROR: Multiple ps entries with REQUEST_ATTR found\n" + "\n".join(matches)
        raise SystemExit(msg)

    # Compile once; the pattern is applied to every field of the line.
    dataset_re = re.compile(r"^(?P<exp_path>.+)/(?P<scan_id>\d+)/pdata/(?P<reco_id>\d+)$")
    for field in matches[0].split(";"):
        m = dataset_re.match(field.strip())
        if m:
            break
    else:
        raise SystemExit("ERROR: No valid <exp_path>/<scan_id>/pdata/<reco_id> path found")

    os.environ["BRKRAW_PATH"] = m.group("exp_path")
    os.environ["BRKRAW_SCAN_ID"] = m.group("scan_id")
    os.environ["BRKRAW_RECO_ID"] = m.group("reco_id")
|
|
59
|
+
|
|
60
|
+
|
|
13
61
|
def _register_entry_point_commands(
|
|
14
62
|
subparsers: argparse._SubParsersAction, # type: ignore[name-defined]
|
|
15
63
|
) -> None:
|
|
@@ -26,6 +74,7 @@ def _register_entry_point_commands(
|
|
|
26
74
|
preferred = [
|
|
27
75
|
"init",
|
|
28
76
|
"config",
|
|
77
|
+
"cache",
|
|
29
78
|
"session",
|
|
30
79
|
"info",
|
|
31
80
|
"params",
|
|
@@ -70,7 +119,8 @@ def main(argv: Optional[List[str]] = None) -> int:
|
|
|
70
119
|
)
|
|
71
120
|
|
|
72
121
|
_register_entry_point_commands(subparsers)
|
|
73
|
-
|
|
122
|
+
_pv_autoset_env()
|
|
123
|
+
|
|
74
124
|
args = parser.parse_args(argv)
|
|
75
125
|
if not hasattr(args, "func"):
|
|
76
126
|
parser.print_help()
|
brkraw/cli/utils.py
CHANGED
brkraw/core/cache.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import logging
|
|
5
|
+
import shutil
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Dict, Optional, Union
|
|
8
|
+
|
|
9
|
+
from . import config
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger("brkraw.cache")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_info(
    root: Optional[Union[str, Path]] = None,
    path: Optional[Union[str, Path]] = None,
) -> Dict[str, Any]:
    """
    Get information about the current cache directory.

    Symlinked directories are followed, but each real directory is visited
    only once, so symlink cycles or several links to the same directory do
    not inflate the totals. Symlinked files are not counted, and files that
    cannot be stat'ed are skipped.

    Args:
        root: Configuration root directory (used to resolve default cache path).
        path: Explicit path to the cache directory. If provided, overrides 'root'.

    Returns:
        Dict with keys:
            - path: Path to cache directory
            - size: Total size in bytes
            - count: Number of files
    """
    cache_path = Path(path) if path is not None else config.cache_dir(root)

    if not cache_path.exists():
        return {"path": cache_path, "size": 0, "count": 0}

    total_size = 0
    file_count = 0
    visited = set()  # resolved directory paths already accounted for

    for dirpath, dirnames, filenames in os.walk(str(cache_path), followlinks=True):
        resolved = os.path.realpath(dirpath)
        if resolved in visited:
            # Reached again through a symlink: prune so os.walk stops here
            # rather than re-counting (or looping through a cycle).
            dirnames[:] = []
            continue
        visited.add(resolved)

        for fname in filenames:
            fp = Path(dirpath) / fname
            try:
                if fp.is_symlink():
                    continue
                total_size += fp.stat().st_size
            except OSError:
                # File vanished or is unreadable; leave it out of both totals.
                continue
            file_count += 1

    return {
        "path": cache_path,
        "size": total_size,
        "count": file_count,
    }
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def clear(
    root: Optional[Union[str, Path]] = None,
    path: Optional[Union[str, Path]] = None,
) -> None:
    """
    Clear all entries in the cache directory.

    The cache directory itself is kept. Removal is best-effort: an entry
    that cannot be removed is logged as a warning and the remaining entries
    are still processed. Symlinks are unlinked, never followed.

    Args:
        root: Configuration root directory (used to resolve default cache path).
        path: Explicit path to the cache directory. If provided, overrides 'root'.
    """
    cache_path = Path(path) if path is not None else config.cache_dir(root)

    if not cache_path.exists():
        return

    logger.info("Clearing cache at: %s", cache_path)
    for item in cache_path.iterdir():
        try:
            if item.is_dir() and not item.is_symlink():
                shutil.rmtree(item)
            else:
                # Regular files, symlinks (including links to directories)
                # and special files such as FIFOs or sockets.
                item.unlink()
        except OSError as exc:
            logger.warning("Failed to remove %s: %s", item, exc)
|
brkraw/core/config.py
CHANGED
|
@@ -65,6 +65,7 @@ class ConfigPaths:
|
|
|
65
65
|
pruner_specs_dir: Path
|
|
66
66
|
rules_dir: Path
|
|
67
67
|
transforms_dir: Path
|
|
68
|
+
cache_dir: Path
|
|
68
69
|
|
|
69
70
|
|
|
70
71
|
def resolve_root(root: Optional[Union[str, Path]] = None) -> Path:
|
|
@@ -85,6 +86,7 @@ def get_paths(root: Optional[Union[str, Path]] = None) -> ConfigPaths:
|
|
|
85
86
|
pruner_specs_dir=base / "pruner_specs",
|
|
86
87
|
rules_dir=base / "rules",
|
|
87
88
|
transforms_dir=base / "transforms",
|
|
89
|
+
cache_dir=base / "cache",
|
|
88
90
|
)
|
|
89
91
|
|
|
90
92
|
|
|
@@ -101,6 +103,7 @@ def get_path(name: str, root: Optional[Union[str, Path]] = None) -> Path:
|
|
|
101
103
|
"pruner_specs": paths_obj.pruner_specs_dir,
|
|
102
104
|
"rules": paths_obj.rules_dir,
|
|
103
105
|
"transforms": paths_obj.transforms_dir,
|
|
106
|
+
"cache": paths_obj.cache_dir,
|
|
104
107
|
}
|
|
105
108
|
if name not in mapping:
|
|
106
109
|
raise KeyError(f"Unknown config path: {name}")
|
|
@@ -126,6 +129,7 @@ def ensure_initialized(
|
|
|
126
129
|
paths.pruner_specs_dir.mkdir(parents=True, exist_ok=True)
|
|
127
130
|
paths.rules_dir.mkdir(parents=True, exist_ok=True)
|
|
128
131
|
paths.transforms_dir.mkdir(parents=True, exist_ok=True)
|
|
132
|
+
paths.cache_dir.mkdir(parents=True, exist_ok=True)
|
|
129
133
|
if create_config and not paths.config_file.exists():
|
|
130
134
|
paths.config_file.write_text(DEFAULT_CONFIG_YAML, encoding="utf-8")
|
|
131
135
|
return paths
|
|
@@ -140,6 +144,10 @@ def init(
|
|
|
140
144
|
return ensure_initialized(root=root, create_config=create_config, exist_ok=exist_ok)
|
|
141
145
|
|
|
142
146
|
|
|
147
|
+
def cache_dir(root: Optional[Union[str, Path]] = None) -> Path:
    """Return the ``cache_dir`` entry of the config paths resolved for *root*."""
    resolved_paths = get_paths(root=root)
    return resolved_paths.cache_dir
|
|
149
|
+
|
|
150
|
+
|
|
143
151
|
def load_config(root: Optional[Union[str, Path]] = None) -> Optional[Dict[str, Any]]:
|
|
144
152
|
paths = get_paths(root)
|
|
145
153
|
if not paths.config_file.exists():
|
|
@@ -211,6 +219,7 @@ def clear_config(
|
|
|
211
219
|
keep_specs: bool = False,
|
|
212
220
|
keep_pruner_specs: bool = False,
|
|
213
221
|
keep_transforms: bool = False,
|
|
222
|
+
keep_cache: bool = False,
|
|
214
223
|
) -> None:
|
|
215
224
|
paths = get_paths(root=root)
|
|
216
225
|
if not paths.root.exists():
|
|
@@ -225,6 +234,8 @@ def clear_config(
|
|
|
225
234
|
_remove_tree(paths.pruner_specs_dir)
|
|
226
235
|
if paths.transforms_dir.exists() and not keep_transforms:
|
|
227
236
|
_remove_tree(paths.transforms_dir)
|
|
237
|
+
if paths.cache_dir.exists() and not keep_cache:
|
|
238
|
+
_remove_tree(paths.cache_dir)
|
|
228
239
|
try:
|
|
229
240
|
paths.root.rmdir()
|
|
230
241
|
except OSError:
|
|
@@ -239,6 +250,7 @@ def clear(
|
|
|
239
250
|
keep_specs: bool = False,
|
|
240
251
|
keep_pruner_specs: bool = False,
|
|
241
252
|
keep_transforms: bool = False,
|
|
253
|
+
keep_cache: bool = False,
|
|
242
254
|
) -> None:
|
|
243
255
|
clear_config(
|
|
244
256
|
root=root,
|
|
@@ -247,15 +259,19 @@ def clear(
|
|
|
247
259
|
keep_specs=keep_specs,
|
|
248
260
|
keep_pruner_specs=keep_pruner_specs,
|
|
249
261
|
keep_transforms=keep_transforms,
|
|
262
|
+
keep_cache=keep_cache,
|
|
250
263
|
)
|
|
251
264
|
|
|
252
265
|
|
|
253
266
|
def configure_logging(
|
|
254
267
|
*,
|
|
268
|
+
name: Optional[str] = None,
|
|
255
269
|
root: Optional[Union[str, Path]] = None,
|
|
256
270
|
level: Optional[Union[str, int]] = None,
|
|
257
271
|
stream=None,
|
|
258
272
|
) -> logging.Logger:
|
|
273
|
+
if name is None:
|
|
274
|
+
name = "brkraw"
|
|
259
275
|
config = resolve_config(root=root)
|
|
260
276
|
if level is None:
|
|
261
277
|
level = config.get("logging", {}).get("level", "INFO")
|
|
@@ -265,9 +281,9 @@ def configure_logging(
|
|
|
265
281
|
if level == logging.INFO:
|
|
266
282
|
fmt = "%(message)s"
|
|
267
283
|
else:
|
|
268
|
-
fmt = "%(levelname)
|
|
284
|
+
fmt = "%(asctime)s(%(levelname).1s): %(name)s:%(funcName)s - %(message)s"
|
|
269
285
|
logging.basicConfig(level=level, format=fmt, stream=stream)
|
|
270
|
-
return logging.getLogger(
|
|
286
|
+
return logging.getLogger(name)
|
|
271
287
|
|
|
272
288
|
|
|
273
289
|
def output_width(root: Optional[Union[str, Path]] = None, default: int = 120) -> int:
|
brkraw/core/fs.py
CHANGED
|
@@ -221,6 +221,7 @@ class DatasetFS:
|
|
|
221
221
|
top: str = "",
|
|
222
222
|
*,
|
|
223
223
|
as_objects: bool = False,
|
|
224
|
+
sort_entries: bool = True,
|
|
224
225
|
) -> Iterable[Tuple[str, List, List]]:
|
|
225
226
|
"""Yield (dirpath, direntries, fileentries) with archive-style paths.
|
|
226
227
|
|
|
@@ -228,13 +229,17 @@ class DatasetFS:
|
|
|
228
229
|
top: Optional subdirectory to start from (anchor-aware).
|
|
229
230
|
as_objects: When True, return DatasetDir/ZippedDir and
|
|
230
231
|
DatasetFile/ZippedFile entries; otherwise return name strings.
|
|
232
|
+
sort_entries: When True, sort directory and file entries for deterministic output.
|
|
233
|
+
Set to False for faster traversal when ordering does not matter.
|
|
231
234
|
|
|
232
235
|
Yields:
|
|
233
236
|
Tuples of `(dirpath, direntries, fileentries)` using posix-style paths.
|
|
234
237
|
"""
|
|
235
238
|
norm_top = top.strip("/")
|
|
236
|
-
if self._anchor and norm_top
|
|
237
|
-
|
|
239
|
+
if self._anchor and norm_top:
|
|
240
|
+
anchored = norm_top == self._anchor or norm_top.startswith(f"{self._anchor}/")
|
|
241
|
+
if not anchored:
|
|
242
|
+
norm_top = f"{self._anchor}/{norm_top}"
|
|
238
243
|
|
|
239
244
|
if self._mode == "dir":
|
|
240
245
|
base = self.root
|
|
@@ -254,17 +259,16 @@ class DatasetFS:
|
|
|
254
259
|
rel = os.path.relpath(dirpath, base)
|
|
255
260
|
rel = "" if rel == "." else rel.replace(os.sep, "/")
|
|
256
261
|
rel = self._ensure_anchor(rel)
|
|
257
|
-
|
|
258
|
-
|
|
262
|
+
if sort_entries:
|
|
263
|
+
dirnames.sort()
|
|
264
|
+
filenames.sort()
|
|
265
|
+
|
|
259
266
|
if as_objects:
|
|
260
267
|
dir_objs = [
|
|
261
268
|
DatasetDir(name=d, path=(f"{rel}/{d}".strip("/")), fs=self) for d in dirnames
|
|
262
269
|
]
|
|
263
270
|
file_objs = [
|
|
264
|
-
DatasetFile(
|
|
265
|
-
name=f, path=(f"{rel}/{f}".strip("/")), fs=self
|
|
266
|
-
)
|
|
267
|
-
for f in filenames
|
|
271
|
+
DatasetFile(name=f, path=(f"{rel}/{f}".strip("/")), fs=self) for f in filenames
|
|
268
272
|
]
|
|
269
273
|
yield rel, dir_objs, file_objs
|
|
270
274
|
else:
|
|
@@ -272,10 +276,23 @@ class DatasetFS:
|
|
|
272
276
|
else:
|
|
273
277
|
assert self._zip is not None
|
|
274
278
|
for dirpath, direntries, files in zipcore.walk(self._zip, top=norm_top):
|
|
279
|
+
if sort_entries:
|
|
280
|
+
try:
|
|
281
|
+
direntries = sorted(direntries, key=lambda d: d.name)
|
|
282
|
+
files = sorted(files, key=lambda f: f.name)
|
|
283
|
+
except Exception:
|
|
284
|
+
# If entries are plain strings or otherwise unsortable, fall back.
|
|
285
|
+
pass
|
|
286
|
+
|
|
275
287
|
if as_objects:
|
|
276
288
|
yield dirpath, direntries, files
|
|
277
289
|
else:
|
|
278
|
-
|
|
290
|
+
dnames = [d.name for d in direntries]
|
|
291
|
+
fnames = [f.name for f in files]
|
|
292
|
+
if sort_entries:
|
|
293
|
+
dnames.sort()
|
|
294
|
+
fnames.sort()
|
|
295
|
+
yield dirpath, dnames, fnames
|
|
279
296
|
|
|
280
297
|
def open_binary(self, relpath: str) -> IO[bytes]:
|
|
281
298
|
"""Open a file by archive-relative path.
|
brkraw/core/zip.py
CHANGED
|
@@ -588,6 +588,8 @@ class ZippedDir:
|
|
|
588
588
|
def walk(
|
|
589
589
|
zipobj: zipfile.ZipFile,
|
|
590
590
|
top: str = "",
|
|
591
|
+
*,
|
|
592
|
+
sort_entries: bool = True,
|
|
591
593
|
) -> Iterable[Tuple[str, List[ZippedDir], List[ZippedFile]]]:
|
|
592
594
|
"""Walk through a ZipFile like os.walk, but with ZippedFile entries.
|
|
593
595
|
|
|
@@ -600,6 +602,9 @@ def walk(
|
|
|
600
602
|
paths (for example "repo-abc/dir"). When top does not correspond to an
|
|
601
603
|
explicit directory entry, the function still yields a subtree rooted at
|
|
602
604
|
top, and dirpath values are archive paths under that prefix.
|
|
605
|
+
sort_entries : bool, optional
|
|
606
|
+
When True, sort directory names and file names for deterministic output.
|
|
607
|
+
Set to False for faster traversal when ordering does not matter.
|
|
603
608
|
|
|
604
609
|
Yields
|
|
605
610
|
------
|
|
@@ -613,61 +618,70 @@ def walk(
|
|
|
613
618
|
"""
|
|
614
619
|
tree_map: Dict[str, Dict[str, Any]] = defaultdict(lambda: {"dirs": set(), "files": {}})
|
|
615
620
|
|
|
616
|
-
|
|
617
|
-
|
|
621
|
+
start = top.strip("/")
|
|
622
|
+
prefix = f"{start}/" if start else ""
|
|
623
|
+
|
|
624
|
+
def _is_dir(info: zipfile.ZipInfo) -> bool:
|
|
625
|
+
# ZipInfo.is_dir() exists on modern Python, but keep a safe fallback.
|
|
626
|
+
try:
|
|
627
|
+
return info.is_dir() # type: ignore[attr-defined]
|
|
628
|
+
except Exception:
|
|
629
|
+
return info.filename.endswith("/")
|
|
630
|
+
|
|
631
|
+
# Single pass over the archive; restrict to subtree early when top is given.
|
|
632
|
+
for info in zipobj.infolist():
|
|
633
|
+
arcname = info.filename
|
|
618
634
|
norm = arcname.rstrip("/")
|
|
635
|
+
if not norm:
|
|
636
|
+
continue
|
|
637
|
+
|
|
638
|
+
# Restrict to the requested subtree if provided.
|
|
639
|
+
if start:
|
|
640
|
+
if norm != start and not norm.startswith(prefix):
|
|
641
|
+
continue
|
|
642
|
+
|
|
619
643
|
parts = norm.split("/")
|
|
620
644
|
parent = "/".join(parts[:-1]) # "" at root
|
|
621
645
|
leaf = parts[-1]
|
|
622
646
|
|
|
623
|
-
if
|
|
647
|
+
if _is_dir(info):
|
|
624
648
|
tree_map[parent]["dirs"].add(leaf)
|
|
625
|
-
else:
|
|
626
|
-
tree_map[parent]["files"][leaf] = ZippedFile(
|
|
627
|
-
name=leaf, arcname=norm, zipobj=zipobj
|
|
628
|
-
)
|
|
649
|
+
else:
|
|
650
|
+
tree_map[parent]["files"][leaf] = ZippedFile(name=leaf, arcname=norm, zipobj=zipobj)
|
|
629
651
|
|
|
630
|
-
#
|
|
652
|
+
# Ensure intermediate directories are known.
|
|
631
653
|
for i in range(len(parts) - 1):
|
|
632
654
|
up_parent = "/".join(parts[:i])
|
|
633
655
|
up_child = parts[i]
|
|
634
656
|
tree_map[up_parent]["dirs"].add(up_child)
|
|
635
657
|
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
# When top does not exist explicitly, build a filtered pseudo-map rooted at top
|
|
658
|
+
# If the subtree has no entries, return nothing.
|
|
639
659
|
if start and start not in tree_map:
|
|
640
|
-
|
|
641
|
-
for arcname in zipobj.namelist():
|
|
642
|
-
if arcname.startswith(start + "/") or arcname.rstrip("/") == start:
|
|
643
|
-
norm = arcname.rstrip("/")
|
|
644
|
-
rel = norm[len(start):].lstrip("/")
|
|
645
|
-
parent = "/".join([start] + ([p for p in rel.split("/")[:-1]] if rel else []))
|
|
646
|
-
leaf = rel.split("/")[-1] if rel else start.split("/")[-1]
|
|
647
|
-
if arcname.endswith("/"):
|
|
648
|
-
pseudo_map[parent]["dirs"].add(leaf)
|
|
649
|
-
else:
|
|
650
|
-
pseudo_map[parent]["files"][leaf] = ZippedFile(leaf, norm, zipobj)
|
|
651
|
-
prefix_parts = parent.split("/") if parent else []
|
|
652
|
-
for i in range(len(prefix_parts)):
|
|
653
|
-
up_parent = "/".join(prefix_parts[:i])
|
|
654
|
-
up_child = prefix_parts[i]
|
|
655
|
-
pseudo_map[up_parent]["dirs"].add(up_child)
|
|
656
|
-
tree_map = pseudo_map
|
|
657
|
-
if start and start not in tree_map:
|
|
658
|
-
return
|
|
660
|
+
return
|
|
659
661
|
|
|
660
662
|
built_dirs: Dict[str, ZippedDir] = {}
|
|
661
663
|
|
|
662
664
|
def _build(path: str) -> ZippedDir:
|
|
663
665
|
if path in built_dirs:
|
|
664
666
|
return built_dirs[path]
|
|
665
|
-
|
|
666
|
-
|
|
667
|
+
|
|
668
|
+
dirset = tree_map[path]["dirs"]
|
|
669
|
+
files_dict = tree_map[path]["files"]
|
|
670
|
+
|
|
671
|
+
if sort_entries:
|
|
672
|
+
dirnames = sorted(dirset)
|
|
673
|
+
filekeys = sorted(files_dict.keys())
|
|
674
|
+
else:
|
|
675
|
+
# Sets/dicts are already in-memory; avoid sorting for speed.
|
|
676
|
+
dirnames = list(dirset)
|
|
677
|
+
filekeys = list(files_dict.keys())
|
|
678
|
+
|
|
679
|
+
files = [files_dict[k] for k in filekeys]
|
|
667
680
|
subs: List[ZippedDir] = []
|
|
668
681
|
for name in dirnames:
|
|
669
682
|
sub_path = f"{path}/{name}" if path else name
|
|
670
683
|
subs.append(_build(sub_path))
|
|
684
|
+
|
|
671
685
|
obj = ZippedDir(
|
|
672
686
|
name=path.rsplit("/", 1)[-1] if path else "",
|
|
673
687
|
path=path,
|
brkraw/dataclasses/__init__.py
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from .study import Study
|
|
3
|
+
from .study import Study, LazyScan
|
|
4
4
|
from .scan import Scan
|
|
5
5
|
from .reco import Reco
|
|
6
6
|
from .node import DatasetNode
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
__all__ = [
|
|
10
|
-
'Study',
|
|
10
|
+
'Study',
|
|
11
|
+
'LazyScan',
|
|
11
12
|
'Scan',
|
|
12
13
|
'Reco',
|
|
13
14
|
'DatasetNode'
|