brkraw 0.5.3__py3-none-any.whl → 0.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- brkraw/__init__.py +1 -1
- brkraw/api/__init__.py +122 -0
- brkraw/api/types.py +39 -0
- brkraw/apps/loader/__init__.py +3 -6
- brkraw/apps/loader/core.py +128 -124
- brkraw/apps/loader/formatter.py +0 -2
- brkraw/apps/loader/helper.py +181 -68
- brkraw/apps/loader/info/scan.py +2 -2
- brkraw/apps/loader/info/transform.py +0 -1
- brkraw/apps/loader/types.py +37 -17
- brkraw/cli/commands/addon.py +1 -1
- brkraw/cli/commands/cache.py +82 -0
- brkraw/cli/commands/config.py +2 -2
- brkraw/cli/commands/convert.py +56 -14
- brkraw/cli/commands/hook.py +1 -1
- brkraw/cli/commands/info.py +1 -1
- brkraw/cli/commands/init.py +1 -1
- brkraw/cli/commands/params.py +1 -1
- brkraw/cli/commands/prune.py +2 -2
- brkraw/cli/main.py +51 -1
- brkraw/cli/utils.py +1 -1
- brkraw/core/cache.py +87 -0
- brkraw/core/config.py +18 -2
- brkraw/core/fs.py +26 -9
- brkraw/core/zip.py +46 -32
- brkraw/dataclasses/__init__.py +3 -2
- brkraw/dataclasses/study.py +73 -23
- brkraw/resolver/affine.py +11 -4
- brkraw/resolver/datatype.py +10 -2
- brkraw/resolver/image.py +140 -21
- brkraw/resolver/nifti.py +1 -1
- brkraw/specs/meta/validator.py +0 -1
- brkraw/specs/rules/logic.py +1 -3
- {brkraw-0.5.3.dist-info → brkraw-0.5.6.dist-info}/METADATA +4 -4
- {brkraw-0.5.3.dist-info → brkraw-0.5.6.dist-info}/RECORD +38 -34
- {brkraw-0.5.3.dist-info → brkraw-0.5.6.dist-info}/entry_points.txt +1 -0
- {brkraw-0.5.3.dist-info → brkraw-0.5.6.dist-info}/WHEEL +0 -0
- {brkraw-0.5.3.dist-info → brkraw-0.5.6.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import logging
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
from ...core import cache
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def cmd_cache(args: argparse.Namespace) -> int:
    """Dispatch ``brkraw cache`` to the selected sub-command handler.

    Args:
        args: Parsed CLI namespace. ``cache_func`` is used when it is set;
            otherwise ``args.parser`` is used to print the help text.

    Returns:
        Whatever the selected handler returns, or ``2`` when no
        sub-command handler is available.
    """
    try:
        dispatch = args.cache_func
    except AttributeError:
        dispatch = None

    if dispatch is not None:
        return dispatch(args)

    # No sub-command was chosen: show usage and signal a usage error.
    args.parser.print_help()
    return 2
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _format_size(size: float) -> str:
    """Render a byte count with 1024-based units and two decimals."""
    units = ("B", "KB", "MB", "GB", "TB")
    value = float(size)
    unit = units[0]
    for unit in units:
        # Stop at the first unit that fits, or at the largest known unit so
        # the value is never divided without a matching unit change
        # (previously 1024 TB and above were under-reported by 1024x).
        if value < 1024 or unit == units[-1]:
            break
        value /= 1024
    return f"{value:.2f} {unit}"


def cmd_info(args: argparse.Namespace) -> int:
    """Print the cache location, its total size and its file count.

    Args:
        args: Parsed CLI namespace; ``args.root`` is forwarded to
            ``cache.get_info`` as the config root override.

    Returns:
        Always ``0``.
    """
    info = cache.get_info(root=args.root)
    # Read every key up front so a malformed result fails before any output.
    path = info["path"]
    size = info["size"]
    count = info["count"]

    print(f"Path: {path}")
    print(f"Size: {_format_size(size)}")
    print(f"Files: {count}")
    return 0
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def cmd_clear(args: argparse.Namespace) -> int:
    """Remove the cache contents, asking for confirmation unless ``--yes``.

    Args:
        args: Parsed CLI namespace; reads ``args.yes`` and ``args.root``.

    Returns:
        ``0`` when the cache was cleared or was already empty, ``1`` when
        the user did not confirm.
    """
    if not args.yes:
        summary = cache.get_info(root=args.root)
        n_files = summary["count"]
        if n_files == 0:
            print("Cache is already empty.")
            return 0

        question = f"Clear {n_files} files from {summary['path']}? [y/N]: "
        try:
            answer = input(question)
        except EOFError:
            # Closed stdin (e.g. non-interactive run) counts as "no".
            answer = ""
        if answer.strip().lower() not in ("y", "yes"):
            return 1

    cache.clear(root=args.root)
    print("Cache cleared.")
    return 0
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def register(subparsers: argparse._SubParsersAction) -> None:  # type: ignore[name-defined]
    """Attach the ``cache`` command and its ``info`` / ``clear`` sub-commands.

    Args:
        subparsers: The sub-parser action of the parent CLI parser that the
            ``cache`` command is added to.
    """
    top = subparsers.add_parser("cache", help="Manage brkraw cache.")
    top.add_argument(
        "--root",
        help="Override config root directory (default: BRKRAW_CONFIG_HOME or ~/.brkraw).",
    )
    # The parser itself is stored in the defaults so cmd_cache can print
    # help when no sub-command is given.
    top.set_defaults(func=cmd_cache, parser=top)
    nested = top.add_subparsers(dest="cache_command")

    show = nested.add_parser("info", help="Show cache information.")
    show.set_defaults(cache_func=cmd_info)

    wipe = nested.add_parser("clear", help="Clear cache contents.")
    wipe.add_argument(
        "--yes", "-y",
        action="store_true",
        help="Do not prompt for confirmation.",
    )
    wipe.set_defaults(cache_func=cmd_clear)
|
brkraw/cli/commands/config.py
CHANGED
|
@@ -11,7 +11,7 @@ import subprocess
|
|
|
11
11
|
|
|
12
12
|
from brkraw.core import config as config_core
|
|
13
13
|
|
|
14
|
-
logger = logging.getLogger(
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def cmd_config(args: argparse.Namespace) -> int:
|
|
@@ -179,7 +179,7 @@ def register(subparsers: argparse._SubParsersAction) -> None: # type: ignore[na
|
|
|
179
179
|
path_parser = config_sub.add_parser("path", help="Print a specific config path.")
|
|
180
180
|
path_parser.add_argument(
|
|
181
181
|
"name",
|
|
182
|
-
choices=["root", "config", "rules", "specs", "transforms"],
|
|
182
|
+
choices=["root", "config", "rules", "specs", "transforms", "cache"],
|
|
183
183
|
help="Path key to print.",
|
|
184
184
|
)
|
|
185
185
|
path_parser.set_defaults(config_func=cmd_path)
|
brkraw/cli/commands/convert.py
CHANGED
|
@@ -11,7 +11,6 @@ import json
|
|
|
11
11
|
import logging
|
|
12
12
|
import os
|
|
13
13
|
import re
|
|
14
|
-
import sys
|
|
15
14
|
from pathlib import Path
|
|
16
15
|
from typing import Any, Mapping, Optional, Dict, List, Tuple, cast, get_args
|
|
17
16
|
|
|
@@ -27,7 +26,7 @@ from brkraw.resolver.affine import SubjectPose, SubjectType
|
|
|
27
26
|
from brkraw.apps.loader.types import AffineSpace
|
|
28
27
|
|
|
29
28
|
|
|
30
|
-
logger = logging.getLogger(
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
31
30
|
|
|
32
31
|
_INVALID_CHARS = re.compile(r"[^A-Za-z0-9._-]+")
|
|
33
32
|
|
|
@@ -86,6 +85,28 @@ def cmd_convert(args: argparse.Namespace) -> int:
|
|
|
86
85
|
return 2
|
|
87
86
|
if not args.flatten_fg:
|
|
88
87
|
args.flatten_fg = _env_flag("BRKRAW_CONVERT_FLATTEN_FG")
|
|
88
|
+
|
|
89
|
+
# resolve cycle_index/cycle_count from env
|
|
90
|
+
if args.cycle_index is None:
|
|
91
|
+
value = os.environ.get("BRKRAW_CONVERT_CYCLE_INDEX")
|
|
92
|
+
if value:
|
|
93
|
+
try:
|
|
94
|
+
args.cycle_index = int(value)
|
|
95
|
+
except ValueError:
|
|
96
|
+
logger.error("Invalid BRKRAW_CONVERT_CYCLE_INDEX: %s", value)
|
|
97
|
+
return 2
|
|
98
|
+
if args.cycle_count is None:
|
|
99
|
+
value = os.environ.get("BRKRAW_CONVERT_CYCLE_COUNT")
|
|
100
|
+
if value:
|
|
101
|
+
try:
|
|
102
|
+
args.cycle_count = int(value)
|
|
103
|
+
except ValueError:
|
|
104
|
+
logger.error("Invalid BRKRAW_CONVERT_CYCLE_COUNT: %s", value)
|
|
105
|
+
return 2
|
|
106
|
+
# if cycle_count is set but cycle_index is not, default cycle_index to 0
|
|
107
|
+
if args.cycle_index is None and args.cycle_count is not None:
|
|
108
|
+
args.cycle_index = 0
|
|
109
|
+
|
|
89
110
|
if args.space is None:
|
|
90
111
|
args.space = os.environ.get("BRKRAW_CONVERT_SPACE")
|
|
91
112
|
if args.override_subject_type is None:
|
|
@@ -169,6 +190,7 @@ def cmd_convert(args: argparse.Namespace) -> int:
|
|
|
169
190
|
hook_args_by_name = merge_hook_args(hook_args_by_name, hook_args_cli)
|
|
170
191
|
|
|
171
192
|
loader = load(args.path, prefix="Loading")
|
|
193
|
+
logger.debug("Dataset: %s loaded", args.path)
|
|
172
194
|
try:
|
|
173
195
|
override_header = nifti_resolver.load_header_overrides(args.header)
|
|
174
196
|
except ValueError:
|
|
@@ -224,7 +246,9 @@ def cmd_convert(args: argparse.Namespace) -> int:
|
|
|
224
246
|
if scan_id is None:
|
|
225
247
|
continue
|
|
226
248
|
scan = loader.get_scan(scan_id)
|
|
249
|
+
logger.debug("Processing scan %s.", scan_id)
|
|
227
250
|
reco_ids = [args.reco_id] if args.reco_id is not None else list(scan.avail.keys())
|
|
251
|
+
logger.debug("Recos: %s", reco_ids or "None")
|
|
228
252
|
if not reco_ids:
|
|
229
253
|
if getattr(scan, "_converter_hook", None):
|
|
230
254
|
reco_ids = [None]
|
|
@@ -252,18 +276,26 @@ def cmd_convert(args: argparse.Namespace) -> int:
|
|
|
252
276
|
nii_list: List[Any] = []
|
|
253
277
|
output_count = 1
|
|
254
278
|
else:
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
279
|
+
try:
|
|
280
|
+
nii = loader.convert(
|
|
281
|
+
scan_id,
|
|
282
|
+
reco_id=reco_id,
|
|
283
|
+
space=cast(AffineSpace, args.space),
|
|
284
|
+
override_header=cast(Nifti1HeaderContents, override_header) if override_header else None,
|
|
285
|
+
override_subject_type=cast(Optional[SubjectType], args.override_subject_type),
|
|
286
|
+
override_subject_pose=cast(Optional[SubjectPose], args.override_subject_pose),
|
|
287
|
+
flatten_fg=args.flatten_fg,
|
|
288
|
+
xyz_units=cast(XYZUNIT, args.xyz_units),
|
|
289
|
+
t_units=cast(TUNIT, args.t_units),
|
|
290
|
+
hook_args_by_name=hook_args_by_name,
|
|
291
|
+
cycle_index=args.cycle_index,
|
|
292
|
+
cycle_count=args.cycle_count,
|
|
293
|
+
)
|
|
294
|
+
except Exception as exc:
|
|
295
|
+
logger.error("Conversion failed for scan %s reco %s: %s", scan_id, reco_id, exc)
|
|
296
|
+
if not batch_all and args.reco_id is not None:
|
|
297
|
+
return 2
|
|
298
|
+
continue
|
|
267
299
|
if nii is None:
|
|
268
300
|
if not batch_all and args.reco_id is not None:
|
|
269
301
|
logger.error("No NIfTI output generated for scan %s reco %s.", scan_id, reco_id)
|
|
@@ -845,6 +877,16 @@ def _add_convert_args(
|
|
|
845
877
|
action="store_true",
|
|
846
878
|
help="Flatten frame-group dimensions to 4D when data is 5D or higher.",
|
|
847
879
|
)
|
|
880
|
+
parser.add_argument(
|
|
881
|
+
"--cycle-index",
|
|
882
|
+
type=int,
|
|
883
|
+
help="Start cycle index (last axis). When set, read only a subset of cycles.",
|
|
884
|
+
)
|
|
885
|
+
parser.add_argument(
|
|
886
|
+
"--cycle-count",
|
|
887
|
+
type=int,
|
|
888
|
+
help="Number of cycles to read starting at --cycle-index. When omitted, reads to the end.",
|
|
889
|
+
)
|
|
848
890
|
parser.add_argument(
|
|
849
891
|
"--no-compress",
|
|
850
892
|
dest="compress",
|
brkraw/cli/commands/hook.py
CHANGED
brkraw/cli/commands/info.py
CHANGED
brkraw/cli/commands/init.py
CHANGED
brkraw/cli/commands/params.py
CHANGED
brkraw/cli/commands/prune.py
CHANGED
|
@@ -6,7 +6,7 @@ import argparse
|
|
|
6
6
|
import logging
|
|
7
7
|
from datetime import datetime
|
|
8
8
|
from pathlib import Path
|
|
9
|
-
from typing import Optional
|
|
9
|
+
from typing import Optional
|
|
10
10
|
|
|
11
11
|
import yaml
|
|
12
12
|
|
|
@@ -14,7 +14,7 @@ from brkraw.cli.utils import spinner
|
|
|
14
14
|
from brkraw.core import config as config_core
|
|
15
15
|
from brkraw.specs.pruner import prune_dataset_to_zip_from_spec
|
|
16
16
|
|
|
17
|
-
logger = logging.getLogger(
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
def cmd_prune(args: argparse.Namespace) -> int:
|
brkraw/cli/main.py
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import re
|
|
4
|
+
import os
|
|
5
|
+
import shutil
|
|
6
|
+
import subprocess
|
|
3
7
|
import argparse
|
|
4
8
|
from typing import Callable, List, Optional
|
|
5
9
|
from ..core.entrypoints import list_entry_points as _iter_entry_points
|
|
@@ -10,6 +14,50 @@ from brkraw.core import config as config_core
|
|
|
10
14
|
PLUGIN_GROUP = "brkraw.cli"
|
|
11
15
|
|
|
12
16
|
|
|
17
|
+
def _run_capture(cmd: list[str]) -> str:
    """Run *cmd* and return its captured standard output as text.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (``check=True``).
    """
    completed = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=True,
    )
    return completed.stdout
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _pv_autoset_env() -> None:
    """Populate ``BRKRAW_PATH`` / ``BRKRAW_SCAN_ID`` / ``BRKRAW_RECO_ID`` from ``pvcmd``.

    Does nothing when the ``pvcmd`` executable is not on ``PATH`` or when
    ``pvcmd -e ParxServer`` exits non-zero. Otherwise the ``ListPs`` CSV
    output is searched for the single line containing ``REQUEST_ATTR`` and
    the first ``;``-separated field shaped like
    ``<exp_path>/<scan_id>/pdata/<reco_id>`` is split into the three
    environment variables.
    NOTE(review): presumably ``pvcmd`` is the Bruker ParaVision command-line
    tool -- confirm.

    Raises:
        SystemExit: If zero or several ``REQUEST_ATTR`` lines are found, or
            if no field matches the expected path layout.
        subprocess.CalledProcessError: Propagated from ``_run_capture`` when
            the ``ListPs`` request fails.
    """
    if shutil.which("pvcmd") is None:
        return

    probe = subprocess.run(
        ["pvcmd", "-e", "ParxServer"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    if probe.returncode != 0:
        return

    listing = _run_capture(["pvcmd", "-a", "ParxServer", "-r", "ListPs", "-csv"])
    candidates = [row for row in listing.splitlines() if "REQUEST_ATTR" in row]

    if not candidates:
        raise SystemExit("ERROR: No ps entry with REQUEST_ATTR found")
    if len(candidates) > 1:
        raise SystemExit(
            "ERROR: Multiple ps entries with REQUEST_ATTR found\n" + "\n".join(candidates)
        )

    layout = re.compile(r"^(?P<exp_path>.+)/(?P<scan_id>\d+)/pdata/(?P<reco_id>\d+)$")
    attempts = (layout.match(field.strip()) for field in candidates[0].split(";"))
    # First field that matches wins; later fields are not inspected.
    found = next((hit for hit in attempts if hit), None)

    if found is None:
        raise SystemExit("ERROR: No valid <exp_path>/<scan_id>/pdata/<reco_id> path found")

    for env_key, group_name in (
        ("BRKRAW_PATH", "exp_path"),
        ("BRKRAW_SCAN_ID", "scan_id"),
        ("BRKRAW_RECO_ID", "reco_id"),
    ):
        os.environ[env_key] = found.group(group_name)
|
|
59
|
+
|
|
60
|
+
|
|
13
61
|
def _register_entry_point_commands(
|
|
14
62
|
subparsers: argparse._SubParsersAction, # type: ignore[name-defined]
|
|
15
63
|
) -> None:
|
|
@@ -26,6 +74,7 @@ def _register_entry_point_commands(
|
|
|
26
74
|
preferred = [
|
|
27
75
|
"init",
|
|
28
76
|
"config",
|
|
77
|
+
"cache",
|
|
29
78
|
"session",
|
|
30
79
|
"info",
|
|
31
80
|
"params",
|
|
@@ -70,7 +119,8 @@ def main(argv: Optional[List[str]] = None) -> int:
|
|
|
70
119
|
)
|
|
71
120
|
|
|
72
121
|
_register_entry_point_commands(subparsers)
|
|
73
|
-
|
|
122
|
+
_pv_autoset_env()
|
|
123
|
+
|
|
74
124
|
args = parser.parse_args(argv)
|
|
75
125
|
if not hasattr(args, "func"):
|
|
76
126
|
parser.print_help()
|
brkraw/cli/utils.py
CHANGED
brkraw/core/cache.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import logging
|
|
5
|
+
import shutil
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Dict, Optional, Union
|
|
8
|
+
|
|
9
|
+
from . import config
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger("brkraw.cache")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_info(
    root: Optional[Union[str, Path]] = None,
    path: Optional[Union[str, Path]] = None,
) -> Dict[str, Any]:
    """
    Get information about the current cache directory.

    Symlinked directories are followed, but each real directory is visited
    only once, so symlink loops or aliases cannot inflate the totals.
    Symlinked files and files that cannot be stat'ed are skipped.

    Args:
        root: Configuration root directory (used to resolve default cache path).
        path: Explicit path to the cache directory. If provided, overrides 'root'.

    Returns:
        Dict with keys:
            - path: Path to cache directory
            - size: Total size in bytes
            - count: Number of files
    """
    cache_path = Path(path) if path is not None else config.cache_dir(root)

    if not cache_path.exists():
        return {"path": cache_path, "size": 0, "count": 0}

    total_size = 0
    file_count = 0
    visited = set()

    for dirpath, dirnames, filenames in os.walk(str(cache_path), followlinks=True):
        # os.walk does not remember visited directories; with
        # followlinks=True a link back into the tree would be walked again
        # and again, so prune anything whose real path was already seen.
        real_dir = os.path.realpath(dirpath)
        if real_dir in visited:
            dirnames[:] = []
            continue
        visited.add(real_dir)

        for name in filenames:
            fp = Path(dirpath) / name
            try:
                if fp.is_symlink():
                    continue
                total_size += fp.stat().st_size
            except OSError:
                # Best-effort statistics: unreadable or vanished files are ignored.
                continue
            file_count += 1

    return {
        "path": cache_path,
        "size": total_size,
        "count": file_count,
    }
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def clear(
    root: Optional[Union[str, Path]] = None,
    path: Optional[Union[str, Path]] = None,
) -> None:
    """
    Remove every entry directly inside the cache directory.

    The cache directory itself is kept. Failures on individual entries are
    logged as warnings and do not stop the remaining removals.

    Args:
        root: Configuration root directory (used to resolve default cache path).
        path: Explicit path to the cache directory. If provided, overrides 'root'.
    """
    cache_path = Path(path) if path is not None else config.cache_dir(root)

    if not cache_path.exists():
        return

    logger.info("Clearing cache at: %s", cache_path)
    for entry in cache_path.iterdir():
        try:
            # Symlinks are unlinked, never followed, even when they point
            # at a directory.
            remove_as_file = entry.is_file() or entry.is_symlink()
            if remove_as_file:
                entry.unlink()
            elif entry.is_dir():
                shutil.rmtree(entry)
        except Exception as exc:
            logger.warning("Failed to remove %s: %s", entry, exc)
|
brkraw/core/config.py
CHANGED
|
@@ -65,6 +65,7 @@ class ConfigPaths:
|
|
|
65
65
|
pruner_specs_dir: Path
|
|
66
66
|
rules_dir: Path
|
|
67
67
|
transforms_dir: Path
|
|
68
|
+
cache_dir: Path
|
|
68
69
|
|
|
69
70
|
|
|
70
71
|
def resolve_root(root: Optional[Union[str, Path]] = None) -> Path:
|
|
@@ -85,6 +86,7 @@ def get_paths(root: Optional[Union[str, Path]] = None) -> ConfigPaths:
|
|
|
85
86
|
pruner_specs_dir=base / "pruner_specs",
|
|
86
87
|
rules_dir=base / "rules",
|
|
87
88
|
transforms_dir=base / "transforms",
|
|
89
|
+
cache_dir=base / "cache",
|
|
88
90
|
)
|
|
89
91
|
|
|
90
92
|
|
|
@@ -101,6 +103,7 @@ def get_path(name: str, root: Optional[Union[str, Path]] = None) -> Path:
|
|
|
101
103
|
"pruner_specs": paths_obj.pruner_specs_dir,
|
|
102
104
|
"rules": paths_obj.rules_dir,
|
|
103
105
|
"transforms": paths_obj.transforms_dir,
|
|
106
|
+
"cache": paths_obj.cache_dir,
|
|
104
107
|
}
|
|
105
108
|
if name not in mapping:
|
|
106
109
|
raise KeyError(f"Unknown config path: {name}")
|
|
@@ -126,6 +129,7 @@ def ensure_initialized(
|
|
|
126
129
|
paths.pruner_specs_dir.mkdir(parents=True, exist_ok=True)
|
|
127
130
|
paths.rules_dir.mkdir(parents=True, exist_ok=True)
|
|
128
131
|
paths.transforms_dir.mkdir(parents=True, exist_ok=True)
|
|
132
|
+
paths.cache_dir.mkdir(parents=True, exist_ok=True)
|
|
129
133
|
if create_config and not paths.config_file.exists():
|
|
130
134
|
paths.config_file.write_text(DEFAULT_CONFIG_YAML, encoding="utf-8")
|
|
131
135
|
return paths
|
|
@@ -140,6 +144,10 @@ def init(
|
|
|
140
144
|
return ensure_initialized(root=root, create_config=create_config, exist_ok=exist_ok)
|
|
141
145
|
|
|
142
146
|
|
|
147
|
+
def cache_dir(root: Optional[Union[str, Path]] = None) -> Path:
    """Return the ``cache_dir`` entry of the paths resolved for *root*."""
    resolved = get_paths(root=root)
    return resolved.cache_dir
|
|
149
|
+
|
|
150
|
+
|
|
143
151
|
def load_config(root: Optional[Union[str, Path]] = None) -> Optional[Dict[str, Any]]:
|
|
144
152
|
paths = get_paths(root)
|
|
145
153
|
if not paths.config_file.exists():
|
|
@@ -211,6 +219,7 @@ def clear_config(
|
|
|
211
219
|
keep_specs: bool = False,
|
|
212
220
|
keep_pruner_specs: bool = False,
|
|
213
221
|
keep_transforms: bool = False,
|
|
222
|
+
keep_cache: bool = False,
|
|
214
223
|
) -> None:
|
|
215
224
|
paths = get_paths(root=root)
|
|
216
225
|
if not paths.root.exists():
|
|
@@ -225,6 +234,8 @@ def clear_config(
|
|
|
225
234
|
_remove_tree(paths.pruner_specs_dir)
|
|
226
235
|
if paths.transforms_dir.exists() and not keep_transforms:
|
|
227
236
|
_remove_tree(paths.transforms_dir)
|
|
237
|
+
if paths.cache_dir.exists() and not keep_cache:
|
|
238
|
+
_remove_tree(paths.cache_dir)
|
|
228
239
|
try:
|
|
229
240
|
paths.root.rmdir()
|
|
230
241
|
except OSError:
|
|
@@ -239,6 +250,7 @@ def clear(
|
|
|
239
250
|
keep_specs: bool = False,
|
|
240
251
|
keep_pruner_specs: bool = False,
|
|
241
252
|
keep_transforms: bool = False,
|
|
253
|
+
keep_cache: bool = False,
|
|
242
254
|
) -> None:
|
|
243
255
|
clear_config(
|
|
244
256
|
root=root,
|
|
@@ -247,15 +259,19 @@ def clear(
|
|
|
247
259
|
keep_specs=keep_specs,
|
|
248
260
|
keep_pruner_specs=keep_pruner_specs,
|
|
249
261
|
keep_transforms=keep_transforms,
|
|
262
|
+
keep_cache=keep_cache,
|
|
250
263
|
)
|
|
251
264
|
|
|
252
265
|
|
|
253
266
|
def configure_logging(
|
|
254
267
|
*,
|
|
268
|
+
name: Optional[str] = None,
|
|
255
269
|
root: Optional[Union[str, Path]] = None,
|
|
256
270
|
level: Optional[Union[str, int]] = None,
|
|
257
271
|
stream=None,
|
|
258
272
|
) -> logging.Logger:
|
|
273
|
+
if name is None:
|
|
274
|
+
name = "brkraw"
|
|
259
275
|
config = resolve_config(root=root)
|
|
260
276
|
if level is None:
|
|
261
277
|
level = config.get("logging", {}).get("level", "INFO")
|
|
@@ -265,9 +281,9 @@ def configure_logging(
|
|
|
265
281
|
if level == logging.INFO:
|
|
266
282
|
fmt = "%(message)s"
|
|
267
283
|
else:
|
|
268
|
-
fmt = "%(levelname)
|
|
284
|
+
fmt = "%(asctime)s(%(levelname).1s): %(name)s:%(funcName)s - %(message)s"
|
|
269
285
|
logging.basicConfig(level=level, format=fmt, stream=stream)
|
|
270
|
-
return logging.getLogger(
|
|
286
|
+
return logging.getLogger(name)
|
|
271
287
|
|
|
272
288
|
|
|
273
289
|
def output_width(root: Optional[Union[str, Path]] = None, default: int = 120) -> int:
|
brkraw/core/fs.py
CHANGED
|
@@ -221,6 +221,7 @@ class DatasetFS:
|
|
|
221
221
|
top: str = "",
|
|
222
222
|
*,
|
|
223
223
|
as_objects: bool = False,
|
|
224
|
+
sort_entries: bool = True,
|
|
224
225
|
) -> Iterable[Tuple[str, List, List]]:
|
|
225
226
|
"""Yield (dirpath, direntries, fileentries) with archive-style paths.
|
|
226
227
|
|
|
@@ -228,13 +229,17 @@ class DatasetFS:
|
|
|
228
229
|
top: Optional subdirectory to start from (anchor-aware).
|
|
229
230
|
as_objects: When True, return DatasetDir/ZippedDir and
|
|
230
231
|
DatasetFile/ZippedFile entries; otherwise return name strings.
|
|
232
|
+
sort_entries: When True, sort directory and file entries for deterministic output.
|
|
233
|
+
Set to False for faster traversal when ordering does not matter.
|
|
231
234
|
|
|
232
235
|
Yields:
|
|
233
236
|
Tuples of `(dirpath, direntries, fileentries)` using posix-style paths.
|
|
234
237
|
"""
|
|
235
238
|
norm_top = top.strip("/")
|
|
236
|
-
if self._anchor and norm_top
|
|
237
|
-
|
|
239
|
+
if self._anchor and norm_top:
|
|
240
|
+
anchored = norm_top == self._anchor or norm_top.startswith(f"{self._anchor}/")
|
|
241
|
+
if not anchored:
|
|
242
|
+
norm_top = f"{self._anchor}/{norm_top}"
|
|
238
243
|
|
|
239
244
|
if self._mode == "dir":
|
|
240
245
|
base = self.root
|
|
@@ -254,17 +259,16 @@ class DatasetFS:
|
|
|
254
259
|
rel = os.path.relpath(dirpath, base)
|
|
255
260
|
rel = "" if rel == "." else rel.replace(os.sep, "/")
|
|
256
261
|
rel = self._ensure_anchor(rel)
|
|
257
|
-
|
|
258
|
-
|
|
262
|
+
if sort_entries:
|
|
263
|
+
dirnames.sort()
|
|
264
|
+
filenames.sort()
|
|
265
|
+
|
|
259
266
|
if as_objects:
|
|
260
267
|
dir_objs = [
|
|
261
268
|
DatasetDir(name=d, path=(f"{rel}/{d}".strip("/")), fs=self) for d in dirnames
|
|
262
269
|
]
|
|
263
270
|
file_objs = [
|
|
264
|
-
DatasetFile(
|
|
265
|
-
name=f, path=(f"{rel}/{f}".strip("/")), fs=self
|
|
266
|
-
)
|
|
267
|
-
for f in filenames
|
|
271
|
+
DatasetFile(name=f, path=(f"{rel}/{f}".strip("/")), fs=self) for f in filenames
|
|
268
272
|
]
|
|
269
273
|
yield rel, dir_objs, file_objs
|
|
270
274
|
else:
|
|
@@ -272,10 +276,23 @@ class DatasetFS:
|
|
|
272
276
|
else:
|
|
273
277
|
assert self._zip is not None
|
|
274
278
|
for dirpath, direntries, files in zipcore.walk(self._zip, top=norm_top):
|
|
279
|
+
if sort_entries:
|
|
280
|
+
try:
|
|
281
|
+
direntries = sorted(direntries, key=lambda d: d.name)
|
|
282
|
+
files = sorted(files, key=lambda f: f.name)
|
|
283
|
+
except Exception:
|
|
284
|
+
# If entries are plain strings or otherwise unsortable, fall back.
|
|
285
|
+
pass
|
|
286
|
+
|
|
275
287
|
if as_objects:
|
|
276
288
|
yield dirpath, direntries, files
|
|
277
289
|
else:
|
|
278
|
-
|
|
290
|
+
dnames = [d.name for d in direntries]
|
|
291
|
+
fnames = [f.name for f in files]
|
|
292
|
+
if sort_entries:
|
|
293
|
+
dnames.sort()
|
|
294
|
+
fnames.sort()
|
|
295
|
+
yield dirpath, dnames, fnames
|
|
279
296
|
|
|
280
297
|
def open_binary(self, relpath: str) -> IO[bytes]:
|
|
281
298
|
"""Open a file by archive-relative path.
|