brkraw 0.3.11__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- brkraw/__init__.py +9 -3
- brkraw/apps/__init__.py +12 -0
- brkraw/apps/addon/__init__.py +30 -0
- brkraw/apps/addon/core.py +35 -0
- brkraw/apps/addon/dependencies.py +402 -0
- brkraw/apps/addon/installation.py +500 -0
- brkraw/apps/addon/io.py +21 -0
- brkraw/apps/hook/__init__.py +25 -0
- brkraw/apps/hook/core.py +636 -0
- brkraw/apps/loader/__init__.py +10 -0
- brkraw/apps/loader/core.py +622 -0
- brkraw/apps/loader/formatter.py +288 -0
- brkraw/apps/loader/helper.py +797 -0
- brkraw/apps/loader/info/__init__.py +11 -0
- brkraw/apps/loader/info/scan.py +85 -0
- brkraw/apps/loader/info/scan.yaml +90 -0
- brkraw/apps/loader/info/study.py +69 -0
- brkraw/apps/loader/info/study.yaml +156 -0
- brkraw/apps/loader/info/transform.py +92 -0
- brkraw/apps/loader/types.py +220 -0
- brkraw/cli/__init__.py +5 -0
- brkraw/cli/commands/__init__.py +2 -0
- brkraw/cli/commands/addon.py +327 -0
- brkraw/cli/commands/config.py +205 -0
- brkraw/cli/commands/convert.py +903 -0
- brkraw/cli/commands/hook.py +348 -0
- brkraw/cli/commands/info.py +74 -0
- brkraw/cli/commands/init.py +214 -0
- brkraw/cli/commands/params.py +106 -0
- brkraw/cli/commands/prune.py +288 -0
- brkraw/cli/commands/session.py +371 -0
- brkraw/cli/hook_args.py +80 -0
- brkraw/cli/main.py +83 -0
- brkraw/cli/utils.py +60 -0
- brkraw/core/__init__.py +13 -0
- brkraw/core/config.py +380 -0
- brkraw/core/entrypoints.py +25 -0
- brkraw/core/formatter.py +367 -0
- brkraw/core/fs.py +495 -0
- brkraw/core/jcamp.py +600 -0
- brkraw/core/layout.py +451 -0
- brkraw/core/parameters.py +781 -0
- brkraw/core/zip.py +1121 -0
- brkraw/dataclasses/__init__.py +14 -0
- brkraw/dataclasses/node.py +139 -0
- brkraw/dataclasses/reco.py +33 -0
- brkraw/dataclasses/scan.py +61 -0
- brkraw/dataclasses/study.py +131 -0
- brkraw/default/__init__.py +3 -0
- brkraw/default/pruner_specs/deid4share.yaml +42 -0
- brkraw/default/rules/00_default.yaml +4 -0
- brkraw/default/specs/metadata_dicom.yaml +236 -0
- brkraw/default/specs/metadata_transforms.py +92 -0
- brkraw/resolver/__init__.py +7 -0
- brkraw/resolver/affine.py +539 -0
- brkraw/resolver/datatype.py +69 -0
- brkraw/resolver/fid.py +90 -0
- brkraw/resolver/helpers.py +36 -0
- brkraw/resolver/image.py +188 -0
- brkraw/resolver/nifti.py +370 -0
- brkraw/resolver/shape.py +235 -0
- brkraw/schema/__init__.py +3 -0
- brkraw/schema/context_map.yaml +62 -0
- brkraw/schema/meta.yaml +57 -0
- brkraw/schema/niftiheader.yaml +95 -0
- brkraw/schema/pruner.yaml +55 -0
- brkraw/schema/remapper.yaml +128 -0
- brkraw/schema/rules.yaml +154 -0
- brkraw/specs/__init__.py +10 -0
- brkraw/specs/hook/__init__.py +12 -0
- brkraw/specs/hook/logic.py +31 -0
- brkraw/specs/hook/validator.py +22 -0
- brkraw/specs/meta/__init__.py +5 -0
- brkraw/specs/meta/validator.py +156 -0
- brkraw/specs/pruner/__init__.py +15 -0
- brkraw/specs/pruner/logic.py +361 -0
- brkraw/specs/pruner/validator.py +119 -0
- brkraw/specs/remapper/__init__.py +27 -0
- brkraw/specs/remapper/logic.py +924 -0
- brkraw/specs/remapper/validator.py +314 -0
- brkraw/specs/rules/__init__.py +6 -0
- brkraw/specs/rules/logic.py +263 -0
- brkraw/specs/rules/validator.py +103 -0
- brkraw-0.5.0.dist-info/METADATA +81 -0
- brkraw-0.5.0.dist-info/RECORD +88 -0
- {brkraw-0.3.11.dist-info → brkraw-0.5.0.dist-info}/WHEEL +1 -2
- brkraw-0.5.0.dist-info/entry_points.txt +13 -0
- brkraw/lib/__init__.py +0 -4
- brkraw/lib/backup.py +0 -641
- brkraw/lib/bids.py +0 -0
- brkraw/lib/errors.py +0 -125
- brkraw/lib/loader.py +0 -1220
- brkraw/lib/orient.py +0 -194
- brkraw/lib/parser.py +0 -48
- brkraw/lib/pvobj.py +0 -301
- brkraw/lib/reference.py +0 -245
- brkraw/lib/utils.py +0 -471
- brkraw/scripts/__init__.py +0 -0
- brkraw/scripts/brk_backup.py +0 -106
- brkraw/scripts/brkraw.py +0 -744
- brkraw/ui/__init__.py +0 -0
- brkraw/ui/config.py +0 -17
- brkraw/ui/main_win.py +0 -214
- brkraw/ui/previewer.py +0 -225
- brkraw/ui/scan_info.py +0 -72
- brkraw/ui/scan_list.py +0 -73
- brkraw/ui/subj_info.py +0 -128
- brkraw-0.3.11.dist-info/METADATA +0 -25
- brkraw-0.3.11.dist-info/RECORD +0 -28
- brkraw-0.3.11.dist-info/entry_points.txt +0 -3
- brkraw-0.3.11.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- {brkraw-0.3.11.dist-info → brkraw-0.5.0.dist-info/licenses}/LICENSE +0 -0
brkraw/specs/meta/validator.py (new file)
@@ -0,0 +1,156 @@
```python
from __future__ import annotations

from pathlib import Path
from typing import Any, Dict, Iterable, List, Mapping, Optional
from importlib import resources

try:
    resources.files  # type: ignore[attr-defined]
except AttributeError:  # pragma: no cover - fallback for Python 3.8
    import importlib_resources as resources  # type: ignore[assignment]
import re

import yaml


_NAME_PATTERN = re.compile(r"^[a-z][a-z0-9]*(?:_[a-z0-9]+){0,3}$")


def validate_meta(
    meta: Any,
    *,
    allow_extra_keys: Optional[Iterable[str]] = None,
    raise_on_error: bool = True,
) -> List[str]:
    """Validate a __meta__ mapping.

    Args:
        meta: __meta__ mapping to validate.
        allow_extra_keys: Additional keys allowed in __meta__ beyond the base schema.
        raise_on_error: If True, raise ValueError on validation errors.

    Returns:
        List of validation error messages (empty when valid).
    """
    errors: List[str] = []
    if not isinstance(meta, Mapping):
        errors.append("__meta__: must be an object.")
        if errors and raise_on_error:
            raise ValueError("Invalid __meta__:\n" + "\n".join(errors))
        return errors

    try:
        import jsonschema
    except Exception:
        errors.extend(_validate_meta_minimal(meta, allow_extra_keys=allow_extra_keys))
    else:
        schema = _load_schema()
        if allow_extra_keys:
            schema = _extend_schema(schema, allow_extra_keys)
        validator = jsonschema.Draft202012Validator(schema)
        for err in validator.iter_errors(meta):
            path = ".".join(str(p) for p in err.path)
            prefix = f"__meta__.{path}" if path else "__meta__"
            errors.append(f"{prefix}: {err.message}")

    if errors and raise_on_error:
        raise ValueError("Invalid __meta__:\n" + "\n".join(errors))
    return errors


def _extend_schema(schema: Dict[str, Any], extra_keys: Iterable[str]) -> Dict[str, Any]:
    schema = dict(schema)
    props = dict(schema.get("properties") or {})
    for key in extra_keys:
        if key not in props:
            props[key] = {}
    schema["properties"] = props
    return schema


def _load_schema() -> Dict[str, Any]:
    if __package__ is None:
        raise RuntimeError("Package context required to load meta schema.")
    with resources.files("brkraw.schema").joinpath("meta.yaml").open(
        "r", encoding="utf-8"
    ) as handle:
        return yaml.safe_load(handle)


def _validate_meta_minimal(
    meta: Mapping[str, Any],
    *,
    allow_extra_keys: Optional[Iterable[str]] = None,
) -> List[str]:
    errors: List[str] = []
    name = meta.get("name")
    if not isinstance(name, str) or not name:
        errors.append("__meta__.name: must be a non-empty string.")
    elif not _NAME_PATTERN.match(name):
        errors.append("__meta__.name: must be python-friendly with max 4 tokens.")
    version = meta.get("version")
    if not isinstance(version, str) or not version:
        errors.append("__meta__.version: must be a non-empty string.")
    description = meta.get("description")
    if not isinstance(description, str) or not description:
        errors.append("__meta__.description: must be a non-empty string.")
    category = meta.get("category")
    if not isinstance(category, str) or not category:
        errors.append("__meta__.category: must be a non-empty string.")

    for key in ("authors", "developers"):
        if key not in meta:
            continue
        value = meta.get(key)
        if not isinstance(value, list) or not value:
            errors.append(f"__meta__.{key}: must be a non-empty list.")
            continue
        for idx, item in enumerate(value):
            if not isinstance(item, Mapping):
                errors.append(f"__meta__.{key}[{idx}]: must be an object.")
                continue
            person_name = item.get("name")
            if not isinstance(person_name, str) or not person_name:
                errors.append(f"__meta__.{key}[{idx}].name: must be a non-empty string.")
            email = item.get("email")
            if email is not None and not isinstance(email, str):
                errors.append(f"__meta__.{key}[{idx}].email: must be a string.")
            affiliations = item.get("affiliations")
            if affiliations is not None:
                if not isinstance(affiliations, list) or not affiliations:
                    errors.append(
                        f"__meta__.{key}[{idx}].affiliations: must be a non-empty list."
                    )
                elif not all(isinstance(a, str) and a for a in affiliations):
                    errors.append(
                        f"__meta__.{key}[{idx}].affiliations: must be non-empty strings."
                    )
            extra = set(item.keys()) - {"name", "email", "affiliations"}
            if extra:
                errors.append(
                    f"__meta__.{key}[{idx}]: unexpected keys {sorted(extra)}."
                )

    for key in ("doi", "citation"):
        if key in meta and not isinstance(meta.get(key), str):
            errors.append(f"__meta__.{key}: must be a string.")

    allowed = {
        "name",
        "version",
        "description",
        "category",
        "authors",
        "developers",
        "doi",
        "citation",
    }
    if allow_extra_keys:
        allowed.update(allow_extra_keys)
    extra = set(meta.keys()) - allowed
    if extra:
        errors.append(f"__meta__: unexpected keys {sorted(extra)}.")
    return errors


__all__ = ["validate_meta"]
```
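The snippet below is a usage sketch, not part of the diff: a minimal `__meta__` mapping run through `validate_meta`, assuming brkraw 0.5.0 is installed. The field values are illustrative; the keys are the ones the fallback validator checks.

```python
from brkraw.specs.meta import validate_meta

meta = {
    "name": "deid_share",  # must match ^[a-z][a-z0-9]*(?:_[a-z0-9]+){0,3}$
    "version": "1.0.0",
    "description": "De-identification spec for sharing.",
    "category": "pruner",
    "authors": [{"name": "Jane Doe", "email": "jane@example.org"}],
}

# With raise_on_error=False the validator returns the error list
# instead of raising ValueError on the first failure.
errors = validate_meta(meta, raise_on_error=False)
assert errors == []
```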
brkraw/specs/pruner/__init__.py (new file)
@@ -0,0 +1,15 @@
```python
from __future__ import annotations

from .logic import (
    prune_dataset_to_zip,
    prune_dataset_to_zip_from_spec,
    load_prune_spec,
)
from .validator import validate_prune_spec

__all__ = [
    "prune_dataset_to_zip",
    "prune_dataset_to_zip_from_spec",
    "load_prune_spec",
    "validate_prune_spec",
]
```
brkraw/specs/pruner/logic.py (new file)
@@ -0,0 +1,361 @@
```python
from __future__ import annotations

from pathlib import Path
from typing import Iterable, Optional, Set, Union, Literal, Mapping, Dict, Any, List
import re
import shutil
import zipfile

import yaml

from ...core.fs import DatasetFS
from ...core.parameters import Parameters
from .validator import validate_prune_spec


def prune_dataset_to_zip(
    source: Union[str, Path],
    dest: Union[str, Path],
    files: Iterable[str],
    *,
    mode: Literal["keep", "drop"] = "keep",
    update_params: Optional[Mapping[str, Mapping[str, Optional[str]]]] = None,
    dirs: Optional[Iterable[Mapping[str, Any]]] = None,
    add_root: bool = True,
    root_name: Optional[str] = None,
    strip_jcamp_comments: bool = False,
) -> Path:
    """Create a pruned dataset ZIP with optional JCAMP parameter edits.

    Args:
        source: Dataset root (directory or zip file).
        dest: Destination zip path.
        files: Filenames or relative paths used by the selection mode.
        mode: "keep" to include only matching files, "drop" to exclude them.
        update_params: Mapping of {filename: {key: value}} JCAMP edits.
        dirs: Directory rules as a list of {level, dirs} mappings.
        add_root: Whether to include a top-level root directory in the zip.
        root_name: Override the root directory name when add_root is True.
        strip_jcamp_comments: When True, remove $$ comment lines from JCAMP files.

    Returns:
        Path to the created zip file.

    Raises:
        ValueError: When the selector list is empty or no files remain after filtering.
    """
    fs = DatasetFS.from_path(source)
    selectors = _normalize_selectors(files)
    if not selectors:
        raise ValueError("files must contain at least one filename or path.")

    if mode not in {"keep", "drop"}:
        raise ValueError("mode must be 'keep' or 'drop'.")

    rule_specs = _normalize_dir_rules(dirs, mode)
    selected_files = _select_files(fs, selectors, mode=mode, dir_rules=rule_specs)
    if not selected_files:
        raise ValueError(f"No files remain after applying {mode} list.")

    dest = Path(dest)
    dest.parent.mkdir(parents=True, exist_ok=True)
    root = root_name or fs.anchor or fs.root.name

    arcnames = [_to_arcname(relpath, root, add_root=add_root) for relpath in selected_files]
    param_updates = _load_parameter_updates(update_params)
    _write_zip(
        fs,
        dest,
        selected_files,
        arcnames,
        param_updates=param_updates,
        strip_jcamp_comments=strip_jcamp_comments,
    )
    return dest


def prune_dataset_to_zip_from_spec(
    spec: Union[Mapping[str, Any], str, Path],
    *,
    source: Optional[Union[str, Path]] = None,
    dest: Optional[Union[str, Path]] = None,
    validate: bool = True,
    strip_jcamp_comments: Optional[bool] = None,
    root_name: Optional[str] = None,
    dirs: Optional[Iterable[Mapping[str, Any]]] = None,
    mode: Optional[Literal["keep", "drop"]] = None,
    template_vars: Optional[Mapping[str, str]] = None,
) -> Path:
    """Create a pruned dataset ZIP from a prune spec mapping or YAML path.

    Args:
        spec: Prune spec mapping or YAML file path.
        source: Optional override for spec["source"].
        dest: Optional override for spec["dest"].
        validate: When True, validate the spec against the schema.
        strip_jcamp_comments: Optional override to strip $$ comment lines.
        root_name: Optional override for the root directory name in the zip.
        dirs: Optional override for directory filter rules.
        mode: Optional override for keep/drop mode.
        template_vars: Optional mapping used to substitute `$key` placeholders.

    Returns:
        Path to the created zip file.
    """
    if isinstance(spec, (str, Path)):
        spec_data = load_prune_spec(spec, validate=validate)
    else:
        spec_data = dict(spec)
        if validate:
            validate_prune_spec(spec_data)

    if template_vars:
        spec_data = _substitute_vars(spec_data, template_vars)

    # Fall back to spec["source"]/spec["dest"] when no overrides are given,
    # matching the documented override semantics above.
    source = source if source is not None else spec_data.get("source")
    dest = dest if dest is not None else spec_data.get("dest")
    if source is None or dest is None:
        raise ValueError("source and dest are required for prune spec.")

    mode_value = mode if mode is not None else spec_data.get("mode", "keep")
    if mode_value not in {"keep", "drop"}:
        raise ValueError("mode must be 'keep' or 'drop'.")

    return prune_dataset_to_zip(
        source,
        dest,
        files=spec_data.get("files", []),
        mode=mode_value,
        update_params=spec_data.get("update_params"),
        dirs=dirs if dirs is not None else spec_data.get("dirs"),
        add_root=spec_data.get("add_root", True),
        root_name=root_name if root_name is not None else spec_data.get("root_name"),
        strip_jcamp_comments=(
            strip_jcamp_comments
            if strip_jcamp_comments is not None
            else bool(spec_data.get("strip_jcamp_comments", False))
        ),
    )


def load_prune_spec(path: Union[str, Path], *, validate: bool = True) -> Dict[str, Any]:
    """Load a prune spec from YAML and optionally validate it."""
    spec_path = Path(path)
    data = yaml.safe_load(spec_path.read_text(encoding="utf-8"))
    if data is None:
        raise ValueError("Prune spec is empty.")
    if not isinstance(data, Mapping):
        raise ValueError("Prune spec must be a mapping.")
    spec = dict(data)
    if validate:
        validate_prune_spec(spec)
    return spec


def _normalize_selectors(items: Iterable[str]) -> Set[str]:
    """Normalize selector strings by trimming and dropping empty entries."""
    return {str(item).strip().strip("/") for item in items if str(item).strip()}


def _select_files(
    fs: DatasetFS,
    selectors: Set[str],
    *,
    mode: Literal["keep", "drop"],
    dir_rules: List[Dict[str, Any]],
) -> Set[str]:
    """Return dataset-relative file paths selected by keep/drop rules."""
    selected: Set[str] = set()
    for dirpath, _, filenames in fs.walk():
        for name in filenames:
            rel = f"{dirpath}/{name}".strip("/")
            rel = fs.strip_anchor(rel)
            if _is_excluded_by_dir_rules(rel, dir_rules):
                continue
            matches = _matches_selector(rel, name, selectors)
            if mode == "keep" and matches:
                selected.add(rel)
            elif mode == "drop" and not matches:
                selected.add(rel)
    return selected


def _matches_selector(relpath: str, name: str, selectors: Set[str]) -> bool:
    """Match either a full relative path or a basename against selectors."""
    return relpath in selectors or name in selectors


def _to_arcname(relpath: str, root: str, *, add_root: bool) -> str:
    """Build a zip archive name with optional root folder prefix."""
    relpath = relpath.strip("/")
    if not add_root:
        return relpath
    if not root:
        return relpath
    return f"{root}/{relpath}" if relpath else root


def _write_zip(
    fs: DatasetFS,
    dest: Path,
    files: Iterable[str],
    arcnames: Iterable[str],
    *,
    param_updates: Optional[Mapping[str, Mapping[str, Optional[str]]]] = None,
    strip_jcamp_comments: bool = False,
) -> None:
    """Write selected files into a zip, applying JCAMP edits when requested."""
    entries = sorted(zip(files, arcnames), key=lambda item: item[1])
    parent_dirs = _collect_parent_dirs([arc for _, arc in entries])
    param_updates = param_updates or {}

    with zipfile.ZipFile(dest, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        # Sort directory entries so archives are byte-for-byte reproducible.
        for d in sorted(parent_dirs):
            zf.writestr(f"{d}/", b"")
        for relpath, arcname in entries:
            name = relpath.strip("/").split("/")[-1]
            updates = param_updates.get(name)
            if updates:
                content = fs.open_binary(relpath).read()
                updated_text = _apply_jcamp_updates(content, updates, path_hint=relpath)
                if strip_jcamp_comments:
                    updated_text = _strip_jcamp_comments(updated_text)
                zf.writestr(arcname, updated_text.encode("utf-8"))
                continue
            if strip_jcamp_comments:
                content = fs.open_binary(relpath).read()
                if Parameters._looks_like_jcamp(content):
                    stripped = _strip_jcamp_comments(
                        content.decode("utf-8", errors="ignore")
                    )
                    zf.writestr(arcname, stripped.encode("utf-8"))
                    continue
            with fs.open_binary(relpath) as src, zf.open(arcname, "w") as dst:
                shutil.copyfileobj(src, dst)


def _collect_parent_dirs(arcnames: Iterable[str]) -> Set[str]:
    """Return all parent directory entries for the given archive paths."""
    dirs: Set[str] = set()
    for arcname in arcnames:
        parts = arcname.split("/")[:-1]
        acc = []
        for part in parts:
            acc.append(part)
            dirs.add("/".join(acc))
    return {d for d in dirs if d}


def _load_parameter_updates(
    update_params: Optional[Mapping[str, Mapping[str, Optional[str]]]]
) -> Dict[str, Dict[str, Optional[str]]]:
    """Validate JCAMP update mappings."""
    if update_params is None:
        return {}
    if not isinstance(update_params, Mapping):
        raise ValueError("update_params must be a mapping.")

    result: Dict[str, Dict[str, Optional[str]]] = {}
    for filename, updates in update_params.items():
        if not isinstance(filename, str) or not filename.strip():
            raise ValueError("update_params keys must be non-empty strings.")
        if not isinstance(updates, Mapping):
            raise ValueError(f"update_params[{filename!r}] must be a mapping.")
        inner: Dict[str, Optional[str]] = {}
        for key, value in updates.items():
            if not isinstance(key, str) or not key.strip():
                raise ValueError(f"update_params[{filename!r}] keys must be strings.")
            inner[key] = None if value is None else str(value)
        result[filename.strip()] = inner
    return result


def _apply_jcamp_updates(
    content: bytes,
    updates: Mapping[str, Optional[str]],
    *,
    path_hint: str,
) -> str:
    """Apply JCAMP updates using Parameters and return updated source text."""
    try:
        params = Parameters(content)
    except Exception as exc:
        raise ValueError(f"Parameter file is not parseable: {path_hint}") from exc
    params.replace_values(updates, reparse=True)
    return params.source_text()


def _strip_jcamp_comments(text: str) -> str:
    """Remove $$ comment lines from JCAMP text."""
    lines = text.splitlines(keepends=True)
    kept = [line for line in lines if not line.lstrip().startswith("$$")]
    return "".join(kept)


def _normalize_dir_rules(
    rules: Optional[Iterable[Mapping[str, Any]]],
    mode: Literal["keep", "drop"],
) -> List[Dict[str, Any]]:
    if not rules:
        return []
    normalized: List[Dict[str, Any]] = []
    for idx, rule in enumerate(rules):
        if not isinstance(rule, Mapping):
            raise ValueError(f"dirs[{idx}] must be a mapping.")
        level = rule.get("level")
        if not isinstance(level, int) or level < 1:
            raise ValueError(f"dirs[{idx}].level must be int >= 1.")
        dirs = rule.get("dirs")
        if not isinstance(dirs, Iterable):
            raise ValueError(f"dirs[{idx}].dirs must be a list of names.")
        names = [str(d).strip() for d in dirs if str(d).strip()]
        if not names:
            raise ValueError(f"dirs[{idx}].dirs must contain at least one name.")
        normalized.append({"mode": mode, "level": level, "dirs": set(names)})
    normalized.sort(key=lambda item: item["level"])
    return normalized


def _is_excluded_by_dir_rules(relpath: str, rules: List[Dict[str, Any]]) -> bool:
    if not rules:
        return False
    parts = [p for p in relpath.split("/") if p]
    for rule in rules:
        level = rule["level"]
        if level > len(parts):
            continue
        name = parts[level - 1]
        if rule["mode"] == "drop":
            if name in rule["dirs"]:
                return True
        else:
            if name not in rule["dirs"]:
                return True
    return False


def _substitute_vars(obj: Any, variables: Mapping[str, str]) -> Any:
    """Recursively substitute $key placeholders in strings using variables mapping."""
    if isinstance(obj, str):
        return _substitute_string(obj, variables)
    if isinstance(obj, Mapping):
        return {k: _substitute_vars(v, variables) for k, v in obj.items()}
    if isinstance(obj, list):
        return [_substitute_vars(item, variables) for item in obj]
    return obj


_VAR_PATTERN = re.compile(r"\$(\w+)")


def _substitute_string(text: str, variables: Mapping[str, str]) -> str:
    def replacer(match: re.Match[str]) -> str:
        key = match.group(1)
        return variables.get(key, match.group(0))

    return _VAR_PATTERN.sub(replacer, text)


__all__ = [
    "prune_dataset_to_zip",
    "prune_dataset_to_zip_from_spec",
    "load_prune_spec",
]
```
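For orientation, here is a hedged usage sketch of the new pruner API, not part of the diff. The dataset and output paths are hypothetical, and `SUBJECT_name_string` is assumed here to be the Bruker subject-name parameter; only keyword arguments from the signature above are used.

```python
from brkraw.specs.pruner import prune_dataset_to_zip

# Keep only the listed Bruker files, blank the subject name in `subject`,
# and drop $$ comment lines (which typically carry timestamps and paths).
out = prune_dataset_to_zip(
    "raw/20240101_mouse01",        # hypothetical dataset directory (or zip)
    "shared/mouse01_pruned.zip",   # hypothetical destination
    files=["subject", "acqp", "method", "visu_pars", "2dseq"],
    mode="keep",
    update_params={"subject": {"SUBJECT_name_string": ""}},
    strip_jcamp_comments=True,
)
print(out)  # Path("shared/mouse01_pruned.zip")
```

The spec-driven variant, `prune_dataset_to_zip_from_spec`, accepts the same options from a YAML file (see the bundled `default/pruner_specs/deid4share.yaml`) and substitutes `$key` placeholders via `template_vars`.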
brkraw/specs/pruner/validator.py (new file)
@@ -0,0 +1,119 @@
```python
from __future__ import annotations

from pathlib import Path
from typing import Any, Dict, List, Mapping, Optional
from importlib import resources

try:
    resources.files  # type: ignore[attr-defined]
except AttributeError:  # pragma: no cover - fallback for Python 3.8
    import importlib_resources as resources  # type: ignore[assignment]

import yaml

from ..meta import validate_meta


def validate_prune_spec(spec: Mapping[str, Any], schema_path: Optional[Path] = None) -> List[str]:
    """Validate a prune spec against the schema.

    Args:
        spec: Parsed prune spec mapping.
        schema_path: Optional schema path override.

    Returns:
        An empty list when the spec is valid.

    Raises:
        ValueError: When the spec fails schema or __meta__ validation.
    """
    errors: List[str] = []
    try:
        import jsonschema
    except Exception:
        errors = _validate_spec_minimal(spec)
    else:
        schema = _load_schema(schema_path)
        validator = jsonschema.Draft202012Validator(schema)
        for err in validator.iter_errors(spec):
            path = ".".join(str(p) for p in err.path)
            prefix = f"spec.{path}" if path else "spec"
            errors.append(f"{prefix}: {err.message}")

        errors.extend(
            validate_meta(
                spec.get("__meta__"),
                raise_on_error=False,
            )
        )
    if errors:
        raise ValueError("Invalid prune spec:\n" + "\n".join(errors))
    return errors


def _load_schema(schema_path: Optional[Path]) -> Dict[str, Any]:
    if schema_path is not None:
        return yaml.safe_load(schema_path.read_text(encoding="utf-8"))
    if __package__ is None:
        raise RuntimeError("Package context required to load pruner schema.")
    with resources.files("brkraw.schema").joinpath("pruner.yaml").open(
        "r", encoding="utf-8"
    ) as handle:
        return yaml.safe_load(handle)


def _validate_spec_minimal(spec: Mapping[str, Any]) -> List[str]:
    errors: List[str] = []
    if not isinstance(spec, Mapping):
        errors.append("spec: must be a mapping.")
        return errors

    if "__meta__" not in spec:
        errors.append("spec.__meta__: is required.")
    else:
        errors.extend(
            validate_meta(
                spec.get("__meta__"),
                raise_on_error=False,
            )
        )

    files = spec.get("files")
    if not isinstance(files, list) or not files:
        errors.append("spec.files: must be a non-empty list.")
    else:
        for idx, item in enumerate(files):
            if not isinstance(item, (str, int)):
                errors.append(f"spec.files[{idx}]: must be string or int.")

    mode = spec.get("mode", "keep")
    if mode not in {"keep", "drop"}:
        errors.append("spec.mode: must be 'keep' or 'drop'.")

    update_params = spec.get("update_params")
    if update_params is not None and not isinstance(update_params, Mapping):
        errors.append("spec.update_params: must be a mapping.")

    dirs = spec.get("dirs")
    if dirs is not None and not isinstance(dirs, list):
        errors.append("spec.dirs: must be a list.")
    if isinstance(dirs, list):
        for idx, rule in enumerate(dirs):
            if not isinstance(rule, Mapping):
                errors.append(f"spec.dirs[{idx}]: must be a mapping.")
                continue
            level = rule.get("level")
            if not isinstance(level, int) or level < 1:
                errors.append(f"spec.dirs[{idx}].level: must be int >= 1.")
            # Use a distinct name so the outer `dirs` list is not shadowed.
            rule_dirs = rule.get("dirs")
            if not isinstance(rule_dirs, list) or not rule_dirs:
                errors.append(f"spec.dirs[{idx}].dirs: must be a non-empty list.")

    add_root = spec.get("add_root")
    if add_root is not None and not isinstance(add_root, bool):
        errors.append("spec.add_root: must be boolean.")

    root_name = spec.get("root_name")
    if root_name is not None and not isinstance(root_name, str):
        errors.append("spec.root_name: must be a string.")

    return errors


__all__ = ["validate_prune_spec"]
```
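A minimal sketch of calling the spec validator, not part of the diff. The spec below is the smallest shape the fallback validator accepts (`__meta__` plus a non-empty `files` list); when jsonschema is installed, the bundled `pruner.yaml` schema governs instead and may require more.

```python
from brkraw.specs.pruner import validate_prune_spec

spec = {
    "__meta__": {
        "name": "deid_share",
        "version": "1.0.0",
        "description": "Drop identifying files before sharing.",
        "category": "pruner",
    },
    "files": ["subject"],
    "mode": "drop",
}

try:
    validate_prune_spec(spec)  # returns [] when the spec is valid
except ValueError as exc:
    print(exc)  # one "spec.<path>: <message>" line per problem
```

Note the asymmetry with the meta validator: `validate_prune_spec` always raises on failure, while `validate_meta` exposes a `raise_on_error` flag to return errors as a list.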
brkraw/specs/remapper/__init__.py (new file)
@@ -0,0 +1,27 @@
```python
from __future__ import annotations

from .logic import (
    load_spec,
    map_parameters,
    load_context_map,
    load_context_map_data,
    load_context_map_meta,
    get_selector_keys,
    matches_context_map_selectors,
    apply_context_map,
)
from .validator import validate_spec, validate_context_map, validate_map_data

__all__ = [
    "load_spec",
    "map_parameters",
    "validate_spec",
    "validate_context_map",
    "validate_map_data",
    "load_context_map",
    "load_context_map_data",
    "load_context_map_meta",
    "get_selector_keys",
    "matches_context_map_selectors",
    "apply_context_map",
]
```