brkraw 0.3.11__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. brkraw/__init__.py +9 -3
  2. brkraw/apps/__init__.py +12 -0
  3. brkraw/apps/addon/__init__.py +30 -0
  4. brkraw/apps/addon/core.py +35 -0
  5. brkraw/apps/addon/dependencies.py +402 -0
  6. brkraw/apps/addon/installation.py +500 -0
  7. brkraw/apps/addon/io.py +21 -0
  8. brkraw/apps/hook/__init__.py +25 -0
  9. brkraw/apps/hook/core.py +636 -0
  10. brkraw/apps/loader/__init__.py +10 -0
  11. brkraw/apps/loader/core.py +622 -0
  12. brkraw/apps/loader/formatter.py +288 -0
  13. brkraw/apps/loader/helper.py +797 -0
  14. brkraw/apps/loader/info/__init__.py +11 -0
  15. brkraw/apps/loader/info/scan.py +85 -0
  16. brkraw/apps/loader/info/scan.yaml +90 -0
  17. brkraw/apps/loader/info/study.py +69 -0
  18. brkraw/apps/loader/info/study.yaml +156 -0
  19. brkraw/apps/loader/info/transform.py +92 -0
  20. brkraw/apps/loader/types.py +220 -0
  21. brkraw/cli/__init__.py +5 -0
  22. brkraw/cli/commands/__init__.py +2 -0
  23. brkraw/cli/commands/addon.py +327 -0
  24. brkraw/cli/commands/config.py +205 -0
  25. brkraw/cli/commands/convert.py +903 -0
  26. brkraw/cli/commands/hook.py +348 -0
  27. brkraw/cli/commands/info.py +74 -0
  28. brkraw/cli/commands/init.py +214 -0
  29. brkraw/cli/commands/params.py +106 -0
  30. brkraw/cli/commands/prune.py +288 -0
  31. brkraw/cli/commands/session.py +371 -0
  32. brkraw/cli/hook_args.py +80 -0
  33. brkraw/cli/main.py +83 -0
  34. brkraw/cli/utils.py +60 -0
  35. brkraw/core/__init__.py +13 -0
  36. brkraw/core/config.py +380 -0
  37. brkraw/core/entrypoints.py +25 -0
  38. brkraw/core/formatter.py +367 -0
  39. brkraw/core/fs.py +495 -0
  40. brkraw/core/jcamp.py +600 -0
  41. brkraw/core/layout.py +451 -0
  42. brkraw/core/parameters.py +781 -0
  43. brkraw/core/zip.py +1121 -0
  44. brkraw/dataclasses/__init__.py +14 -0
  45. brkraw/dataclasses/node.py +139 -0
  46. brkraw/dataclasses/reco.py +33 -0
  47. brkraw/dataclasses/scan.py +61 -0
  48. brkraw/dataclasses/study.py +131 -0
  49. brkraw/default/__init__.py +3 -0
  50. brkraw/default/pruner_specs/deid4share.yaml +42 -0
  51. brkraw/default/rules/00_default.yaml +4 -0
  52. brkraw/default/specs/metadata_dicom.yaml +236 -0
  53. brkraw/default/specs/metadata_transforms.py +92 -0
  54. brkraw/resolver/__init__.py +7 -0
  55. brkraw/resolver/affine.py +539 -0
  56. brkraw/resolver/datatype.py +69 -0
  57. brkraw/resolver/fid.py +90 -0
  58. brkraw/resolver/helpers.py +36 -0
  59. brkraw/resolver/image.py +188 -0
  60. brkraw/resolver/nifti.py +370 -0
  61. brkraw/resolver/shape.py +235 -0
  62. brkraw/schema/__init__.py +3 -0
  63. brkraw/schema/context_map.yaml +62 -0
  64. brkraw/schema/meta.yaml +57 -0
  65. brkraw/schema/niftiheader.yaml +95 -0
  66. brkraw/schema/pruner.yaml +55 -0
  67. brkraw/schema/remapper.yaml +128 -0
  68. brkraw/schema/rules.yaml +154 -0
  69. brkraw/specs/__init__.py +10 -0
  70. brkraw/specs/hook/__init__.py +12 -0
  71. brkraw/specs/hook/logic.py +31 -0
  72. brkraw/specs/hook/validator.py +22 -0
  73. brkraw/specs/meta/__init__.py +5 -0
  74. brkraw/specs/meta/validator.py +156 -0
  75. brkraw/specs/pruner/__init__.py +15 -0
  76. brkraw/specs/pruner/logic.py +361 -0
  77. brkraw/specs/pruner/validator.py +119 -0
  78. brkraw/specs/remapper/__init__.py +27 -0
  79. brkraw/specs/remapper/logic.py +924 -0
  80. brkraw/specs/remapper/validator.py +314 -0
  81. brkraw/specs/rules/__init__.py +6 -0
  82. brkraw/specs/rules/logic.py +263 -0
  83. brkraw/specs/rules/validator.py +103 -0
  84. brkraw-0.5.0.dist-info/METADATA +81 -0
  85. brkraw-0.5.0.dist-info/RECORD +88 -0
  86. {brkraw-0.3.11.dist-info → brkraw-0.5.0.dist-info}/WHEEL +1 -2
  87. brkraw-0.5.0.dist-info/entry_points.txt +13 -0
  88. brkraw/lib/__init__.py +0 -4
  89. brkraw/lib/backup.py +0 -641
  90. brkraw/lib/bids.py +0 -0
  91. brkraw/lib/errors.py +0 -125
  92. brkraw/lib/loader.py +0 -1220
  93. brkraw/lib/orient.py +0 -194
  94. brkraw/lib/parser.py +0 -48
  95. brkraw/lib/pvobj.py +0 -301
  96. brkraw/lib/reference.py +0 -245
  97. brkraw/lib/utils.py +0 -471
  98. brkraw/scripts/__init__.py +0 -0
  99. brkraw/scripts/brk_backup.py +0 -106
  100. brkraw/scripts/brkraw.py +0 -744
  101. brkraw/ui/__init__.py +0 -0
  102. brkraw/ui/config.py +0 -17
  103. brkraw/ui/main_win.py +0 -214
  104. brkraw/ui/previewer.py +0 -225
  105. brkraw/ui/scan_info.py +0 -72
  106. brkraw/ui/scan_list.py +0 -73
  107. brkraw/ui/subj_info.py +0 -128
  108. brkraw-0.3.11.dist-info/METADATA +0 -25
  109. brkraw-0.3.11.dist-info/RECORD +0 -28
  110. brkraw-0.3.11.dist-info/entry_points.txt +0 -3
  111. brkraw-0.3.11.dist-info/top_level.txt +0 -2
  112. tests/__init__.py +0 -0
  113. {brkraw-0.3.11.dist-info → brkraw-0.5.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,156 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Any, Dict, Iterable, List, Mapping, Optional
5
+ from importlib import resources
6
+
7
+ try:
8
+ resources.files # type: ignore[attr-defined]
9
+ except AttributeError: # pragma: no cover - fallback for Python 3.8
10
+ import importlib_resources as resources # type: ignore[assignment]
11
+ import re
12
+
13
+ import yaml
14
+
15
+
16
+ _NAME_PATTERN = re.compile(r"^[a-z][a-z0-9]*(?:_[a-z0-9]+){0,3}$")
17
+
18
+
19
def validate_meta(
    meta: Any,
    *,
    allow_extra_keys: Optional[Iterable[str]] = None,
    raise_on_error: bool = True,
) -> List[str]:
    """Validate a ``__meta__`` mapping.

    Args:
        meta: ``__meta__`` mapping to validate.
        allow_extra_keys: Additional keys allowed in ``__meta__`` beyond the base schema.
        raise_on_error: If True, raise ValueError on validation errors.

    Returns:
        List of validation error messages (empty when valid).
    """
    problems: List[str] = []
    if not isinstance(meta, Mapping):
        problems.append("__meta__: must be an object.")
        # Non-mapping input cannot be inspected further; stop here.
        if raise_on_error:
            raise ValueError("Invalid __meta__:\n" + "\n".join(problems))
        return problems

    try:
        import jsonschema
    except Exception:
        # jsonschema is optional; fall back to the hand-written checks.
        problems.extend(_validate_meta_minimal(meta, allow_extra_keys=allow_extra_keys))
    else:
        schema = _load_schema()
        if allow_extra_keys:
            schema = _extend_schema(schema, allow_extra_keys)
        for err in jsonschema.Draft202012Validator(schema).iter_errors(meta):
            dotted = ".".join(str(part) for part in err.path)
            location = f"__meta__.{dotted}" if dotted else "__meta__"
            problems.append(f"{location}: {err.message}")

    if problems and raise_on_error:
        raise ValueError("Invalid __meta__:\n" + "\n".join(problems))
    return problems
59
+
60
+
61
+ def _extend_schema(schema: Dict[str, Any], extra_keys: Iterable[str]) -> Dict[str, Any]:
62
+ schema = dict(schema)
63
+ props = dict(schema.get("properties") or {})
64
+ for key in extra_keys:
65
+ if key not in props:
66
+ props[key] = {}
67
+ schema["properties"] = props
68
+ return schema
69
+
70
+
71
def _load_schema() -> Dict[str, Any]:
    """Load the packaged ``meta.yaml`` schema as a dict.

    Raises:
        RuntimeError: When ``__package__`` is unset, since the schema is
            resolved via package resources and needs a package context.
    """
    if __package__ is None:
        raise RuntimeError("Package context required to load meta schema.")
    # Schema file ships inside the brkraw.schema package data.
    with resources.files("brkraw.schema").joinpath("meta.yaml").open(
        "r", encoding="utf-8"
    ) as handle:
        return yaml.safe_load(handle)
78
+
79
+
80
+ def _validate_meta_minimal(
81
+ meta: Mapping[str, Any],
82
+ *,
83
+ allow_extra_keys: Optional[Iterable[str]] = None,
84
+ ) -> List[str]:
85
+ errors: List[str] = []
86
+ name = meta.get("name")
87
+ if not isinstance(name, str) or not name:
88
+ errors.append("__meta__.name: must be a non-empty string.")
89
+ elif not _NAME_PATTERN.match(name):
90
+ errors.append("__meta__.name: must be python-friendly with max 4 tokens.")
91
+ version = meta.get("version")
92
+ if not isinstance(version, str) or not version:
93
+ errors.append("__meta__.version: must be a non-empty string.")
94
+ description = meta.get("description")
95
+ if not isinstance(description, str) or not description:
96
+ errors.append("__meta__.description: must be a non-empty string.")
97
+ category = meta.get("category")
98
+ if not isinstance(category, str) or not category:
99
+ errors.append("__meta__.category: must be a non-empty string.")
100
+
101
+ for key in ("authors", "developers"):
102
+ if key not in meta:
103
+ continue
104
+ value = meta.get(key)
105
+ if not isinstance(value, list) or not value:
106
+ errors.append(f"__meta__.{key}: must be a non-empty list.")
107
+ continue
108
+ for idx, item in enumerate(value):
109
+ if not isinstance(item, Mapping):
110
+ errors.append(f"__meta__.{key}[{idx}]: must be an object.")
111
+ continue
112
+ person_name = item.get("name")
113
+ if not isinstance(person_name, str) or not person_name:
114
+ errors.append(f"__meta__.{key}[{idx}].name: must be a non-empty string.")
115
+ email = item.get("email")
116
+ if email is not None and not isinstance(email, str):
117
+ errors.append(f"__meta__.{key}[{idx}].email: must be a string.")
118
+ affiliations = item.get("affiliations")
119
+ if affiliations is not None:
120
+ if not isinstance(affiliations, list) or not affiliations:
121
+ errors.append(
122
+ f"__meta__.{key}[{idx}].affiliations: must be a non-empty list."
123
+ )
124
+ elif not all(isinstance(a, str) and a for a in affiliations):
125
+ errors.append(
126
+ f"__meta__.{key}[{idx}].affiliations: must be non-empty strings."
127
+ )
128
+ extra = set(item.keys()) - {"name", "email", "affiliations"}
129
+ if extra:
130
+ errors.append(
131
+ f"__meta__.{key}[{idx}]: unexpected keys {sorted(extra)}."
132
+ )
133
+
134
+ for key in ("doi", "citation"):
135
+ if key in meta and not isinstance(meta.get(key), str):
136
+ errors.append(f"__meta__.{key}: must be a string.")
137
+
138
+ allowed = {
139
+ "name",
140
+ "version",
141
+ "description",
142
+ "category",
143
+ "authors",
144
+ "developers",
145
+ "doi",
146
+ "citation",
147
+ }
148
+ if allow_extra_keys:
149
+ allowed.update(allow_extra_keys)
150
+ extra = set(meta.keys()) - allowed
151
+ if extra:
152
+ errors.append(f"__meta__: unexpected keys {sorted(extra)}.")
153
+ return errors
154
+
155
+
156
+ __all__ = ["validate_meta"]
@@ -0,0 +1,15 @@
1
+ from __future__ import annotations
2
+
3
+ from .logic import (
4
+ prune_dataset_to_zip,
5
+ prune_dataset_to_zip_from_spec,
6
+ load_prune_spec,
7
+ )
8
+ from .validator import validate_prune_spec
9
+
10
+ __all__ = [
11
+ "prune_dataset_to_zip",
12
+ "prune_dataset_to_zip_from_spec",
13
+ "load_prune_spec",
14
+ "validate_prune_spec",
15
+ ]
@@ -0,0 +1,361 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Iterable, Optional, Set, Union, Literal, Mapping, Dict, Any, List
5
+ import re
6
+ import shutil
7
+ import zipfile
8
+
9
+ import yaml
10
+
11
+ from ...core.fs import DatasetFS
12
+ from ...core.parameters import Parameters
13
+ from .validator import validate_prune_spec
14
+
15
+
16
def prune_dataset_to_zip(
    source: Union[str, Path],
    dest: Union[str, Path],
    files: Iterable[str],
    *,
    mode: Literal["keep", "drop"] = "keep",
    update_params: Optional[Mapping[str, Mapping[str, Optional[str]]]] = None,
    dirs: Optional[Iterable[Mapping[str, Any]]] = None,
    add_root: bool = True,
    root_name: Optional[str] = None,
    strip_jcamp_comments: bool = False,
) -> Path:
    """Build a pruned copy of a dataset as a ZIP archive.

    Args:
        source: Dataset root (directory or zip file).
        dest: Destination zip path.
        files: Filenames or relative paths used by the selection mode.
        mode: "keep" to include only matching files, "drop" to exclude them.
        update_params: Mapping of {filename: {key: value}} JCAMP edits.
        dirs: Directory rules as a list of {level, dirs} mappings.
        add_root: Whether to include a top-level root directory in the zip.
        root_name: Override the root directory name when add_root is True.
        strip_jcamp_comments: When True, remove $$ comment lines from JCAMP files.

    Returns:
        Path to the created zip file.

    Raises:
        ValueError: When the selector list is empty, the mode is unknown, or
            no files remain after filtering.
    """
    dataset = DatasetFS.from_path(source)
    wanted = _normalize_selectors(files)
    if not wanted:
        raise ValueError("files must contain at least one filename or path.")

    if mode not in {"keep", "drop"}:
        raise ValueError("mode must be 'keep' or 'drop'.")

    dir_rules = _normalize_dir_rules(dirs, mode)
    chosen = _select_files(dataset, wanted, mode=mode, dir_rules=dir_rules)
    if not chosen:
        raise ValueError(f"No files remain after applying {mode} list.")

    target = Path(dest)
    target.parent.mkdir(parents=True, exist_ok=True)
    # Prefer the caller-provided root name, then the dataset anchor, then the
    # source directory name.
    zip_root = root_name or dataset.anchor or dataset.root.name

    archive_names = [_to_arcname(rel, zip_root, add_root=add_root) for rel in chosen]
    _write_zip(
        dataset,
        target,
        chosen,
        archive_names,
        param_updates=_load_parameter_updates(update_params),
        strip_jcamp_comments=strip_jcamp_comments,
    )
    return target
75
+
76
+
77
def prune_dataset_to_zip_from_spec(
    spec: Union[Mapping[str, Any], str, Path],
    *,
    source: Optional[Union[str, Path]] = None,
    dest: Optional[Union[str, Path]] = None,
    validate: bool = True,
    strip_jcamp_comments: Optional[bool] = None,
    root_name: Optional[str] = None,
    dirs: Optional[Iterable[Mapping[str, Any]]] = None,
    mode: Optional[Literal["keep", "drop"]] = None,
    template_vars: Optional[Mapping[str, str]] = None,
) -> Path:
    """Create a pruned dataset ZIP from a prune spec mapping or YAML path.

    Args:
        spec: Prune spec mapping or YAML file path.
        source: Optional override for spec["source"].
        dest: Optional override for spec["dest"].
        validate: When True, validate the spec against the schema.
        strip_jcamp_comments: Optional override to strip $$ comment lines.
        root_name: Optional override for the root directory name in the zip.
        dirs: Optional override for directory filter rules.
        mode: Optional override for keep/drop mode.
        template_vars: Optional mapping used to substitute `$key` placeholders.

    Returns:
        Path to the created zip file.

    Raises:
        ValueError: When source/dest are provided neither as arguments nor in
            the spec, or when the effective mode is invalid.
    """
    if isinstance(spec, (str, Path)):
        spec_data = load_prune_spec(spec, validate=validate)
    else:
        spec_data = dict(spec)
        if validate:
            validate_prune_spec(spec_data)

    if template_vars:
        spec_data = _substitute_vars(spec_data, template_vars)

    # BUGFIX: the arguments are documented as *overrides* for spec["source"]
    # and spec["dest"], so fall back to the (template-substituted) spec values
    # before deciding that they are missing. Previously the spec values were
    # ignored and this always raised unless both arguments were passed.
    if source is None:
        source = spec_data.get("source")
    if dest is None:
        dest = spec_data.get("dest")
    if source is None or dest is None:
        raise ValueError("source and dest are required for prune spec.")

    mode_value = mode if mode is not None else spec_data.get("mode", "keep")
    if mode_value not in {"keep", "drop"}:
        raise ValueError("mode must be 'keep' or 'drop'.")

    return prune_dataset_to_zip(
        source,
        dest,
        files=spec_data.get("files", []),
        mode=mode_value,
        update_params=spec_data.get("update_params"),
        dirs=dirs if dirs is not None else spec_data.get("dirs"),
        add_root=spec_data.get("add_root", True),
        root_name=root_name if root_name is not None else spec_data.get("root_name"),
        strip_jcamp_comments=(
            strip_jcamp_comments
            if strip_jcamp_comments is not None
            else bool(spec_data.get("strip_jcamp_comments", False))
        ),
    )
137
+
138
+
139
def load_prune_spec(path: Union[str, Path], *, validate: bool = True) -> Dict[str, Any]:
    """Read a prune spec from a YAML file.

    Args:
        path: YAML file location.
        validate: When True, run schema validation on the loaded spec.

    Returns:
        The spec as a plain dict.

    Raises:
        ValueError: If the file is empty or does not contain a mapping.
    """
    raw = yaml.safe_load(Path(path).read_text(encoding="utf-8"))
    if raw is None:
        raise ValueError("Prune spec is empty.")
    if not isinstance(raw, Mapping):
        raise ValueError("Prune spec must be a mapping.")
    loaded = dict(raw)
    if validate:
        validate_prune_spec(loaded)
    return loaded
151
+
152
+
153
+ def _normalize_selectors(items: Iterable[str]) -> Set[str]:
154
+ """Normalize selector strings by trimming and dropping empty entries."""
155
+ return {str(item).strip().strip("/") for item in items if str(item).strip()}
156
+
157
+
158
def _select_files(
    fs: DatasetFS,
    selectors: Set[str],
    *,
    mode: Literal["keep", "drop"],
    dir_rules: List[Dict[str, Any]],
) -> Set[str]:
    """Collect dataset-relative file paths chosen by the keep/drop rules."""
    chosen: Set[str] = set()
    keep_mode = mode == "keep"
    for dirpath, _, filenames in fs.walk():
        for filename in filenames:
            relative = fs.strip_anchor(f"{dirpath}/{filename}".strip("/"))
            if _is_excluded_by_dir_rules(relative, dir_rules):
                continue
            # keep-mode wants matches; drop-mode wants non-matches.
            if _matches_selector(relative, filename, selectors) == keep_mode:
                chosen.add(relative)
    return chosen
179
+
180
+
181
+ def _matches_selector(relpath: str, name: str, selectors: Set[str]) -> bool:
182
+ """Match either a full relative path or a basename against selectors."""
183
+ return relpath in selectors or name in selectors
184
+
185
+
186
+ def _to_arcname(relpath: str, root: str, *, add_root: bool) -> str:
187
+ """Build a zip archive name with optional root folder prefix."""
188
+ relpath = relpath.strip("/")
189
+ if not add_root:
190
+ return relpath
191
+ if not root:
192
+ return relpath
193
+ return f"{root}/{relpath}" if relpath else root
194
+
195
+
196
def _write_zip(
    fs: DatasetFS,
    dest: Path,
    files: Iterable[str],
    arcnames: Iterable[str],
    *,
    param_updates: Optional[Mapping[str, Mapping[str, Optional[str]]]] = None,
    strip_jcamp_comments: bool = False,
) -> None:
    """Write selected files into a zip, applying JCAMP edits when requested.

    Args:
        fs: Dataset filesystem abstraction used to read source files.
        dest: Destination zip path.
        files: Dataset-relative paths to include.
        arcnames: Archive names aligned one-to-one with ``files``.
        param_updates: Mapping of {basename: {key: value}} JCAMP edits.
        strip_jcamp_comments: When True, remove $$ comment lines from JCAMP files.
    """
    # Sort by archive name so the zip layout is deterministic.
    entries = sorted(zip(files, arcnames), key=lambda item: item[1])
    parent_dirs = _collect_parent_dirs([arc for _, arc in entries])
    param_updates = param_updates or {}

    with zipfile.ZipFile(dest, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        # Emit explicit directory entries so readers see the full tree.
        for d in parent_dirs:
            zf.writestr(f"{d}/", b"")
        for relpath, arcname in entries:
            name = relpath.strip("/").split("/")[-1]
            updates = param_updates.get(name)
            if updates:
                # Parameter edits require rewriting the file in memory.
                # BUGFIX: close the source handle (was a bare .read() leak).
                with fs.open_binary(relpath) as handle:
                    content = handle.read()
                updated_text = _apply_jcamp_updates(content, updates, path_hint=relpath)
                if strip_jcamp_comments:
                    updated_text = _strip_jcamp_comments(updated_text)
                zf.writestr(arcname, updated_text.encode("utf-8"))
                continue
            if strip_jcamp_comments:
                with fs.open_binary(relpath) as handle:
                    content = handle.read()
                if Parameters._looks_like_jcamp(content):
                    stripped = _strip_jcamp_comments(
                        content.decode("utf-8", errors="ignore")
                    )
                    zf.writestr(arcname, stripped.encode("utf-8"))
                else:
                    # BUGFIX: reuse the bytes already read instead of opening
                    # the same file a second time for the streamed copy.
                    zf.writestr(arcname, content)
                continue
            # Default path: stream the file directly into the archive.
            with fs.open_binary(relpath) as src, zf.open(arcname, "w") as dst:
                shutil.copyfileobj(src, dst)
233
+
234
+
235
+ def _collect_parent_dirs(arcnames: Iterable[str]) -> Set[str]:
236
+ """Return all parent directory entries for the given archive paths."""
237
+ dirs: Set[str] = set()
238
+ for arcname in arcnames:
239
+ parts = arcname.split("/")[:-1]
240
+ acc = []
241
+ for part in parts:
242
+ acc.append(part)
243
+ dirs.add("/".join(acc))
244
+ return {d for d in dirs if d}
245
+
246
+
247
+ def _load_parameter_updates(
248
+ update_params: Optional[Mapping[str, Mapping[str, Optional[str]]]]
249
+ ) -> Dict[str, Dict[str, Optional[str]]]:
250
+ """Validate JCAMP update mappings."""
251
+ if update_params is None:
252
+ return {}
253
+ if not isinstance(update_params, Mapping):
254
+ raise ValueError("update_params must be a mapping.")
255
+
256
+ result: Dict[str, Dict[str, Optional[str]]] = {}
257
+ for filename, updates in update_params.items():
258
+ if not isinstance(filename, str) or not filename.strip():
259
+ raise ValueError("update_params keys must be non-empty strings.")
260
+ if not isinstance(updates, Mapping):
261
+ raise ValueError(f"update_params[{filename!r}] must be a mapping.")
262
+ inner: Dict[str, Optional[str]] = {}
263
+ for key, value in updates.items():
264
+ if not isinstance(key, str) or not key.strip():
265
+ raise ValueError(f"update_params[{filename!r}] keys must be strings.")
266
+ inner[key] = None if value is None else str(value)
267
+ result[filename.strip()] = inner
268
+ return result
269
+
270
+
271
def _apply_jcamp_updates(
    content: bytes,
    updates: Mapping[str, Optional[str]],
    *,
    path_hint: str,
) -> str:
    """Apply JCAMP updates using Parameters and return updated source text.

    Args:
        content: Raw bytes of a JCAMP parameter file.
        updates: Mapping of parameter key -> replacement value (None semantics
            are delegated to ``Parameters.replace_values``).
        path_hint: Original file path, used only in the error message.

    Returns:
        The full updated JCAMP text, re-serialized by ``Parameters``.

    Raises:
        ValueError: When *content* cannot be parsed as a JCAMP file.
    """
    try:
        params = Parameters(content)
    except Exception as exc:
        # Surface a readable error that names the offending file.
        raise ValueError(f"Parameter file is not parseable: {path_hint}") from exc
    # reparse=True keeps the in-memory parameter view consistent with the
    # edited source text.
    params.replace_values(updates, reparse=True)
    return params.source_text()
284
+
285
+
286
+ def _strip_jcamp_comments(text: str) -> str:
287
+ """Remove $$ comment lines from JCAMP text."""
288
+ lines = text.splitlines(keepends=True)
289
+ kept = [line for line in lines if not line.lstrip().startswith("$$")]
290
+ return "".join(kept)
291
+
292
+
293
+ def _normalize_dir_rules(
294
+ rules: Optional[Iterable[Mapping[str, Any]]],
295
+ mode: Literal["keep", "drop"],
296
+ ) -> List[Dict[str, Any]]:
297
+ if not rules:
298
+ return []
299
+ normalized: List[Dict[str, Any]] = []
300
+ for idx, rule in enumerate(rules):
301
+ if not isinstance(rule, Mapping):
302
+ raise ValueError(f"dirs[{idx}] must be a mapping.")
303
+ level = rule.get("level")
304
+ if not isinstance(level, int) or level < 1:
305
+ raise ValueError(f"dirs[{idx}].level must be int >= 1.")
306
+ dirs = rule.get("dirs")
307
+ if not isinstance(dirs, Iterable):
308
+ raise ValueError(f"dirs[{idx}].dirs must be a list of names.")
309
+ names = [str(d).strip() for d in dirs if str(d).strip()]
310
+ if not names:
311
+ raise ValueError(f"dirs[{idx}].dirs must contain at least one name.")
312
+ normalized.append({"mode": mode, "level": level, "dirs": set(names)})
313
+ normalized.sort(key=lambda item: item["level"])
314
+ return normalized
315
+
316
+
317
+ def _is_excluded_by_dir_rules(relpath: str, rules: List[Dict[str, Any]]) -> bool:
318
+ if not rules:
319
+ return False
320
+ parts = [p for p in relpath.split("/") if p]
321
+ for rule in rules:
322
+ level = rule["level"]
323
+ if level > len(parts):
324
+ continue
325
+ name = parts[level - 1]
326
+ if rule["mode"] == "drop":
327
+ if name in rule["dirs"]:
328
+ return True
329
+ else:
330
+ if name not in rule["dirs"]:
331
+ return True
332
+ return False
333
+
334
+
335
+ def _substitute_vars(obj: Any, variables: Mapping[str, str]) -> Any:
336
+ """Recursively substitute $key placeholders in strings using variables mapping."""
337
+ if isinstance(obj, str):
338
+ return _substitute_string(obj, variables)
339
+ if isinstance(obj, Mapping):
340
+ return {k: _substitute_vars(v, variables) for k, v in obj.items()}
341
+ if isinstance(obj, list):
342
+ return [_substitute_vars(item, variables) for item in obj]
343
+ return obj
344
+
345
+
346
+ _VAR_PATTERN = re.compile(r"\$(\w+)")
347
+
348
+
349
+ def _substitute_string(text: str, variables: Mapping[str, str]) -> str:
350
+ def replacer(match: re.Match[str]) -> str:
351
+ key = match.group(1)
352
+ return variables.get(key, match.group(0))
353
+
354
+ return _VAR_PATTERN.sub(replacer, text)
355
+
356
+
357
+ __all__ = [
358
+ "prune_dataset_to_zip",
359
+ "prune_dataset_to_zip_from_spec",
360
+ "load_prune_spec",
361
+ ]
@@ -0,0 +1,119 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Any, Dict, List, Mapping, Optional
5
+ from importlib import resources
6
+
7
+ try:
8
+ resources.files # type: ignore[attr-defined]
9
+ except AttributeError: # pragma: no cover - fallback for Python 3.8
10
+ import importlib_resources as resources # type: ignore[assignment]
11
+
12
+ import yaml
13
+
14
+ from ..meta import validate_meta
15
+
16
def validate_prune_spec(spec: Mapping[str, Any], schema_path: Optional[Path] = None) -> List[str]:
    """Validate a prune spec against the pruner schema.

    Uses jsonschema when available; otherwise falls back to a minimal
    structural check. The ``__meta__`` section is validated in both paths.

    Args:
        spec: Parsed prune spec mapping.
        schema_path: Optional schema path override.

    Returns:
        An empty list when the spec is valid.

    Raises:
        ValueError: When the spec has validation errors (the messages are
            joined into the exception text).
    """
    errors: List[str] = []
    try:
        import jsonschema
    except Exception:
        # BUGFIX: _validate_spec_minimal already validates __meta__; running
        # validate_meta again afterwards duplicated every meta error in the
        # fallback path.
        errors = _validate_spec_minimal(spec)
    else:
        schema = _load_schema(schema_path)
        validator = jsonschema.Draft202012Validator(schema)
        for err in validator.iter_errors(spec):
            path = ".".join(str(p) for p in err.path)
            prefix = f"spec.{path}" if path else "spec"
            errors.append(f"{prefix}: {err.message}")
        # Meta rules beyond the schema are checked by the dedicated validator.
        errors.extend(
            validate_meta(
                spec.get("__meta__"),
                raise_on_error=False,
            )
        )

    if errors:
        raise ValueError("Invalid prune spec:\n" + "\n".join(errors))
    return errors
48
+
49
+
50
def _load_schema(schema_path: Optional[Path]) -> Dict[str, Any]:
    """Load the pruner schema, preferring an explicit path over package data.

    Raises:
        RuntimeError: When no override is given and ``__package__`` is unset,
            since the bundled schema is resolved via package resources.
    """
    # An explicit override wins over the packaged schema.
    if schema_path is not None:
        return yaml.safe_load(schema_path.read_text(encoding="utf-8"))
    if __package__ is None:
        raise RuntimeError("Package context required to load pruner schema.")
    # Schema file ships inside the brkraw.schema package data.
    with resources.files("brkraw.schema").joinpath("pruner.yaml").open(
        "r", encoding="utf-8"
    ) as handle:
        return yaml.safe_load(handle)
59
+
60
+
61
+ def _validate_spec_minimal(spec: Mapping[str, Any]) -> List[str]:
62
+ errors: List[str] = []
63
+ if not isinstance(spec, Mapping):
64
+ errors.append("spec: must be a mapping.")
65
+ return errors
66
+
67
+ if "__meta__" not in spec:
68
+ errors.append("spec.__meta__: is required.")
69
+ else:
70
+ errors.extend(
71
+ validate_meta(
72
+ spec.get("__meta__"),
73
+ raise_on_error=False,
74
+ )
75
+ )
76
+
77
+ files = spec.get("files")
78
+ if not isinstance(files, list) or not files:
79
+ errors.append("spec.files: must be a non-empty list.")
80
+ else:
81
+ for idx, item in enumerate(files):
82
+ if not isinstance(item, (str, int)):
83
+ errors.append(f"spec.files[{idx}]: must be string or int.")
84
+
85
+ mode = spec.get("mode", "keep")
86
+ if mode not in {"keep", "drop"}:
87
+ errors.append("spec.mode: must be 'keep' or 'drop'.")
88
+
89
+ update_params = spec.get("update_params")
90
+ if update_params is not None and not isinstance(update_params, Mapping):
91
+ errors.append("spec.update_params: must be a mapping.")
92
+
93
+ dirs = spec.get("dirs")
94
+ if dirs is not None and not isinstance(dirs, list):
95
+ errors.append("spec.dirs: must be a list.")
96
+ if isinstance(dirs, list):
97
+ for idx, rule in enumerate(dirs):
98
+ if not isinstance(rule, Mapping):
99
+ errors.append(f"spec.dirs[{idx}]: must be a mapping.")
100
+ continue
101
+ level = rule.get("level")
102
+ if not isinstance(level, int) or level < 1:
103
+ errors.append(f"spec.dirs[{idx}].level: must be int >= 1.")
104
+ dirs = rule.get("dirs")
105
+ if not isinstance(dirs, list) or not dirs:
106
+ errors.append(f"spec.dirs[{idx}].dirs: must be a non-empty list.")
107
+
108
+ add_root = spec.get("add_root")
109
+ if add_root is not None and not isinstance(add_root, bool):
110
+ errors.append("spec.add_root: must be boolean.")
111
+
112
+ root_name = spec.get("root_name")
113
+ if root_name is not None and not isinstance(root_name, str):
114
+ errors.append("spec.root_name: must be a string.")
115
+
116
+ return errors
117
+
118
+
119
+ __all__ = ["validate_prune_spec"]
@@ -0,0 +1,27 @@
1
+ from __future__ import annotations
2
+
3
+ from .logic import (
4
+ load_spec,
5
+ map_parameters,
6
+ load_context_map,
7
+ load_context_map_data,
8
+ load_context_map_meta,
9
+ get_selector_keys,
10
+ matches_context_map_selectors,
11
+ apply_context_map,
12
+ )
13
+ from .validator import validate_spec, validate_context_map, validate_map_data
14
+
15
+ __all__ = [
16
+ "load_spec",
17
+ "map_parameters",
18
+ "validate_spec",
19
+ "validate_context_map",
20
+ "validate_map_data",
21
+ "load_context_map",
22
+ "load_context_map_data",
23
+ "load_context_map_meta",
24
+ "get_selector_keys",
25
+ "matches_context_map_selectors",
26
+ "apply_context_map",
27
+ ]