brkraw 0.3.11__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. brkraw/__init__.py +9 -3
  2. brkraw/apps/__init__.py +12 -0
  3. brkraw/apps/addon/__init__.py +30 -0
  4. brkraw/apps/addon/core.py +35 -0
  5. brkraw/apps/addon/dependencies.py +402 -0
  6. brkraw/apps/addon/installation.py +500 -0
  7. brkraw/apps/addon/io.py +21 -0
  8. brkraw/apps/hook/__init__.py +25 -0
  9. brkraw/apps/hook/core.py +636 -0
  10. brkraw/apps/loader/__init__.py +10 -0
  11. brkraw/apps/loader/core.py +622 -0
  12. brkraw/apps/loader/formatter.py +288 -0
  13. brkraw/apps/loader/helper.py +797 -0
  14. brkraw/apps/loader/info/__init__.py +11 -0
  15. brkraw/apps/loader/info/scan.py +85 -0
  16. brkraw/apps/loader/info/scan.yaml +90 -0
  17. brkraw/apps/loader/info/study.py +69 -0
  18. brkraw/apps/loader/info/study.yaml +156 -0
  19. brkraw/apps/loader/info/transform.py +92 -0
  20. brkraw/apps/loader/types.py +220 -0
  21. brkraw/cli/__init__.py +5 -0
  22. brkraw/cli/commands/__init__.py +2 -0
  23. brkraw/cli/commands/addon.py +327 -0
  24. brkraw/cli/commands/config.py +205 -0
  25. brkraw/cli/commands/convert.py +903 -0
  26. brkraw/cli/commands/hook.py +348 -0
  27. brkraw/cli/commands/info.py +74 -0
  28. brkraw/cli/commands/init.py +214 -0
  29. brkraw/cli/commands/params.py +106 -0
  30. brkraw/cli/commands/prune.py +288 -0
  31. brkraw/cli/commands/session.py +371 -0
  32. brkraw/cli/hook_args.py +80 -0
  33. brkraw/cli/main.py +83 -0
  34. brkraw/cli/utils.py +60 -0
  35. brkraw/core/__init__.py +13 -0
  36. brkraw/core/config.py +380 -0
  37. brkraw/core/entrypoints.py +25 -0
  38. brkraw/core/formatter.py +367 -0
  39. brkraw/core/fs.py +495 -0
  40. brkraw/core/jcamp.py +600 -0
  41. brkraw/core/layout.py +451 -0
  42. brkraw/core/parameters.py +781 -0
  43. brkraw/core/zip.py +1121 -0
  44. brkraw/dataclasses/__init__.py +14 -0
  45. brkraw/dataclasses/node.py +139 -0
  46. brkraw/dataclasses/reco.py +33 -0
  47. brkraw/dataclasses/scan.py +61 -0
  48. brkraw/dataclasses/study.py +131 -0
  49. brkraw/default/__init__.py +3 -0
  50. brkraw/default/pruner_specs/deid4share.yaml +42 -0
  51. brkraw/default/rules/00_default.yaml +4 -0
  52. brkraw/default/specs/metadata_dicom.yaml +236 -0
  53. brkraw/default/specs/metadata_transforms.py +92 -0
  54. brkraw/resolver/__init__.py +7 -0
  55. brkraw/resolver/affine.py +539 -0
  56. brkraw/resolver/datatype.py +69 -0
  57. brkraw/resolver/fid.py +90 -0
  58. brkraw/resolver/helpers.py +36 -0
  59. brkraw/resolver/image.py +188 -0
  60. brkraw/resolver/nifti.py +370 -0
  61. brkraw/resolver/shape.py +235 -0
  62. brkraw/schema/__init__.py +3 -0
  63. brkraw/schema/context_map.yaml +62 -0
  64. brkraw/schema/meta.yaml +57 -0
  65. brkraw/schema/niftiheader.yaml +95 -0
  66. brkraw/schema/pruner.yaml +55 -0
  67. brkraw/schema/remapper.yaml +128 -0
  68. brkraw/schema/rules.yaml +154 -0
  69. brkraw/specs/__init__.py +10 -0
  70. brkraw/specs/hook/__init__.py +12 -0
  71. brkraw/specs/hook/logic.py +31 -0
  72. brkraw/specs/hook/validator.py +22 -0
  73. brkraw/specs/meta/__init__.py +5 -0
  74. brkraw/specs/meta/validator.py +156 -0
  75. brkraw/specs/pruner/__init__.py +15 -0
  76. brkraw/specs/pruner/logic.py +361 -0
  77. brkraw/specs/pruner/validator.py +119 -0
  78. brkraw/specs/remapper/__init__.py +27 -0
  79. brkraw/specs/remapper/logic.py +924 -0
  80. brkraw/specs/remapper/validator.py +314 -0
  81. brkraw/specs/rules/__init__.py +6 -0
  82. brkraw/specs/rules/logic.py +263 -0
  83. brkraw/specs/rules/validator.py +103 -0
  84. brkraw-0.5.0.dist-info/METADATA +81 -0
  85. brkraw-0.5.0.dist-info/RECORD +88 -0
  86. {brkraw-0.3.11.dist-info → brkraw-0.5.0.dist-info}/WHEEL +1 -2
  87. brkraw-0.5.0.dist-info/entry_points.txt +13 -0
  88. brkraw/lib/__init__.py +0 -4
  89. brkraw/lib/backup.py +0 -641
  90. brkraw/lib/bids.py +0 -0
  91. brkraw/lib/errors.py +0 -125
  92. brkraw/lib/loader.py +0 -1220
  93. brkraw/lib/orient.py +0 -194
  94. brkraw/lib/parser.py +0 -48
  95. brkraw/lib/pvobj.py +0 -301
  96. brkraw/lib/reference.py +0 -245
  97. brkraw/lib/utils.py +0 -471
  98. brkraw/scripts/__init__.py +0 -0
  99. brkraw/scripts/brk_backup.py +0 -106
  100. brkraw/scripts/brkraw.py +0 -744
  101. brkraw/ui/__init__.py +0 -0
  102. brkraw/ui/config.py +0 -17
  103. brkraw/ui/main_win.py +0 -214
  104. brkraw/ui/previewer.py +0 -225
  105. brkraw/ui/scan_info.py +0 -72
  106. brkraw/ui/scan_list.py +0 -73
  107. brkraw/ui/subj_info.py +0 -128
  108. brkraw-0.3.11.dist-info/METADATA +0 -25
  109. brkraw-0.3.11.dist-info/RECORD +0 -28
  110. brkraw-0.3.11.dist-info/entry_points.txt +0 -3
  111. brkraw-0.3.11.dist-info/top_level.txt +0 -2
  112. tests/__init__.py +0 -0
  113. {brkraw-0.3.11.dist-info → brkraw-0.5.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,314 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Any, IO, List, Dict, Union, Optional
5
+ from importlib import resources
6
+
7
+ try:
8
+ resources.files # type: ignore[attr-defined]
9
+ except AttributeError: # pragma: no cover - fallback for Python 3.8
10
+ import importlib_resources as resources # type: ignore[assignment]
11
+
12
+ import yaml
13
+
14
+ from ..meta import validate_meta
15
+
16
+ _ALLOWED_FILES = {"method", "acqp", "visu_pars", "reco", "subject"}
17
+ _RULE_KEYS = {"sources", "inputs", "const", "ref", "transform", "default"}
18
+ _INPUT_KEYS = {"sources", "const", "ref", "transform", "default", "required"}
19
+ _INLINE_SOURCE_KEYS = {"inputs", "transform"}
20
+ _META_KEY = "__meta__"
21
+
22
+
23
+ def _validate_transforms_source(
24
+ transforms_source: Optional[Union[str, Path, List[str], List[Path], IO[str], IO[bytes]]],
25
+ errors: List[str],
26
+ ) -> None:
27
+ if transforms_source is None:
28
+ return
29
+ if isinstance(transforms_source, (str, Path)):
30
+ src_path = Path(transforms_source)
31
+ if not src_path.exists():
32
+ errors.append(f"transforms_source: not found: {src_path}")
33
+ return
34
+ if isinstance(transforms_source, list):
35
+ for item in transforms_source:
36
+ if not isinstance(item, (str, Path)):
37
+ errors.append("transforms_source: list entries must be paths.")
38
+ continue
39
+ src_path = Path(item)
40
+ if not src_path.exists():
41
+ errors.append(f"transforms_source: not found: {src_path}")
42
+
43
+
44
def _load_schema() -> Dict[str, Any]:
    """Read ``remapper.yaml`` from the ``brkraw.schema`` package and parse it.

    Returns:
        The parsed YAML content of the bundled remapper schema.

    Raises:
        RuntimeError: If ``__package__`` is None.
    """
    if __package__ is None:
        raise RuntimeError("Package context required to load remapper schema.")
    schema_file = resources.files("brkraw.schema").joinpath("remapper.yaml")
    with schema_file.open("r", encoding="utf-8") as stream:
        return yaml.safe_load(stream)
51
+
52
+
53
def _load_map_schema() -> Dict[str, Any]:
    """Read ``context_map.yaml`` from the ``brkraw.schema`` package and parse it.

    Returns:
        The parsed YAML content of the bundled context map schema.

    Raises:
        RuntimeError: If ``__package__`` is None.
    """
    if __package__ is None:
        raise RuntimeError("Package context required to load map schema.")
    schema_file = resources.files("brkraw.schema").joinpath("context_map.yaml")
    with schema_file.open("r", encoding="utf-8") as stream:
        return yaml.safe_load(stream)
60
+
61
+
62
+ def _validate_sources(sources: Any, path: str, errors: List[str]) -> None:
63
+ if not isinstance(sources, list):
64
+ errors.append(f"{path}: sources must be a list.")
65
+ return
66
+ for idx, src in enumerate(sources):
67
+ item_path = f"{path}.sources[{idx}]"
68
+ if not isinstance(src, dict):
69
+ errors.append(f"{item_path}: source must be an object.")
70
+ continue
71
+ if "inputs" in src:
72
+ extra = set(src.keys()) - _INLINE_SOURCE_KEYS
73
+ if extra:
74
+ errors.append(f"{item_path}: unexpected keys {sorted(extra)}.")
75
+ _validate_inputs(src["inputs"], item_path, errors)
76
+ if "transform" in src:
77
+ t = src["transform"]
78
+ if isinstance(t, list):
79
+ if not all(isinstance(name, str) for name in t):
80
+ errors.append(f"{item_path}: transform list must be strings.")
81
+ elif not isinstance(t, str):
82
+ errors.append(f"{item_path}: transform must be a string or list.")
83
+ continue
84
+ extra = set(src.keys()) - {"file", "key", "reco_id"}
85
+ if extra:
86
+ errors.append(f"{item_path}: unexpected keys {sorted(extra)}.")
87
+ if "file" not in src or "key" not in src:
88
+ errors.append(f"{item_path}: requires file and key.")
89
+ continue
90
+ if src["file"] not in _ALLOWED_FILES:
91
+ errors.append(f"{item_path}: invalid file {src['file']!r}.")
92
+ if not isinstance(src["key"], str):
93
+ errors.append(f"{item_path}: key must be a string.")
94
+ reco_id = src.get("reco_id")
95
+ if reco_id is not None and (not isinstance(reco_id, int) or reco_id < 1):
96
+ errors.append(f"{item_path}: reco_id must be int >= 1.")
97
+
98
+
99
+ def _validate_inputs(inputs: Any, path: str, errors: List[str]) -> None:
100
+ if not isinstance(inputs, dict):
101
+ errors.append(f"{path}: inputs must be a mapping.")
102
+ return
103
+ for name, spec in inputs.items():
104
+ item_path = f"{path}.inputs[{name!r}]"
105
+ if isinstance(spec, str):
106
+ if not spec.startswith("$"):
107
+ errors.append(f"{item_path}: input shorthand must start with '$'.")
108
+ continue
109
+ if not isinstance(spec, dict):
110
+ errors.append(f"{item_path}: input spec must be an object.")
111
+ continue
112
+ extra = set(spec.keys()) - _INPUT_KEYS
113
+ if extra:
114
+ errors.append(f"{item_path}: unexpected keys {sorted(extra)}.")
115
+ if not any(k in spec for k in ("sources", "const", "ref")):
116
+ errors.append(f"{item_path}: requires sources, const, or ref.")
117
+ if "sources" in spec:
118
+ _validate_sources(spec["sources"], item_path, errors)
119
+ if "ref" in spec and not isinstance(spec["ref"], str):
120
+ errors.append(f"{item_path}: ref must be a string.")
121
+ if "transform" in spec:
122
+ t = spec["transform"]
123
+ if isinstance(t, list):
124
+ if not all(isinstance(name, str) for name in t):
125
+ errors.append(f"{item_path}: transform list must be strings.")
126
+ elif not isinstance(t, str):
127
+ errors.append(f"{item_path}: transform must be a string or list.")
128
+ if "required" in spec and not isinstance(spec["required"], bool):
129
+ errors.append(f"{item_path}: required must be a boolean.")
130
+
131
+
132
+ def _validate_spec_minimal(spec: Any) -> List[str]:
133
+ errors: List[str] = []
134
+ if not isinstance(spec, dict):
135
+ errors.append("spec: must be a mapping.")
136
+ return errors
137
+ if _META_KEY not in spec:
138
+ errors.append("spec.__meta__: is required.")
139
+ else:
140
+ errors.extend(
141
+ validate_meta(
142
+ spec.get(_META_KEY),
143
+ allow_extra_keys={"include", "include_mode", "transforms_source", "__spec_path__"},
144
+ raise_on_error=False,
145
+ )
146
+ )
147
+ for key, rule in spec.items():
148
+ if key == _META_KEY:
149
+ continue
150
+ path = f"spec[{key!r}]"
151
+ if not isinstance(rule, dict):
152
+ errors.append(f"{path}: rule must be an object.")
153
+ continue
154
+ extra = set(rule.keys()) - _RULE_KEYS
155
+ if extra:
156
+ errors.append(f"{path}: unexpected keys {sorted(extra)}.")
157
+ if not any(k in rule for k in ("sources", "inputs", "const", "ref")):
158
+ errors.append(f"{path}: requires sources, inputs, const, or ref.")
159
+ if "sources" in rule:
160
+ _validate_sources(rule["sources"], path, errors)
161
+ if "inputs" in rule:
162
+ _validate_inputs(rule["inputs"], path, errors)
163
+ if "ref" in rule and not isinstance(rule["ref"], str):
164
+ errors.append(f"{path}: ref must be a string.")
165
+ if "transform" in rule:
166
+ t = rule["transform"]
167
+ if isinstance(t, list):
168
+ if not all(isinstance(name, str) for name in t):
169
+ errors.append(f"{path}: transform list must be strings.")
170
+ elif not isinstance(t, str):
171
+ errors.append(f"{path}: transform must be a string or list.")
172
+ return errors
173
+
174
+
175
def validate_spec(
    spec: Any,
    *,
    transforms_source: Optional[Union[str, Path, List[str], List[Path], IO[str], IO[bytes]]] = None,
    raise_on_error: bool = True,
) -> List[str]:
    """Validate a remapper spec against the schema.

    When ``jsonschema`` can be imported the bundled schema is applied and the
    ``__meta__`` entry is checked with ``validate_meta``; otherwise the
    hand-written minimal validator is used.

    Args:
        spec: Parsed spec mapping to validate.
        transforms_source: Optional path or list of paths whose existence is
            checked; other values are not checked.
        raise_on_error: If True, raise ValueError on validation errors.

    Returns:
        List of validation error messages (empty when valid).

    Raises:
        ValueError: If errors were found and ``raise_on_error`` is True.
    """
    try:
        import jsonschema
    except Exception:
        jsonschema = None

    if jsonschema is None:
        errors: List[str] = _validate_spec_minimal(spec)
    else:
        errors = []
        schema = _load_schema()
        checker = jsonschema.Draft202012Validator(schema)
        for issue in checker.iter_errors(spec):
            location = ".".join(str(part) for part in issue.path)
            prefix = f"spec.{location}" if location else "spec"
            errors.append(f"{prefix}: {issue.message}")

        # NOTE(review): indentation was lost in the diff this was taken from.
        # The meta check is assumed to belong to the jsonschema branch because
        # the minimal validator already checks __meta__ itself - confirm.
        meta = spec.get(_META_KEY) if isinstance(spec, dict) else None
        meta_errors = validate_meta(
            meta,
            allow_extra_keys={"include", "include_mode", "transforms_source", "__spec_path__"},
            raise_on_error=False,
        )
        errors.extend(meta_errors)

    _validate_transforms_source(transforms_source, errors)
    if errors and raise_on_error:
        raise ValueError("Invalid remapper spec:\n" + "\n".join(errors))
    return errors
215
+
216
+
217
+ def _validate_map_minimal(map_data: Any) -> List[str]:
218
+ errors: List[str] = []
219
+ if not isinstance(map_data, dict):
220
+ errors.append("map: must be a mapping.")
221
+ return errors
222
+ for key, value in map_data.items():
223
+ if key == "__meta__":
224
+ continue
225
+ if not isinstance(key, str):
226
+ errors.append(f"map[{key!r}]: key must be a string.")
227
+ if isinstance(value, list):
228
+ for idx, rule in enumerate(value):
229
+ _validate_map_rule(rule, key, errors, idx=idx)
230
+ else:
231
+ _validate_map_rule(value, key, errors, idx=None)
232
+ return errors
233
+
234
+
235
+ def _validate_map_rule(
236
+ rule: Any,
237
+ key: str,
238
+ errors: List[str],
239
+ *,
240
+ idx: Optional[Union[int, str]],
241
+ ) -> None:
242
+ label = f"map[{key!r}]" if idx is None else f"map[{key!r}][{idx}]"
243
+ if not isinstance(rule, dict):
244
+ errors.append(f"{label}: rule must be a mapping.")
245
+ return
246
+ cases = rule.get("cases")
247
+ if cases is not None:
248
+ if not isinstance(cases, list):
249
+ errors.append(f"{label}: cases must be a list.")
250
+ else:
251
+ for case_idx, case in enumerate(cases):
252
+ nested = f"{idx}.cases[{case_idx}]" if idx is not None else f"cases[{case_idx}]"
253
+ _validate_map_rule(case, key, errors, idx=nested)
254
+ rule_type = rule.get("type")
255
+ if rule_type is None:
256
+ if "values" in rule:
257
+ rule_type = "mapping"
258
+ elif "value" in rule:
259
+ rule_type = "const"
260
+ if rule_type not in {"mapping", "const", None}:
261
+ errors.append(f"{label}: invalid type {rule_type!r}.")
262
+ if rule_type == "mapping":
263
+ table = rule.get("values")
264
+ if not isinstance(table, dict) and cases is None:
265
+ errors.append(f"{label}: values must be a mapping.")
266
+ when = rule.get("when")
267
+ if when is not None and not isinstance(when, dict):
268
+ errors.append(f"{label}: when must be a mapping.")
269
+ override = rule.get("override")
270
+ if override is not None and not isinstance(override, bool):
271
+ errors.append(f"{label}: override must be a boolean.")
272
+
273
+
274
def validate_map_data(map_data: Any, *, raise_on_error: bool = True) -> List[str]:
    """Validate a map file mapping.

    When ``jsonschema`` can be imported, the bundled context map schema is
    applied and the hand-written checks are run in addition; otherwise only
    the hand-written checks run.

    Args:
        map_data: Parsed map mapping to validate.
        raise_on_error: If True, raise ValueError on validation errors.

    Returns:
        List of validation error messages (empty when valid).

    Raises:
        ValueError: If errors were found and ``raise_on_error`` is True.
    """
    try:
        import jsonschema
    except Exception:
        jsonschema = None

    if jsonschema is None:
        errors: List[str] = _validate_map_minimal(map_data)
    else:
        errors = []
        schema = _load_map_schema()
        checker = jsonschema.Draft202012Validator(schema)
        for issue in checker.iter_errors(map_data):
            location = ".".join(str(part) for part in issue.path)
            prefix = f"map.{location}" if location else "map"
            errors.append(f"{prefix}: {issue.message}")
        # NOTE(review): assumed to belong to the jsonschema branch (the diff
        # lost its indentation); at function level the fallback errors would
        # be reported twice - confirm.
        errors.extend(_validate_map_minimal(map_data))

    if errors and raise_on_error:
        raise ValueError("Invalid map file:\n" + "\n".join(errors))
    return errors
300
+
301
+
302
def validate_context_map(path: Union[str, Path], *, raise_on_error: bool = True) -> List[str]:
    """Load and validate a context map from YAML.

    The file is read as UTF-8 text, parsed with ``yaml.safe_load`` and passed
    to ``validate_map_data``.

    Args:
        path: Context map YAML file path.
        raise_on_error: If True, raise ValueError on validation errors.

    Returns:
        List of validation error messages (empty when valid).
    """
    text = Path(path).read_text(encoding="utf-8")
    parsed = yaml.safe_load(text)
    return validate_map_data(parsed, raise_on_error=raise_on_error)
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+ from .logic import load_rules, rule_matches, select_rule_use
4
+ from .validator import validate_rules
5
+
6
+ __all__ = ["load_rules", "rule_matches", "select_rule_use", "validate_rules"]
@@ -0,0 +1,263 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Any, List, Dict, Optional, Union
5
+
6
+ import yaml
7
+
8
+ from ...core import config as config_module
9
+ from ..remapper import load_spec, map_parameters
10
+ from .validator import validate_rules
11
+ import logging
12
+
13
+ logger = logging.getLogger("brkraw")
14
+
15
+ RULE_CATEGORIES = ("info_spec", "metadata_spec", "converter_hook")
16
+ SPEC_CATEGORIES = ("info_spec", "metadata_spec")
17
+
18
+
19
+ def _iter_rule_files(rules_dir: Path) -> List[Path]:
20
+ if not rules_dir.exists():
21
+ return []
22
+ files = list(rules_dir.rglob("*.yaml")) + list(rules_dir.rglob("*.yml"))
23
+ return sorted({p.resolve() for p in files})
24
+
25
+
26
def _load_rule_file(path: Path) -> Dict[str, Any]:
    """Parse one YAML rule file into a mapping.

    Args:
        path: Rule file to read (as UTF-8 text).

    Returns:
        The parsed mapping, or an empty dict when the file parses to None.

    Raises:
        ValueError: If the parsed content is neither None nor a mapping.
    """
    content = yaml.safe_load(path.read_text(encoding="utf-8"))
    if isinstance(content, dict):
        return content
    if content is None:
        return {}
    raise ValueError(f"Rule file must be a mapping: {path}")
33
+
34
+
35
+ def _resolve_spec_path(use: str, base: Path) -> Path:
36
+ candidate = Path(use)
37
+ if candidate.is_absolute():
38
+ return candidate
39
+ if candidate.parts and candidate.parts[0] == "specs":
40
+ return base / candidate
41
+ return base / "specs" / candidate
42
+
43
+
44
+ def _resolve_rule_use(rule: Dict[str, Any], *, base: Path) -> Optional[Path]:
45
+ use = rule.get("use")
46
+ if not isinstance(use, str):
47
+ return None
48
+ version = rule.get("version") if isinstance(rule.get("version"), str) else None
49
+ category = rule.get("__category__") if isinstance(rule.get("__category__"), str) else None
50
+ try:
51
+ from ...apps.addon.core import resolve_spec_reference
52
+ except Exception:
53
+ resolve_spec_reference = None
54
+ if resolve_spec_reference is None:
55
+ return _resolve_spec_path(use, base)
56
+ return resolve_spec_reference(use, category=category, version=version, root=base)
57
+
58
+
59
+ def _resolve_operand(value: Any, bindings: Dict[str, Any]) -> Any:
60
+ if isinstance(value, str) and value.startswith("$"):
61
+ return bindings.get(value[1:])
62
+ return value
63
+
64
+
65
+ def _eval_expr(expr: Any, bindings: Dict[str, Any]) -> bool:
66
+ if expr is None:
67
+ return True
68
+ if not isinstance(expr, dict):
69
+ raise ValueError(f"Rule if must be a mapping, got {type(expr)!r}")
70
+ if len(expr) != 1:
71
+ raise ValueError("Rule if must contain a single operator.")
72
+ op, args = next(iter(expr.items()))
73
+ if op == "any":
74
+ return any(_eval_expr(item, bindings) for item in args)
75
+ if op == "all":
76
+ return all(_eval_expr(item, bindings) for item in args)
77
+ if op == "not":
78
+ return not _eval_expr(args, bindings)
79
+ if op == "always":
80
+ if not isinstance(args, bool):
81
+ raise ValueError("always expects a boolean.")
82
+ return args
83
+
84
+ if not isinstance(args, (list, tuple)) or len(args) != 2:
85
+ raise ValueError(f"Operator {op!r} requires two arguments.")
86
+ left = _resolve_operand(args[0], bindings)
87
+ right = _resolve_operand(args[1], bindings)
88
+
89
+ if left is None or right is None:
90
+ if op == "eq":
91
+ return left == right
92
+ if op == "ne":
93
+ return left != right
94
+ return False
95
+
96
+ if op == "eq":
97
+ return left == right
98
+ if op == "ne":
99
+ return left != right
100
+ if op == "in":
101
+ try:
102
+ return left in right
103
+ except TypeError:
104
+ return False
105
+ if op == "regex":
106
+ import re
107
+ if left is None:
108
+ return False
109
+ return re.search(str(right), str(left)) is not None
110
+ if op == "startswith":
111
+ if left is None:
112
+ return False
113
+ return str(left).startswith(str(right))
114
+ if op == "contains":
115
+ if left is None:
116
+ return False
117
+ if isinstance(left, (list, tuple, set)):
118
+ return right in left
119
+ return str(right) in str(left)
120
+ if op == "gt":
121
+ return left > right
122
+ if op == "ge":
123
+ return left >= right
124
+ if op == "lt":
125
+ return left < right
126
+ if op == "le":
127
+ return left <= right
128
+ raise ValueError(f"Unsupported operator: {op}")
129
+
130
+
131
+ def _load_rule_transforms(rule: Dict[str, Any], base: Path) -> Dict[str, Any]:
132
+ transforms = rule.get("__transforms__")
133
+ if isinstance(transforms, dict):
134
+ return transforms
135
+ category = rule.get("__category__") if isinstance(rule.get("__category__"), str) else None
136
+ if category and category not in SPEC_CATEGORIES:
137
+ return {}
138
+ use = rule.get("use")
139
+ if not isinstance(use, str):
140
+ return {}
141
+ spec_path = rule.get("__spec_path__")
142
+ if isinstance(spec_path, Path):
143
+ _, transforms = load_spec(spec_path, validate=False)
144
+ return transforms
145
+ spec_path = _resolve_rule_use(rule, base=base)
146
+ if isinstance(spec_path, Path) and spec_path.exists():
147
+ _, transforms = load_spec(spec_path, validate=False)
148
+ return transforms
149
+ return {}
150
+
151
+
152
def rule_matches(
    source: Any,
    rule: Dict[str, Any],
    *,
    base: Path,
) -> bool:
    """Decide whether a rule applies to ``source``.

    A rule without ``when`` always matches. Otherwise ``when`` is mapped to
    bindings with ``map_parameters`` (using the rule's transforms) and the
    rule's ``if`` expression is evaluated against them.

    Args:
        source: Object the ``when`` parameters are read from.
        rule: Rule mapping.
        base: Root directory used to locate the rule's spec.

    Returns:
        True when the rule matches.

    Raises:
        ValueError: If ``when`` is not a mapping, or if evaluating the ``if``
            expression fails.
    """
    conditions = rule.get("when")
    if conditions is None:
        logger.debug("Rule %r: no 'when' clause, matches by default.", rule.get("name"))
        return True
    if not isinstance(conditions, dict):
        raise ValueError("Rule 'when' must be a mapping.")
    rule_transforms = _load_rule_transforms(rule, base)
    bindings = map_parameters(source, conditions, rule_transforms, validate=False)
    logger.debug("Rule %r: when bindings=%s", rule.get("name"), bindings)
    try:
        outcome = _eval_expr(rule.get("if"), bindings)
        logger.debug(
            "Rule %r: bindings=%s if=%s matched=%s",
            rule.get("name"),
            bindings,
            rule.get("if"),
            outcome,
        )
        return outcome
    except Exception as exc:
        # Re-raise with the rule name so the failing rule can be identified.
        label = rule.get("name", "<unnamed>")
        raise ValueError(f"Rule {label!r} evaluation failed: {exc}") from exc
180
+
181
+
182
def select_rule_use(
    source: Any,
    rules: List[Dict[str, Any]],
    *,
    base: Path,
    resolve_paths: bool = True,
) -> Optional[Union[str, Path]]:
    """Pick the ``use`` target of the last rule that matches ``source``.

    Every rule is evaluated in order and a later match replaces an earlier
    one. Rules whose evaluation raises are logged at debug level and skipped.

    Args:
        source: Object the rules are evaluated against.
        rules: Rule mappings; non-mapping entries are ignored.
        base: Root directory used to resolve spec paths.
        resolve_paths: If True, return a path (the rule's ``__spec_path__``
            when it is a Path, else the path resolved from ``use``); if
            False, return the raw ``use`` string.

    Returns:
        The selected target, or None when no rule with a string ``use``
        matched.
    """
    chosen: Optional[Union[str, Path]] = None
    for rule in rules:
        if not isinstance(rule, dict):
            continue
        logger.debug("Evaluating rule %r (use=%r).", rule.get("name"), rule.get("use"))
        try:
            hit = rule_matches(source, rule, base=base)
        except Exception as exc:
            logger.debug(
                "Rule %r evaluation failed: %s",
                rule.get("name"),
                exc,
                exc_info=True,
            )
            continue
        logger.debug("Rule %r: match=%s", rule.get("name"), hit)
        if not hit:
            logger.debug("Rule %r did not match.", rule.get("name"))
            continue
        use = rule.get("use")
        if not isinstance(use, str):
            logger.debug("Rule %r matched but has no usable 'use' entry.", rule.get("name"))
            continue
        if resolve_paths:
            cached = rule.get("__spec_path__")
            chosen = cached if isinstance(cached, Path) else _resolve_spec_path(use, base)
        else:
            chosen = use
        logger.debug("Rule %r matched, selected use=%r.", rule.get("name"), chosen)
    logger.debug("Rule selection result: %r", chosen)
    return chosen
223
+
224
+
225
def load_rules(
    root: Optional[Union[str, Path]] = None,
    *,
    rules_dir: Optional[Path] = None,
    validate: bool = True,
) -> Dict[str, List[Dict[str, Any]]]:
    """Load every rule file and merge the rules per category.

    Each rule mapping is tagged in place with ``__category__``. Rules in a
    spec category that have a string ``use`` also receive ``__spec_path__``
    and ``__transforms__`` from their spec; each spec is loaded once.

    Args:
        root: Configuration root, resolved with ``config_module.resolve_root``.
        rules_dir: Directory holding the rule files; defaults to
            ``<root>/rules``.
        validate: If True, validate rule files and specs and fail on a
            missing spec; if False, rules with a missing spec are kept
            without spec information.

    Returns:
        Mapping from each rule category to its list of rules.

    Raises:
        ValueError: If a category entry in a rule file is not a list.
        FileNotFoundError: If ``validate`` is True and a referenced spec
            cannot be found.
    """
    base = config_module.resolve_root(root)
    rules_path = rules_dir or (base / "rules")
    merged = {category: [] for category in RULE_CATEGORIES}
    spec_transforms: Dict[Path, Dict[str, Any]] = {}
    for rule_file in _iter_rule_files(rules_path):
        data = _load_rule_file(rule_file)
        if validate and data:
            validate_rules(data)
        for category in RULE_CATEGORIES:
            entries = data.get(category, [])
            if not entries:
                continue
            if not isinstance(entries, list):
                raise ValueError(f"{rule_file}: {category} must be a list.")
            needs_spec = category in SPEC_CATEGORIES
            for entry in entries:
                if not isinstance(entry, dict):
                    continue
                entry["__category__"] = category
                if not needs_spec or not isinstance(entry.get("use"), str):
                    continue
                spec_path = _resolve_rule_use(entry, base=base)
                if not isinstance(spec_path, Path) or not spec_path.exists():
                    if validate:
                        raise FileNotFoundError(spec_path)
                    continue
                if spec_path not in spec_transforms:
                    _, loaded = load_spec(spec_path, validate=validate)
                    spec_transforms[spec_path] = loaded
                entry["__spec_path__"] = spec_path
                entry["__transforms__"] = spec_transforms[spec_path]
            # All entries are merged, including those skipped above.
            merged[category].extend(entries)
    return merged
@@ -0,0 +1,103 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Any, List, Dict, Optional
5
+ from importlib import resources
6
+
7
+ try:
8
+ resources.files # type: ignore[attr-defined]
9
+ except AttributeError: # pragma: no cover - fallback for Python 3.8
10
+ import importlib_resources as resources # type: ignore[assignment]
11
+
12
+ import yaml
13
+
14
+ from ...core.entrypoints import list_entry_points
15
+
16
+ CONVERTER_GROUP = "brkraw.converter_hook"
17
+
18
def _load_schema() -> Dict[str, Any]:
    """Read ``rules.yaml`` from the ``brkraw.schema`` package and parse it.

    Returns:
        The parsed YAML content of the bundled rules schema.

    Raises:
        RuntimeError: If ``__package__`` is None.
    """
    if __package__ is None:
        raise RuntimeError("Package context required to load rules schema.")
    schema_file = resources.files("brkraw.schema").joinpath("rules.yaml")
    with schema_file.open("r", encoding="utf-8") as stream:
        return yaml.safe_load(stream)
25
+
26
+
27
def validate_rules(
    rule_data: Dict[str, Any],
    schema_path: Optional[Path] = None,
) -> None:
    """Validate rule mappings against schema and hook availability.

    The data is checked against the rules schema first, then the default-rule
    ordering and the converter hook references are checked.

    Args:
        rule_data: Parsed rule mapping to validate.
        schema_path: Optional rules schema path override; the bundled schema
            is used when omitted.

    Raises:
        RuntimeError: If ``jsonschema`` cannot be imported.
        ValueError: If the default-rule or converter hook checks fail.
    """
    try:
        import jsonschema
    except ImportError as exc:
        raise RuntimeError("jsonschema is required to validate rule files.") from exc
    if schema_path is None:
        schema = _load_schema()
    else:
        schema = yaml.safe_load(schema_path.read_text(encoding="utf-8"))
    # Schema violations surface as whatever ``validate`` raises.
    jsonschema.Draft202012Validator(schema).validate(rule_data)
    _validate_default_rules(rule_data)
    _validate_converter_hooks(rule_data)
49
+
50
+
51
+ def _validate_default_rules(rule_data: Dict[str, Any]) -> None:
52
+ """Ensure default rules (no 'when') appear first and avoid 'if'."""
53
+ for category, items in rule_data.items():
54
+ if not isinstance(items, list):
55
+ continue
56
+ default_indexes = []
57
+ for idx, rule in enumerate(items):
58
+ if not isinstance(rule, dict):
59
+ continue
60
+ has_when = "when" in rule
61
+ has_if = "if" in rule
62
+ if has_when and not has_if:
63
+ name = rule.get("name", "<unnamed>")
64
+ raise ValueError(
65
+ f"Rule {name!r} in {category!r} must define 'if' when 'when' is present."
66
+ )
67
+ if not has_when:
68
+ if has_if:
69
+ name = rule.get("name", "<unnamed>")
70
+ raise ValueError(
71
+ f"Rule {name!r} in {category!r} cannot use 'if' without 'when'."
72
+ )
73
+ default_indexes.append(idx)
74
+ if not default_indexes:
75
+ continue
76
+ if len(default_indexes) > 1 or default_indexes[0] != 0:
77
+ raise ValueError(
78
+ f"Default rule (no 'when') must be the first entry in {category!r}."
79
+ )
80
+
81
+
82
+ def _validate_converter_hooks(rule_data: Dict[str, Any]) -> None:
83
+ """Ensure converter_hook references resolve to installed hooks."""
84
+ missing: List[str] = []
85
+ items = rule_data.get("converter_hook", [])
86
+ if not items:
87
+ return
88
+ if not isinstance(items, list):
89
+ return
90
+ for item in items:
91
+ if not isinstance(item, dict):
92
+ continue
93
+ use = item.get("use")
94
+ if not isinstance(use, str):
95
+ continue
96
+ if not list_entry_points(CONVERTER_GROUP, use):
97
+ missing.append(use)
98
+ if missing:
99
+ missing_text = ", ".join(sorted(set(missing)))
100
+ raise ValueError(
101
+ "converter_hook references missing hooks: "
102
+ f"{missing_text} (group={CONVERTER_GROUP})"
103
+ )