bidsval 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. bidsval/__init__.py +45 -0
  2. bidsval/cli.py +404 -0
  3. bidsval/context/__init__.py +18 -0
  4. bidsval/context/associations.py +219 -0
  5. bidsval/context/builder.py +167 -0
  6. bidsval/context/entities.py +38 -0
  7. bidsval/context/inheritance.py +171 -0
  8. bidsval/context/loaders.py +129 -0
  9. bidsval/expr/__init__.py +20 -0
  10. bidsval/expr/evaluator.py +342 -0
  11. bidsval/expr/functions.py +351 -0
  12. bidsval/files/__init__.py +14 -0
  13. bidsval/files/bidsignore.py +75 -0
  14. bidsval/files/tree.py +146 -0
  15. bidsval/issues.py +128 -0
  16. bidsval/render/__init__.py +25 -0
  17. bidsval/render/html.py +100 -0
  18. bidsval/render/json.py +56 -0
  19. bidsval/render/sarif.py +72 -0
  20. bidsval/render/text.py +41 -0
  21. bidsval/report.py +103 -0
  22. bidsval/rules/__init__.py +13 -0
  23. bidsval/rules/bespoke.py +63 -0
  24. bidsval/rules/citation.py +65 -0
  25. bidsval/rules/column_types.py +181 -0
  26. bidsval/rules/dataset_checks.py +143 -0
  27. bidsval/rules/engine.py +356 -0
  28. bidsval/rules/filenames.py +498 -0
  29. bidsval/rules/guidance.py +120 -0
  30. bidsval/rules/integrity.py +191 -0
  31. bidsval/rules/tables.py +298 -0
  32. bidsval/rules/values.py +102 -0
  33. bidsval/schema/__init__.py +31 -0
  34. bidsval/schema/bundled/1.10.0.json +1 -0
  35. bidsval/schema/bundled/1.10.1.json +1 -0
  36. bidsval/schema/bundled/1.11.0.json +1 -0
  37. bidsval/schema/bundled/1.11.1.json +1 -0
  38. bidsval/schema/bundled/1.8.0.json +1 -0
  39. bidsval/schema/bundled/1.9.0.json +1 -0
  40. bidsval/schema/cache.py +67 -0
  41. bidsval/schema/introspect.py +142 -0
  42. bidsval/schema/resolve.py +132 -0
  43. bidsval/validate.py +228 -0
  44. bidsval-0.0.1.dist-info/METADATA +217 -0
  45. bidsval-0.0.1.dist-info/RECORD +49 -0
  46. bidsval-0.0.1.dist-info/WHEEL +5 -0
  47. bidsval-0.0.1.dist-info/entry_points.txt +2 -0
  48. bidsval-0.0.1.dist-info/licenses/LICENSE +21 -0
  49. bidsval-0.0.1.dist-info/top_level.txt +1 -0
bidsval/__init__.py ADDED
@@ -0,0 +1,45 @@
1
+ """bidsval - a schema-driven, pydantic-typed, in-process BIDS validator.
2
+
3
+ The public surface grows as the validator does. Today it exposes the two pieces
4
+ that the rest of the engine is built on:
5
+
6
+ * the schema resolver (:func:`bidsval.schema.resolve`), the single place that
7
+ turns a schema selector into one in-memory schema object, and
8
+ * the expression evaluator (:func:`bidsval.expr.evaluate_string`), which runs a
9
+ BIDS schema expression against a context.
10
+
11
+ Result types (:class:`~bidsval.issues.Issue`, :class:`~bidsval.report.ValidationReport`)
12
+ are re-exported here so consumers can ``from bidsval import Issue, ValidationReport``.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from .expr import evaluate_string
18
+ from .issues import DatasetIssues, Issue, Severity
19
+ from .report import FileVerdict, ValidationReport
20
+ from .schema import available_versions, bids_version, resolve, schema_version
21
+ from .validate import validate, validate_file, validate_subject
22
+
23
# Resolve the package version from the installed distribution's metadata. Any
# failure (for example a source checkout with no metadata) falls back to a
# placeholder so that importing the package never fails on version lookup.
try:  # populated from package metadata once installed
    from importlib.metadata import version

    __version__ = version("bidsval")
except Exception:  # pragma: no cover - source checkout without metadata
    __version__ = "0.0.0"

# Public API: every name below is imported above (or defined here) and
# re-exported so that consumers can import it directly from ``bidsval``.
__all__ = [
    "Severity",
    "Issue",
    "DatasetIssues",
    "FileVerdict",
    "ValidationReport",
    "resolve",
    "available_versions",
    "schema_version",
    "bids_version",
    "evaluate_string",
    "validate",
    "validate_subject",
    "validate_file",
    "__version__",
]
bidsval/cli.py ADDED
@@ -0,0 +1,404 @@
1
+ """Command-line entry point for bidsval.
2
+
3
+ * ``bidsval validate PATH`` - validate a dataset and report errors and warnings as
4
+ text, JSON, SARIF, or HTML.
5
+ * ``bidsval schema`` - show the schema version a selector resolves to and the bundled versions.
6
+ * ``bidsval eval EXPR`` - evaluate one BIDS schema expression against a context.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import argparse
12
+ import json
13
+ import sys
14
+ from pathlib import Path
15
+
16
+ from . import __version__
17
+ from .expr import EvaluationError, evaluate_string
18
+ from .issues import Severity
19
+ from .render import EXTENSIONS, RENDERERS
20
+ from .schema import SchemaNotAvailable, available_versions, bids_version, resolve, schema_version
21
+ from .validate import validate as run_validate
22
+
23
+ _DESCRIPTION = """\
24
+ A schema-driven, pydantic-typed, in-process BIDS validator written in pure Python.
25
+
26
+ It reads the official BIDS schema and checks a dataset against the rules in it: file
27
+ names and locations, sidecar metadata (presence and value type), associated files, and
28
+ tabular columns. It runs with no external runtime and reports findings as text, JSON,
29
+ SARIF, or HTML.
30
+ """
31
+
32
+ _EPILOG = """\
33
+ Examples:
34
+ bidsval validate /path/to/dataset
35
+ bidsval validate /path/to/dataset --show all
36
+ bidsval validate /path/to/dataset --schema 1.10.0
37
+ bidsval validate /path/to/dataset --output-type json,html --out-dir ./reports
38
+ bidsval schema
39
+ bidsval eval "suffix == 'T1w'" --context '{"suffix": "T1w"}'
40
+
41
+ Run 'bidsval <command> -h' for command-specific help and more examples.
42
+
43
+ Exit codes:
44
+ 0 the dataset is valid (no errors); warnings may still be present
45
+ 1 validation found errors, or an expression failed to evaluate
46
+ 2 usage error, file/IO error, schema not available, or invalid JSON input
47
+ """
48
+
49
+ _VALIDATE_DESCRIPTION = """\
50
+ Validate a BIDS dataset against the schema and report errors and warnings.
51
+
52
+ A dataset is valid when it has no errors. Errors are rule violations (a misplaced or
53
+ misnamed file, a required field that is missing, a value of the wrong type). Warnings
54
+ flag recommended-but-missing metadata and do not affect validity. Use --subject to
55
+ check a single participant.
56
+ """
57
+
58
+ _VALIDATE_EPILOG = """\
59
+ Examples:
60
+ # quick check (text summary; exits non-zero on errors, so it fits CI)
61
+ bidsval validate /data/my_study
62
+
63
+ # show everything, including warnings and suppressed notes
64
+ bidsval validate /data/my_study --show all
65
+
66
+ # check a single subject (the sub- prefix is optional)
67
+ bidsval validate /data/my_study --subject 01
68
+
69
+ # pin a schema version for reproducible results
70
+ bidsval validate /data/my_study --schema 1.10.0
71
+
72
+ # skip NIfTI header reading (faster)
73
+ bidsval validate /data/my_study --no-headers
74
+
75
+ # write machine-readable and HTML reports into a directory
76
+ bidsval validate /data/my_study --output-type json,html --out-dir ./reports
77
+
78
+ Output:
79
+ One format prints to stdout (text by default). Selecting more than one format requires
80
+ --out-dir, which writes report.<ext> per format (report.txt, report.json,
81
+ report.sarif, report.html). In the text report each issue is one line:
82
+ SEVERITY CODE [field] file - message.
83
+ """
84
+
85
+ _SCHEMA_DESCRIPTION = """\
86
+ Show the schema bidsval would use and the versions bundled with this install.
87
+
88
+ Prints the BIDS version and the schema version a selector resolves to, plus the list of
89
+ bundled versions you can pass to --schema, here or on 'validate'.
90
+ """
91
+
92
+ _SCHEMA_EPILOG = """\
93
+ Examples:
94
+ # the default (bundled latest) schema
95
+ bidsval schema
96
+
97
+ # a specific bundled version
98
+ bidsval schema --schema 1.10.0
99
+
100
+ # the development tip, fetched from the spec and cached
101
+ bidsval schema --schema latest
102
+ """
103
+
104
+ _EVAL_DESCRIPTION = """\
105
+ Evaluate one BIDS schema expression against a context and print the result as JSON.
106
+
107
+ This exposes the same expression engine the validator uses, which is handy for
108
+ understanding a rule or testing a single condition. Undefined names evaluate to null.
109
+ """
110
+
111
+ _EVAL_EPILOG = """\
112
+ Examples:
113
+ # a simple comparison
114
+ bidsval eval "suffix == 'T1w'" --context '{"suffix": "T1w"}'
115
+
116
+ # combine conditions with && and ||
117
+ bidsval eval "x > 0 && x < 10" --context '{"x": 5}'
118
+
119
+ # arithmetic and the modulo operator
120
+ bidsval eval "n % 2 == 0" --context '{"n": 4}'
121
+
122
+ Operators:
123
+ comparison == != < <= > >=
124
+ logical && || !
125
+ arithmetic + - * / %
126
+
127
+ --context must be a JSON object (not an array or a scalar). An undefined name is null.
128
+ """
129
+
130
+
131
# Order in which subcommands are shown in the full (`bidsval --help`) dump: the
# primary command first. Subcommands that are not listed here are sorted after
# the listed ones.
_HELP_ORDER = ["validate", "schema", "eval"]
134
+
135
+
136
+ def _print_full_help(parser: argparse.ArgumentParser) -> None:
137
+ """Print the overview help, then every subcommand's full help.
138
+
139
+ Argparse's default top-level help lists only the subcommand names. This prints
140
+ that overview and then each subcommand's complete help (its arguments, their
141
+ explanations, and its examples), so ``bidsval --help`` shows everything in one
142
+ place without drilling into each subcommand.
143
+ """
144
+ parser.print_help()
145
+ sub_actions = [a for a in parser._actions if isinstance(a, argparse._SubParsersAction)]
146
+ for action in sub_actions:
147
+ names = sorted(
148
+ action.choices,
149
+ key=lambda n: _HELP_ORDER.index(n) if n in _HELP_ORDER else len(_HELP_ORDER),
150
+ )
151
+ for name in names:
152
+ print("\n" + "=" * 78)
153
+ print(f" bidsval {name}")
154
+ print("=" * 78)
155
+ action.choices[name].print_help()
156
+
157
+
158
+ class _FullHelpAction(argparse.Action):
159
+ """A ``-h/--help`` that prints the overview plus every subcommand's full help."""
160
+
161
+ def __init__(
162
+ self, option_strings, dest=argparse.SUPPRESS, default=argparse.SUPPRESS, help=None
163
+ ):
164
+ super().__init__(
165
+ option_strings=option_strings, dest=dest, default=default, nargs=0, help=help
166
+ )
167
+
168
+ def __call__(self, parser, namespace, values, option_string=None):
169
+ _print_full_help(parser)
170
+ parser.exit()
171
+
172
+
173
def build_parser() -> argparse.ArgumentParser:
    """Assemble the ``bidsval`` argument parser with its three subcommands.

    The top-level parser replaces argparse's built-in help with
    :class:`_FullHelpAction` and adds ``--version``. The subcommands are
    registered in the order ``schema``, ``eval``, ``validate``; each one stores
    its handler as ``func`` on the parsed namespace.
    """
    raw = argparse.RawDescriptionHelpFormatter  # keep the hand-formatted help text

    root = argparse.ArgumentParser(
        prog="bidsval",
        description=_DESCRIPTION,
        epilog=_EPILOG,
        formatter_class=raw,
        add_help=False,  # replaced by the full-help action below
    )
    root.add_argument(
        "-h",
        "--help",
        action=_FullHelpAction,
        help="show this help, including every command's options and examples, and exit",
    )
    root.add_argument("--version", action="version", version=f"bidsval {__version__}")
    commands = root.add_subparsers(dest="command", metavar="<command>", title="commands")

    # --- bidsval schema ---------------------------------------------------
    schema_parser = commands.add_parser(
        "schema",
        help="show the resolved schema version and bundled versions",
        description=_SCHEMA_DESCRIPTION,
        epilog=_SCHEMA_EPILOG,
        formatter_class=raw,
    )
    schema_parser.add_argument(
        "--schema",
        default=None,
        metavar="SELECTOR",
        help="schema to resolve: a BIDS version (e.g. 1.11.1), 'latest', a URL, a local "
        "schema.json, or a YAML schema source directory (default: the bundled latest)",
    )
    schema_parser.set_defaults(func=_run_schema)

    # --- bidsval eval -----------------------------------------------------
    eval_parser = commands.add_parser(
        "eval",
        help="evaluate a BIDS schema expression",
        description=_EVAL_DESCRIPTION,
        epilog=_EVAL_EPILOG,
        formatter_class=raw,
    )
    eval_parser.add_argument(
        "expression", metavar="EXPR", help="the expression to evaluate, e.g. \"suffix == 'T1w'\""
    )
    eval_parser.add_argument(
        "--context",
        default="{}",
        metavar="JSON",
        help="JSON object of variables the expression can reference (default: {}). "
        "Must be a JSON object, not an array or a scalar.",
    )
    eval_parser.set_defaults(func=_run_eval)

    # --- bidsval validate -------------------------------------------------
    validate_parser = commands.add_parser(
        "validate",
        help="validate a BIDS dataset",
        description=_VALIDATE_DESCRIPTION,
        epilog=_VALIDATE_EPILOG,
        formatter_class=raw,
    )
    validate_parser.add_argument(
        "dataset",
        metavar="PATH",
        help="path to the dataset root (the folder that holds dataset_description.json)",
    )
    validate_parser.add_argument(
        "--schema",
        default=None,
        metavar="SELECTOR",
        help="schema to validate against: a BIDS version (e.g. 1.11.1), 'latest', a URL, a "
        "local schema.json, or a YAML schema source directory (default: the bundled latest). "
        "Run 'bidsval schema' to list bundled versions.",
    )
    validate_parser.add_argument(
        "--subject",
        default=None,
        metavar="SUB",
        help="validate only this subject. Accepts sub-01 or just 01 (the sub- prefix is "
        "added if missing).",
    )
    validate_parser.add_argument(
        "--no-headers",
        action="store_true",
        help="skip NIfTI header checks (faster). Headers are read by default (needs nibabel); "
        "if nibabel is not installed these checks are skipped anyway.",
    )
    validate_parser.add_argument(
        "--recursive",
        action="store_true",
        help="also validate BIDS datasets under derivatives/ (each on its own).",
    )
    validate_parser.add_argument(
        "--output-type",
        default="text",
        metavar="TYPES",
        help="comma-separated output formats: text, json, sarif, html, or 'all' "
        "(default: text). Selecting more than one requires --out-dir.",
    )
    validate_parser.add_argument(
        "--out-dir",
        metavar="DIR",
        help="write reports to this directory (created if needed), one report.<ext> per "
        "--output-type. Required when more than one format is selected; a single format "
        "prints to stdout.",
    )
    validate_parser.add_argument(
        "--show",
        default="error,warning",
        metavar="LEVELS",
        help="severities to display: any of error, warning, ignore, or 'all' "
        "(default: error,warning). Filters the output only; it does not change validity "
        "or the exit code.",
    )
    validate_parser.set_defaults(func=_run_validate)

    return root
288
+
289
+
290
def main(argv: list[str] | None = None) -> int:
    """Parse ``argv`` and run the selected subcommand, returning its exit code.

    When no subcommand is given, the full help is printed and ``0`` is returned.
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    if getattr(args, "command", None):
        # Each subcommand registered its handler as ``func``.
        return args.func(args)
    _print_full_help(parser)
    return 0
297
+
298
+
299
def _run_schema(args: argparse.Namespace) -> int:
    """Handle ``bidsval schema``: print the resolved and the bundled schema versions.

    Returns ``2`` (with a message on stderr) when the selector cannot be
    resolved, and ``0`` otherwise.
    """
    try:
        schema = resolve(args.schema)
    except SchemaNotAvailable as error:
        print(f"error: {error}", file=sys.stderr)
        return 2

    # Values are computed lazily, one line at a time, in output order.
    rows = (
        ("BIDS version ", lambda: bids_version(schema)),
        ("schema version", lambda: schema_version(schema)),
        ("bundled ", lambda: ", ".join(available_versions())),
    )
    for label, compute in rows:
        print(f"{label}: {compute()}")
    return 0
309
+
310
+
311
+ def _run_eval(args: argparse.Namespace) -> int:
312
+ try:
313
+ context = json.loads(args.context)
314
+ except json.JSONDecodeError as error:
315
+ print(f"error: --context is not valid JSON: {error}", file=sys.stderr)
316
+ return 2
317
+ if not isinstance(context, dict):
318
+ print("error: --context must be a JSON object", file=sys.stderr)
319
+ return 2
320
+ try:
321
+ result = evaluate_string(args.expression, context)
322
+ except EvaluationError as error:
323
+ print(f"error: {error}", file=sys.stderr)
324
+ return 1
325
+ print(json.dumps(result))
326
+ return 0
327
+
328
+
329
def _run_validate(args: argparse.Namespace) -> int:
    """Handle ``bidsval validate``: validate a dataset and emit the report(s).

    Option values that can be rejected without looking at the dataset
    (``--output-type``, ``--show``, and the ``--out-dir`` requirement for several
    formats) are checked *before* validation runs, so a typo fails immediately
    instead of after a full validation pass.

    Returns:
        ``0`` when the dataset is valid, ``1`` when validation found errors, and
        ``2`` for usage errors, an unavailable schema, a missing or non-directory
        dataset path, or a failure to write a report file.
    """
    # --- cheap usage checks first ------------------------------------------
    try:
        types = _parse_output_types(args.output_type)
        severities = _parse_severities(args.show)
    except ValueError as error:
        print(f"error: {error}", file=sys.stderr)
        return 2
    if len(types) > 1 and not args.out_dir:
        print("error: --out-dir is required when --output-type selects more than one format",
              file=sys.stderr)
        return 2

    # --- run the validation --------------------------------------------------
    subjects = None
    if args.subject:
        # Accept both "sub-01" and "01".
        sub = args.subject if args.subject.startswith("sub-") else f"sub-{args.subject}"
        subjects = [sub]
    try:
        report = run_validate(
            args.dataset,
            schema=args.schema,
            read_headers=not args.no_headers,
            subjects=subjects,
            recursive=args.recursive,
        )
    except (SchemaNotAvailable, FileNotFoundError, NotADirectoryError) as error:
        print(f"error: {error}", file=sys.stderr)
        return 2

    # Findings are filtered for display only; validity always depends on errors.
    display = report.filtered(severities)

    # --- emit the report(s) --------------------------------------------------
    if args.out_dir:
        out_dir = Path(args.out_dir)
        try:
            out_dir.mkdir(parents=True, exist_ok=True)
        except OSError as error:
            print(f"error: {error}", file=sys.stderr)
            return 2
        for output_type in sorted(types):
            destination = out_dir / f"report.{EXTENSIONS[output_type]}"
            rendered = RENDERERS[output_type](display)
            try:
                destination.write_text(rendered, encoding="utf-8")
            except OSError as error:
                # Documented exit code for file/IO errors is 2, not a traceback.
                print(f"error: {error}", file=sys.stderr)
                return 2
            print(f"wrote {destination}", file=sys.stderr)
    else:
        # Exactly one format is selected here (checked above).
        print(RENDERERS[next(iter(types))](display))

    return 0 if report.is_valid else 1
374
+
375
+
376
+ def _parse_output_types(value: str) -> set[str]:
377
+ requested = [t.strip().lower() for t in value.split(",") if t.strip()]
378
+ if "all" in requested:
379
+ return set(RENDERERS)
380
+ unknown = [t for t in requested if t not in RENDERERS]
381
+ if unknown:
382
+ raise ValueError(
383
+ f"unknown --output-type {unknown}; choose from {sorted(RENDERERS)} or 'all'"
384
+ )
385
+ return set(requested) or {"text"}
386
+
387
+
388
def _parse_severities(value: str) -> set[Severity]:
    """Turn a comma-separated ``--show`` value into a set of severities.

    Names are trimmed and lower-cased and empty items are dropped. ``all``
    selects every severity; an empty selection means errors and warnings.

    Raises:
        ValueError: if a name is not a valid severity.
    """
    names = [part.strip().lower() for part in value.split(",") if part.strip()]
    if "all" in names:
        return set(Severity)

    chosen: set[Severity] = set()
    for name in names:
        try:
            level = Severity(name)
        except ValueError as error:
            raise ValueError(
                f"unknown --show level {name!r}; choose from error, warning, ignore, or 'all'"
            ) from error
        chosen.add(level)

    return chosen if chosen else {Severity.ERROR, Severity.WARNING}
401
+
402
+
403
if __name__ == "__main__":  # pragma: no cover
    # Run the CLI and use its return value as the process exit status.
    raise SystemExit(main())
@@ -0,0 +1,18 @@
1
+ """Build the per-file context the rule engine evaluates against.
2
+
3
+ A *context* is a mapping of the names a BIDS schema expression may reference
4
+ (``entities``, ``datatype``, ``suffix``, ``sidecar``, ``nifti_header`` ...) to
5
+ their values for one file. Its shape follows the schema's own ``meta.context``
6
+ definition, so selectors and checks evaluate against exactly what the schema
7
+ expects.
8
+
9
+ :class:`~bidsval.context.builder.ContextBuilder` assembles it: parse the
10
+ filename, find the datatype, merge the inheritance-principle sidecars, and
11
+ lazily load file content (JSON, TSV columns, NIfTI headers).
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from .builder import ContextBuilder
17
+
18
+ __all__ = ["ContextBuilder"]
@@ -0,0 +1,219 @@
1
+ """Resolve a data file's associated files into the ``associations`` context.
2
+
3
+ Many schema checks look at files that travel with a data file: a ``dwi`` file's
4
+ ``.bval``/``.bvec``, a task recording's ``events.tsv``, an electrophysiology
5
+ recording's ``channels.tsv``, an ASL run's ``aslcontext.tsv``, and so on. The
6
+ schema describes each of these in ``meta.associations`` (a selector saying when
7
+ it applies, a target suffix/extension to look for, and whether it inherits up the
8
+ tree).
9
+
10
+ This module finds those files (using the same proximity walk as the inheritance
11
+ principle) and exposes them under ``associations.<name>`` with the fields the
12
+ checks read: a TSV's columns plus ``n_rows``/``n_cols`` and its sidecar; a
13
+ ``.bval``/``.bvec``'s ``values``/``n_rows``/``n_cols``; or just the path for
14
+ plain existence checks.
15
+
16
+ Only the association names listed in ``_BUILT`` are built here. ``coordsystems``
17
+ is an aggregate of every applicable ``coordsystem`` JSON, not a single target.
18
+ Names left out are skipped by the rule engine, so they are never guessed at.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from typing import Any
24
+
25
+ from bidsschematools.types.namespace import Namespace
26
+
27
+ from ..expr import EvaluationError, evaluate_string
28
+ from ..expr.functions import truthy
29
+ from ..files import BIDSFile, FileTree
30
+ from .entities import parse_filename
31
+ from .inheritance import _is_subset, merged_sidecar
32
+ from .loaders import load_columns, load_json
33
+
34
# Association names built here (the rule engine relies on these being populated).
# build_associations() ignores any schema association whose name is not in this set.
_BUILT = {
    "events", "bval", "bvec", "channels", "aslcontext", "m0scan",
    "magnitude", "magnitude1", "coordsystem", "electrodes", "physio",
    "atlas_description", "coordsystems",
}
40
+
41
+
42
def build_associations(
    schema: Namespace,
    tree: FileTree,
    data_file: BIDSFile,
    source_entities: dict[str, str],
    source_suffix: str,
    source_extension: str,
    source_datatype: str = "",
) -> dict[str, Any]:
    """Build the ``associations`` mapping for a single data file.

    Each entry of the schema's ``meta.associations`` is considered when its name
    is in ``_BUILT`` and all of its selectors hold for the source file. The
    ``coordsystems`` entry is an aggregate; every other entry is resolved to one
    target file and wrapped by ``_association_object``. Associations with no
    matching file are left out. A file without a suffix has no associations.
    """
    associations: dict[str, Any] = {}
    if not source_suffix:
        return associations

    selector_context = {
        "suffix": source_suffix,
        "extension": source_extension,
        "entities": source_entities,
        "datatype": source_datatype,
    }
    for name, spec in schema["meta"].get("associations", {}).items():
        if name not in _BUILT:
            continue
        if not _spec_applies(spec.get("selectors", []), selector_context):
            continue

        if name == "coordsystems":
            # An aggregate of all coordsystem files (one per space-), with the
            # fields the EMG rules read; not a single target.
            aggregate = _build_coordsystems(schema, tree, data_file, source_entities)
            if aggregate is not None:
                associations[name] = aggregate
            continue

        target = spec.get("target", {})
        # The target suffix defaults to the source file's own suffix.
        wanted_suffix = str(target.get("suffix", source_suffix))
        wanted_extensions = _as_list(target.get("extension"))
        inherits = bool(spec.get("inherit", False))
        match = _find_target(
            schema, tree, data_file, source_entities, wanted_suffix, wanted_extensions, inherits
        )
        if match is not None:
            associations[name] = _association_object(schema, tree, match)
    return associations
88
+
89
+
90
+ def _build_coordsystems(
91
+ schema: Namespace,
92
+ tree: FileTree,
93
+ data_file: BIDSFile,
94
+ source_entities: dict[str, str],
95
+ ) -> dict[str, Any] | None:
96
+ """Collect every applicable ``coordsystem`` JSON (one per ``space-``) and expose
97
+ ``paths`` / ``spaces`` / ``ParentCoordinateSystems`` (the EMG rules read these).
98
+
99
+ A coordsystem matches when its entities are a subset of the source's, except the
100
+ ``space`` entity may differ (the target allows it), mirroring the reference's
101
+ ``targetEntities=['space']`` walk.
102
+ """
103
+ found: list[tuple[BIDSFile, dict[str, str]]] = []
104
+ for dir_relpath in tree.ancestor_dirs(data_file.relpath): # inherit up the tree
105
+ for candidate in tree.files_in(dir_relpath):
106
+ cand_entities, cand_suffix, cand_ext = parse_filename(schema, candidate.name)
107
+ if cand_suffix != "coordsystem" or cand_ext != ".json":
108
+ continue
109
+ if all(source_entities.get(k) == v or k == "space" for k, v in cand_entities.items()):
110
+ found.append((candidate, cand_entities))
111
+ if not found:
112
+ return None
113
+ parents: list[str] = []
114
+ for candidate, _entities in found:
115
+ data = load_json(candidate)
116
+ parent = data.get("ParentCoordinateSystem") if isinstance(data, dict) else None
117
+ if parent:
118
+ parents.append(parent)
119
+ return {
120
+ "paths": ["/" + candidate.relpath for candidate, _ in found],
121
+ "spaces": [ent["space"] for _f, ent in found if "space" in ent],
122
+ "ParentCoordinateSystems": parents,
123
+ }
124
+
125
+
126
+ def _spec_applies(selectors: list[str], context: dict[str, Any]) -> bool:
127
+ for selector in selectors:
128
+ try:
129
+ if not truthy(evaluate_string(selector, context)):
130
+ return False
131
+ except EvaluationError:
132
+ return False
133
+ return True
134
+
135
+
136
+ def _find_target(
137
+ schema: Namespace,
138
+ tree: FileTree,
139
+ data_file: BIDSFile,
140
+ source_entities: dict[str, str],
141
+ target_suffix: str,
142
+ target_extensions: list[str],
143
+ inherit: bool,
144
+ ) -> BIDSFile | None:
145
+ """The closest file matching the target suffix/extension with a subset of the
146
+ source's entities. Walks up the tree when the association inherits."""
147
+ dirs = tree.ancestor_dirs(data_file.relpath) if inherit else [data_file.parent]
148
+ for dir_relpath in dirs: # closest first
149
+ best: BIDSFile | None = None
150
+ best_specificity = -1
151
+ for candidate in tree.files_in(dir_relpath):
152
+ if candidate.relpath == data_file.relpath:
153
+ continue
154
+ cand_entities, cand_suffix, cand_ext = parse_filename(schema, candidate.name)
155
+ if target_suffix and cand_suffix != target_suffix:
156
+ continue
157
+ if target_extensions and cand_ext not in target_extensions:
158
+ continue
159
+ if not _is_subset(cand_entities, source_entities):
160
+ continue
161
+ if len(cand_entities) > best_specificity:
162
+ best, best_specificity = candidate, len(cand_entities)
163
+ if best is not None:
164
+ return best
165
+ return None
166
+
167
+
168
+ def _association_object(schema: Namespace, tree: FileTree, found: BIDSFile) -> Any:
169
+ """Build the object exposed under ``associations.<name>`` for a found file."""
170
+ name = found.name
171
+ path = "/" + found.relpath
172
+ if name.endswith(".tsv") or name.endswith(".tsv.gz"):
173
+ columns = load_columns(found, max_rows=-1)
174
+ n_rows = max((len(values) for values in columns.values()), default=0)
175
+ obj: dict[str, Any] = dict(columns)
176
+ obj.update(
177
+ n_rows=n_rows,
178
+ n_cols=len(columns),
179
+ sidecar=merged_sidecar(schema, tree, found),
180
+ path=path,
181
+ )
182
+ return obj
183
+ if name.endswith(".bval") or name.endswith(".bvec"):
184
+ return _numeric_matrix(found, path)
185
+ if name.endswith(".json"):
186
+ data = load_json(found)
187
+ data = dict(data) if isinstance(data, dict) else {}
188
+ data["path"] = path
189
+ return data
190
+ # A plain data file (e.g. m0scan, magnitude): only existence/path matters.
191
+ return {"path": path}
192
+
193
+
194
+ def _numeric_matrix(found: BIDSFile, path: str) -> dict[str, Any]:
195
+ """Parse a whitespace-delimited ``.bval``/``.bvec`` into values + shape."""
196
+ try:
197
+ text = found.read_text()
198
+ except OSError:
199
+ return {"values": [], "n_rows": 0, "n_cols": 0, "path": path}
200
+ rows = [line.split() for line in text.splitlines() if line.strip()]
201
+ values: list[float] = []
202
+ for row in rows:
203
+ for token in row:
204
+ try:
205
+ values.append(float(token))
206
+ except ValueError:
207
+ pass
208
+ return {
209
+ "values": values,
210
+ "n_rows": len(rows),
211
+ "n_cols": len(rows[0]) if rows else 0,
212
+ "path": path,
213
+ }
214
+
215
+
216
+ def _as_list(value: Any) -> list[str]:
217
+ if value is None:
218
+ return []
219
+ return [value] if isinstance(value, str) else list(value)