PyPI - bidsval - Versions diffs - 0.0.1__py3-none-any.whl - Mend

bidsval 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

bidsval/__init__.py +45 -0
bidsval/cli.py +404 -0
bidsval/context/__init__.py +18 -0
bidsval/context/associations.py +219 -0
bidsval/context/builder.py +167 -0
bidsval/context/entities.py +38 -0
bidsval/context/inheritance.py +171 -0
bidsval/context/loaders.py +129 -0
bidsval/expr/__init__.py +20 -0
bidsval/expr/evaluator.py +342 -0
bidsval/expr/functions.py +351 -0
bidsval/files/__init__.py +14 -0
bidsval/files/bidsignore.py +75 -0
bidsval/files/tree.py +146 -0
bidsval/issues.py +128 -0
bidsval/render/__init__.py +25 -0
bidsval/render/html.py +100 -0
bidsval/render/json.py +56 -0
bidsval/render/sarif.py +72 -0
bidsval/render/text.py +41 -0
bidsval/report.py +103 -0
bidsval/rules/__init__.py +13 -0
bidsval/rules/bespoke.py +63 -0
bidsval/rules/citation.py +65 -0
bidsval/rules/column_types.py +181 -0
bidsval/rules/dataset_checks.py +143 -0
bidsval/rules/engine.py +356 -0
bidsval/rules/filenames.py +498 -0
bidsval/rules/guidance.py +120 -0
bidsval/rules/integrity.py +191 -0
bidsval/rules/tables.py +298 -0
bidsval/rules/values.py +102 -0
bidsval/schema/__init__.py +31 -0
bidsval/schema/bundled/1.10.0.json +1 -0
bidsval/schema/bundled/1.10.1.json +1 -0
bidsval/schema/bundled/1.11.0.json +1 -0
bidsval/schema/bundled/1.11.1.json +1 -0
bidsval/schema/bundled/1.8.0.json +1 -0
bidsval/schema/bundled/1.9.0.json +1 -0
bidsval/schema/cache.py +67 -0
bidsval/schema/introspect.py +142 -0
bidsval/schema/resolve.py +132 -0
bidsval/validate.py +228 -0
bidsval-0.0.1.dist-info/METADATA +217 -0
bidsval-0.0.1.dist-info/RECORD +49 -0
bidsval-0.0.1.dist-info/WHEEL +5 -0
bidsval-0.0.1.dist-info/entry_points.txt +2 -0
bidsval-0.0.1.dist-info/licenses/LICENSE +21 -0
bidsval-0.0.1.dist-info/top_level.txt +1 -0

bidsval/__init__.py ADDED Viewed

@@ -0,0 +1,45 @@
+"""bidsval - a schema-driven, pydantic-typed, in-process BIDS validator.
+The public surface grows as the validator does. Today it exposes the two pieces
+that the rest of the engine is built on:
+* the schema resolver (:func:`bidsval.schema.resolve`), the single place that
+  turns a schema selector into one in-memory schema object, and
+* the expression evaluator (:func:`bidsval.expr.evaluate_string`), which runs a
+  BIDS schema expression against a context.
+Result types (:class:`~bidsval.issues.Issue`, :class:`~bidsval.report.ValidationReport`)
+are re-exported here so consumers can ``from bidsval import Issue, ValidationReport``.
+"""
+from __future__ import annotations
+from .expr import evaluate_string
+from .issues import DatasetIssues, Issue, Severity
+from .report import FileVerdict, ValidationReport
+from .schema import available_versions, bids_version, resolve, schema_version
+from .validate import validate, validate_file, validate_subject
+# Resolve the version from the installed distribution's metadata. Any failure here
+# (for instance a source checkout with no metadata) falls back to "0.0.0".
+try:  # populated from package metadata once installed
+    from importlib.metadata import version
+    __version__ = version("bidsval")
+except Exception:  # pragma: no cover - source checkout without metadata
+    __version__ = "0.0.0"
+__all__ = [
+    "Severity",
+    "Issue",
+    "DatasetIssues",
+    "FileVerdict",
+    "ValidationReport",
+    "resolve",
+    "available_versions",
+    "schema_version",
+    "bids_version",
+    "evaluate_string",
+    "validate",
+    "validate_subject",
+    "validate_file",
+    "__version__",
+]

bidsval/cli.py ADDED Viewed

@@ -0,0 +1,404 @@
+"""Command-line entry point for bidsval.
+* ``bidsval validate PATH`` - validate a dataset and report errors and warnings as
+  text, JSON, SARIF, or HTML.
+* ``bidsval schema`` - show the schema version a selector resolves to and the bundled versions.
+* ``bidsval eval EXPR`` - evaluate one BIDS schema expression against a context.
+"""
+from __future__ import annotations
+import argparse
+import json
+import sys
+from pathlib import Path
+from . import __version__
+from .expr import EvaluationError, evaluate_string
+from .issues import Severity
+from .render import EXTENSIONS, RENDERERS
+from .schema import SchemaNotAvailable, available_versions, bids_version, resolve, schema_version
+from .validate import validate as run_validate
+_DESCRIPTION = """\
+A schema-driven, pydantic-typed, in-process BIDS validator written in pure Python.
+It reads the official BIDS schema and checks a dataset against the rules in it: file
+names and locations, sidecar metadata (presence and value type), associated files, and
+tabular columns. It runs with no external runtime and reports findings as text, JSON,
+SARIF, or HTML.
+"""
+_EPILOG = """\
+Examples:
+  bidsval validate /path/to/dataset
+  bidsval validate /path/to/dataset --show all
+  bidsval validate /path/to/dataset --schema 1.10.0
+  bidsval validate /path/to/dataset --output-type json,html --out-dir ./reports
+  bidsval schema
+  bidsval eval "suffix == 'T1w'" --context '{"suffix": "T1w"}'
+Run 'bidsval <command> -h' for command-specific help and more examples.
+Exit codes:
+  0   the dataset is valid (no errors); warnings may still be present
+  1   validation found errors, or an expression failed to evaluate
+  2   usage error, file/IO error, schema not available, or invalid JSON input
+"""
+_VALIDATE_DESCRIPTION = """\
+Validate a BIDS dataset against the schema and report errors and warnings.
+A dataset is valid when it has no errors. Errors are rule violations (a misplaced or
+misnamed file, a required field that is missing, a value of the wrong type). Warnings
+flag recommended-but-missing metadata and do not affect validity. Use --subject to
+check a single participant.
+"""
+_VALIDATE_EPILOG = """\
+Examples:
+  # quick check (text summary; exits non-zero on errors, so it fits CI)
+  bidsval validate /data/my_study
+  # show everything, including warnings and suppressed notes
+  bidsval validate /data/my_study --show all
+  # check a single subject (the sub- prefix is optional)
+  bidsval validate /data/my_study --subject 01
+  # pin a schema version for reproducible results
+  bidsval validate /data/my_study --schema 1.10.0
+  # skip NIfTI header reading (faster)
+  bidsval validate /data/my_study --no-headers
+  # write machine-readable and HTML reports into a directory
+  bidsval validate /data/my_study --output-type json,html --out-dir ./reports
+Output:
+  One format prints to stdout (text by default). Selecting more than one format requires
+  --out-dir, which writes report.<ext> per format (report.txt, report.json,
+  report.sarif, report.html). In the text report each issue is one line:
+  SEVERITY CODE [field] file - message.
+"""
+_SCHEMA_DESCRIPTION = """\
+Show the schema bidsval would use and the versions bundled with this install.
+Prints the BIDS version and the schema version a selector resolves to, plus the list of
+bundled versions you can pass to --schema, here or on 'validate'.
+"""
+_SCHEMA_EPILOG = """\
+Examples:
+  # the default (bundled latest) schema
+  bidsval schema
+  # a specific bundled version
+  bidsval schema --schema 1.10.0
+  # the development tip, fetched from the spec and cached
+  bidsval schema --schema latest
+"""
+_EVAL_DESCRIPTION = """\
+Evaluate one BIDS schema expression against a context and print the result as JSON.
+This exposes the same expression engine the validator uses, which is handy for
+understanding a rule or testing a single condition. Undefined names evaluate to null.
+"""
+_EVAL_EPILOG = """\
+Examples:
+  # a simple comparison
+  bidsval eval "suffix == 'T1w'" --context '{"suffix": "T1w"}'
+  # combine conditions with && and ||
+  bidsval eval "x > 0 && x < 10" --context '{"x": 5}'
+  # arithmetic and the modulo operator
+  bidsval eval "n % 2 == 0" --context '{"n": 4}'
+Operators:
+  comparison  ==  !=  <  <=  >  >=
+  logical     &&  ||  !
+  arithmetic  +  -  *  /  %
+  --context must be a JSON object (not an array or a scalar). An undefined name is null.
+"""
+# Order in which subcommands appear in the full (`bidsval --help`) dump: the
+# primary command first. Subcommands not listed here sort after the listed ones.
+_HELP_ORDER = ["validate", "schema", "eval"]
+def _print_full_help(parser: argparse.ArgumentParser) -> None:
+    """Print the overview help, then every subcommand's full help.
+    Argparse's default top-level help lists only the subcommand names. This prints
+    that overview and then each subcommand's complete help (its arguments, their
+    explanations, and its examples), so ``bidsval --help`` shows everything in one
+    place without drilling into each subcommand.
+    """
+    parser.print_help()
+    sub_actions = [a for a in parser._actions if isinstance(a, argparse._SubParsersAction)]
+    for action in sub_actions:
+        names = sorted(
+            action.choices,
+            key=lambda n: _HELP_ORDER.index(n) if n in _HELP_ORDER else len(_HELP_ORDER),
+        )
+        for name in names:
+            print("\n" + "=" * 78)
+            print(f"  bidsval {name}")
+            print("=" * 78)
+            action.choices[name].print_help()
+class _FullHelpAction(argparse.Action):
+    """A ``-h/--help`` that prints the overview plus every subcommand's full help."""
+    def __init__(
+        self, option_strings, dest=argparse.SUPPRESS, default=argparse.SUPPRESS, help=None
+    ):
+        super().__init__(
+            option_strings=option_strings, dest=dest, default=default, nargs=0, help=help
+        )
+    def __call__(self, parser, namespace, values, option_string=None):
+        _print_full_help(parser)
+        parser.exit()
+def build_parser() -> argparse.ArgumentParser:
+    """Build the top-level ``bidsval`` parser and its three subcommands.
+    The stock ``-h/--help`` is disabled (``add_help=False``) and replaced by
+    :class:`_FullHelpAction`. Each subcommand (``schema``, ``eval``, ``validate``)
+    records its handler as ``func`` through ``set_defaults``, and the chosen
+    subcommand name is stored under ``command``.
+    """
+    parser = argparse.ArgumentParser(
+        prog="bidsval",
+        description=_DESCRIPTION,
+        epilog=_EPILOG,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        add_help=False,
+    )
+    # Top-level options: the custom full help and the version flag.
+    parser.add_argument(
+        "-h",
+        "--help",
+        action=_FullHelpAction,
+        help="show this help, including every command's options and examples, and exit",
+    )
+    parser.add_argument("--version", action="version", version=f"bidsval {__version__}")
+    subcommands = parser.add_subparsers(dest="command", metavar="<command>", title="commands")
+    # --- bidsval schema ---
+    schema_cmd = subcommands.add_parser(
+        "schema",
+        help="show the resolved schema version and bundled versions",
+        description=_SCHEMA_DESCRIPTION,
+        epilog=_SCHEMA_EPILOG,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    schema_cmd.add_argument(
+        "--schema",
+        default=None,
+        metavar="SELECTOR",
+        help="schema to resolve: a BIDS version (e.g. 1.11.1), 'latest', a URL, a local "
+        "schema.json, or a YAML schema source directory (default: the bundled latest)",
+    )
+    schema_cmd.set_defaults(func=_run_schema)
+    # --- bidsval eval ---
+    eval_cmd = subcommands.add_parser(
+        "eval",
+        help="evaluate a BIDS schema expression",
+        description=_EVAL_DESCRIPTION,
+        epilog=_EVAL_EPILOG,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    eval_cmd.add_argument(
+        "expression", metavar="EXPR", help="the expression to evaluate, e.g. \"suffix == 'T1w'\""
+    )
+    eval_cmd.add_argument(
+        "--context",
+        default="{}",
+        metavar="JSON",
+        help="JSON object of variables the expression can reference (default: {}). "
+        "Must be a JSON object, not an array or a scalar.",
+    )
+    eval_cmd.set_defaults(func=_run_eval)
+    # --- bidsval validate ---
+    validate_cmd = subcommands.add_parser(
+        "validate",
+        help="validate a BIDS dataset",
+        description=_VALIDATE_DESCRIPTION,
+        epilog=_VALIDATE_EPILOG,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    validate_cmd.add_argument(
+        "dataset",
+        metavar="PATH",
+        help="path to the dataset root (the folder that holds dataset_description.json)",
+    )
+    validate_cmd.add_argument(
+        "--schema",
+        default=None,
+        metavar="SELECTOR",
+        help="schema to validate against: a BIDS version (e.g. 1.11.1), 'latest', a URL, a "
+        "local schema.json, or a YAML schema source directory (default: the bundled latest). "
+        "Run 'bidsval schema' to list bundled versions.",
+    )
+    validate_cmd.add_argument(
+        "--subject",
+        default=None,
+        metavar="SUB",
+        help="validate only this subject. Accepts sub-01 or just 01 (the sub- prefix is "
+        "added if missing).",
+    )
+    validate_cmd.add_argument(
+        "--no-headers",
+        action="store_true",
+        help="skip NIfTI header checks (faster). Headers are read by default (needs nibabel); "
+        "if nibabel is not installed these checks are skipped anyway.",
+    )
+    validate_cmd.add_argument(
+        "--recursive",
+        action="store_true",
+        help="also validate BIDS datasets under derivatives/ (each on its own).",
+    )
+    # --output-type and --show are kept as raw strings here; the validate handler
+    # parses and checks them.
+    validate_cmd.add_argument(
+        "--output-type",
+        default="text",
+        metavar="TYPES",
+        help="comma-separated output formats: text, json, sarif, html, or 'all' "
+        "(default: text). Selecting more than one requires --out-dir.",
+    )
+    validate_cmd.add_argument(
+        "--out-dir",
+        metavar="DIR",
+        help="write reports to this directory (created if needed), one report.<ext> per "
+        "--output-type. Required when more than one format is selected; a single format "
+        "prints to stdout.",
+    )
+    validate_cmd.add_argument(
+        "--show",
+        default="error,warning",
+        metavar="LEVELS",
+        help="severities to display: any of error, warning, ignore, or 'all' "
+        "(default: error,warning). Filters the output only; it does not change validity "
+        "or the exit code.",
+    )
+    validate_cmd.set_defaults(func=_run_validate)
+    return parser
+def main(argv: list[str] | None = None) -> int:
+    parser = build_parser()
+    args = parser.parse_args(argv)
+    if not getattr(args, "command", None):
+        _print_full_help(parser)
+        return 0
+    return args.func(args)
+def _run_schema(args: argparse.Namespace) -> int:
+    try:
+        schema = resolve(args.schema)
+    except SchemaNotAvailable as error:
+        print(f"error: {error}", file=sys.stderr)
+        return 2
+    print(f"BIDS version  : {bids_version(schema)}")
+    print(f"schema version: {schema_version(schema)}")
+    print(f"bundled       : {', '.join(available_versions())}")
+    return 0
+def _run_eval(args: argparse.Namespace) -> int:
+    try:
+        context = json.loads(args.context)
+    except json.JSONDecodeError as error:
+        print(f"error: --context is not valid JSON: {error}", file=sys.stderr)
+        return 2
+    if not isinstance(context, dict):
+        print("error: --context must be a JSON object", file=sys.stderr)
+        return 2
+    try:
+        result = evaluate_string(args.expression, context)
+    except EvaluationError as error:
+        print(f"error: {error}", file=sys.stderr)
+        return 1
+    print(json.dumps(result))
+    return 0
+def _run_validate(args: argparse.Namespace) -> int:
+    subjects = None
+    if args.subject:
+        sub = args.subject if args.subject.startswith("sub-") else f"sub-{args.subject}"
+        subjects = [sub]
+    try:
+        report = run_validate(
+            args.dataset,
+            schema=args.schema,
+            read_headers=not args.no_headers,
+            subjects=subjects,
+            recursive=args.recursive,
+        )
+    except SchemaNotAvailable as error:
+        print(f"error: {error}", file=sys.stderr)
+        return 2
+    except (FileNotFoundError, NotADirectoryError) as error:
+        print(f"error: {error}", file=sys.stderr)
+        return 2
+    try:
+        types = _parse_output_types(args.output_type)
+        severities = _parse_severities(args.show)
+    except ValueError as error:
+        print(f"error: {error}", file=sys.stderr)
+        return 2
+    # Findings are filtered for display only; validity always depends on errors.
+    display = report.filtered(severities)
+    if args.out_dir:
+        out_dir = Path(args.out_dir)
+        out_dir.mkdir(parents=True, exist_ok=True)
+        for output_type in sorted(types):
+            destination = out_dir / f"report.{EXTENSIONS[output_type]}"
+            destination.write_text(RENDERERS[output_type](display), encoding="utf-8")
+            print(f"wrote {destination}", file=sys.stderr)
+    elif len(types) > 1:
+        print("error: --out-dir is required when --output-type selects more than one format",
+              file=sys.stderr)
+        return 2
+    else:
+        print(RENDERERS[next(iter(types))](display))
+    return 0 if report.is_valid else 1
+def _parse_output_types(value: str) -> set[str]:
+    requested = [t.strip().lower() for t in value.split(",") if t.strip()]
+    if "all" in requested:
+        return set(RENDERERS)
+    unknown = [t for t in requested if t not in RENDERERS]
+    if unknown:
+        raise ValueError(
+            f"unknown --output-type {unknown}; choose from {sorted(RENDERERS)} or 'all'"
+        )
+    return set(requested) or {"text"}
+def _parse_severities(value: str) -> set[Severity]:
+    requested = [s.strip().lower() for s in value.split(",") if s.strip()]
+    if "all" in requested:
+        return set(Severity)
+    out: set[Severity] = set()
+    for name in requested:
+        try:
+            out.add(Severity(name))
+        except ValueError as error:
+            raise ValueError(
+                f"unknown --show level {name!r}; choose from error, warning, ignore, or 'all'"
+            ) from error
+    return out or {Severity.ERROR, Severity.WARNING}
+if __name__ == "__main__":  # pragma: no cover
+    sys.exit(main())

bidsval/context/__init__.py ADDED Viewed

@@ -0,0 +1,18 @@
+"""Build the per-file context the rule engine evaluates against.
+A *context* is a mapping of the names a BIDS schema expression may reference
+(``entities``, ``datatype``, ``suffix``, ``sidecar``, ``nifti_header`` ...) to
+their values for one file. Its shape follows the schema's own ``meta.context``
+definition, so selectors and checks evaluate against exactly what the schema
+expects.
+:class:`~bidsval.context.builder.ContextBuilder` assembles it: parse the
+filename, find the datatype, merge the inheritance-principle sidecars, and
+lazily load file content (JSON, TSV columns, NIfTI headers).
+"""
+from __future__ import annotations
+from .builder import ContextBuilder
+__all__ = ["ContextBuilder"]

bidsval/context/associations.py ADDED Viewed

@@ -0,0 +1,219 @@
+"""Resolve a data file's associated files into the ``associations`` context.
+Many schema checks look at files that travel with a data file: a ``dwi`` file's
+``.bval``/``.bvec``, a task recording's ``events.tsv``, an electrophysiology
+recording's ``channels.tsv``, an ASL run's ``aslcontext.tsv``, and so on. The
+schema describes each of these in ``meta.associations`` (a selector saying when
+it applies, a target suffix/extension to look for, and whether it inherits up the
+tree).
+This module finds those files (using the same proximity walk as the inheritance
+principle) and exposes them under ``associations.<name>`` with the fields the
+checks read: a TSV's columns plus ``n_rows``/``n_cols`` and its sidecar; a
+``.bval``/``.bvec``'s ``values``/``n_rows``/``n_cols``; or just the path for
+plain existence checks.
+Two association names are handled specially. ``coordsystems`` is an aggregate of
+every applicable ``coordsystem`` JSON (one per ``space-``) rather than a single
+target file, and is assembled by a dedicated helper. ``atlas_description`` goes
+through the same generic target lookup as the other names.
+"""
+from __future__ import annotations
+from typing import Any
+from bidsschematools.types.namespace import Namespace
+from ..expr import EvaluationError, evaluate_string
+from ..expr.functions import truthy
+from ..files import BIDSFile, FileTree
+from .entities import parse_filename
+from .inheritance import _is_subset, merged_sidecar
+from .loaders import load_columns, load_json
+# Association names built here (the rule engine relies on these being populated).
+# Names in the schema's ``meta.associations`` that are not listed are skipped.
+_BUILT = {
+    "events", "bval", "bvec", "channels", "aslcontext", "m0scan",
+    "magnitude", "magnitude1", "coordsystem", "electrodes", "physio",
+    "atlas_description", "coordsystems",
+}
+def build_associations(
+    schema: Namespace,
+    tree: FileTree,
+    data_file: BIDSFile,
+    source_entities: dict[str, str],
+    source_suffix: str,
+    source_extension: str,
+    source_datatype: str = "",
+) -> dict[str, Any]:
+    """Return the ``associations`` mapping for one data file."""
+    if not source_suffix:
+        return {}
+    specs = schema["meta"].get("associations", {})
+    selector_context = {
+        "suffix": source_suffix,
+        "extension": source_extension,
+        "entities": source_entities,
+        "datatype": source_datatype,
+    }
+    out: dict[str, Any] = {}
+    for name, spec in specs.items():
+        if name not in _BUILT:
+            continue
+        if not _spec_applies(spec.get("selectors", []), selector_context):
+            continue
+        if name == "coordsystems":
+            # An aggregate of all coordsystem files (one per space-), with the fields
+            # the EMG rules read; not a single target.
+            aggregate = _build_coordsystems(schema, tree, data_file, source_entities)
+            if aggregate is not None:
+                out[name] = aggregate
+            continue
+        target = spec.get("target", {})
+        found = _find_target(
+            schema,
+            tree,
+            data_file,
+            source_entities,
+            str(target.get("suffix", source_suffix)),
+            _as_list(target.get("extension")),
+            bool(spec.get("inherit", False)),
+        )
+        if found is None:
+            continue
+        out[name] = _association_object(schema, tree, found)
+    return out
+def _build_coordsystems(
+    schema: Namespace,
+    tree: FileTree,
+    data_file: BIDSFile,
+    source_entities: dict[str, str],
+) -> dict[str, Any] | None:
+    """Collect every applicable ``coordsystem`` JSON (one per ``space-``) and expose
+    ``paths`` / ``spaces`` / ``ParentCoordinateSystems`` (the EMG rules read these).
+    A coordsystem matches when its entities are a subset of the source's, except the
+    ``space`` entity may differ (the target allows it), mirroring the reference's
+    ``targetEntities=['space']`` walk.
+    """
+    found: list[tuple[BIDSFile, dict[str, str]]] = []
+    for dir_relpath in tree.ancestor_dirs(data_file.relpath):  # inherit up the tree
+        for candidate in tree.files_in(dir_relpath):
+            cand_entities, cand_suffix, cand_ext = parse_filename(schema, candidate.name)
+            if cand_suffix != "coordsystem" or cand_ext != ".json":
+                continue
+            if all(source_entities.get(k) == v or k == "space" for k, v in cand_entities.items()):
+                found.append((candidate, cand_entities))
+    if not found:
+        return None
+    parents: list[str] = []
+    for candidate, _entities in found:
+        data = load_json(candidate)
+        parent = data.get("ParentCoordinateSystem") if isinstance(data, dict) else None
+        if parent:
+            parents.append(parent)
+    return {
+        "paths": ["/" + candidate.relpath for candidate, _ in found],
+        "spaces": [ent["space"] for _f, ent in found if "space" in ent],
+        "ParentCoordinateSystems": parents,
+    }
+def _spec_applies(selectors: list[str], context: dict[str, Any]) -> bool:
+    for selector in selectors:
+        try:
+            if not truthy(evaluate_string(selector, context)):
+                return False
+        except EvaluationError:
+            return False
+    return True
+def _find_target(
+    schema: Namespace,
+    tree: FileTree,
+    data_file: BIDSFile,
+    source_entities: dict[str, str],
+    target_suffix: str,
+    target_extensions: list[str],
+    inherit: bool,
+) -> BIDSFile | None:
+    """The closest file matching the target suffix/extension with a subset of the
+    source's entities. Walks up the tree when the association inherits."""
+    dirs = tree.ancestor_dirs(data_file.relpath) if inherit else [data_file.parent]
+    for dir_relpath in dirs:  # closest first
+        best: BIDSFile | None = None
+        best_specificity = -1
+        for candidate in tree.files_in(dir_relpath):
+            if candidate.relpath == data_file.relpath:
+                continue
+            cand_entities, cand_suffix, cand_ext = parse_filename(schema, candidate.name)
+            if target_suffix and cand_suffix != target_suffix:
+                continue
+            if target_extensions and cand_ext not in target_extensions:
+                continue
+            if not _is_subset(cand_entities, source_entities):
+                continue
+            if len(cand_entities) > best_specificity:
+                best, best_specificity = candidate, len(cand_entities)
+        if best is not None:
+            return best
+    return None
+def _association_object(schema: Namespace, tree: FileTree, found: BIDSFile) -> Any:
+    """Build the object exposed under ``associations.<name>`` for a found file."""
+    name = found.name
+    path = "/" + found.relpath
+    if name.endswith(".tsv") or name.endswith(".tsv.gz"):
+        columns = load_columns(found, max_rows=-1)
+        n_rows = max((len(values) for values in columns.values()), default=0)
+        obj: dict[str, Any] = dict(columns)
+        obj.update(
+            n_rows=n_rows,
+            n_cols=len(columns),
+            sidecar=merged_sidecar(schema, tree, found),
+            path=path,
+        )
+        return obj
+    if name.endswith(".bval") or name.endswith(".bvec"):
+        return _numeric_matrix(found, path)
+    if name.endswith(".json"):
+        data = load_json(found)
+        data = dict(data) if isinstance(data, dict) else {}
+        data["path"] = path
+        return data
+    # A plain data file (e.g. m0scan, magnitude): only existence/path matters.
+    return {"path": path}
+def _numeric_matrix(found: BIDSFile, path: str) -> dict[str, Any]:
+    """Parse a whitespace-delimited ``.bval``/``.bvec`` into values + shape."""
+    try:
+        text = found.read_text()
+    except OSError:
+        return {"values": [], "n_rows": 0, "n_cols": 0, "path": path}
+    rows = [line.split() for line in text.splitlines() if line.strip()]
+    values: list[float] = []
+    for row in rows:
+        for token in row:
+            try:
+                values.append(float(token))
+            except ValueError:
+                pass
+    return {
+        "values": values,
+        "n_rows": len(rows),
+        "n_cols": len(rows[0]) if rows else 0,
+        "path": path,
+    }
+def _as_list(value: Any) -> list[str]:
+    if value is None:
+        return []
+    return [value] if isinstance(value, str) else list(value)