PyPI - axobench - Versions diffs - 0.1.0__tar.gz - Mend

axobench 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

axobench-0.1.0/PKG-INFO +34 -0
axobench-0.1.0/README.md +19 -0
axobench-0.1.0/pyproject.toml +37 -0
axobench-0.1.0/setup.cfg +4 -0
axobench-0.1.0/src/axobench/__init__.py +5 -0
axobench-0.1.0/src/axobench/benchmark/__init__.py +103 -0
axobench-0.1.0/src/axobench/benchmark/branch_adapter.py +104 -0
axobench-0.1.0/src/axobench/benchmark/bundle.py +336 -0
axobench-0.1.0/src/axobench/benchmark/dataset_schema.py +698 -0
axobench-0.1.0/src/axobench/benchmark/diagnostic_audits.py +507 -0
axobench-0.1.0/src/axobench/benchmark/diagnostic_rows.py +748 -0
axobench-0.1.0/src/axobench/benchmark/mechanistic_response.py +360 -0
axobench-0.1.0/src/axobench/benchmark/morphology_transfer.py +244 -0
axobench-0.1.0/src/axobench/benchmark/perturbation_stability.py +117 -0
axobench-0.1.0/src/axobench/benchmark/plots.py +1382 -0
axobench-0.1.0/src/axobench/benchmark/profiles.py +381 -0
axobench-0.1.0/src/axobench/benchmark/regime_stratified.py +417 -0
axobench-0.1.0/src/axobench/benchmark/reports.py +273 -0
axobench-0.1.0/src/axobench/benchmark/runner.py +288 -0
axobench-0.1.0/src/axobench/benchmark/selectors.py +83 -0
axobench-0.1.0/src/axobench/benchmark/state_metrics.py +321 -0
axobench-0.1.0/src/axobench/benchmark/structured_intervention.py +423 -0
axobench-0.1.0/src/axobench/benchmark/suite.py +725 -0
axobench-0.1.0/src/axobench/benchmark/swc_utils.py +250 -0
axobench-0.1.0/src/axobench/benchmark/trace_shape.py +404 -0
axobench-0.1.0/src/axobench/cli.py +550 -0
axobench-0.1.0/src/axobench/data.py +666 -0
axobench-0.1.0/src/axobench/generation/__init__.py +15 -0
axobench-0.1.0/src/axobench/generation/arbor_sim.py +1143 -0
axobench-0.1.0/src/axobench/generation/assets/__init__.py +1 -0
axobench-0.1.0/src/axobench/generation/assets/allen_l5_template.swc +4855 -0
axobench-0.1.0/src/axobench/generation/assets/allen_l5_template_fit.json +297 -0
axobench-0.1.0/src/axobench/generation/generate_coreneuron_hay_dataset.py +439 -0
axobench-0.1.0/src/axobench/generation/generate_hay_neuron_dataset.py +289 -0
axobench-0.1.0/src/axobench/generation/mechanisms/__init__.py +1 -0
axobench-0.1.0/src/axobench/generation/mechanisms/nmda.mod +75 -0
axobench-0.1.0/src/axobench/generation/prepare_hay_swc_template.py +93 -0
axobench-0.1.0/src/axobench/generation/rank_allen_l5_m3_candidates.py +197 -0
axobench-0.1.0/src/axobench/generation/run_coreneuron_event_dropout_pair.py +614 -0
axobench-0.1.0/src/axobench/generation/run_coreneuron_hay_probe.py +248 -0
axobench-0.1.0/src/axobench/generation/run_generation_throughput_gate.py +437 -0
axobench-0.1.0/src/axobench/generation/run_hay_neuron_driven_probe.py +266 -0
axobench-0.1.0/src/axobench/generation/run_v1_parallel_generation.py +605 -0
axobench-0.1.0/src/axobench/metrics.py +468 -0
axobench-0.1.0/src/axobench/neuronio_raw.py +240 -0
axobench-0.1.0/src/axobench/setup_workflow.py +150 -0
axobench-0.1.0/src/axobench.egg-info/PKG-INFO +34 -0
axobench-0.1.0/src/axobench.egg-info/SOURCES.txt +66 -0
axobench-0.1.0/src/axobench.egg-info/dependency_links.txt +1 -0
axobench-0.1.0/src/axobench.egg-info/entry_points.txt +2 -0
axobench-0.1.0/src/axobench.egg-info/requires.txt +11 -0
axobench-0.1.0/src/axobench.egg-info/top_level.txt +1 -0
axobench-0.1.0/tests/test_arbor_sim.py +111 -0
axobench-0.1.0/tests/test_benchmark_bundle.py +162 -0
axobench-0.1.0/tests/test_benchmark_profiles.py +56 -0
axobench-0.1.0/tests/test_benchmark_reports.py +93 -0
axobench-0.1.0/tests/test_benchmark_runner.py +153 -0
axobench-0.1.0/tests/test_benchmark_selectors.py +44 -0
axobench-0.1.0/tests/test_benchmark_suite.py +535 -0
axobench-0.1.0/tests/test_cli.py +505 -0
axobench-0.1.0/tests/test_data.py +195 -0
axobench-0.1.0/tests/test_dataset_schema.py +85 -0
axobench-0.1.0/tests/test_diagnostic_audits.py +153 -0
axobench-0.1.0/tests/test_diagnostic_rows.py +249 -0
axobench-0.1.0/tests/test_kaggle_download.py +127 -0
axobench-0.1.0/tests/test_metrics.py +103 -0
axobench-0.1.0/tests/test_neuronio_convert.py +117 -0
axobench-0.1.0/tests/test_setup_workflow.py +50 -0

axobench-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,34 @@
+Metadata-Version: 2.4
+Name: axobench
+Version: 0.1.0
+Summary: Single-neuron surrogate benchmark and dataset generation workbench.
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+Requires-Dist: matplotlib>=3.8
+Requires-Dist: numpy>=1.24
+Requires-Dist: requests>=2.32
+Provides-Extra: dev
+Requires-Dist: pytest>=8; extra == "dev"
+Provides-Extra: sim
+Requires-Dist: arbor==0.11.0; extra == "sim"
+Provides-Extra: plots
+# AxoBench
+AxoBench is the extracted benchmark and dataset-generation workbench for
+single-neuron surrogate evaluation. It owns dataset generation, data/schema
+utilities, diagnostic suites, and benchmark reporting.
+The package keeps simulator-backed generation code under
+`src/axobench/generation/`: Arbor, NEURON/Hay, CoreNEURON/Hay, SWC-template
+preparation, morphology-candidate ranking, throughput probes, and the v1
+parallel dataset generator. `scripts/` is reserved for thin operational entry
+points such as launch scripts and data-download helpers.
+Model implementations, training loops, checkpoint formats, and model-specific
+experiments stay in `bnn_sim`. AxoBench evaluates caller-supplied prediction
+functions or stored prediction/diagnostic artifacts; it does not ship BranchELM,
+Mamba, RNN, or training code.
+The active exploration arc lives in `docs/benchmark/`, which is ignored by Git
+while the benchmark audit is still exploratory.

axobench-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,19 @@
+# AxoBench
+AxoBench is the extracted benchmark and dataset-generation workbench for
+single-neuron surrogate evaluation. It owns dataset generation, data/schema
+utilities, diagnostic suites, and benchmark reporting.
+The package keeps simulator-backed generation code under
+`src/axobench/generation/`: Arbor, NEURON/Hay, CoreNEURON/Hay, SWC-template
+preparation, morphology-candidate ranking, throughput probes, and the v1
+parallel dataset generator. `scripts/` is reserved for thin operational entry
+points such as launch scripts and data-download helpers.
+Model implementations, training loops, checkpoint formats, and model-specific
+experiments stay in `bnn_sim`. AxoBench evaluates caller-supplied prediction
+functions or stored prediction/diagnostic artifacts; it does not ship BranchELM,
+Mamba, RNN, or training code.
+The active exploration arc lives in `docs/benchmark/`, which is ignored by Git
+while the benchmark audit is still exploratory.

axobench-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,37 @@
+[build-system]
+requires = ["setuptools>=68"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "axobench"
+version = "0.1.0"
+description = "Single-neuron surrogate benchmark and dataset generation workbench."
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+  "matplotlib>=3.8",
+  "numpy>=1.24",
+  "requests>=2.32",
+]
+[project.optional-dependencies]
+dev = [
+  "pytest>=8",
+]
+sim = [
+  "arbor==0.11.0",
+]
+plots = []
+[project.scripts]
+axobench = "axobench.cli:main"
+[tool.setuptools.packages.find]
+where = ["src"]
+[tool.setuptools.package-data]
+"axobench.generation" = ["assets/*.swc", "assets/*.json", "mechanisms/*.mod"]
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+pythonpath = ["src"]

axobench-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

axobench-0.1.0/src/axobench/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Dataset-generation and diagnostic utilities for single-neuron benchmarks."""
+from __future__ import annotations
+__all__: list[str] = []

axobench-0.1.0/src/axobench/benchmark/__init__.py ADDED Viewed

@@ -0,0 +1,103 @@
+"""AxoBench neuron surrogate benchmark utilities.
+The package exports the public benchmark helpers lazily so lightweight
+dataframe/TCN evaluation can run in environments that intentionally omit
+PyTorch.
+"""
+from __future__ import annotations
+from typing import Any
+# Lazy export table: maps each public name of this package to the
+# (module path, attribute name) pair it is loaded from. Nothing listed here is
+# imported at package import time; the module-level ``__getattr__`` resolves an
+# entry on first access.
+_EXPORTS: dict[str, tuple[str, str]] = {
+    "compute_regime_stratified_metrics": ("axobench.benchmark.regime_stratified", "compute_regime_stratified_metrics"),
+    "compute_state_conditioned_metrics": ("axobench.benchmark.state_metrics", "compute_state_conditioned_metrics"),
+    "build_neuron_state_masks": ("axobench.benchmark.state_metrics", "build_neuron_state_masks"),
+    "compute_trace_shape_metrics": ("axobench.benchmark.trace_shape", "compute_trace_shape_metrics"),
+    "compute_perturbation_stability": ("axobench.benchmark.perturbation_stability", "compute_perturbation_stability"),
+    "compute_paired_event_intervention_metrics": (
+        "axobench.benchmark.structured_intervention",
+        "compute_paired_event_intervention_metrics",
+    ),
+    "make_event_dropout_inputs": ("axobench.benchmark.structured_intervention", "make_event_dropout_inputs"),
+    "make_selective_event_dropout_inputs": (
+        "axobench.benchmark.structured_intervention",
+        "make_selective_event_dropout_inputs",
+    ),
+    "make_site_silence_inputs": ("axobench.benchmark.structured_intervention", "make_site_silence_inputs"),
+    "make_structured_intervention_inputs": (
+        "axobench.benchmark.structured_intervention",
+        "make_structured_intervention_inputs",
+    ),
+    "make_temporal_jitter_inputs": ("axobench.benchmark.structured_intervention", "make_temporal_jitter_inputs"),
+    "MorphologyTransferEvaluator": ("axobench.benchmark.morphology_transfer", "MorphologyTransferEvaluator"),
+    "load_available_morphologies": ("axobench.benchmark.morphology_transfer", "load_available_morphologies"),
+    "adapt_branch_count": ("axobench.benchmark.branch_adapter", "adapt_branch_count"),
+    "evaluate_with_branch_adaptation": ("axobench.benchmark.branch_adapter", "evaluate_with_branch_adaptation"),
+    "BenchmarkProfile": ("axobench.benchmark.profiles", "BenchmarkProfile"),
+    "BenchmarkCostModel": ("axobench.benchmark.profiles", "BenchmarkCostModel"),
+    "LOCAL_CORENEURON_HAY_COST_MODEL": ("axobench.benchmark.profiles", "LOCAL_CORENEURON_HAY_COST_MODEL"),
+    "build_axis_plan": ("axobench.benchmark.profiles", "build_axis_plan"),
+    "estimate_profile_cost": ("axobench.benchmark.profiles", "estimate_profile_cost"),
+    "get_benchmark_profile": ("axobench.benchmark.profiles", "get_benchmark_profile"),
+    "list_benchmark_profiles": ("axobench.benchmark.profiles", "list_benchmark_profiles"),
+    "DIAGNOSTIC_SUITES": ("axobench.benchmark.dataset_schema", "DIAGNOSTIC_SUITES"),
+    "INTERVENTION_CONDITIONS": ("axobench.benchmark.dataset_schema", "INTERVENTION_CONDITIONS"),
+    "ORDINARY_SPLITS": ("axobench.benchmark.dataset_schema", "ORDINARY_SPLITS"),
+    "V1_MORPHOLOGY_COUNT": ("axobench.benchmark.dataset_schema", "V1_MORPHOLOGY_COUNT"),
+    "build_diagnostic_suite_catalog": ("axobench.benchmark.dataset_schema", "build_diagnostic_suite_catalog"),
+    "build_v1_dataset_manifest_template": ("axobench.benchmark.dataset_schema", "build_v1_dataset_manifest_template"),
+    "default_diagnostic_specs": ("axobench.benchmark.dataset_schema", "default_diagnostic_specs"),
+    "default_target_views": ("axobench.benchmark.dataset_schema", "default_target_views"),
+    "provisional_v1_morphologies": ("axobench.benchmark.dataset_schema", "provisional_v1_morphologies"),
+    "validate_v1_manifest_template": ("axobench.benchmark.dataset_schema", "validate_v1_manifest_template"),
+    "DiagnosticContext": ("axobench.benchmark.diagnostic_rows", "DiagnosticContext"),
+    "evaluate_model_diagnostic_rows": ("axobench.benchmark.diagnostic_rows", "evaluate_model_diagnostic_rows"),
+    "morphology_contrast_diagnostic_rows": ("axobench.benchmark.diagnostic_rows", "morphology_contrast_diagnostic_rows"),
+    "morphology_routing_qc_rows": ("axobench.benchmark.diagnostic_rows", "morphology_routing_qc_rows"),
+    "paired_intervention_diagnostic_rows": ("axobench.benchmark.diagnostic_rows", "paired_intervention_diagnostic_rows"),
+    "teacher_qc_diagnostic_rows": ("axobench.benchmark.diagnostic_rows", "teacher_qc_diagnostic_rows"),
+    "validate_diagnostic_rows": ("axobench.benchmark.diagnostic_rows", "validate_diagnostic_rows"),
+    "write_diagnostic_rows": ("axobench.benchmark.diagnostic_rows", "write_diagnostic_rows"),
+    "DiagnosticAuditSources": ("axobench.benchmark.diagnostic_audits", "DiagnosticAuditSources"),
+    "adapter_fairness_diagnostic_rows": ("axobench.benchmark.diagnostic_audits", "adapter_fairness_diagnostic_rows"),
+    "diagnostic_audit_rows": ("axobench.benchmark.diagnostic_audits", "diagnostic_audit_rows"),
+    "diagnostic_audit_rows_from_sources": ("axobench.benchmark.diagnostic_audits", "diagnostic_audit_rows_from_sources"),
+    "load_artifact_rows": ("axobench.benchmark.diagnostic_audits", "load_artifact_rows"),
+    "metric_decoupling_diagnostic_rows": ("axobench.benchmark.diagnostic_audits", "metric_decoupling_diagnostic_rows"),
+    "protocol_coverage_diagnostic_rows": ("axobench.benchmark.diagnostic_audits", "protocol_coverage_diagnostic_rows"),
+    "robustness_directionality_diagnostic_rows": (
+        "axobench.benchmark.diagnostic_audits",
+        "robustness_directionality_diagnostic_rows",
+    ),
+    "target_view_audit_diagnostic_rows": ("axobench.benchmark.diagnostic_audits", "target_view_audit_diagnostic_rows"),
+    "DatasetSelector": ("axobench.benchmark.selectors", "DatasetSelector"),
+    "V1DatasetLayout": ("axobench.benchmark.selectors", "V1DatasetLayout"),
+    "load_intervention_pair_npz": ("axobench.benchmark.runner", "load_intervention_pair_npz"),
+    "load_selector_batch": ("axobench.benchmark.runner", "load_selector_batch"),
+    "run_model_diagnostics_on_selector": ("axobench.benchmark.runner", "run_model_diagnostics_on_selector"),
+    "run_paired_intervention_on_npz": ("axobench.benchmark.runner", "run_paired_intervention_on_npz"),
+    "run_paired_intervention_on_selector": ("axobench.benchmark.runner", "run_paired_intervention_on_selector"),
+    "run_teacher_qc_on_selector": ("axobench.benchmark.runner", "run_teacher_qc_on_selector"),
+    "summarize_diagnostic_rows": ("axobench.benchmark.reports", "summarize_diagnostic_rows"),
+    "core_metric_comparison_rows": ("axobench.benchmark.reports", "core_metric_comparison_rows"),
+    "write_diagnostic_summary": ("axobench.benchmark.reports", "write_diagnostic_summary"),
+    "write_core_metric_comparison_table": ("axobench.benchmark.reports", "write_core_metric_comparison_table"),
+    "run_v1_benchmark_bundle": ("axobench.benchmark.bundle", "run_v1_benchmark_bundle"),
+    "infer_pair_metadata": ("axobench.benchmark.bundle", "infer_pair_metadata"),
+    "BenchmarkSuite": ("axobench.benchmark.suite", "BenchmarkSuite"),
+}
+# Advertise every lazily exported name, sorted alphabetically.
+__all__ = sorted(_EXPORTS)
+def __getattr__(name: str) -> Any:
+    if name not in _EXPORTS:
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+    module_name, attribute = _EXPORTS[name]
+    from importlib import import_module
+    value = getattr(import_module(module_name), attribute)
+    globals()[name] = value
+    return value

axobench-0.1.0/src/axobench/benchmark/branch_adapter.py ADDED Viewed

@@ -0,0 +1,104 @@
+"""Branch count adaptation for morphology transfer (M3).
+Handles mismatch between training morphology (e.g., 45 branches)
+and test morphologies (e.g., 24-40 branches).
+"""
+from __future__ import annotations
+import numpy as np
+from axobench.metrics import binary_auc
+def adapt_branch_count(
+    routing_matrix: np.ndarray,
+    target_n_branches: int,
+    strategy: str = "pad",
+) -> np.ndarray:
+    """Adapt a routing matrix to a target branch count.
+    Args:
+        routing_matrix: Source routing matrix of shape (n_branches, input_dim).
+        target_n_branches: Desired number of branches.
+        strategy: Adaptation strategy:
+            - "pad": Pad with zero-weight branches (if source < target)
+            - "truncate": Truncate excess branches (if source > target)
+            - "interpolate": Distribute inputs proportionally (always works)
+    Returns:
+        Adapted routing matrix of shape (target_n_branches, input_dim).
+    """
+    n_source, input_dim = routing_matrix.shape
+    if n_source == target_n_branches:
+        return routing_matrix
+    if strategy == "pad":
+        if n_source < target_n_branches:
+            # Pad with zero-weight branches
+            padding = np.zeros((target_n_branches - n_source, input_dim))
+            return np.vstack([routing_matrix, padding])
+        else:
+            # Truncate excess branches
+            return routing_matrix[:target_n_branches]
+    elif strategy == "interpolate":
+        # Distribute source branches across target branches proportionally
+        result = np.zeros((target_n_branches, input_dim))
+        for target_b in range(target_n_branches):
+            # Map target branch to source branch space
+            source_idx = int(target_b * n_source / target_n_branches)
+            # Weight by fractional overlap
+            weight = min(1.0, n_source / target_n_branches)
+            result[target_b] = routing_matrix[source_idx] * weight
+        return result
+    else:
+        raise ValueError(f"Unknown strategy: {strategy}")
+def evaluate_with_branch_adaptation(
+    model_fn: callable,
+    inputs: np.ndarray,
+    targets: np.ndarray,
+    source_routing: np.ndarray,
+    target_routing: np.ndarray,
+    strategy: str = "pad",
+) -> dict:
+    """Evaluate model with branch count adaptation.
+    Args:
+        model_fn: Model prediction function.
+        inputs: Input array (batch, time, input_dim).
+        targets: Target array (batch, time, 2).
+        source_routing: Routing matrix used during training.
+        target_routing: Routing matrix for test morphology.
+        strategy: Adaptation strategy.
+    Returns:
+        Dictionary with evaluation metrics.
+    """
+    # Adapt routing if needed
+    n_source = source_routing.shape[0]
+    n_target = target_routing.shape[0]
+    adapted_routing = adapt_branch_count(target_routing, n_source, strategy=strategy)
+    # For now, just evaluate directly (assuming model handles routing internally)
+    # In practice, would need to rewire model's routing matrix
+    preds = model_fn(inputs)
+    rmse_mv = np.sqrt(np.mean((preds[:, :, 1] - targets[:, :, 1]) ** 2)) / 0.1
+    auc = binary_auc(preds[:, :, 0], targets[:, :, 0])
+    if np.isnan(auc):
+        auc = 0.5
+    return {
+        "n_source_branches": n_source,
+        "n_target_branches": n_target,
+        "adaptation_strategy": strategy,
+        "rmse_mv": float(rmse_mv),
+        "auc": float(auc),
+    }

axobench-0.1.0/src/axobench/benchmark/bundle.py ADDED Viewed

@@ -0,0 +1,336 @@
+"""Promoted v1 benchmark bundle runner."""
+from __future__ import annotations
+from collections.abc import Callable, Iterable
+import json
+from pathlib import Path
+import re
+from typing import Any, Literal
+import numpy as np
+from axobench.benchmark.dataset_schema import INTERVENTION_CONDITIONS
+from axobench.benchmark.diagnostic_rows import (
+    DiagnosticContext,
+    morphology_contrast_diagnostic_rows,
+    morphology_routing_qc_rows,
+    write_diagnostic_rows,
+)
+from axobench.benchmark.reports import write_diagnostic_summary
+from axobench.benchmark.runner import (
+    resolve_intervention_pair_paths,
+    run_model_diagnostics_on_selector,
+    run_paired_intervention_on_npz,
+    run_paired_intervention_on_npzs,
+    run_paired_intervention_on_selector,
+    run_teacher_qc_on_selector,
+)
+from axobench.benchmark.selectors import V1DatasetLayout
+# Suite labels attached to model validation rows under the "paper-summary"
+# profile; the "full" profile appends "full" to this tuple in _suite_config.
+DEFAULT_MODEL_SUITES = (
+    "trace-core",
+    "state-dynamics",
+    "spike-behavior",
+    "feature-fidelity",
+    "paper-summary",
+)
+# Suite labels attached to intervention rows under the "paper-summary" profile.
+DEFAULT_INTERVENTION_SUITES = ("teacher-qc", "intervention-response", "paper-summary")
+# Profile names accepted by run_v1_benchmark_bundle(suite=...).
+SuiteProfile = Literal["teacher-qc", "paper-summary", "full"]
+def run_v1_benchmark_bundle(
+    *,
+    dataset_root: str | Path,
+    output_dir: str | Path,
+    base_context: DiagnosticContext | None = None,
+    model_fn: Callable[[np.ndarray], np.ndarray] | None = None,
+    model_id: str | None = None,
+    intervention_pair_paths: Iterable[str | Path] = (),
+    suite: SuiteProfile = "paper-summary",
+    max_val_samples: int | None = None,
+    max_intervention_samples: int | None = None,
+    cache_shards: int = 1,
+    write_summaries: bool = True,
+) -> dict[str, Any]:
+    """Run the promoted v1 validation plus intervention diagnostic bundle.
+    Stages, in order: teacher QC on the default validation selector,
+    morphology routing QC, optional model diagnostics on the same validation
+    selector, paired intervention diagnostics, and a morphology contrast over
+    all intervention rows. Each stage is recorded as an artifact and the
+    resulting manifest is written to ``<output_dir>/manifest.json``.
+    Args:
+        dataset_root: Root directory handed to ``V1DatasetLayout``.
+        output_dir: Directory for row files, summaries and the manifest;
+            created if missing.
+        base_context: Context copied (with per-stage overrides) for every
+            stage; a default ``DiagnosticContext()`` is used when omitted.
+        model_fn: Optional prediction function. Model validation diagnostics
+            run only when this is given and the profile defines model suites.
+        model_id: Identifier recorded for model rows and in the manifest.
+        intervention_pair_paths: Explicit pair files. When empty, the
+            interventions are discovered through the dataset layout instead.
+        suite: Suite profile name, resolved by ``_suite_config``.
+        max_val_samples: Sample cap forwarded to the validation stages.
+        max_intervention_samples: Sample cap forwarded to intervention stages.
+        cache_shards: Forwarded to the validation runners.
+        write_summaries: When true, a ``.md`` summary is written next to every
+            row file.
+    Returns:
+        The manifest dictionary that was written to ``manifest.json``.
+    Raises:
+        ValueError: If ``suite`` is not a known profile (from ``_suite_config``).
+    """
+    layout = V1DatasetLayout(dataset_root)
+    output = Path(output_dir)
+    output.mkdir(parents=True, exist_ok=True)
+    base = base_context or DiagnosticContext()
+    suite_config = _suite_config(suite)
+    artifacts: list[dict[str, Any]] = []
+    # Accumulates the rows of every intervention run for the contrast stage.
+    intervention_row_sets: list[dict[str, Any]] = []
+    # Stage 1: teacher QC on the default validation selector.
+    teacher_context = _replace_context(
+        base,
+        suites=suite_config["teacher"],
+        model_id="teacher",
+        split="val",
+        condition="val",
+        intervention=None,
+    )
+    # No explicit write follows, so the runner is presumably responsible for
+    # writing ``output`` itself - verify against axobench.benchmark.runner.
+    teacher_rows = run_teacher_qc_on_selector(
+        layout.default_validation(),
+        context=teacher_context,
+        max_samples=max_val_samples,
+        output=output / "teacher_qc_val.jsonl",
+        cache_shards=cache_shards,
+    )
+    _record_artifact(teacher_rows, output / "teacher_qc_val.jsonl", artifacts, write_summaries=write_summaries)
+    # Stage 2: morphology routing QC; these rows are written explicitly here.
+    routing_rows = morphology_routing_qc_rows(
+        context=_replace_context(base, suites=suite_config["routing"], model_id="teacher")
+    )
+    write_diagnostic_rows(routing_rows, output / "morphology_routing_qc.jsonl")
+    _record_artifact(
+        routing_rows,
+        output / "morphology_routing_qc.jsonl",
+        artifacts,
+        write_summaries=write_summaries,
+    )
+    # Stage 3: model diagnostics; skipped for the "teacher-qc" profile, whose
+    # "model" entry is None, and whenever no model function was supplied.
+    if model_fn is not None and suite_config["model"] is not None:
+        # Falls back to the base context's model id when none was passed.
+        resolved_model_id = model_id or base.model_id
+        model_context = _replace_context(
+            base,
+            suites=suite_config["model"],
+            model_id=resolved_model_id,
+            split="val",
+            condition="val",
+            intervention=None,
+        )
+        model_rows = run_model_diagnostics_on_selector(
+            model_fn,
+            layout.default_validation(),
+            context=model_context,
+            max_samples=max_val_samples,
+            output=output / f"model_val_{_slug(resolved_model_id)}.jsonl",
+            cache_shards=cache_shards,
+        )
+        _record_artifact(
+            model_rows,
+            output / f"model_val_{_slug(resolved_model_id)}.jsonl",
+            artifacts,
+            write_summaries=write_summaries,
+        )
+    # Stage 4: paired intervention diagnostics.
+    # NOTE(review): unlike stage 3, the intervention contexts below use
+    # ``model_id`` without the ``base.model_id`` fallback, so they receive None
+    # when ``model_fn`` is given without ``model_id`` - confirm this is intended.
+    pair_paths = [Path(path) for path in intervention_pair_paths]
+    if pair_paths:
+        # Explicit pair files: one run per (intervention, morphology) group.
+        for intervention, morphology_id, grouped_paths in _group_pair_paths_by_morphology(
+            pair_paths,
+            default_morphology=base.morphology_id,
+        ):
+            rows_path, rows = _run_grouped_intervention(
+                grouped_paths,
+                output=output,
+                base=base,
+                suites=suite_config["intervention"],
+                model_fn=model_fn,
+                model_id=model_id,
+                intervention=intervention,
+                morphology_id=morphology_id,
+                max_samples=max_intervention_samples,
+            )
+            intervention_row_sets.extend(rows)
+            _record_artifact(rows, rows_path, artifacts, write_summaries=write_summaries)
+    else:
+        # No explicit files: walk the interventions the dataset layout reports.
+        for intervention in layout.available_interventions():
+            selector = layout.intervention(intervention)
+            selector_pair_paths = resolve_intervention_pair_paths(selector)
+            if len(selector_pair_paths) == 1:
+                # Single pair file: run through the selector and label the
+                # output with the base context's morphology id.
+                intervention_model_id = model_id if model_fn is not None else "teacher"
+                context = _replace_context(
+                    base,
+                    suites=suite_config["intervention"],
+                    model_id=intervention_model_id,
+                    split="",
+                    condition=f"interventions/{intervention}",
+                    intervention=intervention,
+                )
+                filename = f"intervention_{_slug(base.morphology_id)}_{_slug(intervention)}"
+                rows = run_paired_intervention_on_selector(
+                    selector,
+                    model_fn=model_fn,
+                    context=context,
+                    max_samples=max_intervention_samples,
+                    output=output / f"{filename}.jsonl",
+                )
+                intervention_row_sets.extend(rows)
+                _record_artifact(rows, output / f"{filename}.jsonl", artifacts, write_summaries=write_summaries)
+                continue
+            # Zero or several pair files: group them by the labels inferred
+            # from each path (an empty list simply yields no groups).
+            for inferred_intervention, morphology_id, grouped_paths in _group_pair_paths_by_morphology(
+                selector_pair_paths,
+                default_morphology=base.morphology_id,
+            ):
+                rows_path, rows = _run_grouped_intervention(
+                    grouped_paths,
+                    output=output,
+                    base=base,
+                    suites=suite_config["intervention"],
+                    model_fn=model_fn,
+                    model_id=model_id,
+                    intervention=inferred_intervention,
+                    morphology_id=morphology_id,
+                    max_samples=max_intervention_samples,
+                )
+                intervention_row_sets.extend(rows)
+                _record_artifact(rows, rows_path, artifacts, write_summaries=write_summaries)
+    # Stage 5: morphology contrast over every intervention row collected above.
+    contrast_rows = morphology_contrast_diagnostic_rows(
+        intervention_row_sets,
+        context=_replace_context(base, suites=suite_config["morphology_contrast"], morphology_id="all_v1"),
+    )
+    # The contrast file and artifact are emitted only when rows were produced.
+    if contrast_rows:
+        contrast_path = output / "morphology_contrast_interventions.jsonl"
+        write_diagnostic_rows(contrast_rows, contrast_path)
+        _record_artifact(
+            contrast_rows,
+            contrast_path,
+            artifacts,
+            write_summaries=write_summaries,
+        )
+    manifest = {
+        "dataset_root": str(dataset_root),
+        "output_dir": str(output),
+        "model_id": model_id,
+        "suite": suite,
+        # Suite tuples become lists so the manifest holds plain JSON arrays.
+        "suite_config": {key: list(value) if value is not None else None for key, value in suite_config.items()},
+        "max_val_samples": max_val_samples,
+        "max_intervention_samples": max_intervention_samples,
+        "write_summaries": write_summaries,
+        "artifacts": artifacts,
+    }
+    (output / "manifest.json").write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+    return manifest
+def _suite_config(suite: SuiteProfile) -> dict[str, tuple[str, ...] | None]:
+    if suite == "teacher-qc":
+        return {
+            "teacher": ("teacher-qc",),
+            "routing": ("teacher-qc",),
+            "model": None,
+            "intervention": ("teacher-qc",),
+            "morphology_contrast": ("morphology-contrast",),
+        }
+    if suite == "paper-summary":
+        return {
+            "teacher": ("teacher-qc", "paper-summary"),
+            "routing": ("teacher-qc", "paper-summary"),
+            "model": DEFAULT_MODEL_SUITES,
+            "intervention": DEFAULT_INTERVENTION_SUITES,
+            "morphology_contrast": ("morphology-contrast", "paper-summary"),
+        }
+    if suite == "full":
+        return {
+            "teacher": ("teacher-qc", "paper-summary", "full"),
+            "routing": ("teacher-qc", "paper-summary", "full"),
+            "model": (*DEFAULT_MODEL_SUITES, "full"),
+            "intervention": ("teacher-qc", "intervention-response", "paper-summary", "full"),
+            "morphology_contrast": ("morphology-contrast", "paper-summary", "full"),
+        }
+    raise ValueError(f"unknown v1 benchmark suite profile: {suite}")
+def infer_pair_metadata(path: str | Path, *, default_morphology: str = "unknown") -> tuple[str, str]:
+    """Infer intervention and morphology labels from a pilot pair-file path."""
+    path = Path(path)
+    parts = path.parts
+    intervention = None
+    if "interventions" in parts:
+        index = parts.index("interventions")
+        if index + 1 < len(parts):
+            intervention = parts[index + 1]
+    text = str(path)
+    if intervention is None:
+        intervention = next((name for name in INTERVENTION_CONDITIONS if name in text), "event_dropout")
+    match = re.search(r"specimen_(\d+)", text)
+    morphology_id = f"specimen_{match.group(1)}" if match else default_morphology
+    return intervention, morphology_id
+def _group_pair_paths_by_morphology(
+    pair_paths: Iterable[Path],
+    *,
+    default_morphology: str,
+) -> list[tuple[str, str, list[Path]]]:
+    grouped: dict[tuple[str, str], list[Path]] = {}
+    for pair_path in pair_paths:
+        intervention, morphology_id = infer_pair_metadata(pair_path, default_morphology=default_morphology)
+        grouped.setdefault((intervention, morphology_id), []).append(pair_path)
+    return [
+        (intervention, morphology_id, sorted(paths))
+        for (intervention, morphology_id), paths in sorted(grouped.items())
+    ]
+def _run_grouped_intervention(
+    pair_paths: list[Path],
+    *,
+    output: Path,
+    base: DiagnosticContext,
+    suites: tuple[str, ...],
+    model_fn: Callable[[np.ndarray], np.ndarray] | None,
+    model_id: str | None,
+    intervention: str,
+    morphology_id: str,
+    max_samples: int | None,
+) -> tuple[Path, list[dict[str, Any]]]:
+    intervention_model_id = model_id if model_fn is not None else "teacher"
+    context = _replace_context(
+        base,
+        suites=suites,
+        model_id=intervention_model_id,
+        morphology_id=morphology_id,
+        split="",
+        condition=f"interventions/{intervention}",
+        intervention=intervention,
+    )
+    rows_path = output / f"intervention_{_slug(morphology_id)}_{_slug(intervention)}.jsonl"
+    if len(pair_paths) == 1:
+        rows = run_paired_intervention_on_npz(
+            pair_paths[0],
+            model_fn=model_fn,
+            context=context,
+            max_samples=max_samples,
+            output=rows_path,
+        )
+    else:
+        rows = run_paired_intervention_on_npzs(
+            pair_paths,
+            model_fn=model_fn,
+            context=context,
+            max_samples=max_samples,
+            output=rows_path,
+        )
+    return rows_path, rows
+def _record_artifact(
+    rows: list[dict[str, Any]],
+    rows_path: Path,
+    artifacts: list[dict[str, Any]],
+    *,
+    write_summaries: bool,
+) -> None:
+    artifact = {
+        "rows": len(rows),
+        "rows_path": str(rows_path),
+    }
+    if write_summaries:
+        summary_path = rows_path.with_suffix(".md")
+        write_diagnostic_summary(rows, summary_path)
+        artifact["summary_path"] = str(summary_path)
+    artifacts.append(artifact)
+def _replace_context(context: DiagnosticContext, **updates: Any) -> DiagnosticContext:
+    data = {**context.__dict__, **updates}
+    return DiagnosticContext(**data)
+def _slug(value: str) -> str:
+    return re.sub(r"[^A-Za-z0-9_.-]+", "_", value).strip("_") or "unknown"