geodispbench3d 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. geodispbench3d-0.2.0/.claude/CLAUDE.md +337 -0
  2. geodispbench3d-0.2.0/.github/actions/setup-python-deps/action.yml +49 -0
  3. geodispbench3d-0.2.0/.github/rulesets/README.md +100 -0
  4. geodispbench3d-0.2.0/.github/rulesets/protect-develop.json +41 -0
  5. geodispbench3d-0.2.0/.github/rulesets/protect-main.json +41 -0
  6. geodispbench3d-0.2.0/.github/scripts/apply-rulesets.sh +185 -0
  7. geodispbench3d-0.2.0/.github/scripts/check_ci_ruleset_contexts.py +183 -0
  8. geodispbench3d-0.2.0/.github/scripts/check_publish_gate.py +146 -0
  9. geodispbench3d-0.2.0/.github/scripts/check_release_preflight.py +130 -0
  10. geodispbench3d-0.2.0/.github/workflows/ci.yml +181 -0
  11. geodispbench3d-0.2.0/.github/workflows/publish-pypi.yml +85 -0
  12. geodispbench3d-0.2.0/.github/workflows/publish-testpypi.yml +67 -0
  13. geodispbench3d-0.2.0/.github/workflows/release-please.yml +64 -0
  14. geodispbench3d-0.2.0/.gitignore +69 -0
  15. geodispbench3d-0.2.0/.pre-commit-config.yaml +29 -0
  16. geodispbench3d-0.2.0/.readthedocs.yaml +24 -0
  17. geodispbench3d-0.2.0/.release-please-manifest.json +3 -0
  18. geodispbench3d-0.2.0/AGENTS.md +46 -0
  19. geodispbench3d-0.2.0/CHANGELOG.md +78 -0
  20. geodispbench3d-0.2.0/CITATION.cff +39 -0
  21. geodispbench3d-0.2.0/LICENSE +34 -0
  22. geodispbench3d-0.2.0/OVERVIEW.md +76 -0
  23. geodispbench3d-0.2.0/PKG-INFO +132 -0
  24. geodispbench3d-0.2.0/README.md +86 -0
  25. geodispbench3d-0.2.0/benchmarks/data/mattertal_gt.csv +14 -0
  26. geodispbench3d-0.2.0/benchmarks/datasets/mattertal.yaml +26 -0
  27. geodispbench3d-0.2.0/benchmarks/datasets/mattertal_f2s3.yaml +26 -0
  28. geodispbench3d-0.2.0/benchmarks/metrics/pointing_error.yaml +45 -0
  29. geodispbench3d-0.2.0/benchmarks/suites/f2s3_voxel_refine.yaml +27 -0
  30. geodispbench3d-0.2.0/benchmarks/suites/iof3d_mattertal.yaml +23 -0
  31. geodispbench3d-0.2.0/docs/source/concepts.md +111 -0
  32. geodispbench3d-0.2.0/docs/source/conf.py +28 -0
  33. geodispbench3d-0.2.0/docs/source/index.md +81 -0
  34. geodispbench3d-0.2.0/docs/source/integrating/cli-tool.md +212 -0
  35. geodispbench3d-0.2.0/docs/source/integrating/custom-adapter.md +163 -0
  36. geodispbench3d-0.2.0/docs/source/integrating/datasets.md +134 -0
  37. geodispbench3d-0.2.0/docs/source/integrating/factory.md +122 -0
  38. geodispbench3d-0.2.0/docs/source/integrating/index.md +99 -0
  39. geodispbench3d-0.2.0/docs/source/integrating/metrics.md +144 -0
  40. geodispbench3d-0.2.0/docs/source/integrating/output-parsers.md +169 -0
  41. geodispbench3d-0.2.0/docs/source/integrating/python-callable.md +111 -0
  42. geodispbench3d-0.2.0/docs/source/quickstart.md +70 -0
  43. geodispbench3d-0.2.0/docs/source/reference/yaml-schemas.md +198 -0
  44. geodispbench3d-0.2.0/docs/source/rescoring-and-analysis.md +201 -0
  45. geodispbench3d-0.2.0/docs/source/tools/f2s3.md +160 -0
  46. geodispbench3d-0.2.0/docs/source/tools/iof3d.md +111 -0
  47. geodispbench3d-0.2.0/pyproject.toml +154 -0
  48. geodispbench3d-0.2.0/pyrightconfig.json +22 -0
  49. geodispbench3d-0.2.0/release-please-config.json +28 -0
  50. geodispbench3d-0.2.0/setup.cfg +4 -0
  51. geodispbench3d-0.2.0/src/geodispbench3d/__init__.py +19 -0
  52. geodispbench3d-0.2.0/src/geodispbench3d/_version.py +24 -0
  53. geodispbench3d-0.2.0/src/geodispbench3d/analysis/__init__.py +31 -0
  54. geodispbench3d-0.2.0/src/geodispbench3d/analysis/loader.py +209 -0
  55. geodispbench3d-0.2.0/src/geodispbench3d/analysis/runner.py +205 -0
  56. geodispbench3d-0.2.0/src/geodispbench3d/cli.py +469 -0
  57. geodispbench3d-0.2.0/src/geodispbench3d/conf/schema/analysis.schema.json +45 -0
  58. geodispbench3d-0.2.0/src/geodispbench3d/conf/schema/dataset.schema.json +45 -0
  59. geodispbench3d-0.2.0/src/geodispbench3d/conf/schema/metrics.schema.json +32 -0
  60. geodispbench3d-0.2.0/src/geodispbench3d/conf/schema/suite.schema.json +36 -0
  61. geodispbench3d-0.2.0/src/geodispbench3d/conf/schema/tool.schema.json +76 -0
  62. geodispbench3d-0.2.0/src/geodispbench3d/dashboard/__init__.py +3 -0
  63. geodispbench3d-0.2.0/src/geodispbench3d/dashboard/app.py +177 -0
  64. geodispbench3d-0.2.0/src/geodispbench3d/dataset/__init__.py +31 -0
  65. geodispbench3d-0.2.0/src/geodispbench3d/dataset/ground_truth.py +128 -0
  66. geodispbench3d-0.2.0/src/geodispbench3d/dataset/schema.py +150 -0
  67. geodispbench3d-0.2.0/src/geodispbench3d/diagnostics.py +42 -0
  68. geodispbench3d-0.2.0/src/geodispbench3d/metrics/__init__.py +19 -0
  69. geodispbench3d-0.2.0/src/geodispbench3d/metrics/builtins.py +259 -0
  70. geodispbench3d-0.2.0/src/geodispbench3d/metrics/registry.py +114 -0
  71. geodispbench3d-0.2.0/src/geodispbench3d/results/__init__.py +27 -0
  72. geodispbench3d-0.2.0/src/geodispbench3d/results/predictions_cache.py +219 -0
  73. geodispbench3d-0.2.0/src/geodispbench3d/results/store.py +56 -0
  74. geodispbench3d-0.2.0/src/geodispbench3d/suite/__init__.py +7 -0
  75. geodispbench3d-0.2.0/src/geodispbench3d/suite/loader.py +170 -0
  76. geodispbench3d-0.2.0/src/geodispbench3d/sweep/__init__.py +33 -0
  77. geodispbench3d-0.2.0/src/geodispbench3d/sweep/evaluation.py +215 -0
  78. geodispbench3d-0.2.0/src/geodispbench3d/sweep/parameters.py +223 -0
  79. geodispbench3d-0.2.0/src/geodispbench3d/sweep/rescore.py +475 -0
  80. geodispbench3d-0.2.0/src/geodispbench3d/sweep/runner.py +701 -0
  81. geodispbench3d-0.2.0/src/geodispbench3d/sweep/trial_record.py +369 -0
  82. geodispbench3d-0.2.0/src/geodispbench3d/tool/__init__.py +30 -0
  83. geodispbench3d-0.2.0/src/geodispbench3d/tool/base.py +134 -0
  84. geodispbench3d-0.2.0/src/geodispbench3d/tool/callable_adapter.py +131 -0
  85. geodispbench3d-0.2.0/src/geodispbench3d/tool/cli_adapter.py +533 -0
  86. geodispbench3d-0.2.0/src/geodispbench3d/tool/loader.py +253 -0
  87. geodispbench3d-0.2.0/src/geodispbench3d.egg-info/PKG-INFO +132 -0
  88. geodispbench3d-0.2.0/src/geodispbench3d.egg-info/SOURCES.txt +126 -0
  89. geodispbench3d-0.2.0/src/geodispbench3d.egg-info/dependency_links.txt +1 -0
  90. geodispbench3d-0.2.0/src/geodispbench3d.egg-info/entry_points.txt +3 -0
  91. geodispbench3d-0.2.0/src/geodispbench3d.egg-info/requires.txt +26 -0
  92. geodispbench3d-0.2.0/src/geodispbench3d.egg-info/scm_file_list.json +121 -0
  93. geodispbench3d-0.2.0/src/geodispbench3d.egg-info/scm_version.json +8 -0
  94. geodispbench3d-0.2.0/src/geodispbench3d.egg-info/top_level.txt +3 -0
  95. geodispbench3d-0.2.0/src/geodispbench3d_f2s3/__init__.py +13 -0
  96. geodispbench3d-0.2.0/src/geodispbench3d_f2s3/conf/tool/f2s3.yaml +74 -0
  97. geodispbench3d-0.2.0/src/geodispbench3d_f2s3/output_parser.py +230 -0
  98. geodispbench3d-0.2.0/src/geodispbench3d_iof3d/__init__.py +88 -0
  99. geodispbench3d-0.2.0/src/geodispbench3d_iof3d/_sweep_cli.py +143 -0
  100. geodispbench3d-0.2.0/src/geodispbench3d_iof3d/adapter.py +580 -0
  101. geodispbench3d-0.2.0/src/geodispbench3d_iof3d/cli.py +28 -0
  102. geodispbench3d-0.2.0/src/geodispbench3d_iof3d/conf/config_ax.yaml +14 -0
  103. geodispbench3d-0.2.0/src/geodispbench3d_iof3d/conf/tool/iof3d.yaml +69 -0
  104. geodispbench3d-0.2.0/src/geodispbench3d_iof3d/factory.py +137 -0
  105. geodispbench3d-0.2.0/src/geodispbench3d_iof3d/output_parser.py +187 -0
  106. geodispbench3d-0.2.0/tests/conftest.py +18 -0
  107. geodispbench3d-0.2.0/tests/core/__init__.py +0 -0
  108. geodispbench3d-0.2.0/tests/core/test_analyze.py +183 -0
  109. geodispbench3d-0.2.0/tests/core/test_cli.py +816 -0
  110. geodispbench3d-0.2.0/tests/core/test_cli_adapter.py +146 -0
  111. geodispbench3d-0.2.0/tests/core/test_evaluation.py +314 -0
  112. geodispbench3d-0.2.0/tests/core/test_imports.py +48 -0
  113. geodispbench3d-0.2.0/tests/core/test_iof3d_import_guard.py +138 -0
  114. geodispbench3d-0.2.0/tests/core/test_loaders.py +133 -0
  115. geodispbench3d-0.2.0/tests/core/test_metrics_builtins.py +94 -0
  116. geodispbench3d-0.2.0/tests/core/test_packaging_metadata.py +138 -0
  117. geodispbench3d-0.2.0/tests/core/test_parameters.py +95 -0
  118. geodispbench3d-0.2.0/tests/core/test_predictions_cache.py +118 -0
  119. geodispbench3d-0.2.0/tests/core/test_rescore.py +343 -0
  120. geodispbench3d-0.2.0/tests/core/test_runner.py +748 -0
  121. geodispbench3d-0.2.0/tests/core/test_runner_failure.py +413 -0
  122. geodispbench3d-0.2.0/tests/core/test_store.py +69 -0
  123. geodispbench3d-0.2.0/tests/f2s3/__init__.py +0 -0
  124. geodispbench3d-0.2.0/tests/f2s3/conftest.py +18 -0
  125. geodispbench3d-0.2.0/tests/f2s3/test_parser.py +77 -0
  126. geodispbench3d-0.2.0/tests/iof3d/__init__.py +0 -0
  127. geodispbench3d-0.2.0/tests/iof3d/conftest.py +11 -0
  128. geodispbench3d-0.2.0/tests/iof3d/test_adapter.py +78 -0
@@ -0,0 +1,337 @@
1
+ <!-- GSD:project-start source:PROJECT.md -->
2
+
3
+ ## Project
4
+
5
+ **geodispbench3d — Publication Readiness**
6
+
7
+ `geodispbench3d` is a mature, tool-agnostic benchmark framework for 3D
8
+ displacement / optical-flow tools: a YAML-driven front end (suite → tool +
9
+ dataset + metrics), Bayesian hyperparameter sweeps via Ax, three execution
10
+ modes (`sweep`, `rescore`, `analyze`) over one evaluation core, a pluggable
11
+ `ToolAdapter` contract with two shipped integrations (iof3D, F2S3),
12
+ provenance-first persistence, and a Streamlit dashboard.
13
+
14
+ This milestone takes the existing (already BSD-3-Clause) codebase from "works
15
+ for us" to **publication-ready for public release on PyPI with CI/CD** — gated
16
+ behind a code-health pass that builds confidence in the codebase before
17
+ anything ships.
18
+
19
+ **Core Value:** Confidence: nothing is published to PyPI until the codebase is demonstrably
20
+ lean, correct, well-tested, and its CLI-integration story is sound. The audit
21
+ comes before the cleanup, and the cleanup comes before the release.
22
+
23
+ ### Constraints
24
+
25
+ - **Tech stack**: Python ~=3.11/3.12, numpy 2.0 pin, Ax / Hydra / OmegaConf — preserve; transitive tool stacks must stay NumPy-2 compatible.
26
+ - **Dev environment**: all python/pip/pytest invocations must go through the mandated conda env per `AGENTS.md`.
27
+ - **Process — branching**: GSD work stays on `develop` and phase branches; PRs to `main` happen only at milestone completion and must strip the `.planning/` folder.
28
+ - **Process — review**: internal phase-plan reviews are run through the codex CLI.
29
+ - **Licensing**: already BSD-3-Clause; the `Private :: Do Not Upload` classifier and the README "Proprietary" line must be removed/reconciled before any public PyPI publish.
30
+
31
+ <!-- GSD:project-end -->
32
+
33
+ <!-- GSD:stack-start source:codebase/STACK.md -->
34
+
35
+ ## Technology Stack
36
+
37
+ ## Languages
38
+
39
+ - Python `~=3.11` (declared `requires-python = "~=3.11"` in `pyproject.toml`; classifiers list 3.11 and 3.12) — all source under `src/`.
40
+ - YAML — declarative benchmark definitions (`benchmarks/**/*.yaml`, `src/**/conf/**/*.yaml`) for suites, datasets, metrics, analyses, and tool wiring.
41
+ - JSON — JSON Schema validation files (`src/geodispbench3d/conf/schema/*.json`).
42
+
43
+ ## Runtime
44
+
45
+ - CPython 3.11/3.12. Local development is pinned to a Conda env `iof3d_cosicorr3d-dev312` (see `AGENTS.md`); the F2S3 binary runs in a separate Conda env `f2s3-dev312` invoked via `conda run`.
46
+ - CI runs on Python 3.12 (`.github/workflows/ci.yml`).
47
+ - pip (editable installs, `pip install -e .[extra]`). Conda is used only for environment isolation, not dependency resolution.
48
+ - Lockfile: missing. Dependencies are version-range pinned in `pyproject.toml`, not lockfile-pinned. CI pins lint tools (`ruff==0.8.4`, `pyright==1.1.392`) inline.
49
+
50
+ ## Frameworks
51
+
52
+ - `ax-platform ~= 1.1` — Bayesian hyperparameter optimization engine. Driven via `AxClient` in `src/geodispbench3d/sweep/runner.py` (with a fallback import path for older Ax).
53
+ - `hydra-core ~= 1.3` — configuration composition / structured config. Used in `src/geodispbench3d/cli.py` and the iof3D adapter for `AppConfig` assembly.
54
+ - `omegaconf ~= 2.3` — config object model underpinning Hydra; used throughout for YAML <-> dataclass translation (`OmegaConf.create`, `OmegaConf.save`).
55
+ - `numpy ~= 2.0` — numerical core for displacement/point-cloud math (e.g. `src/geodispbench3d_f2s3/output_parser.py`).
56
+ - `pandas` (unpinned) — DataFrame model for the parquet results store (`src/geodispbench3d/results/store.py`).
57
+ - `pytest ~= 8.4` (`dev` extra) — test runner. Suites split into `tests/core`, `tests/iof3d`, `tests/f2s3`.
58
+ - `coverage ~= 7.0` (`dev` extra) — coverage measurement.
59
+ - `setuptools` + `setuptools_scm` — build backend (`build-system.build-backend = "setuptools.build_meta"`); version derived from git tags, written to `src/geodispbench3d/_version.py`.
60
+ - `ruff ~= 0.8` — linter + formatter (replaces black/isort/flake8). Config in `pyproject.toml` `[tool.ruff]`.
61
+ - `pyright ~= 1.1` — static type checker. Config in `pyrightconfig.json` (basic mode).
62
+ - `pre-commit ~= 4.3` — git hook orchestration (`.pre-commit-config.yaml`).
63
+ - `sphinx ~= 5.1` (`docs` extra) — documentation builder.
64
+
65
+ ## Key Dependencies
66
+
67
+ - `ax-platform ~= 1.1` — without it sweep orchestration cannot run (raised as `ImportError` in `sweep/runner.py`).
68
+ - `hydra-core` / `omegaconf` — config backbone; every suite/dataset/tool YAML is parsed through them.
69
+ - `numpy ~= 2.0` — note the major-version pin; transitive tool stacks must be NumPy-2 compatible.
70
+ - `streamlit ~= 1.41` (`dashboard` extra) — results dashboard UI (`src/geodispbench3d/dashboard/app.py`).
71
+ - `altair ~= 5.4` (`dashboard` extra) — charting inside the dashboard (optional; guarded import).
72
+ - `duckdb ~= 1.4` (`dashboard` extra) — ad-hoc parquet querying. Readers do not require it; pandas is the default reader.
73
+ - `iof3d` extra: `iof3D ~= 0.1`, `pchandler`, `pc2img` — pulls iof3D's full pipeline stack (torch, ptlflow, opencv) transitively. Imported at module level only in `src/geodispbench3d_iof3d/`.
74
+ - `f2s3` extra: empty (`f2s3 = []`). The F2S3 adapter drives the F2S3 binary via subprocess; the Python lib is not required.
75
+
76
+ ## Configuration
77
+
78
+ - `GEODISPBENCH3D_PARQUET` — optional env var pointing the dashboard at a results parquet (`src/geodispbench3d/cli.py`, `src/geodispbench3d/dashboard/app.py`). No `.env` file is used; no secrets are read from the environment.
79
+ - `pyproject.toml` — single source of build, dependency, extras, ruff, and setuptools_scm config.
80
+ - `pyrightconfig.json` — type-checker config.
81
+ - `release-please-config.json` + `.release-please-manifest.json` — automated release/changelog config.
82
+ - Package data (`conf/**/*.yaml`, `conf/**/*.json`) is bundled via `[tool.setuptools.package-data]`.
83
+
84
+ ## Platform Requirements
85
+
86
+ - Conda env `iof3d_cosicorr3d-dev312` (mandated by `AGENTS.md`; bare `python`/`pip`/`pytest` forbidden).
87
+ - Separate Conda env `f2s3-dev312` required to exercise the F2S3 adapter end-to-end.
88
+ - GPU/CUDA implied transitively when the `iof3d` extra is installed (torch/ptlflow stack); the framework core is CPU-only.
89
+ - Distributed as an sdist + wheel to a Python package index (currently flagged `Private :: Do Not Upload`; the classifier must be removed before PyPI publish). Console entry points: `geodispbench3d` and `iof3d-ax` (`[project.scripts]`).
90
+
91
+ <!-- GSD:stack-end -->
92
+
93
+ <!-- GSD:conventions-start source:CONVENTIONS.md -->
94
+
95
+ ## Conventions
96
+
97
+ ## Tooling Baseline
98
+
99
+ - Ruff (`pyproject.toml` `[tool.ruff]`): `line-length = 100`, `target-version = "py311"`.
100
+ - Format: `quote-style = "double"`, `indent-style = "space"` (4 spaces).
101
+ - Lint select set: `["E", "F", "B", "I", "UP", "W"]` (pycodestyle errors, pyflakes, flake8-bugbear, isort, pyupgrade, pycodestyle warnings).
102
+ - `E501` (line length) is ignored in lint — the formatter owns wrapping.
103
+ - Pyright (`pyrightconfig.json`): `typeCheckingMode = "basic"`, `pythonVersion = "3.11"`.
104
+ - `strictListInference`, `strictDictionaryInference`, `strictSetInference` all on.
105
+ - `reportMissingImports = "warning"`, `reportMissingTypeStubs = "none"`.
106
+ - Runs whole-project (`pass_filenames: false`) — cross-file type errors are caught.
107
+
108
+ ## Naming Patterns
109
+
110
+ - snake_case module names: `predictions_cache.py`, `cli_adapter.py`, `trial_record.py`.
111
+ - Package dirs are flat single words: `tool/`, `dataset/`, `metrics/`, `sweep/`, `suite/`, `results/`, `analysis/`, `dashboard/`.
112
+ - Tool-adapter packages live as sibling top-level packages prefixed with the core package name: `geodispbench3d_iof3d/`, `geodispbench3d_f2s3/`.
113
+ - snake_case: `run_trial`, `evaluate_trial`, `load_suite`, `build_parameter_specs`.
114
+ - Private/internal helpers prefixed with a single underscore: `_build_argv`, `_invoke_metric`, `_parser_fn_repr`.
115
+ - Module-level private helpers (underscore prefix) sit at the bottom of the file,
116
+ - snake_case throughout. Instance attributes that are internal use a leading underscore.
117
+ - PascalCase: `ToolAdapter`, `TrialRequest`, `AxSweepRunner`, `MetricDefinition`, `SuiteConfig`, `ResultsStore`.
118
+ - Spec/config dataclasses end in `Spec`, `Config`, `Options`, `Definition`,
119
+ - Named by dotted module path under the package root:
120
+
121
+ ## Code Style
122
+
123
+ - Use modern syntax: `str | None`, `list[str]`, `dict[str, Any]`, rather than `Optional[str]`, `List[str]`, `Dict[str, Any]`.
124
+ - Import abstract container types from `collections.abc` (e.g. `Mapping`, `Sequence`, `Callable`) rather than from `typing`.
125
+ - `from typing import Any` is the one `typing` import in regular use.
126
+ - Return types are always annotated, including `-> None`.
127
+ - `@dataclass(frozen=True)` is the norm for value/spec types
128
+ - Use `field(default_factory=dict)` for mutable defaults
129
+ - Keyword-only constructors via `*` are used for multi-arg classes
130
+
131
+ ## Import Organization
132
+
133
+ - The CLI (`src/geodispbench3d/cli.py`) imports heavy/optional subsystems
134
+ - Optional dependencies are guarded with `try/except ImportError` that re-raises
135
+ - Keep `geodispbench3d.*` free of any `iof3D` / `pchandler` / `pc2img` import —
136
+
137
+ ## Error Handling
138
+
139
+ - Use `{value!r}` (repr) when echoing user/config input.
140
+ - `ValueError` for bad values, `TypeError` for shape/contract violations, `FileNotFoundError` for missing config files, `ImportError` for missing optional dependencies.
141
+ - Chain with `from exc` when re-raising on top of a caught exception
142
+ - Always annotate these with a `# pragma: no cover - <reason>` comment explaining
143
+ - Never let observability/caching/provenance failures break the primary path.
144
+
145
+ ## Logging
146
+
147
+ - Modules accept an optional `logger: logging.Logger | None = None` parameter and
148
+ - Use lazy `%`-style args, never f-strings, in log calls, e.g. `logger.info("trial %s finished", trial_index)`.
149
+ - Level usage: `info` for run milestones, `warning` for skips/degradation, `debug` for swallowed best-effort failures, `logger.exception` for failed trials.
150
+ - CLI entrypoints configure the root logger with the format
151
+
152
+ ## Comments
153
+
154
+ ## Function & Module Design
155
+
156
+ - **Keyword-only public APIs:** multi-argument functions and constructors force
157
+ - **Return immutable/plain dicts** from boundary functions (`dict(result.scalar_metrics)`).
158
+ - **`__all__` is declared in every public module** (33 of the source files) and
159
+ - **Barrel `__init__.py`** re-exports the subpackage's public surface and declares
160
+ - **Runtime introspection for forward-compat:** `inspect.signature(...)` is used
161
+
162
+ <!-- GSD:conventions-end -->
163
+
164
+ <!-- GSD:architecture-start source:ARCHITECTURE.md -->
165
+
166
+ ## Architecture
167
+
168
+ ## System Overview
169
+
170
+ ```text
171
+
172
+ ```
173
+
174
+ ## Component Responsibilities
175
+
176
+ | Component | Responsibility | File |
177
+ |-----------|----------------|------|
178
+ | CLI dispatcher | Parse argv, route to run/rescore/analyze/dashboard/list-metrics | `src/geodispbench3d/cli.py` |
179
+ | Suite loader | Parse `suite.yaml`, eagerly load referenced tool/dataset/metrics configs | `src/geodispbench3d/suite/loader.py` |
180
+ | Tool loader | Build a `ToolAdapter` from `tool.yaml` (cli/callable/custom/factory) | `src/geodispbench3d/tool/loader.py` |
181
+ | Tool adapter contract | Adapter-neutral trial invocation interface | `src/geodispbench3d/tool/base.py` |
182
+ | CLI adapter | Run the tool as a subprocess, one process per trial | `src/geodispbench3d/tool/cli_adapter.py` |
183
+ | Callable adapter | Run an in-process Python callable per trial | `src/geodispbench3d/tool/callable_adapter.py` |
184
+ | Sweep runner | Drive the Ax optimization loop, one trial at a time | `src/geodispbench3d/sweep/runner.py` |
185
+ | Parameter grammar | Translate sweep-param YAML into Ax parameter specs | `src/geodispbench3d/sweep/parameters.py` |
186
+ | Evaluation glue | Bridge tool outputs → parser → metric registry | `src/geodispbench3d/sweep/evaluation.py` |
187
+ | Metric registry | Resolve and cache metric callables from dotted paths | `src/geodispbench3d/metrics/registry.py` |
188
+ | Built-in metrics | Reference scalar + record metric implementations | `src/geodispbench3d/metrics/builtins.py` |
189
+ | Dataset schema | Parse `dataset.yaml` into cases/scans/ground-truth specs | `src/geodispbench3d/dataset/schema.py` |
190
+ | Ground-truth loader | Lazily load GT contents, dispatched on `kind` | `src/geodispbench3d/dataset/ground_truth.py` |
191
+ | Trial record | Read/write per-run `ax_trial/summary.json` + provenance | `src/geodispbench3d/sweep/trial_record.py` |
192
+ | Rescore runner | Re-score existing run dirs without invoking the tool | `src/geodispbench3d/sweep/rescore.py` |
193
+ | Analyze runner | Score cached predictions; no tool, no dataset scan | `src/geodispbench3d/analysis/runner.py` |
194
+ | Predictions cache | Persist/read phase-2 parser output keyed by provenance | `src/geodispbench3d/results/predictions_cache.py` |
195
+ | Results store | Append record rows to a parquet file | `src/geodispbench3d/results/store.py` |
196
+ | Dashboard | Streamlit UI for exploring the results parquet | `src/geodispbench3d/dashboard/app.py` |
197
+ | iof3D plugin | Factory + in-process adapter + output parser for iof3D | `src/geodispbench3d_iof3d/` |
198
+ | F2S3 plugin | Output parser for F2S3 (driven via the generic CLI adapter) | `src/geodispbench3d_f2s3/` |
199
+
200
+ ## Pattern Overview
201
+
202
+ - The core (`geodispbench3d`) is tool-agnostic. It never imports a specific tool; tools enter through the `ToolAdapter` contract and dotted-path callables resolved at load time.
203
+ - Configuration is declarative: `suite.yaml` composes a `tool.yaml`, a `dataset.yaml`, and a `metrics.yaml`. All YAML is parsed into frozen dataclasses up front (`OmegaConf.to_container` → dataclass).
204
+ - Three execution modes share one evaluation core (`evaluate_trial`): **sweep** (Ax-driven, runs the tool), **rescore** (re-runs metrics over existing run dirs), **analyze** (scores cached predictions, no tool involvement).
205
+ - A two-phase trial model: phase 1 = adapter produces raw outputs in a run dir; phase 2 = an `output_parser` turns those into a `prediction = {per_point: [...]}` mapping. Phase 2 output is cached so rescore/analyze can skip it.
206
+ - Provenance-first persistence: every run dir carries an `ax_trial/summary.json` recording tool/dataset/parser provenance, enabling reproducible downstream rescoring without the original suite YAML.
207
+
208
+ ## Layers
209
+
210
+ - Purpose: Argument parsing and command routing; lazy imports keep optional deps (Ax, streamlit) out of the hot path until needed.
211
+ - Location: `src/geodispbench3d/cli.py`
212
+ - Contains: `main`, `_cmd_run`, `_cmd_sweep`, `_cmd_rescore`, `_cmd_analyze`, `_cmd_dashboard`, `_cmd_list_metrics`.
213
+ - Depends on: suite/analysis loaders, sweep runner, results store.
214
+ - Used by: console entry point `geodispbench3d = geodispbench3d.cli:main`.
215
+ - Purpose: Turn YAML files into validated, frozen dataclasses with resolved paths.
216
+ - Location: `src/geodispbench3d/suite/loader.py`, `tool/loader.py`, `dataset/schema.py`, `metrics/registry.py`, `analysis/loader.py`.
217
+ - Contains: `SuiteConfig`, `ToolConfig`, `DatasetSpec`, `MetricsConfig`, `AnalysisConfig`, and their `load_*` functions.
218
+ - Depends on: `omegaconf`, `importlib` (dotted-path resolution).
219
+ - Used by: CLI layer and orchestration layer.
220
+ - Purpose: Drive the three execution modes.
221
+ - Location: `src/geodispbench3d/sweep/runner.py`, `sweep/rescore.py`, `analysis/runner.py`.
222
+ - Contains: `AxSweepRunner`, `rescore_suite`, `analyze`.
223
+ - Depends on: `ax-platform` (sweep only), the evaluation glue, the persistence layer.
224
+ - Used by: CLI layer.
225
+ - Purpose: Single chokepoint that all three modes funnel through; runs the parser and dispatches metrics.
226
+ - Location: `src/geodispbench3d/sweep/evaluation.py` (`evaluate_trial`).
227
+ - Depends on: metric registry, ground-truth loader.
228
+ - Used by: sweep runner, rescore runner, analyze runner.
229
+ - Purpose: Encapsulate tool-specific invocation behind a uniform contract.
230
+ - Location: `src/geodispbench3d/tool/` and the external plugin packages.
231
+ - Contains: `ToolAdapter` ABC, `CliToolAdapter`, `CallableToolAdapter`, and the iof3D/F2S3 plugins.
232
+ - Depends on: the tool itself (only the chosen adapter's package imports it).
233
+ - Used by: the sweep runner via `adapter.run_trial`.
234
+ - Purpose: Durable outputs the dashboard and downstream passes consume.
235
+ - Location: `src/geodispbench3d/results/store.py`, `results/predictions_cache.py`, `sweep/trial_record.py`.
236
+ - Contains: `ResultsStore` (parquet), predictions cache (JSON), trial summaries (JSON).
237
+ - Depends on: `pandas`, `numpy`.
238
+ - Used by: orchestration layer.
239
+
240
+ ## Data Flow
241
+
242
+ ### Primary Request Path (`geodispbench3d run suite.yaml`)
243
+
244
+ ### Rescore Flow (`run --rescore`)
245
+
246
+ ### Analyze Flow (`analyze analysis.yaml`)
247
+
248
+ - No in-process global mutable state. The Ax experiment state lives inside the `AxClient` instance held by `AxSweepRunner`. Durable state is the parquet file, the predictions cache, and per-run `summary.json` files.
249
+
250
+ ## Key Abstractions
251
+
252
+ - Purpose: The single seam between the tool-agnostic core and a specific tool.
253
+ - Examples: `src/geodispbench3d/tool/base.py` (ABC), `cli_adapter.py`, `callable_adapter.py`, `src/geodispbench3d_iof3d/adapter.py`.
254
+ - Pattern: Abstract base class with one required method (`run_trial`) and two optional lifecycle hooks (`prepare`/`teardown`); subclasses opt into in-process execution via `in_process_safe`.
255
+ - Purpose: Adapter-neutral description of a trial and its results.
256
+ - Examples: `TrialRequest`, `TrialOutputs`, `TrialResult` in `src/geodispbench3d/tool/base.py`.
257
+ - Pattern: Frozen dataclasses passed across the adapter boundary.
258
+ - Purpose: Declarative parameter-space grammar mapped to Ax's spec dicts.
259
+ - Examples: `src/geodispbench3d/sweep/parameters.py`.
260
+ - Pattern: Frozen dataclass + pure translation function (`build_parameter_specs`), with conditional activation (`activates_on`).
261
+ - Purpose: Late-bound, cached resolution of metric callables declared in YAML.
262
+ - Examples: `src/geodispbench3d/metrics/registry.py`, implementations in `metrics/builtins.py`.
263
+ - Pattern: Dependency-injection by name — the runner injects only the inputs a metric declares it `needs` (`prediction`, `ground_truth`, `trial_meta`, `case_meta`).
264
+ - Purpose: Make a run dir self-describing so downstream passes don't need the original suite.
265
+ - Examples: `ToolProvenance`, `DatasetProvenance`, `ParserProvenance` in `src/geodispbench3d/sweep/trial_record.py`.
266
+ - Pattern: Frozen dataclasses serialized to/from `summary.json`.
267
+
268
+ ## Entry Points
269
+
270
+ - Location: `src/geodispbench3d/cli.py` (`main`), declared in `pyproject.toml` `[project.scripts]`.
271
+ - Triggers: User shell invocation.
272
+ - Responsibilities: Route to run/rescore/analyze/dashboard/list-metrics.
273
+ - Location: `src/geodispbench3d_iof3d/cli.py` (`main`).
274
+ - Triggers: Legacy Hydra-style CLI for iof3D sweeps, kept for backward compatibility with the pre-framework `iof3D_analysis.ax` workflow.
275
+ - Responsibilities: Hydra-config-driven iof3D sweep, independent of the generic suite path.
276
+ - Location: `src/geodispbench3d/dashboard/app.py`.
277
+ - Triggers: `geodispbench3d dashboard` shells out to `streamlit run app.py`.
278
+ - Responsibilities: Interactive exploration of the results parquet.
279
+
280
+ ## Architectural Constraints
281
+
282
+ - **Threading / concurrency:** Single-threaded trial loop today. `SearchConfig`/`ExecutionConfig` expose `parallel_trials` but the runner evaluates trials sequentially (`AxSweepRunner.run_with_suite`). Parallelism is a future extension, not current behavior.
283
+ - **Process isolation:** Adapters declare `in_process_safe`. `CliToolAdapter` is the safe default (subprocess per trial; tool crashes don't kill the sweep). `CallableToolAdapter` / `Iof3dCallableAdapter` run in-process for speed and must be re-entrant across trials (e.g. iof3D reinitialises CUDA per trial).
284
+ - **Optional dependencies gated by extras:** `ax-platform` is imported lazily and only required for the sweep path; `streamlit`/`altair`/`duckdb` only for the dashboard extra; `iof3D`/`pchandler`/`pc2img` only for the `[iof3d]` extra. The `[iof3d]` adapter imports the tool stack at module level, so installing that extra transitively pulls torch/opencv/ptlflow.
285
+ - **Global state:** None at module level. State is per-`AxSweepRunner`-instance or on disk.
286
+ - **Circular imports:** None observed. `tool/loader.py` imports from `sweep/parameters.py`; `sweep/` imports from `tool/base.py` and `metrics/`; the dependency graph is acyclic with `tool/base.py` and the dataclass schemas at the leaves.
287
+ - **Append-only persistence:** `ResultsStore.append` reads the full existing parquet, concatenates, and rewrites. This is O(n) per append and not concurrency-safe (last writer wins).
288
+
289
+ ## Anti-Patterns
290
+
291
+ ### Orchestration code treats `SuiteConfig` as untyped
292
+
293
+ ### Duplicated hyperparameter-coercion logic
294
+
295
+ ### `_parser_fn_repr` duplicated across modules
296
+
297
+ ## Error Handling
298
+
299
+ - Config loading raises eagerly with descriptive `ValueError`/`FileNotFoundError` (e.g. `load_suite` validates that `tool`/`dataset`/`metrics` are present and that the objective is declared in metrics).
300
+ - Trial execution is defensive: a failing trial is caught, logged via `logger.exception`, and reported to Ax with `log_trial_failure` so the sweep continues (`src/geodispbench3d/sweep/runner.py:159`, `:215`).
301
+ - Best-effort side effects (provenance stamping, prediction caching, audit-log appends) are wrapped in broad `except Exception` with debug-level logging so they never fail a trial (`src/geodispbench3d/sweep/runner.py:295`, `:324`).
302
+ - Metric callables that raise are caught in `_invoke_metric` and skipped, not propagated (`src/geodispbench3d/sweep/evaluation.py:177`).
303
+ - CLI commands return integer exit codes; partial success in rescore/analyze returns `1` when not all targets scored.
304
+
305
+ ## Cross-Cutting Concerns
306
+
307
+ <!-- GSD:architecture-end -->
308
+
309
+ <!-- GSD:skills-start source:skills/ -->
310
+
311
+ ## Project Skills
312
+
313
+ No project skills found. Add skills to any of: `.claude/skills/`, `.agents/skills/`, `.cursor/skills/`, `.github/skills/`, or `.codex/skills/` with a `SKILL.md` index file.
314
+ <!-- GSD:skills-end -->
315
+
316
+ <!-- GSD:workflow-start source:GSD defaults -->
317
+
318
+ ## GSD Workflow Enforcement
319
+
320
+ Before using Edit, Write, or other file-changing tools, start work through a GSD command so planning artifacts and execution context stay in sync.
321
+
322
+ Use these entry points:
323
+
324
+ - `/gsd-quick` for small fixes, doc updates, and ad-hoc tasks
325
+ - `/gsd-debug` for investigation and bug fixing
326
+ - `/gsd-execute-phase` for planned phase work
327
+
328
+ Do not make direct repo edits outside a GSD workflow unless the user explicitly asks to bypass it.
329
+ <!-- GSD:workflow-end -->
330
+
331
+ <!-- GSD:profile-start -->
332
+
333
+ ## Developer Profile
334
+
335
+ > Profile not yet configured. Run `/gsd-profile-user` to generate your developer profile.
336
+ > This section is managed by `generate-claude-profile` -- do not edit manually.
337
+ <!-- GSD:profile-end -->
@@ -0,0 +1,49 @@
1
+ # Parametrized composite: Python toolchain + pip cache + an editable install of
2
+ # the framework with the requested extras. This is the SINGLE dependency-install
3
+ # implementation for the CI jobs (review MEDIUM 05-05): the ci.yml test matrix,
4
+ # build, and docs jobs MUST consume it so there is one place that wires
5
+ # setup-python + cache + `pip install -e .[extras]`.
6
+ #
7
+ # Scope note: the Plan 03 publish workflows (publish-pypi.yml / publish-testpypi.yml)
8
+ # deliberately do NOT use this composite — they use inline setup so they verify
9
+ # independently (they predate this action in Wave 2). The "shared setup" claim is
10
+ # therefore scoped to CI jobs only.
11
+ #
12
+ # Caller contract: the caller MUST `actions/checkout` BEFORE invoking this
13
+ # composite — a local (`./.github/actions/...`) action needs the repository on
14
+ # disk, and the editable install resolves `pyproject.toml` from the working dir.
15
+ #
16
+ # Action SHAs are pinned per D-07 (supply-chain). Pins sourced from
17
+ # 05-RESEARCH.md > Standard Stack:
18
+ # actions/setup-python a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
19
+ # actions/cache 27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
20
+ name: Setup Python + deps (geodispbench3d)
21
+ description: Install Python and an editable geodispbench3d with the requested extras (shared CI setup).
22
+
23
+ inputs:
24
+ python-version:
25
+ description: Python version passed to actions/setup-python.
26
+ required: false
27
+ default: "3.12"
28
+ extras:
29
+ description: Comma-separated extras for the editable install, e.g. "f2s3,dev" or "docs".
30
+ required: false
31
+ default: "dev"
32
+
33
+ runs:
34
+ using: composite
35
+ steps:
36
+ - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
37
+ with:
38
+ python-version: ${{ inputs.python-version }}
39
+
40
+ - uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
41
+ with:
42
+ path: ~/.cache/pip
43
+ key: pip-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('pyproject.toml') }}
44
+ restore-keys: |
45
+ pip-${{ runner.os }}-${{ inputs.python-version }}-
46
+
47
+ - name: Install geodispbench3d with extras
48
+ shell: bash
49
+ run: pip install -e ".[${{ inputs.extras }}]"
@@ -0,0 +1,100 @@
1
+ # Branch-protection rulesets
2
+
3
+ This directory holds the **branch-protection enforcement layer** for
4
+ `gseg-ethz/geodispbench3d` as version-controlled JSON, plus the script that
5
+ applies it. The rulesets are a *deliverable*: they are reviewed and committed
6
+ here now, and **enabled at milestone-ship**, not during phase execution.
7
+
8
+ | File | Purpose |
9
+ |------|---------|
10
+ | `protect-main.json` | Ruleset on `refs/heads/main` |
11
+ | `protect-develop.json` | Ruleset on `refs/heads/develop` |
12
+ | `../scripts/apply-rulesets.sh` | Idempotent create-or-update applier (`gh api`) |
13
+
14
+ ## What the rulesets enforce
15
+
16
+ Both payloads share one rule body; the only deltas are the ruleset `name` and the protected ref.
17
+
18
+ - **`pull_request`** — changes reach the branch only through a PR.
19
+ `required_approving_review_count: 0` (solo maintainer; no second reviewer is
20
+ required), and `allowed_merge_methods` is **`[squash, rebase]` only**. `merge`
21
+ is deliberately dropped: `required_linear_history` rejects merge commits, so
22
+ offering a merge button that always fails would only confuse (review MEDIUM
23
+ 05-04).
24
+ - **`required_status_checks`** — these contexts must pass before merge, pinned
25
+ **character-for-character** to the rendered CI job names:
26
+ - `Lint (ruff + pyright)`
27
+ - `Test (core, 3.12)`
28
+ - `Test (f2s3, 3.12)`
29
+ - `Build wheel + install smoke`
30
+
31
+ This is an **interface contract** with `ci.yml`. A one-character drift in a job
32
+ name silently leaves the gate unsatisfiable, so Plan 05 machine-checks equality
33
+ between these contexts and the rendered `ci.yml` job names
34
+ (`check_ci_ruleset_contexts.py`). If you rename a CI job, update these payloads
35
+ in the same change.
36
+ - **`non_fast_forward` + `deletion` + `required_linear_history`** — no force-push,
37
+ no branch deletion, no merge commits.
38
+ - **`bypass_actors: []`** — nobody bypasses the gate, including admins.
39
+ - **`enforcement: active`** — the ruleset is live once applied.
40
+
41
+ ### The `strict: false` tradeoff (deliberate)
42
+
43
+ `strict_required_status_checks_policy` is **`false`** in both payloads. With
44
+ strict policy *on*, a PR must be rebased onto the latest base before its green
45
+ checks count — every upstream commit re-invalidates every open PR's checks. With
46
+ it *off*, a PR that was green against a slightly out-of-date base can still merge.
47
+
48
+ For a solo-maintainer repo this is the right call: the churn of forced re-runs on
49
+ every base advance outweighs the small risk of a semantic conflict that the green
50
+ checks didn't catch. This is a conscious choice (review LOW 05-04), not an
51
+ oversight. Flip it to `true` if the project gains concurrent contributors.
52
+
53
+ ## When and how to apply (ship-time only)
54
+
55
+ **Do not run the apply script during normal development or phase execution.**
56
+ Activating required checks before the pipeline is green — or before the milestone
57
+ PR to `main` has passing checks and the gseg-ethz App token is wired — will lock
58
+ out the very PR that ships the milestone (self-lockout, RESEARCH Pitfall 4).
59
+
60
+ Run it once, at milestone-ship, after:
61
+
62
+ 1. CI is green on `main` and `develop` (all four contexts have actually run).
63
+ 2. The gseg-ethz GitHub App is installed and its `APP_ID` / `APP_PRIVATE_KEY`
64
+ secrets are wired (release-please authenticates through it).
65
+
66
+ Then, from a clone with an authenticated `gh`:
67
+
68
+ ```bash
69
+ # 1. Always dry-run first — runs the full preflight, writes nothing.
70
+ ./.github/scripts/apply-rulesets.sh --dry-run
71
+
72
+ # 2. Apply for real once the dry-run is clean.
73
+ ./.github/scripts/apply-rulesets.sh
74
+
75
+ # 3. Read back to confirm.
76
+ gh api repos/gseg-ethz/geodispbench3d/rulesets --jq '.[].name'
77
+ ```
78
+
79
+ ### Idempotent create-or-update
80
+
81
+ The script is safe to re-run. For each payload it GETs the repo's rulesets,
82
+ matches an existing ruleset by its `name`, and then **PUTs** (update) the matched
83
+ id or **POSTs** (create) a new one. A bare `POST` would create duplicate rulesets
84
+ on every re-run (review MEDIUM 05-04 / T-05-14); the name-match create-or-update
85
+ avoids that and prints the resulting ruleset id each time.
86
+
87
+ ### Preflight (always runs before any write)
88
+
89
+ On every run — including `--dry-run`, which never writes — the script verifies the following before any POST/PUT:
90
+
91
+ - **Authentication** — `gh auth status`.
92
+ - **Repo identity** — the gh context resolves to `gseg-ethz/geodispbench3d`.
93
+ - **App installation** — the GitHub App is installed on the repo.
94
+ - **Recent contexts** — every required status-check context has *recently
95
+ appeared* as a check run on each target branch's HEAD. A context that has never
96
+ run cannot be satisfied, so requiring it would block all merges. The preflight
97
+ refuses to proceed if any context is missing (T-05-09).
98
+
99
+ If any check fails, the script exits non-zero with a `preflight: FAIL [check]`
100
+ message and writes nothing.
@@ -0,0 +1,41 @@
1
+ {
2
+ "name": "protect-develop",
3
+ "target": "branch",
4
+ "enforcement": "active",
5
+ "bypass_actors": [],
6
+ "conditions": {
7
+ "ref_name": {
8
+ "include": ["refs/heads/develop"],
9
+ "exclude": []
10
+ }
11
+ },
12
+ "rules": [
13
+ { "type": "deletion" },
14
+ { "type": "non_fast_forward" },
15
+ { "type": "required_linear_history" },
16
+ {
17
+ "type": "pull_request",
18
+ "parameters": {
19
+ "required_approving_review_count": 0,
20
+ "dismiss_stale_reviews_on_push": false,
21
+ "require_code_owner_review": false,
22
+ "require_last_push_approval": false,
23
+ "required_review_thread_resolution": false,
24
+ "allowed_merge_methods": ["squash", "rebase"]
25
+ }
26
+ },
27
+ {
28
+ "type": "required_status_checks",
29
+ "parameters": {
30
+ "strict_required_status_checks_policy": false,
31
+ "do_not_enforce_on_create": false,
32
+ "required_status_checks": [
33
+ { "context": "Lint (ruff + pyright)" },
34
+ { "context": "Test (core, 3.12)" },
35
+ { "context": "Test (f2s3, 3.12)" },
36
+ { "context": "Build wheel + install smoke" }
37
+ ]
38
+ }
39
+ }
40
+ ]
41
+ }
@@ -0,0 +1,41 @@
1
+ {
2
+ "name": "protect-main",
3
+ "target": "branch",
4
+ "enforcement": "active",
5
+ "bypass_actors": [],
6
+ "conditions": {
7
+ "ref_name": {
8
+ "include": ["refs/heads/main"],
9
+ "exclude": []
10
+ }
11
+ },
12
+ "rules": [
13
+ { "type": "deletion" },
14
+ { "type": "non_fast_forward" },
15
+ { "type": "required_linear_history" },
16
+ {
17
+ "type": "pull_request",
18
+ "parameters": {
19
+ "required_approving_review_count": 0,
20
+ "dismiss_stale_reviews_on_push": false,
21
+ "require_code_owner_review": false,
22
+ "require_last_push_approval": false,
23
+ "required_review_thread_resolution": false,
24
+ "allowed_merge_methods": ["squash", "rebase"]
25
+ }
26
+ },
27
+ {
28
+ "type": "required_status_checks",
29
+ "parameters": {
30
+ "strict_required_status_checks_policy": false,
31
+ "do_not_enforce_on_create": false,
32
+ "required_status_checks": [
33
+ { "context": "Lint (ruff + pyright)" },
34
+ { "context": "Test (core, 3.12)" },
35
+ { "context": "Test (f2s3, 3.12)" },
36
+ { "context": "Build wheel + install smoke" }
37
+ ]
38
+ }
39
+ }
40
+ ]
41
+ }