geodispbench3d 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. geodispbench3d/__init__.py +19 -0
  2. geodispbench3d/_version.py +24 -0
  3. geodispbench3d/analysis/__init__.py +31 -0
  4. geodispbench3d/analysis/loader.py +209 -0
  5. geodispbench3d/analysis/runner.py +205 -0
  6. geodispbench3d/cli.py +469 -0
  7. geodispbench3d/conf/schema/analysis.schema.json +45 -0
  8. geodispbench3d/conf/schema/dataset.schema.json +45 -0
  9. geodispbench3d/conf/schema/metrics.schema.json +32 -0
  10. geodispbench3d/conf/schema/suite.schema.json +36 -0
  11. geodispbench3d/conf/schema/tool.schema.json +76 -0
  12. geodispbench3d/dashboard/__init__.py +3 -0
  13. geodispbench3d/dashboard/app.py +177 -0
  14. geodispbench3d/dataset/__init__.py +31 -0
  15. geodispbench3d/dataset/ground_truth.py +128 -0
  16. geodispbench3d/dataset/schema.py +150 -0
  17. geodispbench3d/diagnostics.py +42 -0
  18. geodispbench3d/metrics/__init__.py +19 -0
  19. geodispbench3d/metrics/builtins.py +259 -0
  20. geodispbench3d/metrics/registry.py +114 -0
  21. geodispbench3d/results/__init__.py +27 -0
  22. geodispbench3d/results/predictions_cache.py +219 -0
  23. geodispbench3d/results/store.py +56 -0
  24. geodispbench3d/suite/__init__.py +7 -0
  25. geodispbench3d/suite/loader.py +170 -0
  26. geodispbench3d/sweep/__init__.py +33 -0
  27. geodispbench3d/sweep/evaluation.py +215 -0
  28. geodispbench3d/sweep/parameters.py +223 -0
  29. geodispbench3d/sweep/rescore.py +475 -0
  30. geodispbench3d/sweep/runner.py +701 -0
  31. geodispbench3d/sweep/trial_record.py +369 -0
  32. geodispbench3d/tool/__init__.py +30 -0
  33. geodispbench3d/tool/base.py +134 -0
  34. geodispbench3d/tool/callable_adapter.py +131 -0
  35. geodispbench3d/tool/cli_adapter.py +533 -0
  36. geodispbench3d/tool/loader.py +253 -0
  37. geodispbench3d-0.2.0.dist-info/METADATA +132 -0
  38. geodispbench3d-0.2.0.dist-info/RECORD +55 -0
  39. geodispbench3d-0.2.0.dist-info/WHEEL +5 -0
  40. geodispbench3d-0.2.0.dist-info/entry_points.txt +3 -0
  41. geodispbench3d-0.2.0.dist-info/licenses/LICENSE +34 -0
  42. geodispbench3d-0.2.0.dist-info/scm_file_list.json +121 -0
  43. geodispbench3d-0.2.0.dist-info/scm_version.json +8 -0
  44. geodispbench3d-0.2.0.dist-info/top_level.txt +3 -0
  45. geodispbench3d_f2s3/__init__.py +13 -0
  46. geodispbench3d_f2s3/conf/tool/f2s3.yaml +74 -0
  47. geodispbench3d_f2s3/output_parser.py +230 -0
  48. geodispbench3d_iof3d/__init__.py +88 -0
  49. geodispbench3d_iof3d/_sweep_cli.py +143 -0
  50. geodispbench3d_iof3d/adapter.py +580 -0
  51. geodispbench3d_iof3d/cli.py +28 -0
  52. geodispbench3d_iof3d/conf/config_ax.yaml +14 -0
  53. geodispbench3d_iof3d/conf/tool/iof3d.yaml +69 -0
  54. geodispbench3d_iof3d/factory.py +137 -0
  55. geodispbench3d_iof3d/output_parser.py +187 -0
@@ -0,0 +1,19 @@
1
+ """geodispbench3d: a generic benchmark framework for 3D displacement / optical-flow tools.
2
+
3
+ The package is tool-agnostic: any tool that can be described by a
4
+ :class:`~geodispbench3d.tool.base.ToolAdapter` can be swept, evaluated against a
5
+ dataset, and scored with configurable metrics.
6
+
7
+ Public surface is intentionally small; see the submodule docstrings for details.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ __all__ = [
13
+ "tool",
14
+ "dataset",
15
+ "metrics",
16
+ "sweep",
17
+ "suite",
18
+ "results",
19
+ ]
@@ -0,0 +1,24 @@
1
+ # file generated by vcs-versioning
2
+ # don't change, don't track in version control
3
+ from __future__ import annotations
4
+
5
+ __all__ = [
6
+ "__version__",
7
+ "__version_tuple__",
8
+ "version",
9
+ "version_tuple",
10
+ "__commit_id__",
11
+ "commit_id",
12
+ ]
13
+
14
+ version: str
15
+ __version__: str
16
+ __version_tuple__: tuple[int | str, ...]
17
+ version_tuple: tuple[int | str, ...]
18
+ commit_id: str | None
19
+ __commit_id__: str | None
20
+
21
+ __version__ = version = '0.2.0'
22
+ __version_tuple__ = version_tuple = (0, 2, 0)
23
+
24
+ __commit_id__ = commit_id = 'g22a498d'
@@ -0,0 +1,31 @@
1
+ """Tool-agnostic analysis: re-score cached predictions across tools.
2
+
3
+ Where a suite's ``rescore`` pass is bound to one tool's run directories
4
+ and runs phase 2 against tool-specific outputs, an analysis YAML works
5
+ purely from the predictions cache. Predictions live in the common
6
+ ``{per_point: [...]}`` shape, so an analysis can mix iof3D and F2S3
7
+ results in one parquet output, with the metric set as the only knob.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from .loader import (
13
+ AnalysisConfig,
14
+ PredictionFilter,
15
+ PredictionRef,
16
+ PredictionsConfig,
17
+ ResultsConfig,
18
+ load_analysis,
19
+ )
20
+ from .runner import AnalysisSummary, analyze
21
+
22
+ __all__ = [
23
+ "AnalysisConfig",
24
+ "AnalysisSummary",
25
+ "PredictionFilter",
26
+ "PredictionRef",
27
+ "PredictionsConfig",
28
+ "ResultsConfig",
29
+ "analyze",
30
+ "load_analysis",
31
+ ]
@@ -0,0 +1,209 @@
1
+ """``analysis.yaml`` schema and loader.
2
+
3
+ An analysis YAML composes a dataset + a metrics file with a set of
4
+ *cached predictions*. There is no tool reference: predictions are tool-
5
+ agnostic by the time they reach the cache, so the analysis verb can mix
6
+ runs from any number of tools in a single parquet output.
7
+
8
+ Three ways to point at predictions, mix-and-match in any combination:
9
+
10
+ predictions:
11
+ - path: <abs/relative.json> # explicit single file
12
+ - glob: <pattern> # any pattern resolved relative
13
+ # to the analysis YAML
14
+ - root: <dir> # walk the cache layout under
15
+ filter: # this root, optionally
16
+ tool_id: iof3d-v2 # filtering by provenance
17
+ dataset_id: mattertal # segment. Each filter is
18
+ case: mattertal-all # optional.
19
+
20
+ Resolution returns a flat list of prediction file paths in the order
21
+ declared (with glob results sorted lexicographically).
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import glob as _glob
27
+ from collections.abc import Mapping, Sequence
28
+ from dataclasses import dataclass, field
29
+ from pathlib import Path
30
+ from typing import Any
31
+
32
+ from omegaconf import OmegaConf
33
+
34
+ from geodispbench3d.dataset.schema import DatasetSpec, load_dataset
35
+ from geodispbench3d.metrics.registry import MetricsConfig, load_metrics_config
36
+ from geodispbench3d.results.predictions_cache import find_predictions
37
+
38
+
39
@dataclass(frozen=True)
class PredictionFilter:
    """Optional provenance constraints for a ``root:`` entry in ``predictions:``.

    The cache layout is
    ``<root>/<tool_id>/<dataset_id>/<case>/<run_hash>.json``; every attribute
    constrains the path segment of the same name. An attribute left as
    ``None`` accepts any value in that segment.
    """

    # Tool segment to match, or None to accept every tool.
    tool_id: str | None = None
    # Dataset segment to match, or None to accept every dataset.
    dataset_id: str | None = None
    # Case segment to match, or None to accept every case.
    case: str | None = None
50
+
51
+
52
@dataclass(frozen=True)
class PredictionRef:
    """One declared source of cached predictions.

    A reference is expected to populate exactly one of ``path``, ``glob`` or
    ``root``; when several are set, :meth:`resolve` honours the first one in
    that order. ``filter`` is only consulted for ``root`` references.
    """

    # Explicit single prediction file.
    path: Path | None = None
    # Glob pattern, expanded with ``recursive=True``.
    glob: str | None = None
    # Cache root searched through ``find_predictions``.
    root: Path | None = None
    # Provenance filter forwarded to ``find_predictions`` for ``root`` entries.
    filter: PredictionFilter = field(default_factory=PredictionFilter)

    def resolve(self) -> list[Path]:
        """Return the prediction files this reference points at.

        Returns:
            * ``path``: a one-element list if the file exists, else ``[]``.
            * ``glob``: the matching regular files, sorted.
            * ``root``: whatever ``find_predictions`` returns for the root
              and the filter's ``tool_id`` / ``dataset_id`` / ``case``.
            * nothing populated: ``[]``.
        """
        if self.path is not None:
            return [self.path] if self.path.is_file() else []
        if self.glob is not None:
            # Recursive patterns such as ``**`` also match directories; keep
            # regular files only so this branch agrees with the ``path``
            # branch and callers never try to read a directory as JSON.
            hits = (Path(hit) for hit in _glob.glob(self.glob, recursive=True))
            return sorted(hit for hit in hits if hit.is_file())
        if self.root is not None:
            return find_predictions(
                self.root,
                tool_id=self.filter.tool_id,
                dataset_id=self.filter.dataset_id,
                case=self.filter.case,
            )
        return []
79
+
80
+
81
@dataclass(frozen=True)
class PredictionsConfig:
    """Holds the entries of the ``predictions:`` block and resolves them together."""

    # Prediction sources in declaration order.
    refs: Sequence[PredictionRef] = ()

    def resolve_all(self) -> list[Path]:
        """Return every referenced prediction path once, in first-seen order.

        Each reference is resolved in turn, every hit is made absolute with
        ``Path.resolve()``, and a path already produced earlier is dropped.
        """
        candidates = (
            found.resolve() for source in self.refs for found in source.resolve()
        )
        # dict keys keep insertion order, so this removes repeats while
        # preserving the order in which paths were first encountered.
        return list(dict.fromkeys(candidates))
98
+
99
+
100
@dataclass(frozen=True)
class ResultsConfig:
    """Output settings for an analysis."""

    # Parquet file location, or None when no path is configured.
    parquet_path: Path | None = None
103
+
104
+
105
@dataclass(frozen=True)
class AnalysisConfig:
    """Fully loaded analysis definition: every referenced config is resolved."""

    # Analysis identifier.
    id: str
    # Dataset whose cases the predictions are scored against.
    dataset: DatasetSpec
    # Metric configuration used for scoring.
    metrics: MetricsConfig
    # Sources of the cached predictions to score.
    predictions: PredictionsConfig
    # Output settings; defaults to an empty ResultsConfig.
    results: ResultsConfig = field(default_factory=ResultsConfig)
    # Optional label for the scoring pass.
    pass_id: str | None = None
    # Path of the YAML file this config was loaded from, when known.
    source_path: Path | None = None
116
+
117
+
118
def load_analysis(path: str | Path) -> AnalysisConfig:
    """Load an ``analysis.yaml`` and resolve everything it references.

    Relative ``dataset``, ``metrics``, prediction and ``results.parquet_path``
    entries are resolved against the directory containing the YAML file.

    Args:
        path: Location of the analysis YAML.

    Returns:
        The composed :class:`AnalysisConfig`. ``id`` falls back to the YAML
        file stem when it is absent or null, and ``pass_id`` is normalised
        to a string when present.

    Raises:
        FileNotFoundError: ``path`` does not point at an existing file.
        ValueError: the document is not a mapping, lacks ``dataset`` or
            ``metrics``, has a non-list or empty ``predictions``, or has a
            non-mapping ``results``.
    """

    yaml_path = Path(path).resolve()
    if not yaml_path.is_file():
        raise FileNotFoundError(f"Analysis YAML not found: {yaml_path}")

    raw = OmegaConf.to_container(OmegaConf.load(str(yaml_path)), resolve=True)
    if not isinstance(raw, dict):
        raise ValueError(f"Analysis YAML at {yaml_path} must be a mapping")

    base = yaml_path.parent

    dataset_ref = raw.get("dataset")
    metrics_ref = raw.get("metrics")
    if not (dataset_ref and metrics_ref):
        raise ValueError(f"Analysis {yaml_path} must reference 'dataset' and 'metrics'")

    dataset_spec = load_dataset(_resolve_path(dataset_ref, base))
    metrics_cfg = load_metrics_config(_resolve_path(metrics_ref, base))

    predictions_raw = raw.get("predictions") or []
    if not isinstance(predictions_raw, list):
        raise ValueError(f"Analysis {yaml_path}: 'predictions' must be a list")
    refs = tuple(_load_prediction_ref(entry, base) for entry in predictions_raw)
    if not refs:
        raise ValueError(f"Analysis {yaml_path}: at least one prediction source required")

    results_raw = raw.get("results") or {}
    if not isinstance(results_raw, Mapping):
        # Report a config error instead of an AttributeError from ``.get``.
        raise ValueError(f"Analysis {yaml_path}: 'results' must be a mapping")
    results = ResultsConfig(
        parquet_path=_resolve_optional_path(results_raw.get("parquet_path"), base),
    )

    # An explicit ``id: null`` would otherwise become the literal "None".
    declared_id = raw.get("id")
    analysis_id = yaml_path.stem if declared_id is None else declared_id

    # YAML parses an unquoted numeric pass_id as a number; the field is a
    # string label, so normalise it here.
    declared_pass = raw.get("pass_id")
    pass_id = None if declared_pass is None else str(declared_pass)

    return AnalysisConfig(
        id=str(analysis_id),
        dataset=dataset_spec,
        metrics=metrics_cfg,
        predictions=PredictionsConfig(refs=refs),
        results=results,
        pass_id=pass_id,
        source_path=yaml_path,
    )
160
+
161
+
162
+ # ---------------------------------------------------------------------------
163
+ # Internals
164
+ # ---------------------------------------------------------------------------
165
+
166
+
167
+ def _load_prediction_ref(entry: Mapping[str, Any], base: Path) -> PredictionRef:
168
+ if "path" in entry:
169
+ return PredictionRef(path=_resolve_path(entry["path"], base))
170
+ if "glob" in entry:
171
+ # Resolve relative to the analysis YAML's directory.
172
+ pattern = str(entry["glob"])
173
+ if not Path(pattern).is_absolute():
174
+ pattern = str(base / pattern)
175
+ return PredictionRef(glob=pattern)
176
+ if "root" in entry:
177
+ flt = entry.get("filter") or {}
178
+ return PredictionRef(
179
+ root=_resolve_path(entry["root"], base),
180
+ filter=PredictionFilter(
181
+ tool_id=flt.get("tool_id"),
182
+ dataset_id=flt.get("dataset_id"),
183
+ case=flt.get("case"),
184
+ ),
185
+ )
186
+ raise ValueError(
187
+ f"predictions entry must declare one of 'path', 'glob', or 'root', got {dict(entry)!r}"
188
+ )
189
+
190
+
191
+ def _resolve_path(value: Any, base: Path) -> Path:
192
+ p = Path(str(value))
193
+ return p if p.is_absolute() else (base / p).resolve()
194
+
195
+
196
+ def _resolve_optional_path(value: Any, base: Path) -> Path | None:
197
+ if value is None:
198
+ return None
199
+ return _resolve_path(value, base)
200
+
201
+
202
+ __all__ = [
203
+ "AnalysisConfig",
204
+ "PredictionFilter",
205
+ "PredictionRef",
206
+ "PredictionsConfig",
207
+ "ResultsConfig",
208
+ "load_analysis",
209
+ ]
@@ -0,0 +1,205 @@
1
+ """Analyze runner: score cached predictions against an AnalysisConfig.
2
+
3
+ Loads each prediction JSON file, picks the matching dataset case from
4
+ the analysis config (preferring the case recorded in the prediction's
5
+ provenance, falling back to a single-case dataset), and dispatches the
6
+ metric registry through :func:`evaluate_trial` with the cached
7
+ prediction supplied as ``prediction_override`` so phase 2 is skipped
8
+ entirely.
9
+
10
+ Record rows carry ``mode="analyze"`` plus the prediction's recorded
11
+ ``tool_id`` / ``dataset_id`` / ``case`` so a single parquet file can
12
+ mix runs from multiple tools across multiple analyses without
13
+ columns colliding.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import logging
19
+ from collections.abc import Callable, Mapping, Sequence
20
+ from dataclasses import dataclass
21
+ from datetime import UTC, datetime
22
+ from pathlib import Path
23
+ from typing import Any
24
+
25
+ from geodispbench3d.dataset.schema import CaseSpec, DatasetSpec
26
+ from geodispbench3d.diagnostics import PassDiagnostics
27
+ from geodispbench3d.metrics.registry import MetricRegistry
28
+ from geodispbench3d.results.predictions_cache import read_prediction
29
+ from geodispbench3d.sweep.evaluation import evaluate_trial
30
+ from geodispbench3d.tool.base import TrialOutputs, TrialResult
31
+
32
+ from .loader import AnalysisConfig
33
+
34
+
35
@dataclass
class AnalysisSummary:
    """Counters describing the outcome of one :func:`analyze` pass.

    ``non_fatal_failures`` is the pass-wide total of failures that were
    swallowed so the pass could continue (unreadable prediction files and
    evaluation problems alike).

    ``eval_failures`` is the subset of those recorded under the
    ``"evaluation"`` diagnostics kind: predictions whose evaluation raised,
    plus the non-fatal failures reported by evaluations that completed.

    ``skipped_unreadable`` counts prediction files that could not be read,
    and ``skipped_no_case`` counts predictions that matched no dataset case.
    """

    # Prediction files visited.
    total: int = 0
    # Predictions whose evaluation completed without raising.
    succeeded: int = 0
    # Prediction files that could not be read.
    skipped_unreadable: int = 0
    # Predictions that could not be mapped to a dataset case.
    skipped_no_case: int = 0
    # Record rows produced by the pass.
    rows_emitted: int = 0
    # Every swallowed failure, regardless of kind.
    non_fatal_failures: int = 0
    # Swallowed failures of the "evaluation" kind only.
    eval_failures: int = 0
57
+
58
+
59
def analyze(
    *,
    config: AnalysisConfig,
    on_record_rows: Callable[[Sequence[Mapping[str, Any]]], None] | None = None,
    logger: logging.Logger | None = None,
) -> AnalysisSummary:
    """Score every prediction referenced by ``config`` and emit record rows.

    Each resolved prediction file is read, mapped to a dataset case, and
    passed to ``evaluate_trial`` with the cached prediction supplied as
    ``prediction_override``. The pass is fail-soft: an unreadable file, a
    prediction with no matching case, malformed provenance, or an evaluation
    that raises only affects that one prediction.

    Args:
        config: Loaded analysis definition.
        on_record_rows: Optional sink called with the record rows of each
            evaluated prediction that produced any.
        logger: Logger to use; defaults to ``geodispbench3d.analysis``.

    Returns:
        Counters describing the pass.
    """

    log = logger or logging.getLogger("geodispbench3d.analysis")
    summary = AnalysisSummary()
    pass_id = config.pass_id or _utcnow_compact()

    case_index: Mapping[str, CaseSpec] = {c.name: c for c in config.dataset.cases}
    registry = MetricRegistry()

    # One PassDiagnostics for the whole pass: corrupt prediction reads and
    # per-prediction evaluation failures are recorded here and copied onto
    # the summary at the end.
    diag = PassDiagnostics()

    paths = config.predictions.resolve_all()
    log.info("analyze: %d prediction file(s) to score (pass_id=%s)", len(paths), pass_id)

    for path in paths:
        summary.total += 1
        payload = read_prediction(path, on_non_fatal=lambda _exc: diag.add("prediction_read"))
        if payload is None:
            log.warning("analyze: cannot read %s, skipping", path)
            summary.skipped_unreadable += 1
            continue

        prediction = payload.get("prediction")
        provenance = payload.get("provenance")
        if not isinstance(provenance, Mapping):
            # Missing or malformed provenance (e.g. a string or list) is
            # treated as "no provenance" rather than crashing the pass on
            # the first ``.get`` call.
            provenance = {}
        case = _resolve_case(provenance, case_index, config.dataset)
        if case is None:
            log.warning(
                "analyze: cannot map prediction %s to a dataset case (provenance=%s); skipping",
                path,
                provenance.get("dataset"),
            )
            summary.skipped_no_case += 1
            continue

        record_extras = {
            "tool_id": _provenance_id(provenance, "tool"),
            "dataset_id": _provenance_id(provenance, "dataset") or config.dataset.id,
            "case": case.name,
            "trial_index": _provenance_run_hash(provenance, path),
            "mode": "analyze",
            "pass_id": pass_id,
            "prediction_path": str(path),
        }

        # Only trust a non-empty string run_dir (the same rule the
        # trial_index helper applies); anything else would make Path() raise
        # outside the try block below and abort the whole pass.
        run_dir = provenance.get("run_dir")
        if not (isinstance(run_dir, str) and run_dir):
            run_dir = path.parent

        trial_result = TrialResult(
            outputs=TrialOutputs(run_dir=Path(run_dir)),
            scalar_metrics={},
            duration_seconds=0.0,
            success=True,
        )

        try:
            evaluation = evaluate_trial(
                trial_result=trial_result,
                parameters={},
                case=case,
                metrics=config.metrics,
                registry=registry,
                output_parser=None,
                output_parser_options=None,
                prediction_override=prediction,
                trial_index=None,
                record_extras=record_extras,
                logger=log,
            )
        except Exception:
            # Plugin/user callable boundary: evaluate_trial runs arbitrary
            # metric code, so a closed exception set is inapplicable. Stay
            # broad so one prediction's failure skips it instead of aborting
            # the whole analyze pass.
            log.exception("analyze: evaluate_trial raised for %s", path)
            diag.add("evaluation")
            continue

        diag.add("evaluation", evaluation.non_fatal_failures)

        if on_record_rows and evaluation.record_rows:
            on_record_rows(list(evaluation.record_rows))
            summary.rows_emitted += len(evaluation.record_rows)
        summary.succeeded += 1

    summary.non_fatal_failures = diag.non_fatal_failures
    summary.eval_failures = diag.by_kind.get("evaluation", 0)
    log.info(
        "analyze done: succeeded=%d total=%d unreadable=%d no_case=%d rows=%d "
        "non_fatal_failures=%d",
        summary.succeeded,
        summary.total,
        summary.skipped_unreadable,
        summary.skipped_no_case,
        summary.rows_emitted,
        summary.non_fatal_failures,
    )
    return summary
161
+
162
+
163
+ # ---------------------------------------------------------------------------
164
+ # Internals
165
+ # ---------------------------------------------------------------------------
166
+
167
+
168
+ def _resolve_case(
169
+ provenance: Mapping[str, Any],
170
+ case_index: Mapping[str, CaseSpec],
171
+ dataset: DatasetSpec,
172
+ ) -> CaseSpec | None:
173
+ block = provenance.get("dataset")
174
+ if isinstance(block, Mapping):
175
+ name = block.get("case")
176
+ if isinstance(name, str) and name in case_index:
177
+ return case_index[name]
178
+ if len(dataset.cases) == 1:
179
+ return dataset.cases[0]
180
+ return None
181
+
182
+
183
+ def _provenance_id(provenance: Mapping[str, Any], key: str) -> str | None:
184
+ block = provenance.get(key)
185
+ if isinstance(block, Mapping):
186
+ value = block.get("id")
187
+ if isinstance(value, str):
188
+ return value
189
+ return None
190
+
191
+
192
+ def _provenance_run_hash(provenance: Mapping[str, Any], path: Path) -> str:
193
+ """Best-effort identifier for the row's `trial_index` column."""
194
+
195
+ run_dir = provenance.get("run_dir")
196
+ if isinstance(run_dir, str) and run_dir:
197
+ return Path(run_dir).name
198
+ return path.stem
199
+
200
+
201
+ def _utcnow_compact() -> str:
202
+ return datetime.now(UTC).strftime("analyze-%Y%m%dT%H%M%S")
203
+
204
+
205
+ __all__ = ["AnalysisSummary", "analyze"]