euler-eval 2.2.0__tar.gz → 2.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {euler_eval-2.2.0 → euler_eval-2.4.0}/PKG-INFO +11 -11
- {euler_eval-2.2.0 → euler_eval-2.4.0}/README.md +10 -10
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/cli.py +62 -40
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/evaluate.py +224 -95
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/gpu_image_batch.py +1 -1
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval.egg-info/PKG-INFO +11 -11
- {euler_eval-2.2.0 → euler_eval-2.4.0}/pyproject.toml +1 -1
- {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_depth_alignment_output.py +70 -25
- {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_meta_output.py +9 -9
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/__init__.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/data.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/__init__.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/absrel.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/daniel_error.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/depth_binned_error.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/depth_edge_f1.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/depth_standard.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/fid_kid.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/gpu_depth_batch.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/high_freq_energy.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/lpips_metric.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/normal_consistency.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/psnr.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/rgb_edge_f1.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/rgb_lpips.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/rgb_psnr_ssim.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/rho_a.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/rmse.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/scale_invariant_log.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/ssim.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/tail_errors.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/utils.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/sanity_checker.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/utils/hierarchy_parser.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval.egg-info/SOURCES.txt +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval.egg-info/dependency_links.txt +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval.egg-info/entry_points.txt +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval.egg-info/requires.txt +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval.egg-info/top_level.txt +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/init_cache.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/setup.cfg +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_alignment.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_cli_device.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_config.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_data.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_depth_standard.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_evaluate_helpers.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_init_cache.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_integration.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_rgb_fid_output.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_rho_a.py +0 -0
- {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_save_results.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: euler-eval
|
|
3
|
-
Version: 2.2.0
|
|
3
|
+
Version: 2.4.0
|
|
4
4
|
Summary: Depth map evaluation toolkit with comprehensive metrics
|
|
5
5
|
Author: Depth Eval Contributors
|
|
6
6
|
License: MIT
|
|
@@ -139,7 +139,7 @@ This pre-downloads:
|
|
|
139
139
|
| `--mask-sky` | flag | off | Mask sky regions from metrics using GT segmentation |
|
|
140
140
|
| `--no-sanity-check` | flag | off | Disable sanity checking of metric configurations |
|
|
141
141
|
| `--metrics-config` | `str` | auto-detect | Path to `metrics_config.json` for sanity checking |
|
|
142
|
-
| `--depth-alignment` | `{none,auto_affine,affine}` | `auto_affine` | Depth
|
|
142
|
+
| `--depth-alignment` | `{none,auto_affine,affine}` | `auto_affine` | Depth calibration mode; outputs are emitted in semantic `native`/`metric` spaces and `depth` aliases the canonical branch |
|
|
143
143
|
| `--rgb-fid-backend` | `{builtin,clean-fid}` | `builtin` | RGB FID backend; `clean-fid` requires optional dependency |
|
|
144
144
|
| `--benchmark-depth-range` | `float float` | none | Depth range `[MIN, MAX]` in meters for benchmark evaluation; computes depth and RGB metrics for pixels within this range, subdivided into log-scaled near/mid/far bins (additive to regular metrics) |
|
|
145
145
|
|
|
@@ -341,10 +341,10 @@ When `--rgb-fid-backend clean-fid` is used, `euler-eval` will honor `CLEANFID_CA
|
|
|
341
341
|
|
|
342
342
|
```json
|
|
343
343
|
{
|
|
344
|
-
"
|
|
345
|
-
"
|
|
344
|
+
"depth_native": { "...": "native model depth space, if diagnostically meaningful" },
|
|
345
|
+
"depth_metric": { "...": "metric depth space, if available" },
|
|
346
346
|
"depth": {
|
|
347
|
-
"...": "
|
|
347
|
+
"...": "canonical alias of depth_metric when present, else depth_native"
|
|
348
348
|
},
|
|
349
349
|
"rgb": {
|
|
350
350
|
"...": "..."
|
|
@@ -358,9 +358,9 @@ When `--rgb-fid-backend clean-fid` is used, `euler-eval` will honor `CLEANFID_CA
|
|
|
358
358
|
{
|
|
359
359
|
"id": "frame_0001",
|
|
360
360
|
"metrics": {
|
|
361
|
-
"depth": { "...": "
|
|
362
|
-
"
|
|
363
|
-
"
|
|
361
|
+
"depth": { "...": "canonical alias" },
|
|
362
|
+
"depth_native": { "...": "native, when emitted" },
|
|
363
|
+
"depth_metric": { "...": "metric, when emitted" },
|
|
364
364
|
"rgb": { "...": "..." }
|
|
365
365
|
}
|
|
366
366
|
}
|
|
@@ -374,9 +374,9 @@ When `--rgb-fid-backend clean-fid` is used, `euler-eval` will honor `CLEANFID_CA
|
|
|
374
374
|
```
|
|
375
375
|
|
|
376
376
|
For depth outputs:
|
|
377
|
-
- `
|
|
378
|
-
- `
|
|
379
|
-
- `depth`:
|
|
377
|
+
- `depth_native`: the model's native depth space after spatial/radial preprocessing, emitted only when it is diagnostically distinct.
|
|
378
|
+
- `depth_metric`: the comparable metric-depth branch. This is either the native prediction itself or the calibrated scale-shift result.
|
|
379
|
+
- `depth`: canonical alias of `depth_metric` when available, otherwise `depth_native`.
|
|
380
380
|
- `standard`: explicit monocular-depth metrics with three reducers:
|
|
381
381
|
`image_mean`, `image_median`, and `pixel_pool`.
|
|
382
382
|
|
|
@@ -101,7 +101,7 @@ This pre-downloads:
|
|
|
101
101
|
| `--mask-sky` | flag | off | Mask sky regions from metrics using GT segmentation |
|
|
102
102
|
| `--no-sanity-check` | flag | off | Disable sanity checking of metric configurations |
|
|
103
103
|
| `--metrics-config` | `str` | auto-detect | Path to `metrics_config.json` for sanity checking |
|
|
104
|
-
| `--depth-alignment` | `{none,auto_affine,affine}` | `auto_affine` | Depth
|
|
104
|
+
| `--depth-alignment` | `{none,auto_affine,affine}` | `auto_affine` | Depth calibration mode; outputs are emitted in semantic `native`/`metric` spaces and `depth` aliases the canonical branch |
|
|
105
105
|
| `--rgb-fid-backend` | `{builtin,clean-fid}` | `builtin` | RGB FID backend; `clean-fid` requires optional dependency |
|
|
106
106
|
| `--benchmark-depth-range` | `float float` | none | Depth range `[MIN, MAX]` in meters for benchmark evaluation; computes depth and RGB metrics for pixels within this range, subdivided into log-scaled near/mid/far bins (additive to regular metrics) |
|
|
107
107
|
|
|
@@ -303,10 +303,10 @@ When `--rgb-fid-backend clean-fid` is used, `euler-eval` will honor `CLEANFID_CA
|
|
|
303
303
|
|
|
304
304
|
```json
|
|
305
305
|
{
|
|
306
|
-
"
|
|
307
|
-
"
|
|
306
|
+
"depth_native": { "...": "native model depth space, if diagnostically meaningful" },
|
|
307
|
+
"depth_metric": { "...": "metric depth space, if available" },
|
|
308
308
|
"depth": {
|
|
309
|
-
"...": "
|
|
309
|
+
"...": "canonical alias of depth_metric when present, else depth_native"
|
|
310
310
|
},
|
|
311
311
|
"rgb": {
|
|
312
312
|
"...": "..."
|
|
@@ -320,9 +320,9 @@ When `--rgb-fid-backend clean-fid` is used, `euler-eval` will honor `CLEANFID_CA
|
|
|
320
320
|
{
|
|
321
321
|
"id": "frame_0001",
|
|
322
322
|
"metrics": {
|
|
323
|
-
"depth": { "...": "
|
|
324
|
-
"
|
|
325
|
-
"
|
|
323
|
+
"depth": { "...": "canonical alias" },
|
|
324
|
+
"depth_native": { "...": "native, when emitted" },
|
|
325
|
+
"depth_metric": { "...": "metric, when emitted" },
|
|
326
326
|
"rgb": { "...": "..." }
|
|
327
327
|
}
|
|
328
328
|
}
|
|
@@ -336,9 +336,9 @@ When `--rgb-fid-backend clean-fid` is used, `euler-eval` will honor `CLEANFID_CA
|
|
|
336
336
|
```
|
|
337
337
|
|
|
338
338
|
For depth outputs:
|
|
339
|
-
- `
|
|
340
|
-
- `
|
|
341
|
-
- `depth`:
|
|
339
|
+
- `depth_native`: the model's native depth space after spatial/radial preprocessing, emitted only when it is diagnostically distinct.
|
|
340
|
+
- `depth_metric`: the comparable metric-depth branch. This is either the native prediction itself or the calibrated scale-shift result.
|
|
341
|
+
- `depth`: canonical alias of `depth_metric` when available, otherwise `depth_native`.
|
|
342
342
|
- `standard`: explicit monocular-depth metrics with three reducers:
|
|
343
343
|
`image_mean`, `image_median`, and `pixel_pool`.
|
|
344
344
|
|
|
@@ -56,11 +56,11 @@ class _EvalNamespace(MetricNamespace):
|
|
|
56
56
|
|
|
57
57
|
# ── Axis declarations ───────────────────────────────────────────────────────
|
|
58
58
|
|
|
59
|
-
|
|
59
|
+
_DEPTH_SPACE_AXIS = AxisDeclaration(
|
|
60
60
|
position=0,
|
|
61
|
-
values=("
|
|
61
|
+
values=("native", "metric"),
|
|
62
62
|
optional=False,
|
|
63
|
-
description="Depth
|
|
63
|
+
description="Depth space semantics",
|
|
64
64
|
)
|
|
65
65
|
|
|
66
66
|
_DEPTH_CATEGORY_AXIS = AxisDeclaration(
|
|
@@ -107,7 +107,7 @@ _RGB_BENCHMARK_BIN_AXIS = AxisDeclaration(
|
|
|
107
107
|
|
|
108
108
|
def _depth_eval_axes(*, benchmark: bool = False) -> dict[str, AxisDeclaration]:
|
|
109
109
|
axes = {
|
|
110
|
-
"
|
|
110
|
+
"space": _DEPTH_SPACE_AXIS,
|
|
111
111
|
"category": _DEPTH_CATEGORY_AXIS,
|
|
112
112
|
"reduction": _DEPTH_REDUCTION_AXIS,
|
|
113
113
|
}
|
|
@@ -691,7 +691,10 @@ def main():
|
|
|
691
691
|
type=str,
|
|
692
692
|
default="auto_affine",
|
|
693
693
|
choices=["none", "auto_affine", "affine"],
|
|
694
|
-
help=
|
|
694
|
+
help=(
|
|
695
|
+
"Depth calibration mode: none, auto_affine (default), or affine. "
|
|
696
|
+
"Output is emitted in semantic native/metric spaces."
|
|
697
|
+
),
|
|
695
698
|
)
|
|
696
699
|
parser.add_argument(
|
|
697
700
|
"--rgb-fid-backend",
|
|
@@ -853,9 +856,9 @@ def main():
|
|
|
853
856
|
|
|
854
857
|
# Build per-modality results for saving.
|
|
855
858
|
# All metric names must be fully-qualified under the declared
|
|
856
|
-
# metricNamespace.
|
|
859
|
+
# metricNamespace. We nest semantic spaces under depth → eval so
|
|
857
860
|
# every flattened path starts with "depth.eval.".
|
|
858
|
-
|
|
861
|
+
space_info = depth_results.get("space_info", {})
|
|
859
862
|
depth_dataset_info = depth_results.get("dataset_info", {})
|
|
860
863
|
|
|
861
864
|
depth_spatial = depth_results.get("spatial_info", {})
|
|
@@ -870,8 +873,18 @@ def main():
|
|
|
870
873
|
"metricSet": depth_ns.metric_set_envelope(
|
|
871
874
|
"depth",
|
|
872
875
|
metadata={
|
|
873
|
-
"
|
|
874
|
-
|
|
876
|
+
"input_space_detected": space_info.get(
|
|
877
|
+
"input_space_detected", "unknown"
|
|
878
|
+
),
|
|
879
|
+
"metric_space_source": space_info.get("metric_space_source"),
|
|
880
|
+
"calibration_mode": space_info.get(
|
|
881
|
+
"calibration_mode", "unknown"
|
|
882
|
+
),
|
|
883
|
+
"calibration_applied": space_info.get(
|
|
884
|
+
"calibration_applied", False
|
|
885
|
+
),
|
|
886
|
+
"emitted_spaces": space_info.get("emitted_spaces", []),
|
|
887
|
+
"canonical_space": space_info.get("canonical_space", "metric"),
|
|
875
888
|
},
|
|
876
889
|
),
|
|
877
890
|
"dataset_info": depth_dataset_info,
|
|
@@ -908,40 +921,46 @@ def main():
|
|
|
908
921
|
),
|
|
909
922
|
},
|
|
910
923
|
}),
|
|
911
|
-
"depth": {
|
|
912
|
-
"eval": {
|
|
913
|
-
"raw": _clean_metric_tree(depth_results["depth_raw"]),
|
|
914
|
-
"aligned": _clean_metric_tree(
|
|
915
|
-
depth_results["depth_aligned"]
|
|
916
|
-
),
|
|
917
|
-
},
|
|
918
|
-
},
|
|
924
|
+
"depth": {"eval": {}},
|
|
919
925
|
}
|
|
926
|
+
for space_name, result_key in (
|
|
927
|
+
("native", "depth_native"),
|
|
928
|
+
("metric", "depth_metric"),
|
|
929
|
+
):
|
|
930
|
+
branch = depth_results.get(result_key)
|
|
931
|
+
if branch is not None:
|
|
932
|
+
depth_save["depth"]["eval"][space_name] = _clean_metric_tree(branch)
|
|
920
933
|
|
|
921
934
|
# Inject benchmark bin metrics under the existing category
|
|
922
935
|
# keys so that the bin axis decomposes correctly:
|
|
923
|
-
# depth.eval.
|
|
924
|
-
# depth.eval.
|
|
925
|
-
# depth.eval.
|
|
936
|
+
# depth.eval.metric.standard.image_mean.{bin}.absrel
|
|
937
|
+
# depth.eval.metric.depth_metrics.{bin}.absrel.median
|
|
938
|
+
# depth.eval.metric.geometric_metrics.{bin}.normal_consistency.mean_angle
|
|
926
939
|
depth_benchmark = depth_results.get("depth_benchmark")
|
|
927
940
|
if depth_benchmark is not None:
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
941
|
+
for space_name in ("native", "metric"):
|
|
942
|
+
if space_name not in depth_save["depth"]["eval"]:
|
|
943
|
+
continue
|
|
944
|
+
space_benchmark = depth_benchmark.get(space_name)
|
|
945
|
+
if space_benchmark is None:
|
|
946
|
+
continue
|
|
947
|
+
target = depth_save["depth"]["eval"][space_name]
|
|
948
|
+
for bn in ("all", "near", "mid", "far"):
|
|
949
|
+
bin_summary = space_benchmark.get(bn, {})
|
|
950
|
+
for category, metrics in bin_summary.items():
|
|
951
|
+
cleaned = _clean_metric_tree(metrics)
|
|
952
|
+
if cleaned:
|
|
953
|
+
if category == "standard":
|
|
954
|
+
bucket = target.setdefault(category, {})
|
|
955
|
+
for reduction, reduction_metrics in cleaned.items():
|
|
956
|
+
bucket.setdefault(reduction, {})[bn] = reduction_metrics
|
|
957
|
+
else:
|
|
958
|
+
target.setdefault(category, {})[bn] = cleaned
|
|
940
959
|
depth_save["metricSet"]["metadata"]["benchmark"] = {
|
|
941
960
|
"depth_range": depth_benchmark["boundaries"]["range"],
|
|
942
961
|
"boundaries": depth_benchmark["boundaries"],
|
|
943
962
|
}
|
|
944
|
-
for depth_key in ("depth", "
|
|
963
|
+
for depth_key in ("depth", "depth_native", "depth_metric", "depth_benchmark"):
|
|
945
964
|
if depth_key in depth_results and depth_results[depth_key] is not None:
|
|
946
965
|
all_results[depth_key] = depth_results[depth_key]
|
|
947
966
|
depth_pfm = depth_results.get("per_file_metrics", {})
|
|
@@ -949,14 +968,17 @@ def main():
|
|
|
949
968
|
depth_save["per_file_metrics"] = _clean_metric_tree(
|
|
950
969
|
_wrap_pfm_metrics(
|
|
951
970
|
depth_pfm,
|
|
952
|
-
lambda m:
|
|
953
|
-
|
|
954
|
-
"
|
|
955
|
-
"
|
|
956
|
-
|
|
971
|
+
lambda m: (
|
|
972
|
+
{
|
|
973
|
+
"depth": {
|
|
974
|
+
"eval": {
|
|
975
|
+
space: m[f"depth_{space}"]
|
|
976
|
+
for space in ("native", "metric")
|
|
977
|
+
if f"depth_{space}" in m
|
|
978
|
+
},
|
|
957
979
|
},
|
|
958
|
-
}
|
|
959
|
-
|
|
980
|
+
}
|
|
981
|
+
),
|
|
960
982
|
)
|
|
961
983
|
)
|
|
962
984
|
all_results.setdefault("per_file_metrics", {}).update(depth_pfm)
|
|
@@ -103,12 +103,13 @@ def _init_benchmark_bin_store(temp_dir: Path, prefix: str) -> dict:
|
|
|
103
103
|
|
|
104
104
|
def _close_benchmark_stores(stores: dict) -> None:
|
|
105
105
|
"""Close all streaming stores in a benchmark store dict."""
|
|
106
|
-
for
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
106
|
+
for space_stores in stores.values():
|
|
107
|
+
for bin_name in _BENCHMARK_BIN_NAMES:
|
|
108
|
+
s = space_stores[bin_name]
|
|
109
|
+
s["absrel_store"].close()
|
|
110
|
+
s["rmse_store"].close()
|
|
111
|
+
s["silog_store"].close()
|
|
112
|
+
s["normal_store"].close()
|
|
112
113
|
|
|
113
114
|
|
|
114
115
|
def _safe_mean_values(values: list) -> Optional[float]:
|
|
@@ -322,6 +323,13 @@ def _ensure_file_system_sharing() -> None:
|
|
|
322
323
|
runs accumulate FDs and eventually trip ``EMFILE`` ("Too many open
|
|
323
324
|
files") mid-iteration. ``file_system`` uses ``/dev/shm`` file-backed
|
|
324
325
|
storage instead, sidestepping the FD cap. Idempotent.
|
|
326
|
+
|
|
327
|
+
Caveat: activation is lazy — the strategy only flips the first time
|
|
328
|
+
a worker-spawning prefetch call runs. Any ``DataLoader`` / torch IPC
|
|
329
|
+
that fires *before* :func:`_prefetched_iter` reaches its workers
|
|
330
|
+
branch keeps the default strategy. In the current CLI flow, the
|
|
331
|
+
depth/RGB prefetch loop starts before any internal DataLoader (FID,
|
|
332
|
+
etc.), so those inherit the safe strategy by the time they run.
|
|
325
333
|
"""
|
|
326
334
|
global _sharing_strategy_set
|
|
327
335
|
if _sharing_strategy_set:
|
|
@@ -549,9 +557,9 @@ def evaluate_depth_samples(
|
|
|
549
557
|
|
|
550
558
|
Returns:
|
|
551
559
|
Dictionary containing depth aggregate/per-file metrics with:
|
|
552
|
-
``
|
|
553
|
-
|
|
554
|
-
|
|
560
|
+
optional ``depth_native`` and/or ``depth_metric`` semantic branches,
|
|
561
|
+
backward-compatible canonical ``depth``, and optionally
|
|
562
|
+
per-space ``depth_benchmark`` summaries.
|
|
555
563
|
"""
|
|
556
564
|
valid_alignment_modes = {"none", "auto_affine", "affine"}
|
|
557
565
|
if alignment_mode not in valid_alignment_modes:
|
|
@@ -882,12 +890,13 @@ def evaluate_depth_samples(
|
|
|
882
890
|
logged_alignment = False
|
|
883
891
|
normalized_predictions = False
|
|
884
892
|
alignment_applied = False
|
|
893
|
+
input_space_detected = "unknown"
|
|
885
894
|
gt_native_dims: Optional[tuple[int, int]] = None
|
|
886
895
|
pred_native_dims: Optional[tuple[int, int]] = None
|
|
887
896
|
spatial_method = "none"
|
|
888
897
|
|
|
889
898
|
if alignment_mode == "none":
|
|
890
|
-
print("Depth alignment mode: none
|
|
899
|
+
print("Depth alignment mode: none")
|
|
891
900
|
elif alignment_mode == "auto_affine":
|
|
892
901
|
print("Depth alignment mode: auto_affine (normalized-depth detection)")
|
|
893
902
|
else:
|
|
@@ -925,8 +934,14 @@ def evaluate_depth_samples(
|
|
|
925
934
|
f"(log-scaled near/mid/far bins)"
|
|
926
935
|
)
|
|
927
936
|
benchmark_stores = {
|
|
928
|
-
|
|
929
|
-
|
|
937
|
+
"native": {
|
|
938
|
+
bn: _init_benchmark_bin_store(temp_dir, f"bench_native_{bn}")
|
|
939
|
+
for bn in _BENCHMARK_BIN_NAMES
|
|
940
|
+
},
|
|
941
|
+
"metric": {
|
|
942
|
+
bn: _init_benchmark_bin_store(temp_dir, f"bench_metric_{bn}")
|
|
943
|
+
for bn in _BENCHMARK_BIN_NAMES
|
|
944
|
+
},
|
|
930
945
|
}
|
|
931
946
|
|
|
932
947
|
try:
|
|
@@ -969,21 +984,24 @@ def evaluate_depth_samples(
|
|
|
969
984
|
):
|
|
970
985
|
sky_valid = align_to_prediction(sky_valid, depth_pred)
|
|
971
986
|
|
|
972
|
-
if
|
|
987
|
+
if i == 0:
|
|
973
988
|
pred_min = float(np.nanmin(depth_pred))
|
|
974
989
|
pred_max = float(np.nanmax(depth_pred))
|
|
975
990
|
if pred_max <= 1.0 + 1e-3 and pred_min >= -1.0 - 1e-3:
|
|
976
991
|
normalized_predictions = True
|
|
992
|
+
input_space_detected = "normalized"
|
|
977
993
|
print(
|
|
978
|
-
f"
|
|
994
|
+
f" Detected native depth space: normalized "
|
|
979
995
|
f"(range [{pred_min:.3f}, {pred_max:.3f}])"
|
|
980
996
|
)
|
|
981
997
|
else:
|
|
998
|
+
input_space_detected = "metric"
|
|
982
999
|
print(
|
|
983
|
-
f"
|
|
984
|
-
f"(range [{pred_min:.1f}, {pred_max:.1f}])
|
|
985
|
-
f"skipping alignment"
|
|
1000
|
+
f" Detected native depth space: metric "
|
|
1001
|
+
f"(range [{pred_min:.1f}, {pred_max:.1f}])"
|
|
986
1002
|
)
|
|
1003
|
+
if alignment_mode == "auto_affine":
|
|
1004
|
+
print(" Scale-and-shift: skipping calibration")
|
|
987
1005
|
|
|
988
1006
|
depth_gt = process_depth(depth_gt, 1.0, is_radial, intrinsics_K)
|
|
989
1007
|
depth_pred_raw = process_depth(depth_pred, 1.0, is_radial, intrinsics_K)
|
|
@@ -1059,22 +1077,28 @@ def evaluate_depth_samples(
|
|
|
1059
1077
|
_append_metrics(stores["aligned"], aligned_metrics, aligned_pred_path)
|
|
1060
1078
|
|
|
1061
1079
|
raw_value = _build_per_file_depth_value(raw_metrics)
|
|
1062
|
-
|
|
1080
|
+
metric_value = (
|
|
1063
1081
|
raw_value
|
|
1064
1082
|
if aligned_metrics is raw_metrics
|
|
1065
1083
|
else _build_per_file_depth_value(aligned_metrics)
|
|
1066
1084
|
)
|
|
1085
|
+
emit_native = alignment_applied or normalized_predictions
|
|
1086
|
+
emit_metric = alignment_applied or not normalized_predictions
|
|
1087
|
+
canonical_value = metric_value if emit_metric else raw_value
|
|
1088
|
+
file_metrics = {
|
|
1089
|
+
"depth": canonical_value,
|
|
1090
|
+
}
|
|
1091
|
+
if emit_native:
|
|
1092
|
+
file_metrics["depth_native"] = raw_value
|
|
1093
|
+
if emit_metric:
|
|
1094
|
+
file_metrics["depth_metric"] = metric_value
|
|
1067
1095
|
set_value(
|
|
1068
1096
|
per_file_metrics,
|
|
1069
1097
|
hierarchy,
|
|
1070
1098
|
entry_id,
|
|
1071
1099
|
{
|
|
1072
1100
|
"id": entry_id,
|
|
1073
|
-
"metrics":
|
|
1074
|
-
"depth": aligned_value,
|
|
1075
|
-
"depth_raw": raw_value,
|
|
1076
|
-
"depth_aligned": aligned_value,
|
|
1077
|
-
},
|
|
1101
|
+
"metrics": file_metrics,
|
|
1078
1102
|
},
|
|
1079
1103
|
)
|
|
1080
1104
|
|
|
@@ -1102,7 +1126,7 @@ def evaluate_depth_samples(
|
|
|
1102
1126
|
v,
|
|
1103
1127
|
_store=stores["aligned"],
|
|
1104
1128
|
_slot=aligned_lpips_slot,
|
|
1105
|
-
_per_file=
|
|
1129
|
+
_per_file=metric_value,
|
|
1106
1130
|
):
|
|
1107
1131
|
val = float(v) if np.isfinite(v) else float("nan")
|
|
1108
1132
|
_store["lpips_values"][_slot] = val
|
|
@@ -1116,8 +1140,8 @@ def evaluate_depth_samples(
|
|
|
1116
1140
|
|
|
1117
1141
|
# -- Enqueue batched GPU depth metrics; callbacks patch the
|
|
1118
1142
|
# placeholders that _compute_branch_metrics(defer_to_batcher=True)
|
|
1119
|
-
# left behind and run the deferred sanity checks on the
|
|
1120
|
-
# branch.
|
|
1143
|
+
# left behind and run the deferred sanity checks on the
|
|
1144
|
+
# canonical emitted branch.
|
|
1121
1145
|
if defer_depth:
|
|
1122
1146
|
raw_depth_slot = len(stores["raw"]["psnr_values"]) - 1
|
|
1123
1147
|
# When aligned == raw, there is only one enqueue; run the
|
|
@@ -1159,7 +1183,7 @@ def evaluate_depth_samples(
|
|
|
1159
1183
|
_metrics=aligned_metrics,
|
|
1160
1184
|
_store=stores["aligned"],
|
|
1161
1185
|
_slot=aligned_depth_slot,
|
|
1162
|
-
_pf=
|
|
1186
|
+
_pf=metric_value,
|
|
1163
1187
|
_sanity=sanity_checker,
|
|
1164
1188
|
_entry_id=entry_id,
|
|
1165
1189
|
):
|
|
@@ -1178,7 +1202,7 @@ def evaluate_depth_samples(
|
|
|
1178
1202
|
_aligned_depth_cb,
|
|
1179
1203
|
)
|
|
1180
1204
|
|
|
1181
|
-
# -- Benchmark depth-range metrics
|
|
1205
|
+
# -- Benchmark depth-range metrics per emitted semantic space --
|
|
1182
1206
|
if benchmark_stores is not None:
|
|
1183
1207
|
bm_bins = get_benchmark_depth_bins(
|
|
1184
1208
|
depth_gt, benchmark_depth_range[0], benchmark_depth_range[1]
|
|
@@ -1187,72 +1211,136 @@ def evaluate_depth_samples(
|
|
|
1187
1211
|
benchmark_boundaries = bm_bins["boundaries"]
|
|
1188
1212
|
|
|
1189
1213
|
for bn in _BENCHMARK_BIN_NAMES:
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1214
|
+
native_bin_mask = bm_bins[bn].copy()
|
|
1215
|
+
native_bin_mask &= (depth_pred_raw > 0) & np.isfinite(
|
|
1216
|
+
depth_pred_raw
|
|
1193
1217
|
)
|
|
1194
1218
|
if sky_valid is not None:
|
|
1195
|
-
|
|
1196
|
-
if
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
)
|
|
1213
|
-
bm_standard, bm_standard_pool = compute_standard_depth_metrics(
|
|
1214
|
-
depth_pred_aligned, depth_gt, valid_mask=bin_mask
|
|
1215
|
-
)
|
|
1216
|
-
bm_normals = compute_normal_angles(
|
|
1217
|
-
depth_pred_aligned, depth_gt, valid_mask=bin_mask
|
|
1218
|
-
)
|
|
1219
|
-
|
|
1220
|
-
bm_store["absrel_store"].append(bm_absrel)
|
|
1221
|
-
bm_store["rmse_store"].append(np.sqrt(bm_rmse))
|
|
1222
|
-
bm_store["silog_store"].append(bm_silog_arr)
|
|
1223
|
-
bm_store["silog_full_values"].append(bm_silog_val)
|
|
1224
|
-
append_standard_depth_metrics(
|
|
1225
|
-
bm_store["standard_store"],
|
|
1226
|
-
bm_standard,
|
|
1227
|
-
bm_standard_pool,
|
|
1228
|
-
)
|
|
1229
|
-
bm_store["normal_store"].append(bm_normals)
|
|
1230
|
-
if len(bm_normals) > 0:
|
|
1231
|
-
bm_store["normal_below_11_25"] += int(
|
|
1232
|
-
np.sum(bm_normals < 11.25)
|
|
1219
|
+
native_bin_mask &= sky_valid
|
|
1220
|
+
if native_bin_mask.any():
|
|
1221
|
+
bm_store = benchmark_stores["native"][bn]
|
|
1222
|
+
bm_absrel = compute_absrel(
|
|
1223
|
+
depth_pred_raw, depth_gt, valid_mask=native_bin_mask
|
|
1224
|
+
)
|
|
1225
|
+
bm_rmse = compute_rmse_per_pixel(
|
|
1226
|
+
depth_pred_raw, depth_gt, valid_mask=native_bin_mask
|
|
1227
|
+
)
|
|
1228
|
+
bm_silog_arr = compute_silog_per_pixel(
|
|
1229
|
+
depth_pred_raw, depth_gt, valid_mask=native_bin_mask
|
|
1230
|
+
)
|
|
1231
|
+
bm_silog_val = compute_scale_invariant_log_error(
|
|
1232
|
+
depth_pred_raw, depth_gt, valid_mask=native_bin_mask
|
|
1233
|
+
)
|
|
1234
|
+
bm_standard, bm_standard_pool = compute_standard_depth_metrics(
|
|
1235
|
+
depth_pred_raw, depth_gt, valid_mask=native_bin_mask
|
|
1233
1236
|
)
|
|
1234
|
-
|
|
1235
|
-
|
|
1237
|
+
bm_normals = compute_normal_angles(
|
|
1238
|
+
depth_pred_raw, depth_gt, valid_mask=native_bin_mask
|
|
1236
1239
|
)
|
|
1237
|
-
|
|
1238
|
-
|
|
1240
|
+
|
|
1241
|
+
bm_store["absrel_store"].append(bm_absrel)
|
|
1242
|
+
bm_store["rmse_store"].append(np.sqrt(bm_rmse))
|
|
1243
|
+
bm_store["silog_store"].append(bm_silog_arr)
|
|
1244
|
+
bm_store["silog_full_values"].append(bm_silog_val)
|
|
1245
|
+
append_standard_depth_metrics(
|
|
1246
|
+
bm_store["standard_store"],
|
|
1247
|
+
bm_standard,
|
|
1248
|
+
bm_standard_pool,
|
|
1249
|
+
)
|
|
1250
|
+
bm_store["normal_store"].append(bm_normals)
|
|
1251
|
+
if len(bm_normals) > 0:
|
|
1252
|
+
bm_store["normal_below_11_25"] += int(
|
|
1253
|
+
np.sum(bm_normals < 11.25)
|
|
1254
|
+
)
|
|
1255
|
+
bm_store["normal_below_22_5"] += int(
|
|
1256
|
+
np.sum(bm_normals < 22.5)
|
|
1257
|
+
)
|
|
1258
|
+
bm_store["normal_below_30"] += int(
|
|
1259
|
+
np.sum(bm_normals < 30.0)
|
|
1260
|
+
)
|
|
1261
|
+
|
|
1262
|
+
if aligned_metrics is not raw_metrics:
|
|
1263
|
+
metric_bin_mask = bm_bins[bn].copy()
|
|
1264
|
+
metric_bin_mask &= (depth_pred_aligned > 0) & np.isfinite(
|
|
1265
|
+
depth_pred_aligned
|
|
1239
1266
|
)
|
|
1267
|
+
if sky_valid is not None:
|
|
1268
|
+
metric_bin_mask &= sky_valid
|
|
1269
|
+
if metric_bin_mask.any():
|
|
1270
|
+
bm_store = benchmark_stores["metric"][bn]
|
|
1271
|
+
bm_absrel = compute_absrel(
|
|
1272
|
+
depth_pred_aligned,
|
|
1273
|
+
depth_gt,
|
|
1274
|
+
valid_mask=metric_bin_mask,
|
|
1275
|
+
)
|
|
1276
|
+
bm_rmse = compute_rmse_per_pixel(
|
|
1277
|
+
depth_pred_aligned,
|
|
1278
|
+
depth_gt,
|
|
1279
|
+
valid_mask=metric_bin_mask,
|
|
1280
|
+
)
|
|
1281
|
+
bm_silog_arr = compute_silog_per_pixel(
|
|
1282
|
+
depth_pred_aligned,
|
|
1283
|
+
depth_gt,
|
|
1284
|
+
valid_mask=metric_bin_mask,
|
|
1285
|
+
)
|
|
1286
|
+
bm_silog_val = compute_scale_invariant_log_error(
|
|
1287
|
+
depth_pred_aligned,
|
|
1288
|
+
depth_gt,
|
|
1289
|
+
valid_mask=metric_bin_mask,
|
|
1290
|
+
)
|
|
1291
|
+
bm_standard, bm_standard_pool = compute_standard_depth_metrics(
|
|
1292
|
+
depth_pred_aligned,
|
|
1293
|
+
depth_gt,
|
|
1294
|
+
valid_mask=metric_bin_mask,
|
|
1295
|
+
)
|
|
1296
|
+
bm_normals = compute_normal_angles(
|
|
1297
|
+
depth_pred_aligned,
|
|
1298
|
+
depth_gt,
|
|
1299
|
+
valid_mask=metric_bin_mask,
|
|
1300
|
+
)
|
|
1301
|
+
|
|
1302
|
+
bm_store["absrel_store"].append(bm_absrel)
|
|
1303
|
+
bm_store["rmse_store"].append(np.sqrt(bm_rmse))
|
|
1304
|
+
bm_store["silog_store"].append(bm_silog_arr)
|
|
1305
|
+
bm_store["silog_full_values"].append(bm_silog_val)
|
|
1306
|
+
append_standard_depth_metrics(
|
|
1307
|
+
bm_store["standard_store"],
|
|
1308
|
+
bm_standard,
|
|
1309
|
+
bm_standard_pool,
|
|
1310
|
+
)
|
|
1311
|
+
bm_store["normal_store"].append(bm_normals)
|
|
1312
|
+
if len(bm_normals) > 0:
|
|
1313
|
+
bm_store["normal_below_11_25"] += int(
|
|
1314
|
+
np.sum(bm_normals < 11.25)
|
|
1315
|
+
)
|
|
1316
|
+
bm_store["normal_below_22_5"] += int(
|
|
1317
|
+
np.sum(bm_normals < 22.5)
|
|
1318
|
+
)
|
|
1319
|
+
bm_store["normal_below_30"] += int(
|
|
1320
|
+
np.sum(bm_normals < 30.0)
|
|
1321
|
+
)
|
|
1240
1322
|
|
|
1241
1323
|
if sanity_checker is not None:
|
|
1324
|
+
canonical_pred = depth_pred_aligned if (
|
|
1325
|
+
alignment_applied or not normalized_predictions
|
|
1326
|
+
) else depth_pred_raw
|
|
1327
|
+
canonical_metrics = aligned_metrics if (
|
|
1328
|
+
alignment_applied or not normalized_predictions
|
|
1329
|
+
) else raw_metrics
|
|
1242
1330
|
sanity_checker.validate_depth_input(
|
|
1243
|
-
depth_gt,
|
|
1331
|
+
depth_gt, canonical_pred, entry_id
|
|
1244
1332
|
)
|
|
1245
1333
|
if not defer_depth:
|
|
1246
1334
|
# When deferred, the batcher callback runs these.
|
|
1247
1335
|
_run_deferred_depth_sanity(
|
|
1248
|
-
sanity_checker,
|
|
1336
|
+
sanity_checker, canonical_metrics, entry_id
|
|
1249
1337
|
)
|
|
1250
|
-
nm =
|
|
1338
|
+
nm = canonical_metrics["normal_meta"]
|
|
1251
1339
|
if nm["mean_angle"] is not None:
|
|
1252
1340
|
sanity_checker.validate_normal_consistency(
|
|
1253
1341
|
nm["mean_angle"], nm["valid_pixels_after_erosion"], entry_id
|
|
1254
1342
|
)
|
|
1255
|
-
ef =
|
|
1343
|
+
ef = canonical_metrics["edge_f1"]
|
|
1256
1344
|
sanity_checker.validate_depth_edge_f1(
|
|
1257
1345
|
ef["pred_edge_pixels"],
|
|
1258
1346
|
ef["gt_edge_pixels"],
|
|
@@ -1271,28 +1359,61 @@ def evaluate_depth_samples(
|
|
|
1271
1359
|
print("Computing FID/KID (this may take a while)...")
|
|
1272
1360
|
print("Aggregating depth results...")
|
|
1273
1361
|
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1362
|
+
emit_native = alignment_applied or normalized_predictions
|
|
1363
|
+
emit_metric = alignment_applied or not normalized_predictions
|
|
1364
|
+
|
|
1365
|
+
native_summary = (
|
|
1366
|
+
_build_depth_summary(stores["raw"], gt_depth_paths)
|
|
1367
|
+
if emit_native or (emit_metric and not alignment_applied)
|
|
1368
|
+
else None
|
|
1369
|
+
)
|
|
1370
|
+
metric_summary = None
|
|
1371
|
+
if emit_metric:
|
|
1372
|
+
if alignment_applied:
|
|
1373
|
+
metric_summary = _build_depth_summary(
|
|
1374
|
+
stores["aligned"], gt_depth_paths
|
|
1375
|
+
)
|
|
1376
|
+
else:
|
|
1377
|
+
metric_summary = copy.deepcopy(native_summary)
|
|
1378
|
+
depth_summary = metric_summary if emit_metric else native_summary
|
|
1279
1379
|
|
|
1280
1380
|
# -- Benchmark aggregation --
|
|
1281
1381
|
depth_benchmark = None
|
|
1282
1382
|
if benchmark_stores is not None:
|
|
1283
1383
|
print("Aggregating benchmark depth results...")
|
|
1284
|
-
depth_benchmark = {
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1384
|
+
depth_benchmark = {"boundaries": benchmark_boundaries}
|
|
1385
|
+
if emit_native or (emit_metric and not alignment_applied):
|
|
1386
|
+
native_benchmark = {
|
|
1387
|
+
bn: _build_benchmark_bin_summary(
|
|
1388
|
+
benchmark_stores["native"][bn]
|
|
1389
|
+
)
|
|
1390
|
+
for bn in _BENCHMARK_BIN_NAMES
|
|
1391
|
+
}
|
|
1392
|
+
else:
|
|
1393
|
+
native_benchmark = None
|
|
1394
|
+
if emit_native:
|
|
1395
|
+
depth_benchmark["native"] = native_benchmark
|
|
1396
|
+
if emit_metric:
|
|
1397
|
+
if alignment_applied:
|
|
1398
|
+
depth_benchmark["metric"] = {
|
|
1399
|
+
bn: _build_benchmark_bin_summary(
|
|
1400
|
+
benchmark_stores["metric"][bn]
|
|
1401
|
+
)
|
|
1402
|
+
for bn in _BENCHMARK_BIN_NAMES
|
|
1403
|
+
}
|
|
1404
|
+
else:
|
|
1405
|
+
depth_benchmark["metric"] = copy.deepcopy(native_benchmark)
|
|
1406
|
+
|
|
1407
|
+
emitted_spaces = []
|
|
1408
|
+
if emit_native:
|
|
1409
|
+
emitted_spaces.append("native")
|
|
1410
|
+
if emit_metric:
|
|
1411
|
+
emitted_spaces.append("metric")
|
|
1291
1412
|
|
|
1292
1413
|
result = {
|
|
1293
|
-
"
|
|
1294
|
-
"
|
|
1295
|
-
"depth":
|
|
1414
|
+
"depth_native": native_summary if emit_native else None,
|
|
1415
|
+
"depth_metric": metric_summary if emit_metric else None,
|
|
1416
|
+
"depth": depth_summary,
|
|
1296
1417
|
"depth_benchmark": depth_benchmark,
|
|
1297
1418
|
"per_file_metrics": per_file_metrics,
|
|
1298
1419
|
"dataset_info": {
|
|
@@ -1300,9 +1421,17 @@ def evaluate_depth_samples(
|
|
|
1300
1421
|
"gt_name": gt_name,
|
|
1301
1422
|
"pred_name": pred_name,
|
|
1302
1423
|
},
|
|
1303
|
-
"
|
|
1304
|
-
"
|
|
1305
|
-
"
|
|
1424
|
+
"space_info": {
|
|
1425
|
+
"input_space_detected": input_space_detected,
|
|
1426
|
+
"metric_space_source": (
|
|
1427
|
+
"scale_shift"
|
|
1428
|
+
if alignment_applied
|
|
1429
|
+
else ("native" if emit_metric else None)
|
|
1430
|
+
),
|
|
1431
|
+
"calibration_mode": alignment_mode,
|
|
1432
|
+
"calibration_applied": alignment_applied,
|
|
1433
|
+
"emitted_spaces": emitted_spaces,
|
|
1434
|
+
"canonical_space": "metric" if emit_metric else "native",
|
|
1306
1435
|
},
|
|
1307
1436
|
"spatial_info": {
|
|
1308
1437
|
"gt_dimensions": {"height": gt_native_dims[0], "width": gt_native_dims[1]}
|
|
@@ -23,8 +23,8 @@ import numpy as np
|
|
|
23
23
|
import torch
|
|
24
24
|
|
|
25
25
|
try:
|
|
26
|
-
from torchmetrics.functional import peak_signal_noise_ratio as _tm_psnr
|
|
27
26
|
from torchmetrics.functional.image import (
|
|
27
|
+
peak_signal_noise_ratio as _tm_psnr,
|
|
28
28
|
structural_similarity_index_measure as _tm_ssim,
|
|
29
29
|
)
|
|
30
30
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: euler-eval
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.4.0
|
|
4
4
|
Summary: Depth map evaluation toolkit with comprehensive metrics
|
|
5
5
|
Author: Depth Eval Contributors
|
|
6
6
|
License: MIT
|
|
@@ -139,7 +139,7 @@ This pre-downloads:
|
|
|
139
139
|
| `--mask-sky` | flag | off | Mask sky regions from metrics using GT segmentation |
|
|
140
140
|
| `--no-sanity-check` | flag | off | Disable sanity checking of metric configurations |
|
|
141
141
|
| `--metrics-config` | `str` | auto-detect | Path to `metrics_config.json` for sanity checking |
|
|
142
|
-
| `--depth-alignment` | `{none,auto_affine,affine}` | `auto_affine` | Depth
|
|
142
|
+
| `--depth-alignment` | `{none,auto_affine,affine}` | `auto_affine` | Depth calibration mode; outputs are emitted in semantic `native`/`metric` spaces and `depth` aliases the canonical branch |
|
|
143
143
|
| `--rgb-fid-backend` | `{builtin,clean-fid}` | `builtin` | RGB FID backend; `clean-fid` requires optional dependency |
|
|
144
144
|
| `--benchmark-depth-range` | `float float` | none | Depth range `[MIN, MAX]` in meters for benchmark evaluation; computes depth and RGB metrics for pixels within this range, subdivided into log-scaled near/mid/far bins (additive to regular metrics) |
|
|
145
145
|
|
|
@@ -341,10 +341,10 @@ When `--rgb-fid-backend clean-fid` is used, `euler-eval` will honor `CLEANFID_CA
|
|
|
341
341
|
|
|
342
342
|
```json
|
|
343
343
|
{
|
|
344
|
-
"
|
|
345
|
-
"
|
|
344
|
+
"depth_native": { "...": "native model depth space, if diagnostically meaningful" },
|
|
345
|
+
"depth_metric": { "...": "metric depth space, if available" },
|
|
346
346
|
"depth": {
|
|
347
|
-
"...": "
|
|
347
|
+
"...": "canonical alias of depth_metric when present, else depth_native"
|
|
348
348
|
},
|
|
349
349
|
"rgb": {
|
|
350
350
|
"...": "..."
|
|
@@ -358,9 +358,9 @@ When `--rgb-fid-backend clean-fid` is used, `euler-eval` will honor `CLEANFID_CA
|
|
|
358
358
|
{
|
|
359
359
|
"id": "frame_0001",
|
|
360
360
|
"metrics": {
|
|
361
|
-
"depth": { "...": "
|
|
362
|
-
"
|
|
363
|
-
"
|
|
361
|
+
"depth": { "...": "canonical alias" },
|
|
362
|
+
"depth_native": { "...": "native, when emitted" },
|
|
363
|
+
"depth_metric": { "...": "metric, when emitted" },
|
|
364
364
|
"rgb": { "...": "..." }
|
|
365
365
|
}
|
|
366
366
|
}
|
|
@@ -374,9 +374,9 @@ When `--rgb-fid-backend clean-fid` is used, `euler-eval` will honor `CLEANFID_CA
|
|
|
374
374
|
```
|
|
375
375
|
|
|
376
376
|
For depth outputs:
|
|
377
|
-
- `
|
|
378
|
-
- `
|
|
379
|
-
- `depth`:
|
|
377
|
+
- `depth_native`: the model's native depth space after spatial/radial preprocessing, emitted only when it is diagnostically distinct.
|
|
378
|
+
- `depth_metric`: the comparable metric-depth branch. This is either the native prediction itself or the calibrated scale-shift result.
|
|
379
|
+
- `depth`: canonical alias of `depth_metric` when available, otherwise `depth_native`.
|
|
380
380
|
- `standard`: explicit monocular-depth metrics with three reducers:
|
|
381
381
|
`image_mean`, `image_median`, and `pixel_pool`.
|
|
382
382
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Tests for depth
|
|
1
|
+
"""Tests for semantic depth-space output and calibration behavior."""
|
|
2
2
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
|
|
@@ -222,7 +222,29 @@ def _make_dataset_with_segmentation():
|
|
|
222
222
|
return _DummyDepthDataset(samples)
|
|
223
223
|
|
|
224
224
|
|
|
225
|
-
def
|
|
225
|
+
def _make_metric_dataset():
|
|
226
|
+
gt_a = np.array([[10.0, 20.0], [30.0, 40.0]], dtype=np.float32)
|
|
227
|
+
gt_b = np.array([[6.0, 12.0], [18.0, 24.0]], dtype=np.float32)
|
|
228
|
+
pred_a = (gt_a * 1.02).astype(np.float32)
|
|
229
|
+
pred_b = (gt_b * 0.98).astype(np.float32)
|
|
230
|
+
samples = [
|
|
231
|
+
{
|
|
232
|
+
"id": "00001",
|
|
233
|
+
"full_id": "/Scene01/clone/00001",
|
|
234
|
+
"gt": gt_a,
|
|
235
|
+
"pred": pred_a,
|
|
236
|
+
},
|
|
237
|
+
{
|
|
238
|
+
"id": "00002",
|
|
239
|
+
"full_id": "/Scene01/clone/00002",
|
|
240
|
+
"gt": gt_b,
|
|
241
|
+
"pred": pred_b,
|
|
242
|
+
},
|
|
243
|
+
]
|
|
244
|
+
return _DummyDepthDataset(samples)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def test_depth_output_contains_native_and_metric_for_calibrated_normalized_input(monkeypatch):
|
|
226
248
|
_patch_depth_metrics(monkeypatch)
|
|
227
249
|
|
|
228
250
|
results = eval_mod.evaluate_depth_samples(
|
|
@@ -232,36 +254,38 @@ def test_depth_output_contains_raw_and_aligned(monkeypatch):
|
|
|
232
254
|
alignment_mode="auto_affine",
|
|
233
255
|
)
|
|
234
256
|
|
|
235
|
-
assert "
|
|
236
|
-
assert "
|
|
257
|
+
assert "depth_native" in results
|
|
258
|
+
assert "depth_metric" in results
|
|
237
259
|
assert "depth" in results
|
|
238
260
|
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
assert
|
|
244
|
-
assert
|
|
245
|
-
assert results["
|
|
261
|
+
native_absrel = results["depth_native"]["depth_metrics"]["absrel"]["median"]
|
|
262
|
+
metric_absrel = results["depth_metric"]["depth_metrics"]["absrel"]["median"]
|
|
263
|
+
native_standard_absrel = results["depth_native"]["standard"]["image_mean"]["absrel"]
|
|
264
|
+
metric_standard_absrel = results["depth_metric"]["standard"]["image_mean"]["absrel"]
|
|
265
|
+
assert metric_absrel < native_absrel
|
|
266
|
+
assert metric_standard_absrel < native_standard_absrel
|
|
267
|
+
assert results["space_info"]["calibration_applied"] is True
|
|
268
|
+
assert results["space_info"]["emitted_spaces"] == ["native", "metric"]
|
|
269
|
+
assert results["space_info"]["canonical_space"] == "metric"
|
|
246
270
|
|
|
247
271
|
files = results["per_file_metrics"]["children"]["Scene01"]["children"]["clone"][
|
|
248
272
|
"files"
|
|
249
273
|
]
|
|
250
274
|
per_file = next(item["metrics"] for item in files if item["id"] == "00001")
|
|
251
275
|
assert "depth" in per_file
|
|
252
|
-
assert "
|
|
253
|
-
assert "
|
|
276
|
+
assert "depth_native" in per_file
|
|
277
|
+
assert "depth_metric" in per_file
|
|
254
278
|
assert (
|
|
255
|
-
per_file["
|
|
256
|
-
< per_file["
|
|
279
|
+
per_file["depth_metric"]["depth_metrics"]["absrel"]
|
|
280
|
+
< per_file["depth_native"]["depth_metrics"]["absrel"]
|
|
257
281
|
)
|
|
258
282
|
assert (
|
|
259
|
-
per_file["
|
|
260
|
-
< per_file["
|
|
283
|
+
per_file["depth_metric"]["standard"]["absrel"]
|
|
284
|
+
< per_file["depth_native"]["standard"]["absrel"]
|
|
261
285
|
)
|
|
262
286
|
|
|
263
287
|
|
|
264
|
-
def
|
|
288
|
+
def test_depth_alignment_none_emits_only_native_for_normalized_input(monkeypatch):
|
|
265
289
|
_patch_depth_metrics(monkeypatch)
|
|
266
290
|
|
|
267
291
|
results = eval_mod.evaluate_depth_samples(
|
|
@@ -271,13 +295,34 @@ def test_depth_alignment_none_keeps_raw_and_aligned_equal(monkeypatch):
|
|
|
271
295
|
alignment_mode="none",
|
|
272
296
|
)
|
|
273
297
|
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
assert
|
|
279
|
-
assert
|
|
280
|
-
|
|
298
|
+
assert "depth_native" in results and results["depth_native"] is not None
|
|
299
|
+
assert results.get("depth_metric") is None
|
|
300
|
+
assert results["depth"] == results["depth_native"]
|
|
301
|
+
assert results["space_info"]["calibration_applied"] is False
|
|
302
|
+
assert results["space_info"]["emitted_spaces"] == ["native"]
|
|
303
|
+
assert results["space_info"]["canonical_space"] == "native"
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def test_metric_input_emits_only_metric_when_no_calibration_is_needed(monkeypatch):
|
|
307
|
+
_patch_depth_metrics(monkeypatch)
|
|
308
|
+
|
|
309
|
+
results = eval_mod.evaluate_depth_samples(
|
|
310
|
+
dataset=_make_metric_dataset(),
|
|
311
|
+
is_radial=True,
|
|
312
|
+
device="cpu",
|
|
313
|
+
alignment_mode="auto_affine",
|
|
314
|
+
)
|
|
315
|
+
|
|
316
|
+
assert results.get("depth_native") is None
|
|
317
|
+
assert "depth_metric" in results and results["depth_metric"] is not None
|
|
318
|
+
assert results["depth"] == results["depth_metric"]
|
|
319
|
+
assert results["space_info"]["input_space_detected"] == "metric"
|
|
320
|
+
assert results["space_info"]["metric_space_source"] == "native"
|
|
321
|
+
assert results["space_info"]["emitted_spaces"] == ["metric"]
|
|
322
|
+
files = results["per_file_metrics"]["children"]["Scene01"]["children"]["clone"]["files"]
|
|
323
|
+
per_file = next(item["metrics"] for item in files if item["id"] == "00001")
|
|
324
|
+
assert "depth_metric" in per_file
|
|
325
|
+
assert "depth_native" not in per_file
|
|
281
326
|
|
|
282
327
|
|
|
283
328
|
def test_depth_output_contains_spatial_info(monkeypatch):
|
|
@@ -89,7 +89,7 @@ class TestMetaBlockStructure:
|
|
|
89
89
|
"pred": {"dimensions": {"height": 50, "width": 100}},
|
|
90
90
|
"spatial_alignment": {"method": "resize"},
|
|
91
91
|
},
|
|
92
|
-
"depth": {"eval": {"
|
|
92
|
+
"depth": {"eval": {"native": {}, "metric": {}}},
|
|
93
93
|
}
|
|
94
94
|
cleaned = _clean_metric_tree(save_dict)
|
|
95
95
|
assert "meta" in cleaned
|
|
@@ -126,16 +126,16 @@ class TestAxisDeclarations:
|
|
|
126
126
|
"""Verify axis declarations follow the metric-namespacing convention."""
|
|
127
127
|
|
|
128
128
|
def test_depth_axes_structure(self):
|
|
129
|
-
"""depth.eval declares
|
|
130
|
-
assert "
|
|
129
|
+
"""depth.eval declares space, category, and reduction axes."""
|
|
130
|
+
assert "space" in _DEPTH_EVAL_AXES
|
|
131
131
|
assert "category" in _DEPTH_EVAL_AXES
|
|
132
132
|
assert "reduction" in _DEPTH_EVAL_AXES
|
|
133
133
|
|
|
134
|
-
|
|
135
|
-
assert
|
|
136
|
-
assert
|
|
137
|
-
assert "
|
|
138
|
-
assert "
|
|
134
|
+
space = _DEPTH_EVAL_AXES["space"]
|
|
135
|
+
assert space.position == 0
|
|
136
|
+
assert space.optional is False
|
|
137
|
+
assert "native" in space.values
|
|
138
|
+
assert "metric" in space.values
|
|
139
139
|
|
|
140
140
|
category = _DEPTH_EVAL_AXES["category"]
|
|
141
141
|
assert category.position == 1
|
|
@@ -264,6 +264,6 @@ class TestMetricDescriptions:
|
|
|
264
264
|
envelope = ns.metric_set_envelope("depth", metadata={})
|
|
265
265
|
assert "axes" in envelope
|
|
266
266
|
assert "metricDescriptions" in envelope
|
|
267
|
-
assert envelope["axes"]["
|
|
267
|
+
assert envelope["axes"]["space"]["position"] == 0
|
|
268
268
|
assert envelope["axes"]["reduction"]["position"] == 2
|
|
269
269
|
assert "psnr" in envelope["metricDescriptions"]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|