euler-eval 2.2.0.tar.gz → 2.4.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {euler_eval-2.2.0 → euler_eval-2.4.0}/PKG-INFO +11 -11
  2. {euler_eval-2.2.0 → euler_eval-2.4.0}/README.md +10 -10
  3. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/cli.py +62 -40
  4. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/evaluate.py +224 -95
  5. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/gpu_image_batch.py +1 -1
  6. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval.egg-info/PKG-INFO +11 -11
  7. {euler_eval-2.2.0 → euler_eval-2.4.0}/pyproject.toml +1 -1
  8. {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_depth_alignment_output.py +70 -25
  9. {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_meta_output.py +9 -9
  10. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/__init__.py +0 -0
  11. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/data.py +0 -0
  12. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/__init__.py +0 -0
  13. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/absrel.py +0 -0
  14. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/daniel_error.py +0 -0
  15. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/depth_binned_error.py +0 -0
  16. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/depth_edge_f1.py +0 -0
  17. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/depth_standard.py +0 -0
  18. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/fid_kid.py +0 -0
  19. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/gpu_depth_batch.py +0 -0
  20. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/high_freq_energy.py +0 -0
  21. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/lpips_metric.py +0 -0
  22. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/normal_consistency.py +0 -0
  23. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/psnr.py +0 -0
  24. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/rgb_edge_f1.py +0 -0
  25. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/rgb_lpips.py +0 -0
  26. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/rgb_psnr_ssim.py +0 -0
  27. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/rho_a.py +0 -0
  28. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/rmse.py +0 -0
  29. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/scale_invariant_log.py +0 -0
  30. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/ssim.py +0 -0
  31. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/tail_errors.py +0 -0
  32. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/metrics/utils.py +0 -0
  33. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/sanity_checker.py +0 -0
  34. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval/utils/hierarchy_parser.py +0 -0
  35. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval.egg-info/SOURCES.txt +0 -0
  36. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval.egg-info/dependency_links.txt +0 -0
  37. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval.egg-info/entry_points.txt +0 -0
  38. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval.egg-info/requires.txt +0 -0
  39. {euler_eval-2.2.0 → euler_eval-2.4.0}/euler_eval.egg-info/top_level.txt +0 -0
  40. {euler_eval-2.2.0 → euler_eval-2.4.0}/init_cache.py +0 -0
  41. {euler_eval-2.2.0 → euler_eval-2.4.0}/setup.cfg +0 -0
  42. {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_alignment.py +0 -0
  43. {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_cli_device.py +0 -0
  44. {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_config.py +0 -0
  45. {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_data.py +0 -0
  46. {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_depth_standard.py +0 -0
  47. {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_evaluate_helpers.py +0 -0
  48. {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_init_cache.py +0 -0
  49. {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_integration.py +0 -0
  50. {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_rgb_fid_output.py +0 -0
  51. {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_rho_a.py +0 -0
  52. {euler_eval-2.2.0 → euler_eval-2.4.0}/tests/test_save_results.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: euler-eval
3
- Version: 2.2.0
3
+ Version: 2.4.0
4
4
  Summary: Depth map evaluation toolkit with comprehensive metrics
5
5
  Author: Depth Eval Contributors
6
6
  License: MIT
@@ -139,7 +139,7 @@ This pre-downloads:
139
139
  | `--mask-sky` | flag | off | Mask sky regions from metrics using GT segmentation |
140
140
  | `--no-sanity-check` | flag | off | Disable sanity checking of metric configurations |
141
141
  | `--metrics-config` | `str` | auto-detect | Path to `metrics_config.json` for sanity checking |
142
- | `--depth-alignment` | `{none,auto_affine,affine}` | `auto_affine` | Depth alignment mode (`depth` output uses aligned branch) |
142
+ | `--depth-alignment` | `{none,auto_affine,affine}` | `auto_affine` | Depth calibration mode; outputs are emitted in semantic `native`/`metric` spaces and `depth` aliases the canonical branch |
143
143
  | `--rgb-fid-backend` | `{builtin,clean-fid}` | `builtin` | RGB FID backend; `clean-fid` requires optional dependency |
144
144
  | `--benchmark-depth-range` | `float float` | none | Depth range `[MIN, MAX]` in meters for benchmark evaluation; computes depth and RGB metrics for pixels within this range, subdivided into log-scaled near/mid/far bins (additive to regular metrics) |
145
145
 
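An aside on `--benchmark-depth-range`: the bins are described as log-scaled, so the boundaries presumably follow a geometric spacing of the `[MIN, MAX]` range. A minimal sketch of that idea with a hypothetical helper name; the package's actual `get_benchmark_depth_bins` implementation is not shown in this diff.

```python
import numpy as np

def log_depth_bin_edges(d_min: float, d_max: float) -> dict[str, tuple[float, float]]:
    """Hypothetical helper: split [d_min, d_max] into three geometrically spaced bins."""
    edges = np.geomspace(d_min, d_max, num=4)  # 3 bins need 4 edges
    return {
        "near": (float(edges[0]), float(edges[1])),
        "mid": (float(edges[1]), float(edges[2])),
        "far": (float(edges[2]), float(edges[3])),
    }

# e.g. the kind of range one might pass as --benchmark-depth-range 0.5 80
print(log_depth_bin_edges(0.5, 80.0))
```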
@@ -341,10 +341,10 @@ When `--rgb-fid-backend clean-fid` is used, `euler-eval` will honor `CLEANFID_CA
341
341
 
342
342
  ```json
343
343
  {
344
- "depth_raw": { "...": "metrics without alignment" },
345
- "depth_aligned": { "...": "metrics with selected alignment mode" },
344
+ "depth_native": { "...": "native model depth space, if diagnostically meaningful" },
345
+ "depth_metric": { "...": "metric depth space, if available" },
346
346
  "depth": {
347
- "...": "backward-compatible alias of depth_aligned"
347
+ "...": "canonical alias of depth_metric when present, else depth_native"
348
348
  },
349
349
  "rgb": {
350
350
  "...": "..."
@@ -358,9 +358,9 @@ When `--rgb-fid-backend clean-fid` is used, `euler-eval` will honor `CLEANFID_CA
358
358
  {
359
359
  "id": "frame_0001",
360
360
  "metrics": {
361
- "depth": { "...": "aligned (alias)" },
362
- "depth_raw": { "...": "raw" },
363
- "depth_aligned": { "...": "aligned" },
361
+ "depth": { "...": "canonical alias" },
362
+ "depth_native": { "...": "native, when emitted" },
363
+ "depth_metric": { "...": "metric, when emitted" },
364
364
  "rgb": { "...": "..." }
365
365
  }
366
366
  }
@@ -374,9 +374,9 @@ When `--rgb-fid-backend clean-fid` is used, `euler-eval` will honor `CLEANFID_CA
374
374
  ```
375
375
 
376
376
  For depth outputs:
377
- - `depth_raw`: metric-space depth without any post-hoc alignment.
378
- - `depth_aligned`: metric-space depth after configured alignment mode.
379
- - `depth`: backward-compatible alias of `depth_aligned`.
377
+ - `depth_native`: the model's native depth space after spatial/radial preprocessing, emitted only when it is diagnostically distinct.
378
+ - `depth_metric`: the comparable metric-depth branch. This is either the native prediction itself or the calibrated scale-shift result.
379
+ - `depth`: canonical alias of `depth_metric` when available, otherwise `depth_native`.
380
380
  - `standard`: explicit monocular-depth metrics with three reducers:
381
381
  `image_mean`, `image_median`, and `pixel_pool`.
382
382
 
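For readers consuming these files programmatically, a small hedged sketch of how the canonical branch can be resolved from an aggregate results dict; the filename is a placeholder and the fallback chain simply restates the alias rule described above.

```python
import json

# Placeholder path; substitute your own aggregate results file.
with open("results.json") as fh:
    results = json.load(fh)

# "depth" already points at the canonical branch; the chain below just spells
# out the documented precedence (metric first, then native).
canonical = results.get("depth") or results.get("depth_metric") or results.get("depth_native")
print(sorted(canonical))
```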
@@ -101,7 +101,7 @@ This pre-downloads:
101
101
  | `--mask-sky` | flag | off | Mask sky regions from metrics using GT segmentation |
102
102
  | `--no-sanity-check` | flag | off | Disable sanity checking of metric configurations |
103
103
  | `--metrics-config` | `str` | auto-detect | Path to `metrics_config.json` for sanity checking |
104
- | `--depth-alignment` | `{none,auto_affine,affine}` | `auto_affine` | Depth alignment mode (`depth` output uses aligned branch) |
104
+ | `--depth-alignment` | `{none,auto_affine,affine}` | `auto_affine` | Depth calibration mode; outputs are emitted in semantic `native`/`metric` spaces and `depth` aliases the canonical branch |
105
105
  | `--rgb-fid-backend` | `{builtin,clean-fid}` | `builtin` | RGB FID backend; `clean-fid` requires optional dependency |
106
106
  | `--benchmark-depth-range` | `float float` | none | Depth range `[MIN, MAX]` in meters for benchmark evaluation; computes depth and RGB metrics for pixels within this range, subdivided into log-scaled near/mid/far bins (additive to regular metrics) |
107
107
 
@@ -303,10 +303,10 @@ When `--rgb-fid-backend clean-fid` is used, `euler-eval` will honor `CLEANFID_CA
303
303
 
304
304
  ```json
305
305
  {
306
- "depth_raw": { "...": "metrics without alignment" },
307
- "depth_aligned": { "...": "metrics with selected alignment mode" },
306
+ "depth_native": { "...": "native model depth space, if diagnostically meaningful" },
307
+ "depth_metric": { "...": "metric depth space, if available" },
308
308
  "depth": {
309
- "...": "backward-compatible alias of depth_aligned"
309
+ "...": "canonical alias of depth_metric when present, else depth_native"
310
310
  },
311
311
  "rgb": {
312
312
  "...": "..."
@@ -320,9 +320,9 @@ When `--rgb-fid-backend clean-fid` is used, `euler-eval` will honor `CLEANFID_CA
320
320
  {
321
321
  "id": "frame_0001",
322
322
  "metrics": {
323
- "depth": { "...": "aligned (alias)" },
324
- "depth_raw": { "...": "raw" },
325
- "depth_aligned": { "...": "aligned" },
323
+ "depth": { "...": "canonical alias" },
324
+ "depth_native": { "...": "native, when emitted" },
325
+ "depth_metric": { "...": "metric, when emitted" },
326
326
  "rgb": { "...": "..." }
327
327
  }
328
328
  }
@@ -336,9 +336,9 @@ When `--rgb-fid-backend clean-fid` is used, `euler-eval` will honor `CLEANFID_CA
336
336
  ```
337
337
 
338
338
  For depth outputs:
339
- - `depth_raw`: metric-space depth without any post-hoc alignment.
340
- - `depth_aligned`: metric-space depth after configured alignment mode.
341
- - `depth`: backward-compatible alias of `depth_aligned`.
339
+ - `depth_native`: the model's native depth space after spatial/radial preprocessing, emitted only when it is diagnostically distinct.
340
+ - `depth_metric`: the comparable metric-depth branch. This is either the native prediction itself or the calibrated scale-shift result.
341
+ - `depth`: canonical alias of `depth_metric` when available, otherwise `depth_native`.
342
342
  - `standard`: explicit monocular-depth metrics with three reducers:
343
343
  `image_mean`, `image_median`, and `pixel_pool`.
344
344
 
@@ -56,11 +56,11 @@ class _EvalNamespace(MetricNamespace):
56
56
 
57
57
  # ── Axis declarations ───────────────────────────────────────────────────────
58
58
 
59
- _DEPTH_ALIGNMENT_AXIS = AxisDeclaration(
59
+ _DEPTH_SPACE_AXIS = AxisDeclaration(
60
60
  position=0,
61
- values=("raw", "aligned"),
61
+ values=("native", "metric"),
62
62
  optional=False,
63
- description="Depth alignment mode",
63
+ description="Depth space semantics",
64
64
  )
65
65
 
66
66
  _DEPTH_CATEGORY_AXIS = AxisDeclaration(
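For orientation only: the four keyword fields used here suggest a declaration roughly like the dataclass below. `AxisDeclaration`'s real definition lives elsewhere in `euler_eval` and may differ, so treat this as an illustrative stand-in, not the actual class.

```python
from dataclasses import dataclass

@dataclass(frozen=True)
class AxisDeclarationSketch:
    """Illustrative stand-in for euler_eval's AxisDeclaration."""
    position: int            # index of this axis within a flattened metric path
    values: tuple[str, ...]  # allowed values along the axis
    optional: bool           # whether a metric path may omit this axis
    description: str

DEPTH_SPACE_AXIS = AxisDeclarationSketch(
    position=0,
    values=("native", "metric"),
    optional=False,
    description="Depth space semantics",
)
```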
@@ -107,7 +107,7 @@ _RGB_BENCHMARK_BIN_AXIS = AxisDeclaration(
107
107
 
108
108
  def _depth_eval_axes(*, benchmark: bool = False) -> dict[str, AxisDeclaration]:
109
109
  axes = {
110
- "alignment": _DEPTH_ALIGNMENT_AXIS,
110
+ "space": _DEPTH_SPACE_AXIS,
111
111
  "category": _DEPTH_CATEGORY_AXIS,
112
112
  "reduction": _DEPTH_REDUCTION_AXIS,
113
113
  }
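The renamed axis changes how flattened metric names read. A small sketch of the intended decomposition, using paths adapted from the comments later in this diff (bin segment dropped); the actual flattening code is not part of this changeset.

```python
# space (position 0), category (position 1), reduction (position 2), then the leaf.
example_paths = [
    "depth.eval.metric.standard.image_mean.absrel",
    "depth.eval.native.depth_metrics.absrel.median",
]
for path in example_paths:
    _, _, space, category, *rest = path.split(".")
    print(f"space={space!r} category={category!r} rest={rest}")
```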
@@ -691,7 +691,10 @@ def main():
691
691
  type=str,
692
692
  default="auto_affine",
693
693
  choices=["none", "auto_affine", "affine"],
694
- help="Depth alignment mode: none, auto_affine (default), or affine",
694
+ help=(
695
+ "Depth calibration mode: none, auto_affine (default), or affine. "
696
+ "Output is emitted in semantic native/metric spaces."
697
+ ),
695
698
  )
696
699
  parser.add_argument(
697
700
  "--rgb-fid-backend",
@@ -853,9 +856,9 @@ def main():
853
856
 
854
857
  # Build per-modality results for saving.
855
858
  # All metric names must be fully-qualified under the declared
856
- # metricNamespace. We nest raw/aligned under depth → eval so
859
+ # metricNamespace. We nest semantic spaces under depth → eval so
857
860
  # every flattened path starts with "depth.eval.".
858
- alignment_info = depth_results.get("alignment", {})
861
+ space_info = depth_results.get("space_info", {})
859
862
  depth_dataset_info = depth_results.get("dataset_info", {})
860
863
 
861
864
  depth_spatial = depth_results.get("spatial_info", {})
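The next hunk swaps the two `alignment_*` metadata keys for the richer `space_info` block. As a reading aid, here is what that metadata would plausibly look like for a run with normalized predictions and scale-shift calibration applied (illustrative values, not captured from a real run):

```python
metadata_example = {
    "input_space_detected": "normalized",
    "metric_space_source": "scale_shift",
    "calibration_mode": "auto_affine",
    "calibration_applied": True,
    "emitted_spaces": ["native", "metric"],
    "canonical_space": "metric",
}
```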
@@ -870,8 +873,18 @@ def main():
870
873
  "metricSet": depth_ns.metric_set_envelope(
871
874
  "depth",
872
875
  metadata={
873
- "alignment_mode": alignment_info.get("mode", "unknown"),
874
- "alignment_applied": alignment_info.get("applied", False),
876
+ "input_space_detected": space_info.get(
877
+ "input_space_detected", "unknown"
878
+ ),
879
+ "metric_space_source": space_info.get("metric_space_source"),
880
+ "calibration_mode": space_info.get(
881
+ "calibration_mode", "unknown"
882
+ ),
883
+ "calibration_applied": space_info.get(
884
+ "calibration_applied", False
885
+ ),
886
+ "emitted_spaces": space_info.get("emitted_spaces", []),
887
+ "canonical_space": space_info.get("canonical_space", "metric"),
875
888
  },
876
889
  ),
877
890
  "dataset_info": depth_dataset_info,
@@ -908,40 +921,46 @@ def main():
908
921
  ),
909
922
  },
910
923
  }),
911
- "depth": {
912
- "eval": {
913
- "raw": _clean_metric_tree(depth_results["depth_raw"]),
914
- "aligned": _clean_metric_tree(
915
- depth_results["depth_aligned"]
916
- ),
917
- },
918
- },
924
+ "depth": {"eval": {}},
919
925
  }
926
+ for space_name, result_key in (
927
+ ("native", "depth_native"),
928
+ ("metric", "depth_metric"),
929
+ ):
930
+ branch = depth_results.get(result_key)
931
+ if branch is not None:
932
+ depth_save["depth"]["eval"][space_name] = _clean_metric_tree(branch)
920
933
 
921
934
  # Inject benchmark bin metrics under the existing category
922
935
  # keys so that the bin axis decomposes correctly:
923
- # depth.eval.aligned.standard.image_mean.{bin}.absrel
924
- # depth.eval.aligned.depth_metrics.{bin}.absrel.median
925
- # depth.eval.aligned.geometric_metrics.{bin}.normal_consistency.mean_angle
936
+ # depth.eval.metric.standard.image_mean.{bin}.absrel
937
+ # depth.eval.metric.depth_metrics.{bin}.absrel.median
938
+ # depth.eval.metric.geometric_metrics.{bin}.normal_consistency.mean_angle
926
939
  depth_benchmark = depth_results.get("depth_benchmark")
927
940
  if depth_benchmark is not None:
928
- aligned = depth_save["depth"]["eval"]["aligned"]
929
- for bn in ("all", "near", "mid", "far"):
930
- bin_summary = depth_benchmark.get(bn, {})
931
- for category, metrics in bin_summary.items():
932
- cleaned = _clean_metric_tree(metrics)
933
- if cleaned:
934
- if category == "standard":
935
- bucket = aligned.setdefault(category, {})
936
- for reduction, reduction_metrics in cleaned.items():
937
- bucket.setdefault(reduction, {})[bn] = reduction_metrics
938
- else:
939
- aligned.setdefault(category, {})[bn] = cleaned
941
+ for space_name in ("native", "metric"):
942
+ if space_name not in depth_save["depth"]["eval"]:
943
+ continue
944
+ space_benchmark = depth_benchmark.get(space_name)
945
+ if space_benchmark is None:
946
+ continue
947
+ target = depth_save["depth"]["eval"][space_name]
948
+ for bn in ("all", "near", "mid", "far"):
949
+ bin_summary = space_benchmark.get(bn, {})
950
+ for category, metrics in bin_summary.items():
951
+ cleaned = _clean_metric_tree(metrics)
952
+ if cleaned:
953
+ if category == "standard":
954
+ bucket = target.setdefault(category, {})
955
+ for reduction, reduction_metrics in cleaned.items():
956
+ bucket.setdefault(reduction, {})[bn] = reduction_metrics
957
+ else:
958
+ target.setdefault(category, {})[bn] = cleaned
940
959
  depth_save["metricSet"]["metadata"]["benchmark"] = {
941
960
  "depth_range": depth_benchmark["boundaries"]["range"],
942
961
  "boundaries": depth_benchmark["boundaries"],
943
962
  }
944
- for depth_key in ("depth", "depth_raw", "depth_aligned", "depth_benchmark"):
963
+ for depth_key in ("depth", "depth_native", "depth_metric", "depth_benchmark"):
945
964
  if depth_key in depth_results and depth_results[depth_key] is not None:
946
965
  all_results[depth_key] = depth_results[depth_key]
947
966
  depth_pfm = depth_results.get("per_file_metrics", {})
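The injection loop above keys bins under each reducer for `standard` but directly under the category for everything else. A hedged sketch of the resulting shape under `depth.eval.metric`; the numbers are placeholders and the real output also carries an `all` bin.

```python
depth_eval_metric_example = {
    "standard": {
        "image_mean": {
            "absrel": 0.12,                        # full-range value
            "near": {"absrel": 0.10},              # bins nested per reducer
            "mid": {"absrel": 0.12},
            "far": {"absrel": 0.15},
        },
    },
    "depth_metrics": {
        "absrel": {"median": 0.11},                # full-range value
        "near": {"absrel": {"median": 0.09}},      # bins nested under the category
    },
}
```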
@@ -949,14 +968,17 @@ def main():
949
968
  depth_save["per_file_metrics"] = _clean_metric_tree(
950
969
  _wrap_pfm_metrics(
951
970
  depth_pfm,
952
- lambda m: {
953
- "depth": {
954
- "eval": {
955
- "raw": m.get("depth_raw", {}),
956
- "aligned": m.get("depth_aligned", {}),
971
+ lambda m: (
972
+ {
973
+ "depth": {
974
+ "eval": {
975
+ space: m[f"depth_{space}"]
976
+ for space in ("native", "metric")
977
+ if f"depth_{space}" in m
978
+ },
957
979
  },
958
- },
959
- },
980
+ }
981
+ ),
960
982
  )
961
983
  )
962
984
  all_results.setdefault("per_file_metrics", {}).update(depth_pfm)
@@ -103,12 +103,13 @@ def _init_benchmark_bin_store(temp_dir: Path, prefix: str) -> dict:
103
103
 
104
104
  def _close_benchmark_stores(stores: dict) -> None:
105
105
  """Close all streaming stores in a benchmark store dict."""
106
- for bin_name in _BENCHMARK_BIN_NAMES:
107
- s = stores[bin_name]
108
- s["absrel_store"].close()
109
- s["rmse_store"].close()
110
- s["silog_store"].close()
111
- s["normal_store"].close()
106
+ for space_stores in stores.values():
107
+ for bin_name in _BENCHMARK_BIN_NAMES:
108
+ s = space_stores[bin_name]
109
+ s["absrel_store"].close()
110
+ s["rmse_store"].close()
111
+ s["silog_store"].close()
112
+ s["normal_store"].close()
112
113
 
113
114
 
114
115
  def _safe_mean_values(values: list) -> Optional[float]:
@@ -322,6 +323,13 @@ def _ensure_file_system_sharing() -> None:
322
323
  runs accumulate FDs and eventually trip ``EMFILE`` ("Too many open
323
324
  files") mid-iteration. ``file_system`` uses ``/dev/shm`` file-backed
324
325
  storage instead, sidestepping the FD cap. Idempotent.
326
+
327
+ Caveat: activation is lazy — the strategy only flips the first time
328
+ a worker-spawning prefetch call runs. Any ``DataLoader`` / torch IPC
329
+ that fires *before* :func:`_prefetched_iter` reaches its workers
330
+ branch keeps the default strategy. In the current CLI flow, the
331
+ depth/RGB prefetch loop starts before any internal DataLoader (FID,
332
+ etc.), so those inherit the safe strategy by the time they run.
325
333
  """
326
334
  global _sharing_strategy_set
327
335
  if _sharing_strategy_set:
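The guard being documented here amounts to a one-time switch of PyTorch's tensor-sharing strategy. The function body is only partly visible in this hunk, so the following is a reconstruction of the described behavior rather than a quote of it.

```python
import torch.multiprocessing as mp

_sharing_strategy_set = False

def _ensure_file_system_sharing() -> None:
    """Swap tensor IPC from FD passing to /dev/shm-backed files, once per process."""
    global _sharing_strategy_set
    if _sharing_strategy_set:
        return
    if "file_system" in mp.get_all_sharing_strategies():
        mp.set_sharing_strategy("file_system")
    _sharing_strategy_set = True
```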
@@ -549,9 +557,9 @@ def evaluate_depth_samples(
549
557
 
550
558
  Returns:
551
559
  Dictionary containing depth aggregate/per-file metrics with:
552
- ``depth_raw``, ``depth_aligned``, backward-compatible ``depth``,
553
- explicit ``standard`` reducers within each depth branch, and
554
- optionally ``depth_benchmark``.
560
+ optional ``depth_native`` and/or ``depth_metric`` semantic branches,
561
+ backward-compatible canonical ``depth``, and optionally
562
+ per-space ``depth_benchmark`` summaries.
555
563
  """
556
564
  valid_alignment_modes = {"none", "auto_affine", "affine"}
557
565
  if alignment_mode not in valid_alignment_modes:
@@ -882,12 +890,13 @@ def evaluate_depth_samples(
882
890
  logged_alignment = False
883
891
  normalized_predictions = False
884
892
  alignment_applied = False
893
+ input_space_detected = "unknown"
885
894
  gt_native_dims: Optional[tuple[int, int]] = None
886
895
  pred_native_dims: Optional[tuple[int, int]] = None
887
896
  spatial_method = "none"
888
897
 
889
898
  if alignment_mode == "none":
890
- print("Depth alignment mode: none (raw predictions only)")
899
+ print("Depth alignment mode: none")
891
900
  elif alignment_mode == "auto_affine":
892
901
  print("Depth alignment mode: auto_affine (normalized-depth detection)")
893
902
  else:
@@ -925,8 +934,14 @@ def evaluate_depth_samples(
925
934
  f"(log-scaled near/mid/far bins)"
926
935
  )
927
936
  benchmark_stores = {
928
- bn: _init_benchmark_bin_store(temp_dir, f"bench_{bn}")
929
- for bn in _BENCHMARK_BIN_NAMES
937
+ "native": {
938
+ bn: _init_benchmark_bin_store(temp_dir, f"bench_native_{bn}")
939
+ for bn in _BENCHMARK_BIN_NAMES
940
+ },
941
+ "metric": {
942
+ bn: _init_benchmark_bin_store(temp_dir, f"bench_metric_{bn}")
943
+ for bn in _BENCHMARK_BIN_NAMES
944
+ },
930
945
  }
931
946
 
932
947
  try:
@@ -969,21 +984,24 @@ def evaluate_depth_samples(
969
984
  ):
970
985
  sky_valid = align_to_prediction(sky_valid, depth_pred)
971
986
 
972
- if alignment_mode == "auto_affine" and i == 0:
987
+ if i == 0:
973
988
  pred_min = float(np.nanmin(depth_pred))
974
989
  pred_max = float(np.nanmax(depth_pred))
975
990
  if pred_max <= 1.0 + 1e-3 and pred_min >= -1.0 - 1e-3:
976
991
  normalized_predictions = True
992
+ input_space_detected = "normalized"
977
993
  print(
978
- f" Scale-and-shift: detected normalized predictions "
994
+ f" Detected native depth space: normalized "
979
995
  f"(range [{pred_min:.3f}, {pred_max:.3f}])"
980
996
  )
981
997
  else:
998
+ input_space_detected = "metric"
982
999
  print(
983
- f" Scale-and-shift: predictions appear metric "
984
- f"(range [{pred_min:.1f}, {pred_max:.1f}]), "
985
- f"skipping alignment"
1000
+ f" Detected native depth space: metric "
1001
+ f"(range [{pred_min:.1f}, {pred_max:.1f}])"
986
1002
  )
1003
+ if alignment_mode == "auto_affine":
1004
+ print(" Scale-and-shift: skipping calibration")
987
1005
 
988
1006
  depth_gt = process_depth(depth_gt, 1.0, is_radial, intrinsics_K)
989
1007
  depth_pred_raw = process_depth(depth_pred, 1.0, is_radial, intrinsics_K)
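Condensed into a standalone helper, the first-sample detection above looks roughly like this (same range test and tolerance as the hunk; the function name is hypothetical):

```python
import numpy as np

def detect_input_space(depth_pred: np.ndarray, tol: float = 1e-3) -> str:
    """Return 'normalized' if the prediction fits (roughly) inside [-1, 1], else 'metric'."""
    pred_min = float(np.nanmin(depth_pred))
    pred_max = float(np.nanmax(depth_pred))
    if pred_max <= 1.0 + tol and pred_min >= -1.0 - tol:
        return "normalized"
    return "metric"
```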
@@ -1059,22 +1077,28 @@ def evaluate_depth_samples(
1059
1077
  _append_metrics(stores["aligned"], aligned_metrics, aligned_pred_path)
1060
1078
 
1061
1079
  raw_value = _build_per_file_depth_value(raw_metrics)
1062
- aligned_value = (
1080
+ metric_value = (
1063
1081
  raw_value
1064
1082
  if aligned_metrics is raw_metrics
1065
1083
  else _build_per_file_depth_value(aligned_metrics)
1066
1084
  )
1085
+ emit_native = alignment_applied or normalized_predictions
1086
+ emit_metric = alignment_applied or not normalized_predictions
1087
+ canonical_value = metric_value if emit_metric else raw_value
1088
+ file_metrics = {
1089
+ "depth": canonical_value,
1090
+ }
1091
+ if emit_native:
1092
+ file_metrics["depth_native"] = raw_value
1093
+ if emit_metric:
1094
+ file_metrics["depth_metric"] = metric_value
1067
1095
  set_value(
1068
1096
  per_file_metrics,
1069
1097
  hierarchy,
1070
1098
  entry_id,
1071
1099
  {
1072
1100
  "id": entry_id,
1073
- "metrics": {
1074
- "depth": aligned_value,
1075
- "depth_raw": raw_value,
1076
- "depth_aligned": aligned_value,
1077
- },
1101
+ "metrics": file_metrics,
1078
1102
  },
1079
1103
  )
1080
1104
 
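The two emit flags introduced above reduce to a small truth table over calibration and input space; spelled out with a hypothetical helper that uses the same boolean expressions:

```python
def emitted_spaces(alignment_applied: bool, normalized_predictions: bool) -> list[str]:
    emit_native = alignment_applied or normalized_predictions
    emit_metric = alignment_applied or not normalized_predictions
    return [name for name, emit in (("native", emit_native), ("metric", emit_metric)) if emit]

# normalized input, calibration applied -> ['native', 'metric']
# normalized input, calibration off     -> ['native']
# metric input, no calibration needed   -> ['metric']
```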
@@ -1102,7 +1126,7 @@ def evaluate_depth_samples(
1102
1126
  v,
1103
1127
  _store=stores["aligned"],
1104
1128
  _slot=aligned_lpips_slot,
1105
- _per_file=aligned_value,
1129
+ _per_file=metric_value,
1106
1130
  ):
1107
1131
  val = float(v) if np.isfinite(v) else float("nan")
1108
1132
  _store["lpips_values"][_slot] = val
@@ -1116,8 +1140,8 @@ def evaluate_depth_samples(
1116
1140
 
1117
1141
  # -- Enqueue batched GPU depth metrics; callbacks patch the
1118
1142
  # placeholders that _compute_branch_metrics(defer_to_batcher=True)
1119
- # left behind and run the deferred sanity checks on the aligned
1120
- # branch.
1143
+ # left behind and run the deferred sanity checks on the
1144
+ # canonical emitted branch.
1121
1145
  if defer_depth:
1122
1146
  raw_depth_slot = len(stores["raw"]["psnr_values"]) - 1
1123
1147
  # When aligned == raw, there is only one enqueue; run the
@@ -1159,7 +1183,7 @@ def evaluate_depth_samples(
1159
1183
  _metrics=aligned_metrics,
1160
1184
  _store=stores["aligned"],
1161
1185
  _slot=aligned_depth_slot,
1162
- _pf=aligned_value,
1186
+ _pf=metric_value,
1163
1187
  _sanity=sanity_checker,
1164
1188
  _entry_id=entry_id,
1165
1189
  ):
@@ -1178,7 +1202,7 @@ def evaluate_depth_samples(
1178
1202
  _aligned_depth_cb,
1179
1203
  )
1180
1204
 
1181
- # -- Benchmark depth-range metrics (aligned only) --
1205
+ # -- Benchmark depth-range metrics per emitted semantic space --
1182
1206
  if benchmark_stores is not None:
1183
1207
  bm_bins = get_benchmark_depth_bins(
1184
1208
  depth_gt, benchmark_depth_range[0], benchmark_depth_range[1]
@@ -1187,72 +1211,136 @@ def evaluate_depth_samples(
1187
1211
  benchmark_boundaries = bm_bins["boundaries"]
1188
1212
 
1189
1213
  for bn in _BENCHMARK_BIN_NAMES:
1190
- bin_mask = bm_bins[bn].copy()
1191
- bin_mask &= (depth_pred_aligned > 0) & np.isfinite(
1192
- depth_pred_aligned
1214
+ native_bin_mask = bm_bins[bn].copy()
1215
+ native_bin_mask &= (depth_pred_raw > 0) & np.isfinite(
1216
+ depth_pred_raw
1193
1217
  )
1194
1218
  if sky_valid is not None:
1195
- bin_mask &= sky_valid
1196
- if not bin_mask.any():
1197
- continue
1198
-
1199
- bm_store = benchmark_stores[bn]
1200
-
1201
- bm_absrel = compute_absrel(
1202
- depth_pred_aligned, depth_gt, valid_mask=bin_mask
1203
- )
1204
- bm_rmse = compute_rmse_per_pixel(
1205
- depth_pred_aligned, depth_gt, valid_mask=bin_mask
1206
- )
1207
- bm_silog_arr = compute_silog_per_pixel(
1208
- depth_pred_aligned, depth_gt, valid_mask=bin_mask
1209
- )
1210
- bm_silog_val = compute_scale_invariant_log_error(
1211
- depth_pred_aligned, depth_gt, valid_mask=bin_mask
1212
- )
1213
- bm_standard, bm_standard_pool = compute_standard_depth_metrics(
1214
- depth_pred_aligned, depth_gt, valid_mask=bin_mask
1215
- )
1216
- bm_normals = compute_normal_angles(
1217
- depth_pred_aligned, depth_gt, valid_mask=bin_mask
1218
- )
1219
-
1220
- bm_store["absrel_store"].append(bm_absrel)
1221
- bm_store["rmse_store"].append(np.sqrt(bm_rmse))
1222
- bm_store["silog_store"].append(bm_silog_arr)
1223
- bm_store["silog_full_values"].append(bm_silog_val)
1224
- append_standard_depth_metrics(
1225
- bm_store["standard_store"],
1226
- bm_standard,
1227
- bm_standard_pool,
1228
- )
1229
- bm_store["normal_store"].append(bm_normals)
1230
- if len(bm_normals) > 0:
1231
- bm_store["normal_below_11_25"] += int(
1232
- np.sum(bm_normals < 11.25)
1219
+ native_bin_mask &= sky_valid
1220
+ if native_bin_mask.any():
1221
+ bm_store = benchmark_stores["native"][bn]
1222
+ bm_absrel = compute_absrel(
1223
+ depth_pred_raw, depth_gt, valid_mask=native_bin_mask
1224
+ )
1225
+ bm_rmse = compute_rmse_per_pixel(
1226
+ depth_pred_raw, depth_gt, valid_mask=native_bin_mask
1227
+ )
1228
+ bm_silog_arr = compute_silog_per_pixel(
1229
+ depth_pred_raw, depth_gt, valid_mask=native_bin_mask
1230
+ )
1231
+ bm_silog_val = compute_scale_invariant_log_error(
1232
+ depth_pred_raw, depth_gt, valid_mask=native_bin_mask
1233
+ )
1234
+ bm_standard, bm_standard_pool = compute_standard_depth_metrics(
1235
+ depth_pred_raw, depth_gt, valid_mask=native_bin_mask
1233
1236
  )
1234
- bm_store["normal_below_22_5"] += int(
1235
- np.sum(bm_normals < 22.5)
1237
+ bm_normals = compute_normal_angles(
1238
+ depth_pred_raw, depth_gt, valid_mask=native_bin_mask
1236
1239
  )
1237
- bm_store["normal_below_30"] += int(
1238
- np.sum(bm_normals < 30.0)
1240
+
1241
+ bm_store["absrel_store"].append(bm_absrel)
1242
+ bm_store["rmse_store"].append(np.sqrt(bm_rmse))
1243
+ bm_store["silog_store"].append(bm_silog_arr)
1244
+ bm_store["silog_full_values"].append(bm_silog_val)
1245
+ append_standard_depth_metrics(
1246
+ bm_store["standard_store"],
1247
+ bm_standard,
1248
+ bm_standard_pool,
1249
+ )
1250
+ bm_store["normal_store"].append(bm_normals)
1251
+ if len(bm_normals) > 0:
1252
+ bm_store["normal_below_11_25"] += int(
1253
+ np.sum(bm_normals < 11.25)
1254
+ )
1255
+ bm_store["normal_below_22_5"] += int(
1256
+ np.sum(bm_normals < 22.5)
1257
+ )
1258
+ bm_store["normal_below_30"] += int(
1259
+ np.sum(bm_normals < 30.0)
1260
+ )
1261
+
1262
+ if aligned_metrics is not raw_metrics:
1263
+ metric_bin_mask = bm_bins[bn].copy()
1264
+ metric_bin_mask &= (depth_pred_aligned > 0) & np.isfinite(
1265
+ depth_pred_aligned
1239
1266
  )
1267
+ if sky_valid is not None:
1268
+ metric_bin_mask &= sky_valid
1269
+ if metric_bin_mask.any():
1270
+ bm_store = benchmark_stores["metric"][bn]
1271
+ bm_absrel = compute_absrel(
1272
+ depth_pred_aligned,
1273
+ depth_gt,
1274
+ valid_mask=metric_bin_mask,
1275
+ )
1276
+ bm_rmse = compute_rmse_per_pixel(
1277
+ depth_pred_aligned,
1278
+ depth_gt,
1279
+ valid_mask=metric_bin_mask,
1280
+ )
1281
+ bm_silog_arr = compute_silog_per_pixel(
1282
+ depth_pred_aligned,
1283
+ depth_gt,
1284
+ valid_mask=metric_bin_mask,
1285
+ )
1286
+ bm_silog_val = compute_scale_invariant_log_error(
1287
+ depth_pred_aligned,
1288
+ depth_gt,
1289
+ valid_mask=metric_bin_mask,
1290
+ )
1291
+ bm_standard, bm_standard_pool = compute_standard_depth_metrics(
1292
+ depth_pred_aligned,
1293
+ depth_gt,
1294
+ valid_mask=metric_bin_mask,
1295
+ )
1296
+ bm_normals = compute_normal_angles(
1297
+ depth_pred_aligned,
1298
+ depth_gt,
1299
+ valid_mask=metric_bin_mask,
1300
+ )
1301
+
1302
+ bm_store["absrel_store"].append(bm_absrel)
1303
+ bm_store["rmse_store"].append(np.sqrt(bm_rmse))
1304
+ bm_store["silog_store"].append(bm_silog_arr)
1305
+ bm_store["silog_full_values"].append(bm_silog_val)
1306
+ append_standard_depth_metrics(
1307
+ bm_store["standard_store"],
1308
+ bm_standard,
1309
+ bm_standard_pool,
1310
+ )
1311
+ bm_store["normal_store"].append(bm_normals)
1312
+ if len(bm_normals) > 0:
1313
+ bm_store["normal_below_11_25"] += int(
1314
+ np.sum(bm_normals < 11.25)
1315
+ )
1316
+ bm_store["normal_below_22_5"] += int(
1317
+ np.sum(bm_normals < 22.5)
1318
+ )
1319
+ bm_store["normal_below_30"] += int(
1320
+ np.sum(bm_normals < 30.0)
1321
+ )
1240
1322
 
1241
1323
  if sanity_checker is not None:
1324
+ canonical_pred = depth_pred_aligned if (
1325
+ alignment_applied or not normalized_predictions
1326
+ ) else depth_pred_raw
1327
+ canonical_metrics = aligned_metrics if (
1328
+ alignment_applied or not normalized_predictions
1329
+ ) else raw_metrics
1242
1330
  sanity_checker.validate_depth_input(
1243
- depth_gt, depth_pred_aligned, entry_id
1331
+ depth_gt, canonical_pred, entry_id
1244
1332
  )
1245
1333
  if not defer_depth:
1246
1334
  # When deferred, the batcher callback runs these.
1247
1335
  _run_deferred_depth_sanity(
1248
- sanity_checker, aligned_metrics, entry_id
1336
+ sanity_checker, canonical_metrics, entry_id
1249
1337
  )
1250
- nm = aligned_metrics["normal_meta"]
1338
+ nm = canonical_metrics["normal_meta"]
1251
1339
  if nm["mean_angle"] is not None:
1252
1340
  sanity_checker.validate_normal_consistency(
1253
1341
  nm["mean_angle"], nm["valid_pixels_after_erosion"], entry_id
1254
1342
  )
1255
- ef = aligned_metrics["edge_f1"]
1343
+ ef = canonical_metrics["edge_f1"]
1256
1344
  sanity_checker.validate_depth_edge_f1(
1257
1345
  ef["pred_edge_pixels"],
1258
1346
  ef["gt_edge_pixels"],
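Both the native and metric branches in the hunk above assemble their per-bin validity masks the same way; factored out as a hedged sketch (helper name hypothetical):

```python
from typing import Optional

import numpy as np

def benchmark_bin_mask(
    gt_bin_mask: np.ndarray,
    depth_pred: np.ndarray,
    sky_valid: Optional[np.ndarray] = None,
) -> np.ndarray:
    """GT depth-range bin, restricted to positive, finite predictions and non-sky pixels."""
    mask = gt_bin_mask.copy()
    mask &= (depth_pred > 0) & np.isfinite(depth_pred)
    if sky_valid is not None:
        mask &= sky_valid
    return mask
```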
@@ -1271,28 +1359,61 @@ def evaluate_depth_samples(
1271
1359
  print("Computing FID/KID (this may take a while)...")
1272
1360
  print("Aggregating depth results...")
1273
1361
 
1274
- depth_raw = _build_depth_summary(stores["raw"], gt_depth_paths)
1275
- if alignment_applied:
1276
- depth_aligned = _build_depth_summary(stores["aligned"], gt_depth_paths)
1277
- else:
1278
- depth_aligned = copy.deepcopy(depth_raw)
1362
+ emit_native = alignment_applied or normalized_predictions
1363
+ emit_metric = alignment_applied or not normalized_predictions
1364
+
1365
+ native_summary = (
1366
+ _build_depth_summary(stores["raw"], gt_depth_paths)
1367
+ if emit_native or (emit_metric and not alignment_applied)
1368
+ else None
1369
+ )
1370
+ metric_summary = None
1371
+ if emit_metric:
1372
+ if alignment_applied:
1373
+ metric_summary = _build_depth_summary(
1374
+ stores["aligned"], gt_depth_paths
1375
+ )
1376
+ else:
1377
+ metric_summary = copy.deepcopy(native_summary)
1378
+ depth_summary = metric_summary if emit_metric else native_summary
1279
1379
 
1280
1380
  # -- Benchmark aggregation --
1281
1381
  depth_benchmark = None
1282
1382
  if benchmark_stores is not None:
1283
1383
  print("Aggregating benchmark depth results...")
1284
- depth_benchmark = {
1285
- "boundaries": benchmark_boundaries,
1286
- }
1287
- for bn in _BENCHMARK_BIN_NAMES:
1288
- depth_benchmark[bn] = _build_benchmark_bin_summary(
1289
- benchmark_stores[bn]
1290
- )
1384
+ depth_benchmark = {"boundaries": benchmark_boundaries}
1385
+ if emit_native or (emit_metric and not alignment_applied):
1386
+ native_benchmark = {
1387
+ bn: _build_benchmark_bin_summary(
1388
+ benchmark_stores["native"][bn]
1389
+ )
1390
+ for bn in _BENCHMARK_BIN_NAMES
1391
+ }
1392
+ else:
1393
+ native_benchmark = None
1394
+ if emit_native:
1395
+ depth_benchmark["native"] = native_benchmark
1396
+ if emit_metric:
1397
+ if alignment_applied:
1398
+ depth_benchmark["metric"] = {
1399
+ bn: _build_benchmark_bin_summary(
1400
+ benchmark_stores["metric"][bn]
1401
+ )
1402
+ for bn in _BENCHMARK_BIN_NAMES
1403
+ }
1404
+ else:
1405
+ depth_benchmark["metric"] = copy.deepcopy(native_benchmark)
1406
+
1407
+ emitted_spaces = []
1408
+ if emit_native:
1409
+ emitted_spaces.append("native")
1410
+ if emit_metric:
1411
+ emitted_spaces.append("metric")
1291
1412
 
1292
1413
  result = {
1293
- "depth_raw": depth_raw,
1294
- "depth_aligned": depth_aligned,
1295
- "depth": depth_aligned,
1414
+ "depth_native": native_summary if emit_native else None,
1415
+ "depth_metric": metric_summary if emit_metric else None,
1416
+ "depth": depth_summary,
1296
1417
  "depth_benchmark": depth_benchmark,
1297
1418
  "per_file_metrics": per_file_metrics,
1298
1419
  "dataset_info": {
@@ -1300,9 +1421,17 @@ def evaluate_depth_samples(
1300
1421
  "gt_name": gt_name,
1301
1422
  "pred_name": pred_name,
1302
1423
  },
1303
- "alignment": {
1304
- "mode": alignment_mode,
1305
- "applied": alignment_applied,
1424
+ "space_info": {
1425
+ "input_space_detected": input_space_detected,
1426
+ "metric_space_source": (
1427
+ "scale_shift"
1428
+ if alignment_applied
1429
+ else ("native" if emit_metric else None)
1430
+ ),
1431
+ "calibration_mode": alignment_mode,
1432
+ "calibration_applied": alignment_applied,
1433
+ "emitted_spaces": emitted_spaces,
1434
+ "canonical_space": "metric" if emit_metric else "native",
1306
1435
  },
1307
1436
  "spatial_info": {
1308
1437
  "gt_dimensions": {"height": gt_native_dims[0], "width": gt_native_dims[1]}
@@ -23,8 +23,8 @@ import numpy as np
23
23
  import torch
24
24
 
25
25
  try:
26
- from torchmetrics.functional import peak_signal_noise_ratio as _tm_psnr
27
26
  from torchmetrics.functional.image import (
27
+ peak_signal_noise_ratio as _tm_psnr,
28
28
  structural_similarity_index_measure as _tm_ssim,
29
29
  )
30
30
 
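If older torchmetrics releases still need to import cleanly, a guarded fallback such as the one below is a common pattern; whether euler-eval actually needs it depends on its torchmetrics pin, which this diff does not show.

```python
try:
    # Layout used by euler-eval 2.4.0: both metrics under torchmetrics.functional.image.
    from torchmetrics.functional.image import (
        peak_signal_noise_ratio as _tm_psnr,
        structural_similarity_index_measure as _tm_ssim,
    )
except ImportError:
    # Older layout re-exported PSNR at the top-level functional namespace.
    from torchmetrics.functional import (
        peak_signal_noise_ratio as _tm_psnr,
        structural_similarity_index_measure as _tm_ssim,
    )
```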
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: euler-eval
3
- Version: 2.2.0
3
+ Version: 2.4.0
4
4
  Summary: Depth map evaluation toolkit with comprehensive metrics
5
5
  Author: Depth Eval Contributors
6
6
  License: MIT
@@ -139,7 +139,7 @@ This pre-downloads:
139
139
  | `--mask-sky` | flag | off | Mask sky regions from metrics using GT segmentation |
140
140
  | `--no-sanity-check` | flag | off | Disable sanity checking of metric configurations |
141
141
  | `--metrics-config` | `str` | auto-detect | Path to `metrics_config.json` for sanity checking |
142
- | `--depth-alignment` | `{none,auto_affine,affine}` | `auto_affine` | Depth alignment mode (`depth` output uses aligned branch) |
142
+ | `--depth-alignment` | `{none,auto_affine,affine}` | `auto_affine` | Depth calibration mode; outputs are emitted in semantic `native`/`metric` spaces and `depth` aliases the canonical branch |
143
143
  | `--rgb-fid-backend` | `{builtin,clean-fid}` | `builtin` | RGB FID backend; `clean-fid` requires optional dependency |
144
144
  | `--benchmark-depth-range` | `float float` | none | Depth range `[MIN, MAX]` in meters for benchmark evaluation; computes depth and RGB metrics for pixels within this range, subdivided into log-scaled near/mid/far bins (additive to regular metrics) |
145
145
 
@@ -341,10 +341,10 @@ When `--rgb-fid-backend clean-fid` is used, `euler-eval` will honor `CLEANFID_CA
341
341
 
342
342
  ```json
343
343
  {
344
- "depth_raw": { "...": "metrics without alignment" },
345
- "depth_aligned": { "...": "metrics with selected alignment mode" },
344
+ "depth_native": { "...": "native model depth space, if diagnostically meaningful" },
345
+ "depth_metric": { "...": "metric depth space, if available" },
346
346
  "depth": {
347
- "...": "backward-compatible alias of depth_aligned"
347
+ "...": "canonical alias of depth_metric when present, else depth_native"
348
348
  },
349
349
  "rgb": {
350
350
  "...": "..."
@@ -358,9 +358,9 @@ When `--rgb-fid-backend clean-fid` is used, `euler-eval` will honor `CLEANFID_CA
358
358
  {
359
359
  "id": "frame_0001",
360
360
  "metrics": {
361
- "depth": { "...": "aligned (alias)" },
362
- "depth_raw": { "...": "raw" },
363
- "depth_aligned": { "...": "aligned" },
361
+ "depth": { "...": "canonical alias" },
362
+ "depth_native": { "...": "native, when emitted" },
363
+ "depth_metric": { "...": "metric, when emitted" },
364
364
  "rgb": { "...": "..." }
365
365
  }
366
366
  }
@@ -374,9 +374,9 @@ When `--rgb-fid-backend clean-fid` is used, `euler-eval` will honor `CLEANFID_CA
374
374
  ```
375
375
 
376
376
  For depth outputs:
377
- - `depth_raw`: metric-space depth without any post-hoc alignment.
378
- - `depth_aligned`: metric-space depth after configured alignment mode.
379
- - `depth`: backward-compatible alias of `depth_aligned`.
377
+ - `depth_native`: the model's native depth space after spatial/radial preprocessing, emitted only when it is diagnostically distinct.
378
+ - `depth_metric`: the comparable metric-depth branch. This is either the native prediction itself or the calibrated scale-shift result.
379
+ - `depth`: canonical alias of `depth_metric` when available, otherwise `depth_native`.
380
380
  - `standard`: explicit monocular-depth metrics with three reducers:
381
381
  `image_mean`, `image_median`, and `pixel_pool`.
382
382
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "euler-eval"
7
- version = "2.2.0"
7
+ version = "2.4.0"
8
8
  description = "Depth map evaluation toolkit with comprehensive metrics"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,4 +1,4 @@
1
- """Tests for depth raw/aligned output structure and alignment behavior."""
1
+ """Tests for semantic depth-space output and calibration behavior."""
2
2
 
3
3
  from pathlib import Path
4
4
 
@@ -222,7 +222,29 @@ def _make_dataset_with_segmentation():
222
222
  return _DummyDepthDataset(samples)
223
223
 
224
224
 
225
- def test_depth_output_contains_raw_and_aligned(monkeypatch):
225
+ def _make_metric_dataset():
226
+ gt_a = np.array([[10.0, 20.0], [30.0, 40.0]], dtype=np.float32)
227
+ gt_b = np.array([[6.0, 12.0], [18.0, 24.0]], dtype=np.float32)
228
+ pred_a = (gt_a * 1.02).astype(np.float32)
229
+ pred_b = (gt_b * 0.98).astype(np.float32)
230
+ samples = [
231
+ {
232
+ "id": "00001",
233
+ "full_id": "/Scene01/clone/00001",
234
+ "gt": gt_a,
235
+ "pred": pred_a,
236
+ },
237
+ {
238
+ "id": "00002",
239
+ "full_id": "/Scene01/clone/00002",
240
+ "gt": gt_b,
241
+ "pred": pred_b,
242
+ },
243
+ ]
244
+ return _DummyDepthDataset(samples)
245
+
246
+
247
+ def test_depth_output_contains_native_and_metric_for_calibrated_normalized_input(monkeypatch):
226
248
  _patch_depth_metrics(monkeypatch)
227
249
 
228
250
  results = eval_mod.evaluate_depth_samples(
@@ -232,36 +254,38 @@ def test_depth_output_contains_raw_and_aligned(monkeypatch):
232
254
  alignment_mode="auto_affine",
233
255
  )
234
256
 
235
- assert "depth_raw" in results
236
- assert "depth_aligned" in results
257
+ assert "depth_native" in results
258
+ assert "depth_metric" in results
237
259
  assert "depth" in results
238
260
 
239
- raw_absrel = results["depth_raw"]["depth_metrics"]["absrel"]["median"]
240
- aligned_absrel = results["depth_aligned"]["depth_metrics"]["absrel"]["median"]
241
- raw_standard_absrel = results["depth_raw"]["standard"]["image_mean"]["absrel"]
242
- aligned_standard_absrel = results["depth_aligned"]["standard"]["image_mean"]["absrel"]
243
- assert aligned_absrel < raw_absrel
244
- assert aligned_standard_absrel < raw_standard_absrel
245
- assert results["alignment"]["applied"] is True
261
+ native_absrel = results["depth_native"]["depth_metrics"]["absrel"]["median"]
262
+ metric_absrel = results["depth_metric"]["depth_metrics"]["absrel"]["median"]
263
+ native_standard_absrel = results["depth_native"]["standard"]["image_mean"]["absrel"]
264
+ metric_standard_absrel = results["depth_metric"]["standard"]["image_mean"]["absrel"]
265
+ assert metric_absrel < native_absrel
266
+ assert metric_standard_absrel < native_standard_absrel
267
+ assert results["space_info"]["calibration_applied"] is True
268
+ assert results["space_info"]["emitted_spaces"] == ["native", "metric"]
269
+ assert results["space_info"]["canonical_space"] == "metric"
246
270
 
247
271
  files = results["per_file_metrics"]["children"]["Scene01"]["children"]["clone"][
248
272
  "files"
249
273
  ]
250
274
  per_file = next(item["metrics"] for item in files if item["id"] == "00001")
251
275
  assert "depth" in per_file
252
- assert "depth_raw" in per_file
253
- assert "depth_aligned" in per_file
276
+ assert "depth_native" in per_file
277
+ assert "depth_metric" in per_file
254
278
  assert (
255
- per_file["depth_aligned"]["depth_metrics"]["absrel"]
256
- < per_file["depth_raw"]["depth_metrics"]["absrel"]
279
+ per_file["depth_metric"]["depth_metrics"]["absrel"]
280
+ < per_file["depth_native"]["depth_metrics"]["absrel"]
257
281
  )
258
282
  assert (
259
- per_file["depth_aligned"]["standard"]["absrel"]
260
- < per_file["depth_raw"]["standard"]["absrel"]
283
+ per_file["depth_metric"]["standard"]["absrel"]
284
+ < per_file["depth_native"]["standard"]["absrel"]
261
285
  )
262
286
 
263
287
 
264
- def test_depth_alignment_none_keeps_raw_and_aligned_equal(monkeypatch):
288
+ def test_depth_alignment_none_emits_only_native_for_normalized_input(monkeypatch):
265
289
  _patch_depth_metrics(monkeypatch)
266
290
 
267
291
  results = eval_mod.evaluate_depth_samples(
@@ -271,13 +295,34 @@ def test_depth_alignment_none_keeps_raw_and_aligned_equal(monkeypatch):
271
295
  alignment_mode="none",
272
296
  )
273
297
 
274
- raw = results["depth_raw"]["depth_metrics"]["absrel"]["median"]
275
- aligned = results["depth_aligned"]["depth_metrics"]["absrel"]["median"]
276
- raw_standard = results["depth_raw"]["standard"]["pixel_pool"]["absrel"]
277
- aligned_standard = results["depth_aligned"]["standard"]["pixel_pool"]["absrel"]
278
- assert raw == aligned
279
- assert raw_standard == aligned_standard
280
- assert results["alignment"]["applied"] is False
298
+ assert "depth_native" in results and results["depth_native"] is not None
299
+ assert results.get("depth_metric") is None
300
+ assert results["depth"] == results["depth_native"]
301
+ assert results["space_info"]["calibration_applied"] is False
302
+ assert results["space_info"]["emitted_spaces"] == ["native"]
303
+ assert results["space_info"]["canonical_space"] == "native"
304
+
305
+
306
+ def test_metric_input_emits_only_metric_when_no_calibration_is_needed(monkeypatch):
307
+ _patch_depth_metrics(monkeypatch)
308
+
309
+ results = eval_mod.evaluate_depth_samples(
310
+ dataset=_make_metric_dataset(),
311
+ is_radial=True,
312
+ device="cpu",
313
+ alignment_mode="auto_affine",
314
+ )
315
+
316
+ assert results.get("depth_native") is None
317
+ assert "depth_metric" in results and results["depth_metric"] is not None
318
+ assert results["depth"] == results["depth_metric"]
319
+ assert results["space_info"]["input_space_detected"] == "metric"
320
+ assert results["space_info"]["metric_space_source"] == "native"
321
+ assert results["space_info"]["emitted_spaces"] == ["metric"]
322
+ files = results["per_file_metrics"]["children"]["Scene01"]["children"]["clone"]["files"]
323
+ per_file = next(item["metrics"] for item in files if item["id"] == "00001")
324
+ assert "depth_metric" in per_file
325
+ assert "depth_native" not in per_file
281
326
 
282
327
 
283
328
  def test_depth_output_contains_spatial_info(monkeypatch):
@@ -89,7 +89,7 @@ class TestMetaBlockStructure:
89
89
  "pred": {"dimensions": {"height": 50, "width": 100}},
90
90
  "spatial_alignment": {"method": "resize"},
91
91
  },
92
- "depth": {"eval": {"raw": {}, "aligned": {}}},
92
+ "depth": {"eval": {"native": {}, "metric": {}}},
93
93
  }
94
94
  cleaned = _clean_metric_tree(save_dict)
95
95
  assert "meta" in cleaned
@@ -126,16 +126,16 @@ class TestAxisDeclarations:
126
126
  """Verify axis declarations follow the metric-namespacing convention."""
127
127
 
128
128
  def test_depth_axes_structure(self):
129
- """depth.eval declares alignment, category, and reduction axes."""
130
- assert "alignment" in _DEPTH_EVAL_AXES
129
+ """depth.eval declares space, category, and reduction axes."""
130
+ assert "space" in _DEPTH_EVAL_AXES
131
131
  assert "category" in _DEPTH_EVAL_AXES
132
132
  assert "reduction" in _DEPTH_EVAL_AXES
133
133
 
134
- alignment = _DEPTH_EVAL_AXES["alignment"]
135
- assert alignment.position == 0
136
- assert alignment.optional is False
137
- assert "raw" in alignment.values
138
- assert "aligned" in alignment.values
134
+ space = _DEPTH_EVAL_AXES["space"]
135
+ assert space.position == 0
136
+ assert space.optional is False
137
+ assert "native" in space.values
138
+ assert "metric" in space.values
139
139
 
140
140
  category = _DEPTH_EVAL_AXES["category"]
141
141
  assert category.position == 1
@@ -264,6 +264,6 @@ class TestMetricDescriptions:
264
264
  envelope = ns.metric_set_envelope("depth", metadata={})
265
265
  assert "axes" in envelope
266
266
  assert "metricDescriptions" in envelope
267
- assert envelope["axes"]["alignment"]["position"] == 0
267
+ assert envelope["axes"]["space"]["position"] == 0
268
268
  assert envelope["axes"]["reduction"]["position"] == 2
269
269
  assert "psnr" in envelope["metricDescriptions"]