euler-eval 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. euler_eval-1.0.1/PKG-INFO +365 -0
  2. euler_eval-1.0.1/README.md +330 -0
  3. euler_eval-1.0.1/euler_eval/__init__.py +1 -0
  4. euler_eval-1.0.1/euler_eval/cli.py +764 -0
  5. euler_eval-1.0.1/euler_eval/data.py +509 -0
  6. euler_eval-1.0.1/euler_eval/evaluate.py +1165 -0
  7. euler_eval-1.0.1/euler_eval/metrics/__init__.py +155 -0
  8. euler_eval-1.0.1/euler_eval/metrics/absrel.py +101 -0
  9. euler_eval-1.0.1/euler_eval/metrics/daniel_error.py +81 -0
  10. euler_eval-1.0.1/euler_eval/metrics/depth_binned_error.py +153 -0
  11. euler_eval-1.0.1/euler_eval/metrics/depth_edge_f1.py +200 -0
  12. euler_eval-1.0.1/euler_eval/metrics/fid_kid.py +354 -0
  13. euler_eval-1.0.1/euler_eval/metrics/high_freq_energy.py +228 -0
  14. euler_eval-1.0.1/euler_eval/metrics/lpips_metric.py +160 -0
  15. euler_eval-1.0.1/euler_eval/metrics/normal_consistency.py +222 -0
  16. euler_eval-1.0.1/euler_eval/metrics/psnr.py +85 -0
  17. euler_eval-1.0.1/euler_eval/metrics/rgb_edge_f1.py +179 -0
  18. euler_eval-1.0.1/euler_eval/metrics/rgb_lpips.py +142 -0
  19. euler_eval-1.0.1/euler_eval/metrics/rgb_psnr_ssim.py +160 -0
  20. euler_eval-1.0.1/euler_eval/metrics/rho_a.py +241 -0
  21. euler_eval-1.0.1/euler_eval/metrics/rmse.py +119 -0
  22. euler_eval-1.0.1/euler_eval/metrics/scale_invariant_log.py +130 -0
  23. euler_eval-1.0.1/euler_eval/metrics/ssim.py +139 -0
  24. euler_eval-1.0.1/euler_eval/metrics/tail_errors.py +130 -0
  25. euler_eval-1.0.1/euler_eval/metrics/utils.py +128 -0
  26. euler_eval-1.0.1/euler_eval/sanity_checker.py +1010 -0
  27. euler_eval-1.0.1/euler_eval/utils/hierarchy_parser.py +118 -0
  28. euler_eval-1.0.1/euler_eval.egg-info/PKG-INFO +365 -0
  29. euler_eval-1.0.1/euler_eval.egg-info/SOURCES.txt +42 -0
  30. euler_eval-1.0.1/euler_eval.egg-info/dependency_links.txt +1 -0
  31. euler_eval-1.0.1/euler_eval.egg-info/entry_points.txt +2 -0
  32. euler_eval-1.0.1/euler_eval.egg-info/requires.txt +18 -0
  33. euler_eval-1.0.1/euler_eval.egg-info/top_level.txt +1 -0
  34. euler_eval-1.0.1/pyproject.toml +69 -0
  35. euler_eval-1.0.1/setup.cfg +4 -0
  36. euler_eval-1.0.1/tests/test_alignment.py +244 -0
  37. euler_eval-1.0.1/tests/test_cli_device.py +104 -0
  38. euler_eval-1.0.1/tests/test_config.py +222 -0
  39. euler_eval-1.0.1/tests/test_data.py +273 -0
  40. euler_eval-1.0.1/tests/test_depth_alignment_output.py +234 -0
  41. euler_eval-1.0.1/tests/test_evaluate_helpers.py +180 -0
  42. euler_eval-1.0.1/tests/test_integration.py +512 -0
  43. euler_eval-1.0.1/tests/test_rho_a.py +303 -0
  44. euler_eval-1.0.1/tests/test_save_results.py +262 -0
@@ -0,0 +1,365 @@
1
+ Metadata-Version: 2.4
2
+ Name: euler-eval
3
+ Version: 1.0.1
4
+ Summary: Depth map evaluation toolkit with comprehensive metrics
5
+ Author: Depth Eval Contributors
6
+ License: MIT
7
+ Keywords: depth,evaluation,metrics,computer-vision,3d
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Science/Research
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Scientific/Engineering :: Image Processing
17
+ Requires-Python: >=3.9
18
+ Description-Content-Type: text/markdown
19
+ Requires-Dist: numpy>=1.21.0
20
+ Requires-Dist: scipy>=1.7.0
21
+ Requires-Dist: Pillow>=8.0.0
22
+ Requires-Dist: torch>=1.9.0
23
+ Requires-Dist: torchvision>=0.10.0
24
+ Requires-Dist: lpips>=0.1.4
25
+ Requires-Dist: tqdm>=4.62.0
26
+ Requires-Dist: euler-loading
27
+ Requires-Dist: ds-crawler
28
+ Provides-Extra: logging
29
+ Requires-Dist: euler-train; extra == "logging"
30
+ Provides-Extra: dev
31
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
32
+ Requires-Dist: pytest-cov>=3.0.0; extra == "dev"
33
+ Requires-Dist: black>=22.0.0; extra == "dev"
34
+ Requires-Dist: ruff>=0.0.260; extra == "dev"
35
+
36
+ # euler-eval
37
+
38
+ A comprehensive evaluation toolkit for comparing predicted depth maps and RGB images against ground truth, powered by [euler_loading](https://github.com/d-rothen/euler-loading) for flexible dataset loading.
39
+
40
+ ## Features
41
+
42
+ - **Depth metrics**: PSNR, SSIM, LPIPS, FID, KID, AbsRel, RMSE, Scale-Invariant Log Error, Normal Consistency, Depth Edge F1
43
+ - **RGB metrics**: PSNR, SSIM, LPIPS, SCE (Structural Chromatic Error), Edge F1, Tail Errors (p95/p99), High-Frequency Energy Ratio, Depth-Binned Photometric Error
44
+ - **Sanity checking**: Automatic validation of metric results against configurable thresholds, with detailed warning reports
45
+ - **Sky masking**: Optional exclusion of sky regions from metrics using GT segmentation
46
+ - **Flexible dataset loading**: Automatic loader resolution via euler_loading and ds-crawler index metadata
47
+ - **Per-file and aggregate results**: Outputs both per-image metrics and dataset-level aggregates to JSON
48
+ - **euler_train integration**: Optional experiment logging via [euler_train](https://github.com/d-rothen/euler-train)
49
+
50
+ ## Installation
51
+
52
+ Requires Python 3.9+.
53
+
54
+ ```bash
55
+ uv pip install "euler-eval @ git+https://github.com/d-rothen/euler-parser.git"
56
+
57
+ # with euler_train logging support
58
+ uv pip install "euler-eval[logging] @ git+https://github.com/d-rothen/euler-parser.git"
59
+ ```
60
+
61
+ Or install in editable mode:
62
+
63
+ ```bash
64
+ pip install -e .
65
+ ```
66
+
67
+ ### Dependencies
68
+
69
+ Core:
70
+ - numpy, scipy, Pillow
71
+ - torch, torchvision
72
+ - lpips
73
+ - tqdm
74
+ - [euler-loading](https://github.com/d-rothen/euler-loading), [ds-crawler](https://github.com/d-rothen/ds-crawler)
75
+
76
+ Optional:
77
+ - [euler-train](https://github.com/d-rothen/euler-train) (install via `[logging]` extra)
78
+
79
+ ## Usage
80
+
81
+ The package provides a `depth-eval` console script:
82
+
83
+ ```bash
84
+ depth-eval <config> [options]
85
+ ```
86
+
87
+ Or, from a checkout of the source repository, run the entry script directly:
88
+
89
+ ```bash
90
+ python main.py <config> [options]
91
+ ```
92
+
93
+ ### Positional arguments
94
+
95
+ | Argument | Description |
96
+ |---|---|
97
+ | `config` | Path to a JSON configuration file (see [Configuration](#configuration)) |
98
+
99
+ ### Options
100
+
101
+ | Flag | Type | Default | Description |
102
+ |---|---|---|---|
103
+ | `--device` | `{auto,cuda,cpu}` | `auto` | Compute device (`auto` prefers CUDA when available) |
104
+ | `--batch-size` | `int` | `16` | Batch size for metrics that support batching |
105
+ | `--num-workers` | `int` | `4` | Number of data loading workers |
106
+ | `--verbose`, `-v` | flag | off | Enable verbose output |
107
+ | `--skip-depth` | flag | off | Skip depth evaluation |
108
+ | `--skip-rgb` | flag | off | Skip RGB evaluation |
109
+ | `--mask-sky` | flag | off | Mask sky regions from metrics using GT segmentation |
110
+ | `--no-sanity-check` | flag | off | Disable sanity checking of metric results |
111
+ | `--metrics-config` | `str` | auto-detect | Path to `metrics_config.json` for sanity checking |
112
+ | `--depth-alignment` | `{none,auto_affine,affine}` | `auto_affine` | Depth alignment mode (`depth` output uses aligned branch) |
113
+
114
+ ### Examples
115
+
116
+ ```bash
117
+ # Evaluate with default settings (auto-selects CUDA when available)
118
+ depth-eval config.json --batch-size 32
119
+
120
+ # Evaluate with sky masking enabled (requires gt.segmentation in config)
121
+ depth-eval config.json --mask-sky -v
122
+
123
+ # Skip RGB evaluation, only evaluate depth
124
+ depth-eval config.json --skip-rgb
125
+
126
+ # Disable sanity checking
127
+ depth-eval config.json --no-sanity-check
128
+
129
+ # Disable depth alignment
130
+ depth-eval config.json --depth-alignment none
131
+
132
+ # Force affine scale+shift alignment on all depth predictions
133
+ depth-eval config.json --depth-alignment affine
134
+ ```
135
+
136
+ ## Configuration
137
+
138
+ ### `config.json`
139
+
140
+ Defines GT modalities, prediction datasets to evaluate, and optional euler_train logging. See [example_config.json](example_config.json).
141
+
142
+ ```json
143
+ {
144
+ "euler_train": {
145
+ "dir": "runs/my_project",
146
+ "run_id": null,
147
+ "run_name": null
148
+ },
149
+ "gt": {
150
+ "rgb": { "path": "/data/gt/rgb" },
151
+ "depth": { "path": "/data/gt/depth" },
152
+ "segmentation": { "path": "/data/gt/segmentation" },
153
+ "calibration": { "path": "/data/gt/calibration" }
154
+ },
155
+ "datasets": [
156
+ {
157
+ "name": "model_a",
158
+ "rgb": { "path": "/data/model_a/rgb" },
159
+ "depth": { "path": "/data/model_a/depth" },
160
+ "output_file": "/path/to/output/model_a_eval.json"
161
+ },
162
+ {
163
+ "name": "model_b_depth_only",
164
+ "depth": { "path": "/data/model_b/depth" }
165
+ },
166
+ {
167
+ "name": "model_c_rgb_only",
168
+ "rgb": { "path": "/data/model_c/rgb" }
169
+ }
170
+ ]
171
+ }
172
+ ```
173
+
174
+ #### GT section
175
+
176
+ | Field | Required | Description |
177
+ |---|---|---|
178
+ | `gt.rgb.path` | yes | Path to GT RGB dataset |
179
+ | `gt.depth.path` | yes | Path to GT depth dataset |
180
+ | `gt.segmentation.path` | no | Path to GT segmentation (needed for `--mask-sky`) |
181
+ | `gt.calibration.path` | no | Path to calibration data (camera intrinsics matrices) |
182
+ | `gt.name` | no | Display name for ground truth (default: `"GT"`) |
183
+
184
+ #### Prediction datasets
185
+
186
+ Each entry in `datasets` can include `rgb`, `depth`, or both:
187
+
188
+ | Field | Required | Description |
189
+ |---|---|---|
190
+ | `name` | yes | Display name for this prediction dataset |
191
+ | `rgb.path` | no\* | Path to predicted RGB dataset |
192
+ | `depth.path` | no\* | Path to predicted depth dataset |
193
+ | `output_file` | no | Custom output path for results JSON (default: `eval.json` inside the first available modality path) |
194
+
195
+ \* At least one of `rgb.path` or `depth.path` is required.
196
+
197
+ #### `euler_train` section (optional)
198
+
199
+ When present, evaluation results are logged to an [euler_train](https://github.com/d-rothen/euler-train) run. Requires the `euler-train` package to be installed (`pip install euler-eval[logging]`).
200
+
201
+ | Field | Required | Description |
202
+ |---|---|---|
203
+ | `euler_train.dir` | yes | euler_train project directory |
204
+ | `euler_train.run_id` | no | Existing run ID to resume (if `null`, a new run is created) |
205
+ | `euler_train.run_name` | no | Human-readable run label |
206
+
207
+ When `run_id` is provided, the run is detached after evaluation (the run remains active for further use). When `run_id` is `null`, a new run is created and finished upon completion.
208
+
209
+ ### Loader resolution
210
+
211
+ Loaders are resolved automatically by euler_loading from each dataset directory's ds-crawler index metadata. The index's `euler_loading.loader` and `euler_loading.function` fields determine which loader module and function to use (e.g. `"vkitti2"` maps to `euler_loading.loaders.gpu.vkitti2`).
212
+
213
+ No manual loader selection is required. Each dataset directory declares its own loader through its ds-crawler configuration.
214
+
215
+ Dataset metadata (e.g. `radial_depth`, `rgb_range`) is read automatically from the dataset's `output.json` manifest via `get_modality_metadata()`. Depth is assumed to already be in meters.
216
+
217
+ ### Dataset manifest (`output.json`)
218
+
219
+ Each dataset directory must contain an `output.json` manifest (generated by [ds-crawler](https://github.com/d-rothen/ds-crawler)) describing its hierarchical file structure:
220
+
221
+ ```json
222
+ {
223
+ "dataset": {
224
+ "children": {
225
+ "scene_01": {
226
+ "files": [
227
+ { "id": "frame_0001", "path": "scene_01/frame_0001.png" },
228
+ { "id": "frame_0002", "path": "scene_01/frame_0002.png" }
229
+ ]
230
+ }
231
+ }
232
+ }
233
+ }
234
+ ```
235
+
236
+ GT and prediction datasets are matched by hierarchy path and file ID through `MultiModalDataset`.
237
+
238
+ ### `metrics_config.json`
239
+
240
+ Controls sanity check thresholds. See [metrics_config.json](metrics_config.json) for all available options. When `--metrics-config` is not specified, the tool auto-detects `metrics_config.json` at the project root. If not found, built-in defaults are used.
241
+
242
+ ## Metrics
243
+
244
+ ### Depth metrics
245
+
246
+ | Metric | Key | Description |
247
+ |---|---|---|
248
+ | PSNR | `depth.image_quality.psnr` | Peak Signal-to-Noise Ratio (dB), using max depth as dynamic range |
249
+ | SSIM | `depth.image_quality.ssim` | Structural Similarity Index |
250
+ | LPIPS | `depth.image_quality.lpips` | Learned Perceptual Image Patch Similarity |
251
+ | FID | `depth.image_quality.fid` | Fréchet Inception Distance (dataset-level distribution metric) |
252
+ | KID | `depth.image_quality.kid_mean`, `kid_std` | Kernel Inception Distance (mean and std) |
253
+ | AbsRel | `depth.depth_metrics.absrel` | Absolute Relative Error (\|pred-gt\|/gt), reported as median and p90 |
254
+ | RMSE | `depth.depth_metrics.rmse` | Root Mean Square Error, reported as median and p90 |
255
+ | SILog | `depth.depth_metrics.silog` | Scale-Invariant Log Error, reported as mean, median, and p90 |
256
+ | Normal Consistency | `depth.geometric_metrics.normal_consistency` | Surface normal angular error (degrees) via finite differences; includes mean, median, and percent below 11.25°/22.5°/30° |
257
+ | Depth Edge F1 | `depth.geometric_metrics.depth_edge_f1` | Edge detection precision/recall/F1 for depth discontinuities |
258
+
259
+ ### RGB metrics
260
+
261
+ | Metric | Key | Description |
262
+ |---|---|---|
263
+ | PSNR | `rgb.image_quality.psnr` | Peak Signal-to-Noise Ratio (dB) |
264
+ | SSIM | `rgb.image_quality.ssim` | Structural Similarity Index |
265
+ | SCE | `rgb.image_quality.sce` | Structural Chromatic Error |
266
+ | LPIPS | `rgb.image_quality.lpips` | Learned Perceptual Image Patch Similarity |
267
+ | Edge F1 | `rgb.edge_f1` | Edge preservation precision/recall/F1 |
268
+ | Tail Errors | `rgb.tail_errors` | 95th and 99th percentile per-pixel errors |
269
+ | High-Frequency Energy | `rgb.high_frequency` | HF energy preservation ratio (pred vs GT) and relative difference |
270
+ | Depth-Binned Photometric Error | `rgb.depth_binned_photometric` | MAE/MSE in near/mid/far depth bins (requires GT depth) |
271
+
272
+ ## Output
273
+
274
+ Results are saved as JSON per prediction dataset. Default path: `eval.json` inside the first available modality path of the dataset, unless overridden by `output_file` in the config.
275
+
276
+ ### Output structure
277
+
278
+ ```json
279
+ {
280
+ "depth_raw": { "...": "metrics without alignment" },
281
+ "depth_aligned": { "...": "metrics with selected alignment mode" },
282
+ "depth": {
283
+ "...": "backward-compatible alias of depth_aligned"
284
+ },
285
+ "rgb": {
286
+ "...": "..."
287
+ },
288
+ "per_file_metrics": {
289
+ "children": {
290
+ "scene_01": {
291
+ "children": {
292
+ "camera_0": {
293
+ "files": [
294
+ {
295
+ "id": "frame_0001",
296
+ "metrics": {
297
+ "depth": { "...": "aligned (alias)" },
298
+ "depth_raw": { "...": "raw" },
299
+ "depth_aligned": { "...": "aligned" },
300
+ "rgb": { "...": "..." }
301
+ }
302
+ }
303
+ ]
304
+ }
305
+ }
306
+ }
307
+ }
308
+ }
309
+ }
310
+ ```
311
+
312
+ For depth outputs:
313
+ - `depth_raw`: metric-space depth without any post-hoc alignment.
314
+ - `depth_aligned`: metric-space depth after configured alignment mode.
315
+ - `depth`: backward-compatible alias of `depth_aligned`.
316
+
317
+ The previous single-depth structure (still available under `depth`) looks like this:
318
+
319
+ ```json
320
+ {
321
+ "depth": {
322
+ "image_quality": {
323
+ "psnr": 28.5,
324
+ "ssim": 0.92,
325
+ "lpips": 0.08,
326
+ "fid": 12.3,
327
+ "kid_mean": 0.005,
328
+ "kid_std": 0.002
329
+ },
330
+ "depth_metrics": {
331
+ "absrel": { "median": 0.05, "p90": 0.12 },
332
+ "rmse": { "median": 1.2, "p90": 3.1 },
333
+ "silog": { "mean": 0.08, "median": 0.06, "p90": 0.15 }
334
+ },
335
+ "geometric_metrics": {
336
+ "normal_consistency": {
337
+ "mean_angle": 12.3,
338
+ "median_angle": 9.8,
339
+ "percent_below_11_25": 55.2,
340
+ "percent_below_22_5": 82.1,
341
+ "percent_below_30": 91.5
342
+ },
343
+ "depth_edge_f1": {
344
+ "precision": 0.72,
345
+ "recall": 0.68,
346
+ "f1": 0.70
347
+ }
348
+ },
349
+ "dataset_info": {
350
+ "num_pairs": 500,
351
+ "gt_name": "GT",
352
+ "pred_name": "model_a"
353
+ }
354
+ },
355
+ "rgb": { "...": "unchanged" }
356
+ }
357
+ ```
358
+
359
+ ### Sanity check report
360
+
361
+ When sanity checking is enabled (the default), a `sanity_check_report.json` file containing warnings grouped by metric type is saved to the current working directory.
362
+
363
+ ## License
364
+
365
+ MIT
@@ -0,0 +1,330 @@
1
+ # euler-eval
2
+
3
+ A comprehensive evaluation toolkit for comparing predicted depth maps and RGB images against ground truth, powered by [euler_loading](https://github.com/d-rothen/euler-loading) for flexible dataset loading.
4
+
5
+ ## Features
6
+
7
+ - **Depth metrics**: PSNR, SSIM, LPIPS, FID, KID, AbsRel, RMSE, Scale-Invariant Log Error, Normal Consistency, Depth Edge F1
8
+ - **RGB metrics**: PSNR, SSIM, LPIPS, SCE (Structural Chromatic Error), Edge F1, Tail Errors (p95/p99), High-Frequency Energy Ratio, Depth-Binned Photometric Error
9
+ - **Sanity checking**: Automatic validation of metric results against configurable thresholds, with detailed warning reports
10
+ - **Sky masking**: Optional exclusion of sky regions from metrics using GT segmentation
11
+ - **Flexible dataset loading**: Automatic loader resolution via euler_loading and ds-crawler index metadata
12
+ - **Per-file and aggregate results**: Outputs both per-image metrics and dataset-level aggregates to JSON
13
+ - **euler_train integration**: Optional experiment logging via [euler_train](https://github.com/d-rothen/euler-train)
14
+
15
+ ## Installation
16
+
17
+ Requires Python 3.9+.
18
+
19
+ ```bash
20
+ uv pip install "euler-eval @ git+https://github.com/d-rothen/euler-parser.git"
21
+
22
+ # with euler_train logging support
23
+ uv pip install "euler-eval[logging] @ git+https://github.com/d-rothen/euler-parser.git"
24
+ ```
25
+
26
+ Or install in editable mode:
27
+
28
+ ```bash
29
+ pip install -e .
30
+ ```
31
+
32
+ ### Dependencies
33
+
34
+ Core:
35
+ - numpy, scipy, Pillow
36
+ - torch, torchvision
37
+ - lpips
38
+ - tqdm
39
+ - [euler-loading](https://github.com/d-rothen/euler-loading), [ds-crawler](https://github.com/d-rothen/ds-crawler)
40
+
41
+ Optional:
42
+ - [euler-train](https://github.com/d-rothen/euler-train) (install via `[logging]` extra)
43
+
44
+ ## Usage
45
+
46
+ The package provides a `depth-eval` console script:
47
+
48
+ ```bash
49
+ depth-eval <config> [options]
50
+ ```
51
+
52
+ Or, from a checkout of the source repository, run the entry script directly:
53
+
54
+ ```bash
55
+ python main.py <config> [options]
56
+ ```
57
+
58
+ ### Positional arguments
59
+
60
+ | Argument | Description |
61
+ |---|---|
62
+ | `config` | Path to a JSON configuration file (see [Configuration](#configuration)) |
63
+
64
+ ### Options
65
+
66
+ | Flag | Type | Default | Description |
67
+ |---|---|---|---|
68
+ | `--device` | `{auto,cuda,cpu}` | `auto` | Compute device (`auto` prefers CUDA when available) |
69
+ | `--batch-size` | `int` | `16` | Batch size for metrics that support batching |
70
+ | `--num-workers` | `int` | `4` | Number of data loading workers |
71
+ | `--verbose`, `-v` | flag | off | Enable verbose output |
72
+ | `--skip-depth` | flag | off | Skip depth evaluation |
73
+ | `--skip-rgb` | flag | off | Skip RGB evaluation |
74
+ | `--mask-sky` | flag | off | Mask sky regions from metrics using GT segmentation |
75
+ | `--no-sanity-check` | flag | off | Disable sanity checking of metric results |
76
+ | `--metrics-config` | `str` | auto-detect | Path to `metrics_config.json` for sanity checking |
77
+ | `--depth-alignment` | `{none,auto_affine,affine}` | `auto_affine` | Depth alignment mode (`depth` output uses aligned branch) |
78
+
79
+ ### Examples
80
+
81
+ ```bash
82
+ # Evaluate with default settings (auto-selects CUDA when available)
83
+ depth-eval config.json --batch-size 32
84
+
85
+ # Evaluate with sky masking enabled (requires gt.segmentation in config)
86
+ depth-eval config.json --mask-sky -v
87
+
88
+ # Skip RGB evaluation, only evaluate depth
89
+ depth-eval config.json --skip-rgb
90
+
91
+ # Disable sanity checking
92
+ depth-eval config.json --no-sanity-check
93
+
94
+ # Disable depth alignment
95
+ depth-eval config.json --depth-alignment none
96
+
97
+ # Force affine scale+shift alignment on all depth predictions
98
+ depth-eval config.json --depth-alignment affine
99
+ ```
100
+
101
+ ## Configuration
102
+
103
+ ### `config.json`
104
+
105
+ Defines GT modalities, prediction datasets to evaluate, and optional euler_train logging. See [example_config.json](example_config.json).
106
+
107
+ ```json
108
+ {
109
+ "euler_train": {
110
+ "dir": "runs/my_project",
111
+ "run_id": null,
112
+ "run_name": null
113
+ },
114
+ "gt": {
115
+ "rgb": { "path": "/data/gt/rgb" },
116
+ "depth": { "path": "/data/gt/depth" },
117
+ "segmentation": { "path": "/data/gt/segmentation" },
118
+ "calibration": { "path": "/data/gt/calibration" }
119
+ },
120
+ "datasets": [
121
+ {
122
+ "name": "model_a",
123
+ "rgb": { "path": "/data/model_a/rgb" },
124
+ "depth": { "path": "/data/model_a/depth" },
125
+ "output_file": "/path/to/output/model_a_eval.json"
126
+ },
127
+ {
128
+ "name": "model_b_depth_only",
129
+ "depth": { "path": "/data/model_b/depth" }
130
+ },
131
+ {
132
+ "name": "model_c_rgb_only",
133
+ "rgb": { "path": "/data/model_c/rgb" }
134
+ }
135
+ ]
136
+ }
137
+ ```
138
+
139
+ #### GT section
140
+
141
+ | Field | Required | Description |
142
+ |---|---|---|
143
+ | `gt.rgb.path` | yes | Path to GT RGB dataset |
144
+ | `gt.depth.path` | yes | Path to GT depth dataset |
145
+ | `gt.segmentation.path` | no | Path to GT segmentation (needed for `--mask-sky`) |
146
+ | `gt.calibration.path` | no | Path to calibration data (camera intrinsics matrices) |
147
+ | `gt.name` | no | Display name for ground truth (default: `"GT"`) |
148
+
149
+ #### Prediction datasets
150
+
151
+ Each entry in `datasets` can include `rgb`, `depth`, or both:
152
+
153
+ | Field | Required | Description |
154
+ |---|---|---|
155
+ | `name` | yes | Display name for this prediction dataset |
156
+ | `rgb.path` | no\* | Path to predicted RGB dataset |
157
+ | `depth.path` | no\* | Path to predicted depth dataset |
158
+ | `output_file` | no | Custom output path for results JSON (default: `eval.json` inside the first available modality path) |
159
+
160
+ \* At least one of `rgb.path` or `depth.path` is required.
161
+
162
+ #### `euler_train` section (optional)
163
+
164
+ When present, evaluation results are logged to an [euler_train](https://github.com/d-rothen/euler-train) run. Requires the `euler-train` package to be installed (`pip install euler-eval[logging]`).
165
+
166
+ | Field | Required | Description |
167
+ |---|---|---|
168
+ | `euler_train.dir` | yes | euler_train project directory |
169
+ | `euler_train.run_id` | no | Existing run ID to resume (if `null`, a new run is created) |
170
+ | `euler_train.run_name` | no | Human-readable run label |
171
+
172
+ When `run_id` is provided, the run is detached after evaluation (the run remains active for further use). When `run_id` is `null`, a new run is created and finished upon completion.
173
+
174
+ ### Loader resolution
175
+
176
+ Loaders are resolved automatically by euler_loading from each dataset directory's ds-crawler index metadata. The index's `euler_loading.loader` and `euler_loading.function` fields determine which loader module and function to use (e.g. `"vkitti2"` maps to `euler_loading.loaders.gpu.vkitti2`).
177
+
178
+ No manual loader selection is required. Each dataset directory declares its own loader through its ds-crawler configuration.
179
+
180
+ Dataset metadata (e.g. `radial_depth`, `rgb_range`) is read automatically from the dataset's `output.json` manifest via `get_modality_metadata()`. Depth is assumed to already be in meters.
181
+
182
+ ### Dataset manifest (`output.json`)
183
+
184
+ Each dataset directory must contain an `output.json` manifest (generated by [ds-crawler](https://github.com/d-rothen/ds-crawler)) describing its hierarchical file structure:
185
+
186
+ ```json
187
+ {
188
+ "dataset": {
189
+ "children": {
190
+ "scene_01": {
191
+ "files": [
192
+ { "id": "frame_0001", "path": "scene_01/frame_0001.png" },
193
+ { "id": "frame_0002", "path": "scene_01/frame_0002.png" }
194
+ ]
195
+ }
196
+ }
197
+ }
198
+ }
199
+ ```
200
+
201
+ GT and prediction datasets are matched by hierarchy path and file ID through `MultiModalDataset`.
202
+
203
+ ### `metrics_config.json`
204
+
205
+ Controls sanity check thresholds. See [metrics_config.json](metrics_config.json) for all available options. When `--metrics-config` is not specified, the tool auto-detects `metrics_config.json` at the project root. If not found, built-in defaults are used.
206
+
207
+ ## Metrics
208
+
209
+ ### Depth metrics
210
+
211
+ | Metric | Key | Description |
212
+ |---|---|---|
213
+ | PSNR | `depth.image_quality.psnr` | Peak Signal-to-Noise Ratio (dB), using max depth as dynamic range |
214
+ | SSIM | `depth.image_quality.ssim` | Structural Similarity Index |
215
+ | LPIPS | `depth.image_quality.lpips` | Learned Perceptual Image Patch Similarity |
216
+ | FID | `depth.image_quality.fid` | Fréchet Inception Distance (dataset-level distribution metric) |
217
+ | KID | `depth.image_quality.kid_mean`, `kid_std` | Kernel Inception Distance (mean and std) |
218
+ | AbsRel | `depth.depth_metrics.absrel` | Absolute Relative Error (\|pred-gt\|/gt), reported as median and p90 |
219
+ | RMSE | `depth.depth_metrics.rmse` | Root Mean Square Error, reported as median and p90 |
220
+ | SILog | `depth.depth_metrics.silog` | Scale-Invariant Log Error, reported as mean, median, and p90 |
221
+ | Normal Consistency | `depth.geometric_metrics.normal_consistency` | Surface normal angular error (degrees) via finite differences; includes mean, median, and percent below 11.25°/22.5°/30° |
222
+ | Depth Edge F1 | `depth.geometric_metrics.depth_edge_f1` | Edge detection precision/recall/F1 for depth discontinuities |
223
+
224
+ ### RGB metrics
225
+
226
+ | Metric | Key | Description |
227
+ |---|---|---|
228
+ | PSNR | `rgb.image_quality.psnr` | Peak Signal-to-Noise Ratio (dB) |
229
+ | SSIM | `rgb.image_quality.ssim` | Structural Similarity Index |
230
+ | SCE | `rgb.image_quality.sce` | Structural Chromatic Error |
231
+ | LPIPS | `rgb.image_quality.lpips` | Learned Perceptual Image Patch Similarity |
232
+ | Edge F1 | `rgb.edge_f1` | Edge preservation precision/recall/F1 |
233
+ | Tail Errors | `rgb.tail_errors` | 95th and 99th percentile per-pixel errors |
234
+ | High-Frequency Energy | `rgb.high_frequency` | HF energy preservation ratio (pred vs GT) and relative difference |
235
+ | Depth-Binned Photometric Error | `rgb.depth_binned_photometric` | MAE/MSE in near/mid/far depth bins (requires GT depth) |
236
+
237
+ ## Output
238
+
239
+ Results are saved as JSON per prediction dataset. Default path: `eval.json` inside the first available modality path of the dataset, unless overridden by `output_file` in the config.
240
+
241
+ ### Output structure
242
+
243
+ ```json
244
+ {
245
+ "depth_raw": { "...": "metrics without alignment" },
246
+ "depth_aligned": { "...": "metrics with selected alignment mode" },
247
+ "depth": {
248
+ "...": "backward-compatible alias of depth_aligned"
249
+ },
250
+ "rgb": {
251
+ "...": "..."
252
+ },
253
+ "per_file_metrics": {
254
+ "children": {
255
+ "scene_01": {
256
+ "children": {
257
+ "camera_0": {
258
+ "files": [
259
+ {
260
+ "id": "frame_0001",
261
+ "metrics": {
262
+ "depth": { "...": "aligned (alias)" },
263
+ "depth_raw": { "...": "raw" },
264
+ "depth_aligned": { "...": "aligned" },
265
+ "rgb": { "...": "..." }
266
+ }
267
+ }
268
+ ]
269
+ }
270
+ }
271
+ }
272
+ }
273
+ }
274
+ }
275
+ ```
276
+
277
+ For depth outputs:
278
+ - `depth_raw`: metric-space depth without any post-hoc alignment.
279
+ - `depth_aligned`: metric-space depth after configured alignment mode.
280
+ - `depth`: backward-compatible alias of `depth_aligned`.
281
+
282
+ The previous single-depth structure (still available under `depth`) looks like this:
283
+
284
+ ```json
285
+ {
286
+ "depth": {
287
+ "image_quality": {
288
+ "psnr": 28.5,
289
+ "ssim": 0.92,
290
+ "lpips": 0.08,
291
+ "fid": 12.3,
292
+ "kid_mean": 0.005,
293
+ "kid_std": 0.002
294
+ },
295
+ "depth_metrics": {
296
+ "absrel": { "median": 0.05, "p90": 0.12 },
297
+ "rmse": { "median": 1.2, "p90": 3.1 },
298
+ "silog": { "mean": 0.08, "median": 0.06, "p90": 0.15 }
299
+ },
300
+ "geometric_metrics": {
301
+ "normal_consistency": {
302
+ "mean_angle": 12.3,
303
+ "median_angle": 9.8,
304
+ "percent_below_11_25": 55.2,
305
+ "percent_below_22_5": 82.1,
306
+ "percent_below_30": 91.5
307
+ },
308
+ "depth_edge_f1": {
309
+ "precision": 0.72,
310
+ "recall": 0.68,
311
+ "f1": 0.70
312
+ }
313
+ },
314
+ "dataset_info": {
315
+ "num_pairs": 500,
316
+ "gt_name": "GT",
317
+ "pred_name": "model_a"
318
+ }
319
+ },
320
+ "rgb": { "...": "unchanged" }
321
+ }
322
+ ```
323
+
324
+ ### Sanity check report
325
+
326
+ When sanity checking is enabled (the default), a `sanity_check_report.json` file containing warnings grouped by metric type is saved to the current working directory.
327
+
328
+ ## License
329
+
330
+ MIT
@@ -0,0 +1 @@
1
+ """Depth and RGB evaluation package."""