euler-eval 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- euler_eval-1.0.1/PKG-INFO +365 -0
- euler_eval-1.0.1/README.md +330 -0
- euler_eval-1.0.1/euler_eval/__init__.py +1 -0
- euler_eval-1.0.1/euler_eval/cli.py +764 -0
- euler_eval-1.0.1/euler_eval/data.py +509 -0
- euler_eval-1.0.1/euler_eval/evaluate.py +1165 -0
- euler_eval-1.0.1/euler_eval/metrics/__init__.py +155 -0
- euler_eval-1.0.1/euler_eval/metrics/absrel.py +101 -0
- euler_eval-1.0.1/euler_eval/metrics/daniel_error.py +81 -0
- euler_eval-1.0.1/euler_eval/metrics/depth_binned_error.py +153 -0
- euler_eval-1.0.1/euler_eval/metrics/depth_edge_f1.py +200 -0
- euler_eval-1.0.1/euler_eval/metrics/fid_kid.py +354 -0
- euler_eval-1.0.1/euler_eval/metrics/high_freq_energy.py +228 -0
- euler_eval-1.0.1/euler_eval/metrics/lpips_metric.py +160 -0
- euler_eval-1.0.1/euler_eval/metrics/normal_consistency.py +222 -0
- euler_eval-1.0.1/euler_eval/metrics/psnr.py +85 -0
- euler_eval-1.0.1/euler_eval/metrics/rgb_edge_f1.py +179 -0
- euler_eval-1.0.1/euler_eval/metrics/rgb_lpips.py +142 -0
- euler_eval-1.0.1/euler_eval/metrics/rgb_psnr_ssim.py +160 -0
- euler_eval-1.0.1/euler_eval/metrics/rho_a.py +241 -0
- euler_eval-1.0.1/euler_eval/metrics/rmse.py +119 -0
- euler_eval-1.0.1/euler_eval/metrics/scale_invariant_log.py +130 -0
- euler_eval-1.0.1/euler_eval/metrics/ssim.py +139 -0
- euler_eval-1.0.1/euler_eval/metrics/tail_errors.py +130 -0
- euler_eval-1.0.1/euler_eval/metrics/utils.py +128 -0
- euler_eval-1.0.1/euler_eval/sanity_checker.py +1010 -0
- euler_eval-1.0.1/euler_eval/utils/hierarchy_parser.py +118 -0
- euler_eval-1.0.1/euler_eval.egg-info/PKG-INFO +365 -0
- euler_eval-1.0.1/euler_eval.egg-info/SOURCES.txt +42 -0
- euler_eval-1.0.1/euler_eval.egg-info/dependency_links.txt +1 -0
- euler_eval-1.0.1/euler_eval.egg-info/entry_points.txt +2 -0
- euler_eval-1.0.1/euler_eval.egg-info/requires.txt +18 -0
- euler_eval-1.0.1/euler_eval.egg-info/top_level.txt +1 -0
- euler_eval-1.0.1/pyproject.toml +69 -0
- euler_eval-1.0.1/setup.cfg +4 -0
- euler_eval-1.0.1/tests/test_alignment.py +244 -0
- euler_eval-1.0.1/tests/test_cli_device.py +104 -0
- euler_eval-1.0.1/tests/test_config.py +222 -0
- euler_eval-1.0.1/tests/test_data.py +273 -0
- euler_eval-1.0.1/tests/test_depth_alignment_output.py +234 -0
- euler_eval-1.0.1/tests/test_evaluate_helpers.py +180 -0
- euler_eval-1.0.1/tests/test_integration.py +512 -0
- euler_eval-1.0.1/tests/test_rho_a.py +303 -0
- euler_eval-1.0.1/tests/test_save_results.py +262 -0
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: euler-eval
|
|
3
|
+
Version: 1.0.1
|
|
4
|
+
Summary: Depth map evaluation toolkit with comprehensive metrics
|
|
5
|
+
Author: Depth Eval Contributors
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: depth,evaluation,metrics,computer-vision,3d
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Science/Research
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Image Processing
|
|
17
|
+
Requires-Python: >=3.9
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
Requires-Dist: numpy>=1.21.0
|
|
20
|
+
Requires-Dist: scipy>=1.7.0
|
|
21
|
+
Requires-Dist: Pillow>=8.0.0
|
|
22
|
+
Requires-Dist: torch>=1.9.0
|
|
23
|
+
Requires-Dist: torchvision>=0.10.0
|
|
24
|
+
Requires-Dist: lpips>=0.1.4
|
|
25
|
+
Requires-Dist: tqdm>=4.62.0
|
|
26
|
+
Requires-Dist: euler-loading
|
|
27
|
+
Requires-Dist: ds-crawler
|
|
28
|
+
Provides-Extra: logging
|
|
29
|
+
Requires-Dist: euler-train; extra == "logging"
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
32
|
+
Requires-Dist: pytest-cov>=3.0.0; extra == "dev"
|
|
33
|
+
Requires-Dist: black>=22.0.0; extra == "dev"
|
|
34
|
+
Requires-Dist: ruff>=0.0.260; extra == "dev"
|
|
35
|
+
|
|
36
|
+
# euler-eval
|
|
37
|
+
|
|
38
|
+
A comprehensive evaluation toolkit for comparing predicted depth maps and RGB images against ground truth, powered by [euler_loading](https://github.com/d-rothen/euler-loading) for flexible dataset loading.
|
|
39
|
+
|
|
40
|
+
## Features
|
|
41
|
+
|
|
42
|
+
- **Depth metrics**: PSNR, SSIM, LPIPS, FID, KID, AbsRel, RMSE, Scale-Invariant Log Error, Normal Consistency, Depth Edge F1
|
|
43
|
+
- **RGB metrics**: PSNR, SSIM, LPIPS, SCE (Structural Chromatic Error), Edge F1, Tail Errors (p95/p99), High-Frequency Energy Ratio, Depth-Binned Photometric Error
|
|
44
|
+
- **Sanity checking**: Automatic validation of metric results against configurable thresholds, with detailed warning reports
|
|
45
|
+
- **Sky masking**: Optional exclusion of sky regions from metrics using GT segmentation
|
|
46
|
+
- **Flexible dataset loading**: Automatic loader resolution via euler_loading and ds-crawler index metadata
|
|
47
|
+
- **Per-file and aggregate results**: Outputs both per-image metrics and dataset-level aggregates to JSON
|
|
48
|
+
- **euler_train integration**: Optional experiment logging via [euler_train](https://github.com/d-rothen/euler-train)
|
|
49
|
+
|
|
50
|
+
## Installation
|
|
51
|
+
|
|
52
|
+
Requires Python 3.9+.
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
uv pip install "euler-eval @ git+https://github.com/d-rothen/euler-parser.git"
|
|
56
|
+
|
|
57
|
+
# with euler_train logging support
|
|
58
|
+
uv pip install "euler-eval[logging] @ git+https://github.com/d-rothen/euler-parser.git"
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Or install in editable mode:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pip install -e .
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Dependencies
|
|
68
|
+
|
|
69
|
+
Core:
|
|
70
|
+
- numpy, scipy, Pillow
|
|
71
|
+
- torch, torchvision
|
|
72
|
+
- lpips
|
|
73
|
+
- tqdm
|
|
74
|
+
- [euler-loading](https://github.com/d-rothen/euler-loading), [ds-crawler](https://github.com/d-rothen/ds-crawler)
|
|
75
|
+
|
|
76
|
+
Optional:
|
|
77
|
+
- [euler-train](https://github.com/d-rothen/euler-train) (install via `[logging]` extra)
|
|
78
|
+
|
|
79
|
+
## Usage
|
|
80
|
+
|
|
81
|
+
The package provides a `depth-eval` console script:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
depth-eval <config> [options]
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Or run directly:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
python main.py <config> [options]
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Positional arguments
|
|
94
|
+
|
|
95
|
+
| Argument | Description |
|
|
96
|
+
|---|---|
|
|
97
|
+
| `config` | Path to a JSON configuration file (see [Configuration](#configuration)) |
|
|
98
|
+
|
|
99
|
+
### Options
|
|
100
|
+
|
|
101
|
+
| Flag | Type | Default | Description |
|
|
102
|
+
|---|---|---|---|
|
|
103
|
+
| `--device` | `{auto,cuda,cpu}` | `auto` | Compute device (`auto` prefers CUDA when available) |
|
|
104
|
+
| `--batch-size` | `int` | `16` | Batch size for metrics that support batching |
|
|
105
|
+
| `--num-workers` | `int` | `4` | Number of data loading workers |
|
|
106
|
+
| `--verbose`, `-v` | flag | off | Enable verbose output |
|
|
107
|
+
| `--skip-depth` | flag | off | Skip depth evaluation |
|
|
108
|
+
| `--skip-rgb` | flag | off | Skip RGB evaluation |
|
|
109
|
+
| `--mask-sky` | flag | off | Mask sky regions from metrics using GT segmentation |
|
|
110
|
+
| `--no-sanity-check` | flag | off | Disable sanity checking of metric results |
|
|
111
|
+
| `--metrics-config` | `str` | auto-detect | Path to `metrics_config.json` for sanity checking |
|
|
112
|
+
| `--depth-alignment` | `{none,auto_affine,affine}` | `auto_affine` | Depth alignment mode (`depth` output uses aligned branch) |
|
|
113
|
+
|
|
114
|
+
### Examples
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
# Evaluate with default settings (auto-selects CUDA when available)
|
|
118
|
+
depth-eval config.json --batch-size 32
|
|
119
|
+
|
|
120
|
+
# Evaluate with sky masking enabled (requires gt.segmentation in config)
|
|
121
|
+
depth-eval config.json --mask-sky -v
|
|
122
|
+
|
|
123
|
+
# Skip RGB evaluation, only evaluate depth
|
|
124
|
+
depth-eval config.json --skip-rgb
|
|
125
|
+
|
|
126
|
+
# Disable sanity checking
|
|
127
|
+
depth-eval config.json --no-sanity-check
|
|
128
|
+
|
|
129
|
+
# Disable depth alignment
|
|
130
|
+
depth-eval config.json --depth-alignment none
|
|
131
|
+
|
|
132
|
+
# Force affine scale+shift alignment on all depth predictions
|
|
133
|
+
depth-eval config.json --depth-alignment affine
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## Configuration
|
|
137
|
+
|
|
138
|
+
### `config.json`
|
|
139
|
+
|
|
140
|
+
Defines GT modalities, prediction datasets to evaluate, and optional euler_train logging. See [example_config.json](example_config.json).
|
|
141
|
+
|
|
142
|
+
```json
|
|
143
|
+
{
|
|
144
|
+
"euler_train": {
|
|
145
|
+
"dir": "runs/my_project",
|
|
146
|
+
"run_id": null,
|
|
147
|
+
"run_name": null
|
|
148
|
+
},
|
|
149
|
+
"gt": {
|
|
150
|
+
"rgb": { "path": "/data/gt/rgb" },
|
|
151
|
+
"depth": { "path": "/data/gt/depth" },
|
|
152
|
+
"segmentation": { "path": "/data/gt/segmentation" },
|
|
153
|
+
"calibration": { "path": "/data/gt/calibration" }
|
|
154
|
+
},
|
|
155
|
+
"datasets": [
|
|
156
|
+
{
|
|
157
|
+
"name": "model_a",
|
|
158
|
+
"rgb": { "path": "/data/model_a/rgb" },
|
|
159
|
+
"depth": { "path": "/data/model_a/depth" },
|
|
160
|
+
"output_file": "/path/to/output/model_a_eval.json"
|
|
161
|
+
},
|
|
162
|
+
{
|
|
163
|
+
"name": "model_b_depth_only",
|
|
164
|
+
"depth": { "path": "/data/model_b/depth" }
|
|
165
|
+
},
|
|
166
|
+
{
|
|
167
|
+
"name": "model_c_rgb_only",
|
|
168
|
+
"rgb": { "path": "/data/model_c/rgb" }
|
|
169
|
+
}
|
|
170
|
+
]
|
|
171
|
+
}
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
#### GT section
|
|
175
|
+
|
|
176
|
+
| Field | Required | Description |
|
|
177
|
+
|---|---|---|
|
|
178
|
+
| `gt.rgb.path` | yes | Path to GT RGB dataset |
|
|
179
|
+
| `gt.depth.path` | yes | Path to GT depth dataset |
|
|
180
|
+
| `gt.segmentation.path` | no | Path to GT segmentation (needed for `--mask-sky`) |
|
|
181
|
+
| `gt.calibration.path` | no | Path to calibration data (camera intrinsics matrices) |
|
|
182
|
+
| `gt.name` | no | Display name for ground truth (default: `"GT"`) |
|
|
183
|
+
|
|
184
|
+
#### Prediction datasets
|
|
185
|
+
|
|
186
|
+
Each entry in `datasets` can include `rgb`, `depth`, or both:
|
|
187
|
+
|
|
188
|
+
| Field | Required | Description |
|
|
189
|
+
|---|---|---|
|
|
190
|
+
| `name` | yes | Display name for this prediction dataset |
|
|
191
|
+
| `rgb.path` | no\* | Path to predicted RGB dataset |
|
|
192
|
+
| `depth.path` | no\* | Path to predicted depth dataset |
|
|
193
|
+
| `output_file` | no | Custom output path for results JSON (default: `eval.json` inside the first available modality path) |
|
|
194
|
+
|
|
195
|
+
\* At least one of `rgb.path` or `depth.path` is required.
|
|
196
|
+
|
|
197
|
+
#### `euler_train` section (optional)
|
|
198
|
+
|
|
199
|
+
When present, evaluation results are logged to an [euler_train](https://github.com/d-rothen/euler-train) run. Requires the `euler-train` package to be installed (`pip install "euler-eval[logging]"`).
|
|
200
|
+
|
|
201
|
+
| Field | Required | Description |
|
|
202
|
+
|---|---|---|
|
|
203
|
+
| `euler_train.dir` | yes | euler_train project directory |
|
|
204
|
+
| `euler_train.run_id` | no | Existing run ID to resume (if `null`, a new run is created) |
|
|
205
|
+
| `euler_train.run_name` | no | Human-readable run label |
|
|
206
|
+
|
|
207
|
+
When `run_id` is provided, the run is detached after evaluation (the run remains active for further use). When `run_id` is `null`, a new run is created and finished upon completion.
|
|
208
|
+
|
|
209
|
+
### Loader resolution
|
|
210
|
+
|
|
211
|
+
Loaders are resolved automatically by euler_loading from each dataset directory's ds-crawler index metadata. The index's `euler_loading.loader` and `euler_loading.function` fields determine which loader module and function to use (e.g. `"vkitti2"` maps to `euler_loading.loaders.gpu.vkitti2`).
|
|
212
|
+
|
|
213
|
+
No manual loader selection is required. Each dataset directory declares its own loader through its ds-crawler configuration.
|
|
214
|
+
|
|
215
|
+
Dataset metadata (e.g. `radial_depth`, `rgb_range`) is read automatically from the dataset's `output.json` manifest via `get_modality_metadata()`. Depth is assumed to already be in meters.
|
|
216
|
+
|
|
217
|
+
### Dataset manifest (`output.json`)
|
|
218
|
+
|
|
219
|
+
Each dataset directory must contain an `output.json` manifest (generated by [ds-crawler](https://github.com/d-rothen/ds-crawler)) describing its hierarchical file structure:
|
|
220
|
+
|
|
221
|
+
```json
|
|
222
|
+
{
|
|
223
|
+
"dataset": {
|
|
224
|
+
"children": {
|
|
225
|
+
"scene_01": {
|
|
226
|
+
"files": [
|
|
227
|
+
{ "id": "frame_0001", "path": "scene_01/frame_0001.png" },
|
|
228
|
+
{ "id": "frame_0002", "path": "scene_01/frame_0002.png" }
|
|
229
|
+
]
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
GT and prediction datasets are matched by hierarchy path and file ID through `MultiModalDataset`.
|
|
237
|
+
|
|
238
|
+
### `metrics_config.json`
|
|
239
|
+
|
|
240
|
+
Controls sanity check thresholds. See [metrics_config.json](metrics_config.json) for all available options. When `--metrics-config` is not specified, the tool auto-detects `metrics_config.json` at the project root. If not found, built-in defaults are used.
|
|
241
|
+
|
|
242
|
+
## Metrics
|
|
243
|
+
|
|
244
|
+
### Depth metrics
|
|
245
|
+
|
|
246
|
+
| Metric | Key | Description |
|
|
247
|
+
|---|---|---|
|
|
248
|
+
| PSNR | `depth.image_quality.psnr` | Peak Signal-to-Noise Ratio (dB), using max depth as dynamic range |
|
|
249
|
+
| SSIM | `depth.image_quality.ssim` | Structural Similarity Index |
|
|
250
|
+
| LPIPS | `depth.image_quality.lpips` | Learned Perceptual Image Patch Similarity |
|
|
251
|
+
| FID | `depth.image_quality.fid` | Fréchet Inception Distance (dataset-level distribution metric) |
|
|
252
|
+
| KID | `depth.image_quality.kid_mean`, `kid_std` | Kernel Inception Distance (mean and std) |
|
|
253
|
+
| AbsRel | `depth.depth_metrics.absrel` | Absolute Relative Error (\|pred-gt\|/gt), reported as median and p90 |
|
|
254
|
+
| RMSE | `depth.depth_metrics.rmse` | Root Mean Square Error, reported as median and p90 |
|
|
255
|
+
| SILog | `depth.depth_metrics.silog` | Scale-Invariant Log Error, reported as mean, median, and p90 |
|
|
256
|
+
| Normal Consistency | `depth.geometric_metrics.normal_consistency` | Surface normal angular error (degrees) via finite differences; includes mean, median, and percent below 11.25°/22.5°/30° |
|
|
257
|
+
| Depth Edge F1 | `depth.geometric_metrics.depth_edge_f1` | Edge detection precision/recall/F1 for depth discontinuities |
|
|
258
|
+
|
|
259
|
+
### RGB metrics
|
|
260
|
+
|
|
261
|
+
| Metric | Key | Description |
|
|
262
|
+
|---|---|---|
|
|
263
|
+
| PSNR | `rgb.image_quality.psnr` | Peak Signal-to-Noise Ratio (dB) |
|
|
264
|
+
| SSIM | `rgb.image_quality.ssim` | Structural Similarity Index |
|
|
265
|
+
| SCE | `rgb.image_quality.sce` | Structural Chromatic Error |
|
|
266
|
+
| LPIPS | `rgb.image_quality.lpips` | Learned Perceptual Image Patch Similarity |
|
|
267
|
+
| Edge F1 | `rgb.edge_f1` | Edge preservation precision/recall/F1 |
|
|
268
|
+
| Tail Errors | `rgb.tail_errors` | 95th and 99th percentile per-pixel errors |
|
|
269
|
+
| High-Frequency Energy | `rgb.high_frequency` | HF energy preservation ratio (pred vs GT) and relative difference |
|
|
270
|
+
| Depth-Binned Photometric Error | `rgb.depth_binned_photometric` | MAE/MSE in near/mid/far depth bins (requires GT depth) |
|
|
271
|
+
|
|
272
|
+
## Output
|
|
273
|
+
|
|
274
|
+
Results are saved as one JSON file per prediction dataset. The default path is `eval.json` inside the dataset's first available modality path, unless overridden by `output_file` in the config.
|
|
275
|
+
|
|
276
|
+
### Output structure
|
|
277
|
+
|
|
278
|
+
```json
|
|
279
|
+
{
|
|
280
|
+
"depth_raw": { "...": "metrics without alignment" },
|
|
281
|
+
"depth_aligned": { "...": "metrics with selected alignment mode" },
|
|
282
|
+
"depth": {
|
|
283
|
+
"...": "backward-compatible alias of depth_aligned"
|
|
284
|
+
},
|
|
285
|
+
"rgb": {
|
|
286
|
+
"...": "..."
|
|
287
|
+
},
|
|
288
|
+
"per_file_metrics": {
|
|
289
|
+
"children": {
|
|
290
|
+
"scene_01": {
|
|
291
|
+
"children": {
|
|
292
|
+
"camera_0": {
|
|
293
|
+
"files": [
|
|
294
|
+
{
|
|
295
|
+
"id": "frame_0001",
|
|
296
|
+
"metrics": {
|
|
297
|
+
"depth": { "...": "aligned (alias)" },
|
|
298
|
+
"depth_raw": { "...": "raw" },
|
|
299
|
+
"depth_aligned": { "...": "aligned" },
|
|
300
|
+
"rgb": { "...": "..." }
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
]
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
For depth outputs:
|
|
313
|
+
- `depth_raw`: metric-space depth without any post-hoc alignment.
|
|
314
|
+
- `depth_aligned`: metric-space depth after applying the configured alignment mode.
|
|
315
|
+
- `depth`: backward-compatible alias of `depth_aligned`.
|
|
316
|
+
|
|
317
|
+
The previous single-depth structure (kept under `depth`) is:
|
|
318
|
+
|
|
319
|
+
```json
|
|
320
|
+
{
|
|
321
|
+
"depth": {
|
|
322
|
+
"image_quality": {
|
|
323
|
+
"psnr": 28.5,
|
|
324
|
+
"ssim": 0.92,
|
|
325
|
+
"lpips": 0.08,
|
|
326
|
+
"fid": 12.3,
|
|
327
|
+
"kid_mean": 0.005,
|
|
328
|
+
"kid_std": 0.002
|
|
329
|
+
},
|
|
330
|
+
"depth_metrics": {
|
|
331
|
+
"absrel": { "median": 0.05, "p90": 0.12 },
|
|
332
|
+
"rmse": { "median": 1.2, "p90": 3.1 },
|
|
333
|
+
"silog": { "mean": 0.08, "median": 0.06, "p90": 0.15 }
|
|
334
|
+
},
|
|
335
|
+
"geometric_metrics": {
|
|
336
|
+
"normal_consistency": {
|
|
337
|
+
"mean_angle": 12.3,
|
|
338
|
+
"median_angle": 9.8,
|
|
339
|
+
"percent_below_11_25": 55.2,
|
|
340
|
+
"percent_below_22_5": 82.1,
|
|
341
|
+
"percent_below_30": 91.5
|
|
342
|
+
},
|
|
343
|
+
"depth_edge_f1": {
|
|
344
|
+
"precision": 0.72,
|
|
345
|
+
"recall": 0.68,
|
|
346
|
+
"f1": 0.70
|
|
347
|
+
}
|
|
348
|
+
},
|
|
349
|
+
"dataset_info": {
|
|
350
|
+
"num_pairs": 500,
|
|
351
|
+
"gt_name": "GT",
|
|
352
|
+
"pred_name": "model_a"
|
|
353
|
+
}
|
|
354
|
+
},
|
|
355
|
+
"rgb": { "...": "unchanged" }
|
|
356
|
+
}
|
|
357
|
+
```
|
|
358
|
+
|
|
359
|
+
### Sanity check report
|
|
360
|
+
|
|
361
|
+
When sanity checking is enabled (the default), a `sanity_check_report.json` file containing warnings grouped by metric type is saved to the current working directory.
|
|
362
|
+
|
|
363
|
+
## License
|
|
364
|
+
|
|
365
|
+
MIT
|
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
# euler-eval
|
|
2
|
+
|
|
3
|
+
A comprehensive evaluation toolkit for comparing predicted depth maps and RGB images against ground truth, powered by [euler_loading](https://github.com/d-rothen/euler-loading) for flexible dataset loading.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Depth metrics**: PSNR, SSIM, LPIPS, FID, KID, AbsRel, RMSE, Scale-Invariant Log Error, Normal Consistency, Depth Edge F1
|
|
8
|
+
- **RGB metrics**: PSNR, SSIM, LPIPS, SCE (Structural Chromatic Error), Edge F1, Tail Errors (p95/p99), High-Frequency Energy Ratio, Depth-Binned Photometric Error
|
|
9
|
+
- **Sanity checking**: Automatic validation of metric results against configurable thresholds, with detailed warning reports
|
|
10
|
+
- **Sky masking**: Optional exclusion of sky regions from metrics using GT segmentation
|
|
11
|
+
- **Flexible dataset loading**: Automatic loader resolution via euler_loading and ds-crawler index metadata
|
|
12
|
+
- **Per-file and aggregate results**: Outputs both per-image metrics and dataset-level aggregates to JSON
|
|
13
|
+
- **euler_train integration**: Optional experiment logging via [euler_train](https://github.com/d-rothen/euler-train)
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
Requires Python 3.9+.
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
uv pip install "euler-eval @ git+https://github.com/d-rothen/euler-parser.git"
|
|
21
|
+
|
|
22
|
+
# with euler_train logging support
|
|
23
|
+
uv pip install "euler-eval[logging] @ git+https://github.com/d-rothen/euler-parser.git"
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Or install in editable mode:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
pip install -e .
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Dependencies
|
|
33
|
+
|
|
34
|
+
Core:
|
|
35
|
+
- numpy, scipy, Pillow
|
|
36
|
+
- torch, torchvision
|
|
37
|
+
- lpips
|
|
38
|
+
- tqdm
|
|
39
|
+
- [euler-loading](https://github.com/d-rothen/euler-loading), [ds-crawler](https://github.com/d-rothen/ds-crawler)
|
|
40
|
+
|
|
41
|
+
Optional:
|
|
42
|
+
- [euler-train](https://github.com/d-rothen/euler-train) (install via `[logging]` extra)
|
|
43
|
+
|
|
44
|
+
## Usage
|
|
45
|
+
|
|
46
|
+
The package provides a `depth-eval` console script:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
depth-eval <config> [options]
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Or run directly:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
python main.py <config> [options]
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### Positional arguments
|
|
59
|
+
|
|
60
|
+
| Argument | Description |
|
|
61
|
+
|---|---|
|
|
62
|
+
| `config` | Path to a JSON configuration file (see [Configuration](#configuration)) |
|
|
63
|
+
|
|
64
|
+
### Options
|
|
65
|
+
|
|
66
|
+
| Flag | Type | Default | Description |
|
|
67
|
+
|---|---|---|---|
|
|
68
|
+
| `--device` | `{auto,cuda,cpu}` | `auto` | Compute device (`auto` prefers CUDA when available) |
|
|
69
|
+
| `--batch-size` | `int` | `16` | Batch size for metrics that support batching |
|
|
70
|
+
| `--num-workers` | `int` | `4` | Number of data loading workers |
|
|
71
|
+
| `--verbose`, `-v` | flag | off | Enable verbose output |
|
|
72
|
+
| `--skip-depth` | flag | off | Skip depth evaluation |
|
|
73
|
+
| `--skip-rgb` | flag | off | Skip RGB evaluation |
|
|
74
|
+
| `--mask-sky` | flag | off | Mask sky regions from metrics using GT segmentation |
|
|
75
|
+
| `--no-sanity-check` | flag | off | Disable sanity checking of metric results |
|
|
76
|
+
| `--metrics-config` | `str` | auto-detect | Path to `metrics_config.json` for sanity checking |
|
|
77
|
+
| `--depth-alignment` | `{none,auto_affine,affine}` | `auto_affine` | Depth alignment mode (`depth` output uses aligned branch) |
|
|
78
|
+
|
|
79
|
+
### Examples
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
# Evaluate with default settings (auto-selects CUDA when available)
|
|
83
|
+
depth-eval config.json --batch-size 32
|
|
84
|
+
|
|
85
|
+
# Evaluate with sky masking enabled (requires gt.segmentation in config)
|
|
86
|
+
depth-eval config.json --mask-sky -v
|
|
87
|
+
|
|
88
|
+
# Skip RGB evaluation, only evaluate depth
|
|
89
|
+
depth-eval config.json --skip-rgb
|
|
90
|
+
|
|
91
|
+
# Disable sanity checking
|
|
92
|
+
depth-eval config.json --no-sanity-check
|
|
93
|
+
|
|
94
|
+
# Disable depth alignment
|
|
95
|
+
depth-eval config.json --depth-alignment none
|
|
96
|
+
|
|
97
|
+
# Force affine scale+shift alignment on all depth predictions
|
|
98
|
+
depth-eval config.json --depth-alignment affine
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Configuration
|
|
102
|
+
|
|
103
|
+
### `config.json`
|
|
104
|
+
|
|
105
|
+
Defines GT modalities, prediction datasets to evaluate, and optional euler_train logging. See [example_config.json](example_config.json).
|
|
106
|
+
|
|
107
|
+
```json
|
|
108
|
+
{
|
|
109
|
+
"euler_train": {
|
|
110
|
+
"dir": "runs/my_project",
|
|
111
|
+
"run_id": null,
|
|
112
|
+
"run_name": null
|
|
113
|
+
},
|
|
114
|
+
"gt": {
|
|
115
|
+
"rgb": { "path": "/data/gt/rgb" },
|
|
116
|
+
"depth": { "path": "/data/gt/depth" },
|
|
117
|
+
"segmentation": { "path": "/data/gt/segmentation" },
|
|
118
|
+
"calibration": { "path": "/data/gt/calibration" }
|
|
119
|
+
},
|
|
120
|
+
"datasets": [
|
|
121
|
+
{
|
|
122
|
+
"name": "model_a",
|
|
123
|
+
"rgb": { "path": "/data/model_a/rgb" },
|
|
124
|
+
"depth": { "path": "/data/model_a/depth" },
|
|
125
|
+
"output_file": "/path/to/output/model_a_eval.json"
|
|
126
|
+
},
|
|
127
|
+
{
|
|
128
|
+
"name": "model_b_depth_only",
|
|
129
|
+
"depth": { "path": "/data/model_b/depth" }
|
|
130
|
+
},
|
|
131
|
+
{
|
|
132
|
+
"name": "model_c_rgb_only",
|
|
133
|
+
"rgb": { "path": "/data/model_c/rgb" }
|
|
134
|
+
}
|
|
135
|
+
]
|
|
136
|
+
}
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
#### GT section
|
|
140
|
+
|
|
141
|
+
| Field | Required | Description |
|
|
142
|
+
|---|---|---|
|
|
143
|
+
| `gt.rgb.path` | yes | Path to GT RGB dataset |
|
|
144
|
+
| `gt.depth.path` | yes | Path to GT depth dataset |
|
|
145
|
+
| `gt.segmentation.path` | no | Path to GT segmentation (needed for `--mask-sky`) |
|
|
146
|
+
| `gt.calibration.path` | no | Path to calibration data (camera intrinsics matrices) |
|
|
147
|
+
| `gt.name` | no | Display name for ground truth (default: `"GT"`) |
|
|
148
|
+
|
|
149
|
+
#### Prediction datasets
|
|
150
|
+
|
|
151
|
+
Each entry in `datasets` can include `rgb`, `depth`, or both:
|
|
152
|
+
|
|
153
|
+
| Field | Required | Description |
|
|
154
|
+
|---|---|---|
|
|
155
|
+
| `name` | yes | Display name for this prediction dataset |
|
|
156
|
+
| `rgb.path` | no\* | Path to predicted RGB dataset |
|
|
157
|
+
| `depth.path` | no\* | Path to predicted depth dataset |
|
|
158
|
+
| `output_file` | no | Custom output path for results JSON (default: `eval.json` inside the first available modality path) |
|
|
159
|
+
|
|
160
|
+
\* At least one of `rgb.path` or `depth.path` is required.
|
|
161
|
+
|
|
162
|
+
#### `euler_train` section (optional)
|
|
163
|
+
|
|
164
|
+
When present, evaluation results are logged to an [euler_train](https://github.com/d-rothen/euler-train) run. Requires the `euler-train` package to be installed (`pip install "euler-eval[logging]"`).
|
|
165
|
+
|
|
166
|
+
| Field | Required | Description |
|
|
167
|
+
|---|---|---|
|
|
168
|
+
| `euler_train.dir` | yes | euler_train project directory |
|
|
169
|
+
| `euler_train.run_id` | no | Existing run ID to resume (if `null`, a new run is created) |
|
|
170
|
+
| `euler_train.run_name` | no | Human-readable run label |
|
|
171
|
+
|
|
172
|
+
When `run_id` is provided, the run is detached after evaluation (the run remains active for further use). When `run_id` is `null`, a new run is created and finished upon completion.
|
|
173
|
+
|
|
174
|
+
### Loader resolution
|
|
175
|
+
|
|
176
|
+
Loaders are resolved automatically by euler_loading from each dataset directory's ds-crawler index metadata. The index's `euler_loading.loader` and `euler_loading.function` fields determine which loader module and function to use (e.g. `"vkitti2"` maps to `euler_loading.loaders.gpu.vkitti2`).
|
|
177
|
+
|
|
178
|
+
No manual loader selection is required. Each dataset directory declares its own loader through its ds-crawler configuration.
|
|
179
|
+
|
|
180
|
+
Dataset metadata (e.g. `radial_depth`, `rgb_range`) is read automatically from the dataset's `output.json` manifest via `get_modality_metadata()`. Depth is assumed to already be in meters.
|
|
181
|
+
|
|
182
|
+
### Dataset manifest (`output.json`)
|
|
183
|
+
|
|
184
|
+
Each dataset directory must contain an `output.json` manifest (generated by [ds-crawler](https://github.com/d-rothen/ds-crawler)) describing its hierarchical file structure:
|
|
185
|
+
|
|
186
|
+
```json
|
|
187
|
+
{
|
|
188
|
+
"dataset": {
|
|
189
|
+
"children": {
|
|
190
|
+
"scene_01": {
|
|
191
|
+
"files": [
|
|
192
|
+
{ "id": "frame_0001", "path": "scene_01/frame_0001.png" },
|
|
193
|
+
{ "id": "frame_0002", "path": "scene_01/frame_0002.png" }
|
|
194
|
+
]
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
GT and prediction datasets are matched by hierarchy path and file ID through `MultiModalDataset`.
|
|
202
|
+
|
|
203
|
+
### `metrics_config.json`
|
|
204
|
+
|
|
205
|
+
Controls sanity check thresholds. See [metrics_config.json](metrics_config.json) for all available options. When `--metrics-config` is not specified, the tool auto-detects `metrics_config.json` at the project root. If not found, built-in defaults are used.
|
|
206
|
+
|
|
207
|
+
## Metrics
|
|
208
|
+
|
|
209
|
+
### Depth metrics
|
|
210
|
+
|
|
211
|
+
| Metric | Key | Description |
|
|
212
|
+
|---|---|---|
|
|
213
|
+
| PSNR | `depth.image_quality.psnr` | Peak Signal-to-Noise Ratio (dB), using max depth as dynamic range |
|
|
214
|
+
| SSIM | `depth.image_quality.ssim` | Structural Similarity Index |
|
|
215
|
+
| LPIPS | `depth.image_quality.lpips` | Learned Perceptual Image Patch Similarity |
|
|
216
|
+
| FID | `depth.image_quality.fid` | Fréchet Inception Distance (dataset-level distribution metric) |
|
|
217
|
+
| KID | `depth.image_quality.kid_mean`, `kid_std` | Kernel Inception Distance (mean and std) |
|
|
218
|
+
| AbsRel | `depth.depth_metrics.absrel` | Absolute Relative Error (\|pred-gt\|/gt), reported as median and p90 |
|
|
219
|
+
| RMSE | `depth.depth_metrics.rmse` | Root Mean Square Error, reported as median and p90 |
|
|
220
|
+
| SILog | `depth.depth_metrics.silog` | Scale-Invariant Log Error, reported as mean, median, and p90 |
|
|
221
|
+
| Normal Consistency | `depth.geometric_metrics.normal_consistency` | Surface normal angular error (degrees) via finite differences; includes mean, median, and percent below 11.25°/22.5°/30° |
|
|
222
|
+
| Depth Edge F1 | `depth.geometric_metrics.depth_edge_f1` | Edge detection precision/recall/F1 for depth discontinuities |
|
|
223
|
+
|
|
224
|
+
### RGB metrics
|
|
225
|
+
|
|
226
|
+
| Metric | Key | Description |
|
|
227
|
+
|---|---|---|
|
|
228
|
+
| PSNR | `rgb.image_quality.psnr` | Peak Signal-to-Noise Ratio (dB) |
|
|
229
|
+
| SSIM | `rgb.image_quality.ssim` | Structural Similarity Index |
|
|
230
|
+
| SCE | `rgb.image_quality.sce` | Structural Chromatic Error |
|
|
231
|
+
| LPIPS | `rgb.image_quality.lpips` | Learned Perceptual Image Patch Similarity |
|
|
232
|
+
| Edge F1 | `rgb.edge_f1` | Edge preservation precision/recall/F1 |
|
|
233
|
+
| Tail Errors | `rgb.tail_errors` | 95th and 99th percentile per-pixel errors |
|
|
234
|
+
| High-Frequency Energy | `rgb.high_frequency` | HF energy preservation ratio (pred vs GT) and relative difference |
|
|
235
|
+
| Depth-Binned Photometric Error | `rgb.depth_binned_photometric` | MAE/MSE in near/mid/far depth bins (requires GT depth) |
|
|
236
|
+
|
|
237
|
+
## Output
|
|
238
|
+
|
|
239
|
+
Results are saved as one JSON file per prediction dataset. The default path is `eval.json` inside the dataset's first available modality path, unless overridden by `output_file` in the config.
|
|
240
|
+
|
|
241
|
+
### Output structure
|
|
242
|
+
|
|
243
|
+
```json
|
|
244
|
+
{
|
|
245
|
+
"depth_raw": { "...": "metrics without alignment" },
|
|
246
|
+
"depth_aligned": { "...": "metrics with selected alignment mode" },
|
|
247
|
+
"depth": {
|
|
248
|
+
"...": "backward-compatible alias of depth_aligned"
|
|
249
|
+
},
|
|
250
|
+
"rgb": {
|
|
251
|
+
"...": "..."
|
|
252
|
+
},
|
|
253
|
+
"per_file_metrics": {
|
|
254
|
+
"children": {
|
|
255
|
+
"scene_01": {
|
|
256
|
+
"children": {
|
|
257
|
+
"camera_0": {
|
|
258
|
+
"files": [
|
|
259
|
+
{
|
|
260
|
+
"id": "frame_0001",
|
|
261
|
+
"metrics": {
|
|
262
|
+
"depth": { "...": "aligned (alias)" },
|
|
263
|
+
"depth_raw": { "...": "raw" },
|
|
264
|
+
"depth_aligned": { "...": "aligned" },
|
|
265
|
+
"rgb": { "...": "..." }
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
]
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
For depth outputs:
|
|
278
|
+
- `depth_raw`: metric-space depth without any post-hoc alignment.
|
|
279
|
+
- `depth_aligned`: metric-space depth after applying the configured alignment mode.
|
|
280
|
+
- `depth`: backward-compatible alias of `depth_aligned`.
|
|
281
|
+
|
|
282
|
+
The previous single-depth structure (kept under `depth`) is:
|
|
283
|
+
|
|
284
|
+
```json
|
|
285
|
+
{
|
|
286
|
+
"depth": {
|
|
287
|
+
"image_quality": {
|
|
288
|
+
"psnr": 28.5,
|
|
289
|
+
"ssim": 0.92,
|
|
290
|
+
"lpips": 0.08,
|
|
291
|
+
"fid": 12.3,
|
|
292
|
+
"kid_mean": 0.005,
|
|
293
|
+
"kid_std": 0.002
|
|
294
|
+
},
|
|
295
|
+
"depth_metrics": {
|
|
296
|
+
"absrel": { "median": 0.05, "p90": 0.12 },
|
|
297
|
+
"rmse": { "median": 1.2, "p90": 3.1 },
|
|
298
|
+
"silog": { "mean": 0.08, "median": 0.06, "p90": 0.15 }
|
|
299
|
+
},
|
|
300
|
+
"geometric_metrics": {
|
|
301
|
+
"normal_consistency": {
|
|
302
|
+
"mean_angle": 12.3,
|
|
303
|
+
"median_angle": 9.8,
|
|
304
|
+
"percent_below_11_25": 55.2,
|
|
305
|
+
"percent_below_22_5": 82.1,
|
|
306
|
+
"percent_below_30": 91.5
|
|
307
|
+
},
|
|
308
|
+
"depth_edge_f1": {
|
|
309
|
+
"precision": 0.72,
|
|
310
|
+
"recall": 0.68,
|
|
311
|
+
"f1": 0.70
|
|
312
|
+
}
|
|
313
|
+
},
|
|
314
|
+
"dataset_info": {
|
|
315
|
+
"num_pairs": 500,
|
|
316
|
+
"gt_name": "GT",
|
|
317
|
+
"pred_name": "model_a"
|
|
318
|
+
}
|
|
319
|
+
},
|
|
320
|
+
"rgb": { "...": "unchanged" }
|
|
321
|
+
}
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
### Sanity check report
|
|
325
|
+
|
|
326
|
+
When sanity checking is enabled (the default), a `sanity_check_report.json` file containing warnings grouped by metric type is saved to the current working directory.
|
|
327
|
+
|
|
328
|
+
## License
|
|
329
|
+
|
|
330
|
+
MIT
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Depth evaluation package."""
|