euler-preprocess 2.1.0__tar.gz → 2.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/PKG-INFO +116 -12
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/README.md +115 -11
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/cli.py +131 -2
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/output.py +186 -8
- euler_preprocess-2.3.0/euler_preprocess/fog/augmentations.py +318 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/models.py +312 -22
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/transform.py +546 -65
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess.egg-info/PKG-INFO +116 -12
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess.egg-info/SOURCES.txt +2 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/pyproject.toml +1 -1
- euler_preprocess-2.3.0/tests/test_cli_sample_selection.py +127 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/tests/test_fog_aux_outputs.py +178 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/__init__.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/__init__.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/dataset.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/device.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/intrinsics.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/io.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/logging.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/noise.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/normalize.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/sampling.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/transform.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/__init__.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/airlight_from_sky.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/dcp_airlight.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/dcp_airlight_torch.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/dcp_heuristic_airlight.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/dcp_heuristic_airlight_torch.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/foggify.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/foggify_logging.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/logging.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/radial/__init__.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/radial/transform.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/sky_depth/__init__.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/sky_depth/transform.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess.egg-info/dependency_links.txt +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess.egg-info/entry_points.txt +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess.egg-info/requires.txt +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess.egg-info/top_level.txt +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/setup.cfg +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/tests/test_airlight_fallback.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/tests/test_dcp_heuristic_airlight.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/tests/test_foggify_integration.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/tests/test_radial.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/tests/test_sky_depth.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/tests/test_source_backed_output.py +0 -0
- {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/tests/test_zip_output.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: euler-preprocess
|
|
3
|
-
Version: 2.1.0
|
|
3
|
+
Version: 2.3.0
|
|
4
4
|
Summary: Physics-based preprocessing (fog, etc.) for RGB+depth datasets
|
|
5
5
|
Requires-Python: >=3.9
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -49,6 +49,7 @@ Every subcommand takes a **dataset config** JSON that points to the input data a
|
|
|
49
49
|
"transform_config_path": "configs/run1.json",
|
|
50
50
|
"output_path": "/path/to/output",
|
|
51
51
|
"output_slot": "rgb",
|
|
52
|
+
"sample": 42,
|
|
52
53
|
"modalities": {
|
|
53
54
|
"rgb": {"path": "/path/to/rgb", "split": "train"},
|
|
54
55
|
"depth": "/path/to/depth",
|
|
@@ -78,6 +79,8 @@ Every subcommand takes a **dataset config** JSON that points to the input data a
|
|
|
78
79
|
| `transform_config_path` | Path to the transform-specific config (see below). `fog_config_path` is also accepted for backward compatibility. |
|
|
79
80
|
| `output_path` | Output root used when no pipeline target overrides it. Optional if `pipeline.output_root` or `pipeline.output_targets[].path` supplies the destination. |
|
|
80
81
|
| `output_slot` | Optional slot selector when `pipeline.output_targets` contains multiple entries. Defaults to `rgb` for `fog`, `depth` for `sky-depth`, and `depth` for `radial`. |
|
|
82
|
+
| `sample` | Optional 0-based euler-loading dataset index. When set, only `dataset[sample]` is transformed, which is useful for small augmented benchmark slices from large datasets. |
|
|
83
|
+
| `samples` | Optional multi-sample selector. Use a list of 0-based indices (`[0, 10, 20]`) or a slice object such as `{"start": 0, "stop": 1000, "step": 2, "count": 100}`. `stop` is exclusive; `count` caps the selected indices after slicing. Do not set both `sample` and `samples`. |
|
|
81
84
|
| `modalities` | Regular modalities that participate in sample-ID intersection. Each value is either a plain path string or an object with a `path` key and an optional `split` key (see below). Which modalities are required depends on the transform (see table below). |
|
|
82
85
|
| `hierarchical_modalities` | Per-scene data (e.g. intrinsics). Same format as `modalities`. Loaded once per scene and cached. |
|
|
83
86
|
| `pipeline` | Optional runtime routing block compatible with `euler-inference` (`output_root`, `outputs_manifest_path`, `output_targets`). |
|
|
@@ -86,6 +89,11 @@ Every subcommand takes a **dataset config** JSON that points to the input data a
|
|
|
86
89
|
|
|
87
90
|
When a modality directory contains [ds-crawler](https://github.com/d-rothen/ds-crawler) split files (`.ds_crawler/split_<name>.json`), you can select a subset of the data by setting the `split` key on that modality. Sample IDs are matched by intersection across all modalities, so specifying a split on a single modality is sufficient to restrict the entire dataset.
|
|
88
91
|
|
|
92
|
+
For quick slices after euler-loading has matched modalities, set `samples`.
|
|
93
|
+
For example, `{"samples": {"step": 2}}` processes every second matched sample,
|
|
94
|
+
and `{"samples": {"start": 10, "step": 5, "count": 20}}` processes 20 samples
|
|
95
|
+
starting at index 10 with stride 5.
|
|
96
|
+
|
|
89
97
|
**Required modalities per transform:**
|
|
90
98
|
|
|
91
99
|
| Transform | `modalities` | `hierarchical_modalities` |
|
|
@@ -142,6 +150,7 @@ Controls the fog simulation.
|
|
|
142
150
|
"contrast_threshold": 0.05,
|
|
143
151
|
"device": "cpu",
|
|
144
152
|
"gpu_batch_size": 4,
|
|
153
|
+
"augmentations": { ... },
|
|
145
154
|
"selection": { ... },
|
|
146
155
|
"models": { ... }
|
|
147
156
|
}
|
|
@@ -156,6 +165,7 @@ Controls the fog simulation.
|
|
|
156
165
|
| `contrast_threshold` | Threshold *C_t* used in the visibility-to-attenuation conversion (default `0.05`). |
|
|
157
166
|
| `device` | `"cpu"`, `"cuda"`, `"mps"`, or `"gpu"` (alias for cuda). |
|
|
158
167
|
| `gpu_batch_size` | Batch size when running on GPU. Uniform-model samples are batched; heterogeneous samples are processed individually. |
|
|
168
|
+
| `augmentations` | Optional stepped augmentation set. When present, every input sample produces every configured augmentation and uses the file-id hierarchy output layout described below. |
|
|
159
169
|
|
|
160
170
|
### Fog Model
|
|
161
171
|
|
|
@@ -222,10 +232,10 @@ Each image is assigned a fog model via the `selection` block:
|
|
|
222
232
|
"selection": {
|
|
223
233
|
"mode": "weighted",
|
|
224
234
|
"weights": {
|
|
225
|
-
"uniform":
|
|
226
|
-
"heterogeneous_k": 0.
|
|
227
|
-
"heterogeneous_ls": 0.
|
|
228
|
-
"heterogeneous_k_ls": 0.
|
|
235
|
+
"uniform": 0.25,
|
|
236
|
+
"heterogeneous_k": 0.35,
|
|
237
|
+
"heterogeneous_ls": 0.25,
|
|
238
|
+
"heterogeneous_k_ls": 0.15
|
|
229
239
|
}
|
|
230
240
|
}
|
|
231
241
|
```
|
|
@@ -256,28 +266,106 @@ Each model specifies a `visibility_m` distribution from which a visibility dista
|
|
|
256
266
|
|
|
257
267
|
The sampled visibility *V* is converted to the attenuation coefficient: **k = -ln(C_t) / V**.
|
|
258
268
|
|
|
269
|
+
### Stepped Augmentations
|
|
270
|
+
|
|
271
|
+
For benchmark generation, set `augmentations` in the fog config. This switches
|
|
272
|
+
the fog transform from one sampled output per input to one output per configured
|
|
273
|
+
variant:
|
|
274
|
+
|
|
275
|
+
```json
|
|
276
|
+
{
|
|
277
|
+
"airlight": "from_sky",
|
|
278
|
+
"seed": 1337,
|
|
279
|
+
"contrast_threshold": 0.05,
|
|
280
|
+
"augmentations": {
|
|
281
|
+
"file_id_hierarchy_name": "file_id",
|
|
282
|
+
"attribute_key": "fog_augmentation",
|
|
283
|
+
"models": ["uniform"],
|
|
284
|
+
"visibility_m": [10, 20, 40, 70, 100],
|
|
285
|
+
"airlight_methods": ["from_sky"]
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
The matrix form above expands as the Cartesian product of `models`,
|
|
291
|
+
`visibility_m` (MOR in metres), optional `scattering_coefficients` / `beta`, and
|
|
292
|
+
airlight choices. `file_id_hierarchy_name` names the inserted hierarchy level
|
|
293
|
+
when the underlying ds-crawler writer has a hierarchy separator; the directory
|
|
294
|
+
name is the source file id in either case. For tighter control, use explicit
|
|
295
|
+
variants:
|
|
296
|
+
|
|
297
|
+
```json
|
|
298
|
+
"augmentations": {
|
|
299
|
+
"variants": [
|
|
300
|
+
{
|
|
301
|
+
"id": "mor_010m_sky",
|
|
302
|
+
"model": "uniform",
|
|
303
|
+
"visibility_m": 10,
|
|
304
|
+
"airlight_method": "from_sky"
|
|
305
|
+
},
|
|
306
|
+
{
|
|
307
|
+
"id": "beta_0.15_white",
|
|
308
|
+
"model": "heterogeneous_k",
|
|
309
|
+
"scattering_coefficient": 0.15,
|
|
310
|
+
"atmospheric_light": [1.0, 1.0, 1.0],
|
|
311
|
+
"k_hetero": {
|
|
312
|
+
"scales": "smooth_auto",
|
|
313
|
+
"correlation_length_fraction": 0.25,
|
|
314
|
+
"octaves": 3,
|
|
315
|
+
"min_factor": 0.65,
|
|
316
|
+
"max_factor": 1.45,
|
|
317
|
+
"contrast": 0.65,
|
|
318
|
+
"normalize_to_mean": true
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
]
|
|
322
|
+
}
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
Each output entry receives per-file ds-crawler attributes under
|
|
326
|
+
`fog_augmentation`, including the augmentation id, source id, source full id,
|
|
327
|
+
model, actual scattering coefficient, actual atmospheric light, and configured
|
|
328
|
+
MOR/beta descriptors when available. euler-loading exposes these as
|
|
329
|
+
`sample["attributes"]["rgb"]["fog_augmentation"]`.
|
|
330
|
+
|
|
259
331
|
### Heterogeneous Noise Fields
|
|
260
332
|
|
|
261
|
-
Both `k_hetero` and `ls_hetero` use Perlin FBM (fractional Brownian
|
|
333
|
+
Both `k_hetero` and `ls_hetero` use Perlin FBM (fractional Brownian
|
|
334
|
+
motion) to generate spatially-varying factor fields. For realistic fog,
|
|
335
|
+
prefer the smooth mode: it keeps Perlin wavelengths tied to the image size,
|
|
336
|
+
then optionally reduces noise contrast and applies a final blur before mapping
|
|
337
|
+
the noise to physical factors.
|
|
262
338
|
|
|
263
339
|
```json
|
|
264
340
|
"k_hetero": {
|
|
265
|
-
"scales": "
|
|
266
|
-
"
|
|
341
|
+
"scales": "smooth_auto",
|
|
342
|
+
"correlation_length_fraction": 0.25,
|
|
343
|
+
"octaves": 3,
|
|
267
344
|
"max_scale": null,
|
|
268
|
-
"min_factor": 0.
|
|
269
|
-
"max_factor": 1.
|
|
345
|
+
"min_factor": 0.65,
|
|
346
|
+
"max_factor": 1.45,
|
|
347
|
+
"contrast": 0.65,
|
|
348
|
+
"smooth_sigma_fraction": 0.0,
|
|
270
349
|
"normalize_to_mean": true
|
|
271
350
|
}
|
|
272
351
|
```
|
|
273
352
|
|
|
274
|
-
The noise field (values in [0, 1]) is mapped to a factor field:
|
|
353
|
+
The noise field (values in [0, 1]) is mapped to a factor field:
|
|
354
|
+
`factor(x) = min_factor + (max_factor - min_factor) * noise(x)`.
|
|
355
|
+
`contrast < 1` compresses the noise around 0.5 before this mapping, avoiding
|
|
356
|
+
extreme local fog density. When `normalize_to_mean` is `true`, the factor field
|
|
357
|
+
is rescaled so its spatial mean equals 1.0, preserving the overall fog density
|
|
358
|
+
while introducing spatial variation.
|
|
275
359
|
|
|
276
360
|
| Parameter | Effect |
|
|
277
361
|
|---|---|
|
|
278
362
|
| `min_factor` / `max_factor` | Range of the multiplicative factor. |
|
|
279
363
|
| `normalize_to_mean` | Rescale factors so the image-wide mean equals the base value. Recommended for `k_hetero`. |
|
|
280
|
-
| `scales`
|
|
364
|
+
| `scales: "smooth_auto"` | Build low-frequency Perlin scales from the image size. |
|
|
365
|
+
| `correlation_length_fraction` | Approximate smallest fog feature size as a fraction of the shorter image side. Larger values create smoother gradients. |
|
|
366
|
+
| `octaves` / `lacunarity` / `max_scale` | Control how many increasingly broad Perlin components are mixed. |
|
|
367
|
+
| `contrast` | Compress or expand the Perlin range before mapping to factors. Values below 1 are recommended. |
|
|
368
|
+
| `smooth_sigma` / `smooth_sigma_fraction` | Optional final Gaussian blur in pixels or as a fraction of the shorter image side. |
|
|
281
369
|
|
|
282
370
|
### Fog Output
|
|
283
371
|
|
|
@@ -297,6 +385,22 @@ When a pipeline target is present, `pipeline.output_targets[].path` replaces
|
|
|
297
385
|
`output_path` entirely. Standalone/direct `FogTransform(...)` usage without the
|
|
298
386
|
CLI still uses the legacy per-model layout with `config.json` sidecars.
|
|
299
387
|
|
|
388
|
+
With `augmentations` enabled, source-backed outputs are written one level below
|
|
389
|
+
the source file id instead:
|
|
390
|
+
|
|
391
|
+
```
|
|
392
|
+
<output_path>/
|
|
393
|
+
.ds_crawler/output.json
|
|
394
|
+
Scene01/
|
|
395
|
+
Camera_0/
|
|
396
|
+
00000/
|
|
397
|
+
mor_10m_airlight_from_sky.png
|
|
398
|
+
mor_20m_airlight_from_sky.png
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
Auxiliary `scattering_coefficient` and `atmospheric_light` pipeline targets use
|
|
402
|
+
the same file-id hierarchy and write matching `.npy` augmentation files.
|
|
403
|
+
|
|
300
404
|
---
|
|
301
405
|
|
|
302
406
|
## Sky-Depth Transform
|
|
@@ -35,6 +35,7 @@ Every subcommand takes a **dataset config** JSON that points to the input data a
|
|
|
35
35
|
"transform_config_path": "configs/run1.json",
|
|
36
36
|
"output_path": "/path/to/output",
|
|
37
37
|
"output_slot": "rgb",
|
|
38
|
+
"sample": 42,
|
|
38
39
|
"modalities": {
|
|
39
40
|
"rgb": {"path": "/path/to/rgb", "split": "train"},
|
|
40
41
|
"depth": "/path/to/depth",
|
|
@@ -64,6 +65,8 @@ Every subcommand takes a **dataset config** JSON that points to the input data a
|
|
|
64
65
|
| `transform_config_path` | Path to the transform-specific config (see below). `fog_config_path` is also accepted for backward compatibility. |
|
|
65
66
|
| `output_path` | Output root used when no pipeline target overrides it. Optional if `pipeline.output_root` or `pipeline.output_targets[].path` supplies the destination. |
|
|
66
67
|
| `output_slot` | Optional slot selector when `pipeline.output_targets` contains multiple entries. Defaults to `rgb` for `fog`, `depth` for `sky-depth`, and `depth` for `radial`. |
|
|
68
|
+
| `sample` | Optional 0-based euler-loading dataset index. When set, only `dataset[sample]` is transformed, which is useful for small augmented benchmark slices from large datasets. |
|
|
69
|
+
| `samples` | Optional multi-sample selector. Use a list of 0-based indices (`[0, 10, 20]`) or a slice object such as `{"start": 0, "stop": 1000, "step": 2, "count": 100}`. `stop` is exclusive; `count` caps the selected indices after slicing. Do not set both `sample` and `samples`. |
|
|
67
70
|
| `modalities` | Regular modalities that participate in sample-ID intersection. Each value is either a plain path string or an object with a `path` key and an optional `split` key (see below). Which modalities are required depends on the transform (see table below). |
|
|
68
71
|
| `hierarchical_modalities` | Per-scene data (e.g. intrinsics). Same format as `modalities`. Loaded once per scene and cached. |
|
|
69
72
|
| `pipeline` | Optional runtime routing block compatible with `euler-inference` (`output_root`, `outputs_manifest_path`, `output_targets`). |
|
|
@@ -72,6 +75,11 @@ Every subcommand takes a **dataset config** JSON that points to the input data a
|
|
|
72
75
|
|
|
73
76
|
When a modality directory contains [ds-crawler](https://github.com/d-rothen/ds-crawler) split files (`.ds_crawler/split_<name>.json`), you can select a subset of the data by setting the `split` key on that modality. Sample IDs are matched by intersection across all modalities, so specifying a split on a single modality is sufficient to restrict the entire dataset.
|
|
74
77
|
|
|
78
|
+
For quick slices after euler-loading has matched modalities, set `samples`.
|
|
79
|
+
For example, `{"samples": {"step": 2}}` processes every second matched sample,
|
|
80
|
+
and `{"samples": {"start": 10, "step": 5, "count": 20}}` processes 20 samples
|
|
81
|
+
starting at index 10 with stride 5.
|
|
82
|
+
|
|
75
83
|
**Required modalities per transform:**
|
|
76
84
|
|
|
77
85
|
| Transform | `modalities` | `hierarchical_modalities` |
|
|
@@ -128,6 +136,7 @@ Controls the fog simulation.
|
|
|
128
136
|
"contrast_threshold": 0.05,
|
|
129
137
|
"device": "cpu",
|
|
130
138
|
"gpu_batch_size": 4,
|
|
139
|
+
"augmentations": { ... },
|
|
131
140
|
"selection": { ... },
|
|
132
141
|
"models": { ... }
|
|
133
142
|
}
|
|
@@ -142,6 +151,7 @@ Controls the fog simulation.
|
|
|
142
151
|
| `contrast_threshold` | Threshold *C_t* used in the visibility-to-attenuation conversion (default `0.05`). |
|
|
143
152
|
| `device` | `"cpu"`, `"cuda"`, `"mps"`, or `"gpu"` (alias for cuda). |
|
|
144
153
|
| `gpu_batch_size` | Batch size when running on GPU. Uniform-model samples are batched; heterogeneous samples are processed individually. |
|
|
154
|
+
| `augmentations` | Optional stepped augmentation set. When present, every input sample produces every configured augmentation and uses the file-id hierarchy output layout described below. |
|
|
145
155
|
|
|
146
156
|
### Fog Model
|
|
147
157
|
|
|
@@ -208,10 +218,10 @@ Each image is assigned a fog model via the `selection` block:
|
|
|
208
218
|
"selection": {
|
|
209
219
|
"mode": "weighted",
|
|
210
220
|
"weights": {
|
|
211
|
-
"uniform":
|
|
212
|
-
"heterogeneous_k": 0.
|
|
213
|
-
"heterogeneous_ls": 0.
|
|
214
|
-
"heterogeneous_k_ls": 0.
|
|
221
|
+
"uniform": 0.25,
|
|
222
|
+
"heterogeneous_k": 0.35,
|
|
223
|
+
"heterogeneous_ls": 0.25,
|
|
224
|
+
"heterogeneous_k_ls": 0.15
|
|
215
225
|
}
|
|
216
226
|
}
|
|
217
227
|
```
|
|
@@ -242,28 +252,106 @@ Each model specifies a `visibility_m` distribution from which a visibility dista
|
|
|
242
252
|
|
|
243
253
|
The sampled visibility *V* is converted to the attenuation coefficient: **k = -ln(C_t) / V**.
|
|
244
254
|
|
|
255
|
+
### Stepped Augmentations
|
|
256
|
+
|
|
257
|
+
For benchmark generation, set `augmentations` in the fog config. This switches
|
|
258
|
+
the fog transform from one sampled output per input to one output per configured
|
|
259
|
+
variant:
|
|
260
|
+
|
|
261
|
+
```json
|
|
262
|
+
{
|
|
263
|
+
"airlight": "from_sky",
|
|
264
|
+
"seed": 1337,
|
|
265
|
+
"contrast_threshold": 0.05,
|
|
266
|
+
"augmentations": {
|
|
267
|
+
"file_id_hierarchy_name": "file_id",
|
|
268
|
+
"attribute_key": "fog_augmentation",
|
|
269
|
+
"models": ["uniform"],
|
|
270
|
+
"visibility_m": [10, 20, 40, 70, 100],
|
|
271
|
+
"airlight_methods": ["from_sky"]
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
The matrix form above expands as the Cartesian product of `models`,
|
|
277
|
+
`visibility_m` (MOR in metres), optional `scattering_coefficients` / `beta`, and
|
|
278
|
+
airlight choices. `file_id_hierarchy_name` names the inserted hierarchy level
|
|
279
|
+
when the underlying ds-crawler writer has a hierarchy separator; the directory
|
|
280
|
+
name is the source file id in either case. For tighter control, use explicit
|
|
281
|
+
variants:
|
|
282
|
+
|
|
283
|
+
```json
|
|
284
|
+
"augmentations": {
|
|
285
|
+
"variants": [
|
|
286
|
+
{
|
|
287
|
+
"id": "mor_010m_sky",
|
|
288
|
+
"model": "uniform",
|
|
289
|
+
"visibility_m": 10,
|
|
290
|
+
"airlight_method": "from_sky"
|
|
291
|
+
},
|
|
292
|
+
{
|
|
293
|
+
"id": "beta_0.15_white",
|
|
294
|
+
"model": "heterogeneous_k",
|
|
295
|
+
"scattering_coefficient": 0.15,
|
|
296
|
+
"atmospheric_light": [1.0, 1.0, 1.0],
|
|
297
|
+
"k_hetero": {
|
|
298
|
+
"scales": "smooth_auto",
|
|
299
|
+
"correlation_length_fraction": 0.25,
|
|
300
|
+
"octaves": 3,
|
|
301
|
+
"min_factor": 0.65,
|
|
302
|
+
"max_factor": 1.45,
|
|
303
|
+
"contrast": 0.65,
|
|
304
|
+
"normalize_to_mean": true
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
]
|
|
308
|
+
}
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
Each output entry receives per-file ds-crawler attributes under
|
|
312
|
+
`fog_augmentation`, including the augmentation id, source id, source full id,
|
|
313
|
+
model, actual scattering coefficient, actual atmospheric light, and configured
|
|
314
|
+
MOR/beta descriptors when available. euler-loading exposes these as
|
|
315
|
+
`sample["attributes"]["rgb"]["fog_augmentation"]`.
|
|
316
|
+
|
|
245
317
|
### Heterogeneous Noise Fields
|
|
246
318
|
|
|
247
|
-
Both `k_hetero` and `ls_hetero` use Perlin FBM (fractional Brownian
|
|
319
|
+
Both `k_hetero` and `ls_hetero` use Perlin FBM (fractional Brownian
|
|
320
|
+
motion) to generate spatially-varying factor fields. For realistic fog,
|
|
321
|
+
prefer the smooth mode: it keeps Perlin wavelengths tied to the image size,
|
|
322
|
+
then optionally reduces noise contrast and applies a final blur before mapping
|
|
323
|
+
the noise to physical factors.
|
|
248
324
|
|
|
249
325
|
```json
|
|
250
326
|
"k_hetero": {
|
|
251
|
-
"scales": "
|
|
252
|
-
"
|
|
327
|
+
"scales": "smooth_auto",
|
|
328
|
+
"correlation_length_fraction": 0.25,
|
|
329
|
+
"octaves": 3,
|
|
253
330
|
"max_scale": null,
|
|
254
|
-
"min_factor": 0.
|
|
255
|
-
"max_factor": 1.
|
|
331
|
+
"min_factor": 0.65,
|
|
332
|
+
"max_factor": 1.45,
|
|
333
|
+
"contrast": 0.65,
|
|
334
|
+
"smooth_sigma_fraction": 0.0,
|
|
256
335
|
"normalize_to_mean": true
|
|
257
336
|
}
|
|
258
337
|
```
|
|
259
338
|
|
|
260
|
-
The noise field (values in [0, 1]) is mapped to a factor field:
|
|
339
|
+
The noise field (values in [0, 1]) is mapped to a factor field:
|
|
340
|
+
`factor(x) = min_factor + (max_factor - min_factor) * noise(x)`.
|
|
341
|
+
`contrast < 1` compresses the noise around 0.5 before this mapping, avoiding
|
|
342
|
+
extreme local fog density. When `normalize_to_mean` is `true`, the factor field
|
|
343
|
+
is rescaled so its spatial mean equals 1.0, preserving the overall fog density
|
|
344
|
+
while introducing spatial variation.
|
|
261
345
|
|
|
262
346
|
| Parameter | Effect |
|
|
263
347
|
|---|---|
|
|
264
348
|
| `min_factor` / `max_factor` | Range of the multiplicative factor. |
|
|
265
349
|
| `normalize_to_mean` | Rescale factors so the image-wide mean equals the base value. Recommended for `k_hetero`. |
|
|
266
|
-
| `scales`
|
|
350
|
+
| `scales: "smooth_auto"` | Build low-frequency Perlin scales from the image size. |
|
|
351
|
+
| `correlation_length_fraction` | Approximate smallest fog feature size as a fraction of the shorter image side. Larger values create smoother gradients. |
|
|
352
|
+
| `octaves` / `lacunarity` / `max_scale` | Control how many increasingly broad Perlin components are mixed. |
|
|
353
|
+
| `contrast` | Compress or expand the Perlin range before mapping to factors. Values below 1 are recommended. |
|
|
354
|
+
| `smooth_sigma` / `smooth_sigma_fraction` | Optional final Gaussian blur in pixels or as a fraction of the shorter image side. |
|
|
267
355
|
|
|
268
356
|
### Fog Output
|
|
269
357
|
|
|
@@ -283,6 +371,22 @@ When a pipeline target is present, `pipeline.output_targets[].path` replaces
|
|
|
283
371
|
`output_path` entirely. Standalone/direct `FogTransform(...)` usage without the
|
|
284
372
|
CLI still uses the legacy per-model layout with `config.json` sidecars.
|
|
285
373
|
|
|
374
|
+
With `augmentations` enabled, source-backed outputs are written one level below
|
|
375
|
+
the source file id instead:
|
|
376
|
+
|
|
377
|
+
```
|
|
378
|
+
<output_path>/
|
|
379
|
+
.ds_crawler/output.json
|
|
380
|
+
Scene01/
|
|
381
|
+
Camera_0/
|
|
382
|
+
00000/
|
|
383
|
+
mor_10m_airlight_from_sky.png
|
|
384
|
+
mor_20m_airlight_from_sky.png
|
|
385
|
+
```
|
|
386
|
+
|
|
387
|
+
Auxiliary `scattering_coefficient` and `atmospheric_light` pipeline targets use
|
|
388
|
+
the same file-id hierarchy and write matching `.npy` augmentation files.
|
|
389
|
+
|
|
286
390
|
---
|
|
287
391
|
|
|
288
392
|
## Sky-Depth Transform
|
|
@@ -8,7 +8,9 @@ from __future__ import annotations
|
|
|
8
8
|
import argparse
|
|
9
9
|
import inspect
|
|
10
10
|
import json
|
|
11
|
+
from collections.abc import Iterable, Iterator, Sequence
|
|
11
12
|
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
12
14
|
|
|
13
15
|
from euler_preprocess.common.dataset import build_dataset
|
|
14
16
|
from euler_preprocess.common.logging import get_logger, log_dataset_info
|
|
@@ -27,6 +29,132 @@ def _resolve(path_str: str, config_dir: Path) -> Path:
|
|
|
27
29
|
return (config_dir / p).resolve()
|
|
28
30
|
|
|
29
31
|
|
|
32
|
+
class _SelectedSamples(Sequence):
|
|
33
|
+
"""Lazy view over selected euler-loading dataset entries."""
|
|
34
|
+
|
|
35
|
+
def __init__(self, dataset, indices: Iterable[int]) -> None:
|
|
36
|
+
self.dataset = dataset
|
|
37
|
+
self.indices = tuple(indices)
|
|
38
|
+
|
|
39
|
+
def __len__(self) -> int:
|
|
40
|
+
return len(self.indices)
|
|
41
|
+
|
|
42
|
+
def __iter__(self) -> Iterator[dict]:
|
|
43
|
+
for index in self.indices:
|
|
44
|
+
yield self.dataset[index]
|
|
45
|
+
|
|
46
|
+
def __getitem__(self, index: int | slice):
|
|
47
|
+
if isinstance(index, slice):
|
|
48
|
+
return [self.dataset[i] for i in self.indices[index]]
|
|
49
|
+
return self.dataset[self.indices[index]]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _validate_sample_index(value: Any, *, key: str, dataset_size: int) -> int:
|
|
53
|
+
if isinstance(value, bool) or not isinstance(value, int):
|
|
54
|
+
raise ValueError(f"{key} must be a non-negative integer index")
|
|
55
|
+
if value < 0:
|
|
56
|
+
raise ValueError(f"{key} must be a non-negative integer index")
|
|
57
|
+
if value >= dataset_size:
|
|
58
|
+
raise IndexError(
|
|
59
|
+
f"{key} {value} out of range for dataset of length {dataset_size}"
|
|
60
|
+
)
|
|
61
|
+
return value
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _positive_int(value: Any, *, key: str) -> int:
|
|
65
|
+
if isinstance(value, bool) or not isinstance(value, int):
|
|
66
|
+
raise ValueError(f"{key} must be a positive integer")
|
|
67
|
+
if value <= 0:
|
|
68
|
+
raise ValueError(f"{key} must be a positive integer")
|
|
69
|
+
return value
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _non_negative_int(value: Any, *, key: str) -> int:
|
|
73
|
+
if isinstance(value, bool) or not isinstance(value, int):
|
|
74
|
+
raise ValueError(f"{key} must be a non-negative integer")
|
|
75
|
+
if value < 0:
|
|
76
|
+
raise ValueError(f"{key} must be a non-negative integer")
|
|
77
|
+
return value
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _resolve_sample_indices(selection: Any, *, dataset_size: int) -> tuple[int, ...]:
    """Turn the ``samples`` config value into a tuple of dataset indices.

    Two forms are accepted:

    * a list of indices, each validated against ``dataset_size`` and
      returned in the given order;
    * a dict with the optional keys ``start`` (default ``0``), ``stop``
      (exclusive; default or ``None`` means ``dataset_size``), ``step``
      (default ``1``) and ``count`` (cap applied after slicing).

    Args:
        selection: The raw ``samples`` value from the dataset config.
        dataset_size: Number of entries in the dataset.

    Returns:
        The selected indices; never empty.

    Raises:
        ValueError: If ``selection`` has the wrong type, contains unknown
            keys or invalid values, or selects no entries.
        IndexError: If a list index or ``start`` is ``>= dataset_size``.
    """
    if isinstance(selection, list):
        indices = tuple(
            _validate_sample_index(value, key="samples[]", dataset_size=dataset_size)
            for value in selection
        )
        if not indices:
            raise ValueError("samples must select at least one dataset entry")
        return indices

    if not isinstance(selection, dict):
        raise ValueError("samples must be an object or a list of integer indices")

    allowed = {"start", "stop", "step", "count"}
    unknown = sorted(set(selection) - allowed)
    if unknown:
        raise ValueError(f"samples contains unknown keys: {', '.join(unknown)}")

    start = _non_negative_int(selection.get("start", 0), key="samples.start")
    stop_value = selection.get("stop")
    if stop_value is None:
        stop = dataset_size
    else:
        stop = _non_negative_int(stop_value, key="samples.stop")
    step = _positive_int(selection.get("step", 1), key="samples.step")

    if start >= dataset_size:
        raise IndexError(
            f"samples.start {start} out of range for dataset of length {dataset_size}"
        )

    # Keep the selection as a lazy range until the cap is applied: slicing a
    # range yields another range, so a small ``count`` on a huge dataset never
    # allocates a tuple holding every candidate index.
    selected = range(start, min(stop, dataset_size), step)
    if "count" in selection:
        count = _positive_int(selection["count"], key="samples.count")
        selected = selected[:count]

    # An empty range is falsy (e.g. stop <= start).
    if not selected:
        raise ValueError("samples must select at least one dataset entry")
    return tuple(selected)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _select_configured_samples(config: dict, dataset, logger):
    """Narrow *dataset* according to the top-level ``sample``/``samples`` keys.

    Returns *dataset* itself when neither key is present, a one-element list
    holding ``dataset[sample]`` when ``sample`` is set, and a
    ``_SelectedSamples`` view when ``samples`` is set. The chosen selection
    is reported through ``logger.info``.

    Raises:
        ValueError: If both ``sample`` and ``samples`` are present in
            *config*. Validation errors from the index helpers propagate
            unchanged.
    """
    single = "sample" in config
    multiple = "samples" in config
    if single and multiple:
        raise ValueError("Use either sample or samples, not both")
    if not (single or multiple):
        return dataset

    total = len(dataset)

    if multiple:
        chosen = _resolve_sample_indices(config["samples"], dataset_size=total)
        logger.info(
            "Sample selection: using %d/%d samples (first_index=%d, last_index=%d)",
            len(chosen),
            total,
            chosen[0],
            chosen[-1],
        )
        return _SelectedSamples(dataset, chosen)

    position = _validate_sample_index(
        config["sample"],
        key="sample",
        dataset_size=total,
    )
    # The single entry is loaded right away so its ids can be logged.
    entry = dataset[position]
    logger.info(
        "Sample selection: using sample=%d of %d (id=%s, full_id=%s)",
        position,
        total,
        entry.get("id"),
        entry.get("full_id"),
    )
    return [entry]
|
|
156
|
+
|
|
157
|
+
|
|
30
158
|
def _run_transform(args: argparse.Namespace, transform_class: type) -> int:
|
|
31
159
|
"""Shared logic for all subcommands."""
|
|
32
160
|
logger = get_logger()
|
|
@@ -57,6 +185,7 @@ def _run_transform(args: argparse.Namespace, transform_class: type) -> int:
|
|
|
57
185
|
dataset = build_dataset(config, required_modalities, required_hierarchical)
|
|
58
186
|
output_backends = prepare_output_backends(config, dataset, transform_class)
|
|
59
187
|
primary_backend = next(iter(output_backends.values()))
|
|
188
|
+
samples = _select_configured_samples(config, dataset, logger)
|
|
60
189
|
dataset_name = config.get("dataset", "dataset")
|
|
61
190
|
|
|
62
191
|
raw_modalities = {
|
|
@@ -69,7 +198,7 @@ def _run_transform(args: argparse.Namespace, transform_class: type) -> int:
|
|
|
69
198
|
modality_info[name] = {"path": entry}
|
|
70
199
|
else:
|
|
71
200
|
modality_info[name] = entry
|
|
72
|
-
log_dataset_info(logger, dataset_name, len(
|
|
201
|
+
log_dataset_info(logger, dataset_name, len(samples), modality_info, use_gpu)
|
|
73
202
|
for slot, backend in output_backends.items():
|
|
74
203
|
logger.info("Output path [%s]: %s", slot, backend.root)
|
|
75
204
|
|
|
@@ -98,7 +227,7 @@ def _run_transform(args: argparse.Namespace, transform_class: type) -> int:
|
|
|
98
227
|
)
|
|
99
228
|
transform = transform_class(**transform_kwargs)
|
|
100
229
|
|
|
101
|
-
saved_paths = transform.run(
|
|
230
|
+
saved_paths = transform.run(samples)
|
|
102
231
|
|
|
103
232
|
logger.info("Transform complete. Generated %d outputs.", len(saved_paths))
|
|
104
233
|
return 0
|