euler-preprocess 2.1.0__tar.gz → 2.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/PKG-INFO +116 -12
  2. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/README.md +115 -11
  3. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/cli.py +131 -2
  4. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/output.py +186 -8
  5. euler_preprocess-2.3.0/euler_preprocess/fog/augmentations.py +318 -0
  6. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/models.py +312 -22
  7. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/transform.py +546 -65
  8. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess.egg-info/PKG-INFO +116 -12
  9. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess.egg-info/SOURCES.txt +2 -0
  10. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/pyproject.toml +1 -1
  11. euler_preprocess-2.3.0/tests/test_cli_sample_selection.py +127 -0
  12. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/tests/test_fog_aux_outputs.py +178 -0
  13. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/__init__.py +0 -0
  14. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/__init__.py +0 -0
  15. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/dataset.py +0 -0
  16. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/device.py +0 -0
  17. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/intrinsics.py +0 -0
  18. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/io.py +0 -0
  19. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/logging.py +0 -0
  20. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/noise.py +0 -0
  21. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/normalize.py +0 -0
  22. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/sampling.py +0 -0
  23. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/common/transform.py +0 -0
  24. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/__init__.py +0 -0
  25. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/airlight_from_sky.py +0 -0
  26. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/dcp_airlight.py +0 -0
  27. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/dcp_airlight_torch.py +0 -0
  28. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/dcp_heuristic_airlight.py +0 -0
  29. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/dcp_heuristic_airlight_torch.py +0 -0
  30. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/foggify.py +0 -0
  31. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/foggify_logging.py +0 -0
  32. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/fog/logging.py +0 -0
  33. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/radial/__init__.py +0 -0
  34. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/radial/transform.py +0 -0
  35. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/sky_depth/__init__.py +0 -0
  36. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess/sky_depth/transform.py +0 -0
  37. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess.egg-info/dependency_links.txt +0 -0
  38. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess.egg-info/entry_points.txt +0 -0
  39. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess.egg-info/requires.txt +0 -0
  40. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/euler_preprocess.egg-info/top_level.txt +0 -0
  41. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/setup.cfg +0 -0
  42. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/tests/test_airlight_fallback.py +0 -0
  43. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/tests/test_dcp_heuristic_airlight.py +0 -0
  44. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/tests/test_foggify_integration.py +0 -0
  45. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/tests/test_radial.py +0 -0
  46. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/tests/test_sky_depth.py +0 -0
  47. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/tests/test_source_backed_output.py +0 -0
  48. {euler_preprocess-2.1.0 → euler_preprocess-2.3.0}/tests/test_zip_output.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: euler-preprocess
3
- Version: 2.1.0
3
+ Version: 2.3.0
4
4
  Summary: Physics-based preprocessing (fog, etc.) for RGB+depth datasets
5
5
  Requires-Python: >=3.9
6
6
  Description-Content-Type: text/markdown
@@ -49,6 +49,7 @@ Every subcommand takes a **dataset config** JSON that points to the input data a
49
49
  "transform_config_path": "configs/run1.json",
50
50
  "output_path": "/path/to/output",
51
51
  "output_slot": "rgb",
52
+ "sample": 42,
52
53
  "modalities": {
53
54
  "rgb": {"path": "/path/to/rgb", "split": "train"},
54
55
  "depth": "/path/to/depth",
@@ -78,6 +79,8 @@ Every subcommand takes a **dataset config** JSON that points to the input data a
78
79
  | `transform_config_path` | Path to the transform-specific config (see below). `fog_config_path` is also accepted for backward compatibility. |
79
80
  | `output_path` | Output root used when no pipeline target overrides it. Optional if `pipeline.output_root` or `pipeline.output_targets[].path` supplies the destination. |
80
81
  | `output_slot` | Optional slot selector when `pipeline.output_targets` contains multiple entries. Defaults to `rgb` for `fog`, `depth` for `sky-depth`, and `depth` for `radial`. |
82
+ | `sample` | Optional 0-based euler-loading dataset index. When set, only `dataset[sample]` is transformed, which is useful for small augmented benchmark slices from large datasets. |
83
+ | `samples` | Optional multi-sample selector. Use a list of 0-based indices (`[0, 10, 20]`) or a slice object such as `{"start": 0, "stop": 1000, "step": 2, "count": 100}`. `stop` is exclusive; `count` caps the selected indices after slicing. Do not set both `sample` and `samples`. |
81
84
  | `modalities` | Regular modalities that participate in sample-ID intersection. Each value is either a plain path string or an object with a `path` key and an optional `split` key (see below). Which modalities are required depends on the transform (see table below). |
82
85
  | `hierarchical_modalities` | Per-scene data (e.g. intrinsics). Same format as `modalities`. Loaded once per scene and cached. |
83
86
  | `pipeline` | Optional runtime routing block compatible with `euler-inference` (`output_root`, `outputs_manifest_path`, `output_targets`). |
@@ -86,6 +89,11 @@ Every subcommand takes a **dataset config** JSON that points to the input data a
86
89
 
87
90
  When a modality directory contains [ds-crawler](https://github.com/d-rothen/ds-crawler) split files (`.ds_crawler/split_<name>.json`), you can select a subset of the data by setting the `split` key on that modality. Sample IDs are matched by intersection across all modalities, so specifying a split on a single modality is sufficient to restrict the entire dataset.
88
91
 
92
+ For quick slices after euler-loading has matched modalities, set `samples`.
93
+ For example, `{"samples": {"step": 2}}` processes every second matched sample,
94
+ and `{"samples": {"start": 10, "step": 5, "count": 20}}` processes 20 samples
95
+ starting at index 10 with stride 5.
96
+
89
97
  **Required modalities per transform:**
90
98
 
91
99
  | Transform | `modalities` | `hierarchical_modalities` |
@@ -142,6 +150,7 @@ Controls the fog simulation.
142
150
  "contrast_threshold": 0.05,
143
151
  "device": "cpu",
144
152
  "gpu_batch_size": 4,
153
+ "augmentations": { ... },
145
154
  "selection": { ... },
146
155
  "models": { ... }
147
156
  }
@@ -156,6 +165,7 @@ Controls the fog simulation.
156
165
  | `contrast_threshold` | Threshold *C_t* used in the visibility-to-attenuation conversion (default `0.05`). |
157
166
  | `device` | `"cpu"`, `"cuda"`, `"mps"`, or `"gpu"` (alias for cuda). |
158
167
  | `gpu_batch_size` | Batch size when running on GPU. Uniform-model samples are batched; heterogeneous samples are processed individually. |
168
+ | `augmentations` | Optional stepped augmentation set. When present, every input sample produces every configured augmentation and uses the file-id hierarchy output layout described below. |
159
169
 
160
170
  ### Fog Model
161
171
 
@@ -222,10 +232,10 @@ Each image is assigned a fog model via the `selection` block:
222
232
  "selection": {
223
233
  "mode": "weighted",
224
234
  "weights": {
225
- "uniform": 1.0,
226
- "heterogeneous_k": 0.0,
227
- "heterogeneous_ls": 0.0,
228
- "heterogeneous_k_ls": 0.0
235
+ "uniform": 0.25,
236
+ "heterogeneous_k": 0.35,
237
+ "heterogeneous_ls": 0.25,
238
+ "heterogeneous_k_ls": 0.15
229
239
  }
230
240
  }
231
241
  ```
@@ -256,28 +266,106 @@ Each model specifies a `visibility_m` distribution from which a visibility dista
256
266
 
257
267
  The sampled visibility *V* is converted to the attenuation coefficient: **k = -ln(C_t) / V**.
258
268
 
269
+ ### Stepped Augmentations
270
+
271
+ For benchmark generation, set `augmentations` in the fog config. This switches
272
+ the fog transform from one sampled output per input to one output per configured
273
+ variant:
274
+
275
+ ```json
276
+ {
277
+ "airlight": "from_sky",
278
+ "seed": 1337,
279
+ "contrast_threshold": 0.05,
280
+ "augmentations": {
281
+ "file_id_hierarchy_name": "file_id",
282
+ "attribute_key": "fog_augmentation",
283
+ "models": ["uniform"],
284
+ "visibility_m": [10, 20, 40, 70, 100],
285
+ "airlight_methods": ["from_sky"]
286
+ }
287
+ }
288
+ ```
289
+
290
+ The matrix form above expands as the Cartesian product of `models`,
291
+ `visibility_m` (MOR in metres), optional `scattering_coefficients` / `beta`, and
292
+ airlight choices. `file_id_hierarchy_name` names the inserted hierarchy level
293
+ when the underlying ds-crawler writer has a hierarchy separator; the directory
294
+ name is the source file id in either case. For tighter control, use explicit
295
+ variants:
296
+
297
+ ```json
298
+ "augmentations": {
299
+ "variants": [
300
+ {
301
+ "id": "mor_010m_sky",
302
+ "model": "uniform",
303
+ "visibility_m": 10,
304
+ "airlight_method": "from_sky"
305
+ },
306
+ {
307
+ "id": "beta_0.15_white",
308
+ "model": "heterogeneous_k",
309
+ "scattering_coefficient": 0.15,
310
+ "atmospheric_light": [1.0, 1.0, 1.0],
311
+ "k_hetero": {
312
+ "scales": "smooth_auto",
313
+ "correlation_length_fraction": 0.25,
314
+ "octaves": 3,
315
+ "min_factor": 0.65,
316
+ "max_factor": 1.45,
317
+ "contrast": 0.65,
318
+ "normalize_to_mean": true
319
+ }
320
+ }
321
+ ]
322
+ }
323
+ ```
324
+
325
+ Each output entry receives per-file ds-crawler attributes under
326
+ `fog_augmentation`, including the augmentation id, source id, source full id,
327
+ model, actual scattering coefficient, actual atmospheric light, and configured
328
+ MOR/beta descriptors when available. euler-loading exposes these as
329
+ `sample["attributes"]["rgb"]["fog_augmentation"]`.
330
+
259
331
  ### Heterogeneous Noise Fields
260
332
 
261
- Both `k_hetero` and `ls_hetero` use Perlin FBM (fractional Brownian motion) to generate spatially-varying factor fields:
333
+ Both `k_hetero` and `ls_hetero` use Perlin FBM (fractional Brownian
334
+ motion) to generate spatially-varying factor fields. For realistic fog,
335
+ prefer the smooth mode: it keeps Perlin wavelengths tied to the image size,
336
+ then optionally reduces noise contrast and applies a final blur before mapping
337
+ the noise to physical factors.
262
338
 
263
339
  ```json
264
340
  "k_hetero": {
265
- "scales": "auto",
266
- "min_scale": 2,
341
+ "scales": "smooth_auto",
342
+ "correlation_length_fraction": 0.25,
343
+ "octaves": 3,
267
344
  "max_scale": null,
268
- "min_factor": 0.0,
269
- "max_factor": 1.0,
345
+ "min_factor": 0.65,
346
+ "max_factor": 1.45,
347
+ "contrast": 0.65,
348
+ "smooth_sigma_fraction": 0.0,
270
349
  "normalize_to_mean": true
271
350
  }
272
351
  ```
273
352
 
274
- The noise field (values in [0, 1]) is mapped to a factor field: `factor(x) = min_factor + (max_factor - min_factor) * noise(x)`. When `normalize_to_mean` is `true`, the factor field is rescaled so its spatial mean equals 1.0, preserving the overall fog density while introducing spatial variation.
353
+ The noise field (values in [0, 1]) is mapped to a factor field:
354
+ `factor(x) = min_factor + (max_factor - min_factor) * noise(x)`.
355
+ `contrast < 1` compresses the noise around 0.5 before this mapping, avoiding
356
+ extreme local fog density. When `normalize_to_mean` is `true`, the factor field
357
+ is rescaled so its spatial mean equals 1.0, preserving the overall fog density
358
+ while introducing spatial variation.
275
359
 
276
360
  | Parameter | Effect |
277
361
  |---|---|
278
362
  | `min_factor` / `max_factor` | Range of the multiplicative factor. |
279
363
  | `normalize_to_mean` | Rescale factors so the image-wide mean equals the base value. Recommended for `k_hetero`. |
280
- | `scales` / `min_scale` / `max_scale` | Control spatial frequency content. |
364
+ | `scales: "smooth_auto"` | Build low-frequency Perlin scales from the image size. |
365
+ | `correlation_length_fraction` | Approximate smallest fog feature size as a fraction of the shorter image side. Larger values create smoother gradients. |
366
+ | `octaves` / `lacunarity` / `max_scale` | Control how many increasingly broad Perlin components are mixed. |
367
+ | `contrast` | Compress or expand the Perlin range before mapping to factors. Values below 1 are recommended. |
368
+ | `smooth_sigma` / `smooth_sigma_fraction` | Optional final Gaussian blur in pixels or as a fraction of the shorter image side. |
281
369
 
282
370
  ### Fog Output
283
371
 
@@ -297,6 +385,22 @@ When a pipeline target is present, `pipeline.output_targets[].path` replaces
297
385
  `output_path` entirely. Standalone/direct `FogTransform(...)` usage without the
298
386
  CLI still uses the legacy per-model layout with `config.json` sidecars.
299
387
 
388
+ With `augmentations` enabled, source-backed outputs are written one level below
389
+ the source file id instead:
390
+
391
+ ```
392
+ <output_path>/
393
+ .ds_crawler/output.json
394
+ Scene01/
395
+ Camera_0/
396
+ 00000/
397
+ mor_10m_airlight_from_sky.png
398
+ mor_20m_airlight_from_sky.png
399
+ ```
400
+
401
+ Auxiliary `scattering_coefficient` and `atmospheric_light` pipeline targets use
402
+ the same file-id hierarchy and write matching `.npy` augmentation files.
403
+
300
404
  ---
301
405
 
302
406
  ## Sky-Depth Transform
@@ -35,6 +35,7 @@ Every subcommand takes a **dataset config** JSON that points to the input data a
35
35
  "transform_config_path": "configs/run1.json",
36
36
  "output_path": "/path/to/output",
37
37
  "output_slot": "rgb",
38
+ "sample": 42,
38
39
  "modalities": {
39
40
  "rgb": {"path": "/path/to/rgb", "split": "train"},
40
41
  "depth": "/path/to/depth",
@@ -64,6 +65,8 @@ Every subcommand takes a **dataset config** JSON that points to the input data a
64
65
  | `transform_config_path` | Path to the transform-specific config (see below). `fog_config_path` is also accepted for backward compatibility. |
65
66
  | `output_path` | Output root used when no pipeline target overrides it. Optional if `pipeline.output_root` or `pipeline.output_targets[].path` supplies the destination. |
66
67
  | `output_slot` | Optional slot selector when `pipeline.output_targets` contains multiple entries. Defaults to `rgb` for `fog`, `depth` for `sky-depth`, and `depth` for `radial`. |
68
+ | `sample` | Optional 0-based euler-loading dataset index. When set, only `dataset[sample]` is transformed, which is useful for small augmented benchmark slices from large datasets. |
69
+ | `samples` | Optional multi-sample selector. Use a list of 0-based indices (`[0, 10, 20]`) or a slice object such as `{"start": 0, "stop": 1000, "step": 2, "count": 100}`. `stop` is exclusive; `count` caps the selected indices after slicing. Do not set both `sample` and `samples`. |
67
70
  | `modalities` | Regular modalities that participate in sample-ID intersection. Each value is either a plain path string or an object with a `path` key and an optional `split` key (see below). Which modalities are required depends on the transform (see table below). |
68
71
  | `hierarchical_modalities` | Per-scene data (e.g. intrinsics). Same format as `modalities`. Loaded once per scene and cached. |
69
72
  | `pipeline` | Optional runtime routing block compatible with `euler-inference` (`output_root`, `outputs_manifest_path`, `output_targets`). |
@@ -72,6 +75,11 @@ Every subcommand takes a **dataset config** JSON that points to the input data a
72
75
 
73
76
  When a modality directory contains [ds-crawler](https://github.com/d-rothen/ds-crawler) split files (`.ds_crawler/split_<name>.json`), you can select a subset of the data by setting the `split` key on that modality. Sample IDs are matched by intersection across all modalities, so specifying a split on a single modality is sufficient to restrict the entire dataset.
74
77
 
78
+ For quick slices after euler-loading has matched modalities, set `samples`.
79
+ For example, `{"samples": {"step": 2}}` processes every second matched sample,
80
+ and `{"samples": {"start": 10, "step": 5, "count": 20}}` processes 20 samples
81
+ starting at index 10 with stride 5.
82
+
75
83
  **Required modalities per transform:**
76
84
 
77
85
  | Transform | `modalities` | `hierarchical_modalities` |
@@ -128,6 +136,7 @@ Controls the fog simulation.
128
136
  "contrast_threshold": 0.05,
129
137
  "device": "cpu",
130
138
  "gpu_batch_size": 4,
139
+ "augmentations": { ... },
131
140
  "selection": { ... },
132
141
  "models": { ... }
133
142
  }
@@ -142,6 +151,7 @@ Controls the fog simulation.
142
151
  | `contrast_threshold` | Threshold *C_t* used in the visibility-to-attenuation conversion (default `0.05`). |
143
152
  | `device` | `"cpu"`, `"cuda"`, `"mps"`, or `"gpu"` (alias for cuda). |
144
153
  | `gpu_batch_size` | Batch size when running on GPU. Uniform-model samples are batched; heterogeneous samples are processed individually. |
154
+ | `augmentations` | Optional stepped augmentation set. When present, every input sample produces every configured augmentation and uses the file-id hierarchy output layout described below. |
145
155
 
146
156
  ### Fog Model
147
157
 
@@ -208,10 +218,10 @@ Each image is assigned a fog model via the `selection` block:
208
218
  "selection": {
209
219
  "mode": "weighted",
210
220
  "weights": {
211
- "uniform": 1.0,
212
- "heterogeneous_k": 0.0,
213
- "heterogeneous_ls": 0.0,
214
- "heterogeneous_k_ls": 0.0
221
+ "uniform": 0.25,
222
+ "heterogeneous_k": 0.35,
223
+ "heterogeneous_ls": 0.25,
224
+ "heterogeneous_k_ls": 0.15
215
225
  }
216
226
  }
217
227
  ```
@@ -242,28 +252,106 @@ Each model specifies a `visibility_m` distribution from which a visibility dista
242
252
 
243
253
  The sampled visibility *V* is converted to the attenuation coefficient: **k = -ln(C_t) / V**.
244
254
 
255
+ ### Stepped Augmentations
256
+
257
+ For benchmark generation, set `augmentations` in the fog config. This switches
258
+ the fog transform from one sampled output per input to one output per configured
259
+ variant:
260
+
261
+ ```json
262
+ {
263
+ "airlight": "from_sky",
264
+ "seed": 1337,
265
+ "contrast_threshold": 0.05,
266
+ "augmentations": {
267
+ "file_id_hierarchy_name": "file_id",
268
+ "attribute_key": "fog_augmentation",
269
+ "models": ["uniform"],
270
+ "visibility_m": [10, 20, 40, 70, 100],
271
+ "airlight_methods": ["from_sky"]
272
+ }
273
+ }
274
+ ```
275
+
276
+ The matrix form above expands as the Cartesian product of `models`,
277
+ `visibility_m` (MOR in metres), optional `scattering_coefficients` / `beta`, and
278
+ airlight choices. `file_id_hierarchy_name` names the inserted hierarchy level
279
+ when the underlying ds-crawler writer has a hierarchy separator; the directory
280
+ name is the source file id in either case. For tighter control, use explicit
281
+ variants:
282
+
283
+ ```json
284
+ "augmentations": {
285
+ "variants": [
286
+ {
287
+ "id": "mor_010m_sky",
288
+ "model": "uniform",
289
+ "visibility_m": 10,
290
+ "airlight_method": "from_sky"
291
+ },
292
+ {
293
+ "id": "beta_0.15_white",
294
+ "model": "heterogeneous_k",
295
+ "scattering_coefficient": 0.15,
296
+ "atmospheric_light": [1.0, 1.0, 1.0],
297
+ "k_hetero": {
298
+ "scales": "smooth_auto",
299
+ "correlation_length_fraction": 0.25,
300
+ "octaves": 3,
301
+ "min_factor": 0.65,
302
+ "max_factor": 1.45,
303
+ "contrast": 0.65,
304
+ "normalize_to_mean": true
305
+ }
306
+ }
307
+ ]
308
+ }
309
+ ```
310
+
311
+ Each output entry receives per-file ds-crawler attributes under
312
+ `fog_augmentation`, including the augmentation id, source id, source full id,
313
+ model, actual scattering coefficient, actual atmospheric light, and configured
314
+ MOR/beta descriptors when available. euler-loading exposes these as
315
+ `sample["attributes"]["rgb"]["fog_augmentation"]`.
316
+
245
317
  ### Heterogeneous Noise Fields
246
318
 
247
- Both `k_hetero` and `ls_hetero` use Perlin FBM (fractional Brownian motion) to generate spatially-varying factor fields:
319
+ Both `k_hetero` and `ls_hetero` use Perlin FBM (fractional Brownian
320
+ motion) to generate spatially-varying factor fields. For realistic fog,
321
+ prefer the smooth mode: it keeps Perlin wavelengths tied to the image size,
322
+ then optionally reduces noise contrast and applies a final blur before mapping
323
+ the noise to physical factors.
248
324
 
249
325
  ```json
250
326
  "k_hetero": {
251
- "scales": "auto",
252
- "min_scale": 2,
327
+ "scales": "smooth_auto",
328
+ "correlation_length_fraction": 0.25,
329
+ "octaves": 3,
253
330
  "max_scale": null,
254
- "min_factor": 0.0,
255
- "max_factor": 1.0,
331
+ "min_factor": 0.65,
332
+ "max_factor": 1.45,
333
+ "contrast": 0.65,
334
+ "smooth_sigma_fraction": 0.0,
256
335
  "normalize_to_mean": true
257
336
  }
258
337
  ```
259
338
 
260
- The noise field (values in [0, 1]) is mapped to a factor field: `factor(x) = min_factor + (max_factor - min_factor) * noise(x)`. When `normalize_to_mean` is `true`, the factor field is rescaled so its spatial mean equals 1.0, preserving the overall fog density while introducing spatial variation.
339
+ The noise field (values in [0, 1]) is mapped to a factor field:
340
+ `factor(x) = min_factor + (max_factor - min_factor) * noise(x)`.
341
+ `contrast < 1` compresses the noise around 0.5 before this mapping, avoiding
342
+ extreme local fog density. When `normalize_to_mean` is `true`, the factor field
343
+ is rescaled so its spatial mean equals 1.0, preserving the overall fog density
344
+ while introducing spatial variation.
261
345
 
262
346
  | Parameter | Effect |
263
347
  |---|---|
264
348
  | `min_factor` / `max_factor` | Range of the multiplicative factor. |
265
349
  | `normalize_to_mean` | Rescale factors so the image-wide mean equals the base value. Recommended for `k_hetero`. |
266
- | `scales` / `min_scale` / `max_scale` | Control spatial frequency content. |
350
+ | `scales: "smooth_auto"` | Build low-frequency Perlin scales from the image size. |
351
+ | `correlation_length_fraction` | Approximate smallest fog feature size as a fraction of the shorter image side. Larger values create smoother gradients. |
352
+ | `octaves` / `lacunarity` / `max_scale` | Control how many increasingly broad Perlin components are mixed. |
353
+ | `contrast` | Compress or expand the Perlin range before mapping to factors. Values below 1 are recommended. |
354
+ | `smooth_sigma` / `smooth_sigma_fraction` | Optional final Gaussian blur in pixels or as a fraction of the shorter image side. |
267
355
 
268
356
  ### Fog Output
269
357
 
@@ -283,6 +371,22 @@ When a pipeline target is present, `pipeline.output_targets[].path` replaces
283
371
  `output_path` entirely. Standalone/direct `FogTransform(...)` usage without the
284
372
  CLI still uses the legacy per-model layout with `config.json` sidecars.
285
373
 
374
+ With `augmentations` enabled, source-backed outputs are written one level below
375
+ the source file id instead:
376
+
377
+ ```
378
+ <output_path>/
379
+ .ds_crawler/output.json
380
+ Scene01/
381
+ Camera_0/
382
+ 00000/
383
+ mor_10m_airlight_from_sky.png
384
+ mor_20m_airlight_from_sky.png
385
+ ```
386
+
387
+ Auxiliary `scattering_coefficient` and `atmospheric_light` pipeline targets use
388
+ the same file-id hierarchy and write matching `.npy` augmentation files.
389
+
286
390
  ---
287
391
 
288
392
  ## Sky-Depth Transform
@@ -8,7 +8,9 @@ from __future__ import annotations
8
8
  import argparse
9
9
  import inspect
10
10
  import json
11
+ from collections.abc import Iterable, Iterator, Sequence
11
12
  from pathlib import Path
13
+ from typing import Any
12
14
 
13
15
  from euler_preprocess.common.dataset import build_dataset
14
16
  from euler_preprocess.common.logging import get_logger, log_dataset_info
@@ -27,6 +29,132 @@ def _resolve(path_str: str, config_dir: Path) -> Path:
27
29
  return (config_dir / p).resolve()
28
30
 
29
31
 
32
+ class _SelectedSamples(Sequence):
33
+ """Lazy view over selected euler-loading dataset entries."""
34
+
35
+ def __init__(self, dataset, indices: Iterable[int]) -> None:
36
+ self.dataset = dataset
37
+ self.indices = tuple(indices)
38
+
39
+ def __len__(self) -> int:
40
+ return len(self.indices)
41
+
42
+ def __iter__(self) -> Iterator[dict]:
43
+ for index in self.indices:
44
+ yield self.dataset[index]
45
+
46
+ def __getitem__(self, index: int | slice):
47
+ if isinstance(index, slice):
48
+ return [self.dataset[i] for i in self.indices[index]]
49
+ return self.dataset[self.indices[index]]
50
+
51
+
52
+ def _validate_sample_index(value: Any, *, key: str, dataset_size: int) -> int:
53
+ if isinstance(value, bool) or not isinstance(value, int):
54
+ raise ValueError(f"{key} must be a non-negative integer index")
55
+ if value < 0:
56
+ raise ValueError(f"{key} must be a non-negative integer index")
57
+ if value >= dataset_size:
58
+ raise IndexError(
59
+ f"{key} {value} out of range for dataset of length {dataset_size}"
60
+ )
61
+ return value
62
+
63
+
64
+ def _positive_int(value: Any, *, key: str) -> int:
65
+ if isinstance(value, bool) or not isinstance(value, int):
66
+ raise ValueError(f"{key} must be a positive integer")
67
+ if value <= 0:
68
+ raise ValueError(f"{key} must be a positive integer")
69
+ return value
70
+
71
+
72
+ def _non_negative_int(value: Any, *, key: str) -> int:
73
+ if isinstance(value, bool) or not isinstance(value, int):
74
+ raise ValueError(f"{key} must be a non-negative integer")
75
+ if value < 0:
76
+ raise ValueError(f"{key} must be a non-negative integer")
77
+ return value
78
+
79
+
80
+ def _resolve_sample_indices(selection: Any, *, dataset_size: int) -> tuple[int, ...]:
81
+ if isinstance(selection, list):
82
+ indices = tuple(
83
+ _validate_sample_index(value, key="samples[]", dataset_size=dataset_size)
84
+ for value in selection
85
+ )
86
+ if not indices:
87
+ raise ValueError("samples must select at least one dataset entry")
88
+ return indices
89
+
90
+ if not isinstance(selection, dict):
91
+ raise ValueError("samples must be an object or a list of integer indices")
92
+
93
+ allowed = {"start", "stop", "step", "count"}
94
+ unknown = sorted(set(selection) - allowed)
95
+ if unknown:
96
+ raise ValueError(f"samples contains unknown keys: {', '.join(unknown)}")
97
+
98
+ start = _non_negative_int(selection.get("start", 0), key="samples.start")
99
+ stop_value = selection.get("stop")
100
+ if stop_value is None:
101
+ stop = dataset_size
102
+ else:
103
+ stop = _non_negative_int(stop_value, key="samples.stop")
104
+ step = _positive_int(selection.get("step", 1), key="samples.step")
105
+
106
+ if start >= dataset_size:
107
+ raise IndexError(
108
+ f"samples.start {start} out of range for dataset of length {dataset_size}"
109
+ )
110
+
111
+ indices = tuple(range(start, min(stop, dataset_size), step))
112
+ if "count" in selection:
113
+ count = _positive_int(selection["count"], key="samples.count")
114
+ indices = indices[:count]
115
+
116
+ if not indices:
117
+ raise ValueError("samples must select at least one dataset entry")
118
+ return indices
119
+
120
+
121
+ def _select_configured_samples(config: dict, dataset, logger):
122
+ """Apply optional top-level sample selection from the dataset config."""
123
+ has_sample = "sample" in config
124
+ has_samples = "samples" in config
125
+ if has_sample and has_samples:
126
+ raise ValueError("Use either sample or samples, not both")
127
+ if not has_sample and not has_samples:
128
+ return dataset
129
+
130
+ dataset_size = len(dataset)
131
+ if has_sample:
132
+ sample_index = _validate_sample_index(
133
+ config["sample"],
134
+ key="sample",
135
+ dataset_size=dataset_size,
136
+ )
137
+ sample = dataset[sample_index]
138
+ logger.info(
139
+ "Sample selection: using sample=%d of %d (id=%s, full_id=%s)",
140
+ sample_index,
141
+ dataset_size,
142
+ sample.get("id"),
143
+ sample.get("full_id"),
144
+ )
145
+ return [sample]
146
+
147
+ indices = _resolve_sample_indices(config["samples"], dataset_size=dataset_size)
148
+ logger.info(
149
+ "Sample selection: using %d/%d samples (first_index=%d, last_index=%d)",
150
+ len(indices),
151
+ dataset_size,
152
+ indices[0],
153
+ indices[-1],
154
+ )
155
+ return _SelectedSamples(dataset, indices)
156
+
157
+
30
158
  def _run_transform(args: argparse.Namespace, transform_class: type) -> int:
31
159
  """Shared logic for all subcommands."""
32
160
  logger = get_logger()
@@ -57,6 +185,7 @@ def _run_transform(args: argparse.Namespace, transform_class: type) -> int:
57
185
  dataset = build_dataset(config, required_modalities, required_hierarchical)
58
186
  output_backends = prepare_output_backends(config, dataset, transform_class)
59
187
  primary_backend = next(iter(output_backends.values()))
188
+ samples = _select_configured_samples(config, dataset, logger)
60
189
  dataset_name = config.get("dataset", "dataset")
61
190
 
62
191
  raw_modalities = {
@@ -69,7 +198,7 @@ def _run_transform(args: argparse.Namespace, transform_class: type) -> int:
69
198
  modality_info[name] = {"path": entry}
70
199
  else:
71
200
  modality_info[name] = entry
72
- log_dataset_info(logger, dataset_name, len(dataset), modality_info, use_gpu)
201
+ log_dataset_info(logger, dataset_name, len(samples), modality_info, use_gpu)
73
202
  for slot, backend in output_backends.items():
74
203
  logger.info("Output path [%s]: %s", slot, backend.root)
75
204
 
@@ -98,7 +227,7 @@ def _run_transform(args: argparse.Namespace, transform_class: type) -> int:
98
227
  )
99
228
  transform = transform_class(**transform_kwargs)
100
229
 
101
- saved_paths = transform.run(dataset)
230
+ saved_paths = transform.run(samples)
102
231
 
103
232
  logger.info("Transform complete. Generated %d outputs.", len(saved_paths))
104
233
  return 0