euler-loading 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. euler_loading-1.1.0/.github/workflows/workflow.yml +30 -0
  2. euler_loading-1.1.0/.gitignore +2 -0
  3. euler_loading-1.1.0/PKG-INFO +12 -0
  4. euler_loading-1.1.0/README.md +360 -0
  5. euler_loading-1.1.0/euler_loading/__init__.py +19 -0
  6. euler_loading-1.1.0/euler_loading/_ds_crawler_utils.py +59 -0
  7. euler_loading-1.1.0/euler_loading/_metadata.py +275 -0
  8. euler_loading-1.1.0/euler_loading/_resolution.py +164 -0
  9. euler_loading-1.1.0/euler_loading/_writing.py +176 -0
  10. euler_loading-1.1.0/euler_loading/dataset.py +605 -0
  11. euler_loading-1.1.0/euler_loading/indexing.py +122 -0
  12. euler_loading-1.1.0/euler_loading/loaders/__init__.py +26 -0
  13. euler_loading-1.1.0/euler_loading/loaders/_annotations.py +70 -0
  14. euler_loading-1.1.0/euler_loading/loaders/_writer_utils.py +170 -0
  15. euler_loading-1.1.0/euler_loading/loaders/contracts.py +63 -0
  16. euler_loading-1.1.0/euler_loading/loaders/cpu/__init__.py +11 -0
  17. euler_loading-1.1.0/euler_loading/loaders/cpu/generic.py +109 -0
  18. euler_loading-1.1.0/euler_loading/loaders/cpu/generic_dense_depth.py +286 -0
  19. euler_loading-1.1.0/euler_loading/loaders/cpu/real_drive_sim.py +151 -0
  20. euler_loading-1.1.0/euler_loading/loaders/cpu/vkitti2.py +278 -0
  21. euler_loading-1.1.0/euler_loading/loaders/generate/__init__.py +1 -0
  22. euler_loading-1.1.0/euler_loading/loaders/generate/__main__.py +114 -0
  23. euler_loading-1.1.0/euler_loading/loaders/generate/loaders.json +422 -0
  24. euler_loading-1.1.0/euler_loading/loaders/generic.py +12 -0
  25. euler_loading-1.1.0/euler_loading/loaders/gpu/__init__.py +10 -0
  26. euler_loading-1.1.0/euler_loading/loaders/gpu/generic.py +103 -0
  27. euler_loading-1.1.0/euler_loading/loaders/gpu/generic_dense_depth.py +295 -0
  28. euler_loading-1.1.0/euler_loading/loaders/gpu/real_drive_sim.py +441 -0
  29. euler_loading-1.1.0/euler_loading/loaders/gpu/vkitti2.py +284 -0
  30. euler_loading-1.1.0/euler_loading/loaders/real_drive_sim.py +12 -0
  31. euler_loading-1.1.0/euler_loading/loaders/vkitti2.py +12 -0
  32. euler_loading-1.1.0/example.py +82 -0
  33. euler_loading-1.1.0/package-lock.json +6 -0
  34. euler_loading-1.1.0/pyproject.toml +27 -0
  35. euler_loading-1.1.0/sample_rds.py +30 -0
  36. euler_loading-1.1.0/tests/__init__.py +0 -0
  37. euler_loading-1.1.0/tests/conftest.py +280 -0
  38. euler_loading-1.1.0/tests/example_rds_calib.json +101 -0
  39. euler_loading-1.1.0/tests/test_dataset.py +1140 -0
  40. euler_loading-1.1.0/tests/test_indexing.py +165 -0
  41. euler_loading-1.1.0/tests/test_loaders.py +640 -0
  42. euler_loading-1.1.0/tests/test_real_dataset.py +223 -0
  43. euler_loading-1.1.0/tests/test_writing.py +276 -0
  44. euler_loading-1.1.0/vkitti_cpu_example_output.json +30 -0
  45. euler_loading-1.1.0/vkitti_gpu_example_output.json +30 -0
@@ -0,0 +1,30 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*" # This triggers the workflow only when you push a tag starting with 'v'
7
+
8
+ jobs:
9
+ build-and-publish:
10
+ name: Build and Publish
11
+ runs-on: ubuntu-latest
12
+ environment: pypi # This matches the environment we set up in Step 2
13
+
14
+ permissions:
15
+ id-token: write # CRITICAL: This is what allows OIDC (passwordless) auth to PyPI
16
+ contents: read # Required to check out the code
17
+
18
+ steps:
19
+ - name: Checkout code
20
+ uses: actions/checkout@v4
21
+
22
+ - name: Install uv
23
+ uses: astral-sh/setup-uv@v5 # The official Astral action
24
+
25
+ - name: Build package
26
+ run: uv build
27
+
28
+ - name: Publish to PyPI
29
+ # uv publish automatically detects it is in GitHub Actions and uses OIDC
30
+ run: uv publish
@@ -0,0 +1,2 @@
1
+ __pycache__
2
+ .vscode
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.4
2
+ Name: euler-loading
3
+ Version: 1.1.0
4
+ Summary: Multi-modal PyTorch dataloader using ds-crawler indices
5
+ Requires-Python: >=3.9
6
+ Requires-Dist: ds-crawler
7
+ Requires-Dist: numpy
8
+ Requires-Dist: pillow
9
+ Provides-Extra: dev
10
+ Requires-Dist: pytest; extra == 'dev'
11
+ Provides-Extra: gpu
12
+ Requires-Dist: torch; extra == 'gpu'
@@ -0,0 +1,360 @@
1
+ # euler-loading
2
+
3
+ Multi-modal PyTorch `Dataset` that synchronises files across arbitrary dataset modalities indexed by [ds-crawler](https://github.com/d-rothen/ds-crawler).
4
+
5
+ Each modality points at a directory (or `.zip` archive) that carries its own `ds-crawler.config` (or cached `output.json`).
6
+ ds-crawler indexes the directory tree, discovers files, and exposes hierarchical metadata (path properties, calibration files, …).
7
+ euler-loading then **intersects file IDs** across all modalities so that every sample contains exactly one file per modality. Additional hierarchical data (e.g. per-scene calibration files) can be loaded via `hierarchical_modalities`.
8
+ How a file is actually **loaded** (image, depth map, point cloud, …) is configurable per modality — either supply a `Callable` or let euler-loading resolve a built-in loader automatically from the ds-crawler config.
9
+ Writer functions can be resolved the same way, so inference outputs can be written back in dataset-native formats.
10
+
11
+ ## Installation
12
+
13
+ ```bash
14
+ uv pip install "euler-loading[gpu] @ git+https://github.com/d-rothen/euler-loading.git"
15
+ ```
16
+
17
+ Requires Python >= 3.9. ds-crawler, numpy, and pillow are pulled in automatically; PyTorch is only installed with the `[gpu]` extra.
18
+
19
+ The `[gpu]` extra installs PyTorch. Without it the package still works but the GPU loader variants are unavailable — use the CPU (numpy) loaders instead.
20
+
21
+ ## Quick start
22
+
23
+ ```python
24
+ from euler_loading import Modality, MultiModalDataset
25
+
26
+ dataset = MultiModalDataset(
27
+ modalities={
28
+ "rgb": Modality("/data/vkitti2/rgb", loader=load_rgb),
29
+ "depth": Modality("/data/vkitti2/depth", loader=load_depth),
30
+ "classSegmentation": Modality("/data/vkitti2/classSegmentation", loader=load_classSegmentation),
31
+ },
32
+ hierarchical_modalities={ # optional – for files at intermediate hierarchy levels
33
+ "intrinsics": Modality("/data/vkitti2/intrinsics", loader=parse_intrinsics),
34
+ },
35
+ transforms=[normalize, augment], # optional
36
+ )
37
+
38
+ sample = dataset[0]
39
+ # sample["rgb"] – whatever load_rgb returned
40
+ # sample["depth"] – whatever load_depth returned
41
+ # sample["classSegmentation"] – whatever load_classSegmentation returned
42
+ # sample["intrinsics"] – dict {file_id: parsed_result} for hierarchical modality
43
+ # sample["id"] – the file ID (leaf only)
44
+ # sample["full_id"] – full hierarchical path including file ID
45
+ # sample["meta"] – per-modality ds-crawler file entries
46
+ ```
47
+
48
+ Works with `torch.utils.data.DataLoader` out of the box.
49
+
50
+ ## API
51
+
52
+ ### `Modality(path, ..., loader=None, metadata=None)`
53
+
54
+ Frozen dataclass describing one data modality.
55
+
56
+ | Parameter | Type | Description |
57
+ |-----------|------|-------------|
58
+ | `path` | `str` | Absolute path to the modality root directory or `.zip` archive. Must contain a `ds-crawler.config` or cached `output.json`. |
59
+ | `origin_path` | `str \| None` | Original path before copying/symlinking (e.g. for SLURM staging). Not used by euler-loading itself — useful for experiment logging to retain references to the original dataset location. |
60
+ | `loader` | `Callable[..., Any] \| None` | Receives the file path (or `BinaryIO` buffer for zip-backed modalities) and an optional `meta` dict. Returns loaded data. When `None`, the loader is resolved automatically from the ds-crawler index (see [Automatic loader resolution](#automatic-loader-resolution)). |
61
+ | `writer` | `Callable[..., Any] \| None` | Receives `(path, value, meta)` and writes modality data to disk. When `None`, euler-loading tries to resolve a built-in writer from ds-crawler metadata (`write_<function>` or `write_<suffix>` for `read_<suffix>`). |
62
+ | `used_as` | `str \| None` | Optional experiment role (e.g. `input`, `target`, `condition`). |
63
+ | `slot` | `str \| None` | Optional fully-qualified logging slot (e.g. `dehaze.input.rgb`). |
64
+ | `modality_type` | `str \| None` | Optional modality type override (e.g. `rgb`, `depth`). |
65
+ | `hierarchy_scope` | `str \| None` | Optional scope label for hierarchical modalities (e.g. `scene_camera`). |
66
+ | `applies_to` | `list[str] \| None` | Optional list of regular modality names a hierarchical modality applies to. |
67
+ | `metadata` | `dict[str, Any]` | Optional arbitrary metadata. Keys under `metadata["euler_loading"]` are treated as euler-loading defaults. |
68
+
69
+ The loader is the **only** place where domain-specific I/O happens.
70
+ euler-loading never interprets file contents — it only resolves *which* file to load and passes the path (or in-memory buffer) to your function.
71
+
72
+ ### `MultiModalDataset.get_writer(modality_name)`
73
+
74
+ Returns the resolved writer callable for a modality. Raises `ValueError` when no writer is configured/discoverable.
75
+
76
+ ### `MultiModalDataset.write_sample(sample_index, outputs, output_root, ...)`
77
+
78
+ Writes one sample's modality outputs back to disk using resolved writers.
79
+
80
+ - `outputs` is `{modality_name: value}`.
81
+ - `output_root` is either one root path for all modalities or per-modality roots.
82
+ - Relative dataset paths are preserved under the output root(s), so generated data can be re-indexed with matching IDs.
83
+
84
+ ### `MultiModalDataset.describe_for_runlog()`
85
+
86
+ Returns a structured descriptor for run metadata:
87
+
88
+ ```python
89
+ {
90
+ "modalities": {
91
+ "hazy_rgb": {
92
+ "path": "...",
93
+ "origin_path": "...",
94
+ "used_as": "input",
95
+ "slot": "dehaze.input.rgb",
96
+ "modality_type": "rgb",
97
+ },
98
+ },
99
+ "hierarchical_modalities": {
100
+ "camera_intrinsics": {
101
+ "path": "...",
102
+ "origin_path": "...",
103
+ "used_as": "condition",
104
+ "slot": "dehaze.condition.camera_intrinsics",
105
+ "hierarchy_scope": "scene_camera",
106
+ "applies_to": ["hazy_rgb"],
107
+ },
108
+ },
109
+ }
110
+ ```
111
+
112
+ Resolution order is: explicit `Modality` fields -> `Modality.metadata["euler_loading"]` -> ds-crawler config `properties["euler_loading"]` -> heuristics.
113
+
114
+ ### `MultiModalDataset.modality_paths()`
115
+
116
+ Returns a dict mapping each regular modality name to `{"path": ..., "origin_path": ...}`.
117
+
118
+ ### `MultiModalDataset.hierarchical_modality_paths()`
119
+
120
+ Returns a dict mapping each hierarchical modality name to `{"path": ..., "origin_path": ...}`.
121
+
122
+ ### `MultiModalDataset.get_modality_metadata(modality_name)`
123
+
124
+ Returns the ds-crawler metadata dict for the given modality.
125
+
126
+ ### `MultiModalDataset(modalities, hierarchical_modalities=None, transforms=None)`
127
+
128
+ PyTorch `Dataset`. On construction it:
129
+
130
+ 1. Runs `ds_crawler.index_dataset_from_path()` for every modality (regular and hierarchical).
131
+ 2. Computes the **sorted intersection** of file IDs across all regular modalities.
132
+ 3. Logs warnings for unmatched files; raises `ValueError` when the intersection is empty.
133
+
134
+ | Parameter | Type | Description |
135
+ |-----------|------|-------------|
136
+ | `modalities` | `dict[str, Modality]` | At least one entry required. Keys become the sample dict keys. These modalities participate in ID intersection. |
137
+ | `hierarchical_modalities` | `dict[str, Modality] \| None` | Optional modalities whose files live at intermediate hierarchy levels (e.g. per-scene intrinsics). These do **not** participate in ID intersection. Each sample will contain a dict `{file_id: loaded_result}` with all files at or above the sample's hierarchy level. Results are cached so shared files are parsed only once. |
138
+ | `transforms` | `list[Callable[[dict], dict]] \| None` | Applied in order after loading. Each receives and returns the full sample dict. |
139
+
140
+ #### Sample dict
141
+
142
+ `dataset[i]` returns:
143
+
144
+ ```python
145
+ {
146
+ "<modality_name>": <loader result>, # one entry per regular modality
147
+ ...
148
+ "<hierarchical_modality_name>": { # one entry per hierarchical modality
149
+ "<file_id>": <loader result>, # all files at or above the sample's hierarchy level
150
+ ...
151
+ },
152
+ ...
153
+ "id": str, # file ID (leaf only, shared across modalities)
154
+ "full_id": str, # full hierarchical path including file ID (e.g. "/scene/camera/frame")
155
+ "meta": { # per-modality ds-crawler file entries (regular modalities only)
156
+ "<modality_name>": {"id": ..., "path": ..., "path_properties": ..., "basename_properties": ...},
157
+ ...
158
+ },
159
+ }
160
+ ```
161
+
162
+ Hierarchical modality results are cached so shared files are parsed only once.
163
+
164
+ ### `FileRecord`
165
+
166
+ Frozen dataclass exposed for introspection. Each record ties a ds-crawler file entry to its position in the hierarchy.
167
+
168
+ | Field | Type | Description |
169
+ |-------|------|-------------|
170
+ | `file_entry` | `dict[str, Any]` | Raw ds-crawler entry (keys: `id`, `path`, `path_properties`, `basename_properties`). |
171
+ | `hierarchy_path` | `tuple[str, ...]` | Tuple of children keys from the dataset root to this file's parent node. Used for matching against hierarchical modalities. |
172
+
173
+ ## Loader functions
174
+
175
+ A loader is any callable with the signature `(path: str | BinaryIO, meta: dict | None) -> Any`.
176
+ The `meta` argument receives the ds-crawler metadata for the modality (or `None` if unavailable).
177
+ For zip-backed modalities, `path` is an in-memory `io.BytesIO` buffer instead of a filesystem path.
178
+
179
+ A writer is any callable with the signature `(path: str, value: Any, meta: dict | None) -> None`.
180
+ Use `meta` for format parameters (units, encoding details) instead of modality-specific argument variants.
181
+
182
+ ```python
183
+ from PIL import Image
184
+ import numpy as np
185
+
186
+ def load_rgb(path, meta=None):
187
+ return Image.open(path).convert("RGB")
188
+
189
+ def load_depth(path, meta=None):
190
+ return np.load(path)
191
+ ```
192
+
193
+ ## Transforms
194
+
195
+ Each transform receives the **full sample dict** (all modalities, calibration, metadata) and must return a dict.
196
+ This enables cross-modal operations:
197
+
198
+ ```python
199
+ def mask_sky_in_depth(sample: dict) -> dict:
200
+ seg = np.array(sample["segmentation"])
201
+ sample["depth"][seg == SKY_CLASS] = 0.0
202
+ return sample
203
+ ```
204
+
205
+ ## Zip archive support
206
+
207
+ Modality paths can point to `.zip` files instead of directories. euler-loading detects zip paths automatically and reads files directly from the archive without extraction:
208
+
209
+ ```python
210
+ dataset = MultiModalDataset(
211
+ modalities={
212
+ "rgb": Modality("/data/vkitti2/rgb.zip", loader=load_rgb),
213
+ "depth": Modality("/data/vkitti2/depth", loader=load_depth), # filesystem and zip can be mixed
214
+ },
215
+ )
216
+ ```
217
+
218
+ - Loaders receive an `io.BytesIO` buffer (with a `.name` attribute for extension detection) instead of a file path.
219
+ - Each DataLoader worker process gets its own `ZipFile` handle, so multi-worker loading is safe.
220
+ - Built-in loaders accept both `str` paths and `BinaryIO` buffers transparently.
221
+
222
+ ## Automatic loader resolution
223
+
224
+ When `Modality.loader` is `None`, euler-loading resolves the loader from the ds-crawler index. The index must contain:
225
+
226
+ ```json
227
+ {
228
+ "euler_loading": {
229
+ "loader": "vkitti2",
230
+ "function": "rgb"
231
+ }
232
+ }
233
+ ```
234
+
235
+ `loader` is the module name (`vkitti2`, `real_drive_sim`, or `generic_dense_depth`) and `function` is the function within that module. The GPU variant is used by default.
236
+
237
+ Writer resolution uses the same module and function metadata:
238
+
239
+ - preferred explicit key: `euler_loading.writer_function`
240
+ - fallback naming: `write_<function>`
241
+ - for read-style functions: also tries `write_<suffix>` for `read_<suffix>`
242
+
243
+ ## ds-crawler integration
244
+
245
+ Every modality root must be independently indexable by ds-crawler.
246
+ Place a `ds-crawler.config` in the root of each modality directory (or zip archive) — ds-crawler will then parse the directory tree and assign each file an ID derived from its path properties.
247
+ Files across modalities are matched by these IDs, so **the directory structure must be consistent** across modalities (identical hierarchy and naming conventions up to the modality-specific parts captured in the config).
248
+
249
+ Calibration files or other per-scene/per-sequence metadata can be loaded via `hierarchical_modalities`. These files are matched to samples based on their position in the hierarchy — all files at or above a sample's hierarchy level are included and cached for efficiency.
250
+
251
+ ## DenseDepthLoader protocol
252
+
253
+ `euler_loading.DenseDepthLoader` is a `runtime_checkable` Protocol defining the loader contract for dense-depth datasets. A conforming module must expose:
254
+
255
+ | Function | Return type |
256
+ |----------|-------------|
257
+ | `rgb(path, meta=None)` | `(3, H, W)` float32 in `[0, 1]` |
258
+ | `depth(path, meta=None)` | `(1, H, W)` float32 in metres |
259
+ | `sky_mask(path, meta=None)` | `(1, H, W)` bool |
260
+ | `read_intrinsics(path, meta=None)` | `(3, 3)` float32 camera matrix |
261
+
262
+ `euler_loading.DenseDepthWriter` and `euler_loading.DenseDepthCodec` provide matching writer and combined reader/writer contracts.
263
+
264
+ ```python
265
+ from euler_loading import DenseDepthLoader
266
+ from euler_loading.loaders.gpu import vkitti2
267
+
268
+ assert isinstance(vkitti2, DenseDepthLoader)
269
+ ```
270
+
271
+ ## Testing
272
+
273
+ ```bash
274
+ pip install -e ".[dev]"
275
+
276
+ # unit tests (mocked, no data needed)
277
+ pytest
278
+
279
+ # integration tests against real on-disk datasets
280
+ pytest -m real
281
+ ```
282
+
283
+ See `tests/test_real_dataset.py` for a full example of wiring up a real multi-modality dataset (VKITTI2).
284
+
285
+
286
+ ## Use with pytorch DataLoaders
287
+ ```python
288
+ from torch.utils.data import DataLoader
289
+
290
+ loader = DataLoader(dataset, batch_size=16, num_workers=4, pin_memory=True)
291
+
292
+ for batch in loader:
293
+ # batch["rgb"] is already (16, 3, H, W) — auto-collated by DataLoader
294
+ ...
295
+ ```
296
+
297
+ ## Built-in loaders
298
+
299
+ `euler_loading.loaders` ships ready-made loader functions for supported datasets.
300
+ Each dataset has a **GPU** variant (returns `torch.Tensor` in CHW format) and a **CPU** variant (returns `np.ndarray` in HWC format).
301
+ The top-level imports (`euler_loading.loaders.vkitti2`, `euler_loading.loaders.real_drive_sim`) re-export the GPU variants for backward compatibility.
302
+
303
+ All built-in loaders accept both filesystem paths (`str`) and in-memory buffers (`BinaryIO`), so they work transparently with zip-backed modalities.
304
+
305
+ ### Virtual KITTI 2 (`euler_loading.loaders.vkitti2`)
306
+
307
+ | Function | Description |
308
+ |----------|-------------|
309
+ | `rgb` | RGB image as float32, normalised to [0, 1] |
310
+ | `depth` | 16-bit PNG depth map, converted from centimetres to metres |
311
+ | `class_segmentation` | RGB-encoded class segmentation mask |
312
+ | `instance_segmentation` | RGB-encoded instance segmentation mask |
313
+ | `scene_flow` | Optical/scene flow map as float32, normalised to [0, 1] |
314
+ | `read_intrinsics` | Parses a 3×3 camera intrinsic matrix from a text file (use with `hierarchical_modalities`) |
315
+ | `read_extrinsics` | Parses a camera extrinsic matrix from a text file (use with `hierarchical_modalities`) |
316
+
317
+ ### Real Drive Sim (`euler_loading.loaders.real_drive_sim`)
318
+
319
+ | Function | Description |
320
+ |----------|-------------|
321
+ | `rgb` | RGB image as float32, normalised to [0, 1] |
322
+ | `depth` | Depth from `.npz` files (metres) |
323
+ | `class_segmentation` | Single-channel class IDs extracted from the red channel of an RGBA PNG |
324
+ | `sky_mask` | Binary mask where class ID == 29 (sky) |
325
+ | `calibration` | Per-sensor calibration from JSON: returns `dict[sensor_name, {"K": (3,3), "T": (4,4), "distortion": (8,)}]` (use with `hierarchical_modalities`) |
326
+
327
+ ### Generic Dense Depth (`euler_loading.loaders.gpu.generic_dense_depth`)
328
+
329
+ A format-agnostic loader that infers the loading strategy from the file extension. Useful for datasets that don't have a dedicated loader module.
330
+
331
+ | Function | Description |
332
+ |----------|-------------|
333
+ | `rgb` | RGB from image files (`.png`, `.jpg`, `.bmp`, `.tif`) or NumPy files (`.npy`, `.npz`), normalised to [0, 1] |
334
+ | `depth` | Depth map from image or NumPy files, returned as-is (no unit conversion) |
335
+ | `sky_mask` | Binary mask by comparing pixels against `meta["sky_mask"]` (`[R, G, B]`). Requires `meta` |
336
+ | `read_intrinsics` | Returns `meta["intrinsics"]` as a `(3, 3)` tensor. Ignores path; requires `meta` |
337
+
338
+ CPU variants of all loaders live under `euler_loading.loaders.cpu.{vkitti2,real_drive_sim,generic_dense_depth}`.
339
+
340
+ ### Flattening hierarchical modalities
341
+
342
+ Hierarchical modalities always return `{file_id: loader_result}` because multiple files can match at different hierarchy levels. When a modality has exactly one file per hierarchy level (common for calibration), you can flatten this with a transform:
343
+
344
+ ```python
345
+ dataset = MultiModalDataset(
346
+ modalities={...},
347
+ hierarchical_modalities={
348
+ "calibration": Modality("/data/rds/calibration", loader=real_drive_sim.calibration),
349
+ },
350
+ transforms=[
351
+ lambda sample: {
352
+ **sample,
353
+ "calibration": next(iter(sample["calibration"].values())),
354
+ },
355
+ ],
356
+ )
357
+
358
+ # Without the transform: sample["calibration"]["<file_id>"]["CS_FRONT"]["K"]
359
+ # With the transform: sample["calibration"]["CS_FRONT"]["K"]
360
+ ```
@@ -0,0 +1,19 @@
1
+ """euler-loading: Multi-modal PyTorch dataloader using ds-crawler indices."""
2
+
3
+ from ._resolution import resolve_loader_module, resolve_writer_module
4
+ from ._writing import create_dataset_writer_from_index
5
+ from .dataset import Modality, MultiModalDataset
6
+ from .indexing import FileRecord
7
+ from .loaders.contracts import DenseDepthCodec, DenseDepthLoader, DenseDepthWriter
8
+
9
+ __all__ = [
10
+ "DenseDepthCodec",
11
+ "DenseDepthLoader",
12
+ "DenseDepthWriter",
13
+ "FileRecord",
14
+ "Modality",
15
+ "MultiModalDataset",
16
+ "create_dataset_writer_from_index",
17
+ "resolve_loader_module",
18
+ "resolve_writer_module",
19
+ ]
@@ -0,0 +1,59 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Mapping
4
+ from typing import Any
5
+
6
+
7
# Keys of a ds-crawler index that describe the index structure itself
# (naming, regexes, hierarchy bookkeeping) rather than user-supplied dataset
# properties; extract_ds_crawler_properties() filters them out.
DS_CRAWLER_STRUCTURAL_KEYS = frozenset((
    "name",
    "type",
    "id_regex",
    "id_regex_join_char",
    "euler_train",
    "dataset",
    "hierarchy_regex",
    "named_capture_group_value_separator",
    "sampled",
))
18
+
19
+
20
def as_non_empty_str(value: Any) -> str | None:
    """Coerce *value* to a stripped string, mapping ``None`` and blanks to ``None``."""
    if value is not None:
        text = str(value).strip()
        if text:
            return text
    return None
25
+
26
+
27
def as_string_list(value: Any) -> list[str] | None:
    """Normalise *value* into a list of non-empty strings.

    ``None`` stays ``None``.  Lists/tuples/sets are cleaned element-wise
    (``None`` and blank entries dropped).  Any other value becomes a
    one-element list, or an empty list when it normalises to nothing.
    """
    def _clean(item: Any) -> str | None:
        # Same normalisation as as_non_empty_str, inlined.
        if item is None:
            return None
        text = str(item).strip()
        return text if text else None

    if value is None:
        return None
    if isinstance(value, (list, tuple, set)):
        return [text for text in map(_clean, value) if text is not None]
    single = _clean(value)
    return [] if single is None else [single]
38
+
39
+
40
def first_non_empty(*candidates: str | None) -> str | None:
    """Return the first candidate that is not ``None``, or ``None`` if all are.

    Candidates are expected to be pre-normalised (e.g. via as_non_empty_str),
    so only ``None`` — not the empty string — counts as "empty" here.
    """
    return next((candidate for candidate in candidates if candidate is not None), None)
45
+
46
+
47
def first_non_empty_list(*candidates: list[str] | None) -> list[str]:
    """Return the first candidate list that is not ``None``, else ``[]``.

    An explicit empty list counts as a match (only ``None`` is skipped),
    mirroring the None-only check in first_non_empty.
    """
    return next((items for items in candidates if items is not None), [])
52
+
53
+
54
def extract_ds_crawler_properties(index_output: Mapping[str, Any]) -> dict[str, Any]:
    """Return the non-structural entries of a ds-crawler index as a plain dict.

    Entries whose key appears in DS_CRAWLER_STRUCTURAL_KEYS (index
    bookkeeping) are dropped; remaining keys are coerced to ``str``.
    """
    properties: dict[str, Any] = {}
    for key, value in index_output.items():
        if key in DS_CRAWLER_STRUCTURAL_KEYS:
            continue
        properties[str(key)] = value
    return properties