apairo 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. apairo-0.2.0/LICENSE +21 -0
  2. apairo-0.2.0/PKG-INFO +286 -0
  3. apairo-0.2.0/README.md +247 -0
  4. apairo-0.2.0/apairo/__init__.py +68 -0
  5. apairo-0.2.0/apairo/cli.py +350 -0
  6. apairo-0.2.0/apairo/core/__init__.py +30 -0
  7. apairo-0.2.0/apairo/core/abstract_dataset.py +398 -0
  8. apairo-0.2.0/apairo/core/abstract_loader.py +30 -0
  9. apairo-0.2.0/apairo/core/cached_dataset.py +71 -0
  10. apairo-0.2.0/apairo/core/channel_view.py +60 -0
  11. apairo-0.2.0/apairo/core/config/keys.yaml +80 -0
  12. apairo-0.2.0/apairo/core/config.py +257 -0
  13. apairo-0.2.0/apairo/core/configurable_dataset.py +246 -0
  14. apairo-0.2.0/apairo/core/filtered_view.py +75 -0
  15. apairo-0.2.0/apairo/core/interpolator.py +44 -0
  16. apairo-0.2.0/apairo/core/layout.py +148 -0
  17. apairo-0.2.0/apairo/core/preprocessor.py +113 -0
  18. apairo-0.2.0/apairo/core/profiled_dataset.py +699 -0
  19. apairo-0.2.0/apairo/core/root_sequence.py +193 -0
  20. apairo-0.2.0/apairo/core/sample.py +16 -0
  21. apairo-0.2.0/apairo/core/sequence_view.py +55 -0
  22. apairo-0.2.0/apairo/core/synchronized_view.py +315 -0
  23. apairo-0.2.0/apairo/core/synchronous_dataset.py +47 -0
  24. apairo-0.2.0/apairo/core/transform.py +23 -0
  25. apairo-0.2.0/apairo/core/utils/__init__.py +0 -0
  26. apairo-0.2.0/apairo/core/utils/exceptions.py +20 -0
  27. apairo-0.2.0/apairo/core/utils/typing.py +3 -0
  28. apairo-0.2.0/apairo/dataset/__init__.py +46 -0
  29. apairo-0.2.0/apairo/dataset/concat.py +87 -0
  30. apairo-0.2.0/apairo/dataset/config/tartan_kitti.yaml +94 -0
  31. apairo-0.2.0/apairo/dataset/goose/__init__.py +3 -0
  32. apairo-0.2.0/apairo/dataset/goose/dataset.py +18 -0
  33. apairo-0.2.0/apairo/dataset/kitti/__init__.py +3 -0
  34. apairo-0.2.0/apairo/dataset/kitti/dataset.py +281 -0
  35. apairo-0.2.0/apairo/dataset/mnt/__init__.py +4 -0
  36. apairo-0.2.0/apairo/dataset/mnt/dataset.py +516 -0
  37. apairo-0.2.0/apairo/dataset/mnt/layout.py +109 -0
  38. apairo-0.2.0/apairo/dataset/profiles/goose.yaml +14 -0
  39. apairo-0.2.0/apairo/dataset/profiles/rellis.yaml +25 -0
  40. apairo-0.2.0/apairo/dataset/profiles/semantic_kitti.yaml +16 -0
  41. apairo-0.2.0/apairo/dataset/raw/__init__.py +3 -0
  42. apairo-0.2.0/apairo/dataset/raw/dataset.py +278 -0
  43. apairo-0.2.0/apairo/dataset/rellis/__init__.py +3 -0
  44. apairo-0.2.0/apairo/dataset/rellis/dataset.py +22 -0
  45. apairo-0.2.0/apairo/dataset/semantic_kitti/__init__.py +3 -0
  46. apairo-0.2.0/apairo/dataset/semantic_kitti/dataset.py +17 -0
  47. apairo-0.2.0/apairo/dataset/stream.py +88 -0
  48. apairo-0.2.0/apairo/dataset/tartan_kitti/__init__.py +3 -0
  49. apairo-0.2.0/apairo/dataset/tartan_kitti/dataset.py +286 -0
  50. apairo-0.2.0/apairo/dataset/tartan_kitti/integration.yaml +68 -0
  51. apairo-0.2.0/apairo/dataset/tartan_kitti/profile.yaml +28 -0
  52. apairo-0.2.0/apairo/dataset/zip.py +104 -0
  53. apairo-0.2.0/apairo/loader/__init__.py +93 -0
  54. apairo-0.2.0/apairo/loader/bin_loader.py +22 -0
  55. apairo-0.2.0/apairo/loader/img_loader.py +61 -0
  56. apairo-0.2.0/apairo/loader/npy_loader.py +29 -0
  57. apairo-0.2.0/apairo/loader/npys_loader.py +54 -0
  58. apairo-0.2.0/apairo/loader/tar_loader.py +99 -0
  59. apairo-0.2.0/apairo/loader/txt_loader.py +38 -0
  60. apairo-0.2.0/apairo/loader/zarr_loader.py +48 -0
  61. apairo-0.2.0/apairo/preprocess/__init__.py +8 -0
  62. apairo-0.2.0/apairo/preprocess/runner.py +133 -0
  63. apairo-0.2.0/apairo/utils/__init__.py +15 -0
  64. apairo-0.2.0/apairo/utils/files.py +36 -0
  65. apairo-0.2.0/apairo/utils/naming.py +22 -0
  66. apairo-0.2.0/apairo/utils/paths.py +4 -0
  67. apairo-0.2.0/apairo/utils/resample.py +84 -0
  68. apairo-0.2.0/apairo/utils/timestamps.py +88 -0
  69. apairo-0.2.0/apairo/utils/types.py +10 -0
  70. apairo-0.2.0/apairo/utils/utils.py +55 -0
  71. apairo-0.2.0/apairo/writer/__init__.py +14 -0
  72. apairo-0.2.0/apairo/writer/bin_writer.py +12 -0
  73. apairo-0.2.0/apairo/writer/npy_writer.py +9 -0
  74. apairo-0.2.0/apairo/writer/tar_writer.py +60 -0
  75. apairo-0.2.0/apairo/writer/zarr_writer.py +72 -0
  76. apairo-0.2.0/apairo.egg-info/PKG-INFO +286 -0
  77. apairo-0.2.0/apairo.egg-info/SOURCES.txt +84 -0
  78. apairo-0.2.0/apairo.egg-info/dependency_links.txt +1 -0
  79. apairo-0.2.0/apairo.egg-info/entry_points.txt +2 -0
  80. apairo-0.2.0/apairo.egg-info/requires.txt +22 -0
  81. apairo-0.2.0/apairo.egg-info/top_level.txt +1 -0
  82. apairo-0.2.0/pyproject.toml +58 -0
  83. apairo-0.2.0/setup.cfg +4 -0
  84. apairo-0.2.0/test/test_public_api.py +21 -0
  85. apairo-0.2.0/test/test_sequence_view.py +110 -0
  86. apairo-0.2.0/test/test_utils.py +66 -0
apairo-0.2.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Augustin BRESSET
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
apairo-0.2.0/PKG-INFO ADDED
@@ -0,0 +1,286 @@
1
+ Metadata-Version: 2.4
2
+ Name: apairo
3
+ Version: 0.2.0
4
+ Summary: Extensible framework to load robotics datasets
5
+ Author-email: Augustin Bresset <augustin.bresset@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/apairo-robotics/apairo
8
+ Project-URL: Repository, https://github.com/apairo-robotics/apairo
9
+ Project-URL: Issues, https://github.com/apairo-robotics/apairo/issues
10
+ Keywords: lidar,robotics,dataset,point-cloud,traversability
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
17
+ Classifier: Intended Audience :: Science/Research
18
+ Requires-Python: >=3.11
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: numpy
22
+ Requires-Dist: PyYAML
23
+ Provides-Extra: torch
24
+ Requires-Dist: torch; extra == "torch"
25
+ Provides-Extra: vision
26
+ Requires-Dist: Pillow; extra == "vision"
27
+ Provides-Extra: mnt
28
+ Requires-Dist: zarr>=3.0; extra == "mnt"
29
+ Requires-Dist: Pillow; extra == "mnt"
30
+ Provides-Extra: dev
31
+ Requires-Dist: pytest; extra == "dev"
32
+ Requires-Dist: Pillow; extra == "dev"
33
+ Requires-Dist: zarr>=3.0; extra == "dev"
34
+ Provides-Extra: docs
35
+ Requires-Dist: mkdocs>=1.6; extra == "docs"
36
+ Requires-Dist: mkdocs-material>=9.5; extra == "docs"
37
+ Requires-Dist: mkdocstrings[python]>=0.25; extra == "docs"
38
+ Dynamic: license-file
39
+
40
+ # apairo
41
+
42
+ Unified Python loader for robotics sensor datasets — one API across synchronous and asynchronous layouts, with built-in preprocessing, filtering, and dataset composition.
43
+
44
+ All data is returned as `numpy.ndarray`. Convert to the framework of your choice.
45
+
46
+ ---
47
+
48
+ ## Installation
49
+
50
+ ```bash
51
+ pip install apairo
52
+ ```
53
+
54
+ Optional extras:
55
+
56
+ ```bash
57
+ pip install "apairo[torch]" # PyTorch support (.pt files)
58
+ pip install "apairo[vision]" # Image loading (Pillow)
59
+ ```
60
+
61
+ Requires Python ≥ 3.11.
62
+
63
+ ---
64
+
65
+ ## Quickstart
66
+
67
+ ```python
68
+ import apairo
69
+
70
+ ds = apairo.SemanticKittiDataset("/data/semantic_kitti", keys=["lidar", "labels"])
71
+ sample = ds[0]
72
+ # sample.data["lidar"] -> np.ndarray (N, 4) float32 [x, y, z, intensity]
73
+ # sample.data["labels"] -> np.ndarray (N,) int64
74
+ ```
75
+
76
+ ---
77
+
78
+ ## Supported datasets
79
+
80
+ | Class | Layout | Modalities |
81
+ |---|---|---|
82
+ | `SemanticKittiDataset` | synchronous | lidar, labels |
83
+ | `Rellis3DDataset` | synchronous | lidar, labels, poses |
84
+ | `Goose3DDataset` | synchronous | lidar, labels |
85
+ | `MNTDataset` | synchronous | lidar, labels, poses |
86
+ | `RawDataset` | asynchronous | any channels — declared in `.apairo/channels.yaml` |
87
+ | `TartanKittiDataset` | asynchronous | any TartanDrive v2 channel |
88
+
89
+ `RawDataset` is the profile-free loader for the asynchronous layout: it takes its
90
+ channels — and their format (`npy`, `npys`, `bin`, `img`, `zarr`) — entirely from
91
+ `.apairo/channels.yaml`, so it loads any such dataset, including the output of
92
+ [apairo_extractor](https://github.com/apairo-robotics/apairo_extractor), with no
93
+ code change.
94
+
95
+ ---
96
+
97
+ ## Command line
98
+
99
+ Installing apairo provides the `apairo` command to inspect and initialize
100
+ datasets from the terminal:
101
+
102
+ ```bash
103
+ # Write/repair the .apairo sidecars by scanning a directory (root-aware, idempotent)
104
+ apairo init /data/my_dataset
105
+
106
+ # Show sequences, channels (tracked + untracked), event count and any issues
107
+ apairo status /data/my_dataset # add --json for machine output
108
+ ```
109
+
110
+ `apairo init` reconstructs the `.apairo` files for data laid out before they
111
+ existed (e.g. an older extraction) — no re-extraction needed — and the result
112
+ loads directly with `RawDataset`. See [Command Line](https://apairo-robotics.github.io/apairo/cli/) for the full reference.
113
+
114
+ ---
115
+
116
+ ## Pipeline
117
+
118
+ apairo provides a composable set of operations that chain together — each returns a full dataset:
119
+
120
+ ```python
121
+ from apairo import Rellis3DDataset, FramePreprocessor
122
+ from torch.utils.data import DataLoader
123
+ import numpy as np
124
+
125
+ # 1. Preprocess — run once, persisted in .apairo, reloaded transparently
126
+ class TravLabel(FramePreprocessor):
127
+ output_key = "trav_gt"; output_loader = "npys"
128
+ input_keys = ["labels"]; timestamps_from = "lidar"; sources = ["labels"]
129
+ def process(self, sample): return (sample.data["labels"] < 10).astype(np.uint8)
130
+
131
+ ds = Rellis3DDataset(root, keys=["lidar", "labels", "ground_height_csf"])
132
+ ds.run_preprocess(TravLabel())
133
+
134
+ # 2. Cache an expensive derived channel — computed once, served from RAM
135
+ ds.transform("ground_height_csf", expensive_smooth)
136
+ ds_prior = ds.select(["ground_height_csf"]).cache()
137
+
138
+ # 3. Build train split — filter, join cached prior, apply augmentation
139
+ valid = np.load("cache/valid_indices.npy")
140
+ ds_train = (
141
+ Rellis3DDataset(root, keys=["lidar", "trav_gt"])
142
+ .filter(valid)
143
+ .join(ds_prior)
144
+ .transform("lidar", RangeFilter(max=50.0))
145
+ )
146
+
147
+ # 4. Drop into DataLoader — no adapter needed
148
+ loader = DataLoader(ds_train, batch_size=8, shuffle=True, collate_fn=my_collate)
149
+ ```
150
+
151
+ See [`examples/`](examples/) for complete runnable pipelines.
152
+
153
+ ---
154
+
155
+ ## Preprocessing
156
+
157
+ Define a `FramePreprocessor` or `SequencePreprocessor`, run it once — apairo persists the output and reloads it transparently on subsequent runs.
158
+
159
+ ```python
160
+ from apairo.preprocess import FramePreprocessor
161
+
162
+ class TravLabel(FramePreprocessor):
163
+ output_key = "trav_label"
164
+ output_loader = "npys"
165
+ input_keys = ["labels"]
166
+ timestamps_from = "labels"
167
+ sources = ["labels"]
168
+
169
+ def process(self, sample) -> np.ndarray:
170
+ return (sample.data["labels"] < 10).astype(np.uint8)
171
+
172
+ ds = apairo.Goose3DDataset("/data/goose", keys=["lidar", "labels"])
173
+ ds.run_preprocess(TravLabel())
174
+ ```
175
+
176
+ See [`apairo_preprocess`](https://github.com/apairo-robotics/apairo_preprocess) for a collection of ready-made preprocessors.
177
+
178
+ ---
179
+
180
+ ## Transforms
181
+
182
+ Apply callables at access time — no disk writes.
183
+
184
+ ```python
185
+ # Per-channel
186
+ ds.transform("lidar", RangeFilter(max=50.0))
187
+
188
+ # Sample-level — consistent mask across aligned channels
189
+ def sync_filter(sample):
190
+ mask = np.linalg.norm(sample.data["lidar"][:, :3], axis=1) < 50.0
191
+ sample.data["lidar"] = sample.data["lidar"][mask]
192
+ sample.data["labels"] = sample.data["labels"][mask]
193
+ return sample
194
+
195
+ ds.transform(sync_filter)
196
+ ```
197
+
198
+ See [`apairo_transform`](https://github.com/apairo-robotics/apairo_transform) for a collection of ready-made transforms.
199
+
200
+ ---
201
+
202
+ ## Filtering
203
+
204
+ `filter()` returns a dataset view restricted to frames that pass a predicate. Sweep once, persist the indices, reload without I/O cost on subsequent runs:
205
+
206
+ ```python
207
+ # Compute and save
208
+ view = ds.filter("trav_gt", lambda gt: (gt == 1).sum() >= 50)
209
+ np.save("cache/valid.npy", view.indices)
210
+
211
+ # Reload — no sweep
212
+ view = ds.filter(np.load("cache/valid.npy"))
213
+ ```
214
+
215
+ ---
216
+
217
+ ## Select & cache
218
+
219
+ `select(keys)` narrows a dataset to a subset of channels. `cache()` materialises it in RAM. Together they let you cache only the channels worth caching:
220
+
221
+ ```python
222
+ ds.transform("ground_height_csf", expensive_smooth)
223
+
224
+ # Compute once, store in RAM
225
+ ds_prior = ds.select(["ground_height_csf"]).cache()
226
+
227
+ # Reuse across training runs — prior served from RAM, base channels from disk
228
+ ds_v1 = base.join(ds_prior).transform(augment_v1)
229
+ ds_v2 = base.join(ds_prior).transform(augment_v2)
230
+ ```
231
+
232
+ ---
233
+
234
+ ## Asynchronous datasets — `synchronize()`
235
+
236
+ Asynchronous datasets (multi-rate sensor rigs) expose a timestamp-ordered event timeline: `ds[i]` is one event from one sensor. To get complete multi-channel frames, resample onto a reference clock:
237
+
238
+ ```python
239
+ ds = apairo.TartanKittiDataset(seq_dir, keys=["velodyne_0", "image_left", "cmd"])
240
+
241
+ ds_sync = ds.synchronize(
242
+ reference="velodyne_0", # default: lowest-frequency channel
243
+ method="latest", # "latest" (zero-order hold) or "nearest"
244
+ tolerance=0.05, # drop frames with no match within ±50 ms
245
+ )
246
+
247
+ ds_sync[0].data # {"velodyne_0": ..., "image_left": ..., "cmd": ...}
248
+ ```
249
+
250
+ The result is a synchronous view — random access, shuffling, and the whole chaining API (`filter`, `select`, `cache`, `join`, `DataLoader`) work unchanged. Matching is a pure index computation; no data is read until access.
251
+
252
+ ---
253
+
254
+ ## Combining datasets
255
+
256
+ ```python
257
+ # ConcatDataset — frame axis (different recording sessions)
258
+ combined = apairo.ConcatDataset([ds_session1, ds_session2])
259
+
260
+ # ZipDataset — channel axis (same frames, different modalities)
261
+ combined = apairo.ZipDataset(ds_base, ds_prior)
262
+ # or: ds_base.join(ds_prior)
263
+
264
+ # Built-in splits
265
+ ds_train = apairo.Rellis3DDataset(root, keys=["lidar", "labels"]).split("train")
266
+ ds_val = apairo.Rellis3DDataset(root, keys=["lidar", "labels"]).split("val")
267
+ ```
268
+
269
+ ---
270
+
271
+ ## Extending apairo
272
+
273
+ Add a new synchronous dataset with a YAML profile and a minimal subclass.
274
+ See [documentation](https://apairo-robotics.github.io/apairo/) for the full guide.
275
+
276
+ ---
277
+
278
+ ## Contributing
279
+
280
+ apairo is one repository of a small ecosystem ([apairo_transform](https://github.com/apairo-robotics/apairo_transform), [apairo_preprocess](https://github.com/apairo-robotics/apairo_preprocess), [apairo_extractor](https://github.com/apairo-robotics/apairo_extractor), [apairo_rr](https://github.com/apairo-robotics/apairo_rr)). Where a change belongs, the design invariants, and the dev workflow are documented in [CONTRIBUTING.md](CONTRIBUTING.md).
281
+
282
+ ---
283
+
284
+ ## License
285
+
286
+ MIT
apairo-0.2.0/README.md ADDED
@@ -0,0 +1,247 @@
1
+ # apairo
2
+
3
+ Unified Python loader for robotics sensor datasets — one API across synchronous and asynchronous layouts, with built-in preprocessing, filtering, and dataset composition.
4
+
5
+ All data is returned as `numpy.ndarray`. Convert to the framework of your choice.
6
+
7
+ ---
8
+
9
+ ## Installation
10
+
11
+ ```bash
12
+ pip install apairo
13
+ ```
14
+
15
+ Optional extras:
16
+
17
+ ```bash
18
+ pip install "apairo[torch]" # PyTorch support (.pt files)
19
+ pip install "apairo[vision]" # Image loading (Pillow)
20
+ ```
21
+
22
+ Requires Python ≥ 3.11.
23
+
24
+ ---
25
+
26
+ ## Quickstart
27
+
28
+ ```python
29
+ import apairo
30
+
31
+ ds = apairo.SemanticKittiDataset("/data/semantic_kitti", keys=["lidar", "labels"])
32
+ sample = ds[0]
33
+ # sample.data["lidar"] -> np.ndarray (N, 4) float32 [x, y, z, intensity]
34
+ # sample.data["labels"] -> np.ndarray (N,) int64
35
+ ```
36
+
37
+ ---
38
+
39
+ ## Supported datasets
40
+
41
+ | Class | Layout | Modalities |
42
+ |---|---|---|
43
+ | `SemanticKittiDataset` | synchronous | lidar, labels |
44
+ | `Rellis3DDataset` | synchronous | lidar, labels, poses |
45
+ | `Goose3DDataset` | synchronous | lidar, labels |
46
+ | `MNTDataset` | synchronous | lidar, labels, poses |
47
+ | `RawDataset` | asynchronous | any channels — declared in `.apairo/channels.yaml` |
48
+ | `TartanKittiDataset` | asynchronous | any TartanDrive v2 channel |
49
+
50
+ `RawDataset` is the profile-free loader for the asynchronous layout: it takes its
51
+ channels — and their format (`npy`, `npys`, `bin`, `img`, `zarr`) — entirely from
52
+ `.apairo/channels.yaml`, so it loads any such dataset, including the output of
53
+ [apairo_extractor](https://github.com/apairo-robotics/apairo_extractor), with no
54
+ code change.
55
+
56
+ ---
57
+
58
+ ## Command line
59
+
60
+ Installing apairo provides the `apairo` command to inspect and initialize
61
+ datasets from the terminal:
62
+
63
+ ```bash
64
+ # Write/repair the .apairo sidecars by scanning a directory (root-aware, idempotent)
65
+ apairo init /data/my_dataset
66
+
67
+ # Show sequences, channels (tracked + untracked), event count and any issues
68
+ apairo status /data/my_dataset # add --json for machine output
69
+ ```
70
+
71
+ `apairo init` reconstructs the `.apairo` files for data laid out before they
72
+ existed (e.g. an older extraction) — no re-extraction needed — and the result
73
+ loads directly with `RawDataset`. See [Command Line](https://apairo-robotics.github.io/apairo/cli/) for the full reference.
74
+
75
+ ---
76
+
77
+ ## Pipeline
78
+
79
+ apairo provides a composable set of operations that chain together — each returns a full dataset:
80
+
81
+ ```python
82
+ from apairo import Rellis3DDataset, FramePreprocessor
83
+ from torch.utils.data import DataLoader
84
+ import numpy as np
85
+
86
+ # 1. Preprocess — run once, persisted in .apairo, reloaded transparently
87
+ class TravLabel(FramePreprocessor):
88
+ output_key = "trav_gt"; output_loader = "npys"
89
+ input_keys = ["labels"]; timestamps_from = "lidar"; sources = ["labels"]
90
+ def process(self, sample): return (sample.data["labels"] < 10).astype(np.uint8)
91
+
92
+ ds = Rellis3DDataset(root, keys=["lidar", "labels", "ground_height_csf"])
93
+ ds.run_preprocess(TravLabel())
94
+
95
+ # 2. Cache an expensive derived channel — computed once, served from RAM
96
+ ds.transform("ground_height_csf", expensive_smooth)
97
+ ds_prior = ds.select(["ground_height_csf"]).cache()
98
+
99
+ # 3. Build train split — filter, join cached prior, apply augmentation
100
+ valid = np.load("cache/valid_indices.npy")
101
+ ds_train = (
102
+ Rellis3DDataset(root, keys=["lidar", "trav_gt"])
103
+ .filter(valid)
104
+ .join(ds_prior)
105
+ .transform("lidar", RangeFilter(max=50.0))
106
+ )
107
+
108
+ # 4. Drop into DataLoader — no adapter needed
109
+ loader = DataLoader(ds_train, batch_size=8, shuffle=True, collate_fn=my_collate)
110
+ ```
111
+
112
+ See [`examples/`](examples/) for complete runnable pipelines.
113
+
114
+ ---
115
+
116
+ ## Preprocessing
117
+
118
+ Define a `FramePreprocessor` or `SequencePreprocessor`, run it once — apairo persists the output and reloads it transparently on subsequent runs.
119
+
120
+ ```python
121
+ from apairo.preprocess import FramePreprocessor
122
+
123
+ class TravLabel(FramePreprocessor):
124
+ output_key = "trav_label"
125
+ output_loader = "npys"
126
+ input_keys = ["labels"]
127
+ timestamps_from = "labels"
128
+ sources = ["labels"]
129
+
130
+ def process(self, sample) -> np.ndarray:
131
+ return (sample.data["labels"] < 10).astype(np.uint8)
132
+
133
+ ds = apairo.Goose3DDataset("/data/goose", keys=["lidar", "labels"])
134
+ ds.run_preprocess(TravLabel())
135
+ ```
136
+
137
+ See [`apairo_preprocess`](https://github.com/apairo-robotics/apairo_preprocess) for a collection of ready-made preprocessors.
138
+
139
+ ---
140
+
141
+ ## Transforms
142
+
143
+ Apply callables at access time — no disk writes.
144
+
145
+ ```python
146
+ # Per-channel
147
+ ds.transform("lidar", RangeFilter(max=50.0))
148
+
149
+ # Sample-level — consistent mask across aligned channels
150
+ def sync_filter(sample):
151
+ mask = np.linalg.norm(sample.data["lidar"][:, :3], axis=1) < 50.0
152
+ sample.data["lidar"] = sample.data["lidar"][mask]
153
+ sample.data["labels"] = sample.data["labels"][mask]
154
+ return sample
155
+
156
+ ds.transform(sync_filter)
157
+ ```
158
+
159
+ See [`apairo_transform`](https://github.com/apairo-robotics/apairo_transform) for a collection of ready-made transforms.
160
+
161
+ ---
162
+
163
+ ## Filtering
164
+
165
+ `filter()` returns a dataset view restricted to frames that pass a predicate. Sweep once, persist the indices, reload without I/O cost on subsequent runs:
166
+
167
+ ```python
168
+ # Compute and save
169
+ view = ds.filter("trav_gt", lambda gt: (gt == 1).sum() >= 50)
170
+ np.save("cache/valid.npy", view.indices)
171
+
172
+ # Reload — no sweep
173
+ view = ds.filter(np.load("cache/valid.npy"))
174
+ ```
175
+
176
+ ---
177
+
178
+ ## Select & cache
179
+
180
+ `select(keys)` narrows a dataset to a subset of channels. `cache()` materialises it in RAM. Together they let you cache only the channels worth caching:
181
+
182
+ ```python
183
+ ds.transform("ground_height_csf", expensive_smooth)
184
+
185
+ # Compute once, store in RAM
186
+ ds_prior = ds.select(["ground_height_csf"]).cache()
187
+
188
+ # Reuse across training runs — prior served from RAM, base channels from disk
189
+ ds_v1 = base.join(ds_prior).transform(augment_v1)
190
+ ds_v2 = base.join(ds_prior).transform(augment_v2)
191
+ ```
192
+
193
+ ---
194
+
195
+ ## Asynchronous datasets — `synchronize()`
196
+
197
+ Asynchronous datasets (multi-rate sensor rigs) expose a timestamp-ordered event timeline: `ds[i]` is one event from one sensor. To get complete multi-channel frames, resample onto a reference clock:
198
+
199
+ ```python
200
+ ds = apairo.TartanKittiDataset(seq_dir, keys=["velodyne_0", "image_left", "cmd"])
201
+
202
+ ds_sync = ds.synchronize(
203
+ reference="velodyne_0", # default: lowest-frequency channel
204
+ method="latest", # "latest" (zero-order hold) or "nearest"
205
+ tolerance=0.05, # drop frames with no match within ±50 ms
206
+ )
207
+
208
+ ds_sync[0].data # {"velodyne_0": ..., "image_left": ..., "cmd": ...}
209
+ ```
210
+
211
+ The result is a synchronous view — random access, shuffling, and the whole chaining API (`filter`, `select`, `cache`, `join`, `DataLoader`) work unchanged. Matching is a pure index computation; no data is read until access.
212
+
213
+ ---
214
+
215
+ ## Combining datasets
216
+
217
+ ```python
218
+ # ConcatDataset — frame axis (different recording sessions)
219
+ combined = apairo.ConcatDataset([ds_session1, ds_session2])
220
+
221
+ # ZipDataset — channel axis (same frames, different modalities)
222
+ combined = apairo.ZipDataset(ds_base, ds_prior)
223
+ # or: ds_base.join(ds_prior)
224
+
225
+ # Built-in splits
226
+ ds_train = apairo.Rellis3DDataset(root, keys=["lidar", "labels"]).split("train")
227
+ ds_val = apairo.Rellis3DDataset(root, keys=["lidar", "labels"]).split("val")
228
+ ```
229
+
230
+ ---
231
+
232
+ ## Extending apairo
233
+
234
+ Add a new synchronous dataset with a YAML profile and a minimal subclass.
235
+ See [documentation](https://apairo-robotics.github.io/apairo/) for the full guide.
236
+
237
+ ---
238
+
239
+ ## Contributing
240
+
241
+ apairo is one repository of a small ecosystem ([apairo_transform](https://github.com/apairo-robotics/apairo_transform), [apairo_preprocess](https://github.com/apairo-robotics/apairo_preprocess), [apairo_extractor](https://github.com/apairo-robotics/apairo_extractor), [apairo_rr](https://github.com/apairo-robotics/apairo_rr)). Where a change belongs, the design invariants, and the dev workflow are documented in [CONTRIBUTING.md](CONTRIBUTING.md).
242
+
243
+ ---
244
+
245
+ ## License
246
+
247
+ MIT
apairo-0.2.0/apairo/__init__.py ADDED
@@ -0,0 +1,68 @@
1
+ """Apairo -- unified robotics dataset loader."""
2
+
3
+ import logging
4
+
5
+ from apairo.core.sample import Sample
6
+ from apairo.core.synchronous_dataset import SynchronousDataset
7
+ from apairo.core.configurable_dataset import ConfigurableDataset
8
+ from apairo.preprocess import FramePreprocessor, SequencePreprocessor
9
+
10
+ from apairo.dataset.raw import RawDataset
11
+ from apairo.dataset.tartan_kitti import TartanKittiDataset
12
+ from apairo.dataset.concat import ConcatDataset
13
+ from apairo.dataset.zip import ZipDataset
14
+ from apairo.dataset.stream import StreamDataset
15
+ from apairo.dataset import split_sequences
16
+ from apairo.core.sequence_view import SequenceView
17
+ from apairo.core.filtered_view import FilteredView
18
+ from apairo.core.channel_view import ChannelView
19
+ from apairo.core.cached_dataset import CachedDataset
20
+ from apairo.core.synchronized_view import SynchronizedView
21
+ from apairo.core.interpolator import Interpolator
22
+ from apairo.dataset.semantic_kitti import SemanticKittiDataset
23
+ from apairo.dataset.rellis import Rellis3DDataset
24
+ from apairo.dataset.goose import Goose3DDataset
25
+ from apairo.dataset.mnt import MNTDataset
26
+
27
+ from apairo.core.layout import ChannelSpec, DatasetLayout
28
+ from apairo.core.transform import Compose
29
+ from apairo.core.config import register_channel, register_raw_channel, verify_config
30
+ from apairo.writer import WRITERS
31
+ from apairo.loader import DERIVED_LOADERS
32
+
33
+ logging.getLogger(__name__).addHandler(logging.NullHandler())
34
+
35
+ __version__ = "0.2.0"
36
+
37
+ __all__ = [
38
+ "Sample",
39
+ "SynchronousDataset",
40
+ "ConfigurableDataset",
41
+ "FramePreprocessor",
42
+ "SequencePreprocessor",
43
+ "RawDataset",
44
+ "TartanKittiDataset",
45
+ "ConcatDataset",
46
+ "ZipDataset",
47
+ "StreamDataset",
48
+ "split_sequences",
49
+ "SequenceView",
50
+ "FilteredView",
51
+ "ChannelView",
52
+ "CachedDataset",
53
+ "SynchronizedView",
54
+ "Interpolator",
55
+ "SemanticKittiDataset",
56
+ "Rellis3DDataset",
57
+ "Goose3DDataset",
58
+ "MNTDataset",
59
+ "ChannelSpec",
60
+ "DatasetLayout",
61
+ "Compose",
62
+ "register_channel",
63
+ "register_raw_channel",
64
+ "verify_config",
65
+ "WRITERS",
66
+ "DERIVED_LOADERS",
67
+ "__version__",
68
+ ]