apairo 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apairo-0.2.0/LICENSE +21 -0
- apairo-0.2.0/PKG-INFO +286 -0
- apairo-0.2.0/README.md +247 -0
- apairo-0.2.0/apairo/__init__.py +68 -0
- apairo-0.2.0/apairo/cli.py +350 -0
- apairo-0.2.0/apairo/core/__init__.py +30 -0
- apairo-0.2.0/apairo/core/abstract_dataset.py +398 -0
- apairo-0.2.0/apairo/core/abstract_loader.py +30 -0
- apairo-0.2.0/apairo/core/cached_dataset.py +71 -0
- apairo-0.2.0/apairo/core/channel_view.py +60 -0
- apairo-0.2.0/apairo/core/config/keys.yaml +80 -0
- apairo-0.2.0/apairo/core/config.py +257 -0
- apairo-0.2.0/apairo/core/configurable_dataset.py +246 -0
- apairo-0.2.0/apairo/core/filtered_view.py +75 -0
- apairo-0.2.0/apairo/core/interpolator.py +44 -0
- apairo-0.2.0/apairo/core/layout.py +148 -0
- apairo-0.2.0/apairo/core/preprocessor.py +113 -0
- apairo-0.2.0/apairo/core/profiled_dataset.py +699 -0
- apairo-0.2.0/apairo/core/root_sequence.py +193 -0
- apairo-0.2.0/apairo/core/sample.py +16 -0
- apairo-0.2.0/apairo/core/sequence_view.py +55 -0
- apairo-0.2.0/apairo/core/synchronized_view.py +315 -0
- apairo-0.2.0/apairo/core/synchronous_dataset.py +47 -0
- apairo-0.2.0/apairo/core/transform.py +23 -0
- apairo-0.2.0/apairo/core/utils/__init__.py +0 -0
- apairo-0.2.0/apairo/core/utils/exceptions.py +20 -0
- apairo-0.2.0/apairo/core/utils/typing.py +3 -0
- apairo-0.2.0/apairo/dataset/__init__.py +46 -0
- apairo-0.2.0/apairo/dataset/concat.py +87 -0
- apairo-0.2.0/apairo/dataset/config/tartan_kitti.yaml +94 -0
- apairo-0.2.0/apairo/dataset/goose/__init__.py +3 -0
- apairo-0.2.0/apairo/dataset/goose/dataset.py +18 -0
- apairo-0.2.0/apairo/dataset/kitti/__init__.py +3 -0
- apairo-0.2.0/apairo/dataset/kitti/dataset.py +281 -0
- apairo-0.2.0/apairo/dataset/mnt/__init__.py +4 -0
- apairo-0.2.0/apairo/dataset/mnt/dataset.py +516 -0
- apairo-0.2.0/apairo/dataset/mnt/layout.py +109 -0
- apairo-0.2.0/apairo/dataset/profiles/goose.yaml +14 -0
- apairo-0.2.0/apairo/dataset/profiles/rellis.yaml +25 -0
- apairo-0.2.0/apairo/dataset/profiles/semantic_kitti.yaml +16 -0
- apairo-0.2.0/apairo/dataset/raw/__init__.py +3 -0
- apairo-0.2.0/apairo/dataset/raw/dataset.py +278 -0
- apairo-0.2.0/apairo/dataset/rellis/__init__.py +3 -0
- apairo-0.2.0/apairo/dataset/rellis/dataset.py +22 -0
- apairo-0.2.0/apairo/dataset/semantic_kitti/__init__.py +3 -0
- apairo-0.2.0/apairo/dataset/semantic_kitti/dataset.py +17 -0
- apairo-0.2.0/apairo/dataset/stream.py +88 -0
- apairo-0.2.0/apairo/dataset/tartan_kitti/__init__.py +3 -0
- apairo-0.2.0/apairo/dataset/tartan_kitti/dataset.py +286 -0
- apairo-0.2.0/apairo/dataset/tartan_kitti/integration.yaml +68 -0
- apairo-0.2.0/apairo/dataset/tartan_kitti/profile.yaml +28 -0
- apairo-0.2.0/apairo/dataset/zip.py +104 -0
- apairo-0.2.0/apairo/loader/__init__.py +93 -0
- apairo-0.2.0/apairo/loader/bin_loader.py +22 -0
- apairo-0.2.0/apairo/loader/img_loader.py +61 -0
- apairo-0.2.0/apairo/loader/npy_loader.py +29 -0
- apairo-0.2.0/apairo/loader/npys_loader.py +54 -0
- apairo-0.2.0/apairo/loader/tar_loader.py +99 -0
- apairo-0.2.0/apairo/loader/txt_loader.py +38 -0
- apairo-0.2.0/apairo/loader/zarr_loader.py +48 -0
- apairo-0.2.0/apairo/preprocess/__init__.py +8 -0
- apairo-0.2.0/apairo/preprocess/runner.py +133 -0
- apairo-0.2.0/apairo/utils/__init__.py +15 -0
- apairo-0.2.0/apairo/utils/files.py +36 -0
- apairo-0.2.0/apairo/utils/naming.py +22 -0
- apairo-0.2.0/apairo/utils/paths.py +4 -0
- apairo-0.2.0/apairo/utils/resample.py +84 -0
- apairo-0.2.0/apairo/utils/timestamps.py +88 -0
- apairo-0.2.0/apairo/utils/types.py +10 -0
- apairo-0.2.0/apairo/utils/utils.py +55 -0
- apairo-0.2.0/apairo/writer/__init__.py +14 -0
- apairo-0.2.0/apairo/writer/bin_writer.py +12 -0
- apairo-0.2.0/apairo/writer/npy_writer.py +9 -0
- apairo-0.2.0/apairo/writer/tar_writer.py +60 -0
- apairo-0.2.0/apairo/writer/zarr_writer.py +72 -0
- apairo-0.2.0/apairo.egg-info/PKG-INFO +286 -0
- apairo-0.2.0/apairo.egg-info/SOURCES.txt +84 -0
- apairo-0.2.0/apairo.egg-info/dependency_links.txt +1 -0
- apairo-0.2.0/apairo.egg-info/entry_points.txt +2 -0
- apairo-0.2.0/apairo.egg-info/requires.txt +22 -0
- apairo-0.2.0/apairo.egg-info/top_level.txt +1 -0
- apairo-0.2.0/pyproject.toml +58 -0
- apairo-0.2.0/setup.cfg +4 -0
- apairo-0.2.0/test/test_public_api.py +21 -0
- apairo-0.2.0/test/test_sequence_view.py +110 -0
- apairo-0.2.0/test/test_utils.py +66 -0
apairo-0.2.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Augustin BRESSET
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
apairo-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: apairo
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Extensible framework to load robotics datasets
|
|
5
|
+
Author-email: Augustin Bresset <augustin.bresset@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/apairo-robotics/apairo
|
|
8
|
+
Project-URL: Repository, https://github.com/apairo-robotics/apairo
|
|
9
|
+
Project-URL: Issues, https://github.com/apairo-robotics/apairo/issues
|
|
10
|
+
Keywords: lidar,robotics,dataset,point-cloud,traversability
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
17
|
+
Classifier: Intended Audience :: Science/Research
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: numpy
|
|
22
|
+
Requires-Dist: PyYAML
|
|
23
|
+
Provides-Extra: torch
|
|
24
|
+
Requires-Dist: torch; extra == "torch"
|
|
25
|
+
Provides-Extra: vision
|
|
26
|
+
Requires-Dist: Pillow; extra == "vision"
|
|
27
|
+
Provides-Extra: mnt
|
|
28
|
+
Requires-Dist: zarr>=3.0; extra == "mnt"
|
|
29
|
+
Requires-Dist: Pillow; extra == "mnt"
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: pytest; extra == "dev"
|
|
32
|
+
Requires-Dist: Pillow; extra == "dev"
|
|
33
|
+
Requires-Dist: zarr>=3.0; extra == "dev"
|
|
34
|
+
Provides-Extra: docs
|
|
35
|
+
Requires-Dist: mkdocs>=1.6; extra == "docs"
|
|
36
|
+
Requires-Dist: mkdocs-material>=9.5; extra == "docs"
|
|
37
|
+
Requires-Dist: mkdocstrings[python]>=0.25; extra == "docs"
|
|
38
|
+
Dynamic: license-file
|
|
39
|
+
|
|
40
|
+
# apairo
|
|
41
|
+
|
|
42
|
+
Unified Python loader for robotics sensor datasets — one API across synchronous and asynchronous layouts, with built-in preprocessing, filtering, and dataset composition.
|
|
43
|
+
|
|
44
|
+
All data is returned as `numpy.ndarray`. Convert to the framework of your choice.
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## Installation
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install apairo
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Optional extras:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install apairo[torch] # PyTorch support (.pt files)
|
|
58
|
+
pip install apairo[vision] # Image loading (Pillow)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Requires Python ≥ 3.11.
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## Quickstart
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
import apairo
|
|
69
|
+
|
|
70
|
+
ds = apairo.SemanticKittiDataset("/data/semantic_kitti", keys=["lidar", "labels"])
|
|
71
|
+
sample = ds[0]
|
|
72
|
+
# sample.data["lidar"] -> np.ndarray (N, 4) float32 [x, y, z, intensity]
|
|
73
|
+
# sample.data["labels"] -> np.ndarray (N,) int64
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Supported datasets
|
|
79
|
+
|
|
80
|
+
| Class | Layout | Modalities |
|
|
81
|
+
|---|---|---|
|
|
82
|
+
| `SemanticKittiDataset` | synchronous | lidar, labels |
|
|
83
|
+
| `Rellis3DDataset` | synchronous | lidar, labels, poses |
|
|
84
|
+
| `Goose3DDataset` | synchronous | lidar, labels |
|
|
85
|
+
| `MNTDataset` | synchronous | lidar, labels, poses |
|
|
86
|
+
| `RawDataset` | asynchronous | any channels — declared in `.apairo/channels.yaml` |
|
|
87
|
+
| `TartanKittiDataset` | asynchronous | any TartanDrive v2 channel |
|
|
88
|
+
|
|
89
|
+
`RawDataset` is the profile-free loader for the asynchronous layout: it takes its
|
|
90
|
+
channels — and their format (`npy`, `npys`, `bin`, `img`, `zarr`) — entirely from
|
|
91
|
+
`.apairo/channels.yaml`, so it loads any such dataset, including the output of
|
|
92
|
+
[apairo_extractor](https://github.com/apairo-robotics/apairo_extractor), with no
|
|
93
|
+
code change.
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
## Command line
|
|
98
|
+
|
|
99
|
+
Installing apairo provides the `apairo` command to inspect and initialize
|
|
100
|
+
datasets from the terminal:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
# Write/repair the .apairo sidecars by scanning a directory (root-aware, idempotent)
|
|
104
|
+
apairo init /data/my_dataset
|
|
105
|
+
|
|
106
|
+
# Show sequences, channels (tracked + untracked), event count and any issues
|
|
107
|
+
apairo status /data/my_dataset # add --json for machine output
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
`apairo init` reconstructs the `.apairo` files for data laid out before they
|
|
111
|
+
existed (e.g. an older extraction) — no re-extraction needed — and the result
|
|
112
|
+
loads directly with `RawDataset`. See [Command Line](https://apairo-robotics.github.io/apairo/cli/) for the full reference.
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## Pipeline
|
|
117
|
+
|
|
118
|
+
apairo provides a composable set of operations that chain together — each returns a full dataset:
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
from apairo import Rellis3DDataset, FramePreprocessor
|
|
122
|
+
from torch.utils.data import DataLoader
|
|
123
|
+
import numpy as np
|
|
124
|
+
|
|
125
|
+
# 1. Preprocess — run once, persisted in .apairo, reloaded transparently
|
|
126
|
+
class TravLabel(FramePreprocessor):
|
|
127
|
+
output_key = "trav_gt"; output_loader = "npys"
|
|
128
|
+
input_keys = ["labels"]; timestamps_from = "lidar"; sources = ["labels"]
|
|
129
|
+
def process(self, sample): return (sample.data["labels"] < 10).astype(np.uint8)
|
|
130
|
+
|
|
131
|
+
ds = Rellis3DDataset(root, keys=["lidar", "labels", "ground_height_csf"])
|
|
132
|
+
ds.run_preprocess(TravLabel())
|
|
133
|
+
|
|
134
|
+
# 2. Cache an expensive derived channel — computed once, served from RAM
|
|
135
|
+
ds.transform("ground_height_csf", expensive_smooth)
|
|
136
|
+
ds_prior = ds.select(["ground_height_csf"]).cache()
|
|
137
|
+
|
|
138
|
+
# 3. Build train split — filter, join cached prior, apply augmentation
|
|
139
|
+
valid = np.load("cache/valid_indices.npy")
|
|
140
|
+
ds_train = (
|
|
141
|
+
Rellis3DDataset(root, keys=["lidar", "trav_gt"])
|
|
142
|
+
.filter(valid)
|
|
143
|
+
.join(ds_prior)
|
|
144
|
+
.transform("lidar", RangeFilter(max=50.0))
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
# 4. Drop into DataLoader — no adapter needed
|
|
148
|
+
loader = DataLoader(ds_train, batch_size=8, shuffle=True, collate_fn=my_collate)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
See [`examples/`](examples/) for complete runnable pipelines.
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## Preprocessing
|
|
156
|
+
|
|
157
|
+
Define a `FramePreprocessor` or `SequencePreprocessor`, run it once — apairo persists the output and reloads it transparently on subsequent runs.
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
from apairo.preprocess import FramePreprocessor
|
|
161
|
+
|
|
162
|
+
class TravLabel(FramePreprocessor):
|
|
163
|
+
output_key = "trav_label"
|
|
164
|
+
output_loader = "npys"
|
|
165
|
+
input_keys = ["labels"]
|
|
166
|
+
timestamps_from = "labels"
|
|
167
|
+
sources = ["labels"]
|
|
168
|
+
|
|
169
|
+
def process(self, sample) -> np.ndarray:
|
|
170
|
+
return (sample.data["labels"] < 10).astype(np.uint8)
|
|
171
|
+
|
|
172
|
+
ds = apairo.Goose3DDataset("/data/goose", keys=["lidar", "labels"])
|
|
173
|
+
ds.run_preprocess(TravLabel())
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
See [`apairo_preprocess`](https://github.com/apairo-robotics/apairo_preprocess) for a collection of ready-made preprocessors.
|
|
177
|
+
|
|
178
|
+
---
|
|
179
|
+
|
|
180
|
+
## Transforms
|
|
181
|
+
|
|
182
|
+
Apply callables at access time — no disk writes.
|
|
183
|
+
|
|
184
|
+
```python
|
|
185
|
+
# Per-channel
|
|
186
|
+
ds.transform("lidar", RangeFilter(max=50.0))
|
|
187
|
+
|
|
188
|
+
# Sample-level — consistent mask across aligned channels
|
|
189
|
+
def sync_filter(sample):
|
|
190
|
+
mask = np.linalg.norm(sample.data["lidar"][:, :3], axis=1) < 50.0
|
|
191
|
+
sample.data["lidar"] = sample.data["lidar"][mask]
|
|
192
|
+
sample.data["labels"] = sample.data["labels"][mask]
|
|
193
|
+
return sample
|
|
194
|
+
|
|
195
|
+
ds.transform(sync_filter)
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
See [`apairo_transform`](https://github.com/apairo-robotics/apairo_transform) for a collection of ready-made transforms.
|
|
199
|
+
|
|
200
|
+
---
|
|
201
|
+
|
|
202
|
+
## Filtering
|
|
203
|
+
|
|
204
|
+
`filter()` returns a dataset view restricted to frames that pass a predicate. Sweep once, persist the indices, reload without I/O cost on subsequent runs:
|
|
205
|
+
|
|
206
|
+
```python
|
|
207
|
+
# Compute and save
|
|
208
|
+
view = ds.filter("trav_gt", lambda gt: (gt == 1).sum() >= 50)
|
|
209
|
+
np.save("cache/valid.npy", view.indices)
|
|
210
|
+
|
|
211
|
+
# Reload — no sweep
|
|
212
|
+
view = ds.filter(np.load("cache/valid.npy"))
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
---
|
|
216
|
+
|
|
217
|
+
## Select & cache
|
|
218
|
+
|
|
219
|
+
`select(keys)` narrows a dataset to a subset of channels. `cache()` materialises it in RAM. Together they let you cache only the channels worth caching:
|
|
220
|
+
|
|
221
|
+
```python
|
|
222
|
+
ds.transform("ground_height_csf", expensive_smooth)
|
|
223
|
+
|
|
224
|
+
# Compute once, store in RAM
|
|
225
|
+
ds_prior = ds.select(["ground_height_csf"]).cache()
|
|
226
|
+
|
|
227
|
+
# Reuse across training runs — prior served from RAM, base channels from disk
|
|
228
|
+
ds_v1 = base.join(ds_prior).transform(augment_v1)
|
|
229
|
+
ds_v2 = base.join(ds_prior).transform(augment_v2)
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
---
|
|
233
|
+
|
|
234
|
+
## Asynchronous datasets — `synchronize()`
|
|
235
|
+
|
|
236
|
+
Asynchronous datasets (multi-rate sensor rigs) expose a timestamp-ordered event timeline: `ds[i]` is one event from one sensor. To get complete multi-channel frames, resample onto a reference clock:
|
|
237
|
+
|
|
238
|
+
```python
|
|
239
|
+
ds = apairo.TartanKittiDataset(seq_dir, keys=["velodyne_0", "image_left", "cmd"])
|
|
240
|
+
|
|
241
|
+
ds_sync = ds.synchronize(
|
|
242
|
+
reference="velodyne_0", # default: lowest-frequency channel
|
|
243
|
+
method="latest", # "latest" (zero-order hold) or "nearest"
|
|
244
|
+
tolerance=0.05, # drop frames with no match within ±50 ms
|
|
245
|
+
)
|
|
246
|
+
|
|
247
|
+
ds_sync[0].data # {"velodyne_0": ..., "image_left": ..., "cmd": ...}
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
The result is a synchronous view — random access, shuffling, and the whole chaining API (`filter`, `select`, `cache`, `join`, `DataLoader`) work unchanged. Matching is a pure index computation; no data is read until access.
|
|
251
|
+
|
|
252
|
+
---
|
|
253
|
+
|
|
254
|
+
## Combining datasets
|
|
255
|
+
|
|
256
|
+
```python
|
|
257
|
+
# ConcatDataset — frame axis (different recording sessions)
|
|
258
|
+
combined = apairo.ConcatDataset([ds_session1, ds_session2])
|
|
259
|
+
|
|
260
|
+
# ZipDataset — channel axis (same frames, different modalities)
|
|
261
|
+
combined = apairo.ZipDataset(ds_base, ds_prior)
|
|
262
|
+
# or: ds_base.join(ds_prior)
|
|
263
|
+
|
|
264
|
+
# Built-in splits
|
|
265
|
+
ds_train = apairo.Rellis3DDataset(root, keys=["lidar", "labels"]).split("train")
|
|
266
|
+
ds_val = apairo.Rellis3DDataset(root, keys=["lidar", "labels"]).split("val")
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
---
|
|
270
|
+
|
|
271
|
+
## Extending apairo
|
|
272
|
+
|
|
273
|
+
Add a new synchronous dataset with a YAML profile and a minimal subclass.
|
|
274
|
+
See [documentation](https://apairo-robotics.github.io/apairo/) for the full guide.
|
|
275
|
+
|
|
276
|
+
---
|
|
277
|
+
|
|
278
|
+
## Contributing
|
|
279
|
+
|
|
280
|
+
apairo is one repository of a small ecosystem ([apairo_transform](https://github.com/apairo-robotics/apairo_transform), [apairo_preprocess](https://github.com/apairo-robotics/apairo_preprocess), [apairo_extractor](https://github.com/apairo-robotics/apairo_extractor), [apairo_rr](https://github.com/apairo-robotics/apairo_rr)). Where a change belongs, the design invariants, and the dev workflow are documented in [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
281
|
+
|
|
282
|
+
---
|
|
283
|
+
|
|
284
|
+
## License
|
|
285
|
+
|
|
286
|
+
MIT
|
apairo-0.2.0/README.md
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
# apairo
|
|
2
|
+
|
|
3
|
+
Unified Python loader for robotics sensor datasets — one API across synchronous and asynchronous layouts, with built-in preprocessing, filtering, and dataset composition.
|
|
4
|
+
|
|
5
|
+
All data is returned as `numpy.ndarray`. Convert to the framework of your choice.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pip install apairo
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
Optional extras:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install apairo[torch] # PyTorch support (.pt files)
|
|
19
|
+
pip install apairo[vision] # Image loading (Pillow)
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Requires Python ≥ 3.11.
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Quickstart
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
import apairo
|
|
30
|
+
|
|
31
|
+
ds = apairo.SemanticKittiDataset("/data/semantic_kitti", keys=["lidar", "labels"])
|
|
32
|
+
sample = ds[0]
|
|
33
|
+
# sample.data["lidar"] -> np.ndarray (N, 4) float32 [x, y, z, intensity]
|
|
34
|
+
# sample.data["labels"] -> np.ndarray (N,) int64
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## Supported datasets
|
|
40
|
+
|
|
41
|
+
| Class | Layout | Modalities |
|
|
42
|
+
|---|---|---|
|
|
43
|
+
| `SemanticKittiDataset` | synchronous | lidar, labels |
|
|
44
|
+
| `Rellis3DDataset` | synchronous | lidar, labels, poses |
|
|
45
|
+
| `Goose3DDataset` | synchronous | lidar, labels |
|
|
46
|
+
| `MNTDataset` | synchronous | lidar, labels, poses |
|
|
47
|
+
| `RawDataset` | asynchronous | any channels — declared in `.apairo/channels.yaml` |
|
|
48
|
+
| `TartanKittiDataset` | asynchronous | any TartanDrive v2 channel |
|
|
49
|
+
|
|
50
|
+
`RawDataset` is the profile-free loader for the asynchronous layout: it takes its
|
|
51
|
+
channels — and their format (`npy`, `npys`, `bin`, `img`, `zarr`) — entirely from
|
|
52
|
+
`.apairo/channels.yaml`, so it loads any such dataset, including the output of
|
|
53
|
+
[apairo_extractor](https://github.com/apairo-robotics/apairo_extractor), with no
|
|
54
|
+
code change.
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Command line
|
|
59
|
+
|
|
60
|
+
Installing apairo provides the `apairo` command to inspect and initialize
|
|
61
|
+
datasets from the terminal:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
# Write/repair the .apairo sidecars by scanning a directory (root-aware, idempotent)
|
|
65
|
+
apairo init /data/my_dataset
|
|
66
|
+
|
|
67
|
+
# Show sequences, channels (tracked + untracked), event count and any issues
|
|
68
|
+
apairo status /data/my_dataset # add --json for machine output
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
`apairo init` reconstructs the `.apairo` files for data laid out before they
|
|
72
|
+
existed (e.g. an older extraction) — no re-extraction needed — and the result
|
|
73
|
+
loads directly with `RawDataset`. See [Command Line](https://apairo-robotics.github.io/apairo/cli/) for the full reference.
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## Pipeline
|
|
78
|
+
|
|
79
|
+
apairo provides a composable set of operations that chain together — each returns a full dataset:
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
from apairo import Rellis3DDataset, FramePreprocessor
|
|
83
|
+
from torch.utils.data import DataLoader
|
|
84
|
+
import numpy as np
|
|
85
|
+
|
|
86
|
+
# 1. Preprocess — run once, persisted in .apairo, reloaded transparently
|
|
87
|
+
class TravLabel(FramePreprocessor):
|
|
88
|
+
output_key = "trav_gt"; output_loader = "npys"
|
|
89
|
+
input_keys = ["labels"]; timestamps_from = "lidar"; sources = ["labels"]
|
|
90
|
+
def process(self, sample): return (sample.data["labels"] < 10).astype(np.uint8)
|
|
91
|
+
|
|
92
|
+
ds = Rellis3DDataset(root, keys=["lidar", "labels", "ground_height_csf"])
|
|
93
|
+
ds.run_preprocess(TravLabel())
|
|
94
|
+
|
|
95
|
+
# 2. Cache an expensive derived channel — computed once, served from RAM
|
|
96
|
+
ds.transform("ground_height_csf", expensive_smooth)
|
|
97
|
+
ds_prior = ds.select(["ground_height_csf"]).cache()
|
|
98
|
+
|
|
99
|
+
# 3. Build train split — filter, join cached prior, apply augmentation
|
|
100
|
+
valid = np.load("cache/valid_indices.npy")
|
|
101
|
+
ds_train = (
|
|
102
|
+
Rellis3DDataset(root, keys=["lidar", "trav_gt"])
|
|
103
|
+
.filter(valid)
|
|
104
|
+
.join(ds_prior)
|
|
105
|
+
.transform("lidar", RangeFilter(max=50.0))
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
# 4. Drop into DataLoader — no adapter needed
|
|
109
|
+
loader = DataLoader(ds_train, batch_size=8, shuffle=True, collate_fn=my_collate)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
See [`examples/`](examples/) for complete runnable pipelines.
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## Preprocessing
|
|
117
|
+
|
|
118
|
+
Define a `FramePreprocessor` or `SequencePreprocessor`, run it once — apairo persists the output and reloads it transparently on subsequent runs.
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
from apairo.preprocess import FramePreprocessor
|
|
122
|
+
|
|
123
|
+
class TravLabel(FramePreprocessor):
|
|
124
|
+
output_key = "trav_label"
|
|
125
|
+
output_loader = "npys"
|
|
126
|
+
input_keys = ["labels"]
|
|
127
|
+
timestamps_from = "labels"
|
|
128
|
+
sources = ["labels"]
|
|
129
|
+
|
|
130
|
+
def process(self, sample) -> np.ndarray:
|
|
131
|
+
return (sample.data["labels"] < 10).astype(np.uint8)
|
|
132
|
+
|
|
133
|
+
ds = apairo.Goose3DDataset("/data/goose", keys=["lidar", "labels"])
|
|
134
|
+
ds.run_preprocess(TravLabel())
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
See [`apairo_preprocess`](https://github.com/apairo-robotics/apairo_preprocess) for a collection of ready-made preprocessors.
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
141
|
+
## Transforms
|
|
142
|
+
|
|
143
|
+
Apply callables at access time — no disk writes.
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
# Per-channel
|
|
147
|
+
ds.transform("lidar", RangeFilter(max=50.0))
|
|
148
|
+
|
|
149
|
+
# Sample-level — consistent mask across aligned channels
|
|
150
|
+
def sync_filter(sample):
|
|
151
|
+
mask = np.linalg.norm(sample.data["lidar"][:, :3], axis=1) < 50.0
|
|
152
|
+
sample.data["lidar"] = sample.data["lidar"][mask]
|
|
153
|
+
sample.data["labels"] = sample.data["labels"][mask]
|
|
154
|
+
return sample
|
|
155
|
+
|
|
156
|
+
ds.transform(sync_filter)
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
See [`apairo_transform`](https://github.com/apairo-robotics/apairo_transform) for a collection of ready-made transforms.
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## Filtering
|
|
164
|
+
|
|
165
|
+
`filter()` returns a dataset view restricted to frames that pass a predicate. Sweep once, persist the indices, reload without I/O cost on subsequent runs:
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
# Compute and save
|
|
169
|
+
view = ds.filter("trav_gt", lambda gt: (gt == 1).sum() >= 50)
|
|
170
|
+
np.save("cache/valid.npy", view.indices)
|
|
171
|
+
|
|
172
|
+
# Reload — no sweep
|
|
173
|
+
view = ds.filter(np.load("cache/valid.npy"))
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## Select & cache
|
|
179
|
+
|
|
180
|
+
`select(keys)` narrows a dataset to a subset of channels. `cache()` materialises it in RAM. Together they let you cache only the channels worth caching:
|
|
181
|
+
|
|
182
|
+
```python
|
|
183
|
+
ds.transform("ground_height_csf", expensive_smooth)
|
|
184
|
+
|
|
185
|
+
# Compute once, store in RAM
|
|
186
|
+
ds_prior = ds.select(["ground_height_csf"]).cache()
|
|
187
|
+
|
|
188
|
+
# Reuse across training runs — prior served from RAM, base channels from disk
|
|
189
|
+
ds_v1 = base.join(ds_prior).transform(augment_v1)
|
|
190
|
+
ds_v2 = base.join(ds_prior).transform(augment_v2)
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
195
|
+
## Asynchronous datasets — `synchronize()`
|
|
196
|
+
|
|
197
|
+
Asynchronous datasets (multi-rate sensor rigs) expose a timestamp-ordered event timeline: `ds[i]` is one event from one sensor. To get complete multi-channel frames, resample onto a reference clock:
|
|
198
|
+
|
|
199
|
+
```python
|
|
200
|
+
ds = apairo.TartanKittiDataset(seq_dir, keys=["velodyne_0", "image_left", "cmd"])
|
|
201
|
+
|
|
202
|
+
ds_sync = ds.synchronize(
|
|
203
|
+
reference="velodyne_0", # default: lowest-frequency channel
|
|
204
|
+
method="latest", # "latest" (zero-order hold) or "nearest"
|
|
205
|
+
tolerance=0.05, # drop frames with no match within ±50 ms
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
ds_sync[0].data # {"velodyne_0": ..., "image_left": ..., "cmd": ...}
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
The result is a synchronous view — random access, shuffling, and the whole chaining API (`filter`, `select`, `cache`, `join`, `DataLoader`) work unchanged. Matching is a pure index computation; no data is read until access.
|
|
212
|
+
|
|
213
|
+
---
|
|
214
|
+
|
|
215
|
+
## Combining datasets
|
|
216
|
+
|
|
217
|
+
```python
|
|
218
|
+
# ConcatDataset — frame axis (different recording sessions)
|
|
219
|
+
combined = apairo.ConcatDataset([ds_session1, ds_session2])
|
|
220
|
+
|
|
221
|
+
# ZipDataset — channel axis (same frames, different modalities)
|
|
222
|
+
combined = apairo.ZipDataset(ds_base, ds_prior)
|
|
223
|
+
# or: ds_base.join(ds_prior)
|
|
224
|
+
|
|
225
|
+
# Built-in splits
|
|
226
|
+
ds_train = apairo.Rellis3DDataset(root, keys=["lidar", "labels"]).split("train")
|
|
227
|
+
ds_val = apairo.Rellis3DDataset(root, keys=["lidar", "labels"]).split("val")
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
---
|
|
231
|
+
|
|
232
|
+
## Extending apairo
|
|
233
|
+
|
|
234
|
+
Add a new synchronous dataset with a YAML profile and a minimal subclass.
|
|
235
|
+
See [documentation](https://apairo-robotics.github.io/apairo/) for the full guide.
|
|
236
|
+
|
|
237
|
+
---
|
|
238
|
+
|
|
239
|
+
## Contributing
|
|
240
|
+
|
|
241
|
+
apairo is one repository of a small ecosystem ([apairo_transform](https://github.com/apairo-robotics/apairo_transform), [apairo_preprocess](https://github.com/apairo-robotics/apairo_preprocess), [apairo_extractor](https://github.com/apairo-robotics/apairo_extractor), [apairo_rr](https://github.com/apairo-robotics/apairo_rr)). Where a change belongs, the design invariants, and the dev workflow are documented in [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
242
|
+
|
|
243
|
+
---
|
|
244
|
+
|
|
245
|
+
## License
|
|
246
|
+
|
|
247
|
+
MIT
|
apairo-0.2.0/apairo/__init__.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Apairo -- unified robotics dataset loader."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
from apairo.core.sample import Sample
|
|
6
|
+
from apairo.core.synchronous_dataset import SynchronousDataset
|
|
7
|
+
from apairo.core.configurable_dataset import ConfigurableDataset
|
|
8
|
+
from apairo.preprocess import FramePreprocessor, SequencePreprocessor
|
|
9
|
+
|
|
10
|
+
from apairo.dataset.raw import RawDataset
|
|
11
|
+
from apairo.dataset.tartan_kitti import TartanKittiDataset
|
|
12
|
+
from apairo.dataset.concat import ConcatDataset
|
|
13
|
+
from apairo.dataset.zip import ZipDataset
|
|
14
|
+
from apairo.dataset.stream import StreamDataset
|
|
15
|
+
from apairo.dataset import split_sequences
|
|
16
|
+
from apairo.core.sequence_view import SequenceView
|
|
17
|
+
from apairo.core.filtered_view import FilteredView
|
|
18
|
+
from apairo.core.channel_view import ChannelView
|
|
19
|
+
from apairo.core.cached_dataset import CachedDataset
|
|
20
|
+
from apairo.core.synchronized_view import SynchronizedView
|
|
21
|
+
from apairo.core.interpolator import Interpolator
|
|
22
|
+
from apairo.dataset.semantic_kitti import SemanticKittiDataset
|
|
23
|
+
from apairo.dataset.rellis import Rellis3DDataset
|
|
24
|
+
from apairo.dataset.goose import Goose3DDataset
|
|
25
|
+
from apairo.dataset.mnt import MNTDataset
|
|
26
|
+
|
|
27
|
+
from apairo.core.layout import ChannelSpec, DatasetLayout
|
|
28
|
+
from apairo.core.transform import Compose
|
|
29
|
+
from apairo.core.config import register_channel, register_raw_channel, verify_config
|
|
30
|
+
from apairo.writer import WRITERS
|
|
31
|
+
from apairo.loader import DERIVED_LOADERS
|
|
32
|
+
|
|
33
|
+
logging.getLogger(__name__).addHandler(logging.NullHandler())
|
|
34
|
+
|
|
35
|
+
__version__ = "0.2.0"
|
|
36
|
+
|
|
37
|
+
__all__ = [
|
|
38
|
+
"Sample",
|
|
39
|
+
"SynchronousDataset",
|
|
40
|
+
"ConfigurableDataset",
|
|
41
|
+
"FramePreprocessor",
|
|
42
|
+
"SequencePreprocessor",
|
|
43
|
+
"RawDataset",
|
|
44
|
+
"TartanKittiDataset",
|
|
45
|
+
"ConcatDataset",
|
|
46
|
+
"ZipDataset",
|
|
47
|
+
"StreamDataset",
|
|
48
|
+
"split_sequences",
|
|
49
|
+
"SequenceView",
|
|
50
|
+
"FilteredView",
|
|
51
|
+
"ChannelView",
|
|
52
|
+
"CachedDataset",
|
|
53
|
+
"SynchronizedView",
|
|
54
|
+
"Interpolator",
|
|
55
|
+
"SemanticKittiDataset",
|
|
56
|
+
"Rellis3DDataset",
|
|
57
|
+
"Goose3DDataset",
|
|
58
|
+
"MNTDataset",
|
|
59
|
+
"ChannelSpec",
|
|
60
|
+
"DatasetLayout",
|
|
61
|
+
"Compose",
|
|
62
|
+
"register_channel",
|
|
63
|
+
"register_raw_channel",
|
|
64
|
+
"verify_config",
|
|
65
|
+
"WRITERS",
|
|
66
|
+
"DERIVED_LOADERS",
|
|
67
|
+
"__version__",
|
|
68
|
+
]
|