apairo 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. apairo/__init__.py +68 -0
  2. apairo/cli.py +350 -0
  3. apairo/core/__init__.py +30 -0
  4. apairo/core/abstract_dataset.py +398 -0
  5. apairo/core/abstract_loader.py +30 -0
  6. apairo/core/cached_dataset.py +71 -0
  7. apairo/core/channel_view.py +60 -0
  8. apairo/core/config/keys.yaml +80 -0
  9. apairo/core/config.py +257 -0
  10. apairo/core/configurable_dataset.py +246 -0
  11. apairo/core/filtered_view.py +75 -0
  12. apairo/core/interpolator.py +44 -0
  13. apairo/core/layout.py +148 -0
  14. apairo/core/preprocessor.py +113 -0
  15. apairo/core/profiled_dataset.py +699 -0
  16. apairo/core/root_sequence.py +193 -0
  17. apairo/core/sample.py +16 -0
  18. apairo/core/sequence_view.py +55 -0
  19. apairo/core/synchronized_view.py +315 -0
  20. apairo/core/synchronous_dataset.py +47 -0
  21. apairo/core/transform.py +23 -0
  22. apairo/core/utils/__init__.py +0 -0
  23. apairo/core/utils/exceptions.py +20 -0
  24. apairo/core/utils/typing.py +3 -0
  25. apairo/dataset/__init__.py +46 -0
  26. apairo/dataset/concat.py +87 -0
  27. apairo/dataset/config/tartan_kitti.yaml +94 -0
  28. apairo/dataset/goose/__init__.py +3 -0
  29. apairo/dataset/goose/dataset.py +18 -0
  30. apairo/dataset/kitti/__init__.py +3 -0
  31. apairo/dataset/kitti/dataset.py +281 -0
  32. apairo/dataset/mnt/__init__.py +4 -0
  33. apairo/dataset/mnt/dataset.py +516 -0
  34. apairo/dataset/mnt/layout.py +109 -0
  35. apairo/dataset/profiles/goose.yaml +14 -0
  36. apairo/dataset/profiles/rellis.yaml +25 -0
  37. apairo/dataset/profiles/semantic_kitti.yaml +16 -0
  38. apairo/dataset/raw/__init__.py +3 -0
  39. apairo/dataset/raw/dataset.py +278 -0
  40. apairo/dataset/rellis/__init__.py +3 -0
  41. apairo/dataset/rellis/dataset.py +22 -0
  42. apairo/dataset/semantic_kitti/__init__.py +3 -0
  43. apairo/dataset/semantic_kitti/dataset.py +17 -0
  44. apairo/dataset/stream.py +88 -0
  45. apairo/dataset/tartan_kitti/__init__.py +3 -0
  46. apairo/dataset/tartan_kitti/dataset.py +286 -0
  47. apairo/dataset/tartan_kitti/integration.yaml +68 -0
  48. apairo/dataset/tartan_kitti/profile.yaml +28 -0
  49. apairo/dataset/zip.py +104 -0
  50. apairo/loader/__init__.py +93 -0
  51. apairo/loader/bin_loader.py +22 -0
  52. apairo/loader/img_loader.py +61 -0
  53. apairo/loader/npy_loader.py +29 -0
  54. apairo/loader/npys_loader.py +54 -0
  55. apairo/loader/tar_loader.py +99 -0
  56. apairo/loader/txt_loader.py +38 -0
  57. apairo/loader/zarr_loader.py +48 -0
  58. apairo/preprocess/__init__.py +8 -0
  59. apairo/preprocess/runner.py +133 -0
  60. apairo/utils/__init__.py +15 -0
  61. apairo/utils/files.py +36 -0
  62. apairo/utils/naming.py +22 -0
  63. apairo/utils/paths.py +4 -0
  64. apairo/utils/resample.py +84 -0
  65. apairo/utils/timestamps.py +88 -0
  66. apairo/utils/types.py +10 -0
  67. apairo/utils/utils.py +55 -0
  68. apairo/writer/__init__.py +14 -0
  69. apairo/writer/bin_writer.py +12 -0
  70. apairo/writer/npy_writer.py +9 -0
  71. apairo/writer/tar_writer.py +60 -0
  72. apairo/writer/zarr_writer.py +72 -0
  73. apairo-0.2.0.dist-info/METADATA +286 -0
  74. apairo-0.2.0.dist-info/RECORD +78 -0
  75. apairo-0.2.0.dist-info/WHEEL +5 -0
  76. apairo-0.2.0.dist-info/entry_points.txt +2 -0
  77. apairo-0.2.0.dist-info/licenses/LICENSE +21 -0
  78. apairo-0.2.0.dist-info/top_level.txt +1 -0
apairo/__init__.py ADDED
@@ -0,0 +1,68 @@
1
+ """Apairo -- unified robotics dataset loader."""
2
+
3
+ import logging
4
+
5
+ from apairo.core.sample import Sample
6
+ from apairo.core.synchronous_dataset import SynchronousDataset
7
+ from apairo.core.configurable_dataset import ConfigurableDataset
8
+ from apairo.preprocess import FramePreprocessor, SequencePreprocessor
9
+
10
+ from apairo.dataset.raw import RawDataset
11
+ from apairo.dataset.tartan_kitti import TartanKittiDataset
12
+ from apairo.dataset.concat import ConcatDataset
13
+ from apairo.dataset.zip import ZipDataset
14
+ from apairo.dataset.stream import StreamDataset
15
+ from apairo.dataset import split_sequences
16
+ from apairo.core.sequence_view import SequenceView
17
+ from apairo.core.filtered_view import FilteredView
18
+ from apairo.core.channel_view import ChannelView
19
+ from apairo.core.cached_dataset import CachedDataset
20
+ from apairo.core.synchronized_view import SynchronizedView
21
+ from apairo.core.interpolator import Interpolator
22
+ from apairo.dataset.semantic_kitti import SemanticKittiDataset
23
+ from apairo.dataset.rellis import Rellis3DDataset
24
+ from apairo.dataset.goose import Goose3DDataset
25
+ from apairo.dataset.mnt import MNTDataset
26
+
27
+ from apairo.core.layout import ChannelSpec, DatasetLayout
28
+ from apairo.core.transform import Compose
29
+ from apairo.core.config import register_channel, register_raw_channel, verify_config
30
+ from apairo.writer import WRITERS
31
+ from apairo.loader import DERIVED_LOADERS
32
+
33
+ logging.getLogger(__name__).addHandler(logging.NullHandler())
34
+
35
+ __version__ = "0.2.0"
36
+
37
+ __all__ = [
38
+ "Sample",
39
+ "SynchronousDataset",
40
+ "ConfigurableDataset",
41
+ "FramePreprocessor",
42
+ "SequencePreprocessor",
43
+ "RawDataset",
44
+ "TartanKittiDataset",
45
+ "ConcatDataset",
46
+ "ZipDataset",
47
+ "StreamDataset",
48
+ "split_sequences",
49
+ "SequenceView",
50
+ "FilteredView",
51
+ "ChannelView",
52
+ "CachedDataset",
53
+ "SynchronizedView",
54
+ "Interpolator",
55
+ "SemanticKittiDataset",
56
+ "Rellis3DDataset",
57
+ "Goose3DDataset",
58
+ "MNTDataset",
59
+ "ChannelSpec",
60
+ "DatasetLayout",
61
+ "Compose",
62
+ "register_channel",
63
+ "register_raw_channel",
64
+ "verify_config",
65
+ "WRITERS",
66
+ "DERIVED_LOADERS",
67
+ "__version__",
68
+ ]
apairo/cli.py ADDED
@@ -0,0 +1,350 @@
1
+ """apairo command-line interface.
2
+
3
+ A thin wrapper over the library -- no third-party dependencies. Commands mirror
4
+ familiar terminal/git verbs:
5
+
6
+ * ``apairo init`` -- write the ``.apairo`` sidecar(s) by scanning a directory
7
+ (sequence -> ``channels.yaml``; root -> ``dataset.yaml``).
8
+ * ``apairo status`` -- report what a dataset directory contains: sequences,
9
+ tracked channels, channels detected on disk but not yet
10
+ registered ("untracked"), event count, and config issues.
11
+
12
+ ``add`` (register an untracked channel) and ``check`` (consistency check) are
13
+ planned follow-ups; ``status`` already surfaces what ``add`` will act on.
14
+ """
15
+ from __future__ import annotations
16
+
17
+ import argparse
18
+ import json
19
+ import sys
20
+ from pathlib import Path
21
+ from typing import Optional
22
+
23
+ import numpy as np
24
+
25
+ from apairo.core.config import config_exists, read_calibration, read_config, verify_config
26
+ from apairo.dataset.kitti.dataset import _detect_loader
27
+ from apairo.dataset.raw import RawDataset
28
+ from apairo.dataset.raw.dataset import _read_manifest
29
+
30
+ # Datasets selectable with ``--as``. Generic (profile-free) for now; profiled
31
+ # datasets (Tartan, Semantic, ...) will register here as the CLI grows.
32
+ DATASETS = {"RawDataset": RawDataset}
33
+ _BAR = "─" * 52
34
+
35
+
36
+ # ── helpers ─────────────────────────────────────────────────────────────────
37
+
38
def _read_timestamps(channel_dir: Path):
    """Load ``timestamps.txt`` from a channel directory as a 1-D+ array.

    Returns ``None`` when the file is absent or cannot be parsed; this is a
    best-effort read, so any loading error is treated as "no timestamps".
    """
    stamp_file = channel_dir / "timestamps.txt"
    loaded = None
    if stamp_file.exists():
        try:
            # atleast_1d: a single-line file would otherwise yield a 0-d array.
            loaded = np.atleast_1d(np.loadtxt(stamp_file))
        except Exception:
            loaded = None
    return loaded
46
+
47
+
48
def _rate_span(ts):
    """Return ``(rate_hz, (first, last))`` for a timestamp sequence.

    ``(None, None)`` is returned for a missing or empty sequence. The rate is
    the number of intervals divided by the elapsed time between the first and
    last entries; it is ``None`` when there are fewer than two entries or the
    last timestamp is not strictly greater than the first.
    """
    if ts is None or len(ts) == 0:
        return None, None
    first = float(ts[0])
    last = float(ts[-1])
    count = len(ts)
    if count >= 2 and last > first:
        rate = (count - 1) / (last - first)
    else:
        rate = None
    return rate, (first, last)
55
+
56
+
57
def _channel_shape(channel_dir: Path, loader: Optional[str]):
    """Return ``(shape, dtype)`` of one frame, read from the first ``.npy`` file.

    The alphabetically first ``*.npy`` file in ``channel_dir`` is opened with
    ``mmap_mode="r"``. ``(None, None)`` is returned when there is no such file
    or it cannot be opened. The shape is returned as a list and the dtype as
    its string form.
    """
    first_file = next(iter(sorted(channel_dir.glob("*.npy"))), None)
    if first_file is None:
        return None, None
    try:
        header = np.load(first_file, mmap_mode="r")
    except Exception:
        return None, None
    dims = header.shape
    if loader == "npy":
        # A stacked ``npy`` file is (N, *frame): drop the leading frame axis.
        # Any other loader is treated as one frame per file.
        dims = dims[1:]
    return list(dims), str(header.dtype)
69
+
70
+
71
def _count_files(channel_dir: Path) -> int:
    """Count regular files directly inside ``channel_dir``, ignoring ``timestamps.txt``."""
    total = 0
    for entry in channel_dir.iterdir():
        if entry.is_file() and entry.name != "timestamps.txt":
            total += 1
    return total
75
+
76
+
77
def _channel_detail(seq_dir: Path, channel: str, meta: Optional[dict]) -> dict:
    """Collect per-channel facts for ``seq_dir / channel``.

    Timestamps give the frame count, rate and span; the first ``.npy`` file
    gives shape and dtype. When no timestamps can be read, the frame count
    falls back to the number of files in the channel directory.

    Args:
        seq_dir: Sequence directory containing the channel sub-directory.
        channel: Channel (sub-directory) name.
        meta: The channel's registered metadata, or ``None`` for an untracked
            channel. Only ``None`` marks a channel as untracked: an empty
            mapping is still a tracked channel and gets the default ``"raw"``
            kind.

    Returns:
        A dict with keys ``kind``, ``frame``, ``transform``, ``loader``,
        ``frames``, ``rate_hz``, ``span``, ``shape`` and ``dtype``, plus
        ``timestamps_from`` / ``sources`` when the metadata declares them.
    """
    cdir = seq_dir / channel
    tracked = meta is not None
    info = meta or {}

    ts = _read_timestamps(cdir)
    rate, span = _rate_span(ts)
    # Metadata without content cannot name a loader, so detect it from disk.
    loader = info.get("loader") if info else _detect_loader(cdir)
    shape, dtype = _channel_shape(cdir, loader)

    detail = {
        "kind": info.get("kind", "raw") if tracked else "untracked",
        "frame": info.get("frame"),
        "transform": info.get("transform"),
        "loader": loader,
        "frames": len(ts) if ts is not None else _count_files(cdir),
        "rate_hz": rate,
        "span": list(span) if span else None,
        "shape": shape,
        "dtype": dtype,
    }
    if info.get("timestamps_from"):
        detail["timestamps_from"] = info["timestamps_from"]
    if info.get("sources"):
        detail["sources"] = list(info["sources"])
    return detail
101
+
102
+
103
def _untracked_channels(seq_dir: Path) -> list[str]:
    """List sub-directories that look like channels but are not registered.

    A sub-directory qualifies when it is not hidden (no leading dot), is not
    named under ``channels`` in the sequence config, and ``_detect_loader``
    recognises its contents. Names are returned in sorted directory order.
    """
    if config_exists(seq_dir):
        known = set(read_config(seq_dir).get("channels", {}))
    else:
        known = set()

    names = []
    for entry in sorted(seq_dir.iterdir()):
        if not entry.is_dir():
            continue
        if entry.name.startswith(".") or entry.name in known:
            continue
        if _detect_loader(entry) is None:
            continue
        names.append(entry.name)
    return names
114
+
115
+
116
def _seq_info(seq_dir: Path) -> dict:
    """Summarise one sequence directory.

    Returns a dict with the tracked ``channels`` and ``untracked`` channel
    details, the earliest span ``start`` across both (``None`` when no channel
    has a span), the total ``events`` (sum of frame counts over tracked
    channels only) and the list of config ``issues``.
    """
    if config_exists(seq_dir):
        cfg = read_config(seq_dir).get("channels", {})
    else:
        cfg = {}

    channels = {}
    for ch_name, ch_meta in sorted(cfg.items()):
        channels[ch_name] = _channel_detail(seq_dir, ch_name, ch_meta)

    untracked = {}
    for ch_name in _untracked_channels(seq_dir):
        untracked[ch_name] = _channel_detail(seq_dir, ch_name, None)

    merged = dict(channels)
    merged.update(untracked)
    starts = [detail["span"][0] for detail in merged.values() if detail["span"]]

    events = 0
    for detail in channels.values():
        events += detail["frames"]

    if config_exists(seq_dir):
        issues = verify_config(seq_dir)
    else:
        issues = ["not initialized — run `apairo init`"]

    return {
        "channels": channels,
        "untracked": untracked,
        "start": min(starts) if starts else None,
        "events": events,
        "issues": issues,
    }
129
+
130
+
131
def _is_sequence(path: Path) -> bool:
    """Tell whether ``path`` is a sequence directory.

    A directory counts as a sequence when ``config_exists`` reports a config
    for it; otherwise the answer is whatever
    ``RawDataset._is_sequence_layout`` returns.
    """
    has_config = config_exists(path)
    if has_config:
        return has_config
    return RawDataset._is_sequence_layout(path)
133
+
134
+
135
def _sequence_dirs(root: Path) -> list[Path]:
    """Return the non-hidden sub-directories of ``root`` that are sequences, sorted."""
    found = []
    for entry in sorted(root.iterdir()):
        if not entry.is_dir():
            continue
        if entry.name.startswith("."):
            continue
        if _is_sequence(entry):
            found.append(entry)
    return found
140
+
141
+
142
def _fmt_channels(d: dict) -> str:
    """Render ``{name: value}`` as ``"name (value), ..."`` sorted by name; ``"—"`` if empty."""
    if not d:
        return "—"
    parts = []
    for key in sorted(d):
        parts.append(f"{key} ({d[key]})")
    return ", ".join(parts)
144
+
145
+
146
+ # ── status ──────────────────────────────────────────────────────────────────
147
+
148
def _build_status(path: Path) -> Optional[dict]:
    """Build the status report for a sequence directory or a dataset root.

    For a sequence, the report is the sequence summary plus ``name``,
    ``kind="sequence"`` and the sorted calibration entries. For a root, the
    per-sequence summaries are aggregated: tracked channels are split into
    ``raw`` and ``preprocess`` maps (channel -> loader), untracked channels
    are listed as ``"<sequence>/<channel>"``, and issues are prefixed with
    the sequence name. Returns ``None`` when ``path`` is neither a sequence
    nor contains any sequence.
    """
    if _is_sequence(path):
        status = {
            "name": path.name,
            "kind": "sequence",
            "calibration": sorted(read_calibration(path)),
        }
        status.update(_seq_info(path))
        return status

    seq_dirs = _sequence_dirs(path)
    if not seq_dirs:
        return None

    per = {}
    for seq in seq_dirs:
        per[seq.name] = _seq_info(seq)

    raw: dict = {}
    preprocess: dict = {}
    untracked: set[str] = set()
    issues: list[str] = []
    events = 0
    for seq_name, info in per.items():
        for ch, detail in info["channels"].items():
            # Anything whose kind is not "raw" is reported under preprocess.
            bucket = raw if detail["kind"] == "raw" else preprocess
            bucket[ch] = detail["loader"]
        for ch in info["untracked"]:
            untracked.add(f"{seq_name}/{ch}")
        issues.extend(f"{seq_name}: {issue}" for issue in info["issues"])
        events += info["events"]

    calibration: set[str] = set()
    for seq in seq_dirs:
        calibration.update(read_calibration(seq))

    manifest = _read_manifest(path)
    return {
        "name": manifest.get("name", path.name),
        "kind": "root",
        "sequences": list(per),
        "raw": raw,
        "preprocess": preprocess,
        "untracked": sorted(untracked),
        "calibration": sorted(calibration),
        "events": events,
        "issues": issues,
    }
186
+
187
+
188
def _fmt_shape(detail: dict) -> str:
    """Format a channel's shape (and dtype, when present) for the table; ``"?"`` if unknown."""
    dims = detail["shape"]
    if dims is None:
        return "?"
    text = "(" + ", ".join(str(n) for n in dims) + ")"
    if detail.get("dtype"):
        return f"{text} {detail['dtype']}"
    return text
193
+
194
+
195
def _print_channel_table(channels: dict, untracked: dict, t0_ref: Optional[float]) -> None:
    """Print an aligned text table with one row per channel.

    Tracked ``channels`` are listed first, followed by ``untracked`` ones.
    Span values are printed relative to ``t0_ref`` (a missing or zero
    reference leaves them unshifted). The ``frame`` column is shown only when
    at least one channel declares a frame. When both mappings are empty, only
    the header line is printed.
    """
    ref = t0_ref or 0.0
    all_ch = list(channels.items()) + list(untracked.items())
    show_frame = any(c.get("frame") for _, c in all_ch)  # only when declared
    headers = ["channel", "kind"] + (["frame"] if show_frame else []) + \
        ["loader", "frames", "rate", "span", "shape", ""]

    rows = []
    for name, c in all_ch:
        rate = f"{c['rate_hz']:.1f} Hz" if c["rate_hz"] else "—"
        span = f"{c['span'][0] - ref:.2f}–{c['span'][1] - ref:.2f}s" if c["span"] else "—"
        if c["kind"] == "untracked":
            note = "← run `apairo add`"
        elif c.get("transform"):
            tf = c["transform"]
            note = f"← tf {tf.get('parent')}→{tf.get('child')}"
            if tf.get("static"):
                note += " (static)"
        elif c.get("timestamps_from"):
            note = f"← from {c['timestamps_from']}"
        else:
            note = ""
        row = [name, c["kind"]] + ([c.get("frame") or "—"] if show_frame else []) + [
            c["loader"] or "?", str(c["frames"]), rate, span, _fmt_shape(c), note,
        ]
        rows.append(row)

    # Build one list per column so max() always gets an iterable; calling
    # max(int) with a single positional int raises TypeError when rows is empty.
    widths = [
        max([len(header)] + [len(r[i]) for r in rows])
        for i, header in enumerate(headers)
    ]

    def render(cols) -> str:
        # The last column (notes) has an empty header, so strip the padding.
        return " ".join(col.ljust(widths[i]) for i, col in enumerate(cols)).rstrip()

    print(render(headers))
    for r in rows:
        print(render(r))
225
+
226
+
227
def _print_status(s: dict) -> None:
    """Print the human-readable status report built by ``_build_status``.

    A ``root`` report lists sequences, raw/preprocess channels and any
    untracked channels; any other kind is printed as a single sequence with
    its channel table. Calibration, event count and issues follow in both
    cases.
    """
    is_root = s["kind"] == "root"
    if is_root:
        sequences = s["sequences"]
        print(f"RawDataset — {s['name']} (root · {len(sequences)} sequences)")
        print(_BAR)
        print(f"sequences {', '.join(sequences)}")
        print(f"raw {_fmt_channels(s['raw'])}")
        print(f"preprocess {_fmt_channels(s['preprocess'])}")
        if s["untracked"]:
            print(f"untracked {', '.join(s['untracked'])} ← run `apairo add`")
    else:
        print(f"RawDataset — {s['name']} (sequence)")
        print(_BAR)
        if s.get("start") is not None:
            print(f"start {s['start']:.2f}s (span shown relative to this)")
        has_channels = s["channels"] or s["untracked"]
        if not has_channels:
            print("(no channels)")
        else:
            _print_channel_table(s["channels"], s["untracked"], s.get("start"))

    # Footer shared by both report kinds.
    if s.get("calibration"):
        print(f"calibration {', '.join(s['calibration'])} (static, in .apairo/calibration.yaml)")
    print(f"events {s['events']}")
    issues = s["issues"]
    print(f"issues {'none' if not issues else ''}")
    for issue in issues:
        print(f" - {issue}")
251
+
252
+
253
def cmd_status(args: argparse.Namespace) -> int:
    """Handle ``apairo status``.

    Returns 2 when ``args.path`` is not a directory, 1 when the directory is
    not an apairo dataset, and 0 after printing the report (JSON when
    ``args.json`` is set, text otherwise).
    """
    target = Path(args.path).expanduser()
    if not target.is_dir():
        print(f"Not a directory: {target}", file=sys.stderr)
        return 2

    report = _build_status(target)
    if report is None:
        message = (
            f"'{target}' is not an apairo dataset (no .apairo, no sequences). "
            f"Run `apairo init` to set it up."
        )
        print(message, file=sys.stderr)
        return 1

    if not args.json:
        _print_status(report)
    else:
        print(json.dumps(report, indent=2, sort_keys=True))
    return 0
268
+
269
+
270
+ # ── init ────────────────────────────────────────────────────────────────────
271
+
272
def cmd_init(args: argparse.Namespace) -> int:
    """Handle ``apairo init``.

    Calls ``RawDataset.init`` on ``args.path`` (merging unless ``args.force``
    is set, in which case existing sidecars are overwritten), reports the
    written file and then prints the resulting status.

    Returns:
        2 when ``args.path`` is not a directory, 1 when ``RawDataset.init``
        raises ``FileNotFoundError`` or ``ValueError``, 0 otherwise.
    """
    path = Path(args.path).expanduser()
    if not path.is_dir():
        print(f"Not a directory: {path}", file=sys.stderr)
        return 2
    try:
        written = RawDataset.init(
            path, merge=not args.force, overwrite=args.force, name=args.name
        )
    except (FileNotFoundError, ValueError) as exc:
        print(f"init failed: {exc}", file=sys.stderr)
        return 1
    # Show the written path relative to the dataset directory when possible.
    rel = written.relative_to(path) if written.is_relative_to(path) else written
    print(f"✓ wrote {rel}")
    status = _build_status(path)
    if status is None:
        # _build_status returns None when it finds no sequence; init itself
        # succeeded, so report that instead of crashing in _print_status.
        print(f"'{path}' has no sequences to report yet.", file=sys.stderr)
    else:
        _print_status(status)
    return 0
288
+
289
+
290
+ # ── entry point ───────────────────────────────────────────────────────────────
291
+
292
def _add_common(p: argparse.ArgumentParser) -> None:
    """Register the arguments shared by every built-in subcommand.

    Adds the optional positional ``path`` and the ``--as`` option (stored as
    ``as_``), whose choices are the keys of ``DATASETS``.
    """
    path_options = {
        "nargs": "?",
        "default": ".",
        "help": "dataset directory (default: .)",
    }
    class_options = {
        "dest": "as_",
        "metavar": "CLASS",
        "choices": list(DATASETS),
        "default": "RawDataset",
        "help": "interpret with this dataset class",
    }
    p.add_argument("path", **path_options)
    p.add_argument("--as", **class_options)
296
+
297
+
298
def _discover_plugins() -> dict:
    """Ecosystem subcommands registered under the ``apairo.cli_plugins`` entry
    point group (e.g. ``apairo extractor`` from ``apairo_extractor``).

    Discovery is by installed metadata only: entry points are listed here but
    not loaded; the caller loads the one it dispatches to.

    Returns:
        A dict mapping each entry-point name to its entry-point object.
    """
    from importlib.metadata import entry_points

    try:
        found = entry_points(group="apairo.cli_plugins")
    except TypeError:
        # Python < 3.10: entry_points() takes no selection arguments and
        # returns a mapping of group name -> entry points.
        found = entry_points().get("apairo.cli_plugins", ())
    return {ep.name: ep for ep in found}
308
+
309
+
310
def _build_parser(plugin_names) -> argparse.ArgumentParser:
    """Create the ``apairo`` argument parser with the ``init`` and ``status`` subcommands.

    When ``plugin_names`` is non-empty, the names are listed (sorted) in the
    help epilog as ecosystem commands. A subcommand is required.
    """
    if plugin_names:
        listed = ", ".join(sorted(plugin_names))
        epilog = "ecosystem commands: " + listed + " (run `apairo <command> --help`)"
    else:
        epilog = None

    parser = argparse.ArgumentParser(
        prog="apairo",
        description="Inspect and initialize apairo datasets.",
        epilog=epilog,
    )
    commands = parser.add_subparsers(dest="command", required=True)

    init_cmd = commands.add_parser(
        "init", help="write .apairo sidecars by scanning a directory"
    )
    _add_common(init_cmd)
    init_cmd.add_argument("--name", help="dataset name for the root manifest")
    init_cmd.add_argument(
        "--force",
        action="store_true",
        help="rebuild from scratch (default: merge, non-destructive)",
    )

    status_cmd = commands.add_parser(
        "status", help="show what a dataset directory contains"
    )
    _add_common(status_cmd)
    status_cmd.add_argument(
        "--json", action="store_true", help="machine-readable output"
    )
    return parser
331
+
332
+
333
def main(argv: Optional[list[str]] = None) -> None:
    """Entry point for the ``apairo`` command; always exits via ``SystemExit``.

    ``argv`` defaults to ``sys.argv[1:]``. If the first argument names a
    discovered plugin, the plugin is loaded and called with the remaining
    arguments, and its result becomes the exit code when it is an ``int``
    (otherwise 0). Any other input is parsed as a built-in command.
    """
    if argv is None:
        argv = sys.argv[1:]
    argv = list(argv)

    # Ecosystem dispatch: `apairo <plugin> ...` hands the rest to the plugin,
    # which parses its own arguments. Built-ins (init/status) fall through.
    plugins = _discover_plugins()
    if argv and argv[0] in plugins:
        entry = plugins[argv[0]]
        plugin_main = entry.load()
        outcome = plugin_main(argv[1:])
        exit_code = outcome if isinstance(outcome, int) else 0
        raise SystemExit(exit_code)

    parser = _build_parser(set(plugins))
    args = parser.parse_args(argv)
    builtin_commands = {"init": cmd_init, "status": cmd_status}
    run = builtin_commands[args.command]
    sys.exit(run(args))
347
+
348
+
349
# Run the CLI when this module is executed as a script rather than imported.
if __name__ == "__main__":
    main()
apairo/core/__init__.py ADDED
@@ -0,0 +1,30 @@
1
+ from .abstract_loader import AbstractLoader
2
+ from .abstract_dataset import AbstractDataset
3
+ from .synchronous_dataset import SynchronousDataset
4
+ from .configurable_dataset import ConfigurableDataset
5
+ from .sample import Sample
6
+ from .sequence_view import SequenceView
7
+ from .filtered_view import FilteredView
8
+ from .channel_view import ChannelView
9
+ from .cached_dataset import CachedDataset
10
+ from .synchronized_view import SynchronizedView
11
+ from .interpolator import Interpolator
12
+ from .transform import Compose
13
+
14
+ from . import utils
15
+
16
+ __all__ = [
17
+ "AbstractLoader",
18
+ "AbstractDataset",
19
+ "SynchronousDataset",
20
+ "ConfigurableDataset",
21
+ "Sample",
22
+ "SequenceView",
23
+ "FilteredView",
24
+ "ChannelView",
25
+ "CachedDataset",
26
+ "SynchronizedView",
27
+ "Interpolator",
28
+ "Compose",
29
+ "utils",
30
+ ]