PyPI - apairo - Versions diffs - 0.2.0__py3-none-any.whl - Mend

apairo 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78) hide show

apairo/__init__.py +68 -0
apairo/cli.py +350 -0
apairo/core/__init__.py +30 -0
apairo/core/abstract_dataset.py +398 -0
apairo/core/abstract_loader.py +30 -0
apairo/core/cached_dataset.py +71 -0
apairo/core/channel_view.py +60 -0
apairo/core/config/keys.yaml +80 -0
apairo/core/config.py +257 -0
apairo/core/configurable_dataset.py +246 -0
apairo/core/filtered_view.py +75 -0
apairo/core/interpolator.py +44 -0
apairo/core/layout.py +148 -0
apairo/core/preprocessor.py +113 -0
apairo/core/profiled_dataset.py +699 -0
apairo/core/root_sequence.py +193 -0
apairo/core/sample.py +16 -0
apairo/core/sequence_view.py +55 -0
apairo/core/synchronized_view.py +315 -0
apairo/core/synchronous_dataset.py +47 -0
apairo/core/transform.py +23 -0
apairo/core/utils/__init__.py +0 -0
apairo/core/utils/exceptions.py +20 -0
apairo/core/utils/typing.py +3 -0
apairo/dataset/__init__.py +46 -0
apairo/dataset/concat.py +87 -0
apairo/dataset/config/tartan_kitti.yaml +94 -0
apairo/dataset/goose/__init__.py +3 -0
apairo/dataset/goose/dataset.py +18 -0
apairo/dataset/kitti/__init__.py +3 -0
apairo/dataset/kitti/dataset.py +281 -0
apairo/dataset/mnt/__init__.py +4 -0
apairo/dataset/mnt/dataset.py +516 -0
apairo/dataset/mnt/layout.py +109 -0
apairo/dataset/profiles/goose.yaml +14 -0
apairo/dataset/profiles/rellis.yaml +25 -0
apairo/dataset/profiles/semantic_kitti.yaml +16 -0
apairo/dataset/raw/__init__.py +3 -0
apairo/dataset/raw/dataset.py +278 -0
apairo/dataset/rellis/__init__.py +3 -0
apairo/dataset/rellis/dataset.py +22 -0
apairo/dataset/semantic_kitti/__init__.py +3 -0
apairo/dataset/semantic_kitti/dataset.py +17 -0
apairo/dataset/stream.py +88 -0
apairo/dataset/tartan_kitti/__init__.py +3 -0
apairo/dataset/tartan_kitti/dataset.py +286 -0
apairo/dataset/tartan_kitti/integration.yaml +68 -0
apairo/dataset/tartan_kitti/profile.yaml +28 -0
apairo/dataset/zip.py +104 -0
apairo/loader/__init__.py +93 -0
apairo/loader/bin_loader.py +22 -0
apairo/loader/img_loader.py +61 -0
apairo/loader/npy_loader.py +29 -0
apairo/loader/npys_loader.py +54 -0
apairo/loader/tar_loader.py +99 -0
apairo/loader/txt_loader.py +38 -0
apairo/loader/zarr_loader.py +48 -0
apairo/preprocess/__init__.py +8 -0
apairo/preprocess/runner.py +133 -0
apairo/utils/__init__.py +15 -0
apairo/utils/files.py +36 -0
apairo/utils/naming.py +22 -0
apairo/utils/paths.py +4 -0
apairo/utils/resample.py +84 -0
apairo/utils/timestamps.py +88 -0
apairo/utils/types.py +10 -0
apairo/utils/utils.py +55 -0
apairo/writer/__init__.py +14 -0
apairo/writer/bin_writer.py +12 -0
apairo/writer/npy_writer.py +9 -0
apairo/writer/tar_writer.py +60 -0
apairo/writer/zarr_writer.py +72 -0
apairo-0.2.0.dist-info/METADATA +286 -0
apairo-0.2.0.dist-info/RECORD +78 -0
apairo-0.2.0.dist-info/WHEEL +5 -0
apairo-0.2.0.dist-info/entry_points.txt +2 -0
apairo-0.2.0.dist-info/licenses/LICENSE +21 -0
apairo-0.2.0.dist-info/top_level.txt +1 -0

apairo/__init__.py ADDED Viewed

@@ -0,0 +1,68 @@
+"""Apairo -- unified robotics dataset loader."""
+import logging
+from apairo.core.sample import Sample
+from apairo.core.synchronous_dataset import SynchronousDataset
+from apairo.core.configurable_dataset import ConfigurableDataset
+from apairo.preprocess import FramePreprocessor, SequencePreprocessor
+from apairo.dataset.raw import RawDataset
+from apairo.dataset.tartan_kitti import TartanKittiDataset
+from apairo.dataset.concat import ConcatDataset
+from apairo.dataset.zip import ZipDataset
+from apairo.dataset.stream import StreamDataset
+from apairo.dataset import split_sequences
+from apairo.core.sequence_view import SequenceView
+from apairo.core.filtered_view import FilteredView
+from apairo.core.channel_view import ChannelView
+from apairo.core.cached_dataset import CachedDataset
+from apairo.core.synchronized_view import SynchronizedView
+from apairo.core.interpolator import Interpolator
+from apairo.dataset.semantic_kitti import SemanticKittiDataset
+from apairo.dataset.rellis import Rellis3DDataset
+from apairo.dataset.goose import Goose3DDataset
+from apairo.dataset.mnt import MNTDataset
+from apairo.core.layout import ChannelSpec, DatasetLayout
+from apairo.core.transform import Compose
+from apairo.core.config import register_channel, register_raw_channel, verify_config
+from apairo.writer import WRITERS
+from apairo.loader import DERIVED_LOADERS
+# Attach a NullHandler to the package logger so apairo's log records are
+# dropped unless the importing application configures logging itself.
+logging.getLogger(__name__).addHandler(logging.NullHandler())
+# Version string of this package.
+__version__ = "0.2.0"
+# Public names exported by ``from apairo import *``; every entry is either
+# imported at the top of this module or defined just above.
+__all__ = [
+    "Sample",
+    "SynchronousDataset",
+    "ConfigurableDataset",
+    "FramePreprocessor",
+    "SequencePreprocessor",
+    "RawDataset",
+    "TartanKittiDataset",
+    "ConcatDataset",
+    "ZipDataset",
+    "StreamDataset",
+    "split_sequences",
+    "SequenceView",
+    "FilteredView",
+    "ChannelView",
+    "CachedDataset",
+    "SynchronizedView",
+    "Interpolator",
+    "SemanticKittiDataset",
+    "Rellis3DDataset",
+    "Goose3DDataset",
+    "MNTDataset",
+    "ChannelSpec",
+    "DatasetLayout",
+    "Compose",
+    "register_channel",
+    "register_raw_channel",
+    "verify_config",
+    "WRITERS",
+    "DERIVED_LOADERS",
+    "__version__",
+]

apairo/cli.py ADDED Viewed

@@ -0,0 +1,350 @@
+"""apairo command-line interface.
+A thin wrapper over the library -- no third-party dependencies. Commands mirror
+familiar terminal/git verbs:
+* ``apairo init``   -- write the ``.apairo`` sidecar(s) by scanning a directory
+                       (sequence -> ``channels.yaml``; root -> ``dataset.yaml``).
+* ``apairo status`` -- report what a dataset directory contains: sequences,
+                       tracked channels, channels detected on disk but not yet
+                       registered ("untracked"), event count, and config issues.
+``add`` (register an untracked channel) and ``check`` (consistency check) are
+planned follow-ups; ``status`` already surfaces what ``add`` will act on.
+"""
+from __future__ import annotations
+import argparse
+import json
+import sys
+from pathlib import Path
+from typing import Optional
+import numpy as np
+from apairo.core.config import config_exists, read_calibration, read_config, verify_config
+from apairo.dataset.kitti.dataset import _detect_loader
+from apairo.dataset.raw import RawDataset
+from apairo.dataset.raw.dataset import _read_manifest
+# Datasets selectable with ``--as``. Generic (profile-free) for now; profiled
+# datasets (Tartan, Semantic, ...) will register here as the CLI grows.
+# The keys are also the accepted values of the ``--as`` option.
+DATASETS = {"RawDataset": RawDataset}
+# Horizontal rule (52 box-drawing characters) printed under the status title.
+_BAR = "─" * 52
+# ── helpers ─────────────────────────────────────────────────────────────────
+def _read_timestamps(channel_dir: Path):
+    ts_path = channel_dir / "timestamps.txt"
+    if not ts_path.exists():
+        return None
+    try:
+        return np.atleast_1d(np.loadtxt(ts_path))
+    except Exception:
+        return None
+def _rate_span(ts):
+    """Average rate (Hz) and (first, last) timestamps from a timestamp array."""
+    if ts is None or len(ts) == 0:
+        return None, None
+    t0, t1 = float(ts[0]), float(ts[-1])
+    rate = (len(ts) - 1) / (t1 - t0) if len(ts) >= 2 and t1 > t0 else None
+    return rate, (t0, t1)
+def _channel_shape(channel_dir: Path, loader: Optional[str]):
+    """Per-frame shape + dtype from a ``.npy`` header (mmap -- no data read)."""
+    npys = sorted(channel_dir.glob("*.npy"))
+    if not npys:
+        return None, None
+    try:
+        arr = np.load(npys[0], mmap_mode="r")
+    except Exception:
+        return None, None
+    # A stacked ``npy`` file is (N, *frame); a per-frame ``npys`` file is one frame.
+    shape = arr.shape[1:] if loader == "npy" else arr.shape
+    return list(shape), str(arr.dtype)
+def _count_files(channel_dir: Path) -> int:
+    return sum(
+        1 for p in channel_dir.iterdir() if p.is_file() and p.name != "timestamps.txt"
+    )
+def _channel_detail(seq_dir: Path, channel: str, meta: Optional[dict]) -> dict:
+    """Per-channel facts, all cheap: timestamps give frames/rate/span, the .npy
+    header gives shape/dtype (mmap). ``meta=None`` marks an untracked channel."""
+    cdir = seq_dir / channel
+    ts = _read_timestamps(cdir)
+    rate, span = _rate_span(ts)
+    loader = meta.get("loader") if meta else _detect_loader(cdir)
+    shape, dtype = _channel_shape(cdir, loader)
+    detail = {
+        "kind": meta.get("kind", "raw") if meta else "untracked",
+        "frame": meta.get("frame") if meta else None,
+        "transform": meta.get("transform") if meta else None,
+        "loader": loader,
+        "frames": len(ts) if ts is not None else _count_files(cdir),
+        "rate_hz": rate,
+        "span": list(span) if span else None,
+        "shape": shape,
+        "dtype": dtype,
+    }
+    if meta and meta.get("timestamps_from"):
+        detail["timestamps_from"] = meta["timestamps_from"]
+    if meta and meta.get("sources"):
+        detail["sources"] = list(meta["sources"])
+    return detail
+def _untracked_channels(seq_dir: Path) -> list[str]:
+    """Channel-like sub-directories present on disk but absent from channels.yaml."""
+    tracked = set(read_config(seq_dir).get("channels", {})) if config_exists(seq_dir) else set()
+    return [
+        d.name
+        for d in sorted(seq_dir.iterdir())
+        if d.is_dir()
+        and not d.name.startswith(".")
+        and d.name not in tracked
+        and _detect_loader(d) is not None
+    ]
+def _seq_info(seq_dir: Path) -> dict:
+    cfg = read_config(seq_dir).get("channels", {}) if config_exists(seq_dir) else {}
+    channels = {k: _channel_detail(seq_dir, k, v) for k, v in sorted(cfg.items())}
+    untracked = {u: _channel_detail(seq_dir, u, None) for u in _untracked_channels(seq_dir)}
+    starts = [c["span"][0] for c in {**channels, **untracked}.values() if c["span"]]
+    return {
+        "channels": channels,
+        "untracked": untracked,
+        "start": min(starts) if starts else None,
+        "events": sum(c["frames"] for c in channels.values()),
+        "issues": verify_config(seq_dir) if config_exists(seq_dir)
+        else ["not initialized — run `apairo init`"],
+    }
+def _is_sequence(path: Path) -> bool:
+    return config_exists(path) or RawDataset._is_sequence_layout(path)
+def _sequence_dirs(root: Path) -> list[Path]:
+    return [
+        d for d in sorted(root.iterdir())
+        if d.is_dir() and not d.name.startswith(".") and _is_sequence(d)
+    ]
+def _fmt_channels(d: dict) -> str:
+    return ", ".join(f"{k} ({v})" for k, v in sorted(d.items())) if d else "—"
+# ── status ──────────────────────────────────────────────────────────────────
+def _build_status(path: Path) -> Optional[dict]:
+    if _is_sequence(path):
+        return {
+            "name": path.name, "kind": "sequence",
+            "calibration": sorted(read_calibration(path)),
+            **_seq_info(path),
+        }
+    seq_dirs = _sequence_dirs(path)
+    if not seq_dirs:
+        return None
+    per = {d.name: _seq_info(d) for d in seq_dirs}
+    raw: dict = {}
+    preprocess: dict = {}
+    untracked: set[str] = set()
+    issues: list[str] = []
+    calibration: set[str] = set()
+    events = 0
+    for name, info in per.items():
+        for ch, d in info["channels"].items():
+            (raw if d["kind"] == "raw" else preprocess)[ch] = d["loader"]
+        untracked.update(f"{name}/{u}" for u in info["untracked"])
+        issues += [f"{name}: {i}" for i in info["issues"]]
+        events += info["events"]
+    for d in seq_dirs:
+        calibration.update(read_calibration(d))
+    manifest = _read_manifest(path)
+    return {
+        "name": manifest.get("name", path.name),
+        "kind": "root",
+        "sequences": list(per),
+        "raw": raw,
+        "preprocess": preprocess,
+        "untracked": sorted(untracked),
+        "calibration": sorted(calibration),
+        "events": events,
+        "issues": issues,
+    }
+def _fmt_shape(detail: dict) -> str:
+    if detail["shape"] is None:
+        return "?"
+    s = f"({', '.join(map(str, detail['shape']))})"
+    return f"{s} {detail['dtype']}" if detail.get("dtype") else s
+def _print_channel_table(channels: dict, untracked: dict, t0_ref: Optional[float]) -> None:
+    ref = t0_ref or 0.0
+    all_ch = list(channels.items()) + list(untracked.items())
+    show_frame = any(c.get("frame") for _, c in all_ch)  # only when declared
+    headers = ["channel", "kind"] + (["frame"] if show_frame else []) + \
+        ["loader", "frames", "rate", "span", "shape", ""]
+    rows = []
+    for name, c in all_ch:
+        rate = f"{c['rate_hz']:.1f} Hz" if c["rate_hz"] else "—"
+        span = f"{c['span'][0] - ref:.2f}–{c['span'][1] - ref:.2f}s" if c["span"] else "—"
+        if c["kind"] == "untracked":
+            note = "← run `apairo add`"
+        elif c.get("transform"):
+            tf = c["transform"]
+            note = f"← tf {tf.get('parent')}→{tf.get('child')}"
+            if tf.get("static"):
+                note += " (static)"
+        elif c.get("timestamps_from"):
+            note = f"← from {c['timestamps_from']}"
+        else:
+            note = ""
+        row = [name, c["kind"]] + ([c.get("frame") or "—"] if show_frame else []) + [
+            c["loader"] or "?", str(c["frames"]), rate, span, _fmt_shape(c), note,
+        ]
+        rows.append(row)
+    widths = [max(len(headers[i]), *(len(r[i]) for r in rows)) for i in range(len(headers))]
+    line = lambda cols: "  ".join(c.ljust(widths[i]) for i, c in enumerate(cols)).rstrip()
+    print(line(headers))
+    for r in rows:
+        print(line(r))
+def _print_status(s: dict) -> None:
+    if s["kind"] == "root":
+        print(f"RawDataset — {s['name']}   (root · {len(s['sequences'])} sequences)")
+        print(_BAR)
+        print(f"sequences   {', '.join(s['sequences'])}")
+        print(f"raw         {_fmt_channels(s['raw'])}")
+        print(f"preprocess  {_fmt_channels(s['preprocess'])}")
+        if s["untracked"]:
+            print(f"untracked   {', '.join(s['untracked'])}   ← run `apairo add`")
+    else:
+        print(f"RawDataset — {s['name']}   (sequence)")
+        print(_BAR)
+        if s.get("start") is not None:
+            print(f"start       {s['start']:.2f}s   (span shown relative to this)")
+        if s["channels"] or s["untracked"]:
+            _print_channel_table(s["channels"], s["untracked"], s.get("start"))
+        else:
+            print("(no channels)")
+    if s.get("calibration"):
+        print(f"calibration {', '.join(s['calibration'])}   (static, in .apairo/calibration.yaml)")
+    print(f"events      {s['events']}")
+    print(f"issues      {'none' if not s['issues'] else ''}")
+    for issue in s["issues"]:
+        print(f"            - {issue}")
+def cmd_status(args: argparse.Namespace) -> int:
+    path = Path(args.path).expanduser()
+    if not path.is_dir():
+        print(f"Not a directory: {path}", file=sys.stderr)
+        return 2
+    status = _build_status(path)
+    if status is None:
+        print(f"'{path}' is not an apairo dataset (no .apairo, no sequences). "
+              f"Run `apairo init` to set it up.", file=sys.stderr)
+        return 1
+    if args.json:
+        print(json.dumps(status, indent=2, sort_keys=True))
+    else:
+        _print_status(status)
+    return 0
+# ── init ────────────────────────────────────────────────────────────────────
+def cmd_init(args: argparse.Namespace) -> int:
+    path = Path(args.path).expanduser()
+    if not path.is_dir():
+        print(f"Not a directory: {path}", file=sys.stderr)
+        return 2
+    try:
+        written = RawDataset.init(
+            path, merge=not args.force, overwrite=args.force, name=args.name
+        )
+    except (FileNotFoundError, ValueError) as exc:
+        print(f"init failed: {exc}", file=sys.stderr)
+        return 1
+    rel = written.relative_to(path) if written.is_relative_to(path) else written
+    print(f"✓ wrote {rel}")
+    _print_status(_build_status(path))
+    return 0
+# ── entry point ───────────────────────────────────────────────────────────────
+def _add_common(p: argparse.ArgumentParser) -> None:
+    p.add_argument("path", nargs="?", default=".", help="dataset directory (default: .)")
+    p.add_argument("--as", dest="as_", metavar="CLASS", choices=list(DATASETS),
+                   default="RawDataset", help="interpret with this dataset class")
+def _discover_plugins() -> dict:
+    """Ecosystem subcommands registered under the ``apairo.cli_plugins`` entry
+    point group (e.g. ``apairo extractor`` from ``apairo_extractor``).
+    Discovery is by installed metadata only -- apairo never imports or depends
+    on its tools; it dispatches to whatever is installed.
+    """
+    from importlib.metadata import entry_points
+    return {ep.name: ep for ep in entry_points(group="apairo.cli_plugins")}
+def _build_parser(plugin_names) -> argparse.ArgumentParser:
+    epilog = None
+    if plugin_names:
+        epilog = ("ecosystem commands: " + ", ".join(sorted(plugin_names))
+                  + "   (run `apairo <command> --help`)")
+    parser = argparse.ArgumentParser(
+        prog="apairo", description="Inspect and initialize apairo datasets.",
+        epilog=epilog,
+    )
+    sub = parser.add_subparsers(dest="command", required=True)
+    p_init = sub.add_parser("init", help="write .apairo sidecars by scanning a directory")
+    _add_common(p_init)
+    p_init.add_argument("--name", help="dataset name for the root manifest")
+    p_init.add_argument("--force", action="store_true",
+                        help="rebuild from scratch (default: merge, non-destructive)")
+    p_status = sub.add_parser("status", help="show what a dataset directory contains")
+    _add_common(p_status)
+    p_status.add_argument("--json", action="store_true", help="machine-readable output")
+    return parser
+def main(argv: Optional[list[str]] = None) -> None:
+    argv = list(sys.argv[1:] if argv is None else argv)
+    # Ecosystem dispatch: `apairo <plugin> ...` hands the rest to the plugin,
+    # which parses its own arguments. Built-ins (init/status) fall through.
+    plugins = _discover_plugins()
+    if argv and argv[0] in plugins:
+        plugin_main = plugins[argv[0]].load()
+        result = plugin_main(argv[1:])
+        raise SystemExit(result if isinstance(result, int) else 0)
+    args = _build_parser(set(plugins)).parse_args(argv)
+    handler = {"init": cmd_init, "status": cmd_status}[args.command]
+    sys.exit(handler(args))
+if __name__ == "__main__":
+    main()

apairo/core/__init__.py ADDED Viewed

@@ -0,0 +1,30 @@
+from .abstract_loader import AbstractLoader
+from .abstract_dataset import AbstractDataset
+from .synchronous_dataset import SynchronousDataset
+from .configurable_dataset import ConfigurableDataset
+from .sample import Sample
+from .sequence_view import SequenceView
+from .filtered_view import FilteredView
+from .channel_view import ChannelView
+from .cached_dataset import CachedDataset
+from .synchronized_view import SynchronizedView
+from .interpolator import Interpolator
+from .transform import Compose
+from . import utils
+# Public names exported by ``from apairo.core import *``; each entry is
+# imported at the top of this module.
+__all__ = [
+    "AbstractLoader",
+    "AbstractDataset",
+    "SynchronousDataset",
+    "ConfigurableDataset",
+    "Sample",
+    "SequenceView",
+    "FilteredView",
+    "ChannelView",
+    "CachedDataset",
+    "SynchronizedView",
+    "Interpolator",
+    "Compose",
+    "utils",
+]