PyPI - mvdata - Versions diffs - 0.9.2__tar.gz → 0.9.4__tar.gz - Mend

mvdata 0.9.2__tar.gz → 0.9.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

{mvdata-0.9.2 → mvdata-0.9.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mvdata
-Version: 0.9.2
+Version: 0.9.4
 Summary: Gracia Dataset Convention - Python library for working with multi-view video datasets
 Author: Gracia Team
 License: MIT
@@ -50,3 +50,21 @@ Python library for working with Gracia multi-view video datasets.
 The package provides readers, writers, conversion tools, and GPU-aware video
 decode helpers for the dataset layouts documented in the `docs` directory.
+## Release
+Prepare the next release with the local helper script:
+```bash
+UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --dry-run
+UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --push
+```
+Use `minor` or `major` instead of `patch` for larger version bumps, or pass an
+explicit version with `--version 1.2.3`. The script updates `pyproject.toml` and
+`uv.lock` through `uv`, runs the tests and build, creates a `Release vX.Y.Z`
+commit, creates an annotated `vX.Y.Z` tag, and pushes the branch and tag when
+`--push` is set.
+Pushing the tag triggers the GitHub Actions build, PyPI publish, and GitHub
+Release workflow.

mvdata-0.9.4/README.md ADDED Viewed

@@ -0,0 +1,24 @@
+# mvdata
+Python library for working with Gracia multi-view video datasets.
+The package provides readers, writers, conversion tools, and GPU-aware video
+decode helpers for the dataset layouts documented in the `docs` directory.
+## Release
+Prepare the next release with the local helper script:
+```bash
+UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --dry-run
+UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --push
+```
+Use `minor` or `major` instead of `patch` for larger version bumps, or pass an
+explicit version with `--version 1.2.3`. The script updates `pyproject.toml` and
+`uv.lock` through `uv`, runs the tests and build, creates a `Release vX.Y.Z`
+commit, creates an annotated `vX.Y.Z` tag, and pushes the branch and tag when
+`--push` is set.
+Pushing the tag triggers the GitHub Actions build, PyPI publish, and GitHub
+Release workflow.

{mvdata-0.9.2 → mvdata-0.9.4}/mvdata/codec/__init__.py RENAMED Viewed

@@ -29,7 +29,6 @@ from ._imports import (
     try_import_torch,
 )
 from .decode import (
-    _try_open_nvdec,
     decode_mp4_to_rgb,
     decode_mp4_to_rgb_nvdec,
     decode_mp4_to_rgb_pyav,
@@ -59,6 +58,7 @@ from .frames import (
     numpy_to_cupy_rgb,
 )
 from .native_yuv import native_nvdec_to_rgb_cupy, native_nvdec_to_rgb_numpy
+from .nvdec import _try_open_nvdec
 from .probe import (
     infer_video_bit_depth_from_frame,
     infer_video_bit_depth_from_pixel_format_name,

{mvdata-0.9.2 → mvdata-0.9.4}/mvdata/codec/decode.py RENAMED Viewed

@@ -11,9 +11,15 @@ from ..gpu_policy import nvdec_decode_allowed
 from ._imports import try_import_av, try_import_cupy, try_import_pynvvideocodec, try_import_torch
 from .frames import _decoded_frame_to_rgb_numpy, _pyav_frame_to_rgb
 from .native_yuv import native_nvdec_to_rgb_numpy
+from .nvdec import (
+    _close_nvdec_decoder,
+    _NvdecOrdinalRawFrameSource,
+    _NvdecPtsRawFrameSource,
+    _nvdec_scanned_presentation_pts,
+    _try_open_nvdec,
+)
 from .probe import (
     infer_video_bit_depth_from_stream,
-    nvdec_decode_compatibility_issue,
     nvdec_decode_compatibility_issue_for_path,
     probe_video_color_metadata_pyav,
     probe_video_bit_depth,
@@ -21,32 +27,88 @@ from .probe import (
 )
-def _try_open_nvdec(
-    nvc,
-    path_str: str,
+def _nvdec_raw_to_rgb_numpy(
+    raw,
+    *,
+    source_bit_depth: int,
+    color_metadata: dict[str, Any],
+    torch_mod: Any,
+) -> np.ndarray:
+    if source_bit_depth > 8:
+        return native_nvdec_to_rgb_numpy(
+            raw,
+            bit_depth=source_bit_depth,
+            **color_metadata,
+        )
+    return _decoded_frame_to_rgb_numpy(raw, torch_mod, bit_depth=source_bit_depth)
+def _decode_simple_nvdec_by_ordinal(
+    decoder: Any,
+    path: Path,
+    expect_count: int,
+    *,
+    source_bit_depth: int,
+    color_metadata: dict[str, Any],
+    torch_mod: Any,
+) -> list[np.ndarray]:
+    raw_source = _NvdecOrdinalRawFrameSource(decoder, path)
+    try:
+        try:
+            frame_count = min(len(decoder), expect_count)
+        except Exception:
+            frame_count = expect_count
+        frames: list[np.ndarray] = []
+        for index in range(frame_count):
+            raw, _ = raw_source.frame_by_index(index)
+            frames.append(
+                _nvdec_raw_to_rgb_numpy(
+                    raw,
+                    source_bit_depth=source_bit_depth,
+                    color_metadata=color_metadata,
+                    torch_mod=torch_mod,
+                )
+            )
+        return frames
+    finally:
+        raw_source.close()
+def _decode_nvdec_by_presentation_pts(
+    nvc: Any,
+    mp4_path: Path,
+    *,
     gpu_id: int,
     use_device_memory: bool,
-    *,
-    output_color_type=None,
-):
-    """Open a SimpleDecoder, tolerating API-version differences in the kwarg surface."""
-    base = dict(
+    output_color_type: Any,
+    presentation_pts: list[int],
+    expect_count: int,
+    source_bit_depth: int,
+    color_metadata: dict[str, Any],
+    torch_mod: Any,
+) -> list[np.ndarray]:
+    raw_source = _NvdecPtsRawFrameSource(
+        nvc,
+        mp4_path,
         gpu_id=gpu_id,
         use_device_memory=use_device_memory,
-        output_color_type=output_color_type or nvc.OutputColorType.RGB,
+        output_color_type=output_color_type,
+        presentation_pts=presentation_pts,
     )
-    for extra in ({}, {"need_scanned_stream_metadata": True}):
-        try:
-            return nvc.SimpleDecoder(path_str, **base, **extra)
-        except TypeError:
-            if extra:
-                try:
-                    return nvc.SimpleDecoder(path_str, **base)
-                except Exception:
-                    pass
-        except Exception:
-            pass
-    return None
+    try:
+        return [
+            _nvdec_raw_to_rgb_numpy(
+                raw_source.frame(index),
+                source_bit_depth=source_bit_depth,
+                color_metadata=color_metadata,
+                torch_mod=torch_mod,
+            )
+            for index in range(expect_count)
+        ]
+    finally:
+        raw_source.close()
 def decode_mp4_to_rgb_pyav(mp4_path: Path, expect_count: int) -> List[np.ndarray]:
@@ -72,11 +134,7 @@ def decode_mp4_to_rgb_nvdec(nvc, mp4_path: Path, gpu_id: int, expect_count: int)
         )
     meta = probe_video_stream_metadata(nvc, mp4_path)
     bit_depth = int(meta.get("bitdepth", 8))
-    issue = nvdec_decode_compatibility_issue(
-        nvc, meta["width"], meta["height"],
-        gpu_id=gpu_id, codec=meta["codec"],
-        chroma_subsampling=meta["chroma_subsampling"], bitdepth=bit_depth,
-    )
+    issue = nvdec_decode_compatibility_issue_for_path(nvc, mp4_path, gpu_id)
     if issue is not None:
         raise RuntimeError(f"NVDEC decode unsupported for {mp4_path}: {issue}")
     source_bit_depth = probe_video_bit_depth(mp4_path, nvc=nvc)
@@ -91,7 +149,7 @@ def decode_mp4_to_rgb_nvdec(nvc, mp4_path: Path, gpu_id: int, expect_count: int)
     last_err: Exception | None = None
     for use_dev in use_dev_options:
         if source_bit_depth > 8:
-            dec = _try_open_nvdec(
+            metadata_decoder = _try_open_nvdec(
                 nvc,
                 path_str,
                 gpu_id,
@@ -99,27 +157,39 @@ def decode_mp4_to_rgb_nvdec(nvc, mp4_path: Path, gpu_id: int, expect_count: int)
                 output_color_type=output_color_type,
             )
         else:
-            dec = _try_open_nvdec(nvc, path_str, gpu_id, use_dev)
-        if dec is None:
+            metadata_decoder = _try_open_nvdec(nvc, path_str, gpu_id, use_dev)
+        if metadata_decoder is None:
             continue
         try:
-            n = min(len(dec), expect_count)
-            frames: List[np.ndarray] = []
-            for i in range(n):
-                raw = dec.get_batch_frames_by_index([i])[0]
-                if source_bit_depth > 8:
-                    frames.append(
-                        native_nvdec_to_rgb_numpy(
-                            raw,
-                            bit_depth=source_bit_depth,
-                            **color_metadata,
-                        )
-                    )
-                else:
-                    frames.append(
-                        _decoded_frame_to_rgb_numpy(raw, torch_mod, bit_depth=source_bit_depth)
-                    )
-            return frames
+            presentation_pts = _nvdec_scanned_presentation_pts(metadata_decoder, expect_count)
+            if presentation_pts is None:
+                return _decode_simple_nvdec_by_ordinal(
+                    metadata_decoder,
+                    mp4_path,
+                    expect_count,
+                    source_bit_depth=source_bit_depth,
+                    color_metadata=color_metadata,
+                    torch_mod=torch_mod,
+                )
+            _close_nvdec_decoder(metadata_decoder)
+            if len(presentation_pts) < expect_count:
+                raise RuntimeError(
+                    f"NVDEC scanned metadata has {len(presentation_pts)} timestamps, "
+                    f"expected at least {expect_count}"
+                )
+            return _decode_nvdec_by_presentation_pts(
+                nvc,
+                mp4_path,
+                gpu_id=gpu_id,
+                use_device_memory=use_dev,
+                output_color_type=output_color_type,
+                presentation_pts=presentation_pts,
+                expect_count=expect_count,
+                source_bit_depth=source_bit_depth,
+                color_metadata=color_metadata,
+                torch_mod=torch_mod,
+            )
         except Exception as e:
             last_err = e

mvdata-0.9.4/mvdata/codec/nvdec.py ADDED Viewed

@@ -0,0 +1,256 @@
+"""Shared NVDEC / PyNvVideoCodec helpers."""
+from __future__ import annotations
+from pathlib import Path
+from typing import Any
+def _try_open_nvdec(
+    nvc,
+    path_str: str,
+    gpu_id: int,
+    use_device_memory: bool,
+    *,
+    output_color_type=None,
+):
+    """Open a SimpleDecoder, tolerating API-version differences in the kwarg surface."""
+    base = dict(
+        gpu_id=gpu_id,
+        use_device_memory=use_device_memory,
+        output_color_type=output_color_type or nvc.OutputColorType.RGB,
+    )
+    for extra in ({"need_scanned_stream_metadata": True}, {}):
+        try:
+            return nvc.SimpleDecoder(path_str, **base, **extra)
+        except TypeError:
+            if extra:
+                try:
+                    return nvc.SimpleDecoder(path_str, **base)
+                except Exception:
+                    pass
+        except Exception:
+            pass
+    return None
+def _nvdec_frame_pts(frame: Any) -> int | None:
+    get_pts = getattr(frame, "getPTS", None)
+    if callable(get_pts):
+        pts = get_pts()
+        if pts is not None:
+            return int(pts)
+    pts = getattr(frame, "timestamp", None)
+    if pts is not None:
+        return int(pts)
+    return None
+def _nvdec_scanned_presentation_pts(decoder: Any, expect_count: int) -> list[int] | None:
+    del expect_count
+    get_scanned = getattr(decoder, "get_scanned_stream_metadata", None)
+    if not callable(get_scanned):
+        return None
+    metadata = get_scanned()
+    pts_values = getattr(metadata, "pts", None)
+    if pts_values is None:
+        return None
+    pts = [int(value) for value in pts_values]
+    if not pts:
+        return None
+    return pts
+def _create_low_level_nvdec(
+    nvc: Any,
+    path: Path | str,
+    *,
+    gpu_id: int,
+    use_device_memory: bool,
+    output_color_type: Any,
+) -> tuple[Any, Any]:
+    demuxer = nvc.CreateDemuxer(str(path))
+    decoder = nvc.CreateDecoder(
+        gpuid=gpu_id,
+        codec=demuxer.GetNvCodecId(),
+        usedevicememory=use_device_memory,
+        outputColorType=output_color_type,
+        latency=nvc.DisplayDecodeLatencyType.NATIVE,
+    )
+    return demuxer, decoder
+def _close_nvdec_decoder(decoder: Any) -> None:
+    if decoder is None:
+        return
+    for close_name in ("close", "stop"):
+        close_fn = getattr(decoder, close_name, None)
+        if callable(close_fn):
+            try:
+                close_fn()
+            except Exception:
+                pass
+            return
+class _NvdecOrdinalRawFrameSource:
+    def __init__(self, decoder: Any, path: Path | str):
+        self._decoder = decoder
+        self._path = path
+        self._get_batch_frames = getattr(decoder, "get_batch_frames", None)
+        self._get_batch_frames_by_index = getattr(decoder, "get_batch_frames_by_index", None)
+        self._seek_to_index = getattr(decoder, "seek_to_index", None)
+        if not callable(self._get_batch_frames_by_index) and not callable(self._get_batch_frames):
+            self.close()
+            raise RuntimeError(
+                f"{path}: NVDEC SimpleDecoder has no scanned metadata or ordinal frame API"
+            )
+        self._next_sequential_index: int | None = (
+            0 if callable(self._get_batch_frames) else None
+        )
+    def _decode_sequential(self, index: int):
+        if not callable(self._get_batch_frames):
+            raise RuntimeError("Sequential NVDEC batch API is unavailable")
+        batch = self._get_batch_frames(1)
+        if not batch:
+            raise RuntimeError(f"NVDEC returned no frame for {self._path} at index {index}")
+        self._next_sequential_index = index + 1
+        return batch[0]
+    def _decode_indexed(self, index: int):
+        if callable(self._get_batch_frames_by_index):
+            self._next_sequential_index = None
+            batch = self._get_batch_frames_by_index([index])
+            if not batch:
+                raise RuntimeError(f"NVDEC returned no frame for {self._path} at index {index}")
+            return batch[0]
+        if not callable(self._seek_to_index) or not callable(self._get_batch_frames):
+            raise RuntimeError("Indexed NVDEC access API is unavailable")
+        self._seek_to_index(index)
+        self._next_sequential_index = index
+        return self._decode_sequential(index)
+    def frame(self, index: int) -> tuple[Any, str]:
+        if self._next_sequential_index == index:
+            return self._decode_sequential(index), "sequential"
+        return self._decode_indexed(index), "indexed"
+    def frame_by_index(self, index: int) -> tuple[Any, str]:
+        if callable(self._get_batch_frames_by_index) or callable(self._seek_to_index):
+            return self._decode_indexed(index), "indexed"
+        return self.frame(index)
+    def reset(self) -> None:
+        self._next_sequential_index = 0 if callable(self._get_batch_frames) else None
+    def close(self) -> None:
+        _close_nvdec_decoder(self._decoder)
+        self._decoder = None
+class _NvdecPtsRawFrameSource:
+    def __init__(
+        self,
+        nvc: Any,
+        path: Path | str,
+        *,
+        gpu_id: int,
+        use_device_memory: bool,
+        output_color_type: Any,
+        presentation_pts: list[int],
+    ):
+        self._nvc = nvc
+        self._path = path
+        self._gpu_id = gpu_id
+        self._use_device_memory = use_device_memory
+        self._output_color_type = output_color_type
+        self._presentation_pts = presentation_pts
+        self._pts_to_index = {
+            pts: index
+            for index, pts in enumerate(self._presentation_pts)
+        }
+        if len(self._pts_to_index) != len(self._presentation_pts):
+            raise RuntimeError(f"{path}: duplicate presentation timestamps in NVDEC metadata")
+        self._demuxer = None
+        self._decoder = None
+        self._eos = False
+        self._last_decoded_index: int | None = None
+        self._pending_raw_by_index: dict[int, Any] = {}
+    def reset(self) -> None:
+        _close_nvdec_decoder(self._decoder)
+        self._demuxer, self._decoder = _create_low_level_nvdec(
+            self._nvc,
+            self._path,
+            gpu_id=self._gpu_id,
+            use_device_memory=self._use_device_memory,
+            output_color_type=self._output_color_type,
+        )
+        self._eos = False
+        self._last_decoded_index = None
+        self._pending_raw_by_index.clear()
+    def _ensure_decoder(self) -> None:
+        if self._decoder is None or self._demuxer is None:
+            self.reset()
+    def frame(self, index: int):
+        pending = self._pending_raw_by_index.pop(index, None)
+        if pending is not None:
+            return pending
+        if self._last_decoded_index is not None and index <= self._last_decoded_index:
+            self.reset()
+        else:
+            self._ensure_decoder()
+        assert self._decoder is not None
+        assert self._demuxer is not None
+        while not self._eos:
+            packet = self._demuxer.Demux()
+            outputs = self._decoder.Decode(packet)
+            if getattr(packet, "bsl", 0) == 0:
+                self._eos = True
+            target_raw = None
+            for raw in outputs:
+                pts = _nvdec_frame_pts(raw)
+                if pts is None:
+                    raise RuntimeError(
+                        f"{self._path}: NVDEC returned a frame without presentation timestamp"
+                    )
+                decoded_index = self._pts_to_index.get(pts)
+                if decoded_index is None:
+                    raise RuntimeError(
+                        f"{self._path}: NVDEC returned unknown presentation timestamp {pts}"
+                    )
+                self._last_decoded_index = decoded_index
+                if decoded_index > index:
+                    if target_raw is None:
+                        expected_pts = self._presentation_pts[index]
+                        raise RuntimeError(
+                            f"{self._path}: NVDEC skipped requested presentation frame "
+                            f"{index} (pts={expected_pts}); first later frame was "
+                            f"{decoded_index} (pts={pts}). The MP4 slice is missing "
+                            "decode dependencies before the requested frame."
+                        )
+                    self._pending_raw_by_index[decoded_index] = raw
+                if decoded_index == index:
+                    target_raw = raw
+            if target_raw is not None:
+                return target_raw
+        expected_pts = self._presentation_pts[index]
+        raise RuntimeError(
+            f"{self._path}: NVDEC reached end of stream before presentation frame "
+            f"{index} (pts={expected_pts})"
+        )
+    def close(self) -> None:
+        _close_nvdec_decoder(self._decoder)
+        self._decoder = None
+        self._demuxer = None

{mvdata-0.9.2 → mvdata-0.9.4}/mvdata/downloader.py RENAMED Viewed

@@ -116,8 +116,11 @@ class DatasetDownloader(ABC):
         return None
+RANGED_STREAM_EXTENSIONS = (".avif", ".mp4")
 class RangedDatasetDownloader(DatasetDownloader):
-    """Downloader for ranged AVIF dataset format."""
+    """Downloader for ranged dataset format."""
     def _parse_range_folder(self, folder_name: str) -> Optional[Tuple[int, int]]:
         """Parse range folder name to extract start and end frame numbers."""
@@ -260,7 +263,7 @@ class RangedDatasetDownloader(DatasetDownloader):
                 if is_system_file(rel_path_wrapped):
                     continue
-                if rel_path_wrapped.suffix.lower() != ".avif":
+                if rel_path_wrapped.suffix.lower() not in RANGED_STREAM_EXTENSIONS:
                     files_to_download.append((obj_key, local_file))
                     continue

{mvdata-0.9.2 → mvdata-0.9.4}/mvdata/multivideo.py RENAMED Viewed

@@ -20,6 +20,59 @@ SUPPORTED_VIDEO_EXTENSIONS = ['.mov', '.mp4', '.avi']
 SUPPORTED_CODECS = ['h264', 'hevc', 'av1']
+def _is_content_packet(packet: Any) -> bool:
+    return getattr(packet, "size", 0) > 0
+def _count_visible_video_packets_if_discard_preroll_detected(
+    container: Any,
+    video_stream: Any,
+    *,
+    probe_packets: int = 64,
+) -> Optional[int]:
+    if not _may_include_discard_preroll(video_stream):
+        return None
+    demux = getattr(container, "demux", None)
+    if not callable(demux):
+        return None
+    visible_packets = 0
+    content_packets = 0
+    found_discard = False
+    packets = demux(video_stream)
+    for packet in packets:
+        if not _is_content_packet(packet):
+            continue
+        content_packets += 1
+        if packet.is_discard:
+            found_discard = True
+            break
+        visible_packets += 1
+        if content_packets >= probe_packets:
+            return None
+    if not found_discard:
+        return visible_packets
+    for packet in packets:
+        if _is_content_packet(packet) and not packet.is_discard:
+            visible_packets += 1
+    return visible_packets
+def _may_include_discard_preroll(video_stream: Any) -> bool:
+    if getattr(video_stream, "frames", 0) <= 0:
+        return False
+    codec_context = getattr(video_stream, "codec_context", None)
+    if bool(getattr(codec_context, "has_b_frames", False)):
+        return True
+    start_time = getattr(video_stream, "start_time", None)
+    return start_time not in (None, 0)
 def _check_av_available():
     if not HAS_AV:
         raise ImportError(
@@ -120,9 +173,17 @@ class MultiVideoDataset(Dataset):
             video_stream = container.streams.video[0]
             frame_count = video_stream.frames
-            if frame_count == 0:
+            visible_packet_count = _count_visible_video_packets_if_discard_preroll_detected(
+                container,
+                video_stream,
+            )
+            if visible_packet_count is not None and visible_packet_count > 0:
+                frame_count = visible_packet_count
+            elif frame_count == 0:
+                seek = getattr(container, "seek", None)
+                if callable(seek):
+                    seek(0)
                 frame_count = sum(1 for _ in container.decode(video=0))
-                container.seek(0)
             fps = float(video_stream.average_rate) if video_stream.average_rate else 30.0
             duration = float(video_stream.duration * video_stream.time_base) if video_stream.duration else 0.0

mvdata 0.9.2__tar.gz → 0.9.4__tar.gz

mvdata 0.9.2__tar.gz → 0.9.4__tar.gz