PyPI - mvdata - Versions diffs - 0.9.2__tar.gz → 0.9.3__tar.gz - Mend

mvdata 0.9.2__tar.gz → 0.9.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

{mvdata-0.9.2 → mvdata-0.9.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mvdata
-Version: 0.9.2
+Version: 0.9.3
 Summary: Gracia Dataset Convention - Python library for working with multi-view video datasets
 Author: Gracia Team
 License: MIT
@@ -50,3 +50,21 @@ Python library for working with Gracia multi-view video datasets.
 The package provides readers, writers, conversion tools, and GPU-aware video
 decode helpers for the dataset layouts documented in the `docs` directory.
+## Release
+Prepare the next release with the local helper script:
+```bash
+UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --dry-run
+UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --push
+```
+Use `minor` or `major` instead of `patch` for larger version bumps, or pass an
+explicit version with `--version 1.2.3`. The script updates `pyproject.toml` and
+`uv.lock` through `uv`, runs the tests and build, creates a `Release vX.Y.Z`
+commit, creates an annotated `vX.Y.Z` tag, and pushes the branch and tag when
+`--push` is set.
+Pushing the tag triggers the GitHub Actions build, PyPI publish, and GitHub
+Release workflow.

mvdata-0.9.3/README.md ADDED Viewed

@@ -0,0 +1,24 @@
+# mvdata
+Python library for working with Gracia multi-view video datasets.
+The package provides readers, writers, conversion tools, and GPU-aware video
+decode helpers for the dataset layouts documented in the `docs` directory.
+## Release
+Prepare the next release with the local helper script:
+```bash
+UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --dry-run
+UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --push
+```
+Use `minor` or `major` instead of `patch` for larger version bumps, or pass an
+explicit version with `--version 1.2.3`. The script updates `pyproject.toml` and
+`uv.lock` through `uv`, runs the tests and build, creates a `Release vX.Y.Z`
+commit, creates an annotated `vX.Y.Z` tag, and pushes the branch and tag when
+`--push` is set.
+Pushing the tag triggers the GitHub Actions build, PyPI publish, and GitHub
+Release workflow.

{mvdata-0.9.2 → mvdata-0.9.3}/mvdata/downloader.py RENAMED Viewed

@@ -116,8 +116,11 @@ class DatasetDownloader(ABC):
         return None
+RANGED_STREAM_EXTENSIONS = (".avif", ".mp4")
 class RangedDatasetDownloader(DatasetDownloader):
-    """Downloader for ranged AVIF dataset format."""
+    """Downloader for ranged dataset format."""
     def _parse_range_folder(self, folder_name: str) -> Optional[Tuple[int, int]]:
         """Parse range folder name to extract start and end frame numbers."""
@@ -260,7 +263,7 @@ class RangedDatasetDownloader(DatasetDownloader):
                 if is_system_file(rel_path_wrapped):
                     continue
-                if rel_path_wrapped.suffix.lower() != ".avif":
+                if rel_path_wrapped.suffix.lower() not in RANGED_STREAM_EXTENSIONS:
                     files_to_download.append((obj_key, local_file))
                     continue

{mvdata-0.9.2 → mvdata-0.9.3}/mvdata/multivideo.py RENAMED Viewed

@@ -20,6 +20,59 @@ SUPPORTED_VIDEO_EXTENSIONS = ['.mov', '.mp4', '.avi']
 SUPPORTED_CODECS = ['h264', 'hevc', 'av1']
+def _is_content_packet(packet: Any) -> bool:
+    return getattr(packet, "size", 0) > 0
+def _count_visible_video_packets_if_discard_preroll_detected(
+    container: Any,
+    video_stream: Any,
+    *,
+    probe_packets: int = 64,
+) -> Optional[int]:
+    if not _may_include_discard_preroll(video_stream):
+        return None
+    demux = getattr(container, "demux", None)
+    if not callable(demux):
+        return None
+    visible_packets = 0
+    content_packets = 0
+    found_discard = False
+    packets = demux(video_stream)
+    for packet in packets:
+        if not _is_content_packet(packet):
+            continue
+        content_packets += 1
+        if packet.is_discard:
+            found_discard = True
+            break
+        visible_packets += 1
+        if content_packets >= probe_packets:
+            return None
+    if not found_discard:
+        return visible_packets
+    for packet in packets:
+        if _is_content_packet(packet) and not packet.is_discard:
+            visible_packets += 1
+    return visible_packets
+def _may_include_discard_preroll(video_stream: Any) -> bool:
+    if getattr(video_stream, "frames", 0) <= 0:
+        return False
+    codec_context = getattr(video_stream, "codec_context", None)
+    if bool(getattr(codec_context, "has_b_frames", False)):
+        return True
+    start_time = getattr(video_stream, "start_time", None)
+    return start_time not in (None, 0)
 def _check_av_available():
     if not HAS_AV:
         raise ImportError(
@@ -120,9 +173,17 @@ class MultiVideoDataset(Dataset):
             video_stream = container.streams.video[0]
             frame_count = video_stream.frames
-            if frame_count == 0:
+            visible_packet_count = _count_visible_video_packets_if_discard_preroll_detected(
+                container,
+                video_stream,
+            )
+            if visible_packet_count is not None and visible_packet_count > 0:
+                frame_count = visible_packet_count
+            elif frame_count == 0:
+                seek = getattr(container, "seek", None)
+                if callable(seek):
+                    seek(0)
                 frame_count = sum(1 for _ in container.decode(video=0))
-                container.seek(0)
             fps = float(video_stream.average_rate) if video_stream.average_rate else 30.0
             duration = float(video_stream.duration * video_stream.time_base) if video_stream.duration else 0.0

{mvdata-0.9.2 → mvdata-0.9.3}/mvdata/multivideo_slicer.py RENAMED Viewed

@@ -35,6 +35,7 @@ class MultiVideoStreamSliceInfo:
     width: int
     height: int
     packet_count: int
+    visible_frame_count: int
     keyframe_indices: tuple[int, ...]
     packet_duration_ticks: Optional[int]
@@ -350,6 +351,175 @@ def _stream_from_template(output_container: Any, input_stream: Any) -> Any:
     return output_container.add_stream(template=input_stream)
+@dataclass(frozen=True)
+class _VideoPacketRecord:
+    packet_index: int
+    pts: int
+    dts: int
+    is_keyframe: bool
+    is_discard: bool
+@dataclass(frozen=True)
+class _PacketCopyPlan:
+    packet_start: int
+    packet_end: int
+    timestamp_offset: int
+    packet_indices: frozenset[int]
+def _is_content_packet(packet: Any) -> bool:
+    return getattr(packet, "size", 0) > 0
+def _record_packet(packet: Any, packet_index: int) -> _VideoPacketRecord:
+    return _VideoPacketRecord(
+        packet_index=packet_index,
+        pts=int(packet.pts),
+        dts=int(packet.dts),
+        is_keyframe=bool(packet.is_keyframe),
+        is_discard=bool(packet.is_discard),
+    )
+def _presentation_order(records: list[_VideoPacketRecord]) -> list[_VideoPacketRecord]:
+    return sorted(
+        (record for record in records if not record.is_discard),
+        key=lambda record: (record.pts, record.dts, record.packet_index),
+    )
+def _count_visible_video_packets(video_path: Path) -> int:
+    container = av.open(str(video_path))
+    try:
+        if not container.streams.video:
+            raise MultiVideoSliceError(f"No video stream in sliced output: {video_path}")
+        video_stream = container.streams.video[0]
+        return sum(
+            1
+            for packet in container.demux(video_stream)
+            if _is_content_packet(packet) and not packet.is_discard
+        )
+    finally:
+        container.close()
+def _read_video_packet_records(video_path: Path) -> list[_VideoPacketRecord]:
+    container = av.open(str(video_path))
+    try:
+        if not container.streams.video:
+            raise MultiVideoSliceError(f"No video stream: {video_path}")
+        records = _scan_packet_records(container, container.streams.video[0])
+        if not records:
+            raise MultiVideoSliceError(f"No video packets: {video_path}")
+        return records
+    finally:
+        container.close()
+def _scan_packet_records(container: Any, video_stream: Any) -> list[_VideoPacketRecord]:
+    records: list[_VideoPacketRecord] = []
+    packet_index = 0
+    missing_timestamps = False
+    for packet in container.demux(video_stream):
+        if not _is_content_packet(packet):
+            continue
+        if packet.pts is None or packet.dts is None:
+            missing_timestamps = True
+        else:
+            records.append(_record_packet(packet, packet_index))
+        packet_index += 1
+    if missing_timestamps:
+        raise MultiVideoSliceError(
+            "Video is not eligible for frame-based packet-copy slicing: "
+            "video packets must have presentation and decode timestamps"
+        )
+    return records
+def _copy_packet_for_mux(packet: Any) -> Any:
+    packet_copy = av.Packet(bytes(packet))
+    packet_copy.pts = packet.pts
+    packet_copy.dts = packet.dts
+    packet_copy.duration = packet.duration
+    if packet.time_base is not None:
+        packet_copy.time_base = packet.time_base
+    packet_copy.is_keyframe = bool(packet.is_keyframe)
+    packet_copy.is_corrupt = bool(packet.is_corrupt)
+    packet_copy.opaque = packet.opaque
+    for side_data in packet.iter_sidedata():
+        packet_copy.set_sidedata(side_data)
+    return packet_copy
+def _packet_copy_plans(
+    video_path: Path,
+    ranges: tuple[MultiVideoSliceRange, ...],
+    stream_id: int,
+) -> dict[MultiVideoSliceRange, _PacketCopyPlan]:
+    return _packet_copy_plans_from_records(
+        _read_video_packet_records(video_path),
+        ranges,
+        stream_id,
+    )
+def _packet_copy_plans_from_records(
+    records: list[_VideoPacketRecord],
+    ranges: tuple[MultiVideoSliceRange, ...],
+    stream_id: int,
+) -> dict[MultiVideoSliceRange, _PacketCopyPlan]:
+    presentation = _presentation_order(records)
+    visible_frame_by_packet = {
+        record.packet_index: frame_index
+        for frame_index, record in enumerate(presentation)
+    }
+    plans: dict[MultiVideoSliceRange, _PacketCopyPlan] = {}
+    for range_info in ranges:
+        if range_info.source_end_frame >= len(presentation):
+            raise MultiVideoSliceError(
+                f"Stream {stream_id} range {range_info.output_name} ends at frame "
+                f"{range_info.source_end_frame}, but only {len(presentation)} visible "
+                "frames are available"
+            )
+        start_record = presentation[range_info.source_start_frame]
+        if not start_record.is_keyframe:
+            raise MultiVideoSliceError(
+                f"Stream {stream_id} range {range_info.output_name} starts at visible "
+                f"frame {range_info.source_start_frame}, which is not a keyframe"
+            )
+        packet_start = start_record.packet_index
+        packet_end = packet_start
+        for frame_index in range(
+            range_info.source_start_frame,
+            range_info.source_end_frame + 1,
+        ):
+            packet_end = max(packet_end, presentation[frame_index].packet_index)
+        packet_indices = frozenset(
+            packet_index
+            for packet_index in range(packet_start, packet_end + 1)
+            if visible_frame_by_packet.get(packet_index, -1) >= range_info.source_start_frame
+        )
+        plans[range_info] = _PacketCopyPlan(
+            packet_start,
+            packet_end,
+            start_record.pts,
+            packet_indices,
+        )
+    return plans
 def _scan_video_for_slicing(stream_id: int, video_path: Path) -> MultiVideoStreamSliceInfo:
     _check_av_available()
@@ -370,18 +540,19 @@ def _scan_video_for_slicing(stream_id: int, video_path: Path) -> MultiVideoStrea
             )
         codec = _normalize_codec_name(video_stream.codec_context.name)
-        keyframe_indices: list[int] = []
         packet_count = 0
         first_duration: Optional[int] = None
+        records: list[_VideoPacketRecord] = []
         missing_duration = False
+        missing_timestamps = False
         variable_duration = False
         for packet in container.demux(video_stream):
-            if getattr(packet, "size", 0) == 0:
+            if not _is_content_packet(packet):
                 continue
-            if packet.is_keyframe:
-                keyframe_indices.append(packet_count)
+            if packet.pts is None or packet.dts is None:
+                missing_timestamps = True
             duration = packet.duration
             if duration is None or duration <= 0:
@@ -391,16 +562,34 @@ def _scan_video_for_slicing(stream_id: int, video_path: Path) -> MultiVideoStrea
             elif int(duration) != first_duration:
                 variable_duration = True
+            if packet.pts is not None and packet.dts is not None:
+                records.append(_record_packet(packet, packet_count))
             packet_count += 1
         if packet_count == 0:
             raise MultiVideoSliceError(f"Stream {stream_id} has no video packets: {video_path}")
+        if missing_timestamps:
+            raise MultiVideoSliceError(
+                f"Stream {stream_id} is not eligible for frame-based packet-copy slicing: "
+                "video packets must have presentation and decode timestamps"
+            )
         if missing_duration or variable_duration:
             raise MultiVideoSliceError(
                 f"Stream {stream_id} is not eligible for frame-based slicing: "
                 "video packets must have a constant non-zero duration"
             )
+        presentation = _presentation_order(records)
+        if not presentation:
+            raise MultiVideoSliceError(
+                f"Stream {stream_id} has no visible video frames after discards: {video_path}"
+            )
+        keyframe_indices = tuple(
+            frame_index
+            for frame_index, record in enumerate(presentation)
+            if record.is_keyframe
+        )
         return MultiVideoStreamSliceInfo(
             stream_id=stream_id,
             path=str(video_path),
@@ -409,7 +598,8 @@ def _scan_video_for_slicing(stream_id: int, video_path: Path) -> MultiVideoStrea
             width=int(video_stream.width),
             height=int(video_stream.height),
             packet_count=packet_count,
-            keyframe_indices=tuple(keyframe_indices),
+            visible_frame_count=len(presentation),
+            keyframe_indices=keyframe_indices,
             packet_duration_ticks=first_duration,
         )
     finally:
@@ -447,73 +637,100 @@ def _slice_video_to_ranged_mp4s(
     if not ranges:
         return
-    input_container = av.open(str(video_path))
-    output_container = None
-    current_range_index = 0
-    current_range = ranges[current_range_index]
-    packets_written = 0
-    def close_current_output() -> None:
-        nonlocal output_container, packets_written, current_range
-        if output_container is None:
-            return
-        output_container.close()
-        expected = current_range.frame_count
-        if packets_written != expected:
-            raise MultiVideoSliceError(
-                f"Expected {expected} packets for stream {stream_id} range "
-                f"{current_range.output_name}, wrote {packets_written}"
-            )
-        output_container = None
-        packets_written = 0
+    packet_plans = _packet_copy_plans(video_path, ranges, stream_id)
+    packet_targets: dict[int, list[MultiVideoSliceRange]] = {}
+    for range_info in ranges:
+        for packet_index in packet_plans[range_info].packet_indices:
+            packet_targets.setdefault(packet_index, []).append(range_info)
+    last_packet_end = max(packet_plans[range_info].packet_end for range_info in ranges)
+    packets_written = {range_info: 0 for range_info in ranges}
+    output_files: dict[MultiVideoSliceRange, Path] = {}
+    output_containers: dict[MultiVideoSliceRange, Any] = {}
+    output_streams: dict[MultiVideoSliceRange, Any] = {}
+    input_container = av.open(str(video_path))
     try:
         if not input_container.streams.video:
             raise MultiVideoSliceError(f"Stream {stream_id} has no video stream: {video_path}")
         input_stream = input_container.streams.video[0]
-        output_stream = None
-        packet_index = 0
+        def open_output(range_info: MultiVideoSliceRange) -> tuple[Any, Any]:
+            if range_info not in output_containers:
+                range_dir = output_path / range_info.output_name / "rgb"
+                range_dir.mkdir(parents=True, exist_ok=True)
+                output_file = range_dir / _format_stream_filename(stream_id)
+                output_container = av.open(str(output_file), mode="w", format="mp4")
+                output_containers[range_info] = output_container
+                output_streams[range_info] = _stream_from_template(output_container, input_stream)
+                output_files[range_info] = output_file
+            return output_containers[range_info], output_streams[range_info]
+        def close_finished_outputs(packet_index: int) -> None:
+            for range_info in list(output_containers):
+                if packet_index > packet_plans[range_info].packet_end:
+                    output_containers.pop(range_info).close()
+                    output_streams.pop(range_info, None)
+        def mux_packet(range_info: MultiVideoSliceRange, packet: Any) -> None:
+            output_container, output_stream = open_output(range_info)
+            packet_plan = packet_plans[range_info]
+            original_pts = packet.pts
+            original_dts = packet.dts
+            original_stream = packet.stream
+            try:
+                packet.pts = packet.pts - packet_plan.timestamp_offset
+                packet.dts = packet.dts - packet_plan.timestamp_offset
+                packet.stream = output_stream
+                output_container.mux(packet)
+            finally:
+                packet.pts = original_pts
+                packet.dts = original_dts
+                if original_stream is not None:
+                    packet.stream = original_stream
+        packet_index = 0
         for packet in input_container.demux(input_stream):
-            if getattr(packet, "size", 0) == 0:
+            if not _is_content_packet(packet):
                 continue
-            while packet_index > current_range.source_end_frame:
-                close_current_output()
-                current_range_index += 1
-                if current_range_index >= len(ranges):
-                    return
-                current_range = ranges[current_range_index]
-                output_stream = None
+            if packet_index > last_packet_end:
+                break
+            close_finished_outputs(packet_index)
-            if packet_index < current_range.source_start_frame:
+            targets = packet_targets.get(packet_index)
+            if not targets:
                 packet_index += 1
                 continue
-            if output_container is None:
-                range_dir = output_path / current_range.output_name / "rgb"
-                range_dir.mkdir(parents=True, exist_ok=True)
-                output_file = range_dir / _format_stream_filename(stream_id)
-                output_container = av.open(str(output_file), mode="w", format="mp4")
-                output_stream = _stream_from_template(output_container, input_stream)
-            packet.stream = output_stream
-            output_container.mux(packet)
-            packets_written += 1
+            target_packets = (
+                [_copy_packet_for_mux(packet) for _ in targets]
+                if len(targets) > 1
+                else [packet]
+            )
+            for range_info, target_packet in zip(targets, target_packets):
+                mux_packet(range_info, target_packet)
+                packets_written[range_info] += 1
             packet_index += 1
     finally:
-        try:
-            close_current_output()
-        finally:
-            input_container.close()
+        for output_container in output_containers.values():
+            output_container.close()
+        input_container.close()
-    if current_range_index < len(ranges) - 1:
-        missing = ranges[current_range_index + 1 :]
-        raise MultiVideoSliceError(
-            f"Stream {stream_id} ended before writing all ranges; "
-            f"first missing range is {missing[0].output_name}"
-        )
+    for range_info in ranges:
+        expected = range_info.frame_count
+        if packets_written[range_info] < expected:
+            raise MultiVideoSliceError(
+                f"Expected at least {expected} packets for stream {stream_id} range "
+                f"{range_info.output_name}, wrote {packets_written[range_info]}"
+            )
+        output_file = output_files[range_info]
+        visible_packets = _count_visible_video_packets(output_file)
+        if visible_packets < expected:
+            raise MultiVideoSliceError(
+                f"Expected at least {expected} visible packets for stream {stream_id} range "
+                f"{range_info.output_name}, found {visible_packets}."
+            )
 class MultiVideoToRangedSlicer(DatasetWriter):
@@ -750,12 +967,12 @@ class MultiVideoToRangedSlicer(DatasetWriter):
                 if video.fps != reference_fps:
                     _record_stream_error(video.stream_id, message)
-        min_frames = min(video.packet_count for video in videos)
-        max_frames = max(video.packet_count for video in videos)
+        min_frames = min(video.visible_frame_count for video in videos)
+        max_frames = max(video.visible_frame_count for video in videos)
         tail_spread = max_frames - min_frames
         if tail_spread > self.tail_tolerance_frames:
             counts = ", ".join(
-                f"{video.stream_id}={video.packet_count}"
+                f"{video.stream_id}={video.visible_frame_count}"
                 for video in sorted(videos, key=lambda v: v.stream_id)
             )
             message = (
@@ -764,7 +981,7 @@ class MultiVideoToRangedSlicer(DatasetWriter):
             )
             errors.append(message)
             for video in videos:
-                if video.packet_count != min_frames:
+                if video.visible_frame_count != min_frames:
                     _record_stream_error(video.stream_id, message)
         elif tail_spread:
             warnings.append(
@@ -853,9 +1070,9 @@ class MultiVideoToRangedSlicer(DatasetWriter):
             )
         discarded_tail_frames = {
-            video.stream_id: video.packet_count - min_frames
+            video.stream_id: video.visible_frame_count - min_frames
             for video in videos
-            if video.packet_count > min_frames
+            if video.visible_frame_count > min_frames
         }
         plan = MultiVideoSlicePlan(
             version=1,
@@ -909,6 +1126,8 @@ class MultiVideoToRangedSlicer(DatasetWriter):
         if self.copy_meta:
             self._copy_meta_folder()
+        ranged_dataset = RangedDataset(self.output_path, max_workers=self.max_workers)
         if self.stash_policy == "copy":
             self._copy_stash_folder(
                 stream_ids=list(plan.stream_ids),
@@ -917,7 +1136,8 @@ class MultiVideoToRangedSlicer(DatasetWriter):
                 maintain_frame_numbers=True,
             )
         elif self.stash_policy == "generate":
-            self._generate_stash_from_reader(
+            self._generate_stash_from_dataset(
+                source=ranged_dataset,
                 stream_ids=list(plan.stream_ids),
                 start_frame=start_frame,
                 end_frame=end_frame,
@@ -927,4 +1147,4 @@ class MultiVideoToRangedSlicer(DatasetWriter):
         if self.verbose:
             print(f"Successfully created sliced Ranged dataset at {self.output_path}")
-        return RangedDataset(self.output_path, max_workers=self.max_workers)
+        return ranged_dataset

mvdata 0.9.2__tar.gz → 0.9.3__tar.gz

mvdata 0.9.2__tar.gz → 0.9.3__tar.gz