mvdata 0.9.2__tar.gz → 0.9.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. {mvdata-0.9.2 → mvdata-0.9.4}/PKG-INFO +19 -1
  2. mvdata-0.9.4/README.md +24 -0
  3. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/codec/__init__.py +1 -1
  4. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/codec/decode.py +117 -47
  5. mvdata-0.9.4/mvdata/codec/nvdec.py +256 -0
  6. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/downloader.py +5 -2
  7. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/multivideo.py +63 -2
  8. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/multivideo_slicer.py +165 -448
  9. mvdata-0.9.4/mvdata/multivideo_slicer_models.py +211 -0
  10. mvdata-0.9.4/mvdata/multivideo_slicer_packets.py +456 -0
  11. mvdata-0.9.4/mvdata/multivideo_slicer_plan.py +100 -0
  12. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/ranged.py +73 -4
  13. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/stash_utils.py +126 -25
  14. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/video_stream_reader.py +73 -53
  15. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/writer_base.py +17 -2
  16. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata.egg-info/PKG-INFO +19 -1
  17. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata.egg-info/SOURCES.txt +5 -0
  18. {mvdata-0.9.2 → mvdata-0.9.4}/pyproject.toml +1 -1
  19. {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_dataset_base_defaults.py +3 -0
  20. {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_multivideo_bit_depth.py +48 -0
  21. mvdata-0.9.4/tests/test_multivideo_slicer.py +671 -0
  22. {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_ranged_mixed_streams.py +49 -29
  23. mvdata-0.9.4/tests/test_release_script.py +220 -0
  24. {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_s3_downloader.py +40 -0
  25. {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_video_stream_reader.py +362 -106
  26. mvdata-0.9.2/README.md +0 -6
  27. mvdata-0.9.2/tests/test_multivideo_slicer.py +0 -223
  28. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/__init__.py +0 -0
  29. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/cloud_storage.py +0 -0
  30. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/codec/_imports.py +0 -0
  31. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/codec/encode.py +0 -0
  32. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/codec/frames.py +0 -0
  33. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/codec/native_yuv.py +0 -0
  34. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/codec/probe.py +0 -0
  35. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/codec/select.py +0 -0
  36. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/dataset_base.py +0 -0
  37. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/gpu_policy.py +0 -0
  38. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/gpu_support.py +0 -0
  39. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/image_metrics.py +0 -0
  40. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/legacy_writer.py +0 -0
  41. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/multivideo_writer.py +0 -0
  42. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/nvdec_parallel.py +0 -0
  43. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/nvenc_codec.py +0 -0
  44. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/per_frame.py +0 -0
  45. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/ranged_writer.py +0 -0
  46. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/utils.py +0 -0
  47. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/write_progress.py +0 -0
  48. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata.egg-info/dependency_links.txt +0 -0
  49. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata.egg-info/requires.txt +0 -0
  50. {mvdata-0.9.2 → mvdata-0.9.4}/mvdata.egg-info/top_level.txt +0 -0
  51. {mvdata-0.9.2 → mvdata-0.9.4}/setup.cfg +0 -0
  52. {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_gpu_policy.py +0 -0
  53. {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_gpu_support.py +0 -0
  54. {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_image_metrics.py +0 -0
  55. {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_nvdec_parallel.py +0 -0
  56. {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_per_camera.py +0 -0
  57. {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_ranged_nvenc_roundtrip.py +0 -0
  58. {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_ranged_resume.py +0 -0
  59. {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_ranged_stream_discovery.py +0 -0
  60. {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_roundtrip.py +0 -0
  61. {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_stash_bit_depth.py +0 -0
  62. {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_stash_comprehensive.py +0 -0
  63. {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_stash_policy.py +0 -0
  64. {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_stash_regenerate.py +0 -0
  65. {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_write_progress.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mvdata
3
- Version: 0.9.2
3
+ Version: 0.9.4
4
4
  Summary: Gracia Dataset Convention - Python library for working with multi-view video datasets
5
5
  Author: Gracia Team
6
6
  License: MIT
@@ -50,3 +50,21 @@ Python library for working with Gracia multi-view video datasets.
50
50
 
51
51
  The package provides readers, writers, conversion tools, and GPU-aware video
52
52
  decode helpers for the dataset layouts documented in the `docs` directory.
53
+
54
+ ## Release
55
+
56
+ Prepare the next release with the local helper script:
57
+
58
+ ```bash
59
+ UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --dry-run
60
+ UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --push
61
+ ```
62
+
63
+ Use `minor` or `major` instead of `patch` for larger version bumps, or pass an
64
+ explicit version with `--version 1.2.3`. The script updates `pyproject.toml` and
65
+ `uv.lock` through `uv`, runs the tests and build, creates a `Release vX.Y.Z`
66
+ commit, creates an annotated `vX.Y.Z` tag, and pushes the branch and tag when
67
+ `--push` is set.
68
+
69
+ Pushing the tag triggers the GitHub Actions build, PyPI publish, and GitHub
70
+ Release workflow.
mvdata-0.9.4/README.md ADDED
@@ -0,0 +1,24 @@
1
+ # mvdata
2
+
3
+ Python library for working with Gracia multi-view video datasets.
4
+
5
+ The package provides readers, writers, conversion tools, and GPU-aware video
6
+ decode helpers for the dataset layouts documented in the `docs` directory.
7
+
8
+ ## Release
9
+
10
+ Prepare the next release with the local helper script:
11
+
12
+ ```bash
13
+ UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --dry-run
14
+ UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --push
15
+ ```
16
+
17
+ Use `minor` or `major` instead of `patch` for larger version bumps, or pass an
18
+ explicit version with `--version 1.2.3`. The script updates `pyproject.toml` and
19
+ `uv.lock` through `uv`, runs the tests and build, creates a `Release vX.Y.Z`
20
+ commit, creates an annotated `vX.Y.Z` tag, and pushes the branch and tag when
21
+ `--push` is set.
22
+
23
+ Pushing the tag triggers the GitHub Actions build, PyPI publish, and GitHub
24
+ Release workflow.
@@ -29,7 +29,6 @@ from ._imports import (
29
29
  try_import_torch,
30
30
  )
31
31
  from .decode import (
32
- _try_open_nvdec,
33
32
  decode_mp4_to_rgb,
34
33
  decode_mp4_to_rgb_nvdec,
35
34
  decode_mp4_to_rgb_pyav,
@@ -59,6 +58,7 @@ from .frames import (
59
58
  numpy_to_cupy_rgb,
60
59
  )
61
60
  from .native_yuv import native_nvdec_to_rgb_cupy, native_nvdec_to_rgb_numpy
61
+ from .nvdec import _try_open_nvdec
62
62
  from .probe import (
63
63
  infer_video_bit_depth_from_frame,
64
64
  infer_video_bit_depth_from_pixel_format_name,
@@ -11,9 +11,15 @@ from ..gpu_policy import nvdec_decode_allowed
11
11
  from ._imports import try_import_av, try_import_cupy, try_import_pynvvideocodec, try_import_torch
12
12
  from .frames import _decoded_frame_to_rgb_numpy, _pyav_frame_to_rgb
13
13
  from .native_yuv import native_nvdec_to_rgb_numpy
14
+ from .nvdec import (
15
+ _close_nvdec_decoder,
16
+ _NvdecOrdinalRawFrameSource,
17
+ _NvdecPtsRawFrameSource,
18
+ _nvdec_scanned_presentation_pts,
19
+ _try_open_nvdec,
20
+ )
14
21
  from .probe import (
15
22
  infer_video_bit_depth_from_stream,
16
- nvdec_decode_compatibility_issue,
17
23
  nvdec_decode_compatibility_issue_for_path,
18
24
  probe_video_color_metadata_pyav,
19
25
  probe_video_bit_depth,
@@ -21,32 +27,88 @@ from .probe import (
21
27
  )
22
28
 
23
29
 
24
- def _try_open_nvdec(
25
- nvc,
26
- path_str: str,
30
+ def _nvdec_raw_to_rgb_numpy(
31
+ raw,
32
+ *,
33
+ source_bit_depth: int,
34
+ color_metadata: dict[str, Any],
35
+ torch_mod: Any,
36
+ ) -> np.ndarray:
37
+ if source_bit_depth > 8:
38
+ return native_nvdec_to_rgb_numpy(
39
+ raw,
40
+ bit_depth=source_bit_depth,
41
+ **color_metadata,
42
+ )
43
+ return _decoded_frame_to_rgb_numpy(raw, torch_mod, bit_depth=source_bit_depth)
44
+
45
+
46
+ def _decode_simple_nvdec_by_ordinal(
47
+ decoder: Any,
48
+ path: Path,
49
+ expect_count: int,
50
+ *,
51
+ source_bit_depth: int,
52
+ color_metadata: dict[str, Any],
53
+ torch_mod: Any,
54
+ ) -> list[np.ndarray]:
55
+ raw_source = _NvdecOrdinalRawFrameSource(decoder, path)
56
+
57
+ try:
58
+ try:
59
+ frame_count = min(len(decoder), expect_count)
60
+ except Exception:
61
+ frame_count = expect_count
62
+
63
+ frames: list[np.ndarray] = []
64
+ for index in range(frame_count):
65
+ raw, _ = raw_source.frame_by_index(index)
66
+ frames.append(
67
+ _nvdec_raw_to_rgb_numpy(
68
+ raw,
69
+ source_bit_depth=source_bit_depth,
70
+ color_metadata=color_metadata,
71
+ torch_mod=torch_mod,
72
+ )
73
+ )
74
+ return frames
75
+ finally:
76
+ raw_source.close()
77
+
78
+
79
+ def _decode_nvdec_by_presentation_pts(
80
+ nvc: Any,
81
+ mp4_path: Path,
82
+ *,
27
83
  gpu_id: int,
28
84
  use_device_memory: bool,
29
- *,
30
- output_color_type=None,
31
- ):
32
- """Open a SimpleDecoder, tolerating API-version differences in the kwarg surface."""
33
- base = dict(
85
+ output_color_type: Any,
86
+ presentation_pts: list[int],
87
+ expect_count: int,
88
+ source_bit_depth: int,
89
+ color_metadata: dict[str, Any],
90
+ torch_mod: Any,
91
+ ) -> list[np.ndarray]:
92
+ raw_source = _NvdecPtsRawFrameSource(
93
+ nvc,
94
+ mp4_path,
34
95
  gpu_id=gpu_id,
35
96
  use_device_memory=use_device_memory,
36
- output_color_type=output_color_type or nvc.OutputColorType.RGB,
97
+ output_color_type=output_color_type,
98
+ presentation_pts=presentation_pts,
37
99
  )
38
- for extra in ({}, {"need_scanned_stream_metadata": True}):
39
- try:
40
- return nvc.SimpleDecoder(path_str, **base, **extra)
41
- except TypeError:
42
- if extra:
43
- try:
44
- return nvc.SimpleDecoder(path_str, **base)
45
- except Exception:
46
- pass
47
- except Exception:
48
- pass
49
- return None
100
+ try:
101
+ return [
102
+ _nvdec_raw_to_rgb_numpy(
103
+ raw_source.frame(index),
104
+ source_bit_depth=source_bit_depth,
105
+ color_metadata=color_metadata,
106
+ torch_mod=torch_mod,
107
+ )
108
+ for index in range(expect_count)
109
+ ]
110
+ finally:
111
+ raw_source.close()
50
112
 
51
113
 
52
114
  def decode_mp4_to_rgb_pyav(mp4_path: Path, expect_count: int) -> List[np.ndarray]:
@@ -72,11 +134,7 @@ def decode_mp4_to_rgb_nvdec(nvc, mp4_path: Path, gpu_id: int, expect_count: int)
72
134
  )
73
135
  meta = probe_video_stream_metadata(nvc, mp4_path)
74
136
  bit_depth = int(meta.get("bitdepth", 8))
75
- issue = nvdec_decode_compatibility_issue(
76
- nvc, meta["width"], meta["height"],
77
- gpu_id=gpu_id, codec=meta["codec"],
78
- chroma_subsampling=meta["chroma_subsampling"], bitdepth=bit_depth,
79
- )
137
+ issue = nvdec_decode_compatibility_issue_for_path(nvc, mp4_path, gpu_id)
80
138
  if issue is not None:
81
139
  raise RuntimeError(f"NVDEC decode unsupported for {mp4_path}: {issue}")
82
140
  source_bit_depth = probe_video_bit_depth(mp4_path, nvc=nvc)
@@ -91,7 +149,7 @@ def decode_mp4_to_rgb_nvdec(nvc, mp4_path: Path, gpu_id: int, expect_count: int)
91
149
  last_err: Exception | None = None
92
150
  for use_dev in use_dev_options:
93
151
  if source_bit_depth > 8:
94
- dec = _try_open_nvdec(
152
+ metadata_decoder = _try_open_nvdec(
95
153
  nvc,
96
154
  path_str,
97
155
  gpu_id,
@@ -99,27 +157,39 @@ def decode_mp4_to_rgb_nvdec(nvc, mp4_path: Path, gpu_id: int, expect_count: int)
99
157
  output_color_type=output_color_type,
100
158
  )
101
159
  else:
102
- dec = _try_open_nvdec(nvc, path_str, gpu_id, use_dev)
103
- if dec is None:
160
+ metadata_decoder = _try_open_nvdec(nvc, path_str, gpu_id, use_dev)
161
+ if metadata_decoder is None:
104
162
  continue
105
163
  try:
106
- n = min(len(dec), expect_count)
107
- frames: List[np.ndarray] = []
108
- for i in range(n):
109
- raw = dec.get_batch_frames_by_index([i])[0]
110
- if source_bit_depth > 8:
111
- frames.append(
112
- native_nvdec_to_rgb_numpy(
113
- raw,
114
- bit_depth=source_bit_depth,
115
- **color_metadata,
116
- )
117
- )
118
- else:
119
- frames.append(
120
- _decoded_frame_to_rgb_numpy(raw, torch_mod, bit_depth=source_bit_depth)
121
- )
122
- return frames
164
+ presentation_pts = _nvdec_scanned_presentation_pts(metadata_decoder, expect_count)
165
+ if presentation_pts is None:
166
+ return _decode_simple_nvdec_by_ordinal(
167
+ metadata_decoder,
168
+ mp4_path,
169
+ expect_count,
170
+ source_bit_depth=source_bit_depth,
171
+ color_metadata=color_metadata,
172
+ torch_mod=torch_mod,
173
+ )
174
+
175
+ _close_nvdec_decoder(metadata_decoder)
176
+ if len(presentation_pts) < expect_count:
177
+ raise RuntimeError(
178
+ f"NVDEC scanned metadata has {len(presentation_pts)} timestamps, "
179
+ f"expected at least {expect_count}"
180
+ )
181
+ return _decode_nvdec_by_presentation_pts(
182
+ nvc,
183
+ mp4_path,
184
+ gpu_id=gpu_id,
185
+ use_device_memory=use_dev,
186
+ output_color_type=output_color_type,
187
+ presentation_pts=presentation_pts,
188
+ expect_count=expect_count,
189
+ source_bit_depth=source_bit_depth,
190
+ color_metadata=color_metadata,
191
+ torch_mod=torch_mod,
192
+ )
123
193
  except Exception as e:
124
194
  last_err = e
125
195
 
@@ -0,0 +1,256 @@
1
+ """Shared NVDEC / PyNvVideoCodec helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+
9
+ def _try_open_nvdec(
10
+ nvc,
11
+ path_str: str,
12
+ gpu_id: int,
13
+ use_device_memory: bool,
14
+ *,
15
+ output_color_type=None,
16
+ ):
17
+ """Open a SimpleDecoder, tolerating API-version differences in the kwarg surface."""
18
+ base = dict(
19
+ gpu_id=gpu_id,
20
+ use_device_memory=use_device_memory,
21
+ output_color_type=output_color_type or nvc.OutputColorType.RGB,
22
+ )
23
+ for extra in ({"need_scanned_stream_metadata": True}, {}):
24
+ try:
25
+ return nvc.SimpleDecoder(path_str, **base, **extra)
26
+ except TypeError:
27
+ if extra:
28
+ try:
29
+ return nvc.SimpleDecoder(path_str, **base)
30
+ except Exception:
31
+ pass
32
+ except Exception:
33
+ pass
34
+ return None
35
+
36
+
37
+ def _nvdec_frame_pts(frame: Any) -> int | None:
38
+ get_pts = getattr(frame, "getPTS", None)
39
+ if callable(get_pts):
40
+ pts = get_pts()
41
+ if pts is not None:
42
+ return int(pts)
43
+ pts = getattr(frame, "timestamp", None)
44
+ if pts is not None:
45
+ return int(pts)
46
+ return None
47
+
48
+
49
+ def _nvdec_scanned_presentation_pts(decoder: Any, expect_count: int) -> list[int] | None:
50
+ del expect_count
51
+ get_scanned = getattr(decoder, "get_scanned_stream_metadata", None)
52
+ if not callable(get_scanned):
53
+ return None
54
+
55
+ metadata = get_scanned()
56
+ pts_values = getattr(metadata, "pts", None)
57
+ if pts_values is None:
58
+ return None
59
+
60
+ pts = [int(value) for value in pts_values]
61
+ if not pts:
62
+ return None
63
+ return pts
64
+
65
+
66
+ def _create_low_level_nvdec(
67
+ nvc: Any,
68
+ path: Path | str,
69
+ *,
70
+ gpu_id: int,
71
+ use_device_memory: bool,
72
+ output_color_type: Any,
73
+ ) -> tuple[Any, Any]:
74
+ demuxer = nvc.CreateDemuxer(str(path))
75
+ decoder = nvc.CreateDecoder(
76
+ gpuid=gpu_id,
77
+ codec=demuxer.GetNvCodecId(),
78
+ usedevicememory=use_device_memory,
79
+ outputColorType=output_color_type,
80
+ latency=nvc.DisplayDecodeLatencyType.NATIVE,
81
+ )
82
+ return demuxer, decoder
83
+
84
+
85
+ def _close_nvdec_decoder(decoder: Any) -> None:
86
+ if decoder is None:
87
+ return
88
+ for close_name in ("close", "stop"):
89
+ close_fn = getattr(decoder, close_name, None)
90
+ if callable(close_fn):
91
+ try:
92
+ close_fn()
93
+ except Exception:
94
+ pass
95
+ return
96
+
97
+
98
+ class _NvdecOrdinalRawFrameSource:
99
+ def __init__(self, decoder: Any, path: Path | str):
100
+ self._decoder = decoder
101
+ self._path = path
102
+ self._get_batch_frames = getattr(decoder, "get_batch_frames", None)
103
+ self._get_batch_frames_by_index = getattr(decoder, "get_batch_frames_by_index", None)
104
+ self._seek_to_index = getattr(decoder, "seek_to_index", None)
105
+ if not callable(self._get_batch_frames_by_index) and not callable(self._get_batch_frames):
106
+ self.close()
107
+ raise RuntimeError(
108
+ f"{path}: NVDEC SimpleDecoder has no scanned metadata or ordinal frame API"
109
+ )
110
+ self._next_sequential_index: int | None = (
111
+ 0 if callable(self._get_batch_frames) else None
112
+ )
113
+
114
+ def _decode_sequential(self, index: int):
115
+ if not callable(self._get_batch_frames):
116
+ raise RuntimeError("Sequential NVDEC batch API is unavailable")
117
+ batch = self._get_batch_frames(1)
118
+ if not batch:
119
+ raise RuntimeError(f"NVDEC returned no frame for {self._path} at index {index}")
120
+ self._next_sequential_index = index + 1
121
+ return batch[0]
122
+
123
+ def _decode_indexed(self, index: int):
124
+ if callable(self._get_batch_frames_by_index):
125
+ self._next_sequential_index = None
126
+ batch = self._get_batch_frames_by_index([index])
127
+ if not batch:
128
+ raise RuntimeError(f"NVDEC returned no frame for {self._path} at index {index}")
129
+ return batch[0]
130
+
131
+ if not callable(self._seek_to_index) or not callable(self._get_batch_frames):
132
+ raise RuntimeError("Indexed NVDEC access API is unavailable")
133
+ self._seek_to_index(index)
134
+ self._next_sequential_index = index
135
+ return self._decode_sequential(index)
136
+
137
+ def frame(self, index: int) -> tuple[Any, str]:
138
+ if self._next_sequential_index == index:
139
+ return self._decode_sequential(index), "sequential"
140
+ return self._decode_indexed(index), "indexed"
141
+
142
+ def frame_by_index(self, index: int) -> tuple[Any, str]:
143
+ if callable(self._get_batch_frames_by_index) or callable(self._seek_to_index):
144
+ return self._decode_indexed(index), "indexed"
145
+ return self.frame(index)
146
+
147
+ def reset(self) -> None:
148
+ self._next_sequential_index = 0 if callable(self._get_batch_frames) else None
149
+
150
+ def close(self) -> None:
151
+ _close_nvdec_decoder(self._decoder)
152
+ self._decoder = None
153
+
154
+
155
+ class _NvdecPtsRawFrameSource:
156
+ def __init__(
157
+ self,
158
+ nvc: Any,
159
+ path: Path | str,
160
+ *,
161
+ gpu_id: int,
162
+ use_device_memory: bool,
163
+ output_color_type: Any,
164
+ presentation_pts: list[int],
165
+ ):
166
+ self._nvc = nvc
167
+ self._path = path
168
+ self._gpu_id = gpu_id
169
+ self._use_device_memory = use_device_memory
170
+ self._output_color_type = output_color_type
171
+ self._presentation_pts = presentation_pts
172
+ self._pts_to_index = {
173
+ pts: index
174
+ for index, pts in enumerate(self._presentation_pts)
175
+ }
176
+ if len(self._pts_to_index) != len(self._presentation_pts):
177
+ raise RuntimeError(f"{path}: duplicate presentation timestamps in NVDEC metadata")
178
+ self._demuxer = None
179
+ self._decoder = None
180
+ self._eos = False
181
+ self._last_decoded_index: int | None = None
182
+ self._pending_raw_by_index: dict[int, Any] = {}
183
+
184
+ def reset(self) -> None:
185
+ _close_nvdec_decoder(self._decoder)
186
+ self._demuxer, self._decoder = _create_low_level_nvdec(
187
+ self._nvc,
188
+ self._path,
189
+ gpu_id=self._gpu_id,
190
+ use_device_memory=self._use_device_memory,
191
+ output_color_type=self._output_color_type,
192
+ )
193
+ self._eos = False
194
+ self._last_decoded_index = None
195
+ self._pending_raw_by_index.clear()
196
+
197
+ def _ensure_decoder(self) -> None:
198
+ if self._decoder is None or self._demuxer is None:
199
+ self.reset()
200
+
201
+ def frame(self, index: int):
202
+ pending = self._pending_raw_by_index.pop(index, None)
203
+ if pending is not None:
204
+ return pending
205
+
206
+ if self._last_decoded_index is not None and index <= self._last_decoded_index:
207
+ self.reset()
208
+ else:
209
+ self._ensure_decoder()
210
+
211
+ assert self._decoder is not None
212
+ assert self._demuxer is not None
213
+
214
+ while not self._eos:
215
+ packet = self._demuxer.Demux()
216
+ outputs = self._decoder.Decode(packet)
217
+ if getattr(packet, "bsl", 0) == 0:
218
+ self._eos = True
219
+ target_raw = None
220
+ for raw in outputs:
221
+ pts = _nvdec_frame_pts(raw)
222
+ if pts is None:
223
+ raise RuntimeError(
224
+ f"{self._path}: NVDEC returned a frame without presentation timestamp"
225
+ )
226
+ decoded_index = self._pts_to_index.get(pts)
227
+ if decoded_index is None:
228
+ raise RuntimeError(
229
+ f"{self._path}: NVDEC returned unknown presentation timestamp {pts}"
230
+ )
231
+ self._last_decoded_index = decoded_index
232
+ if decoded_index > index:
233
+ if target_raw is None:
234
+ expected_pts = self._presentation_pts[index]
235
+ raise RuntimeError(
236
+ f"{self._path}: NVDEC skipped requested presentation frame "
237
+ f"{index} (pts={expected_pts}); first later frame was "
238
+ f"{decoded_index} (pts={pts}). The MP4 slice is missing "
239
+ "decode dependencies before the requested frame."
240
+ )
241
+ self._pending_raw_by_index[decoded_index] = raw
242
+ if decoded_index == index:
243
+ target_raw = raw
244
+ if target_raw is not None:
245
+ return target_raw
246
+
247
+ expected_pts = self._presentation_pts[index]
248
+ raise RuntimeError(
249
+ f"{self._path}: NVDEC reached end of stream before presentation frame "
250
+ f"{index} (pts={expected_pts})"
251
+ )
252
+
253
+ def close(self) -> None:
254
+ _close_nvdec_decoder(self._decoder)
255
+ self._decoder = None
256
+ self._demuxer = None
@@ -116,8 +116,11 @@ class DatasetDownloader(ABC):
116
116
  return None
117
117
 
118
118
 
119
+ RANGED_STREAM_EXTENSIONS = (".avif", ".mp4")
120
+
121
+
119
122
  class RangedDatasetDownloader(DatasetDownloader):
120
- """Downloader for ranged AVIF dataset format."""
123
+ """Downloader for ranged dataset format."""
121
124
 
122
125
  def _parse_range_folder(self, folder_name: str) -> Optional[Tuple[int, int]]:
123
126
  """Parse range folder name to extract start and end frame numbers."""
@@ -260,7 +263,7 @@ class RangedDatasetDownloader(DatasetDownloader):
260
263
  if is_system_file(rel_path_wrapped):
261
264
  continue
262
265
 
263
- if rel_path_wrapped.suffix.lower() != ".avif":
266
+ if rel_path_wrapped.suffix.lower() not in RANGED_STREAM_EXTENSIONS:
264
267
  files_to_download.append((obj_key, local_file))
265
268
  continue
266
269
 
@@ -20,6 +20,59 @@ SUPPORTED_VIDEO_EXTENSIONS = ['.mov', '.mp4', '.avi']
20
20
  SUPPORTED_CODECS = ['h264', 'hevc', 'av1']
21
21
 
22
22
 
23
+ def _is_content_packet(packet: Any) -> bool:
24
+ return getattr(packet, "size", 0) > 0
25
+
26
+
27
+ def _count_visible_video_packets_if_discard_preroll_detected(
28
+ container: Any,
29
+ video_stream: Any,
30
+ *,
31
+ probe_packets: int = 64,
32
+ ) -> Optional[int]:
33
+ if not _may_include_discard_preroll(video_stream):
34
+ return None
35
+
36
+ demux = getattr(container, "demux", None)
37
+ if not callable(demux):
38
+ return None
39
+
40
+ visible_packets = 0
41
+ content_packets = 0
42
+ found_discard = False
43
+ packets = demux(video_stream)
44
+
45
+ for packet in packets:
46
+ if not _is_content_packet(packet):
47
+ continue
48
+ content_packets += 1
49
+ if packet.is_discard:
50
+ found_discard = True
51
+ break
52
+ visible_packets += 1
53
+ if content_packets >= probe_packets:
54
+ return None
55
+
56
+ if not found_discard:
57
+ return visible_packets
58
+
59
+ for packet in packets:
60
+ if _is_content_packet(packet) and not packet.is_discard:
61
+ visible_packets += 1
62
+
63
+ return visible_packets
64
+
65
+
66
+ def _may_include_discard_preroll(video_stream: Any) -> bool:
67
+ if getattr(video_stream, "frames", 0) <= 0:
68
+ return False
69
+ codec_context = getattr(video_stream, "codec_context", None)
70
+ if bool(getattr(codec_context, "has_b_frames", False)):
71
+ return True
72
+ start_time = getattr(video_stream, "start_time", None)
73
+ return start_time not in (None, 0)
74
+
75
+
23
76
  def _check_av_available():
24
77
  if not HAS_AV:
25
78
  raise ImportError(
@@ -120,9 +173,17 @@ class MultiVideoDataset(Dataset):
120
173
  video_stream = container.streams.video[0]
121
174
 
122
175
  frame_count = video_stream.frames
123
- if frame_count == 0:
176
+ visible_packet_count = _count_visible_video_packets_if_discard_preroll_detected(
177
+ container,
178
+ video_stream,
179
+ )
180
+ if visible_packet_count is not None and visible_packet_count > 0:
181
+ frame_count = visible_packet_count
182
+ elif frame_count == 0:
183
+ seek = getattr(container, "seek", None)
184
+ if callable(seek):
185
+ seek(0)
124
186
  frame_count = sum(1 for _ in container.decode(video=0))
125
- container.seek(0)
126
187
 
127
188
  fps = float(video_stream.average_rate) if video_stream.average_rate else 30.0
128
189
  duration = float(video_stream.duration * video_stream.time_base) if video_stream.duration else 0.0