mvdata 0.9.2__tar.gz → 0.9.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mvdata-0.9.2 → mvdata-0.9.4}/PKG-INFO +19 -1
- mvdata-0.9.4/README.md +24 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/codec/__init__.py +1 -1
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/codec/decode.py +117 -47
- mvdata-0.9.4/mvdata/codec/nvdec.py +256 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/downloader.py +5 -2
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/multivideo.py +63 -2
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/multivideo_slicer.py +165 -448
- mvdata-0.9.4/mvdata/multivideo_slicer_models.py +211 -0
- mvdata-0.9.4/mvdata/multivideo_slicer_packets.py +456 -0
- mvdata-0.9.4/mvdata/multivideo_slicer_plan.py +100 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/ranged.py +73 -4
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/stash_utils.py +126 -25
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/video_stream_reader.py +73 -53
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/writer_base.py +17 -2
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata.egg-info/PKG-INFO +19 -1
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata.egg-info/SOURCES.txt +5 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/pyproject.toml +1 -1
- {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_dataset_base_defaults.py +3 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_multivideo_bit_depth.py +48 -0
- mvdata-0.9.4/tests/test_multivideo_slicer.py +671 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_ranged_mixed_streams.py +49 -29
- mvdata-0.9.4/tests/test_release_script.py +220 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_s3_downloader.py +40 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_video_stream_reader.py +362 -106
- mvdata-0.9.2/README.md +0 -6
- mvdata-0.9.2/tests/test_multivideo_slicer.py +0 -223
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/__init__.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/cloud_storage.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/codec/_imports.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/codec/encode.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/codec/frames.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/codec/native_yuv.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/codec/probe.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/codec/select.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/dataset_base.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/gpu_policy.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/gpu_support.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/image_metrics.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/legacy_writer.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/multivideo_writer.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/nvdec_parallel.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/nvenc_codec.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/per_frame.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/ranged_writer.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/utils.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata/write_progress.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata.egg-info/dependency_links.txt +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata.egg-info/requires.txt +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/mvdata.egg-info/top_level.txt +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/setup.cfg +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_gpu_policy.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_gpu_support.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_image_metrics.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_nvdec_parallel.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_per_camera.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_ranged_nvenc_roundtrip.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_ranged_resume.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_ranged_stream_discovery.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_roundtrip.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_stash_bit_depth.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_stash_comprehensive.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_stash_policy.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_stash_regenerate.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.4}/tests/test_write_progress.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mvdata
|
|
3
|
-
Version: 0.9.2
|
|
3
|
+
Version: 0.9.4
|
|
4
4
|
Summary: Gracia Dataset Convention - Python library for working with multi-view video datasets
|
|
5
5
|
Author: Gracia Team
|
|
6
6
|
License: MIT
|
|
@@ -50,3 +50,21 @@ Python library for working with Gracia multi-view video datasets.
|
|
|
50
50
|
|
|
51
51
|
The package provides readers, writers, conversion tools, and GPU-aware video
|
|
52
52
|
decode helpers for the dataset layouts documented in the `docs` directory.
|
|
53
|
+
|
|
54
|
+
## Release
|
|
55
|
+
|
|
56
|
+
Prepare the next release with the local helper script:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --dry-run
|
|
60
|
+
UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --push
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Use `minor` or `major` instead of `patch` for larger version bumps, or pass an
|
|
64
|
+
explicit version with `--version 1.2.3`. The script updates `pyproject.toml` and
|
|
65
|
+
`uv.lock` through `uv`, runs the tests and build, creates a `Release vX.Y.Z`
|
|
66
|
+
commit, creates an annotated `vX.Y.Z` tag, and pushes the branch and tag when
|
|
67
|
+
`--push` is set.
|
|
68
|
+
|
|
69
|
+
Pushing the tag triggers the GitHub Actions build, PyPI publish, and GitHub
|
|
70
|
+
Release workflow.
|
mvdata-0.9.4/README.md
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# mvdata
|
|
2
|
+
|
|
3
|
+
Python library for working with Gracia multi-view video datasets.
|
|
4
|
+
|
|
5
|
+
The package provides readers, writers, conversion tools, and GPU-aware video
|
|
6
|
+
decode helpers for the dataset layouts documented in the `docs` directory.
|
|
7
|
+
|
|
8
|
+
## Release
|
|
9
|
+
|
|
10
|
+
Prepare the next release with the local helper script:
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --dry-run
|
|
14
|
+
UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --push
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Use `minor` or `major` instead of `patch` for larger version bumps, or pass an
|
|
18
|
+
explicit version with `--version 1.2.3`. The script updates `pyproject.toml` and
|
|
19
|
+
`uv.lock` through `uv`, runs the tests and build, creates a `Release vX.Y.Z`
|
|
20
|
+
commit, creates an annotated `vX.Y.Z` tag, and pushes the branch and tag when
|
|
21
|
+
`--push` is set.
|
|
22
|
+
|
|
23
|
+
Pushing the tag triggers the GitHub Actions build, PyPI publish, and GitHub
|
|
24
|
+
Release workflow.
|
|
@@ -29,7 +29,6 @@ from ._imports import (
|
|
|
29
29
|
try_import_torch,
|
|
30
30
|
)
|
|
31
31
|
from .decode import (
|
|
32
|
-
_try_open_nvdec,
|
|
33
32
|
decode_mp4_to_rgb,
|
|
34
33
|
decode_mp4_to_rgb_nvdec,
|
|
35
34
|
decode_mp4_to_rgb_pyav,
|
|
@@ -59,6 +58,7 @@ from .frames import (
|
|
|
59
58
|
numpy_to_cupy_rgb,
|
|
60
59
|
)
|
|
61
60
|
from .native_yuv import native_nvdec_to_rgb_cupy, native_nvdec_to_rgb_numpy
|
|
61
|
+
from .nvdec import _try_open_nvdec
|
|
62
62
|
from .probe import (
|
|
63
63
|
infer_video_bit_depth_from_frame,
|
|
64
64
|
infer_video_bit_depth_from_pixel_format_name,
|
|
@@ -11,9 +11,15 @@ from ..gpu_policy import nvdec_decode_allowed
|
|
|
11
11
|
from ._imports import try_import_av, try_import_cupy, try_import_pynvvideocodec, try_import_torch
|
|
12
12
|
from .frames import _decoded_frame_to_rgb_numpy, _pyav_frame_to_rgb
|
|
13
13
|
from .native_yuv import native_nvdec_to_rgb_numpy
|
|
14
|
+
from .nvdec import (
|
|
15
|
+
_close_nvdec_decoder,
|
|
16
|
+
_NvdecOrdinalRawFrameSource,
|
|
17
|
+
_NvdecPtsRawFrameSource,
|
|
18
|
+
_nvdec_scanned_presentation_pts,
|
|
19
|
+
_try_open_nvdec,
|
|
20
|
+
)
|
|
14
21
|
from .probe import (
|
|
15
22
|
infer_video_bit_depth_from_stream,
|
|
16
|
-
nvdec_decode_compatibility_issue,
|
|
17
23
|
nvdec_decode_compatibility_issue_for_path,
|
|
18
24
|
probe_video_color_metadata_pyav,
|
|
19
25
|
probe_video_bit_depth,
|
|
@@ -21,32 +27,88 @@ from .probe import (
|
|
|
21
27
|
)
|
|
22
28
|
|
|
23
29
|
|
|
24
|
-
def
|
|
25
|
-
|
|
26
|
-
|
|
30
|
+
def _nvdec_raw_to_rgb_numpy(
|
|
31
|
+
raw,
|
|
32
|
+
*,
|
|
33
|
+
source_bit_depth: int,
|
|
34
|
+
color_metadata: dict[str, Any],
|
|
35
|
+
torch_mod: Any,
|
|
36
|
+
) -> np.ndarray:
|
|
37
|
+
if source_bit_depth > 8:
|
|
38
|
+
return native_nvdec_to_rgb_numpy(
|
|
39
|
+
raw,
|
|
40
|
+
bit_depth=source_bit_depth,
|
|
41
|
+
**color_metadata,
|
|
42
|
+
)
|
|
43
|
+
return _decoded_frame_to_rgb_numpy(raw, torch_mod, bit_depth=source_bit_depth)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _decode_simple_nvdec_by_ordinal(
|
|
47
|
+
decoder: Any,
|
|
48
|
+
path: Path,
|
|
49
|
+
expect_count: int,
|
|
50
|
+
*,
|
|
51
|
+
source_bit_depth: int,
|
|
52
|
+
color_metadata: dict[str, Any],
|
|
53
|
+
torch_mod: Any,
|
|
54
|
+
) -> list[np.ndarray]:
|
|
55
|
+
raw_source = _NvdecOrdinalRawFrameSource(decoder, path)
|
|
56
|
+
|
|
57
|
+
try:
|
|
58
|
+
try:
|
|
59
|
+
frame_count = min(len(decoder), expect_count)
|
|
60
|
+
except Exception:
|
|
61
|
+
frame_count = expect_count
|
|
62
|
+
|
|
63
|
+
frames: list[np.ndarray] = []
|
|
64
|
+
for index in range(frame_count):
|
|
65
|
+
raw, _ = raw_source.frame_by_index(index)
|
|
66
|
+
frames.append(
|
|
67
|
+
_nvdec_raw_to_rgb_numpy(
|
|
68
|
+
raw,
|
|
69
|
+
source_bit_depth=source_bit_depth,
|
|
70
|
+
color_metadata=color_metadata,
|
|
71
|
+
torch_mod=torch_mod,
|
|
72
|
+
)
|
|
73
|
+
)
|
|
74
|
+
return frames
|
|
75
|
+
finally:
|
|
76
|
+
raw_source.close()
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _decode_nvdec_by_presentation_pts(
|
|
80
|
+
nvc: Any,
|
|
81
|
+
mp4_path: Path,
|
|
82
|
+
*,
|
|
27
83
|
gpu_id: int,
|
|
28
84
|
use_device_memory: bool,
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
85
|
+
output_color_type: Any,
|
|
86
|
+
presentation_pts: list[int],
|
|
87
|
+
expect_count: int,
|
|
88
|
+
source_bit_depth: int,
|
|
89
|
+
color_metadata: dict[str, Any],
|
|
90
|
+
torch_mod: Any,
|
|
91
|
+
) -> list[np.ndarray]:
|
|
92
|
+
raw_source = _NvdecPtsRawFrameSource(
|
|
93
|
+
nvc,
|
|
94
|
+
mp4_path,
|
|
34
95
|
gpu_id=gpu_id,
|
|
35
96
|
use_device_memory=use_device_memory,
|
|
36
|
-
output_color_type=output_color_type
|
|
97
|
+
output_color_type=output_color_type,
|
|
98
|
+
presentation_pts=presentation_pts,
|
|
37
99
|
)
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
100
|
+
try:
|
|
101
|
+
return [
|
|
102
|
+
_nvdec_raw_to_rgb_numpy(
|
|
103
|
+
raw_source.frame(index),
|
|
104
|
+
source_bit_depth=source_bit_depth,
|
|
105
|
+
color_metadata=color_metadata,
|
|
106
|
+
torch_mod=torch_mod,
|
|
107
|
+
)
|
|
108
|
+
for index in range(expect_count)
|
|
109
|
+
]
|
|
110
|
+
finally:
|
|
111
|
+
raw_source.close()
|
|
50
112
|
|
|
51
113
|
|
|
52
114
|
def decode_mp4_to_rgb_pyav(mp4_path: Path, expect_count: int) -> List[np.ndarray]:
|
|
@@ -72,11 +134,7 @@ def decode_mp4_to_rgb_nvdec(nvc, mp4_path: Path, gpu_id: int, expect_count: int)
|
|
|
72
134
|
)
|
|
73
135
|
meta = probe_video_stream_metadata(nvc, mp4_path)
|
|
74
136
|
bit_depth = int(meta.get("bitdepth", 8))
|
|
75
|
-
issue = nvdec_decode_compatibility_issue(
|
|
76
|
-
nvc, meta["width"], meta["height"],
|
|
77
|
-
gpu_id=gpu_id, codec=meta["codec"],
|
|
78
|
-
chroma_subsampling=meta["chroma_subsampling"], bitdepth=bit_depth,
|
|
79
|
-
)
|
|
137
|
+
issue = nvdec_decode_compatibility_issue_for_path(nvc, mp4_path, gpu_id)
|
|
80
138
|
if issue is not None:
|
|
81
139
|
raise RuntimeError(f"NVDEC decode unsupported for {mp4_path}: {issue}")
|
|
82
140
|
source_bit_depth = probe_video_bit_depth(mp4_path, nvc=nvc)
|
|
@@ -91,7 +149,7 @@ def decode_mp4_to_rgb_nvdec(nvc, mp4_path: Path, gpu_id: int, expect_count: int)
|
|
|
91
149
|
last_err: Exception | None = None
|
|
92
150
|
for use_dev in use_dev_options:
|
|
93
151
|
if source_bit_depth > 8:
|
|
94
|
-
|
|
152
|
+
metadata_decoder = _try_open_nvdec(
|
|
95
153
|
nvc,
|
|
96
154
|
path_str,
|
|
97
155
|
gpu_id,
|
|
@@ -99,27 +157,39 @@ def decode_mp4_to_rgb_nvdec(nvc, mp4_path: Path, gpu_id: int, expect_count: int)
|
|
|
99
157
|
output_color_type=output_color_type,
|
|
100
158
|
)
|
|
101
159
|
else:
|
|
102
|
-
|
|
103
|
-
if
|
|
160
|
+
metadata_decoder = _try_open_nvdec(nvc, path_str, gpu_id, use_dev)
|
|
161
|
+
if metadata_decoder is None:
|
|
104
162
|
continue
|
|
105
163
|
try:
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
164
|
+
presentation_pts = _nvdec_scanned_presentation_pts(metadata_decoder, expect_count)
|
|
165
|
+
if presentation_pts is None:
|
|
166
|
+
return _decode_simple_nvdec_by_ordinal(
|
|
167
|
+
metadata_decoder,
|
|
168
|
+
mp4_path,
|
|
169
|
+
expect_count,
|
|
170
|
+
source_bit_depth=source_bit_depth,
|
|
171
|
+
color_metadata=color_metadata,
|
|
172
|
+
torch_mod=torch_mod,
|
|
173
|
+
)
|
|
174
|
+
|
|
175
|
+
_close_nvdec_decoder(metadata_decoder)
|
|
176
|
+
if len(presentation_pts) < expect_count:
|
|
177
|
+
raise RuntimeError(
|
|
178
|
+
f"NVDEC scanned metadata has {len(presentation_pts)} timestamps, "
|
|
179
|
+
f"expected at least {expect_count}"
|
|
180
|
+
)
|
|
181
|
+
return _decode_nvdec_by_presentation_pts(
|
|
182
|
+
nvc,
|
|
183
|
+
mp4_path,
|
|
184
|
+
gpu_id=gpu_id,
|
|
185
|
+
use_device_memory=use_dev,
|
|
186
|
+
output_color_type=output_color_type,
|
|
187
|
+
presentation_pts=presentation_pts,
|
|
188
|
+
expect_count=expect_count,
|
|
189
|
+
source_bit_depth=source_bit_depth,
|
|
190
|
+
color_metadata=color_metadata,
|
|
191
|
+
torch_mod=torch_mod,
|
|
192
|
+
)
|
|
123
193
|
except Exception as e:
|
|
124
194
|
last_err = e
|
|
125
195
|
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
"""Shared NVDEC / PyNvVideoCodec helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _try_open_nvdec(
|
|
10
|
+
nvc,
|
|
11
|
+
path_str: str,
|
|
12
|
+
gpu_id: int,
|
|
13
|
+
use_device_memory: bool,
|
|
14
|
+
*,
|
|
15
|
+
output_color_type=None,
|
|
16
|
+
):
|
|
17
|
+
"""Open a SimpleDecoder, tolerating API-version differences in the kwarg surface."""
|
|
18
|
+
base = dict(
|
|
19
|
+
gpu_id=gpu_id,
|
|
20
|
+
use_device_memory=use_device_memory,
|
|
21
|
+
output_color_type=output_color_type or nvc.OutputColorType.RGB,
|
|
22
|
+
)
|
|
23
|
+
for extra in ({"need_scanned_stream_metadata": True}, {}):
|
|
24
|
+
try:
|
|
25
|
+
return nvc.SimpleDecoder(path_str, **base, **extra)
|
|
26
|
+
except TypeError:
|
|
27
|
+
if extra:
|
|
28
|
+
try:
|
|
29
|
+
return nvc.SimpleDecoder(path_str, **base)
|
|
30
|
+
except Exception:
|
|
31
|
+
pass
|
|
32
|
+
except Exception:
|
|
33
|
+
pass
|
|
34
|
+
return None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _nvdec_frame_pts(frame: Any) -> int | None:
|
|
38
|
+
get_pts = getattr(frame, "getPTS", None)
|
|
39
|
+
if callable(get_pts):
|
|
40
|
+
pts = get_pts()
|
|
41
|
+
if pts is not None:
|
|
42
|
+
return int(pts)
|
|
43
|
+
pts = getattr(frame, "timestamp", None)
|
|
44
|
+
if pts is not None:
|
|
45
|
+
return int(pts)
|
|
46
|
+
return None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _nvdec_scanned_presentation_pts(decoder: Any, expect_count: int) -> list[int] | None:
|
|
50
|
+
del expect_count
|
|
51
|
+
get_scanned = getattr(decoder, "get_scanned_stream_metadata", None)
|
|
52
|
+
if not callable(get_scanned):
|
|
53
|
+
return None
|
|
54
|
+
|
|
55
|
+
metadata = get_scanned()
|
|
56
|
+
pts_values = getattr(metadata, "pts", None)
|
|
57
|
+
if pts_values is None:
|
|
58
|
+
return None
|
|
59
|
+
|
|
60
|
+
pts = [int(value) for value in pts_values]
|
|
61
|
+
if not pts:
|
|
62
|
+
return None
|
|
63
|
+
return pts
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _create_low_level_nvdec(
|
|
67
|
+
nvc: Any,
|
|
68
|
+
path: Path | str,
|
|
69
|
+
*,
|
|
70
|
+
gpu_id: int,
|
|
71
|
+
use_device_memory: bool,
|
|
72
|
+
output_color_type: Any,
|
|
73
|
+
) -> tuple[Any, Any]:
|
|
74
|
+
demuxer = nvc.CreateDemuxer(str(path))
|
|
75
|
+
decoder = nvc.CreateDecoder(
|
|
76
|
+
gpuid=gpu_id,
|
|
77
|
+
codec=demuxer.GetNvCodecId(),
|
|
78
|
+
usedevicememory=use_device_memory,
|
|
79
|
+
outputColorType=output_color_type,
|
|
80
|
+
latency=nvc.DisplayDecodeLatencyType.NATIVE,
|
|
81
|
+
)
|
|
82
|
+
return demuxer, decoder
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _close_nvdec_decoder(decoder: Any) -> None:
|
|
86
|
+
if decoder is None:
|
|
87
|
+
return
|
|
88
|
+
for close_name in ("close", "stop"):
|
|
89
|
+
close_fn = getattr(decoder, close_name, None)
|
|
90
|
+
if callable(close_fn):
|
|
91
|
+
try:
|
|
92
|
+
close_fn()
|
|
93
|
+
except Exception:
|
|
94
|
+
pass
|
|
95
|
+
return
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class _NvdecOrdinalRawFrameSource:
|
|
99
|
+
def __init__(self, decoder: Any, path: Path | str):
|
|
100
|
+
self._decoder = decoder
|
|
101
|
+
self._path = path
|
|
102
|
+
self._get_batch_frames = getattr(decoder, "get_batch_frames", None)
|
|
103
|
+
self._get_batch_frames_by_index = getattr(decoder, "get_batch_frames_by_index", None)
|
|
104
|
+
self._seek_to_index = getattr(decoder, "seek_to_index", None)
|
|
105
|
+
if not callable(self._get_batch_frames_by_index) and not callable(self._get_batch_frames):
|
|
106
|
+
self.close()
|
|
107
|
+
raise RuntimeError(
|
|
108
|
+
f"{path}: NVDEC SimpleDecoder has no scanned metadata or ordinal frame API"
|
|
109
|
+
)
|
|
110
|
+
self._next_sequential_index: int | None = (
|
|
111
|
+
0 if callable(self._get_batch_frames) else None
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
def _decode_sequential(self, index: int):
|
|
115
|
+
if not callable(self._get_batch_frames):
|
|
116
|
+
raise RuntimeError("Sequential NVDEC batch API is unavailable")
|
|
117
|
+
batch = self._get_batch_frames(1)
|
|
118
|
+
if not batch:
|
|
119
|
+
raise RuntimeError(f"NVDEC returned no frame for {self._path} at index {index}")
|
|
120
|
+
self._next_sequential_index = index + 1
|
|
121
|
+
return batch[0]
|
|
122
|
+
|
|
123
|
+
def _decode_indexed(self, index: int):
|
|
124
|
+
if callable(self._get_batch_frames_by_index):
|
|
125
|
+
self._next_sequential_index = None
|
|
126
|
+
batch = self._get_batch_frames_by_index([index])
|
|
127
|
+
if not batch:
|
|
128
|
+
raise RuntimeError(f"NVDEC returned no frame for {self._path} at index {index}")
|
|
129
|
+
return batch[0]
|
|
130
|
+
|
|
131
|
+
if not callable(self._seek_to_index) or not callable(self._get_batch_frames):
|
|
132
|
+
raise RuntimeError("Indexed NVDEC access API is unavailable")
|
|
133
|
+
self._seek_to_index(index)
|
|
134
|
+
self._next_sequential_index = index
|
|
135
|
+
return self._decode_sequential(index)
|
|
136
|
+
|
|
137
|
+
def frame(self, index: int) -> tuple[Any, str]:
|
|
138
|
+
if self._next_sequential_index == index:
|
|
139
|
+
return self._decode_sequential(index), "sequential"
|
|
140
|
+
return self._decode_indexed(index), "indexed"
|
|
141
|
+
|
|
142
|
+
def frame_by_index(self, index: int) -> tuple[Any, str]:
|
|
143
|
+
if callable(self._get_batch_frames_by_index) or callable(self._seek_to_index):
|
|
144
|
+
return self._decode_indexed(index), "indexed"
|
|
145
|
+
return self.frame(index)
|
|
146
|
+
|
|
147
|
+
def reset(self) -> None:
|
|
148
|
+
self._next_sequential_index = 0 if callable(self._get_batch_frames) else None
|
|
149
|
+
|
|
150
|
+
def close(self) -> None:
|
|
151
|
+
_close_nvdec_decoder(self._decoder)
|
|
152
|
+
self._decoder = None
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class _NvdecPtsRawFrameSource:
|
|
156
|
+
def __init__(
|
|
157
|
+
self,
|
|
158
|
+
nvc: Any,
|
|
159
|
+
path: Path | str,
|
|
160
|
+
*,
|
|
161
|
+
gpu_id: int,
|
|
162
|
+
use_device_memory: bool,
|
|
163
|
+
output_color_type: Any,
|
|
164
|
+
presentation_pts: list[int],
|
|
165
|
+
):
|
|
166
|
+
self._nvc = nvc
|
|
167
|
+
self._path = path
|
|
168
|
+
self._gpu_id = gpu_id
|
|
169
|
+
self._use_device_memory = use_device_memory
|
|
170
|
+
self._output_color_type = output_color_type
|
|
171
|
+
self._presentation_pts = presentation_pts
|
|
172
|
+
self._pts_to_index = {
|
|
173
|
+
pts: index
|
|
174
|
+
for index, pts in enumerate(self._presentation_pts)
|
|
175
|
+
}
|
|
176
|
+
if len(self._pts_to_index) != len(self._presentation_pts):
|
|
177
|
+
raise RuntimeError(f"{path}: duplicate presentation timestamps in NVDEC metadata")
|
|
178
|
+
self._demuxer = None
|
|
179
|
+
self._decoder = None
|
|
180
|
+
self._eos = False
|
|
181
|
+
self._last_decoded_index: int | None = None
|
|
182
|
+
self._pending_raw_by_index: dict[int, Any] = {}
|
|
183
|
+
|
|
184
|
+
def reset(self) -> None:
|
|
185
|
+
_close_nvdec_decoder(self._decoder)
|
|
186
|
+
self._demuxer, self._decoder = _create_low_level_nvdec(
|
|
187
|
+
self._nvc,
|
|
188
|
+
self._path,
|
|
189
|
+
gpu_id=self._gpu_id,
|
|
190
|
+
use_device_memory=self._use_device_memory,
|
|
191
|
+
output_color_type=self._output_color_type,
|
|
192
|
+
)
|
|
193
|
+
self._eos = False
|
|
194
|
+
self._last_decoded_index = None
|
|
195
|
+
self._pending_raw_by_index.clear()
|
|
196
|
+
|
|
197
|
+
def _ensure_decoder(self) -> None:
|
|
198
|
+
if self._decoder is None or self._demuxer is None:
|
|
199
|
+
self.reset()
|
|
200
|
+
|
|
201
|
+
def frame(self, index: int):
|
|
202
|
+
pending = self._pending_raw_by_index.pop(index, None)
|
|
203
|
+
if pending is not None:
|
|
204
|
+
return pending
|
|
205
|
+
|
|
206
|
+
if self._last_decoded_index is not None and index <= self._last_decoded_index:
|
|
207
|
+
self.reset()
|
|
208
|
+
else:
|
|
209
|
+
self._ensure_decoder()
|
|
210
|
+
|
|
211
|
+
assert self._decoder is not None
|
|
212
|
+
assert self._demuxer is not None
|
|
213
|
+
|
|
214
|
+
while not self._eos:
|
|
215
|
+
packet = self._demuxer.Demux()
|
|
216
|
+
outputs = self._decoder.Decode(packet)
|
|
217
|
+
if getattr(packet, "bsl", 0) == 0:
|
|
218
|
+
self._eos = True
|
|
219
|
+
target_raw = None
|
|
220
|
+
for raw in outputs:
|
|
221
|
+
pts = _nvdec_frame_pts(raw)
|
|
222
|
+
if pts is None:
|
|
223
|
+
raise RuntimeError(
|
|
224
|
+
f"{self._path}: NVDEC returned a frame without presentation timestamp"
|
|
225
|
+
)
|
|
226
|
+
decoded_index = self._pts_to_index.get(pts)
|
|
227
|
+
if decoded_index is None:
|
|
228
|
+
raise RuntimeError(
|
|
229
|
+
f"{self._path}: NVDEC returned unknown presentation timestamp {pts}"
|
|
230
|
+
)
|
|
231
|
+
self._last_decoded_index = decoded_index
|
|
232
|
+
if decoded_index > index:
|
|
233
|
+
if target_raw is None:
|
|
234
|
+
expected_pts = self._presentation_pts[index]
|
|
235
|
+
raise RuntimeError(
|
|
236
|
+
f"{self._path}: NVDEC skipped requested presentation frame "
|
|
237
|
+
f"{index} (pts={expected_pts}); first later frame was "
|
|
238
|
+
f"{decoded_index} (pts={pts}). The MP4 slice is missing "
|
|
239
|
+
"decode dependencies before the requested frame."
|
|
240
|
+
)
|
|
241
|
+
self._pending_raw_by_index[decoded_index] = raw
|
|
242
|
+
if decoded_index == index:
|
|
243
|
+
target_raw = raw
|
|
244
|
+
if target_raw is not None:
|
|
245
|
+
return target_raw
|
|
246
|
+
|
|
247
|
+
expected_pts = self._presentation_pts[index]
|
|
248
|
+
raise RuntimeError(
|
|
249
|
+
f"{self._path}: NVDEC reached end of stream before presentation frame "
|
|
250
|
+
f"{index} (pts={expected_pts})"
|
|
251
|
+
)
|
|
252
|
+
|
|
253
|
+
def close(self) -> None:
|
|
254
|
+
_close_nvdec_decoder(self._decoder)
|
|
255
|
+
self._decoder = None
|
|
256
|
+
self._demuxer = None
|
|
@@ -116,8 +116,11 @@ class DatasetDownloader(ABC):
|
|
|
116
116
|
return None
|
|
117
117
|
|
|
118
118
|
|
|
119
|
+
RANGED_STREAM_EXTENSIONS = (".avif", ".mp4")
|
|
120
|
+
|
|
121
|
+
|
|
119
122
|
class RangedDatasetDownloader(DatasetDownloader):
|
|
120
|
-
"""Downloader for ranged
|
|
123
|
+
"""Downloader for ranged dataset format."""
|
|
121
124
|
|
|
122
125
|
def _parse_range_folder(self, folder_name: str) -> Optional[Tuple[int, int]]:
|
|
123
126
|
"""Parse range folder name to extract start and end frame numbers."""
|
|
@@ -260,7 +263,7 @@ class RangedDatasetDownloader(DatasetDownloader):
|
|
|
260
263
|
if is_system_file(rel_path_wrapped):
|
|
261
264
|
continue
|
|
262
265
|
|
|
263
|
-
if rel_path_wrapped.suffix.lower()
|
|
266
|
+
if rel_path_wrapped.suffix.lower() not in RANGED_STREAM_EXTENSIONS:
|
|
264
267
|
files_to_download.append((obj_key, local_file))
|
|
265
268
|
continue
|
|
266
269
|
|
|
@@ -20,6 +20,59 @@ SUPPORTED_VIDEO_EXTENSIONS = ['.mov', '.mp4', '.avi']
|
|
|
20
20
|
SUPPORTED_CODECS = ['h264', 'hevc', 'av1']
|
|
21
21
|
|
|
22
22
|
|
|
23
|
+
def _is_content_packet(packet: Any) -> bool:
|
|
24
|
+
return getattr(packet, "size", 0) > 0
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _count_visible_video_packets_if_discard_preroll_detected(
|
|
28
|
+
container: Any,
|
|
29
|
+
video_stream: Any,
|
|
30
|
+
*,
|
|
31
|
+
probe_packets: int = 64,
|
|
32
|
+
) -> Optional[int]:
|
|
33
|
+
if not _may_include_discard_preroll(video_stream):
|
|
34
|
+
return None
|
|
35
|
+
|
|
36
|
+
demux = getattr(container, "demux", None)
|
|
37
|
+
if not callable(demux):
|
|
38
|
+
return None
|
|
39
|
+
|
|
40
|
+
visible_packets = 0
|
|
41
|
+
content_packets = 0
|
|
42
|
+
found_discard = False
|
|
43
|
+
packets = demux(video_stream)
|
|
44
|
+
|
|
45
|
+
for packet in packets:
|
|
46
|
+
if not _is_content_packet(packet):
|
|
47
|
+
continue
|
|
48
|
+
content_packets += 1
|
|
49
|
+
if packet.is_discard:
|
|
50
|
+
found_discard = True
|
|
51
|
+
break
|
|
52
|
+
visible_packets += 1
|
|
53
|
+
if content_packets >= probe_packets:
|
|
54
|
+
return None
|
|
55
|
+
|
|
56
|
+
if not found_discard:
|
|
57
|
+
return visible_packets
|
|
58
|
+
|
|
59
|
+
for packet in packets:
|
|
60
|
+
if _is_content_packet(packet) and not packet.is_discard:
|
|
61
|
+
visible_packets += 1
|
|
62
|
+
|
|
63
|
+
return visible_packets
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _may_include_discard_preroll(video_stream: Any) -> bool:
|
|
67
|
+
if getattr(video_stream, "frames", 0) <= 0:
|
|
68
|
+
return False
|
|
69
|
+
codec_context = getattr(video_stream, "codec_context", None)
|
|
70
|
+
if bool(getattr(codec_context, "has_b_frames", False)):
|
|
71
|
+
return True
|
|
72
|
+
start_time = getattr(video_stream, "start_time", None)
|
|
73
|
+
return start_time not in (None, 0)
|
|
74
|
+
|
|
75
|
+
|
|
23
76
|
def _check_av_available():
|
|
24
77
|
if not HAS_AV:
|
|
25
78
|
raise ImportError(
|
|
@@ -120,9 +173,17 @@ class MultiVideoDataset(Dataset):
|
|
|
120
173
|
video_stream = container.streams.video[0]
|
|
121
174
|
|
|
122
175
|
frame_count = video_stream.frames
|
|
123
|
-
|
|
176
|
+
visible_packet_count = _count_visible_video_packets_if_discard_preroll_detected(
|
|
177
|
+
container,
|
|
178
|
+
video_stream,
|
|
179
|
+
)
|
|
180
|
+
if visible_packet_count is not None and visible_packet_count > 0:
|
|
181
|
+
frame_count = visible_packet_count
|
|
182
|
+
elif frame_count == 0:
|
|
183
|
+
seek = getattr(container, "seek", None)
|
|
184
|
+
if callable(seek):
|
|
185
|
+
seek(0)
|
|
124
186
|
frame_count = sum(1 for _ in container.decode(video=0))
|
|
125
|
-
container.seek(0)
|
|
126
187
|
|
|
127
188
|
fps = float(video_stream.average_rate) if video_stream.average_rate else 30.0
|
|
128
189
|
duration = float(video_stream.duration * video_stream.time_base) if video_stream.duration else 0.0
|