mvdata 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mvdata/__init__.py +169 -0
- mvdata/cloud_storage.py +204 -0
- mvdata/codec/__init__.py +126 -0
- mvdata/codec/_imports.py +42 -0
- mvdata/codec/decode.py +123 -0
- mvdata/codec/encode.py +313 -0
- mvdata/codec/frames.py +199 -0
- mvdata/codec/probe.py +239 -0
- mvdata/codec/select.py +153 -0
- mvdata/dataset_base.py +1049 -0
- mvdata/downloader.py +806 -0
- mvdata/gpu_policy.py +128 -0
- mvdata/gpu_support.py +384 -0
- mvdata/image_metrics.py +24 -0
- mvdata/legacy_writer.py +371 -0
- mvdata/multivideo.py +982 -0
- mvdata/multivideo_decode_benchmark.py +216 -0
- mvdata/multivideo_slicer.py +930 -0
- mvdata/multivideo_writer.py +375 -0
- mvdata/nvdec_parallel.py +145 -0
- mvdata/nvenc_codec.py +17 -0
- mvdata/per_frame.py +1799 -0
- mvdata/ranged.py +1349 -0
- mvdata/ranged_writer.py +964 -0
- mvdata/stash_utils.py +358 -0
- mvdata/utils.py +33 -0
- mvdata/video_stream_reader.py +490 -0
- mvdata/write_progress.py +213 -0
- mvdata/writer_base.py +161 -0
- mvdata-0.9.0.dist-info/METADATA +52 -0
- mvdata-0.9.0.dist-info/RECORD +33 -0
- mvdata-0.9.0.dist-info/WHEEL +5 -0
- mvdata-0.9.0.dist-info/top_level.txt +1 -0
mvdata/codec/encode.py
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
"""NVENC encoding pipeline: RGB → elementary stream → MP4."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import io
|
|
6
|
+
import shutil
|
|
7
|
+
import subprocess
|
|
8
|
+
import tempfile
|
|
9
|
+
from fractions import Fraction
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Iterable, List, Tuple
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
|
|
15
|
+
from ._imports import normalize_codec_name, try_import_av
|
|
16
|
+
from .probe import (
|
|
17
|
+
nvenc_encode_compatibility_issue,
|
|
18
|
+
nvenc_encode_compatibility_issue_for_rgb,
|
|
19
|
+
safe_get_encoder_caps,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def rgb_hwc_to_nvenc_bgra_hwc4(rgb: np.ndarray) -> np.ndarray:
    """Pack an HWC uint8 RGB frame into a contiguous H×W×4 BGRA buffer.

    This is the layout handed to ``CreateEncoder(..., 'ARGB', True)``: the
    colour channels are reordered to B, G, R and a constant alpha of 255 is
    appended as the fourth channel.

    Raises:
        ValueError: if ``rgb`` is not a rank-3 uint8 array with 3 channels.
    """
    well_formed = rgb.dtype == np.uint8 and rgb.ndim == 3 and rgb.shape[2] == 3
    if not well_formed:
        raise ValueError("rgb must be uint8 HWC with 3 channels")

    rows, cols = rgb.shape[:2]
    bgra = np.empty((rows, cols, 4), dtype=np.uint8)
    bgra[..., :3] = rgb[..., ::-1]  # reverse R,G,B -> B,G,R
    bgra[..., 3] = 255  # opaque alpha
    return bgra
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def rgb_hwc_to_yuv444_planar(rgb: np.ndarray, *, full_range: bool = True) -> np.ndarray:
    """Convert an HWC uint8 RGB frame to planar YUV444 stacked as ``(H*3, W)`` uint8.

    The Y, Cb and Cr planes are concatenated along axis 0, which is the
    layout used for ``CreateEncoder(..., 'YUV444', True)``.

    Args:
        rgb: rank-3 uint8 array with 3 channels in the last axis.
        full_range: when true, luma and chroma span 0..255; otherwise the
            16-based limited-range formulas are applied.

    Raises:
        ValueError: if ``rgb`` is not a rank-3 uint8 array with 3 channels.
    """
    if not (rgb.dtype == np.uint8 and rgb.ndim == 3 and rgb.shape[2] == 3):
        raise ValueError("rgb must be uint8 HWC with 3 channels")

    def _round_to_u8(plane):
        # Adding 0.5 before the truncating cast rounds to nearest.
        return np.clip(plane + 0.5, 0, 255).astype(np.uint8)

    red = rgb[:, :, 0].astype(np.float32)
    green = rgb[:, :, 1].astype(np.float32)
    blue = rgb[:, :, 2].astype(np.float32)

    if not full_range:
        luma = 16.0 + 65.481 * red / 255.0 + 128.553 * green / 255.0 + 24.966 * blue / 255.0
        chroma_b = 128.0 - 37.797 * red / 255.0 - 74.203 * green / 255.0 + 112.0 * blue / 255.0
        chroma_r = 128.0 + 112.0 * red / 255.0 - 93.786 * green / 255.0 - 18.214 * blue / 255.0
    else:
        luma = 0.299 * red + 0.587 * green + 0.114 * blue
        chroma_b = -0.168736 * red - 0.331264 * green + 0.5 * blue + 128.0
        chroma_r = 0.5 * red - 0.418688 * green - 0.081312 * blue + 128.0

    planes = [_round_to_u8(luma), _round_to_u8(chroma_b), _round_to_u8(chroma_r)]
    return np.concatenate(planes, axis=0)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def pad_rgb_for_nvenc(rgb: np.ndarray) -> Tuple[np.ndarray, Tuple[int, int, int, int]]:
    """Validate a frame for NVENC and return it with a full-frame crop box.

    No padding is actually applied: the input array is returned unchanged
    together with ``(0, height, 0, width)``.

    Raises:
        ValueError: if the last axis is not 3 channels, or if the frame is
            portrait (width smaller than height).
    """
    height, width, channels = rgb.shape
    if channels != 3:
        raise ValueError("expected RGB HWC")
    is_portrait = width < height
    if is_portrait:
        raise ValueError(f"portrait frames ({width}x{height}) are not encoded via NVENC without padding")
    full_frame_crop = (0, height, 0, width)
    return rgb, full_frame_crop
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def assert_yuv444_encode_supported(nvc, gpu_id: int, codec: str) -> None:
    """Raise unless the GPU advertises 4:4:4 encode for ``codec``.

    The codec name is normalized first, then the encoder capabilities are
    fetched through ``safe_get_encoder_caps``.

    Raises:
        RuntimeError: if no capabilities are reported for the codec, or if
            the ``support_yuv444_encode`` capability is missing or falsy.
    """
    codec = normalize_codec_name(codec)
    encoder_caps = safe_get_encoder_caps(nvc, gpu_id, codec)
    if encoder_caps is None:
        raise RuntimeError(f"GPU reports no encoder support for codec={codec!r}.")
    if encoder_caps.get("support_yuv444_encode"):
        return
    raise RuntimeError(
        f"GPU reports no 4:4:4 encode for codec={codec!r} (support_yuv444_encode=0). "
        "Use chroma 420 or a different codec/GPU."
    )
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def best_quality_extra_kwargs(nvc, gpu_id: int, codec: str) -> dict:
    """Build optional encoder kwargs limited to features the GPU advertises.

    Queries ``nvc.GetEncoderCaps(gpu_id, codec)`` and adds ``lookahead=32``
    only when the ``support_lookahead`` capability is truthy, so the result
    can be splatted into ``CreateEncoder`` as-is.
    """
    encoder_caps = nvc.GetEncoderCaps(gpu_id, codec)
    if not encoder_caps.get("support_lookahead"):
        return {}
    return {"lookahead": 32}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def suggest_nvenc_vbr_bitrate_bps(
    width: int, height: int, fps: int, *, chroma_subsampling: str = "444"
) -> int:
    """Suggest a VBR target bitrate (bits per second) for a resolution and frame rate.

    The estimate scales an 80 Mbps reference for 1920x1080 at 30 fps: pixel
    count is applied with exponent 0.88 and frame rate linearly. ``"444"``
    chroma multiplies the estimate by 1.5. The result is clamped to the
    range 4 Mbps .. 500 Mbps.

    Raises:
        ValueError: if ``chroma_subsampling`` (after stripping whitespace)
            is neither ``"420"`` nor ``"444"``.
    """
    reference_pixels = 1920 * 1080
    reference_fps = 30
    reference_bps = 80_000_000
    floor_bps = 4_000_000
    ceiling_bps = 500_000_000

    pixels = max(1, int(width) * int(height))
    rate = max(1, int(fps))
    estimate = int(reference_bps * ((pixels / reference_pixels) ** 0.88) * (rate / reference_fps))

    chroma = chroma_subsampling.strip()
    if chroma not in ("444", "420"):
        raise ValueError(f"chroma_subsampling must be 420 or 444, got {chroma_subsampling!r}")
    if chroma == "444":
        estimate = int(estimate * 1.5)

    return max(floor_bps, min(estimate, ceiling_bps))
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def resolve_nvenc_bitrate_bps(
    bitrate_arg: int, width: int, height: int, fps: int, *, chroma_subsampling: str
) -> int:
    """Return the caller's bitrate when positive, otherwise a suggested one.

    A ``bitrate_arg`` greater than zero is returned as an ``int``. Any other
    value defers to ``suggest_nvenc_vbr_bitrate_bps`` for the given
    resolution, frame rate and chroma subsampling.
    """
    caller_supplied = bitrate_arg > 0
    if not caller_supplied:
        return suggest_nvenc_vbr_bitrate_bps(
            width, height, fps, chroma_subsampling=chroma_subsampling
        )
    return int(bitrate_arg)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def create_nvenc_encoder(
    nvc,
    width: int,
    height: int,
    gpu_id: int,
    fps: int,
    bitrate: int,
    *,
    codec: str = "h264",
    full_range: bool = True,
    chroma_subsampling: str = "444",
    preset: str = "P7",
):
    """Create an encoder via ``nvc.CreateEncoder`` for the given geometry and settings.

    The codec name is normalized and the configuration is checked with
    ``nvenc_encode_compatibility_issue`` before anything is created. The
    input surface format is ``"YUV444"`` when ``chroma_subsampling`` is
    ``"444"`` and ``"ARGB"`` otherwise.

    Encoder options are fixed to ``tuning_info="high_quality"``, ``rc="vbr"``
    and ``bf=0``. ``maxbitrate`` is the larger of ``bitrate + 2_000_000`` and
    ``int(bitrate * 1.25)``. Options from ``best_quality_extra_kwargs`` are
    merged in, and the full-range colour flags are added when ``full_range``
    is true.

    Returns:
        Whatever ``nvc.CreateEncoder`` returns.

    Raises:
        ValueError: with the reported message when the compatibility check
            finds an issue.
    """
    codec = normalize_codec_name(codec)
    problem = nvenc_encode_compatibility_issue(
        nvc, width, height, gpu_id=gpu_id, codec=codec, chroma_subsampling=chroma_subsampling
    )
    if problem is not None:
        raise ValueError(problem)

    if chroma_subsampling == "444":
        surface_format = "YUV444"
    else:
        surface_format = "ARGB"

    encoder_options = {
        "gpu_id": gpu_id,
        "codec": codec,
        "fps": fps,
        "preset": preset,
        "tuning_info": "high_quality",
        "rc": "vbr",
        "bitrate": bitrate,
        "maxbitrate": max(bitrate + 2_000_000, int(bitrate * 1.25)),
        "bf": 0,
        **best_quality_extra_kwargs(nvc, gpu_id, codec),
    }
    if full_range:
        encoder_options["colorrange"] = nvc.ColorRange.JPEG
        encoder_options["videoFullRangeFlag"] = 1

    return nvc.CreateEncoder(width, height, surface_format, True, **encoder_options)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def encode_rgb_iter_to_elementary_stream(
    nvc,
    frames: Iterable[np.ndarray],
    gpu_id: int,
    fps: int,
    bitrate: int,
    *,
    codec: str = "h264",
    full_range: bool = True,
    chroma_subsampling: str = "444",
    preset: str = "P7",
) -> bytes:
    """Encode an iterable of RGB uint8 HWC frames to an H.264/HEVC elementary stream.

    The encoder is created lazily from the first frame's height and width.
    Every frame is checked with ``nvenc_encode_compatibility_issue_for_rgb``
    and later frames must match the first frame's resolution. Frames are
    converted to planar YUV444 when ``chroma_subsampling`` is ``"444"`` and
    to BGRA otherwise.

    Returns:
        The concatenated encoder output including the ``EndEncode`` tail, or
        ``b""`` when ``frames`` yields nothing.

    Raises:
        ValueError: if a frame fails the compatibility check or its
            resolution differs from the first frame's.
    """
    encoder = None
    expected_hw = (0, 0)
    wants_yuv444 = chroma_subsampling == "444"
    pieces: List[bytes] = []

    for frame in frames:
        problem = nvenc_encode_compatibility_issue_for_rgb(
            nvc, frame, gpu_id=gpu_id, codec=codec, chroma_subsampling=chroma_subsampling
        )
        if problem is not None:
            raise ValueError(problem)

        frame_hw = (frame.shape[0], frame.shape[1])
        if encoder is None:
            expected_hw = frame_hw
            encoder = create_nvenc_encoder(
                nvc, expected_hw[1], expected_hw[0], gpu_id, fps, bitrate,
                codec=codec, full_range=full_range,
                chroma_subsampling=chroma_subsampling, preset=preset,
            )
        elif frame_hw != expected_hw:
            raise ValueError("All frames must share the same resolution within a stream")

        if wants_yuv444:
            surface = rgb_hwc_to_yuv444_planar(frame, full_range=full_range)
        else:
            surface = rgb_hwc_to_nvenc_bgra_hwc4(frame)
        surface = np.ascontiguousarray(surface)

        try:
            payload = encoder.Encode(surface)
        except TypeError:
            # Retry with a flattened buffer when Encode rejects the shaped array.
            payload = encoder.Encode(surface.reshape(-1))
        if payload:
            pieces.append(bytes(payload))

    if encoder is None:
        return b""

    trailer = encoder.EndEncode()
    if trailer:
        pieces.append(bytes(trailer))
    return b"".join(pieces)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def encode_rgb_frames_to_elementary_stream(
    nvc,
    frames: List[np.ndarray],
    gpu_id: int,
    fps: int,
    bitrate: int,
    *,
    codec: str = "h264",
    full_range: bool = True,
    chroma_subsampling: str = "444",
    preset: str = "P7",
) -> bytes:
    """List-based wrapper around ``encode_rgb_iter_to_elementary_stream``.

    Wraps ``frames`` in an iterator and forwards every argument unchanged.
    """
    encode_options = {
        "codec": codec,
        "full_range": full_range,
        "chroma_subsampling": chroma_subsampling,
        "preset": preset,
    }
    frame_iter = iter(frames)
    return encode_rgb_iter_to_elementary_stream(
        nvc, frame_iter, gpu_id, fps, bitrate, **encode_options
    )
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def ffmpeg_executable() -> str | None:
    """Locate an ffmpeg binary.

    Tries the executable reported by ``imageio_ffmpeg`` first. If importing
    or querying that package fails for any reason, falls back to
    ``shutil.which("ffmpeg")``, which yields ``None`` when nothing is on PATH.
    """
    try:
        import imageio_ffmpeg  # type: ignore

        bundled = imageio_ffmpeg.get_ffmpeg_exe()
    except Exception:
        return shutil.which("ffmpeg")
    return bundled
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def mux_elementary_to_mp4(
    es_bytes: bytes, mp4_path: Path, fps: float, *, demux_format: str = "h264"
) -> None:
    """Wrap an elementary video stream into an MP4 file without re-encoding.

    Two backends are tried in order:

    1. ffmpeg (see ``ffmpeg_executable``): the stream is written to a
       temporary file and copied into ``mp4_path`` with ``-c:v copy``.
    2. PyAV: packets are demuxed from memory and remuxed into ``mp4_path``.

    Args:
        es_bytes: raw elementary stream.
        mp4_path: destination file; ffmpeg is invoked with ``-y``.
        fps: frame rate passed to ffmpeg via ``-r``. In the PyAV path it
            spaces the timestamps of packets that arrive without a pts.
        demux_format: input format name, ``"h264"`` by default;
            ``"hevc"``/``"h265"`` select the ``.hevc`` temp-file suffix.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
        RuntimeError: if neither ffmpeg nor PyAV is available, or the PyAV
            demuxer finds no video stream.
    """
    ffmpeg = ffmpeg_executable()
    if ffmpeg:
        suffix = ".hevc" if demux_format in ("hevc", "h265") else ".h264"
        # delete=False so the file can be closed before ffmpeg opens it; we
        # remove it ourselves. Cleanup starts as soon as the file exists so
        # a failed write does not leave a stray temp file behind.
        tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
        tmp_es = Path(tmp.name)
        try:
            with tmp:
                tmp.write(es_bytes)
            subprocess.run(
                [ffmpeg, "-y", "-loglevel", "error", "-f", demux_format,
                 "-r", str(fps), "-i", str(tmp_es), "-c:v", "copy", str(mp4_path)],
                check=True,
            )
        finally:
            tmp_es.unlink(missing_ok=True)
        return

    av = try_import_av()
    if av is None:
        raise RuntimeError(
            "Need ffmpeg in PATH, or install PyAV (`pip install av`) to mux elementary video to MP4."
        )

    bio = io.BytesIO(es_bytes)
    with av.open(bio, format=demux_format) as inp:
        if not inp.streams.video:
            raise RuntimeError("No video stream in elementary bitstream")
        vin = inp.streams.video[0]
        with av.open(str(mp4_path), mode="w") as out:
            vout = out.add_stream_from_template(vin, opaque=True)
            tb_den = 90_000
            vout.time_base = Fraction(1, tb_den)
            # Ticks per frame in the 1/90000 time base, never less than 1.
            inc = max(1, int(round(tb_den / float(fps))))
            for i, packet in enumerate(inp.demux(vin)):
                if getattr(packet, "size", 0) == 0:
                    continue  # skip empty packets
                if packet.pts is None:
                    # No timestamp on the packet: derive one from its index.
                    packet.pts = i * inc
                packet.dts = packet.pts
                packet.stream = vout
                out.mux(packet)
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def encode_rgb_iter_to_mp4(
    nvc,
    frames: Iterable[np.ndarray],
    mp4_path: Path,
    *,
    gpu_id: int,
    fps: int,
    bitrate: int,
    codec: str = "h264",
    full_range: bool = True,
    chroma_subsampling: str = "444",
    preset: str = "P7",
) -> None:
    """Encode an iterable of RGB frames and write the result to ``mp4_path``.

    The frames are encoded to an elementary stream with
    ``encode_rgb_iter_to_elementary_stream`` and then muxed with
    ``mux_elementary_to_mp4``. The demux format is ``"hevc"`` when the
    normalized codec is ``"hevc"`` or ``"h265"`` and ``"h264"`` otherwise.
    """
    codec = normalize_codec_name(codec)
    elementary = encode_rgb_iter_to_elementary_stream(
        nvc, frames, gpu_id, fps, bitrate,
        codec=codec, full_range=full_range,
        chroma_subsampling=chroma_subsampling, preset=preset,
    )
    if codec in ("hevc", "h265"):
        container_input = "hevc"
    else:
        container_input = "h264"
    mux_elementary_to_mp4(elementary, mp4_path, float(fps), demux_format=container_input)
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def encode_rgb_sequence_to_mp4(
    nvc,
    frames: List[np.ndarray],
    mp4_path: Path,
    *,
    gpu_id: int,
    fps: int,
    bitrate: int,
    codec: str = "h264",
    full_range: bool = True,
    chroma_subsampling: str = "444",
    preset: str = "P7",
) -> None:
    """List-based wrapper around ``encode_rgb_iter_to_mp4``.

    Wraps ``frames`` in an iterator and forwards every argument unchanged.
    """
    forwarded = {
        "gpu_id": gpu_id,
        "fps": fps,
        "bitrate": bitrate,
        "codec": codec,
        "full_range": full_range,
        "chroma_subsampling": chroma_subsampling,
        "preset": preset,
    }
    encode_rgb_iter_to_mp4(nvc, iter(frames), mp4_path, **forwarded)
|
mvdata/codec/frames.py
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
"""Conversion helpers that turn decoded frames into canonical HWC RGB arrays.
|
|
2
|
+
|
|
3
|
+
The public surface here is deliberately small:
|
|
4
|
+
|
|
5
|
+
``_decoded_frame_to_rgb_numpy``
|
|
6
|
+
Takes a decoder output (numpy, dlpack-capable, or torch/cupy tensor) and
|
|
7
|
+
returns a contiguous HWC numpy array owned by the caller.
|
|
8
|
+
``_decoded_frame_to_rgb_cupy``
|
|
9
|
+
Same, but returns a fresh ``cupy.ndarray`` that stays on the GPU. Uses
|
|
10
|
+
DLPack to view decoder device memory, then copies into a new buffer so the
|
|
11
|
+
decoder is free to reuse its pool slot. Accepts an optional raw CUDA stream
|
|
12
|
+
pointer to order the copy with user work.
|
|
13
|
+
``numpy_to_cupy_rgb``
|
|
14
|
+
H2D helper used for streams that can only be produced on the CPU (pyavif,
|
|
15
|
+
PyAV CPU fallback).
|
|
16
|
+
``_pyav_frame_to_rgb``
|
|
17
|
+
PyAV → HWC RGB with correct handling of full-range yuv444 and high bit
|
|
18
|
+
depths.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
from contextlib import ExitStack
|
|
24
|
+
from typing import Any
|
|
25
|
+
|
|
26
|
+
import numpy as np
|
|
27
|
+
|
|
28
|
+
from ._imports import try_import_cupy, try_import_torch
|
|
29
|
+
from .probe import infer_video_bit_depth_from_frame
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _cupy_or_raise():
    """Return the CuPy module, or raise with install instructions if it is unavailable.

    Raises:
        RuntimeError: when ``try_import_cupy`` returns ``None``.
    """
    cupy_mod = try_import_cupy()
    if cupy_mod is not None:
        return cupy_mod
    raise RuntimeError(
        "device='cuda' requires CuPy. Install it with `pip install mvdata[cuda]` "
        "(or `pip install cupy-cuda12x`)."
    )
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _device_and_stream_ctx(
    cp, *, gpu_id: int | None = None, cuda_stream: int | None = None
):
    """Return an ``ExitStack`` with the requested device and stream already entered.

    The contexts are entered inside this function, so the caller is expected
    to use the result in a ``with`` statement (or close it) to unwind them.

    Args:
        cp: the CuPy module.
        gpu_id: when not ``None``, ``cp.cuda.Device(int(gpu_id))`` is entered.
        cuda_stream: raw stream pointer. When not ``None`` and non-zero,
            ``cp.cuda.ExternalStream(int(cuda_stream))`` is entered; ``None``
            or 0 leaves the current stream untouched.

    Returns:
        An ``ExitStack`` that owns every context entered here.
    """
    # Build on a guarded stack: if constructing or entering a later context
    # raises, the contexts entered so far (e.g. the device) are exited
    # instead of being left active.
    with ExitStack() as pending:
        if gpu_id is not None:
            pending.enter_context(cp.cuda.Device(int(gpu_id)))
        if cuda_stream is not None and int(cuda_stream) != 0:
            pending.enter_context(cp.cuda.ExternalStream(int(cuda_stream)))
        # Setup succeeded: move the entered contexts to a fresh stack so
        # leaving this ``with`` block does not unwind them.
        return pending.pop_all()
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _target_dtype_for(arr_dtype, *, bit_depth: int | None):
    """Pick the output dtype (``np.uint8`` or ``np.uint16``) for a decoded frame.

    uint8 and uint16 inputs keep their dtype. Other integer dtypes map to
    uint16 when ``bit_depth`` exceeds 8 or the item size is larger than one
    byte, and to uint8 otherwise. A ``bit_depth`` of ``None`` (or 0) is
    treated as 8.

    Raises:
        TypeError: for non-integer dtypes.
    """
    for passthrough in (np.uint8, np.uint16):
        if arr_dtype == passthrough:
            return passthrough
    if not np.issubdtype(arr_dtype, np.integer):
        raise TypeError(f"Cannot convert decoded frame to HWC RGB: dtype={arr_dtype}")
    needs_wide = (bit_depth or 8) > 8 or arr_dtype.itemsize > 1
    return np.uint16 if needs_wide else np.uint8
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _ensure_hwc(arr):
    """Return ``(array, transposed)`` with the channel axis last.

    A rank-3 array whose first axis has length 3 and whose last axis does
    not is transposed from 3×H×W to H×W×3 and reported with ``True``. A
    rank-3 array already ending in 3 channels is returned unchanged with
    ``False``.

    Raises:
        TypeError: for any other shape.
    """
    is_rank3 = arr.ndim == 3
    if is_rank3 and arr.shape[0] == 3 and arr.shape[-1] != 3:
        return arr.transpose(1, 2, 0), True
    if is_rank3 and arr.shape[-1] == 3:
        return arr, False
    raise TypeError(f"Cannot convert decoded frame to HWC RGB: shape={arr.shape}")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _normalize_rgb_numpy_output(
    arr: np.ndarray, *, bit_depth: int | None = None, copy_result: bool = False
) -> np.ndarray:
    """Coerce a decoded frame into a C-contiguous HWC array of uint8 or uint16.

    Args:
        arr: array-like frame, either H×W×3 or 3×H×W (see ``_ensure_hwc``).
        bit_depth: optional bit-depth hint forwarded to ``_target_dtype_for``.
        copy_result: when true, the result is always a fresh copy that does
            not alias ``arr``.

    Returns:
        A C-contiguous array. It is a new allocation whenever a copy was
        requested, the input had to be transposed, or the dtype had to
        change; otherwise the input itself may be returned.

    Raises:
        TypeError: propagated from ``_ensure_hwc`` / ``_target_dtype_for``
            for unsupported shapes or dtypes.
    """
    arr = np.asarray(arr)
    arr, transposed = _ensure_hwc(arr)
    target = _target_dtype_for(arr.dtype, bit_depth=bit_depth)

    if arr.dtype != target:
        # A dtype change always allocates a new array; asking for C order
        # here makes it contiguous in the same pass, so no second copy is
        # needed to take ownership.
        return arr.astype(target, order="C")

    if copy_result or transposed:
        return np.array(arr, copy=True, order="C")
    if not arr.flags.c_contiguous:
        return np.ascontiguousarray(arr)
    return arr
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _decoded_frame_to_rgb_numpy(frame, torch_mod, *, bit_depth: int | None = None):
    """Return a contiguous HWC RGB numpy array owned by the caller.

    Frames without ``__dlpack__`` are normalized directly with a forced
    copy. For DLPack-capable frames the importers are tried in this order,
    each falling through to the next on any exception:

    1. ``np.from_dlpack`` (result copied),
    2. ``torch_mod.from_dlpack(...).detach().cpu().numpy()`` when
       ``torch_mod`` is given,
    3. ``cp.asnumpy(cp.from_dlpack(...))`` when CuPy is importable
       (errors here are not caught).

    If none of them produced a result, the frame is normalized directly
    with a forced copy.
    """
    if not hasattr(frame, "__dlpack__"):
        # Host-memory decode outputs may alias decoder-managed buffers that
        # get reused on the next decode call, so take ownership here.
        return _normalize_rgb_numpy_output(frame, bit_depth=bit_depth, copy_result=True)

    try:
        host_view = np.from_dlpack(frame)
        return _normalize_rgb_numpy_output(host_view, bit_depth=bit_depth, copy_result=True)
    except Exception:
        pass

    if torch_mod is not None:
        try:
            via_torch = torch_mod.from_dlpack(frame).detach().cpu().numpy()
            return _normalize_rgb_numpy_output(via_torch, bit_depth=bit_depth)
        except Exception:
            pass

    cupy_mod = try_import_cupy()
    if cupy_mod is not None:
        via_cupy = cupy_mod.asnumpy(cupy_mod.from_dlpack(frame))
        return _normalize_rgb_numpy_output(via_cupy, bit_depth=bit_depth)

    # Same ownership concern as the non-DLPack case above.
    return _normalize_rgb_numpy_output(frame, bit_depth=bit_depth, copy_result=True)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _decoded_frame_to_rgb_cupy(
    frame: Any,
    *,
    bit_depth: int | None = None,
    gpu_id: int | None = None,
    cuda_stream: int | None = None,
):
    """Produce a fresh contiguous HWC RGB ``cupy.ndarray`` from a decoder output.

    Resulting array is always a new device allocation: the caller owns it and
    the decoder is free to overwrite its pool slot immediately after return.
    The GPU→GPU copy (or H2D copy, if ``frame`` is a host array) is issued on
    ``cuda_stream`` when provided, otherwise on CuPy's current stream.

    Args:
        frame: decoder output; a DLPack-capable device object, a NumPy
            array, or anything ``np.asarray`` accepts.
        bit_depth: optional bit-depth hint forwarded to ``_target_dtype_for``.
        gpu_id: device to make current for the copy, if given.
        cuda_stream: raw CUDA stream pointer to run the copy on, if given.

    Raises:
        RuntimeError: if CuPy is not installed (from ``_cupy_or_raise``).
        TypeError: for unsupported shapes or dtypes (from ``_ensure_hwc`` /
            ``_target_dtype_for``).
    """
    cp = _cupy_or_raise()

    def _as_cupy(src):
        # NumPy arrays also expose ``__dlpack__`` but describe host memory,
        # so they are sent through the explicit H2D path rather than CuPy's
        # DLPack importer (which is meant for device memory).
        if hasattr(src, "__dlpack__") and not isinstance(src, np.ndarray):
            return cp.from_dlpack(src)
        return cp.asarray(np.ascontiguousarray(np.asarray(src)))

    with _device_and_stream_ctx(cp, gpu_id=gpu_id, cuda_stream=cuda_stream):
        src = _as_cupy(frame)
        src, _ = _ensure_hwc(src)
        target = _target_dtype_for(src.dtype, bit_depth=bit_depth)
        # Always copy into a newly allocated C-ordered buffer so the result
        # never aliases decoder-owned memory.
        out = cp.empty(src.shape, dtype=target, order="C")
        if src.dtype == target:
            cp.copyto(out, src)
        else:
            out[...] = src.astype(target, copy=False)
        return out
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def numpy_to_cupy_rgb(
    arr: np.ndarray,
    *,
    gpu_id: int | None = None,
    cuda_stream: int | None = None,
):
    """Copy a host array into a newly allocated ``cupy.ndarray``.

    The input is made C-contiguous first. Allocation and upload run inside
    the device/stream context built from ``gpu_id`` and ``cuda_stream``.
    The result has the same shape and dtype as the contiguous input.

    Raises:
        RuntimeError: if CuPy is not installed (from ``_cupy_or_raise``).
    """
    cp = _cupy_or_raise()
    host = np.ascontiguousarray(arr)
    with _device_and_stream_ctx(cp, gpu_id=gpu_id, cuda_stream=cuda_stream):
        device_copy = cp.empty(host.shape, dtype=host.dtype)
        device_copy.set(host)
    return device_copy
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _pyav_frame_to_rgb(frame, *, bit_depth: int | None = None) -> np.ndarray:
    """Convert a PyAV VideoFrame to an HWC RGB numpy array.

    Frames whose pixel format starts with ``yuv444p`` and whose colour range
    is flagged full, unspecified or absent are converted manually with
    full-range formulas, to dodge swscale's limited-range default when VUI
    is unspecified. Every other frame goes through ``frame.to_ndarray``.

    Args:
        frame: object exposing ``format.name``, ``to_ndarray`` and
            optionally ``color_range``.
        bit_depth: bit depth to assume; inferred from the frame when ``None``.

    Returns:
        uint8 RGB for depths up to 8 bits. For deeper sources the manual
        path returns uint16 scaled to 0..65535 and the ``to_ndarray`` path
        requests ``rgb48le``.
    """
    pix_fmt = str(frame.format.name)
    if bit_depth is not None:
        depth = int(bit_depth)
    else:
        depth = infer_video_bit_depth_from_frame(frame)

    color_range = getattr(frame, "color_range", None)
    range_text = "" if color_range is None else str(color_range)
    flagged_full = color_range is not None and range_text in ("2", "JPEG", "jpeg", "pc")
    unflagged = color_range is None or range_text in ("0", "UNSPECIFIED", "unspecified")

    manual_conversion = pix_fmt.startswith("yuv444p") and (flagged_full or unflagged)
    if not manual_conversion:
        return frame.to_ndarray(format="rgb48le" if depth > 8 else "rgb24")

    planes = frame.to_ndarray(format=pix_fmt)
    luma = planes[0].astype(np.float32)
    peak_in = float((1 << depth) - 1)
    midpoint = (peak_in + 1.0) / 2.0  # chroma zero point for this bit depth
    chroma_b = planes[1].astype(np.float32) - midpoint
    chroma_r = planes[2].astype(np.float32) - midpoint

    if depth > 8:
        peak_out, out_dtype = 65535.0, np.uint16
    else:
        peak_out, out_dtype = 255.0, np.uint8
    gain = peak_out / peak_in

    def _quantize(channel):
        # Scale to the output range and round to nearest via +0.5 and truncation.
        return np.clip(channel * gain + 0.5, 0, peak_out).astype(out_dtype)

    red = _quantize(luma + 1.402 * chroma_r)
    green = _quantize(luma - 0.344136 * chroma_b - 0.714136 * chroma_r)
    blue = _quantize(luma + 1.772 * chroma_b)
    return np.stack([red, green, blue], axis=-1)
|