mvdata 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mvdata/codec/encode.py ADDED
@@ -0,0 +1,313 @@
1
+ """NVENC encoding pipeline: RGB → elementary stream → MP4."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import io
6
+ import shutil
7
+ import subprocess
8
+ import tempfile
9
+ from fractions import Fraction
10
+ from pathlib import Path
11
+ from typing import Iterable, List, Tuple
12
+
13
+ import numpy as np
14
+
15
+ from ._imports import normalize_codec_name, try_import_av
16
+ from .probe import (
17
+ nvenc_encode_compatibility_issue,
18
+ nvenc_encode_compatibility_issue_for_rgb,
19
+ safe_get_encoder_caps,
20
+ )
21
+
22
+
23
def rgb_hwc_to_nvenc_bgra_hwc4(rgb: np.ndarray) -> np.ndarray:
    """Repack an HWC uint8 RGB frame as H×W×4 BGRA with opaque alpha.

    The result is a newly allocated C-contiguous array intended for
    ``CreateEncoder(..., 'ARGB', True)``.

    Args:
        rgb: ``uint8`` array of shape ``(H, W, 3)`` in R, G, B channel order.

    Returns:
        ``uint8`` array of shape ``(H, W, 4)`` in B, G, R, A channel order,
        with every alpha value set to 255.

    Raises:
        ValueError: If ``rgb`` is not a 3-channel, rank-3 ``uint8`` array.
    """
    is_rgb_u8 = rgb.dtype == np.uint8 and rgb.ndim == 3 and rgb.shape[2] == 3
    if not is_rgb_u8:
        raise ValueError("rgb must be uint8 HWC with 3 channels")

    height, width = rgb.shape[:2]
    bgra = np.empty((height, width, 4), dtype=np.uint8)
    bgra[..., 0] = rgb[..., 2]  # blue
    bgra[..., 1] = rgb[..., 1]  # green
    bgra[..., 2] = rgb[..., 0]  # red
    bgra[..., 3] = 255  # fully opaque alpha
    return bgra
30
+
31
+
32
def rgb_hwc_to_yuv444_planar(rgb: np.ndarray, *, full_range: bool = True) -> np.ndarray:
    """Convert an HWC uint8 RGB frame to planar YUV444 stacked as ``(H*3, W)``.

    The Y, Cb and Cr planes are stacked vertically in that order, which is the
    layout fed to ``CreateEncoder(..., 'YUV444', True)``.

    Args:
        rgb: ``uint8`` array of shape ``(H, W, 3)``.
        full_range: When true, Y/Cb/Cr span 0..255 (Cb/Cr centred on 128).
            When false, Y is offset by 16 and scaled into the narrower
            "video" range, with Cb/Cr scaled around 128.

    Returns:
        ``uint8`` array of shape ``(3 * H, W)``.

    Raises:
        ValueError: If ``rgb`` is not a 3-channel, rank-3 ``uint8`` array.
    """
    is_rgb_u8 = rgb.dtype == np.uint8 and rgb.ndim == 3 and rgb.shape[2] == 3
    if not is_rgb_u8:
        raise ValueError("rgb must be uint8 HWC with 3 channels")

    def _to_u8(plane):
        # Adding 0.5 before the truncating cast rounds to nearest.
        return np.clip(plane + 0.5, 0, 255).astype(np.uint8)

    r = rgb[:, :, 0].astype(np.float32)
    g = rgb[:, :, 1].astype(np.float32)
    b = rgb[:, :, 2].astype(np.float32)

    if not full_range:
        y = 16.0 + 65.481 * r / 255.0 + 128.553 * g / 255.0 + 24.966 * b / 255.0
        cb = 128.0 - 37.797 * r / 255.0 - 74.203 * g / 255.0 + 112.0 * b / 255.0
        cr = 128.0 + 112.0 * r / 255.0 - 93.786 * g / 255.0 - 18.214 * b / 255.0
    else:
        y = 0.299 * r + 0.587 * g + 0.114 * b
        cb = -0.168736 * r - 0.331264 * g + 0.5 * b + 128.0
        cr = 0.5 * r - 0.418688 * g - 0.081312 * b + 128.0

    planes = [_to_u8(y), _to_u8(cb), _to_u8(cr)]
    return np.concatenate(planes, axis=0)
49
+
50
+
51
def pad_rgb_for_nvenc(rgb: np.ndarray) -> Tuple[np.ndarray, Tuple[int, int, int, int]]:
    """Validate a frame for NVENC and return it with a full-frame crop box.

    No padding is performed: the input array is returned unchanged.

    Args:
        rgb: Rank-3 array whose last axis has 3 channels (``H, W, 3``).

    Returns:
        ``(rgb, (0, H, 0, W))`` — the same array object and a crop tuple that
        covers the whole frame.

    Raises:
        ValueError: If the last axis is not 3 channels, or the frame is
            portrait (width smaller than height).
    """
    height, width, channels = rgb.shape
    if channels != 3:
        raise ValueError("expected RGB HWC")
    if height > width:
        raise ValueError(
            f"portrait frames ({width}x{height}) are not encoded via NVENC without padding"
        )
    full_frame_crop = (0, height, 0, width)
    return rgb, full_frame_crop
59
+
60
+
61
def assert_yuv444_encode_supported(nvc, gpu_id: int, codec: str) -> None:
    """Raise unless the GPU advertises 4:4:4 encoding for ``codec``.

    Args:
        nvc: Video-codec binding module passed through to
            ``safe_get_encoder_caps``.
        gpu_id: Index of the GPU to query.
        codec: Codec name; normalized with ``normalize_codec_name`` first.

    Raises:
        RuntimeError: If no encoder capabilities are reported for the codec,
            or the ``support_yuv444_encode`` capability is falsy.
    """
    canonical = normalize_codec_name(codec)
    encoder_caps = safe_get_encoder_caps(nvc, gpu_id, canonical)
    if encoder_caps is None:
        raise RuntimeError(f"GPU reports no encoder support for codec={canonical!r}.")
    if encoder_caps.get("support_yuv444_encode"):
        return
    raise RuntimeError(
        f"GPU reports no 4:4:4 encode for codec={canonical!r} (support_yuv444_encode=0). "
        "Use chroma 420 or a different codec/GPU."
    )
71
+
72
+
73
def best_quality_extra_kwargs(nvc, gpu_id: int, codec: str) -> dict:
    """Return optional quality kwargs that the GPU advertises support for.

    Only features reported by the encoder capabilities are enabled, so the
    returned dict is safe to splat into ``CreateEncoder``.

    The capabilities are queried through ``safe_get_encoder_caps`` (the same
    helper ``assert_yuv444_encode_supported`` uses) rather than calling
    ``nvc.GetEncoderCaps`` directly, so a missing capability report yields an
    empty dict instead of an ``AttributeError`` on ``None``.

    Args:
        nvc: Video-codec binding module.
        gpu_id: Index of the GPU to query.
        codec: Codec name; normalized with ``normalize_codec_name`` first.

    Returns:
        A dict with ``"lookahead": 32`` when ``support_lookahead`` is
        advertised, otherwise an empty dict.
    """
    caps = safe_get_encoder_caps(nvc, gpu_id, normalize_codec_name(codec))
    if caps is None:
        # No capability report: enable nothing optional.
        return {}

    kw: dict = {}
    if caps.get("support_lookahead"):
        kw["lookahead"] = 32
    return kw
80
+
81
+
82
def suggest_nvenc_vbr_bitrate_bps(
    width: int, height: int, fps: int, *, chroma_subsampling: str = "444"
) -> int:
    """Suggest a VBR target bitrate (bits per second) for a resolution and rate.

    The estimate scales a reference point of 80 Mbit/s at 1920x1080, 30 fps:
    sub-linearly with pixel count (exponent 0.88) and linearly with frame
    rate. 4:4:4 content gets a further 1.5x. The result is clamped to the
    range 4 Mbit/s .. 500 Mbit/s.

    Args:
        width: Frame width in pixels.
        height: Frame height in pixels.
        fps: Frames per second (values below 1 are treated as 1).
        chroma_subsampling: ``"420"`` or ``"444"`` (surrounding whitespace is
            ignored).

    Returns:
        Suggested bitrate in bits per second.

    Raises:
        ValueError: If ``chroma_subsampling`` is neither ``"420"`` nor ``"444"``.
    """
    reference_pixels = 1920 * 1080
    reference_fps = 30
    reference_bps = 80_000_000
    floor_bps, ceiling_bps = 4_000_000, 500_000_000

    pixels = max(1, int(width) * int(height))
    rate = max(1, int(fps))
    pixel_factor = (pixels / reference_pixels) ** 0.88
    estimate = int(reference_bps * pixel_factor * (rate / reference_fps))

    layout = chroma_subsampling.strip()
    if layout not in ("444", "420"):
        raise ValueError(f"chroma_subsampling must be 420 or 444, got {chroma_subsampling!r}")
    if layout == "444":
        estimate = int(estimate * 1.5)

    return max(floor_bps, min(estimate, ceiling_bps))
97
+
98
+
99
def resolve_nvenc_bitrate_bps(
    bitrate_arg: int, width: int, height: int, fps: int, *, chroma_subsampling: str
) -> int:
    """Pick the bitrate to use: the caller's value if positive, else a suggestion.

    Args:
        bitrate_arg: Requested bitrate in bits per second; values that are not
            greater than zero mean "choose automatically".
        width: Frame width in pixels (used only for the automatic choice).
        height: Frame height in pixels (used only for the automatic choice).
        fps: Frames per second (used only for the automatic choice).
        chroma_subsampling: Forwarded to ``suggest_nvenc_vbr_bitrate_bps``.

    Returns:
        ``int(bitrate_arg)`` when it is positive, otherwise the value from
        ``suggest_nvenc_vbr_bitrate_bps``.
    """
    caller_supplied = bitrate_arg > 0
    if not caller_supplied:
        return suggest_nvenc_vbr_bitrate_bps(
            width, height, fps, chroma_subsampling=chroma_subsampling
        )
    return int(bitrate_arg)
105
+
106
+
107
def create_nvenc_encoder(
    nvc,
    width: int,
    height: int,
    gpu_id: int,
    fps: int,
    bitrate: int,
    *,
    codec: str = "h264",
    full_range: bool = True,
    chroma_subsampling: str = "444",
    preset: str = "P7",
):
    """Create an encoder via ``nvc.CreateEncoder`` configured for VBR output.

    Args:
        nvc: Video-codec binding module providing ``CreateEncoder`` and
            ``ColorRange``.
        width: Frame width in pixels.
        height: Frame height in pixels.
        gpu_id: Index of the GPU to encode on.
        fps: Frames per second passed to the encoder.
        bitrate: Target bitrate in bits per second; the maximum bitrate is set
            to the larger of ``bitrate + 2 Mbit/s`` and ``1.25 * bitrate``.
        codec: Codec name; normalized with ``normalize_codec_name``.
        full_range: When true, full-range colour signalling is requested.
        chroma_subsampling: ``"444"`` selects the ``YUV444`` input surface;
            any other value selects ``ARGB``.
        preset: Encoder preset name.

    Returns:
        Whatever ``nvc.CreateEncoder`` returns.

    Raises:
        ValueError: If ``nvenc_encode_compatibility_issue`` reports a problem
            for this configuration.
    """
    codec = normalize_codec_name(codec)
    problem = nvenc_encode_compatibility_issue(
        nvc, width, height, gpu_id=gpu_id, codec=codec, chroma_subsampling=chroma_subsampling
    )
    if problem is not None:
        raise ValueError(problem)

    if chroma_subsampling == "444":
        surface_fmt = "YUV444"
    else:
        surface_fmt = "ARGB"

    peak_bitrate = max(bitrate + 2_000_000, int(bitrate * 1.25))
    encoder_options = {
        "gpu_id": gpu_id,
        "codec": codec,
        "fps": fps,
        "preset": preset,
        "tuning_info": "high_quality",
        "rc": "vbr",
        "bitrate": bitrate,
        "maxbitrate": peak_bitrate,
        "bf": 0,
        # Optional features are added only when the GPU advertises them.
        **best_quality_extra_kwargs(nvc, gpu_id, codec),
    }
    if full_range:
        encoder_options["colorrange"] = nvc.ColorRange.JPEG
        encoder_options["videoFullRangeFlag"] = 1

    return nvc.CreateEncoder(width, height, surface_fmt, True, **encoder_options)
144
+
145
+
146
def encode_rgb_iter_to_elementary_stream(
    nvc,
    frames: Iterable[np.ndarray],
    gpu_id: int,
    fps: int,
    bitrate: int,
    *,
    codec: str = "h264",
    full_range: bool = True,
    chroma_subsampling: str = "444",
    preset: str = "P7",
) -> bytes:
    """Encode an iterable of RGB uint8 HWC frames to an elementary stream.

    The encoder is created lazily from the first frame's resolution; every
    later frame must match it. Frames are converted to planar YUV444 when
    ``chroma_subsampling == "444"``, otherwise to BGRA.

    Args:
        nvc: Video-codec binding module.
        frames: Iterable of HWC RGB frames; consumed once.
        gpu_id: Index of the GPU to encode on.
        fps: Frames per second for the encoder.
        bitrate: Target bitrate in bits per second.
        codec: Codec name forwarded to the compatibility check and encoder.
        full_range: Whether to use full-range colour conversion/signalling.
        chroma_subsampling: ``"444"`` or another value (treated as the ARGB
            input path).
        preset: Encoder preset name.

    Returns:
        The concatenated encoder output, or ``b""`` when ``frames`` is empty.

    Raises:
        ValueError: If a frame fails the compatibility check or its
            resolution differs from the first frame's.
    """
    encoded: List[bytes] = []
    encoder = None
    stream_hw = (0, 0)
    wants_yuv444 = chroma_subsampling == "444"

    for frame in frames:
        problem = nvenc_encode_compatibility_issue_for_rgb(
            nvc, frame, gpu_id=gpu_id, codec=codec, chroma_subsampling=chroma_subsampling
        )
        if problem is not None:
            raise ValueError(problem)

        frame_hw = (frame.shape[0], frame.shape[1])
        if encoder is None:
            # First frame fixes the stream resolution and creates the encoder.
            stream_hw = frame_hw
            encoder = create_nvenc_encoder(
                nvc,
                stream_hw[1],
                stream_hw[0],
                gpu_id,
                fps,
                bitrate,
                codec=codec,
                full_range=full_range,
                chroma_subsampling=chroma_subsampling,
                preset=preset,
            )
        elif frame_hw != stream_hw:
            raise ValueError("All frames must share the same resolution within a stream")

        if wants_yuv444:
            surface = rgb_hwc_to_yuv444_planar(frame, full_range=full_range)
        else:
            surface = rgb_hwc_to_nvenc_bgra_hwc4(frame)
        surface = np.ascontiguousarray(surface)

        try:
            packet = encoder.Encode(surface)
        except TypeError:
            # Retry with a flattened buffer if the shaped array was rejected.
            packet = encoder.Encode(surface.reshape(-1))
        if packet:
            encoded.append(bytes(packet))

    if encoder is None:
        return b""

    # Flush whatever the encoder still holds.
    trailing = encoder.EndEncode()
    if trailing:
        encoded.append(bytes(trailing))
    return b"".join(encoded)
198
+
199
+
200
def encode_rgb_frames_to_elementary_stream(
    nvc,
    frames: List[np.ndarray],
    gpu_id: int,
    fps: int,
    bitrate: int,
    *,
    codec: str = "h264",
    full_range: bool = True,
    chroma_subsampling: str = "444",
    preset: str = "P7",
) -> bytes:
    """List-based convenience wrapper over ``encode_rgb_iter_to_elementary_stream``.

    All arguments are forwarded unchanged; ``frames`` is wrapped in ``iter()``.

    Returns:
        The encoded elementary stream bytes.
    """
    encode_options = dict(
        codec=codec,
        full_range=full_range,
        chroma_subsampling=chroma_subsampling,
        preset=preset,
    )
    frame_iter = iter(frames)
    return encode_rgb_iter_to_elementary_stream(
        nvc, frame_iter, gpu_id, fps, bitrate, **encode_options
    )
217
+
218
+
219
def ffmpeg_executable() -> str | None:
    """Locate an ffmpeg binary.

    Prefers the executable reported by the optional ``imageio_ffmpeg``
    package. If that package is missing or fails for any reason, falls back
    to looking up ``ffmpeg`` on ``PATH``.

    Returns:
        Path to an ffmpeg executable, or ``None`` if the ``PATH`` lookup finds
        nothing.
    """
    try:
        from imageio_ffmpeg import get_ffmpeg_exe  # type: ignore

        bundled = get_ffmpeg_exe()
    except Exception:
        # Best effort: any failure here just means "use the system ffmpeg".
        return shutil.which("ffmpeg")
    return bundled
226
+
227
+
228
def mux_elementary_to_mp4(
    es_bytes: bytes, mp4_path: Path, fps: float, *, demux_format: str = "h264"
) -> None:
    """Wrap a raw H.264/HEVC elementary stream in an MP4 container.

    Uses the ffmpeg executable when one is found (stream copy, no re-encode);
    otherwise falls back to PyAV and assigns timestamps on a 90 kHz time base
    to packets that have none.

    Args:
        es_bytes: The elementary stream.
        mp4_path: Destination file; overwritten if it exists (ffmpeg ``-y``).
        fps: Frame rate used to time the frames.
        demux_format: Raw demuxer name. ``"h265"`` is accepted as an alias of
            ``"hevc"``.

    Raises:
        RuntimeError: If neither ffmpeg nor PyAV is available, or (PyAV path)
            the bitstream contains no video stream.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # FFmpeg/libavformat register the raw HEVC demuxer as "hevc"; "h265" is
    # only a file extension there, so translate the alias before using it as
    # a format name.
    is_hevc = demux_format in ("hevc", "h265")
    container_fmt = "hevc" if is_hevc else demux_format

    ffmpeg = ffmpeg_executable()
    if ffmpeg:
        suffix = ".hevc" if is_hevc else ".h264"
        tmp_es = None
        try:
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as f:
                # Record the path before writing so a failed write is still
                # cleaned up by the ``finally`` below.
                tmp_es = Path(f.name)
                f.write(es_bytes)
            subprocess.run(
                [ffmpeg, "-y", "-loglevel", "error", "-f", container_fmt,
                 "-r", str(fps), "-i", str(tmp_es), "-c:v", "copy", str(mp4_path)],
                check=True,
            )
        finally:
            if tmp_es is not None:
                tmp_es.unlink(missing_ok=True)
        return

    av = try_import_av()
    if av is None:
        raise RuntimeError(
            "Need ffmpeg in PATH, or install PyAV (`pip install av`) to mux elementary video to MP4."
        )

    bio = io.BytesIO(es_bytes)
    with av.open(bio, format=container_fmt) as inp:
        if not inp.streams.video:
            raise RuntimeError("No video stream in elementary bitstream")
        vin = inp.streams.video[0]
        with av.open(str(mp4_path), mode="w") as out:
            vout = out.add_stream_from_template(vin, opaque=True)
            tb_den = 90_000
            vout.time_base = Fraction(1, tb_den)
            # Ticks per frame on the 90 kHz time base (at least 1).
            inc = max(1, int(round(tb_den / float(fps))))
            for i, packet in enumerate(inp.demux(vin)):
                if getattr(packet, "size", 0) == 0:
                    # Skip empty packets.
                    continue
                if packet.pts is None:
                    # Synthesize timestamps from the packet index for
                    # packets that arrive without one.
                    packet.pts = i * inc
                    packet.dts = packet.pts
                packet.stream = vout
                out.mux(packet)
271
+
272
+
273
def encode_rgb_iter_to_mp4(
    nvc,
    frames: Iterable[np.ndarray],
    mp4_path: Path,
    *,
    gpu_id: int,
    fps: int,
    bitrate: int,
    codec: str = "h264",
    full_range: bool = True,
    chroma_subsampling: str = "444",
    preset: str = "P7",
) -> None:
    """Encode an iterable of RGB frames and write the result as an MP4 file.

    The frames are first encoded to an in-memory elementary stream with
    ``encode_rgb_iter_to_elementary_stream`` and then muxed with
    ``mux_elementary_to_mp4``.

    Args:
        nvc: Video-codec binding module.
        frames: Iterable of HWC RGB frames.
        mp4_path: Destination MP4 path.
        gpu_id: Index of the GPU to encode on.
        fps: Frames per second (also used for muxing, as a float).
        bitrate: Target bitrate in bits per second.
        codec: Codec name; normalized with ``normalize_codec_name``.
        full_range: Whether to use full-range colour.
        chroma_subsampling: Chroma layout forwarded to the encoder.
        preset: Encoder preset name.
    """
    codec = normalize_codec_name(codec)
    elementary_stream = encode_rgb_iter_to_elementary_stream(
        nvc,
        frames,
        gpu_id,
        fps,
        bitrate,
        codec=codec,
        full_range=full_range,
        chroma_subsampling=chroma_subsampling,
        preset=preset,
    )
    # Anything that is not HEVC is muxed through the raw H.264 demuxer.
    if codec in ("hevc", "h265"):
        demux = "hevc"
    else:
        demux = "h264"
    mux_elementary_to_mp4(elementary_stream, mp4_path, float(fps), demux_format=demux)
294
+
295
+
296
def encode_rgb_sequence_to_mp4(
    nvc,
    frames: List[np.ndarray],
    mp4_path: Path,
    *,
    gpu_id: int,
    fps: int,
    bitrate: int,
    codec: str = "h264",
    full_range: bool = True,
    chroma_subsampling: str = "444",
    preset: str = "P7",
) -> None:
    """List-based convenience wrapper over ``encode_rgb_iter_to_mp4``.

    All arguments are forwarded unchanged; ``frames`` is wrapped in ``iter()``.
    """
    forwarded = dict(
        gpu_id=gpu_id,
        fps=fps,
        bitrate=bitrate,
        codec=codec,
        full_range=full_range,
        chroma_subsampling=chroma_subsampling,
        preset=preset,
    )
    encode_rgb_iter_to_mp4(nvc, iter(frames), mp4_path, **forwarded)
mvdata/codec/frames.py ADDED
@@ -0,0 +1,199 @@
1
+ """Conversion helpers that turn decoded frames into canonical HWC RGB arrays.
2
+
3
+ The public surface here is deliberately small:
4
+
5
+ ``_decoded_frame_to_rgb_numpy``
6
+ Takes a decoder output (numpy, dlpack-capable, or torch/cupy tensor) and
7
+ returns a contiguous HWC numpy array owned by the caller.
8
+ ``_decoded_frame_to_rgb_cupy``
9
+ Same, but returns a fresh ``cupy.ndarray`` that stays on the GPU. Uses
10
+ DLPack to view decoder device memory, then copies into a new buffer so the
11
+ decoder is free to reuse its pool slot. Accepts an optional raw CUDA stream
12
+ pointer to order the copy with user work.
13
+ ``numpy_to_cupy_rgb``
14
+ H2D helper used for streams that can only be produced on the CPU (pyavif,
15
+ PyAV CPU fallback).
16
+ ``_pyav_frame_to_rgb``
17
+ PyAV → HWC RGB with correct handling of full-range yuv444 and high bit
18
+ depths.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from contextlib import ExitStack
24
+ from typing import Any
25
+
26
+ import numpy as np
27
+
28
+ from ._imports import try_import_cupy, try_import_torch
29
+ from .probe import infer_video_bit_depth_from_frame
30
+
31
+
32
def _cupy_or_raise():
    """Return the CuPy module, or raise with install instructions.

    Raises:
        RuntimeError: If ``try_import_cupy`` returns ``None``.
    """
    cupy_mod = try_import_cupy()
    if cupy_mod is not None:
        return cupy_mod
    raise RuntimeError(
        "device='cuda' requires CuPy. Install it with `pip install mvdata[cuda]` "
        "(or `pip install cupy-cuda12x`)."
    )
40
+
41
+
42
def _device_and_stream_ctx(
    cp, *, gpu_id: int | None = None, cuda_stream: int | None = None
):
    """Build a context manager that selects a CUDA device and/or stream.

    The device and stream contexts are entered *before* this function
    returns; using the result in a ``with`` statement (or calling its
    ``close()``) exits them in reverse order.

    Args:
        cp: The CuPy module (anything exposing ``cuda.Device`` and
            ``cuda.ExternalStream``).
        gpu_id: Device index to make current, or ``None`` to leave the
            current device alone.
        cuda_stream: Raw CUDA stream pointer to make current. ``None`` and
            ``0`` both mean "do not change the current stream".

    Returns:
        A ``contextlib.ExitStack`` holding the entered contexts.
    """
    with ExitStack() as stack:
        if gpu_id is not None:
            stack.enter_context(cp.cuda.Device(int(gpu_id)))
        if cuda_stream is not None and int(cuda_stream) != 0:
            stack.enter_context(cp.cuda.ExternalStream(int(cuda_stream)))
        # Hand ownership to the caller. If anything above raised, the
        # enclosing ``with`` unwinds the contexts already entered instead of
        # leaving the device context active.
        return stack.pop_all()
51
+
52
+
53
def _target_dtype_for(arr_dtype, *, bit_depth: int | None):
    """Choose the output dtype (``uint8`` or ``uint16``) for a decoded frame.

    Args:
        arr_dtype: NumPy-style dtype object of the decoded array.
        bit_depth: Known bit depth of the video, or ``None`` (treated as 8).

    Returns:
        ``np.uint8`` or ``np.uint16``. ``uint8``/``uint16`` inputs map to
        themselves; other integer dtypes map to ``uint16`` when the bit depth
        exceeds 8 or the item size exceeds one byte, else ``uint8``.

    Raises:
        TypeError: If ``arr_dtype`` is not an integer dtype.
    """
    for passthrough in (np.uint8, np.uint16):
        if arr_dtype == passthrough:
            return passthrough

    if not np.issubdtype(arr_dtype, np.integer):
        raise TypeError(f"Cannot convert decoded frame to HWC RGB: dtype={arr_dtype}")

    needs_wide = (bit_depth or 8) > 8 or arr_dtype.itemsize > 1
    return np.uint16 if needs_wide else np.uint8
63
+
64
+
65
def _ensure_hwc(arr):
    """Return ``arr`` in H×W×3 layout, transposing from 3×H×W if needed.

    A rank-3 array whose last axis is already 3 is returned untouched (this
    includes the ambiguous ``(3, W, 3)`` shape).

    Returns:
        ``(array, transposed)`` where ``transposed`` is true when a
        channel-first input was transposed (the result is then a
        non-contiguous view that callers will want to copy).

    Raises:
        TypeError: If ``arr`` is neither 3×H×W nor H×W×3.
    """
    is_rank3 = arr.ndim == 3
    if is_rank3 and arr.shape[0] == 3 and arr.shape[-1] != 3:
        return arr.transpose(1, 2, 0), True
    if is_rank3 and arr.shape[-1] == 3:
        return arr, False
    raise TypeError(f"Cannot convert decoded frame to HWC RGB: shape={arr.shape}")
72
+
73
+
74
def _normalize_rgb_numpy_output(
    arr: np.ndarray, *, bit_depth: int | None = None, copy_result: bool = False
) -> np.ndarray:
    """Return ``arr`` as a C-contiguous H×W×3 ``uint8``/``uint16`` array.

    Args:
        arr: Array-like in H×W×3 or 3×H×W layout.
        bit_depth: Known bit depth, used to pick the output dtype for integer
            inputs other than ``uint8``/``uint16``.
        copy_result: When true, always return a fresh copy even if the input
            already has the right dtype and layout.

    Returns:
        A C-contiguous array. It is a new allocation whenever a transpose,
        dtype conversion or ``copy_result`` is involved, or when the input is
        not C-contiguous; otherwise the input array itself is returned.

    Raises:
        TypeError: If the shape or dtype cannot be interpreted as RGB.
    """
    arr = np.asarray(arr)
    arr, transposed = _ensure_hwc(arr)

    target = _target_dtype_for(arr.dtype, bit_depth=bit_depth)
    if arr.dtype != target:
        # A dtype change always allocates, so ask for the C-ordered result
        # directly instead of converting and then copying a second time.
        return arr.astype(target, order="C")

    if copy_result or transposed:
        return np.array(arr, copy=True, order="C")
    if not arr.flags.c_contiguous:
        return np.ascontiguousarray(arr)
    return arr
91
+
92
+
93
def _decoded_frame_to_rgb_numpy(frame, torch_mod, *, bit_depth: int | None = None):
    """Return a contiguous HWC RGB numpy array owned by the caller.

    DLPack-capable frames are tried, in order, through NumPy, then PyTorch
    (when ``torch_mod`` is given), then CuPy (when importable). Anything else
    is treated as a host array.

    Args:
        frame: Decoder output (numpy array, DLPack-capable object, ...).
        torch_mod: The imported ``torch`` module, or ``None``.
        bit_depth: Known bit depth, forwarded to the normalization step.

    Returns:
        A C-contiguous H×W×3 ``uint8``/``uint16`` numpy array.
    """
    if hasattr(frame, "__dlpack__"):
        try:
            arr = np.from_dlpack(frame)
            return _normalize_rgb_numpy_output(arr, bit_depth=bit_depth, copy_result=True)
        except Exception:
            # Best effort: fall through to the next import path.
            pass
        if torch_mod is not None:
            try:
                tensor = torch_mod.from_dlpack(frame).detach()
                # ``Tensor.cpu()`` is a no-op for a tensor already on the
                # host, and ``.numpy()`` shares its memory, so the array
                # would still alias the decoder buffer. Copy in that case;
                # a device tensor already gets a fresh host copy from
                # ``.cpu()``.
                already_on_host = tensor.device.type == "cpu"
                arr = tensor.cpu().numpy()
                return _normalize_rgb_numpy_output(
                    arr, bit_depth=bit_depth, copy_result=already_on_host
                )
            except Exception:
                # Best effort: fall through to CuPy.
                pass
        cp = try_import_cupy()
        if cp is not None:
            arr = cp.asnumpy(cp.from_dlpack(frame))
            return _normalize_rgb_numpy_output(arr, bit_depth=bit_depth)
    # Host-memory decode outputs may alias decoder-managed buffers that get
    # reused on the next decode call, so take ownership here.
    return _normalize_rgb_numpy_output(frame, bit_depth=bit_depth, copy_result=True)
114
+
115
+
116
def _decoded_frame_to_rgb_cupy(
    frame: Any,
    *,
    bit_depth: int | None = None,
    gpu_id: int | None = None,
    cuda_stream: int | None = None,
):
    """Copy a decoder output into a newly allocated HWC RGB ``cupy.ndarray``.

    DLPack-capable frames are imported with ``cp.from_dlpack``; anything else
    is converted through a contiguous host numpy array. The data is then
    copied (with a dtype conversion if needed) into a buffer allocated with
    ``cp.empty``, so the returned array does not share memory with ``frame``.

    The work runs with ``gpu_id`` as the current device and ``cuda_stream``
    as the current stream when those are provided.
    NOTE(review): no stream synchronization is issued here — confirm that
    callers order the copy before the decoder reuses its buffer.

    Args:
        frame: Decoder output (DLPack-capable object or host array-like).
        bit_depth: Known bit depth, used to pick the output dtype.
        gpu_id: Device index to make current, or ``None``.
        cuda_stream: Raw CUDA stream pointer to make current, or ``None``.

    Returns:
        A C-ordered H×W×3 ``cupy.ndarray`` of dtype ``uint8`` or ``uint16``.

    Raises:
        RuntimeError: If CuPy is not installed.
        TypeError: If the frame's shape or dtype cannot be interpreted as RGB.
    """
    cp = _cupy_or_raise()

    with _device_and_stream_ctx(cp, gpu_id=gpu_id, cuda_stream=cuda_stream):
        if hasattr(frame, "__dlpack__"):
            device_view = cp.from_dlpack(frame)
        else:
            host = np.ascontiguousarray(np.asarray(frame))
            device_view = cp.asarray(host)

        device_view, _was_transposed = _ensure_hwc(device_view)
        out_dtype = _target_dtype_for(device_view.dtype, bit_depth=bit_depth)
        result = cp.empty(device_view.shape, dtype=out_dtype, order="C")
        if device_view.dtype != out_dtype:
            result[...] = device_view.astype(out_dtype, copy=False)
        else:
            cp.copyto(result, device_view)
    return result
147
+
148
+
149
def numpy_to_cupy_rgb(
    arr: np.ndarray,
    *,
    gpu_id: int | None = None,
    cuda_stream: int | None = None,
):
    """Upload a host array into a newly allocated ``cupy.ndarray``.

    The array is made C-contiguous on the host, then copied into a device
    buffer of the same shape and dtype while ``gpu_id`` / ``cuda_stream`` are
    current (when provided).

    Args:
        arr: Host array (expected to be HWC RGB; the shape is not checked).
        gpu_id: Device index to make current, or ``None``.
        cuda_stream: Raw CUDA stream pointer to make current, or ``None``.

    Returns:
        The device copy.

    Raises:
        RuntimeError: If CuPy is not installed.
    """
    cp = _cupy_or_raise()
    host = np.ascontiguousarray(arr)
    with _device_and_stream_ctx(cp, gpu_id=gpu_id, cuda_stream=cuda_stream):
        device_arr = cp.empty(host.shape, dtype=host.dtype)
        device_arr.set(host)
    return device_arr
162
+
163
+
164
def _pyav_frame_to_rgb(frame, *, bit_depth: int | None = None) -> np.ndarray:
    """Convert a PyAV ``VideoFrame`` to an HWC RGB numpy array.

    ``yuv444p*`` frames whose colour range is full, unspecified or absent are
    converted by hand as full-range data, to avoid swscale's limited-range
    default when the range is not signalled. Every other frame goes through
    ``frame.to_ndarray``.

    Args:
        frame: Object exposing ``format.name``, ``to_ndarray`` and optionally
            ``color_range``.
        bit_depth: Bit depth of the samples; inferred from the frame with
            ``infer_video_bit_depth_from_frame`` when ``None``.

    Returns:
        H×W×3 array: ``uint16`` (scaled to 0..65535) when the depth exceeds 8
        on the manual path, ``uint8`` otherwise; on the ``to_ndarray`` path
        the format requested is ``rgb48le`` or ``rgb24`` respectively.
    """
    pix_fmt = str(frame.format.name)
    if bit_depth is None:
        depth = infer_video_bit_depth_from_frame(frame)
    else:
        depth = int(bit_depth)

    color_range = getattr(frame, "color_range", None)
    range_text = "" if color_range is None else str(color_range)
    full_range = color_range is not None and range_text in ("2", "JPEG", "jpeg", "pc")
    unspecified = color_range is None or range_text in ("0", "UNSPECIFIED", "unspecified")

    convert_manually = pix_fmt.startswith("yuv444p") and (full_range or unspecified)
    if not convert_manually:
        return frame.to_ndarray(format="rgb48le" if depth > 8 else "rgb24")

    planes = frame.to_ndarray(format=pix_fmt)
    luma = planes[0].astype(np.float32)
    in_max = float((1 << depth) - 1)
    # Chroma is stored offset by half the sample range (128 for 8-bit).
    midpoint = (in_max + 1.0) / 2.0
    chroma_b = planes[1].astype(np.float32) - midpoint
    chroma_r = planes[2].astype(np.float32) - midpoint

    if depth > 8:
        out_max, out_dtype = 65535.0, np.uint16
    else:
        out_max, out_dtype = 255.0, np.uint8
    # Stretch the source sample range onto the full output range.
    gain = out_max / in_max

    def _quantize(channel):
        # Adding 0.5 before the truncating cast rounds to nearest.
        return np.clip(channel * gain + 0.5, 0, out_max).astype(out_dtype)

    red = _quantize(luma + 1.402 * chroma_r)
    green = _quantize(luma - 0.344136 * chroma_b - 0.714136 * chroma_r)
    blue = _quantize(luma + 1.772 * chroma_b)
    return np.stack([red, green, blue], axis=-1)