mcap-codec-support 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,411 @@
1
+ """Lazy MP4 writer helpers for decoded image messages."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from fractions import Fraction
6
+ from typing import TYPE_CHECKING, Any, cast
7
+
8
+ from mcap_codec_support.video.common import (
9
+ EncoderBackend,
10
+ EncoderConfig,
11
+ EncoderMode,
12
+ VideoCodec,
13
+ VideoEncoderError,
14
+ get_encoder_options,
15
+ raw_image_to_array,
16
+ resolve_encoder_for_backend,
17
+ )
18
+ from mcap_codec_support.video.compression import decode_compressed_image_to_rgb_array
19
+ from mcap_codec_support.video.schemas import COMPRESSED_SCHEMAS, IMAGE_SCHEMAS, RAW_SCHEMAS
20
+
21
+ if TYPE_CHECKING:
22
+ from collections.abc import Callable
23
+ from pathlib import Path
24
+
25
+ from mcap_codec_support._protocols import VideoFileStrategy
26
+
27
+
28
+ _TARGET_BITRATE_BY_QUALITY: tuple[tuple[int, int], ...] = (
29
+ (20, 10_000_000),
30
+ (25, 5_000_000),
31
+ (10**6, 2_000_000),
32
+ )
33
+
34
+
35
+ def _bitrate_for(quality: int) -> int:
36
+ for threshold, bitrate in _TARGET_BITRATE_BY_QUALITY:
37
+ if quality <= threshold:
38
+ return bitrate
39
+ return _TARGET_BITRATE_BY_QUALITY[-1][1]
40
+
41
+
42
+ _RAW_BYTES_PER_PIXEL: dict[str, int] = {
43
+ "rgb": 3,
44
+ "rgb8": 3,
45
+ "bgr": 3,
46
+ "bgr8": 3,
47
+ "mono": 1,
48
+ "mono8": 1,
49
+ "8uc1": 1,
50
+ }
51
+
52
+
53
+ def _pack_raw_image_bytes(decoded: Any, *, width: int, height: int) -> bytes:
54
+ """Pack raw ROS Image bytes to the exact frame size expected by ffmpeg."""
55
+ encoding = str(decoded.encoding).lower()
56
+ bytes_per_pixel = _RAW_BYTES_PER_PIXEL.get(encoding)
57
+ if bytes_per_pixel is None:
58
+ raise VideoEncoderError(f"Unsupported image encoding: {decoded.encoding}")
59
+
60
+ src_width = int(decoded.width)
61
+ src_height = int(decoded.height)
62
+ if width > src_width or height > src_height:
63
+ raise VideoEncoderError(
64
+ f"Cannot pack {src_width}x{src_height} frame as larger {width}x{height}"
65
+ )
66
+
67
+ src_row_bytes = src_width * bytes_per_pixel
68
+ dst_row_bytes = width * bytes_per_pixel
69
+ step = int(decoded.step)
70
+ if step < src_row_bytes:
71
+ raise VideoEncoderError(f"Image step {step} is smaller than row size {src_row_bytes}")
72
+
73
+ data = bytes(decoded.data)
74
+ required = step * src_height
75
+ if len(data) < required:
76
+ raise VideoEncoderError(f"Image data has {len(data)} bytes, expected at least {required}")
77
+
78
+ if width == src_width and height == src_height and step == dst_row_bytes:
79
+ return data
80
+
81
+ packed = bytearray(dst_row_bytes * height)
82
+ offset = 0
83
+ for row in range(height):
84
+ start = row * step
85
+ end = start + dst_row_bytes
86
+ packed[offset : offset + dst_row_bytes] = data[start:end]
87
+ offset += dst_row_bytes
88
+ return bytes(packed)
89
+
90
+
91
+ class _PyAVMp4Strategy:
92
+ """In-process PyAV MP4 writer."""
93
+
94
+ def __init__(
95
+ self,
96
+ path: Path,
97
+ *,
98
+ codec: VideoCodec,
99
+ encoder_backend: EncoderBackend,
100
+ quality: int,
101
+ width: int,
102
+ height: int,
103
+ ) -> None:
104
+ import av # noqa: PLC0415
105
+ import av.error # noqa: PLC0415
106
+
107
+ from mcap_codec_support.video.pyav import resolve_encoder_for_backend # noqa: PLC0415
108
+
109
+ self.path = path
110
+ self._codec = codec
111
+ self._quality = quality
112
+ self._encoder_name = resolve_encoder_for_backend(codec.value, encoder_backend.value)
113
+ self._first_timestamp_ns: int | None = None
114
+ self._last_pts = -1
115
+ self._frame_count = 0
116
+ self.config = EncoderConfig(width=width, height=height, codec_name=self._encoder_name)
117
+
118
+ container = av.open(str(path), "w", format=None, options={"movflags": "faststart"})
119
+ try:
120
+ stream = cast("Any", container.add_stream(codec_name=self._encoder_name))
121
+ except (av.error.FFmpegError, ValueError) as exc:
122
+ container.close()
123
+ raise VideoEncoderError(
124
+ f"Failed to create video stream with encoder '{self._encoder_name}': {exc}"
125
+ ) from exc
126
+
127
+ stream.width = width
128
+ stream.height = height
129
+ stream.pix_fmt = "yuv420p"
130
+ stream.time_base = Fraction(1, 1_000_000)
131
+ stream.codec_context.time_base = Fraction(1, 1_000_000)
132
+ stream.codec_context.framerate = Fraction(30, 1)
133
+ stream.codec_context.gop_size = 60
134
+ stream.codec_context.bit_rate = _bitrate_for(quality)
135
+
136
+ options = get_encoder_options(codec, self._encoder_name)
137
+ if any(s in self._encoder_name for s in ("libx264", "libx265", "videotoolbox")):
138
+ options["bf"] = "0"
139
+ stream.codec_context.options = options
140
+
141
+ self._container = container
142
+ self._stream = stream
143
+
144
+ def write_compressed(self, data: bytes, log_time_ns: int) -> None:
145
+ self.write_rgb(decode_compressed_image_to_rgb_array(data), log_time_ns)
146
+
147
+ def write_raw(self, data: bytes, log_time_ns: int) -> None:
148
+ del data, log_time_ns
149
+ raise VideoEncoderError("PyAV MP4 writer needs decoded RGB for raw frames")
150
+
151
+ def write_rgb(self, rgb: Any, log_time_ns: int) -> None:
152
+ import av # noqa: PLC0415
153
+ import av.error # noqa: PLC0415
154
+
155
+ if self._first_timestamp_ns is None:
156
+ self._first_timestamp_ns = log_time_ns
157
+
158
+ try:
159
+ frame = av.VideoFrame.from_ndarray(rgb, format="rgb24").reformat(format="yuv420p")
160
+ except (av.error.FFmpegError, ValueError) as exc:
161
+ raise VideoEncoderError(f"Frame conversion failed: {exc}") from exc
162
+
163
+ current_pts = (log_time_ns - self._first_timestamp_ns) // 1000
164
+ if current_pts <= self._last_pts:
165
+ current_pts = self._last_pts + 1
166
+ frame.pts = current_pts
167
+ self._last_pts = current_pts
168
+
169
+ try:
170
+ for packet in self._stream.encode(frame):
171
+ self._container.mux(packet)
172
+ except (av.error.FFmpegError, ValueError) as exc:
173
+ raise VideoEncoderError(
174
+ f"PyAV encoding failed at frame {self._frame_count}: {exc}"
175
+ ) from exc
176
+ self._frame_count += 1
177
+
178
+ def close(self) -> int:
179
+ for packet in self._stream.encode(None):
180
+ self._container.mux(packet)
181
+ self._container.close()
182
+ return self._frame_count
183
+
184
+
185
+ class _FfmpegMp4Strategy:
186
+ """ffmpeg-subprocess MP4 writer."""
187
+
188
+ def __init__(
189
+ self,
190
+ path: Path,
191
+ *,
192
+ codec: VideoCodec,
193
+ encoder_backend: EncoderBackend,
194
+ quality: int,
195
+ width: int,
196
+ height: int,
197
+ input_pix_fmt: str | None,
198
+ ) -> None:
199
+ from mcap_codec_support.video.ffmpeg import ( # noqa: PLC0415
200
+ FFmpegMp4Encoder,
201
+ check_encoder_cli,
202
+ )
203
+
204
+ encoder_name = resolve_encoder_for_backend(
205
+ codec.value, encoder_backend.value, test_fn=check_encoder_cli
206
+ )
207
+ self._encoder = FFmpegMp4Encoder(
208
+ path,
209
+ width=width,
210
+ height=height,
211
+ codec_name=encoder_name,
212
+ quality=quality,
213
+ input_pix_fmt=input_pix_fmt,
214
+ )
215
+ self.config = self._encoder.config
216
+
217
+ def write_compressed(self, data: bytes, log_time_ns: int) -> None:
218
+ del log_time_ns
219
+ self._encoder.write_frame(data)
220
+
221
+ def write_raw(self, data: bytes, log_time_ns: int) -> None:
222
+ del log_time_ns
223
+ self._encoder.write_frame(data)
224
+
225
+ def write_rgb(self, rgb: Any, log_time_ns: int) -> None:
226
+ del log_time_ns
227
+ self._encoder.write_frame(rgb.tobytes())
228
+
229
+ def close(self) -> int:
230
+ frames = self._encoder.frames_fed
231
+ self._encoder.close()
232
+ return frames
233
+
234
+
235
class VideoFileWriterSession:
    """Lazy per-topic MP4 writer with unified backend selection.

    Nothing is opened at construction time. The first message passed to
    ``write_message`` selects and opens a backend ("pyav" or "ffmpeg"), and
    every later message is routed to that same backend.
    """

    def __init__(
        self,
        path: Path,
        *,
        codec: VideoCodec,
        encoder_backend: EncoderBackend,
        quality: int,
        mode: EncoderMode,
        on_fallback: Callable[[str], None] | None = None,
    ) -> None:
        """Store the settings; the backend is created on the first message."""
        # Unset until a backend has been opened.
        self._strategy: VideoFileStrategy | None = None
        self._input_kind: str | None = None

        self.path = path
        self._codec = codec
        self._encoder_backend = encoder_backend
        self._quality = quality
        self._mode = mode
        self._on_fallback = on_fallback

    def write_message(self, decoded: Any, schema_name: str, log_time_ns: int) -> None:
        """Write one decoded image message as a video frame.

        Raises:
            VideoEncoderError: If ``schema_name`` is not an image schema, or is
                neither a compressed nor a raw one.
        """
        if schema_name in IMAGE_SCHEMAS:
            if schema_name in COMPRESSED_SCHEMAS:
                self._write_compressed_message(decoded, log_time_ns)
                return
            if schema_name in RAW_SCHEMAS:
                self._write_raw_message(decoded, log_time_ns)
                return

        raise VideoEncoderError(f"Unexpected image schema {schema_name!r}")

    def _write_compressed_message(self, decoded: Any, log_time_ns: int) -> None:
        """Route a compressed image message to the active backend."""
        payload = bytes(decoded.data)
        opened_rgb = self._ensure_open_for_compressed(payload)
        strategy = self._strategy
        assert strategy is not None

        if self._input_kind != "pyav":
            strategy.write_compressed(payload, log_time_ns)
            return

        # Reuse the array decoded while opening the backend, if there is one.
        if opened_rgb is None:
            opened_rgb = decode_compressed_image_to_rgb_array(payload)
        strategy.write_rgb(opened_rgb, log_time_ns)

    def _write_raw_message(self, decoded: Any, log_time_ns: int) -> None:
        """Route a raw image message to the active backend."""
        opened_rgb = self._ensure_open_for_raw(decoded)
        strategy = self._strategy
        assert strategy is not None

        if self._input_kind == "pyav":
            # Reuse the array decoded while opening the backend, if there is one.
            frame = raw_image_to_array(decoded) if opened_rgb is None else opened_rgb
            strategy.write_rgb(frame, log_time_ns)
            return

        target = strategy.config
        packed = _pack_raw_image_bytes(decoded, width=target.width, height=target.height)
        strategy.write_raw(packed, log_time_ns)

    def _ensure_open_for_compressed(self, data: bytes) -> Any | None:
        """Open a backend for compressed input if none is open yet."""

        def decode_first() -> Any:
            return decode_compressed_image_to_rgb_array(data)

        def open_cli() -> None:
            self._open_ffmpeg_compressed(data)

        return self._open_with_fallback(decode_pyav=decode_first, open_ffmpeg=open_cli)

    def _ensure_open_for_raw(self, decoded: Any) -> Any | None:
        """Open a backend for raw input if none is open yet."""

        def decode_first() -> Any:
            return raw_image_to_array(decoded)

        def open_cli() -> None:
            self._open_ffmpeg_raw(decoded)

        return self._open_with_fallback(decode_pyav=decode_first, open_ffmpeg=open_cli)

    def _open_with_fallback(
        self,
        *,
        decode_pyav: Callable[[], Any],
        open_ffmpeg: Callable[[], None],
    ) -> Any | None:
        """Open the backend once, trying PyAV first unless ffmpeg-cli is forced.

        Returns:
            The RGB array decoded while opening the PyAV backend, or ``None``
            when a backend was already open or the ffmpeg backend was opened.
        """
        already_open = self._strategy is not None
        if already_open:
            return None

        if self._mode is EncoderMode.FFMPEG_CLI:
            open_ffmpeg()
            return None

        try:
            first_frame = decode_pyav()
            rows, cols = first_frame.shape[:2]
            self._open_pyav(cols, rows)
        except (ImportError, VideoEncoderError) as exc:
            # An explicit PyAV request must surface the failure, not fall back.
            if self._mode is EncoderMode.PYAV:
                raise
            notify = self._on_fallback
            if notify is not None:
                notify(f"PyAV failed to open encoder ({exc}); falling back to ffmpeg-cli.")
            open_ffmpeg()
            return None
        else:
            return first_frame

    def _open_pyav(self, width: int, height: int) -> None:
        """Create the PyAV strategy with even dimensions and mark it active."""
        even_width, even_height = _even_dimensions(width, height)
        self._strategy = _PyAVMp4Strategy(
            self.path,
            codec=self._codec,
            encoder_backend=self._encoder_backend,
            quality=self._quality,
            width=even_width,
            height=even_height,
        )
        self._input_kind = "pyav"

    def _start_ffmpeg(self, width: int, height: int, input_pix_fmt: str | None) -> None:
        """Create the ffmpeg strategy for the given frame size and mark it active."""
        self._strategy = _FfmpegMp4Strategy(
            self.path,
            codec=self._codec,
            encoder_backend=self._encoder_backend,
            quality=self._quality,
            width=width,
            height=height,
            input_pix_fmt=input_pix_fmt,
        )
        self._input_kind = "ffmpeg"

    def _open_ffmpeg_compressed(self, data: bytes) -> None:
        """Open ffmpeg for compressed input, sized from ``probe_image_dimensions``."""
        from mcap_codec_support.video.ffmpeg import probe_image_dimensions  # noqa: PLC0415

        probed_width, probed_height = probe_image_dimensions(data)
        even_width, even_height = _even_dimensions(probed_width, probed_height)
        self._start_ffmpeg(even_width, even_height, None)

    def _open_ffmpeg_raw(self, decoded: Any) -> None:
        """Open ffmpeg for raw input using the pixel format mapped from the encoding.

        Raises:
            VideoEncoderError: If the encoding has no entry (or an empty one)
                in ``ROS_ENCODING_TO_PIX_FMT``.
        """
        from mcap_codec_support.video.ffmpeg import ROS_ENCODING_TO_PIX_FMT  # noqa: PLC0415

        pix_fmt = ROS_ENCODING_TO_PIX_FMT.get(str(decoded.encoding).lower())
        if not pix_fmt:
            raise VideoEncoderError(f"Unsupported image encoding: {decoded.encoding}")

        even_width, even_height = _even_dimensions(decoded.width, decoded.height)
        self._start_ffmpeg(even_width, even_height, pix_fmt)

    def close(self) -> int:
        """Close the backend and return its frame count, or 0 if none was opened."""
        return 0 if self._strategy is None else self._strategy.close()
384
+
385
+
386
+ def _even_dimensions(width: int, height: int) -> tuple[int, int]:
387
+ width -= width % 2
388
+ height -= height % 2
389
+ if width < 2 or height < 2:
390
+ raise VideoEncoderError(f"Source frame too small ({width}x{height}) for video encoding")
391
+ return width, height
392
+
393
+
394
def create_video_file_writer(
    path: Path,
    *,
    codec: VideoCodec,
    encoder_backend: EncoderBackend,
    quality: int,
    mode: EncoderMode,
    on_fallback: Callable[[str], None] | None = None,
) -> VideoFileWriterSession:
    """Create a lazy MP4 writer session.

    Every argument is passed straight through to ``VideoFileWriterSession``.
    """
    session = VideoFileWriterSession(
        path,
        codec=codec,
        encoder_backend=encoder_backend,
        quality=quality,
        mode=mode,
        on_fallback=on_fallback,
    )
    return session