jellycoder 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
video_reducer/core.py ADDED
@@ -0,0 +1,1142 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ import math
6
+ import os
7
+ import shutil
8
+ import subprocess
9
+ import sys
10
+ import time
11
+ from concurrent.futures import ThreadPoolExecutor, as_completed
12
+ from dataclasses import dataclass
13
+ from functools import partial
14
+ from pathlib import Path
15
+ from typing import Iterable, List, Optional
16
+
17
+ from array import array
18
+
19
# File suffixes (lower-cased, with leading dot) that discover_videos() accepts.
# NOTE(review): ".mwv" is not a common video suffix — possibly a typo; confirm.
VIDEO_EXTENSIONS = {".mkv", ".mp4", ".wmv", ".mwv"}

# NVENC tuning; encode_video() forwards these to ffmpeg for *_nvenc encoders.
NVENC_PRESET = "p6"  # value for -preset
NVENC_RC_MODE = "vbr_hq"  # value for -rc
NVENC_CQ = 24  # value for -cq
NVENC_LOOKAHEAD = "20"  # value for -rc-lookahead
NVENC_SPATIAL_AQ = "1"  # value for -spatial_aq
NVENC_TEMPORAL_AQ = "1"  # value for -temporal_aq
NVENC_TUNE = "hq"  # value for -tune

# Bitrate used for -b:a when the audio stream is transcoded to AAC.
AUDIO_BITRATE = "192k"

# Default for ReducerConfig.max_workers.
DEFAULT_MAX_WORKERS = 1

# Console progress bar: number of bar cells, and minimum seconds between redraws.
PROGRESS_BAR_WIDTH = 40
PROGRESS_UPDATE_INTERVAL = 0.3

# NVENC encoder names in the order tried when no codec preference is given.
PREFERRED_NVENC_ENCODERS = ["h264_nvenc", "hevc_nvenc"]
# Value for -profile:v, keyed by NVENC encoder name.
NVENC_PROFILE_MAP = {"hevc_nvenc": "main", "h264_nvenc": "high"}

# Target video bitrate as a fraction of the source bitrate, by codec family,
# and the floor (kbps) applied to the proposed target.
HEVC_BITRATE_RATIO = 0.6
H264_BITRATE_RATIO = 0.75
MIN_TARGET_BITRATE_KBPS = 350

# MP4 compatibility tables consulted by encode_video().
MP4_ALLOWED_AUDIO_CODECS = {"aac", "ac3", "eac3", "mp3", "alac"}  # audio may be stream-copied
MP4_ALLOWED_SUBTITLE_CODECS = {"mov_text"}  # subtitles kept as-is
MP4_CONVERTIBLE_SUBTITLE_CODECS = {"subrip", "srt", "ass", "ssa"}  # converted to mov_text
MP4_ALLOWED_ATTACHED_PIC_CODECS = {"mjpeg", "png"}  # other cover-art codecs force MKV

# Quality preset names and the output height each non-"auto" preset maps to.
QUALITY_PRESETS: tuple[str, ...] = ("auto", "1080p", "720p", "480p", "360p")
QUALITY_HEIGHT_MAP = {
    "1080p": 1080,
    "720p": 720,
    "480p": 480,
    "360p": 360,
}

# Backend names accepted by select_encoder().
SUPPORTED_ENCODER_BACKENDS = ("auto", "nvenc", "x264", "qsv", "amf")
X264_ENCODER_NAME = "libx264"
X264_DEFAULT_PRESET = "medium"  # value for -preset with libx264
QSV_ENCODERS = {"h264": "h264_qsv", "hevc": "hevc_qsv"}
QSV_DEFAULT_PRESET = "medium"  # value for -preset with *_qsv
AMF_ENCODERS = {"h264": "h264_amf", "hevc": "hevc_amf"}
AMF_DEFAULT_QUALITY = "quality"  # value for -quality with *_amf
# Order in which backends are tried when the backend is "auto".
DEFAULT_ENCODER_PRIORITY = ("nvenc", "qsv", "amf", "x264")

# Pseudo-mono detection: seconds of audio sampled, and the left/right energy
# imbalance (dB) at or above which one channel is treated as the only real one.
EFFECTIVE_MONO_SAMPLE_SECONDS = 90
EFFECTIVE_MONO_IMBALANCE_THRESHOLD_DB = 18.0
57
+
58
+
59
+ def _safe_float(value: object) -> Optional[float]:
60
+ if isinstance(value, (int, float)):
61
+ return float(value)
62
+ if isinstance(value, str):
63
+ try:
64
+ return float(value)
65
+ except ValueError:
66
+ return None
67
+ return None
68
+
69
+
70
def _safe_fraction(value: object) -> Optional[float]:
    """Evaluate a ``"numerator/denominator"`` string such as ``"30000/1001"``.

    Returns ``None`` when *value* is not a string containing ``/``, when either
    side does not parse as a number, when the denominator is zero, or when the
    quotient itself is zero.
    """
    if not isinstance(value, str):
        return None
    numerator_text, slash, denominator_text = value.partition("/")
    if not slash:
        return None

    numerator = _safe_float(numerator_text)
    denominator = _safe_float(denominator_text)
    if numerator is None or denominator is None or denominator == 0:
        return None

    quotient = numerator / denominator
    # A zero rate carries no information for callers, so report it as unknown.
    return quotient or None
80
+
81
+
82
def _detect_pseudo_mono_channel(
    path: Path,
    sample_seconds: int = EFFECTIVE_MONO_SAMPLE_SECONDS,
    imbalance_threshold_db: float = EFFECTIVE_MONO_IMBALANCE_THRESHOLD_DB,
) -> Optional[int]:
    """Detect a stereo track in which only one channel carries real signal.

    ffmpeg decodes the first *sample_seconds* of audio from *path* to raw
    2-channel signed 16-bit little-endian PCM, and the summed squared amplitude
    of the left and right channels is compared.

    Args:
        path: Media file to sample.
        sample_seconds: Length of the sample handed to ffmpeg via ``-t``.
        imbalance_threshold_db: Minimum energy ratio (in dB) between the louder
            and the quieter channel for the louder one to be reported.

    Returns:
        ``0`` when the left channel dominates, ``1`` when the right channel
        dominates, or ``None`` when the channels are balanced, the sample is
        silent or empty, or ffmpeg is missing or fails.
    """
    cmd = [
        "ffmpeg",
        "-hide_banner",
        "-loglevel",
        "error",
        "-i",
        str(path),
        "-vn",
        "-ac",
        "2",
        "-f",
        "s16le",
        "-t",
        str(sample_seconds),
        "pipe:1",
    ]
    try:
        result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE)
    except FileNotFoundError:
        logging.debug("ffmpeg not found while sampling audio for %s.", path)
        return None
    except subprocess.CalledProcessError as exc:
        logging.debug("ffmpeg failed while sampling audio for %s: %s", path, exc)
        return None

    data = result.stdout

    # One stereo frame is 2 channels x 2 bytes; drop any trailing partial frame.
    usable = len(data) - (len(data) % 4)
    if usable < 4:
        return None

    samples = array("h")
    samples.frombytes(data[:usable])
    # array("h") uses the host byte order, whereas the stream is explicitly
    # little-endian; swap on big-endian hosts so amplitudes are read correctly.
    if sys.byteorder == "big":
        samples.byteswap()

    # Samples are interleaved L, R, L, R, ...; sum the energy of each channel.
    left_sq = sum(value * value for value in samples[0::2])
    right_sq = sum(value * value for value in samples[1::2])

    if left_sq == 0 and right_sq == 0:
        return None

    dominant = 0 if left_sq >= right_sq else 1
    quiet = right_sq if dominant == 0 else left_sq
    loud = left_sq if dominant == 0 else right_sq

    # A completely silent channel is the clearest case of pseudo-mono.
    if quiet == 0:
        return dominant

    ratio = loud / quiet
    if ratio <= 1.0:
        return None

    # The ratio is between energies (squared amplitudes), hence 10*log10.
    imbalance_db = 10.0 * math.log10(ratio)
    if imbalance_db >= imbalance_threshold_db:
        return dominant
    return None
148
+
149
+
150
@dataclass(slots=True)
class MediaInfo:
    """Stream and container metrics for one media file, as filled in by probe_media_info().

    Scalar fields are ``None`` and list fields are empty when the value could
    not be determined.
    """

    # Video frame count: the stream's nb_frames, else estimated as duration * fps.
    frames: Optional[int]
    # Duration in seconds (video stream duration, else the container duration).
    duration: Optional[float]
    # Container-level bit rate divided by 1000.
    bitrate_kbps: Optional[float]
    # Dimensions taken from the first video stream that reports them.
    width: Optional[int]
    height: Optional[int]
    # Lower-cased codec name, bit rate / 1000 and channel count of the audio stream.
    audio_codec: Optional[str]
    audio_bitrate_kbps: Optional[float]
    audio_channels: Optional[int]
    # Lower-cased codec names, one entry per stream of the given kind.
    subtitle_codecs: List[str]
    video_codecs: List[str]
    # Codecs of video streams flagged as attached pictures.
    attached_pic_codecs: List[str]
    # Codecs of "data" and "attachment" streams.
    data_stream_codecs: List[str]
164
+
165
+
166
@dataclass(slots=True)
class EncoderSelection:
    """Outcome of encoder selection, as built by _make_selection()."""

    # ffmpeg encoder name, e.g. "h264_nvenc" or "libx264".
    encoder: str
    # Output container suffix: ".mkv" for HEVC encoders, ".mp4" otherwise.
    output_extension: str
    # Backend that supplied the encoder: "nvenc", "qsv", "amf" or "x264".
    backend: str
    # "hevc" when the encoder name contains "hevc", otherwise "h264".
    codec_family: str
172
+
173
+
174
@dataclass(slots=True)
class ReducerConfig:
    """User-facing settings for a reduction run.

    NOTE(review): the code consuming this class is outside the visible part of
    the file; the field notes below are inferred from names and defaults.
    """

    # Input location — presumably a file or directory to process; confirm against caller.
    input_path: Path
    # Presumably forwarded as the ``overwrite`` flag of build_output_path()/encode_video().
    overwrite: bool = False
    # Presumably the destination root used when not overwriting.
    output_root: Optional[Path] = None
    # Defaults to DEFAULT_MAX_WORKERS.
    max_workers: int = DEFAULT_MAX_WORKERS
    # Presumably the codec preference handed to select_encoder().
    preferred_codec: Optional[str] = None
    # Presumably one of QUALITY_PRESETS.
    quality: str = "auto"
    # Presumably one of SUPPORTED_ENCODER_BACKENDS.
    encoder_backend: str = "auto"
183
+
184
+
185
def ensure_ffmpeg_available() -> None:
    """Raise ``RuntimeError`` unless an ``ffmpeg`` executable is found on ``PATH``."""
    ffmpeg_location = shutil.which("ffmpeg")
    if ffmpeg_location is not None:
        return
    raise RuntimeError("ffmpeg not found in PATH. Please install it before running this command.")
188
+
189
+
190
+ def _normalize_codec(preferred: Optional[str]) -> Optional[str]:
191
+ if preferred is None:
192
+ return None
193
+ mapping = {
194
+ "h264": "h264",
195
+ "avc": "h264",
196
+ "x264": "h264",
197
+ "hevc": "hevc",
198
+ "h265": "hevc",
199
+ }
200
+ normalized = preferred.lower()
201
+ if normalized not in mapping:
202
+ raise ValueError(f"Unsupported codec preference: {preferred}")
203
+ return mapping[normalized]
204
+
205
+
206
+ def _query_ffmpeg_video_encoders() -> set[str]:
207
+ try:
208
+ result = subprocess.run(
209
+ ["ffmpeg", "-hide_banner", "-encoders"],
210
+ check=True,
211
+ capture_output=True,
212
+ text=True,
213
+ )
214
+ except subprocess.CalledProcessError as exc:
215
+ raise RuntimeError("Unable to query ffmpeg encoders. Check your ffmpeg installation.") from exc
216
+
217
+ available_encoders: set[str] = set()
218
+ for line in result.stdout.splitlines():
219
+ stripped = line.strip()
220
+ if not stripped or stripped.startswith("------"):
221
+ continue
222
+ if not stripped.startswith("V"):
223
+ continue
224
+ parts = stripped.split()
225
+ if len(parts) >= 2:
226
+ available_encoders.add(parts[1])
227
+ return available_encoders
228
+
229
+
230
def _make_selection(encoder: str, backend: str) -> EncoderSelection:
    """Build an ``EncoderSelection`` for *encoder*, deriving family and container.

    Encoders whose name contains ``"hevc"`` belong to the HEVC family and are
    written to ``.mkv``; every other encoder is treated as H.264 and written
    to ``.mp4``.
    """
    if "hevc" in encoder:
        family, suffix = "hevc", ".mkv"
    else:
        family, suffix = "h264", ".mp4"
    return EncoderSelection(
        encoder=encoder,
        output_extension=suffix,
        backend=backend,
        codec_family=family,
    )
234
+
235
+
236
def select_encoder(backend: str = "auto", preferred: Optional[str] = None) -> EncoderSelection:
    """Choose an ffmpeg video encoder for the requested backend and codec.

    Args:
        backend: One of ``SUPPORTED_ENCODER_BACKENDS`` (case-insensitive; an
            empty value counts as ``"auto"``). With ``"auto"`` the backends in
            ``DEFAULT_ENCODER_PRIORITY`` are tried in order.
        preferred: Optional codec alias understood by ``_normalize_codec``.
            It decides which codec family a backend tries first; if only the
            other family is installed, that one is used and a warning logged.

    Returns:
        The selection for the first installed encoder that matches.

    Raises:
        ValueError: If *backend* or *preferred* is not recognised.
        RuntimeError: If ffmpeg exposes no encoder for the request.
    """
    requested_backend = (backend or "auto").lower()
    if requested_backend not in SUPPORTED_ENCODER_BACKENDS:
        raise ValueError(f"Unsupported encoder backend: {backend}")

    wanted_family = _normalize_codec(preferred)
    installed = _query_ffmpeg_video_encoders()

    def pick_nvenc() -> Optional[EncoderSelection]:
        ordering: List[str]
        if wanted_family == "hevc":
            ordering = ["hevc_nvenc", "h264_nvenc"]
        elif wanted_family == "h264":
            ordering = ["h264_nvenc", "hevc_nvenc"]
        else:
            ordering = list(PREFERRED_NVENC_ENCODERS)
        # Ensure every configured NVENC encoder is tried at least once.
        for configured in PREFERRED_NVENC_ENCODERS:
            if configured not in ordering:
                ordering.append(configured)

        for name in ordering:
            if name not in installed:
                continue
            if wanted_family and not name.startswith(wanted_family):
                logging.warning(
                    "Requested NVENC codec %s is not available. Falling back to %s.",
                    wanted_family,
                    name,
                )
            return _make_selection(name, "nvenc")

        if wanted_family:
            logging.warning(
                "Requested NVENC codec %s is not available.",
                wanted_family,
            )
        return None

    def pick_by_family(
        encoders_by_family: dict,
        backend_name: str,
        fallback_message: str,
        missing_message: str,
    ) -> Optional[EncoderSelection]:
        # Shared by QSV and AMF: try the preferred family first, H.264 by default.
        first, second = ("hevc", "h264") if wanted_family == "hevc" else ("h264", "hevc")
        for name in (encoders_by_family[first], encoders_by_family[second]):
            if name not in installed:
                continue
            if wanted_family and wanted_family not in name:
                logging.warning(fallback_message, wanted_family, name)
            return _make_selection(name, backend_name)
        if wanted_family:
            logging.warning(missing_message, wanted_family)
        return None

    def pick_qsv() -> Optional[EncoderSelection]:
        return pick_by_family(
            QSV_ENCODERS,
            "qsv",
            "Requested QSV codec %s is not available. Falling back to %s.",
            "Requested QSV codec %s is not available.",
        )

    def pick_amf() -> Optional[EncoderSelection]:
        return pick_by_family(
            AMF_ENCODERS,
            "amf",
            "Requested AMF codec %s is not available. Falling back to %s.",
            "Requested AMF codec %s is not available.",
        )

    def pick_x264() -> Optional[EncoderSelection]:
        if wanted_family == "hevc":
            logging.warning("libx264 does not support HEVC; encoding will use H.264 instead.")
        if X264_ENCODER_NAME not in installed:
            return None
        return _make_selection(X264_ENCODER_NAME, "x264")

    pickers = {
        "nvenc": pick_nvenc,
        "x264": pick_x264,
        "qsv": pick_qsv,
        "amf": pick_amf,
    }

    if requested_backend == "auto":
        for backend_name in DEFAULT_ENCODER_PRIORITY:
            chosen = pickers[backend_name]()
            if chosen:
                return chosen
        raise RuntimeError("ffmpeg does not expose a supported hardware encoder; libx264 is also unavailable.")

    chosen = pickers[requested_backend]()
    if chosen:
        return chosen

    if requested_backend == "nvenc":
        raise RuntimeError(
            "ffmpeg does not expose an NVENC encoder (h264_nvenc or hevc_nvenc). "
            "Install an NVENC-enabled build and ensure NVIDIA drivers are up to date."
        )

    raise RuntimeError(f"ffmpeg does not expose the requested encoder backend: {requested_backend}")
342
+
343
+
344
def discover_videos(base_dir: Path, ignore_dir: Optional[Path]) -> List[Path]:
    """Recursively collect video files under *base_dir*.

    A file counts as a video when its lower-cased suffix is in
    ``VIDEO_EXTENSIONS``. When *ignore_dir* is given, that directory and
    everything beneath it is left out.

    Returns:
        The resolved paths of all matching files, sorted.
    """
    excluded = ignore_dir.resolve() if ignore_dir else None
    found: List[Path] = []

    for folder, subfolders, filenames in os.walk(base_dir):
        here = Path(folder).resolve()
        if excluded:
            # Prune in place so os.walk never descends into the ignored tree.
            subfolders[:] = [
                child
                for child in subfolders
                if (here / child).resolve(strict=False) != excluded
            ]
            if here == excluded or excluded in here.parents:
                continue
        found.extend(
            here / filename
            for filename in filenames
            if (here / filename).suffix.lower() in VIDEO_EXTENSIONS
        )

    found.sort()
    return found
358
+
359
+
360
def format_size(num_bytes: int) -> str:
    """Render a byte count with two decimals and a binary-scaled unit.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (``PB``) is reached, e.g. ``1536`` becomes ``"1.50 KB"``.
    """
    suffixes = ("B", "KB", "MB", "GB", "TB", "PB")
    largest = len(suffixes) - 1

    amount = float(num_bytes)
    position = 0
    while position < largest and not (amount < 1024):
        amount /= 1024
        position += 1
    return f"{amount:.2f} {suffixes[position]}"
368
+
369
+
370
def build_output_path(
    src: Path,
    base_input: Path,
    overwrite: bool,
    output_root: Optional[Path],
    extension: str,
) -> Path:
    """Compute where the encoded version of *src* should be written.

    Args:
        src: Source video file.
        base_input: Directory that *src* is made relative to when mirroring
            the input tree under *output_root*.
        overwrite: When true, the output sits next to *src* (same stem, new
            suffix); *base_input* and *output_root* are not consulted.
        output_root: Destination root; required when *overwrite* is false.
        extension: Suffix for the output file, including the leading dot.

    Returns:
        The destination path.

    Raises:
        ValueError: If *overwrite* is false and *output_root* is ``None``, or
            if *src* is not located under *base_input*.
    """
    if overwrite:
        # The relative layout is irrelevant here, so a source outside
        # base_input must not be rejected.
        return src.with_suffix(extension)
    if output_root is None:
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        raise ValueError("output_root is required when overwrite is False")
    return output_root / src.relative_to(base_input).with_suffix(extension)
383
+
384
+
385
def _unknown_media_info() -> MediaInfo:
    """Return a ``MediaInfo`` with every metric unknown and every codec list empty."""
    return MediaInfo(
        frames=None,
        duration=None,
        bitrate_kbps=None,
        width=None,
        height=None,
        audio_codec=None,
        audio_bitrate_kbps=None,
        audio_channels=None,
        subtitle_codecs=[],
        video_codecs=[],
        attached_pic_codecs=[],
        data_stream_codecs=[],
    )


def probe_media_info(path: Path) -> MediaInfo:
    """Collect stream and container metrics for *path* using ``ffprobe``.

    Args:
        path: Media file to inspect.

    Returns:
        A populated ``MediaInfo``. If ffprobe is missing, exits with an error,
        or prints output that is not valid JSON, every scalar field is ``None``
        and every list is empty; the reason is logged at debug level.
    """
    cmd = [
        "ffprobe",
        "-v",
        "error",
        "-show_entries",
        # ffprobe prints only the entries listed here. width/height/channels
        # and the attached_pic disposition are read below, so they have to be
        # requested explicitly or they would always come back missing.
        (
            "format=bit_rate,duration"
            ":stream=index,codec_type,codec_name,bit_rate,nb_frames,avg_frame_rate,duration,"
            "width,height,channels"
            ":stream_disposition=attached_pic"
        ),
        "-of",
        "json",
        str(path),
    ]
    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
    except FileNotFoundError:
        logging.debug("ffprobe not found; media metrics unavailable.")
        return _unknown_media_info()
    except subprocess.CalledProcessError as exc:
        logging.debug("ffprobe failed for %s: %s", path, exc.stderr.strip())
        return _unknown_media_info()

    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError as exc:
        logging.debug("Unable to parse ffprobe output for %s: %s", path, exc)
        return _unknown_media_info()

    streams = data.get("streams", [])
    total_frames: Optional[int] = None
    duration_sec: Optional[float] = None
    audio_codec: Optional[str] = None
    audio_bitrate_kbps: Optional[float] = None
    audio_channels: Optional[int] = None
    primary_width: Optional[int] = None
    primary_height: Optional[int] = None
    subtitle_codecs: List[str] = []
    video_codecs: List[str] = []
    attached_pic_codecs: List[str] = []
    data_stream_codecs: List[str] = []

    for stream in streams:
        codec_type = stream.get("codec_type")

        if codec_type == "video":
            if isinstance(stream.get("codec_name"), str):
                video_codecs.append(stream["codec_name"].lower())
            disposition = stream.get("disposition") or {}
            if disposition.get("attached_pic") == 1:
                codec_name = stream.get("codec_name")
                if isinstance(codec_name, str):
                    attached_pic_codecs.append(codec_name.lower())

        # Frame count: prefer the stream's own nb_frames, otherwise estimate
        # it from the stream duration and average frame rate.
        if codec_type == "video" and total_frames is None:
            nb_frames = stream.get("nb_frames")
            if isinstance(nb_frames, str) and nb_frames.isdigit():
                total_frames = int(nb_frames)

            duration_candidate = _safe_float(stream.get("duration"))
            if duration_candidate is not None:
                duration_sec = duration_candidate

            if total_frames is None:
                fps = _safe_fraction(stream.get("avg_frame_rate"))
                if fps and duration_sec is not None:
                    total_frames = int(duration_sec * fps)

        if codec_type == "video" and (primary_width is None or primary_height is None):
            width = stream.get("width")
            if isinstance(width, int) and primary_width is None:
                primary_width = width
            height = stream.get("height")
            if isinstance(height, int) and primary_height is None:
                primary_height = height
        elif codec_type == "audio" and audio_codec is None:
            codec_name = stream.get("codec_name")
            if isinstance(codec_name, str):
                audio_codec = codec_name.lower()
            stream_bitrate = _safe_float(stream.get("bit_rate"))
            if stream_bitrate is not None:
                audio_bitrate_kbps = stream_bitrate / 1000.0
            channels_value = stream.get("channels")
            if isinstance(channels_value, int):
                audio_channels = channels_value
        elif codec_type == "subtitle":
            codec_name = stream.get("codec_name")
            if isinstance(codec_name, str):
                subtitle_codecs.append(codec_name.lower())
        elif codec_type in {"data", "attachment"}:
            codec_name = stream.get("codec_name")
            if isinstance(codec_name, str):
                data_stream_codecs.append(codec_name.lower())

    bitrate_kbps: Optional[float] = None
    format_section = data.get("format", {})
    bit_rate_value = _safe_float(format_section.get("bit_rate"))
    if bit_rate_value is not None:
        bitrate_kbps = bit_rate_value / 1000.0

    # Fall back to the container duration when no video stream reported one.
    if duration_sec is None:
        duration_candidate = _safe_float(format_section.get("duration"))
        if duration_candidate is not None:
            duration_sec = duration_candidate

    return MediaInfo(
        frames=total_frames,
        duration=duration_sec,
        bitrate_kbps=bitrate_kbps,
        width=primary_width,
        height=primary_height,
        audio_codec=audio_codec,
        audio_bitrate_kbps=audio_bitrate_kbps,
        audio_channels=audio_channels,
        subtitle_codecs=subtitle_codecs,
        video_codecs=video_codecs,
        attached_pic_codecs=attached_pic_codecs,
        data_stream_codecs=data_stream_codecs,
    )
537
+
538
+
539
def _format_progress_bar(ratio: Optional[float]) -> str:
    """Draw a ``[####----]  42.0%`` style bar for *ratio*.

    *ratio* is clamped to the range 0..1 and ``None`` is drawn as 0. The bar
    is ``PROGRESS_BAR_WIDTH`` cells wide.
    """
    fraction = 0.0 if ratio is None else ratio
    fraction = max(0.0, min(1.0, fraction))

    done_cells = int(PROGRESS_BAR_WIDTH * fraction)
    pending_cells = PROGRESS_BAR_WIDTH - done_cells
    cells = "#" * done_cells + "-" * pending_cells
    return f"[{cells}] {fraction * 100:5.1f}%"
547
+
548
+
549
def _render_progress_line(
    current_frame: Optional[int],
    total_frames: Optional[int],
    out_time_sec: Optional[float],
    total_duration: Optional[float],
) -> tuple[str, Optional[float]]:
    """Compose the progress bar text and the completion ratio behind it.

    Frame counts are preferred over timestamps. The ratio is only known when
    a total (frames or duration) is available; otherwise the label shows the
    bare counter and the ratio is ``None``.

    Returns:
        ``(text, ratio)`` where *text* is the bar followed by a label.
    """
    have_frame = current_frame is not None
    have_time = out_time_sec is not None
    ratio: Optional[float] = None

    if have_frame and total_frames and total_frames > 0:
        ratio = current_frame / total_frames
        label = f"frame {current_frame}/{total_frames}"
    elif have_time and total_duration and total_duration > 0:
        ratio = out_time_sec / total_duration
        label = f"time {out_time_sec:0.1f}s/{total_duration:0.1f}s"
    elif have_frame:
        label = f"frame {current_frame}"
    elif have_time:
        label = f"time {out_time_sec:0.1f}s"
    else:
        label = "encoding"

    return f"{_format_progress_bar(ratio)} {label}", ratio
573
+
574
+
575
def _display_progress(line: str) -> str:
    """Redraw the current console line with *line* and return what was written.

    The text is right-padded to ``PROGRESS_BAR_WIDTH + 30`` characters so that
    a shorter update overwrites the remains of a longer previous one.
    """
    rendered = line.ljust(PROGRESS_BAR_WIDTH + 30)
    # Carriage return without newline: stay on the same console line.
    sys.stdout.write(f"\r{rendered}")
    sys.stdout.flush()
    return rendered
580
+
581
+
582
+ def _clear_progress(line: str) -> None:
583
+ if not line:
584
+ return
585
+ sys.stdout.write("\r" + " " * len(line) + "\r")
586
+ sys.stdout.flush()
587
+
588
+
589
def run_ffmpeg_with_progress(cmd: List[str], total_frames: Optional[int], total_duration: Optional[float]) -> None:
    """Run an ffmpeg command and mirror its ``-progress`` output as a console bar.

    *cmd* is expected to make ffmpeg write ``key=value`` progress records to
    stdout (``-progress pipe:1``). The ``frame=``, ``out_time_ms=`` and
    ``progress=end`` records are used to redraw the bar, at most once per
    ``PROGRESS_UPDATE_INTERVAL`` seconds.

    Args:
        cmd: Full ffmpeg argument vector.
        total_frames: Expected frame count, if known.
        total_duration: Expected duration in seconds, if known.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
            ``output`` holds the progress records, ``stderr`` the captured
            error text.
    """
    # Function-scope import: only this function needs a helper thread.
    import threading

    current_frame: Optional[int] = None
    out_time_sec: Optional[float] = None
    last_render_time = 0.0
    last_line = ""
    progress_lines: List[str] = []

    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        encoding="utf-8",
        errors="replace",
        bufsize=1,
    )

    # stderr is a pipe too. If it were only read after stdout reached EOF,
    # ffmpeg could block on a full stderr pipe while this function blocks on
    # stdout. Drain it concurrently instead.
    stderr_chunks: List[str] = []

    def _drain_stderr() -> None:
        if process.stderr is not None:
            stderr_chunks.append(process.stderr.read())

    stderr_reader = threading.Thread(target=_drain_stderr, daemon=True)
    stderr_reader.start()

    try:
        assert process.stdout is not None
        for raw_line in process.stdout:
            line = raw_line.strip()
            progress_lines.append(line)
            if line.startswith("frame="):
                try:
                    current_frame = int(line.split("=", 1)[1])
                except ValueError:
                    pass
            elif line.startswith("out_time_ms="):
                try:
                    out_time_sec = int(line.split("=", 1)[1]) / 1_000_000
                except ValueError:
                    pass
            elif line == "progress=end":
                # Snap to 100% when the totals are known.
                current_frame = total_frames if total_frames else current_frame
                out_time_sec = total_duration if total_duration else out_time_sec

            now = time.time()
            if now - last_render_time >= PROGRESS_UPDATE_INTERVAL or line == "progress=end":
                display_line, _ = _render_progress_line(
                    current_frame,
                    total_frames,
                    out_time_sec,
                    total_duration,
                )
                last_line = _display_progress(display_line)
                last_render_time = now

        if last_line:
            display_line, _ = _render_progress_line(
                total_frames if total_frames else current_frame,
                total_frames,
                total_duration if total_duration else out_time_sec,
                total_duration,
            )
            last_line = _display_progress(display_line)
            sys.stdout.write("\n")
            sys.stdout.flush()
    except BaseException:
        # Do not leave ffmpeg running if reading is interrupted (e.g. Ctrl-C).
        process.kill()
        raise
    finally:
        if process.stdout:
            process.stdout.close()
        # The reader finishes once ffmpeg closes stderr, i.e. when it exits.
        stderr_reader.join()
        if process.stderr:
            process.stderr.close()
        return_code = process.wait()

    if return_code != 0:
        _clear_progress(last_line)
        raise subprocess.CalledProcessError(
            return_code,
            cmd,
            output="\n".join(progress_lines),
            stderr="".join(stderr_chunks),
        )
659
+
660
+
661
+ def encode_video(
662
+ src: Path,
663
+ dst: Path,
664
+ overwrite: bool,
665
+ encoder: str,
666
+ output_extension: str,
667
+ quality: str,
668
+ *,
669
+ _allow_encoder_fallback: bool = True,
670
+ _media_info_override: Optional[MediaInfo] = None,
671
+ ) -> None:
672
+ media_info = _media_info_override or probe_media_info(src)
673
+
674
+ effective_extension = output_extension
675
+ is_mp4_output = effective_extension.lower() == ".mp4"
676
+ subtitle_args: List[str] = ["-c:s", "copy"]
677
+
678
+ encoder_lower = encoder.lower()
679
+ is_nvenc_encoder = encoder_lower.endswith("_nvenc")
680
+ is_qsv_encoder = encoder_lower.endswith("_qsv")
681
+ is_amf_encoder = encoder_lower.endswith("_amf")
682
+ is_x264_encoder = encoder_lower == X264_ENCODER_NAME
683
+
684
+ if is_mp4_output:
685
+ subtitle_codecs = [code.lower() for code in media_info.subtitle_codecs]
686
+ allowed_subtitle_codecs = MP4_ALLOWED_SUBTITLE_CODECS | MP4_CONVERTIBLE_SUBTITLE_CODECS
687
+ unsupported_subs = [code for code in subtitle_codecs if code not in allowed_subtitle_codecs]
688
+ if unsupported_subs:
689
+ logging.warning(
690
+ "Subtitle codec(s) %s are not compatible with MP4; switching to MKV container for this file.",
691
+ ", ".join(sorted(set(unsupported_subs))),
692
+ )
693
+ effective_extension = ".mkv"
694
+ is_mp4_output = False
695
+ subtitle_args = ["-c:s", "copy"]
696
+ elif subtitle_codecs and any(code in MP4_CONVERTIBLE_SUBTITLE_CODECS for code in subtitle_codecs):
697
+ subtitle_args = ["-c:s", "mov_text"]
698
+ logging.info("Subtitle streams converted to mov_text for MP4 compatibility.")
699
+ else:
700
+ subtitle_args = ["-c:s", "copy"]
701
+
702
+ mp4_incompatible_reasons: List[str] = []
703
+ if len(media_info.video_codecs) > 1:
704
+ mp4_incompatible_reasons.append("multiple video streams present")
705
+ unsupported_attached = [
706
+ codec for codec in media_info.attached_pic_codecs if codec not in MP4_ALLOWED_ATTACHED_PIC_CODECS
707
+ ]
708
+ if unsupported_attached:
709
+ mp4_incompatible_reasons.append(
710
+ "attached pictures with codecs " + ", ".join(sorted(set(unsupported_attached)))
711
+ )
712
+ if media_info.data_stream_codecs:
713
+ mp4_incompatible_reasons.append("data/attachment streams present")
714
+
715
+ if mp4_incompatible_reasons:
716
+ logging.warning(
717
+ "MP4 container not suitable (%s); switching to MKV for this file.",
718
+ "; ".join(mp4_incompatible_reasons),
719
+ )
720
+ effective_extension = ".mkv"
721
+ is_mp4_output = False
722
+ subtitle_args = ["-c:s", "copy"]
723
+
724
+ if effective_extension != output_extension:
725
+ dst = dst.with_suffix(effective_extension)
726
+
727
+ dst.parent.mkdir(parents=True, exist_ok=True)
728
+ if not overwrite and dst.exists():
729
+ logging.info("Skipping existing file: %s", dst)
730
+ return
731
+
732
+ original_size = src.stat().st_size
733
+ if overwrite:
734
+ temp_dst = dst.with_name(f"_{dst.name}")
735
+ else:
736
+ temp_dst = dst
737
+
738
+ if temp_dst.exists():
739
+ try:
740
+ temp_dst.unlink()
741
+ except OSError:
742
+ pass
743
+
744
+ total_frames = media_info.frames
745
+ total_duration = media_info.duration
746
+ logging.debug(
747
+ "Media info for %s - frames: %s, duration: %s, bitrate: %s kbps",
748
+ src,
749
+ total_frames,
750
+ total_duration,
751
+ media_info.bitrate_kbps,
752
+ )
753
+
754
+ target_bitrate_kbps: Optional[int] = None
755
+ if media_info.bitrate_kbps:
756
+ ratio = HEVC_BITRATE_RATIO if "hevc" in encoder_lower else H264_BITRATE_RATIO
757
+ proposed = int(media_info.bitrate_kbps * ratio)
758
+ target_bitrate_kbps = max(proposed, MIN_TARGET_BITRATE_KBPS)
759
+ ceiling = int(media_info.bitrate_kbps * 0.95)
760
+ if ceiling > 0:
761
+ target_bitrate_kbps = min(target_bitrate_kbps, ceiling)
762
+ if target_bitrate_kbps <= 0:
763
+ target_bitrate_kbps = None
764
+
765
+ audio_args: List[str]
766
+ audio_description_parts: List[str] = []
767
+ if media_info.audio_codec:
768
+ audio_description_parts.append(media_info.audio_codec)
769
+ if media_info.audio_bitrate_kbps:
770
+ audio_description_parts.append(f"~{media_info.audio_bitrate_kbps:.0f} kbps")
771
+ audio_description = " ".join(audio_description_parts) if audio_description_parts else "unknown"
772
+
773
+ audio_codec_lower = media_info.audio_codec.lower() if media_info.audio_codec else None
774
+ audio_channels = media_info.audio_channels
775
+ actual_mono_source = audio_channels is not None and audio_channels < 2
776
+
777
+ pseudo_mono_channel: Optional[int] = None
778
+ if (
779
+ not actual_mono_source
780
+ and audio_codec_lower
781
+ and (audio_channels is None or audio_channels >= 2)
782
+ ):
783
+ pseudo_mono_channel = _detect_pseudo_mono_channel(src)
784
+
785
+ force_stereo = actual_mono_source or pseudo_mono_channel is not None
786
+
787
+ audio_filter_arg: Optional[str] = None
788
+ if pseudo_mono_channel is not None:
789
+ channel_token = f"c{pseudo_mono_channel}"
790
+ audio_filter_arg = f"pan=stereo|c0={channel_token}|c1={channel_token}"
791
+
792
+ audio_can_copy = audio_codec_lower is not None and not force_stereo
793
+ if audio_can_copy and is_mp4_output:
794
+ audio_can_copy = audio_codec_lower in MP4_ALLOWED_AUDIO_CODECS
795
+
796
+ if audio_can_copy:
797
+ audio_args = ["-c:a", "copy"]
798
+ logging.info("Audio stream will be copied (%s).", audio_description)
799
+ else:
800
+ target_channels = 2
801
+ audio_args = ["-c:a", "aac", "-b:a", AUDIO_BITRATE, "-ac", str(target_channels)]
802
+ log_message = "Audio stream will be transcoded to AAC %s (original %s)." % (AUDIO_BITRATE, audio_description)
803
+ if pseudo_mono_channel is not None:
804
+ channel_label = "left" if pseudo_mono_channel == 0 else "right"
805
+ log_message += (
806
+ f" Detected {channel_label} channel dominance; "
807
+ f"duplicating {channel_label} channel to both outputs."
808
+ )
809
+ elif actual_mono_source:
810
+ log_message += " Mono source detected; forcing stereo output."
811
+ elif audio_channels and audio_channels > target_channels:
812
+ log_message += f" Downmixing from {audio_channels} channels to stereo."
813
+ logging.info(log_message)
814
+
815
+ normalized_quality = quality.lower()
816
+ scale_filter: Optional[str] = None
817
+ if normalized_quality != "auto":
818
+ target_height = QUALITY_HEIGHT_MAP.get(normalized_quality)
819
+ if target_height is None:
820
+ logging.warning("Unknown quality preset '%s'; defaulting to auto.", quality)
821
+ normalized_quality = "auto"
822
+ else:
823
+ source_height = media_info.height
824
+ if source_height is None or source_height > target_height:
825
+ scale_filter = f"scale=-2:{target_height}"
826
+ if source_height:
827
+ logging.info(
828
+ "Video will be scaled from %sp to %sp height (maintaining aspect ratio).",
829
+ source_height,
830
+ target_height,
831
+ )
832
+ else:
833
+ logging.info(
834
+ "Video will be scaled to a maximum height of %sp (source height unknown).",
835
+ target_height,
836
+ )
837
+ if target_bitrate_kbps and source_height:
838
+ scale_ratio = min(target_height / source_height, 1.0)
839
+ adjusted = max(int(target_bitrate_kbps * scale_ratio * scale_ratio), MIN_TARGET_BITRATE_KBPS)
840
+ if adjusted != target_bitrate_kbps:
841
+ logging.debug(
842
+ "Adjusting target bitrate from %s kbps to %s kbps based on scale ratio %.3f.",
843
+ target_bitrate_kbps,
844
+ adjusted,
845
+ scale_ratio,
846
+ )
847
+ target_bitrate_kbps = adjusted
848
+ else:
849
+ logging.info(
850
+ "Source height %sp is already <= target %sp; no scaling applied.",
851
+ source_height,
852
+ target_height,
853
+ )
854
+
855
+ def build_ffmpeg_cmd(use_hw_decode: bool, force_hw_format: bool) -> List[str]:
856
+ cmd: List[str] = [
857
+ "ffmpeg",
858
+ "-hide_banner",
859
+ "-loglevel",
860
+ "error",
861
+ "-y",
862
+ ]
863
+
864
+ if use_hw_decode:
865
+ cmd.extend(["-hwaccel", "cuda"])
866
+ if force_hw_format and scale_filter is None:
867
+ cmd.extend(["-hwaccel_output_format", "cuda"])
868
+
869
+ cmd.extend(["-progress", "pipe:1", "-nostats"])
870
+
871
+ cmd.extend([
872
+ "-i",
873
+ str(src),
874
+ "-map",
875
+ "0",
876
+ "-c:v",
877
+ encoder,
878
+ ])
879
+
880
+ if is_nvenc_encoder:
881
+ nvenc_profile = NVENC_PROFILE_MAP.get(encoder)
882
+ if nvenc_profile:
883
+ cmd.extend(["-profile:v", nvenc_profile])
884
+ if NVENC_TUNE:
885
+ cmd.extend(["-tune", NVENC_TUNE])
886
+ cmd.extend([
887
+ "-preset",
888
+ NVENC_PRESET,
889
+ "-rc",
890
+ NVENC_RC_MODE,
891
+ "-cq",
892
+ str(NVENC_CQ),
893
+ ])
894
+ elif is_x264_encoder:
895
+ cmd.extend(["-preset", X264_DEFAULT_PRESET])
896
+ elif is_qsv_encoder:
897
+ cmd.extend(["-preset", QSV_DEFAULT_PRESET])
898
+ elif is_amf_encoder:
899
+ cmd.extend(["-quality", AMF_DEFAULT_QUALITY])
900
+
901
+ if target_bitrate_kbps:
902
+ video_bitrate = f"{target_bitrate_kbps}k"
903
+ maxrate = f"{int(target_bitrate_kbps * 1.15)}k"
904
+ bufsize = f"{int(target_bitrate_kbps * 2)}k"
905
+ cmd.extend([
906
+ "-b:v",
907
+ video_bitrate,
908
+ "-maxrate",
909
+ maxrate,
910
+ "-bufsize",
911
+ bufsize,
912
+ ])
913
+ else:
914
+ cmd.extend(["-b:v", "0"])
915
+
916
+ if is_nvenc_encoder:
917
+ if NVENC_LOOKAHEAD:
918
+ cmd.extend(["-rc-lookahead", NVENC_LOOKAHEAD])
919
+ if NVENC_SPATIAL_AQ:
920
+ cmd.extend(["-spatial_aq", NVENC_SPATIAL_AQ])
921
+ if NVENC_TEMPORAL_AQ:
922
+ cmd.extend(["-temporal_aq", NVENC_TEMPORAL_AQ])
923
+
924
+ if scale_filter:
925
+ cmd.extend(["-vf", scale_filter])
926
+
927
+ cmd.extend([
928
+ "-pix_fmt",
929
+ "yuv420p",
930
+ ])
931
+
932
+ if audio_filter_arg:
933
+ cmd.extend(["-af", audio_filter_arg])
934
+
935
+ cmd.extend(audio_args)
936
+
937
+ cmd.extend(subtitle_args or [])
938
+
939
+ cmd.extend([
940
+ "-map_metadata",
941
+ "0",
942
+ ])
943
+
944
+ if is_mp4_output:
945
+ cmd.extend(["-movflags", "+faststart"])
946
+
947
+ cmd.append(str(temp_dst))
948
+
949
+ return cmd
950
+
951
+ logging.info("Encoding %s -> %s", src, dst)
952
+ if total_frames:
953
+ logging.info("Estimated frames to process: %s", total_frames)
954
+ if not total_frames and total_duration:
955
+ logging.info("Estimated duration to process: %.1f seconds", total_duration)
956
+ if media_info.bitrate_kbps:
957
+ logging.info("Source video bitrate ≈ %.0f kbps", media_info.bitrate_kbps)
958
+ if target_bitrate_kbps:
959
+ logging.info("Target video bitrate set to %s kbps", target_bitrate_kbps)
960
+
961
+ if is_nvenc_encoder:
962
+ attempts = (
963
+ (True, True, "Hardware decode (GPU frames)"),
964
+ (True, False, "Hardware decode (system frames)"),
965
+ (False, False, "CPU decode"),
966
+ )
967
+ elif is_qsv_encoder:
968
+ attempts = ((False, False, "Intel QSV"),)
969
+ elif is_amf_encoder:
970
+ attempts = ((False, False, "AMD AMF"),)
971
+ else:
972
+ attempts = ((False, False, "Software decode"),)
973
+ success = False
974
+ last_error: Optional[subprocess.CalledProcessError] = None
975
+ for index, (use_hw_decode, force_hw_format, label) in enumerate(attempts):
976
+ cmd = build_ffmpeg_cmd(use_hw_decode, force_hw_format)
977
+ logging.debug("Attempting encode via %s", label)
978
+ try:
979
+ run_ffmpeg_with_progress(cmd, total_frames, total_duration)
980
+ success = True
981
+ break
982
+ except subprocess.CalledProcessError as exc:
983
+ last_error = exc
984
+ if temp_dst.exists():
985
+ try:
986
+ temp_dst.unlink()
987
+ except OSError:
988
+ pass
989
+ if index < len(attempts) - 1:
990
+ logging.warning("%s path failed; attempting fallback.", label)
991
+
992
+ fallback_encoder: Optional[str] = None
993
+ fallback_label: Optional[str] = None
994
+ if is_nvenc_encoder or is_qsv_encoder or is_amf_encoder:
995
+ fallback_encoder = X264_ENCODER_NAME
996
+ if is_nvenc_encoder:
997
+ fallback_label = "NVENC"
998
+ elif is_qsv_encoder:
999
+ fallback_label = "QSV"
1000
+ else:
1001
+ fallback_label = "AMF"
1002
+
1003
+ if not success and last_error is not None:
1004
+ stdout_output = getattr(last_error, "stdout", None)
1005
+ if stdout_output is None:
1006
+ stdout_output = getattr(last_error, "output", "")
1007
+ if (
1008
+ fallback_encoder
1009
+ and _allow_encoder_fallback
1010
+ and encoder_lower != fallback_encoder
1011
+ ):
1012
+ logging.warning(
1013
+ "%s encoding failed after all retries; falling back to %s.",
1014
+ fallback_label,
1015
+ fallback_encoder,
1016
+ )
1017
+ encode_video(
1018
+ src=src,
1019
+ dst=dst,
1020
+ overwrite=overwrite,
1021
+ encoder=fallback_encoder,
1022
+ output_extension=output_extension,
1023
+ quality=quality,
1024
+ _allow_encoder_fallback=False,
1025
+ _media_info_override=media_info,
1026
+ )
1027
+ return
1028
+ raise RuntimeError(
1029
+ f"ffmpeg failed for {src}:\nSTDOUT:\n{stdout_output}\nSTDERR:\n{last_error.stderr}"
1030
+ ) from last_error
1031
+
1032
+ try:
1033
+ temp_dst.stat()
1034
+ except FileNotFoundError:
1035
+ raise RuntimeError(f"Expected output file missing for {src}") from None
1036
+
1037
+ if overwrite:
1038
+ src.unlink()
1039
+ temp_dst.rename(dst)
1040
+ final_path = dst
1041
+ else:
1042
+ final_path = temp_dst
1043
+
1044
+ final_size = final_path.stat().st_size
1045
+
1046
+ if final_size >= original_size:
1047
+ logging.warning(
1048
+ "Output is larger than input (%s vs %s) for %s.",
1049
+ format_size(final_size),
1050
+ format_size(original_size),
1051
+ final_path,
1052
+ )
1053
+ else:
1054
+ savings = original_size - final_size
1055
+ logging.info(
1056
+ "Reduced %s by %s (from %s to %s).",
1057
+ final_path.name,
1058
+ format_size(savings),
1059
+ format_size(original_size),
1060
+ format_size(final_size),
1061
+ )
1062
+
1063
+
1064
def process_videos(
    videos: Iterable[Path],
    base_input: Path,
    overwrite: bool,
    output_root: Optional[Path],
    encoder: str,
    output_extension: str,
    max_workers: int,
    quality: str,
) -> None:
    """Encode every path in *videos* on a thread pool and log each outcome.

    Each source is paired with a destination computed by
    ``build_output_path`` and handed to ``encode_video`` together with the
    shared ``overwrite``/``encoder``/``output_extension``/``quality``
    settings. All jobs are submitted up front; results are then reported in
    completion order.

    Exceptions raised by an individual encode job are logged with
    ``logging.error`` and do not abort the remaining jobs. Exceptions raised
    while computing a destination path or submitting a job propagate.
    """
    # Keyword arguments that are identical for every encode job.
    shared_options = {
        "overwrite": overwrite,
        "encoder": encoder,
        "output_extension": output_extension,
        "quality": quality,
    }

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Map each future back to its source so results can be attributed.
        pending = {
            executor.submit(
                encode_video,
                src=source,
                dst=build_output_path(
                    source, base_input, overwrite, output_root, output_extension
                ),
                **shared_options,
            ): source
            for source in videos
        }

        for finished in as_completed(pending):
            source = pending[finished]
            try:
                # result() re-raises whatever the worker raised.
                finished.result()
                logging.info("Finished: %s", source)
            except Exception as exc:  # noqa: BLE001
                logging.error("Failed: %s\nReason: %s", source, exc)
1096
+
1097
+
1098
def reduce_videos(config: ReducerConfig) -> None:
    """Scan the configured input directory and re-encode every video found.

    Steps, in order:

    1. Expand and resolve ``config.input_path``; it must be a directory.
    2. Call ``ensure_ffmpeg_available()`` and ``select_encoder(...)``; any
       exception they raise propagates to the caller.
    3. Decide where output goes: in place when ``config.overwrite`` is set,
       otherwise under ``config.output_root`` (default
       ``<cwd>/output/<input directory name>``), which is created if needed.
    4. Discover videos under the input directory and pass them to
       ``process_videos``.

    Raises:
        ValueError: if the resolved input path is not a directory.
    """
    base_input = config.input_path.expanduser().resolve()
    if not base_input.is_dir():
        raise ValueError(f"Input path is not a directory: {base_input}")

    ensure_ffmpeg_available()
    selection = select_encoder(config.encoder_backend, config.preferred_codec)
    # A missing/empty quality setting falls back to the "auto" preset.
    quality_choice = (config.quality or "auto").lower()
    logging.info(
        "Using %s via %s backend (output extension %s).",
        selection.encoder,
        selection.backend.upper(),
        selection.output_extension,
    )
    logging.info("Quality preset set to '%s'.", quality_choice)

    if config.overwrite:
        output_root = None
        ignore_dir = None
        logging.info("Overwrite enabled; source files will be replaced in place.")
    else:
        root = config.output_root or (Path.cwd() / "output" / base_input.name)
        # Expand "~" exactly like the input path; without this a configured
        # root such as "~/out" would resolve to a literal "~" directory under
        # the current working directory.
        output_root = root.expanduser().resolve()
        output_root.mkdir(parents=True, exist_ok=True)
        # Handed to discover_videos as the directory to ignore — presumably so
        # already-converted output is not picked up again; verify in
        # discover_videos.
        ignore_dir = output_root
        logging.info("Overwrite disabled; writing converted files under %s.", output_root)

    logging.info("Beginning scan of %s.", base_input)
    videos = discover_videos(base_input, ignore_dir)
    if not videos:
        known_extensions = ", ".join(sorted(ext.upper().lstrip(".") for ext in VIDEO_EXTENSIONS))
        logging.info("No %s files found.", known_extensions)
        return

    logging.info("Found %d video(s) to process.", len(videos))
    process_videos(
        videos=videos,
        base_input=base_input,
        overwrite=config.overwrite,
        output_root=output_root,
        encoder=selection.encoder,
        output_extension=selection.output_extension,
        max_workers=config.max_workers,
        quality=quality_choice,
    )