mkv2cast 1.2.7.post4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mkv2cast/converter.py ADDED
@@ -0,0 +1,1454 @@
1
+ """
2
+ Core conversion logic for mkv2cast.
3
+
4
+ Contains:
5
+ - Codec detection and decision logic
6
+ - Backend selection (VAAPI, QSV, CPU)
7
+ - FFmpeg command building
8
+ - File conversion functions
9
+ - Progress callback support for library usage
10
+ - Batch processing with multi-threading
11
+ """
12
+
13
+ import json
14
+ import os
15
+ import re
16
+ import shlex
17
+ import shutil
18
+ import subprocess
19
+ import threading
20
+ import time
21
+ from concurrent.futures import ThreadPoolExecutor, as_completed
22
+ from dataclasses import dataclass
23
+ from pathlib import Path
24
+ from typing import Any, Callable, Dict, List, Optional, Tuple
25
+
26
+ from mkv2cast.config import CFG, Config
27
+
28
+ # -------------------- UTILITY FUNCTIONS --------------------
29
+
30
+
31
def run_quiet(cmd: List[str], timeout: float = 10.0) -> bool:
    """Execute ``cmd`` with all output discarded and report success.

    Args:
        cmd: Command and arguments, passed to ``subprocess.run`` as a list.
        timeout: Maximum run time in seconds.

    Returns:
        True only when the process exits with status 0. Any exception
        (missing executable, timeout, OS error) is reported as False.
    """
    sink = subprocess.DEVNULL
    try:
        completed = subprocess.run(cmd, stdout=sink, stderr=sink, timeout=timeout)
    except Exception:
        return False
    return completed.returncode == 0
38
+
39
+
40
def ffprobe_json(path: Path) -> Dict[str, Any]:
    """Probe ``path`` with ffprobe and return the decoded JSON report.

    The report contains the ``streams`` and ``format`` sections requested via
    ``-show_streams`` and ``-show_format``. Errors from ``subprocess.check_output``
    or ``json.loads`` propagate to the caller.
    """
    probe_cmd = [
        "ffprobe",
        "-v", "error",
        "-print_format", "json",
        "-show_streams",
        "-show_format",
        str(path),
    ]
    report: Dict[str, Any] = json.loads(subprocess.check_output(probe_cmd))
    return report
46
+
47
+
48
def probe_duration_ms(path: Path, debug: bool = False) -> int:
    """Return the media duration of ``path`` in milliseconds.

    The container-level duration is preferred; when it is absent or not
    positive, the first video stream reporting a positive duration is used.

    Args:
        path: File to probe with ffprobe.
        debug: Accepted for interface compatibility; not used by this function.

    Returns:
        Duration in milliseconds, or 0 when it cannot be determined or when
        probing/parsing fails for any reason.
    """
    try:
        probe_cmd = [
            "ffprobe",
            "-v", "error",
            "-of", "json",
            "-show_entries", "format=duration:stream=codec_type,duration",
            str(path),
        ]
        info = json.loads(subprocess.check_output(probe_cmd))

        seconds = None
        if "format" in info and info["format"].get("duration"):
            seconds = float(info["format"]["duration"])

        container_unusable = seconds is None or seconds <= 0
        if container_unusable and "streams" in info:
            for stream in info["streams"]:
                if stream.get("codec_type") != "video" or not stream.get("duration"):
                    continue
                candidate = float(stream["duration"])
                if candidate > 0:
                    seconds = candidate
                    break

        if seconds is not None and seconds > 0:
            return int(seconds * 1000)
        return 0
    except Exception:
        return 0
77
+
78
+
79
def file_size(path: Path) -> int:
    """Return the size of ``path`` in bytes, or 0 when it cannot be stat'ed."""
    try:
        info = path.stat()
    except Exception:
        return 0
    return info.st_size
85
+
86
+
87
+ def _mb_to_bytes(mb: int) -> int:
88
+ """Convert MB to bytes (0 for invalid values)."""
89
+ try:
90
+ return max(0, int(mb)) * 1024 * 1024
91
+ except Exception:
92
+ return 0
93
+
94
+
95
def check_disk_space(
    output_dir: Path,
    tmp_dir: Optional[Path],
    estimated_bytes: int,
    cfg: Config,
) -> Optional[str]:
    """Check the configured free-space guards before writing ``estimated_bytes``.

    Args:
        output_dir: Destination directory; created (best effort) if missing.
        tmp_dir: Temporary directory, or None to skip the temp-space guard.
        estimated_bytes: Space the conversion is expected to consume.
        cfg: Config providing ``disk_min_free_mb`` and ``disk_min_free_tmp_mb``.

    Returns:
        An error message when a guard would be violated, otherwise None.
        Failures while querying the filesystem are ignored (the guard is
        treated as satisfied).
    """
    try:
        output_dir.mkdir(parents=True, exist_ok=True)
    except Exception:
        pass

    out_floor = _mb_to_bytes(cfg.disk_min_free_mb)
    if out_floor > 0:
        try:
            free_after = shutil.disk_usage(str(output_dir)).free - estimated_bytes
            if free_after < out_floor:
                return f"Insufficient free space in {output_dir} (min {cfg.disk_min_free_mb} MB)"
        except Exception:
            pass

    if tmp_dir is not None and cfg.disk_min_free_tmp_mb > 0:
        try:
            both_exist = output_dir.exists() and tmp_dir.exists()
            # The temp guard only applies when temp lives on a different device.
            if both_exist and output_dir.stat().st_dev != tmp_dir.stat().st_dev:
                tmp_free = shutil.disk_usage(str(tmp_dir)).free
                tmp_floor = _mb_to_bytes(cfg.disk_min_free_tmp_mb)
                if tmp_free - estimated_bytes < tmp_floor:
                    return f"Insufficient temp space in {tmp_dir} (min {cfg.disk_min_free_tmp_mb} MB)"
        except Exception:
            pass

    return None
128
+
129
+
130
def enforce_output_quota(output_path: Path, input_size: int, cfg: Config) -> Optional[str]:
    """Validate the produced file against the configured size limits.

    Args:
        output_path: File that was written.
        input_size: Size of the source file in bytes (0 disables the ratio check).
        cfg: Config providing ``max_output_mb`` and ``max_output_ratio``.

    Returns:
        An error message when the absolute or relative limit is exceeded,
        otherwise None. None is also returned when the output cannot be stat'ed.
    """
    try:
        produced = output_path.stat().st_size
    except Exception:
        return None

    if cfg.max_output_mb > 0:
        limit = _mb_to_bytes(cfg.max_output_mb)
        if 0 < limit < produced:
            return f"Output exceeds max size ({cfg.max_output_mb} MB)"

    ratio_active = cfg.max_output_ratio > 0 and input_size > 0
    if ratio_active and produced > int(input_size * cfg.max_output_ratio):
        return f"Output exceeds max ratio ({cfg.max_output_ratio:.2f}x)"

    return None
147
+
148
+
149
+ # -------------------- BACKEND SELECTION --------------------
150
+
151
+
152
def have_encoder(name: str) -> bool:
    """Report whether ``ffmpeg -encoders`` lists an encoder called ``name``.

    Each listing row looks like `` V....D libx264 description...``; the encoder
    name is the second whitespace-separated field. Any failure to run ffmpeg
    (missing binary, timeout, ...) yields False.
    """
    try:
        listing = subprocess.run(
            ["ffmpeg", "-hide_banner", "-encoders"],
            capture_output=True,
            text=True,
            timeout=4.0,
        )
        rows = (row.split() for row in listing.stdout.split("\n"))
        return any(len(fields) >= 2 and fields[1] == name for fields in rows)
    except Exception:
        return False
165
+
166
+
167
def test_qsv(vaapi_device: str = "/dev/dri/renderD128") -> bool:
    """Probe Intel QSV by encoding a 0.2 s synthetic clip with ``h264_qsv``.

    Returns False immediately when the render device node does not exist.
    """
    if not Path(vaapi_device).exists():
        return False

    hw_setup = ["-init_hw_device", f"qsv=hw:{vaapi_device}", "-filter_hw_device", "hw"]
    source = ["-f", "lavfi", "-i", "testsrc2=size=128x128:rate=30", "-t", "0.2"]
    encode = ["-vf", "format=nv12", "-c:v", "h264_qsv", "-global_quality", "35", "-an"]
    discard = ["-f", "null", "-"]

    probe = ["ffmpeg", "-hide_banner", "-loglevel", "error"] + hw_setup + source + encode + discard
    return run_quiet(probe, timeout=6.0)
198
+
199
+
200
def test_vaapi(vaapi_device: str = "/dev/dri/renderD128") -> bool:
    """Probe VAAPI by encoding a 0.2 s synthetic clip with ``h264_vaapi``.

    Returns False immediately when the render device node does not exist.
    """
    if not Path(vaapi_device).exists():
        return False

    device = ["-vaapi_device", vaapi_device]
    source = ["-f", "lavfi", "-i", "testsrc2=size=128x128:rate=30", "-t", "0.2"]
    encode = ["-vf", "format=nv12,hwupload", "-c:v", "h264_vaapi", "-qp", "35", "-an"]
    discard = ["-f", "null", "-"]

    probe = ["ffmpeg", "-hide_banner", "-loglevel", "error"] + device + source + encode + discard
    return run_quiet(probe, timeout=6.0)
229
+
230
+
231
def test_nvenc() -> bool:
    """Probe NVIDIA NVENC.

    Three checks run in order and the first failure returns False:
    ``nvidia-smi`` must run successfully, ffmpeg must list ``h264_nvenc``,
    and a 0.2 s synthetic clip must encode with that encoder.
    """
    # A working nvidia-smi is used as the indicator that the driver is present.
    try:
        subprocess.run(["nvidia-smi"], capture_output=True, timeout=5.0, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
        return False

    source = ["-f", "lavfi", "-i", "testsrc2=size=128x128:rate=30", "-t", "0.2"]
    encode = ["-c:v", "h264_nvenc", "-preset", "p4", "-cq", "23", "-an"]
    discard = ["-f", "null", "-"]
    probe = ["ffmpeg", "-hide_banner", "-loglevel", "error"] + source + encode + discard

    # The trial encode is attempted only when the encoder is compiled in.
    return have_encoder("h264_nvenc") and run_quiet(probe, timeout=6.0)
267
+
268
+
269
def test_amf() -> bool:
    """Probe AMD AMF.

    ffmpeg must list ``h264_amf`` and a 0.2 s synthetic clip must encode with
    it in constant-QP mode; otherwise False is returned.
    """
    source = ["-f", "lavfi", "-i", "testsrc2=size=128x128:rate=30", "-t", "0.2"]
    encode = [
        "-c:v", "h264_amf",
        "-quality", "balanced",
        "-rc", "cqp",
        "-qp_i", "23",
        "-qp_p", "23",
        "-qp_b", "23",
        "-an",
    ]
    discard = ["-f", "null", "-"]
    probe = ["ffmpeg", "-hide_banner", "-loglevel", "error"] + source + encode + discard

    # The trial encode is attempted only when the encoder is compiled in.
    return have_encoder("h264_amf") and run_quiet(probe, timeout=6.0)
305
+
306
+
307
def pick_backend(cfg: Optional[Config] = None) -> str:
    """
    Choose the encoding backend to use.

    An explicit ``cfg.hw`` (anything other than "auto") is returned unchanged.
    Otherwise hardware backends are probed in priority order
    NVENC > AMF > QSV > VAAPI, and the first one whose encoder is listed by
    ffmpeg and whose trial encode succeeds wins.

    Args:
        cfg: Config instance (uses global CFG if not provided).

    Returns:
        Backend name: "nvenc", "amf", "qsv", "vaapi", or "cpu" when no
        hardware backend is usable (or the value of ``cfg.hw`` when forced).
    """
    if cfg is None:
        cfg = CFG

    if cfg.hw != "auto":
        return cfg.hw

    # (backend name, ffmpeg encoder, trial-encode probe), highest priority first.
    candidates = (
        ("nvenc", "h264_nvenc", lambda: test_nvenc()),
        ("amf", "h264_amf", lambda: test_amf()),
        ("qsv", "h264_qsv", lambda: test_qsv(cfg.vaapi_device)),
        ("vaapi", "h264_vaapi", lambda: test_vaapi(cfg.vaapi_device)),
    )
    for backend_name, encoder, probe in candidates:
        if have_encoder(encoder) and probe():
            return backend_name
    return "cpu"
332
+
333
+
334
def video_args_for(backend: str, cfg: Optional[Config] = None) -> List[str]:
    """Build the ffmpeg video-encoding arguments for ``backend``.

    All backends target H.264 High profile, level 4.1. For NVENC and AMF the
    x264-style ``cfg.preset`` name is translated to the encoder's own speed
    setting (unknown names fall back to "p4" / "balanced").

    Args:
        backend: One of "nvenc", "amf", "qsv", "vaapi", "cpu".
        cfg: Config instance (uses global CFG if not provided).

    Returns:
        A new list of ffmpeg arguments.

    Raises:
        RuntimeError: If ``backend`` is not one of the known names.
    """
    if cfg is None:
        cfg = CFG

    if backend not in ("nvenc", "amf", "qsv", "vaapi", "cpu"):
        raise RuntimeError(f"Unknown backend: {backend}")

    profile_tail = ["-profile:v", "high", "-level", "4.1"]

    if backend == "cpu":
        head = ["-c:v", "libx264", "-preset", cfg.preset, "-crf", str(cfg.crf), "-pix_fmt", "yuv420p"]
        return head + profile_tail

    if backend == "vaapi":
        head = [
            "-vaapi_device", cfg.vaapi_device,
            "-vf", "format=nv12,hwupload",
            "-c:v", "h264_vaapi",
            "-qp", str(cfg.vaapi_qp),
        ]
        return head + profile_tail

    if backend == "qsv":
        head = ["-vf", "format=nv12", "-c:v", "h264_qsv", "-global_quality", str(cfg.qsv_quality)]
        return head + profile_tail

    # x264 preset names, fastest to slowest; the tables below line up with them.
    x264_presets = (
        "ultrafast", "superfast", "veryfast",
        "faster", "fast", "medium",
        "slow", "slower", "veryslow",
    )

    if backend == "amf":
        # AMF quality modes: speed, balanced, quality.
        amf_modes = dict(zip(x264_presets, ("speed",) * 3 + ("balanced",) * 3 + ("quality",) * 3))
        mode = amf_modes.get(cfg.preset, "balanced")
        qp = str(cfg.amf_quality)
        head = [
            "-c:v", "h264_amf",
            "-quality", mode,
            "-rc", "cqp",  # Constant Quantization Parameter
            "-qp_i", qp,
            "-qp_p", qp,
            "-qp_b", qp,
        ]
        return head + profile_tail

    # backend == "nvenc": presets run from p1 (fastest) to p7 (slowest/best quality).
    nvenc_levels = dict(zip(x264_presets, ("p1", "p2", "p3", "p4", "p4", "p5", "p6", "p7", "p7")))
    level = nvenc_levels.get(cfg.preset, "p4")
    head = ["-c:v", "h264_nvenc", "-preset", level, "-cq", str(cfg.nvenc_cq)]
    return head + profile_tail + ["-rc", "vbr", "-b:v", "0"]
449
+
450
+
451
+ # -------------------- DECISION LOGIC --------------------
452
+
453
+
454
@dataclass
class Decision:
    """Outcome of analysing one file: what must be transcoded and from what.

    Attributes:
        need_v: Video must be transcoded.
        need_a: Audio must be transcoded.
        aidx: Index of the audio stream to use (-1 if none).
        add_silence: A silent audio track must be added.
        reason_v: Human-readable reason for the video decision.
        vcodec: Source video codec.
        vpix: Source pixel format.
        vbit: Source bit depth.
        vhdr: Source is HDR content.
        vprof: Source video profile.
        vlevel: Source video level.
        acodec: Source audio codec.
        ach: Number of audio channels.
        alang: Audio language.
        format_name: Container format name.
        sidx: Index of the subtitle stream to use (-1 if none).
        slang: Subtitle language.
        sforced: Selected subtitle is a forced track.
    """

    # Transcoding requirements
    need_v: bool
    need_a: bool
    aidx: int
    add_silence: bool
    reason_v: str
    # Source video properties
    vcodec: str
    vpix: str
    vbit: int
    vhdr: bool
    vprof: str
    vlevel: int
    # Source audio properties
    acodec: str
    ach: int
    alang: str
    # Container
    format_name: str
    # Subtitle selection (optional)
    sidx: int = -1
    slang: str = ""
    sforced: bool = False
477
+
478
+
479
def parse_bitdepth_from_pix(pix: str) -> int:
    """Derive the per-component bit depth from an ffmpeg pixel format name.

    Recognises the ``<depth>le`` / ``<depth>be`` suffix used by high bit-depth
    formats (e.g. ``yuv420p10le``, ``yuv420p10be``, ``yuv444p16le``) and the
    semi-planar ``p010`` / ``p012`` / ``p016`` family written without an
    endianness suffix.

    Args:
        pix: Pixel format name as reported by ffprobe; None or empty is allowed.

    Returns:
        9, 10, 12, 14 or 16 when the name encodes that depth, otherwise 8.
    """
    name = (pix or "").lower()

    # Both endiannesses count: a big-endian 10-bit stream is still 10-bit.
    suffixed = re.search(r"(9|10|12|14|16)(?:le|be)", name)
    if suffixed:
        return int(suffixed.group(1))

    semi_planar = re.search(r"p0(10|12|16)", name)
    if semi_planar:
        return int(semi_planar.group(1))

    return 8
488
+
489
+
490
def is_audio_description(title: str) -> bool:
    """Check whether an audio track title marks an audio-description track.

    Matching is case-insensitive. Recognised markers are "audio description"
    (also hyphenated or joined), "visual impaired" / "visually impaired",
    " v.i", and "AD" as a standalone word. "AD" is matched on word boundaries
    so titles such as "English Advanced" are not flagged.

    Args:
        title: Track title; None or empty is allowed.

    Returns:
        True if the title looks like an audio-description track.
    """
    t = (title or "").lower()
    phrases = ("audio description", "audio-description", "audiodescription", " v.i")
    if any(phrase in t for phrase in phrases):
        return True
    if re.search(r"visual(?:ly)?[ -]impaired", t):
        return True
    return re.search(r"\bad\b", t) is not None


def select_audio_track(streams: List[dict], cfg: Optional["Config"] = None) -> Tuple[Optional[dict], str]:
    """
    Select the best audio track based on user preferences.

    Priority:
    1. Explicit track index (--audio-track); an out-of-range index is ignored
    2. Language priority list (--audio-lang); empty entries are ignored
    3. Default French preference (fre, fra, fr)
    4. First audio track

    Within a language, tracks that are not audio descriptions are preferred;
    an audio-description track is used only when nothing else matches.

    Args:
        streams: List of stream dictionaries from ffprobe.
        cfg: Config instance (uses global CFG if not provided).

    Returns:
        Tuple of (selected_stream, selected_language); (None, "") when the
        file has no audio stream.
    """
    if cfg is None:
        from mkv2cast.config import CFG

        cfg = CFG

    audio_streams = [s for s in streams if s.get("codec_type") == "audio"]
    if not audio_streams:
        return None, ""

    def get_lang(s: dict) -> str:
        # "or" guards against a tag that is present but null.
        return ((s.get("tags") or {}).get("language") or "").lower()

    def get_title(s: dict) -> str:
        return (s.get("tags") or {}).get("title") or ""

    def best_of(candidates: List[dict]) -> Optional[dict]:
        """Return the first non-audio-description candidate, else the first one."""
        for stream in candidates:
            if not is_audio_description(get_title(stream)):
                return stream
        return candidates[0] if candidates else None

    # 1. Explicit track index
    if cfg.audio_track is not None and 0 <= cfg.audio_track < len(audio_streams):
        selected = audio_streams[cfg.audio_track]
        return selected, get_lang(selected)

    # 2. Language priority list from config
    if cfg.audio_lang:
        wanted = [part.strip().lower() for part in cfg.audio_lang.split(",")]
        # An empty entry (e.g. from a trailing comma) would prefix-match every
        # stream and short-circuit the remaining priorities, so drop it.
        for lang in (w for w in wanted if w):
            chosen = best_of([s for s in audio_streams if get_lang(s).startswith(lang)])
            if chosen is not None:
                return chosen, get_lang(chosen)

    # 3. Default: prefer French (fre, fra, fr)
    fr_langs = {"fre", "fra", "fr"}
    chosen = best_of([s for s in audio_streams if get_lang(s) in fr_langs])
    if chosen is not None:
        return chosen, get_lang(chosen)

    # 4. Fallback: first audio track
    return audio_streams[0], get_lang(audio_streams[0])
573
+
574
+
575
def select_subtitle_track(
    streams: List[dict], audio_lang: str, cfg: Optional["Config"] = None
) -> Optional[Tuple[dict, bool]]:
    """
    Select the best subtitle track based on user preferences.

    Priority:
    1. Explicit track index (--subtitle-track); an out-of-range index is ignored
    2. Forced subtitles in audio language (if --prefer-forced-subs)
    3. Language priority list (--subtitle-lang); empty entries are ignored.
       Within a language: forced track, then non-SDH track, then any track.
    4. No subtitles selected

    Args:
        streams: List of stream dictionaries from ffprobe.
        audio_lang: The language of the selected audio track.
        cfg: Config instance (uses global CFG if not provided).

    Returns:
        Tuple of (selected_stream, is_forced) or None if no subtitle selected.
    """
    if cfg is None:
        from mkv2cast.config import CFG

        cfg = CFG

    # Disabled subtitles
    if cfg.no_subtitles:
        return None

    subtitle_streams = [s for s in streams if s.get("codec_type") == "subtitle"]
    if not subtitle_streams:
        return None

    def get_lang(s: dict) -> str:
        # "or" guards against a tag that is present but null.
        return ((s.get("tags") or {}).get("language") or "").lower()

    def is_forced(s: dict) -> bool:
        return (s.get("disposition") or {}).get("forced", 0) == 1

    def is_sdh(s: dict) -> bool:
        """Check if subtitle is SDH (for hearing impaired)."""
        title = ((s.get("tags") or {}).get("title") or "").lower()
        return (s.get("disposition") or {}).get("hearing_impaired", 0) == 1 or "sdh" in title

    # 1. Explicit track index
    if cfg.subtitle_track is not None and 0 <= cfg.subtitle_track < len(subtitle_streams):
        selected = subtitle_streams[cfg.subtitle_track]
        return selected, is_forced(selected)

    # 2. Prefer forced subtitles in audio language
    spoken = (audio_lang or "").strip().lower()
    if cfg.prefer_forced_subs and spoken:
        for stream in subtitle_streams:
            if not is_forced(stream):
                continue
            stream_lang = get_lang(stream)
            # Comparing the first two letters lets "fre" match "fra"/"fr".
            if stream_lang == spoken or stream_lang[:2] == spoken[:2]:
                return stream, True

    # 3. Language priority list
    if cfg.subtitle_lang:
        wanted = [part.strip().lower() for part in cfg.subtitle_lang.split(",")]
        # An empty entry (e.g. from a trailing comma) would prefix-match every
        # stream, so drop it.
        for lang in (w for w in wanted if w):
            in_lang = [s for s in subtitle_streams if get_lang(s).startswith(lang)]
            if not in_lang:
                continue
            forced = next((s for s in in_lang if is_forced(s)), None)
            if forced is not None:
                return forced, True
            # No forced track exists in this language from here on.
            regular = next((s for s in in_lang if not is_sdh(s)), None)
            if regular is not None:
                return regular, False
            return in_lang[0], False

    # 4. No subtitle selected by default (user must specify --subtitle-lang)
    return None
659
+
660
+
661
def decide_for(path: Path, cfg: Optional[Config] = None) -> Decision:
    """
    Analyze a file and decide what transcoding is needed.

    Video is kept as-is only for 8-bit SDR H.264 (yuv420p/yuvj420p, not a
    High 10 / 4:2:2 / 4:4:4 profile, level <= 4.1) or, with ``cfg.allow_hevc``,
    8-bit SDR HEVC. AV1 (by codec or by "AV1" in the file name) and
    ``cfg.force_h264`` always force a transcode. Audio is kept only when it is
    AAC or MP3 and ``cfg.force_aac`` is off.

    Args:
        path: Path to the MKV file.
        cfg: Config instance (uses global CFG if not provided).

    Returns:
        Decision dataclass with transcoding requirements.

    Raises:
        Exception: Whatever ``ffprobe_json`` raises when probing fails.
    """
    if cfg is None:
        cfg = CFG

    def low(x):
        return (x or "").lower()

    def stream_index(stream: Optional[dict]) -> int:
        """Return the stream's ffprobe index, or -1 when there is no stream/index."""
        if not stream:
            return -1
        idx = stream.get("index")
        # Index 0 is a valid stream, so only a missing value maps to -1.
        return -1 if idx is None else int(idx)

    j = ffprobe_json(path)
    fmt = j.get("format", {}) or {}
    format_name = fmt.get("format_name", "") or ""

    streams = j.get("streams", []) or []
    v = next((s for s in streams if s.get("codec_type") == "video"), None) or {}

    vcodec = low(v.get("codec_name", ""))
    vpix = low(v.get("pix_fmt", ""))
    vprof = low(v.get("profile", ""))
    vlevel = int(v.get("level") or 0)
    vbit = parse_bitdepth_from_pix(vpix)

    cprim = low(v.get("color_primaries", ""))
    ctrans = low(v.get("color_transfer", ""))
    vhdr = (cprim in {"bt2020", "bt2020nc", "bt2020c"}) or (ctrans in {"smpte2084", "arib-std-b67"})

    # Audio track selection
    audio_stream, alang = select_audio_track(streams, cfg)
    aidx = stream_index(audio_stream)
    acodec = low((audio_stream or {}).get("codec_name", ""))
    ach = int((audio_stream or {}).get("channels") or 0)

    # Subtitle track selection
    sidx = -1
    slang = ""
    sforced = False
    subtitle_result = select_subtitle_track(streams, alang, cfg)
    if subtitle_result:
        sub_stream, sforced = subtitle_result
        sidx = stream_index(sub_stream)
        slang = (sub_stream.get("tags") or {}).get("language", "")

    pname = path.name.upper()

    if vcodec == "av1" or "AV1" in pname:
        video_ok = False
        reason_v = "AV1 (or filename AV1) => forced transcode"
    elif cfg.force_h264:
        video_ok = False
        reason_v = "--force-h264"
    elif vcodec == "h264":
        video_ok = (
            vbit <= 8
            and vpix in {"yuv420p", "yuvj420p"}
            and (not vhdr)
            and vprof not in {"high 10", "high10", "high 4:2:2", "high 4:4:4"}
            and (vlevel == 0 or vlevel <= 41)
        )
        if video_ok:
            reason_v = "H264 8-bit SDR"
        else:
            reason_v = f"H264 constraints not OK (bit={vbit},pix={vpix},hdr={vhdr},prof={vprof},level={vlevel})"
    elif vcodec in {"hevc", "h265"}:
        video_ok = bool(cfg.allow_hevc and (vbit <= 8) and (not vhdr))
        reason_v = "HEVC SDR 8-bit (--allow-hevc)" if video_ok else "HEVC => transcode (default)"
    else:
        video_ok = False
        reason_v = f"video codec {vcodec} => transcode"

    need_v = not video_ok

    audio_ok = acodec in {"aac", "mp3"}
    need_a = aidx >= 0 and bool(cfg.force_aac or not audio_ok)

    # With no audio at all, optionally synthesise a silent track (which must be encoded).
    add_silence = aidx < 0 and bool(cfg.add_silence_if_no_audio)
    if add_silence:
        need_a = True

    return Decision(
        need_v=need_v,
        need_a=need_a,
        aidx=aidx,
        add_silence=add_silence,
        reason_v=reason_v,
        vcodec=vcodec,
        vpix=vpix,
        vbit=vbit,
        vhdr=vhdr,
        vprof=vprof,
        vlevel=vlevel,
        acodec=acodec,
        ach=ach,
        alang=alang,
        format_name=format_name,
        sidx=sidx,
        slang=slang,
        sforced=sforced,
    )
782
+
783
+
784
+ # -------------------- FFMPEG COMMAND BUILDING --------------------
785
+
786
+
787
def build_transcode_cmd(
    inp: Path,
    decision: Decision,
    backend: str,
    tmp_out: Path,
    log_path: Optional[Path] = None,
    cfg: Optional[Config] = None,
) -> Tuple[List[str], str]:
    """
    Build ffmpeg transcoding command.

    The output muxer options (``-f`` and, for MP4, ``-movflags +faststart``)
    are emitted after all inputs, immediately before the output path: ffmpeg
    applies an option to the next file named on the command line, so placing
    them before ``-i`` would apply them to the input instead.

    Args:
        inp: Input file path.
        decision: Decision dataclass with transcoding requirements.
        backend: Encoding backend to use.
        tmp_out: Temporary output path.
        log_path: Optional path to write command log (the command is appended).
        cfg: Config instance (uses global CFG if not provided).

    Returns:
        Tuple of (command_args, stage_name) where stage_name is "TRANSCODE",
        "AUDIO" (audio-only re-encode) or "REMUX" (everything copied).

    Raises:
        RuntimeError: If ``cfg.container`` is not "mkv" or "mp4", or the
            backend is unknown to ``video_args_for``.
    """
    if cfg is None:
        cfg = CFG

    ext = cfg.container
    if ext not in ("mkv", "mp4"):
        raise RuntimeError("container must be mkv or mp4")

    args = ["ffmpeg", "-hide_banner", "-y", "-i", str(inp)]

    # ---- inputs and stream mapping ----
    if decision.add_silence:
        # Second input: generated silence standing in for the missing audio.
        args += ["-f", "lavfi", "-i", "anullsrc=channel_layout=stereo:sample_rate=48000"]
        args += ["-map", "0:v:0", "-map", "1:a:0"]
    else:
        args += ["-map", "0:v:0"]
        if decision.aidx >= 0:
            args += ["-map", f"0:{decision.aidx}"]

    # Map the selected subtitle, or all subtitles unless they are disabled.
    if decision.sidx >= 0:
        args += ["-map", f"0:{decision.sidx}"]
    elif not cfg.no_subtitles:
        args += ["-map", "0:s?"]

    if decision.add_silence:
        # anullsrc is endless; stop when the video ends.
        args += ["-shortest"]

    # ---- codecs ----
    if decision.need_v:
        args += video_args_for(backend, cfg)
    else:
        args += ["-c:v", "copy"]

    if decision.add_silence:
        args += ["-c:a", "aac", "-b:a", cfg.abr, "-ac", "2"]
    elif decision.aidx >= 0:
        if decision.need_a:
            args += ["-c:a", "aac", "-b:a", cfg.abr]
            if not cfg.keep_surround:
                args += ["-ac", "2"]
        else:
            args += ["-c:a", "copy"]

    args += ["-c:s", "copy" if ext == "mkv" else "mov_text"]

    # ---- metadata, chapters, attachments ----
    args += ["-map_metadata", "0" if cfg.preserve_metadata else "-1"]
    args += ["-map_chapters", "0" if cfg.preserve_chapters else "-1"]

    if cfg.preserve_attachments and ext == "mkv":
        args += ["-map", "0:t?", "-c:t", "copy"]

    args += ["-max_muxing_queue_size", "2048"]

    # ---- output format (must directly precede the output file) ----
    if ext == "mkv":
        args += ["-f", "matroska"]
    else:
        args += ["-f", "mp4", "-movflags", "+faststart"]
    args += [str(tmp_out)]

    if decision.need_v:
        stage = "TRANSCODE"
    elif decision.need_a:
        stage = "AUDIO"
    else:
        stage = "REMUX"

    if log_path:
        with log_path.open("a", encoding="utf-8", errors="replace") as lf:
            lf.write("CMD: " + shlex.join(args) + "\n")

    return args, stage
890
+
891
+
892
+ # -------------------- PROGRESS PARSING --------------------
893
+
894
+
895
def parse_ffmpeg_progress(line: str, dur_ms: int) -> Dict[str, Any]:
    """
    Parse FFmpeg progress line and return progress metrics.

    Fields that are missing or unparsable in ``line`` keep their default
    value. The fractional part of the timestamp may have any number of digits
    (``time=00:01:23.45`` as well as ``out_time=00:01:23.450000``), and the
    size may be reported in ``kB`` or ``KiB``.

    Args:
        line: A line from FFmpeg stderr output.
        dur_ms: Total duration in milliseconds (0 if unknown).

    Returns:
        Dict with progress metrics:
        - progress_percent: float (0-100)
        - fps: float
        - speed: str (e.g., "2.5x")
        - bitrate: str (e.g., "2500kbits/s")
        - current_time_ms: int
        - frame: int
        - size_bytes: int
    """
    result: Dict[str, Any] = {
        "progress_percent": 0.0,
        "fps": 0.0,
        "speed": "",
        "bitrate": "",
        "current_time_ms": 0,
        "frame": 0,
        "size_bytes": 0,
    }

    # Parse time: time=00:01:23.45
    m = re.search(r"time=\s*(\d+):(\d+):(\d+)\.(\d+)", line)
    if m:
        hours, minutes, seconds = int(m.group(1)), int(m.group(2)), int(m.group(3))
        # Normalise the fraction to milliseconds whatever its digit count:
        # "45" -> 450 ms, "450000" -> 450 ms.
        frac_ms = int(m.group(4)[:3].ljust(3, "0"))
        current_ms = (hours * 3600 + minutes * 60 + seconds) * 1000 + frac_ms
        result["current_time_ms"] = current_ms
        if dur_ms > 0:
            result["progress_percent"] = min(100.0, (current_ms / dur_ms) * 100)

    # Parse fps: fps=123.45
    m = re.search(r"fps=\s*([0-9.]+)", line)
    if m:
        try:
            result["fps"] = float(m.group(1))
        except ValueError:
            pass

    # Parse speed: speed=2.5x
    m = re.search(r"speed=\s*([0-9.]+)x", line)
    if m:
        try:
            result["speed"] = f"{float(m.group(1)):.1f}x"
        except ValueError:
            pass  # e.g. a lone "." matched by the character class

    # Parse bitrate: bitrate=2500kbits/s
    m = re.search(r"bitrate=\s*([^\s]+)", line)
    if m:
        result["bitrate"] = m.group(1)

    # Parse frame: frame=12345
    m = re.search(r"frame=\s*(\d+)", line)
    if m:
        result["frame"] = int(m.group(1))

    # Parse size: size=12345kB (older ffmpeg) or size=12345KiB (newer ffmpeg)
    m = re.search(r"size=\s*(\d+)\s*(?:kB|KiB)", line)
    if m:
        result["size_bytes"] = int(m.group(1)) * 1024

    return result
961
+
962
+
963
def calculate_eta(current_time_ms: int, dur_ms: int, speed_str: str, start_time: float) -> float:
    """
    Estimate the remaining conversion time in seconds.

    The encoder-reported speed is used when it parses to a positive number;
    otherwise the estimate is derived from the wall-clock time elapsed since
    ``start_time``.

    Args:
        current_time_ms: Current position in milliseconds.
        dur_ms: Total duration in milliseconds.
        speed_str: Speed string like "2.5x".
        start_time: Start time (time.time()).

    Returns:
        Estimated time remaining in seconds; 0.0 when it cannot be estimated
        or nothing remains.
    """
    if current_time_ms <= 0 or dur_ms <= 0:
        return 0.0

    left_ms = dur_ms - current_time_ms
    if left_ms <= 0:
        return 0.0

    # Preferred: media seconds left divided by the encoding speed factor.
    parsed = re.match(r"([0-9.]+)x", speed_str) if speed_str else None
    if parsed:
        try:
            factor = float(parsed.group(1))
        except ValueError:
            factor = 0.0
        if factor > 0:
            return (left_ms / 1000.0) / factor

    # Fallback: media milliseconds processed per wall-clock second so far.
    elapsed = time.time() - start_time
    if elapsed > 0 and current_time_ms > 0:
        per_second = current_time_ms / elapsed
        if per_second > 0:
            return left_ms / per_second / 1000.0

    return 0.0
1002
+
1003
+
1004
+ # -------------------- CALLBACK TYPES --------------------
1005
+
1006
+
1007
+ # Type alias for progress callback
1008
+ ProgressCallback = Callable[[Path, Dict[str, Any]], None]
1009
+
1010
+
1011
+ def _make_progress_dict(
1012
+ stage: str,
1013
+ progress_percent: float = 0.0,
1014
+ fps: float = 0.0,
1015
+ eta_seconds: float = 0.0,
1016
+ bitrate: str = "",
1017
+ speed: str = "",
1018
+ current_time_ms: int = 0,
1019
+ duration_ms: int = 0,
1020
+ error: Optional[str] = None,
1021
+ ) -> Dict[str, Any]:
1022
+ """Create a standardized progress dictionary for callbacks."""
1023
+ return {
1024
+ "stage": stage,
1025
+ "progress_percent": progress_percent,
1026
+ "fps": fps,
1027
+ "eta_seconds": eta_seconds,
1028
+ "bitrate": bitrate,
1029
+ "speed": speed,
1030
+ "current_time_ms": current_time_ms,
1031
+ "duration_ms": duration_ms,
1032
+ "error": error,
1033
+ }
1034
+
1035
+
1036
+ # -------------------- HIGH-LEVEL CONVERSION --------------------
1037
+
1038
+
1039
def get_output_tag(decision: Decision) -> str:
    """Return the filename tag describing what the conversion changes.

    ".h264" is included when video is transcoded and ".aac" when audio is
    transcoded (in that order); ".remux" is returned when neither applies.
    """
    parts = []
    if decision.need_v:
        parts.append(".h264")
    if decision.need_a:
        parts.append(".aac")
    return "".join(parts) or ".remux"
1049
+
1050
+
1051
def convert_file(
    input_path: Path,
    cfg: Optional[Config] = None,
    backend: Optional[str] = None,
    output_dir: Optional[Path] = None,
    log_path: Optional[Path] = None,
    progress_callback: Optional[ProgressCallback] = None,
) -> Tuple[bool, Optional[Path], str]:
    """
    Convert a single MKV file.

    The file is analysed, skipped when nothing needs doing (or when the
    output already exists), and otherwise transcoded into a temporary file
    that is moved to its final name on success. Failed attempts are retried
    up to ``cfg.retry_attempts`` times; when ``cfg.retry_fallback_cpu`` is
    set, the last retry switches to the "cpu" backend.

    Args:
        input_path: Path to input MKV file.
        cfg: Config instance (uses global CFG if not provided).
        backend: Backend to use (auto-detected if not provided).
        output_dir: Output directory (same as input if not provided).
        log_path: Path for conversion log.
        progress_callback: Optional callback function called with progress updates.
            The callback receives (filepath, progress_dict) where progress_dict contains:
            - stage: "checking" | "encoding" | "retry" | "done" | "skipped" | "failed"
            - progress_percent: float (0-100)
            - fps: float
            - eta_seconds: float
            - bitrate: str
            - speed: str
            - current_time_ms: int
            - duration_ms: int
            - error: Optional[str]
            Exceptions raised by the callback are swallowed.

    Returns:
        Tuple of (success, output_path, message). ``output_path`` is None on
        failure, when the input is already compatible, and for dry runs.

    Example:
        >>> def on_progress(filepath, progress):
        ...     print(f"{filepath.name}: {progress['stage']} - {progress['progress_percent']:.1f}%")
        >>> success, output, msg = convert_file(Path("movie.mkv"), progress_callback=on_progress)
    """
    if cfg is None:
        cfg = CFG

    if backend is None:
        backend = pick_backend(cfg)

    if output_dir is None:
        output_dir = input_path.parent

    def _call_callback(stage: str, **kwargs: Any) -> None:
        """Helper to safely call the progress callback."""
        if progress_callback is not None:
            try:
                progress_dict = _make_progress_dict(stage, **kwargs)
                progress_callback(input_path, progress_dict)
            except Exception:
                pass  # Don't let callback errors affect conversion

    def _discard(path: Path) -> None:
        """Best-effort removal of a partial or rejected output file."""
        try:
            if path.exists():
                path.unlink()
        except OSError:
            # A failed cleanup must not mask the real conversion result.
            pass

    # Signal checking stage
    _call_callback("checking", progress_percent=0.0)

    # Analyze file
    try:
        decision = decide_for(input_path, cfg)
    except Exception as e:
        _call_callback("failed", error=f"Analysis failed: {e}")
        return False, None, f"Analysis failed: {e}"

    # Check if already compatible
    if (not decision.need_v) and (not decision.need_a) and cfg.skip_when_ok:
        _call_callback("skipped", progress_percent=100.0)
        return True, None, "Already compatible"

    # Build output path
    tag = get_output_tag(decision)
    output_path = output_dir / f"{input_path.stem}{tag}{cfg.suffix}.{cfg.container}"

    if output_path.exists():
        _call_callback("skipped", progress_percent=100.0)
        return True, output_path, "Output already exists"

    input_size = file_size(input_path)
    space_error = check_disk_space(output_dir, output_dir, input_size, cfg)
    if space_error:
        _call_callback("failed", error=space_error)
        return False, None, space_error

    # Temp file lives next to the final output; the PID keeps concurrent
    # processes from colliding on the same name.
    tmp_path = output_dir / f"{input_path.stem}{tag}{cfg.suffix}.tmp.{os.getpid()}.{cfg.container}"

    if cfg.dryrun:
        cmd, _stage = build_transcode_cmd(input_path, decision, backend, tmp_path, log_path, cfg)
        _call_callback("skipped", progress_percent=100.0)
        return True, None, f"DRYRUN: {shlex.join(cmd)}"

    # Get duration for progress calculation
    dur_ms = probe_duration_ms(input_path)

    # Signal encoding start
    _call_callback("encoding", progress_percent=0.0, duration_ms=dur_ms)

    last_error = ""
    attempts = max(0, cfg.retry_attempts)
    total_attempts = 1 + attempts
    attempt_backend = backend

    for attempt in range(total_attempts):
        if attempt > 0:
            _call_callback("retry", error=last_error)
            if cfg.retry_delay_sec > 0:
                time.sleep(cfg.retry_delay_sec)

        cmd, stage = build_transcode_cmd(input_path, decision, attempt_backend, tmp_path, log_path, cfg)

        if progress_callback is not None:
            # Run ffmpeg with progress parsing; the helper handles its own
            # temp-file cleanup and "done"/"failed" notifications.
            success, out_path, message = _run_ffmpeg_with_callback(
                cmd, tmp_path, output_path, stage, dur_ms, input_path, progress_callback
            )
        else:
            # No callback: run ffmpeg to completion in one blocking call.
            out_path = None
            try:
                result = subprocess.run(cmd, capture_output=True, timeout=86400)  # 24h timeout
                if result.returncode == 0:
                    # Move temp to final
                    shutil.move(str(tmp_path), str(output_path))
                    success = True
                    out_path = output_path
                    message = f"{stage} complete"
                else:
                    success = False
                    message = f"ffmpeg error (rc={result.returncode})"
            except subprocess.TimeoutExpired:
                success = False
                message = "Timeout exceeded"
            except Exception as e:
                success = False
                message = f"Error: {e}"

            if not success:
                # Single cleanup point for every failure branch above.
                _discard(tmp_path)

        if success and out_path:
            quota_error = enforce_output_quota(out_path, input_size, cfg)
            if quota_error:
                _discard(out_path)
                _call_callback("failed", error=quota_error)
                return False, None, quota_error
            return True, out_path, message

        last_error = message

        # Switch to the CPU backend for the final attempt only.
        if cfg.retry_fallback_cpu and attempt_backend != "cpu" and attempt == total_attempts - 2:
            attempt_backend = "cpu"

    return False, None, last_error
1219
+
1220
+
1221
def _run_ffmpeg_with_callback(
    cmd: List[str],
    tmp_path: Path,
    output_path: Path,
    stage: str,
    dur_ms: int,
    input_path: Path,
    progress_callback: ProgressCallback,
) -> Tuple[bool, Optional[Path], str]:
    """
    Run FFmpeg command while parsing progress and calling callback.

    FFmpeg's stderr is read line by line and each line is handed to
    ``parse_ffmpeg_progress``. On exit code 0 the temp file is moved to
    ``output_path`` and a "done" update is sent; on any failure the temp
    file is removed (best effort) and a "failed" update is sent. The
    function does not raise for ordinary errors; they are reported in the
    returned message.

    Args:
        cmd: FFmpeg command to run.
        tmp_path: Temporary output path.
        output_path: Final output path.
        stage: Stage name (e.g., "TRANSCODE").
        dur_ms: Duration in milliseconds.
        input_path: Input file path.
        progress_callback: Callback function for progress updates.
            Exceptions raised by the callback are swallowed.

    Returns:
        Tuple of (success, output_path, message).
    """

    def _notify(stage_name: str, **fields: Any) -> None:
        """Send one progress update; callback errors never affect conversion."""
        try:
            progress_callback(input_path, _make_progress_dict(stage=stage_name, **fields))
        except Exception:
            pass

    def _fail(error_msg: str) -> Tuple[bool, Optional[Path], str]:
        """Remove the partial temp file, report the failure, build the result."""
        try:
            if tmp_path.exists():
                tmp_path.unlink()
        except OSError:
            # A failed cleanup must not mask the real error.
            pass
        _notify("failed", error=error_msg)
        return False, None, error_msg

    start_time = time.time()
    process = None

    try:
        # Only stderr is consumed. stdout goes to DEVNULL because an unread
        # PIPE can fill up and block the child forever.
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=False,
        )

        last_progress = 0.0

        if process.stderr is not None:
            # readline() returns b"" at EOF, which ends the iteration.
            for line in iter(process.stderr.readline, b""):
                line_str = line.decode("utf-8", errors="replace")

                # Parse progress from FFmpeg output
                progress_data = parse_ffmpeg_progress(line_str, dur_ms)

                # Report when progress advanced by more than 0.5 points, or
                # whenever the line carried a positive fps value.
                if progress_data["progress_percent"] > last_progress + 0.5 or progress_data["fps"] > 0:
                    last_progress = progress_data["progress_percent"]

                    eta = calculate_eta(progress_data["current_time_ms"], dur_ms, progress_data["speed"], start_time)

                    _notify(
                        "encoding",
                        progress_percent=progress_data["progress_percent"],
                        fps=progress_data["fps"],
                        eta_seconds=eta,
                        bitrate=progress_data["bitrate"],
                        speed=progress_data["speed"],
                        current_time_ms=progress_data["current_time_ms"],
                        duration_ms=dur_ms,
                    )

        # stderr hit EOF; collect the exit status.
        process.wait()

        if process.returncode != 0:
            return _fail(f"ffmpeg error (rc={process.returncode})")

        # Move temp to final
        shutil.move(str(tmp_path), str(output_path))

        _notify("done", progress_percent=100.0, duration_ms=dur_ms)
        return True, output_path, f"{stage} complete"

    except Exception as e:
        return _fail(f"Error: {e}")

    finally:
        if process is not None:
            # If we got here with the child still running (error while
            # reading progress, interrupt, ...), don't leave it orphaned.
            if process.poll() is None:
                process.kill()
                process.wait()
            if process.stderr is not None:
                process.stderr.close()
1350
+
1351
+
1352
def convert_batch(
    input_paths: List[Path],
    cfg: Optional[Config] = None,
    progress_callback: Optional[ProgressCallback] = None,
    output_dir: Optional[Path] = None,
    backend: Optional[str] = None,
) -> Dict[Path, Tuple[bool, Optional[Path], str]]:
    """
    Convert several files concurrently on a thread pool.

    Every input is handed to ``convert_file`` on a pool sized by
    ``cfg.encode_workers`` (a non-positive value means one worker).
    Invocations of the progress callback are serialized with a lock, and
    exceptions it raises are swallowed.

    Args:
        input_paths: List of input file paths to convert.
        cfg: Config instance (uses global CFG if not provided).
        progress_callback: Optional callback receiving (filepath, progress_dict)
            for each file.
        output_dir: Output directory for all files (each file's own directory
            if not provided).
        backend: Backend to use (auto-detected once for the batch if not provided).

    Returns:
        Dict mapping input_path -> (success, output_path, message). A worker
        that raises unexpectedly is recorded as (False, None, "Error: ...").

    Example:
        >>> from mkv2cast import convert_batch, Config
        >>> from pathlib import Path
        >>>
        >>> config = Config.for_library(hw="vaapi", encode_workers=2)
        >>>
        >>> def on_progress(filepath, progress):
        ...     print(f"{filepath.name}: {progress['progress_percent']:.1f}%")
        >>>
        >>> files = [Path("movie1.mkv"), Path("movie2.mkv")]
        >>> results = convert_batch(files, cfg=config, progress_callback=on_progress)
        >>>
        >>> for filepath, (success, output, msg) in results.items():
        ...     print(f"{filepath.name}: {'OK' if success else 'FAIL'} - {msg}")
    """
    if cfg is None:
        cfg = CFG

    if backend is None:
        backend = pick_backend(cfg)

    # Pool size: fall back to a single worker for non-positive settings.
    worker_count = 1
    if cfg.encode_workers > 0:
        worker_count = cfg.encode_workers

    outcomes: Dict[Path, Tuple[bool, Optional[Path], str]] = {}
    outcomes_guard = threading.Lock()
    notify_guard = threading.Lock()

    def thread_safe_callback(filepath: Path, progress: Dict[str, Any]) -> None:
        """Forward one update to the user callback, one thread at a time."""
        if progress_callback is None:
            return
        with notify_guard:
            try:
                progress_callback(filepath, progress)
            except Exception:
                pass

    def process_file(source: Path) -> Tuple[Path, Tuple[bool, Optional[Path], str]]:
        """Convert one file and pair the outcome with its input path."""
        if output_dir is not None:
            target_dir = output_dir
        else:
            target_dir = source.parent

        forwarded = thread_safe_callback if progress_callback else None
        outcome = convert_file(
            source,
            cfg=cfg,
            backend=backend,
            output_dir=target_dir,
            progress_callback=forwarded,
        )
        return source, outcome

    with ThreadPoolExecutor(max_workers=worker_count) as pool:
        # Queue everything up front, remembering which input each future serves.
        pending = {}
        for candidate in input_paths:
            pending[pool.submit(process_file, candidate)] = candidate

        # Harvest in completion order.
        for finished in as_completed(pending):
            origin = pending[finished]
            try:
                key, outcome = finished.result()
            except Exception as e:
                # Unexpected worker failure: record it and tell the caller.
                with outcomes_guard:
                    outcomes[origin] = (False, None, f"Error: {e}")

                if progress_callback:
                    thread_safe_callback(origin, _make_progress_dict(stage="failed", error=str(e)))
            else:
                with outcomes_guard:
                    outcomes[key] = outcome

    return outcomes