mkv2cast 1.2.7.post4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mkv2cast/pipeline.py ADDED
@@ -0,0 +1,641 @@
1
+ """
2
+ Pipeline orchestrator for parallel processing in mkv2cast.
3
+
4
+ Manages multiple integrity check and encode workers processing files in parallel.
5
+ """
6
+
7
+ import os
8
+ import re
9
+ import shutil
10
+ import signal
11
+ import subprocess
12
+ import threading
13
+ import time
14
+ from dataclasses import dataclass
15
+ from pathlib import Path
16
+ from queue import Empty, Queue
17
+ from typing import Callable, List, Optional, Tuple
18
+
19
+ from mkv2cast.config import Config
20
+ from mkv2cast.converter import (
21
+ Decision,
22
+ build_transcode_cmd,
23
+ check_disk_space,
24
+ decide_for,
25
+ enforce_output_quota,
26
+ probe_duration_ms,
27
+ )
28
+ from mkv2cast.history import HistoryRecorder
29
+ from mkv2cast.i18n import _
30
+ from mkv2cast.integrity import check_ffprobe_valid, file_size
31
+ from mkv2cast.ui.rich_ui import RichProgressUI
32
+
33
+
34
@dataclass
class EncodeJob:
    """Job data passed from integrity worker to encode worker."""

    # Source file to convert.
    inp: Path
    # Analysis result (from decide_for) describing which streams need work.
    decision: Decision
    # Per-file ffmpeg log destination.
    log_path: Path
    # Final output path; the encode worker moves `tmp` here on success.
    final: Path
    # Temporary output written while ffmpeg runs.
    tmp: Path
    # Media duration in milliseconds (0 when unknown), used for progress %.
    dur_ms: int
    # UI stage label returned by build_transcode_cmd.
    stage: str
    # Seconds spent in the integrity check, forwarded to history records.
    integrity_time: float
46
+
47
+
48
# Track active ffmpeg processes for cleanup on interrupt
_active_processes: List[subprocess.Popen] = []
_processes_lock = threading.Lock()


def register_process(proc: subprocess.Popen) -> None:
    """Add *proc* to the set of tracked subprocesses."""
    with _processes_lock:
        _active_processes.append(proc)


def unregister_process(proc: subprocess.Popen) -> None:
    """Remove *proc* from tracking; silently ignore unknown processes."""
    with _processes_lock:
        try:
            _active_processes.remove(proc)
        except ValueError:
            pass


def terminate_all_processes() -> None:
    """Ask every tracked process to stop, then force-kill stragglers."""
    # Snapshot under the lock so termination itself runs unlocked.
    with _processes_lock:
        snapshot = list(_active_processes)

    for child in snapshot:
        try:
            child.terminate()
        except Exception:
            pass

    # Give processes a moment to exit gracefully before escalating.
    time.sleep(0.5)
    for child in snapshot:
        try:
            if child.poll() is None:
                child.kill()
        except Exception:
            pass
85
+
86
+
87
def integrity_check_with_progress(
    path: Path,
    ui: RichProgressUI,
    worker_id: int,
    filename: str,
    log_path: Optional[Path] = None,
    stop_event: Optional[threading.Event] = None,
    cfg: Optional[Config] = None,
) -> Tuple[bool, float]:
    """
    Perform integrity check with Rich UI progress updates.

    Stages (each can short-circuit with a failure result):
      1. SIZE    - reject files smaller than 1 MB.
      2. STABLE  - wait ``cfg.stable_wait`` seconds and reject files whose
                   size changed meanwhile (still being copied/written).
      3. FFPROBE - reject files ffprobe cannot parse.
      4. DECODE  - optional full decode of the first video stream when
                   ``cfg.deep_check`` is enabled.

    Args:
        path: File to check.
        ui: Progress UI receiving start/update/stop notifications.
        worker_id: UI slot of the calling worker.
        filename: Display name used in UI updates.
        log_path: Accepted for signature compatibility; currently unused here.
        stop_event: When set, the check aborts and is reported as a failure.
        cfg: Configuration; falls back to the global ``CFG`` when None.

    Returns (success, elapsed_seconds).
    """
    if cfg is None:
        # Late import picks up the process-global configuration object.
        from mkv2cast.config import CFG

        cfg = CFG

    start_time = time.time()

    if not cfg.integrity_check:
        # Checking disabled: succeed immediately with zero elapsed time.
        return True, 0

    ui.start_integrity(worker_id, filename, path)

    # Stage 1: File size check
    ui.update_integrity(worker_id, "SIZE", 10, filename, inp=path)
    size = file_size(path)
    if size < 1024 * 1024:  # 1MB minimum
        ui.stop_integrity(worker_id, path)
        return False, time.time() - start_time

    # Stage 2: Stability check - poll once per second; a size change at the
    # end of the wait means the file is still being written.
    if cfg.stable_wait > 0:
        for i in range(cfg.stable_wait):
            if stop_event and stop_event.is_set():
                ui.stop_integrity(worker_id, path)
                return False, time.time() - start_time

            # Stability spans 10% -> 50% of the integrity progress bar.
            pct = 10 + int((i + 1) * 40 / cfg.stable_wait)
            ui.update_integrity(worker_id, "STABLE", pct, filename, inp=path)
            time.sleep(1)

        new_size = file_size(path)
        if size != new_size:
            ui.stop_integrity(worker_id, path)
            return False, time.time() - start_time

    # Stage 3: ffprobe check
    ui.update_integrity(worker_id, "FFPROBE", 60, filename, inp=path)
    if not check_ffprobe_valid(path):
        ui.stop_integrity(worker_id, path)
        return False, time.time() - start_time

    # Stage 4: Deep check (optional)
    if cfg.deep_check:
        ui.update_integrity(worker_id, "DECODE", 70, filename, inp=path)
        # Full decode of the first video stream to the null muxer; slow,
        # so it is capped at one hour.
        cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-i", str(path), "-map", "0:v:0", "-f", "null", "-"]
        try:
            result = subprocess.run(cmd, capture_output=True, timeout=3600)
            if result.returncode != 0:
                ui.stop_integrity(worker_id, path)
                return False, time.time() - start_time
        except Exception:
            # Timeout or spawn failure both count as integrity failures.
            ui.stop_integrity(worker_id, path)
            return False, time.time() - start_time

    ui.update_integrity(worker_id, "DONE", 100, filename, inp=path)
    ui.stop_integrity(worker_id, path)

    elapsed = time.time() - start_time
    return True, elapsed
161
+
162
+
163
def run_ffmpeg_with_progress(
    cmd: List[str],
    ui: RichProgressUI,
    worker_id: int,
    stage: str,
    filename: str,
    dur_ms: int,
    log_path: Optional[Path],
    inp: Path,
    stop_event: Optional[threading.Event] = None,
) -> int:
    """
    Run an ffmpeg command while updating Rich UI progress.

    stderr is read line-by-line, mirrored to *log_path* (best-effort) and
    parsed for ``time=`` / ``speed=`` fields to drive the progress bar.

    Args:
        cmd: Full ffmpeg command line (argv list).
        ui: Progress UI receiving update_encode notifications.
        worker_id: UI slot of the calling encode worker.
        stage: Stage label shown in the UI.
        filename: Display name for UI updates.
        dur_ms: Total media duration in ms (0 disables percentage).
        log_path: Optional per-file log that stderr is appended to.
        inp: Source file (forwarded to the UI for identification).
        stop_event: When set, the process is terminated early.

    Returns the process return code (-- the code after termination when
    stopped via *stop_event*).
    """
    # Ask ffmpeg for periodic stats on stderr so progress can be parsed.
    progress_cmd = list(cmd)
    if progress_cmd[0] == "ffmpeg" and "-progress" not in progress_cmd:
        progress_cmd.insert(1, "-stats")

    # stdout is discarded: nothing ever reads it, and an undrained PIPE
    # can fill its OS buffer and deadlock the child process.
    process = subprocess.Popen(
        progress_cmd,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        text=False,
    )
    register_process(process)

    try:
        # Only push UI updates when something visible changed.
        last_pct = 0
        last_speed = ""

        while True:
            if stop_event and stop_event.is_set():
                process.terminate()
                break

            if process.stderr is None:
                break
            line = process.stderr.readline()
            if not line:
                break

            line_str = line.decode("utf-8", errors="replace")

            # Mirror ffmpeg output to the per-file log (best-effort).
            if log_path:
                try:
                    with log_path.open("a", encoding="utf-8", errors="replace") as lf:
                        lf.write(line_str)
                except Exception:
                    pass

            # Parse progress
            pct, speed, out_ms = _parse_ffmpeg_progress(line_str, dur_ms)

            if pct > last_pct or speed != last_speed:
                last_pct = pct
                last_speed = speed
                ui.update_encode(worker_id, stage, pct, filename, speed=speed, inp=inp, out_ms=out_ms, dur_ms=dur_ms)

        process.wait()
        return process.returncode

    finally:
        # Always reap the child: an exception in the loop above must not
        # leak a running ffmpeg / zombie process.
        if process.poll() is None:
            try:
                process.terminate()
                process.wait(timeout=5)
            except Exception:
                pass
        unregister_process(process)
233
+
234
+
235
def _parse_ffmpeg_progress(line: str, dur_ms: int) -> Tuple[int, str, int]:
    """Extract progress from one ffmpeg stderr line.

    Returns (percentage, speed, current_ms); zero/empty values are used
    for any field the line does not carry.
    """
    pct = 0
    out_ms = 0

    # "time=HH:MM:SS.cc" -> position in milliseconds.
    time_match = re.search(r"time=\s*(\d+):(\d+):(\d+)\.(\d+)", line)
    if time_match:
        hours, minutes, seconds, centis = (int(g) for g in time_match.groups())
        out_ms = ((hours * 60 + minutes) * 60 + seconds) * 1000 + centis * 10
        if dur_ms > 0:
            pct = min(100, int(out_ms * 100 / dur_ms))

    # "speed=N.NNx" -> normalized to one decimal place.
    speed_match = re.search(r"speed=\s*([0-9.]+)x", line)
    speed = f"{float(speed_match.group(1)):.1f}x" if speed_match else ""

    return pct, speed, out_ms
255
+
256
+
257
class PipelineOrchestrator:
    """Orchestrates parallel integrity check and encoding with multiple workers.

    Two thread pools communicate through queues:

      integrity_queue (Path) -> integrity workers -> encode_queue (EncodeJob)
                                                  -> encode workers

    ``None`` is the shutdown sentinel on both queues. One sentinel per
    integrity worker is enqueued up front; the *last* integrity worker to
    consume its sentinel forwards one sentinel per encode worker, so encode
    workers only stop after every integrity worker has finished producing.
    """

    def __init__(
        self,
        targets: List[Path],
        backend: str,
        ui: RichProgressUI,
        cfg: Config,
        encode_workers: int,
        integrity_workers: int,
        get_log_path: Callable[[Path], Path],
        get_tmp_path: Callable[[Path, int, str], Path],
        output_exists_fn: Callable[[Path, Config], bool],
        history: Optional[HistoryRecorder] = None,
    ):
        """Register all *targets* with the UI and pre-fill the work queue.

        Args:
            targets: Input files to process.
            backend: Encoder backend name (may fall back to "cpu" on retry).
            ui: Progress UI shared by all workers.
            cfg: Global configuration.
            encode_workers: Number of encode threads.
            integrity_workers: Number of integrity-check threads.
            get_log_path: Maps an input file to its log path.
            get_tmp_path: Maps (input, worker_id, tag) to a temp output path.
            output_exists_fn: Predicate for "output already present".
            history: Optional recorder for per-file outcome history.
        """
        self.targets = targets
        self.backend = backend
        self.ui = ui
        self.cfg = cfg
        self.history = history
        self.encode_workers_count = encode_workers
        self.integrity_workers_count = integrity_workers
        self.get_log_path = get_log_path
        self.get_tmp_path = get_tmp_path
        self.output_exists_fn = output_exists_fn

        # Queues (None acts as the per-worker shutdown sentinel)
        self.integrity_queue: Queue[Optional[Path]] = Queue()
        self.encode_queue: Queue[Optional[EncodeJob]] = Queue()

        # Control
        self.stop_event = threading.Event()
        self.interrupted = False

        # Track sentinels so the last integrity worker can signal encoders
        self.integrity_sentinels_remaining = integrity_workers
        self.integrity_sentinels_lock = threading.Lock()

        # Register all jobs and fill integrity queue
        for t in targets:
            self.ui.register_job(t, backend=self.backend)
            self.integrity_queue.put(t)

        # Add sentinels for integrity workers.
        # `_i` (not `_`) so the gettext alias `_` is not shadowed.
        for _i in range(integrity_workers):
            self.integrity_queue.put(None)

    def integrity_worker(self, worker_id: int):
        """Worker that performs integrity checks and prepares encode jobs.

        Pulls paths from ``integrity_queue``; each path either ends here
        (skipped/failed, recorded in UI/history) or is turned into an
        EncodeJob and forwarded to ``encode_queue``.
        """
        while not self.stop_event.is_set():
            # Short timeout so stop_event is re-checked regularly.
            try:
                inp = self.integrity_queue.get(timeout=0.5)
            except Empty:
                continue

            if inp is None:
                # Sentinel - check if we're the last one
                with self.integrity_sentinels_lock:
                    self.integrity_sentinels_remaining -= 1
                    if self.integrity_sentinels_remaining == 0:
                        # Last integrity worker - send sentinels to encode workers
                        for _j in range(self.encode_workers_count):
                            self.encode_queue.put(None)
                break

            filename = inp.name
            input_size = file_size(inp)
            integrity_time = 0.0

            if self.history:
                self.history.start(inp, input_size)

            # Check if output already exists
            if self.output_exists_fn(inp, self.cfg):
                reason = _("output exists")
                self.ui.mark_skipped(inp, reason)
                if self.history:
                    self.history.finish(inp, "skipped", error_msg=reason, integrity_time=integrity_time)
                continue

            log_path = self.get_log_path(inp)

            # Run integrity check (a failed check is a skip, an exception
            # during the check is a failure)
            try:
                success, integrity_time = integrity_check_with_progress(
                    inp, self.ui, worker_id, filename, log_path, self.stop_event, self.cfg
                )
                if not success:
                    reason = _("integrity failed")
                    self.ui.mark_skipped(inp, reason)
                    if self.history:
                        self.history.finish(inp, "skipped", error_msg=reason, integrity_time=integrity_time)
                    continue
            except Exception as e:
                reason = _("integrity error") + f": {e}"
                self.ui.mark_failed(inp, reason)
                if self.history:
                    self.history.finish(inp, "failed", error_msg=reason, integrity_time=integrity_time)
                continue

            # Analyze file
            try:
                d = decide_for(inp, self.cfg)
            except Exception as e:
                reason = _("analysis error") + f": {e}"
                self.ui.mark_failed(inp, reason)
                if self.history:
                    self.history.finish(inp, "failed", error_msg=reason, integrity_time=integrity_time)
                continue

            # Check if already compatible
            if (not d.need_v) and (not d.need_a) and self.cfg.skip_when_ok:
                reason = _("compatible")
                self.ui.mark_skipped(inp, reason)
                if self.history:
                    self.history.finish(inp, "skipped", error_msg=reason, integrity_time=integrity_time)
                continue

            # Build output paths; the tag encodes which conversions run
            tag = ""
            if d.need_v:
                tag += ".h264"
            if d.need_a:
                tag += ".aac"
            if not tag:
                tag = ".remux"

            final = inp.parent / f"{inp.stem}{tag}{self.cfg.suffix}.{self.cfg.container}"
            if final.exists():
                reason = _("output exists")
                self.ui.mark_skipped(inp, reason)
                if self.history:
                    self.history.finish(inp, "skipped", error_msg=reason, integrity_time=integrity_time)
                continue

            tmp = self.get_tmp_path(inp, worker_id, tag)
            if tmp.exists():
                # Presumably another run is (or was) writing this tmp file
                reason = _("tmp exists")
                self.ui.mark_skipped(inp, reason)
                if self.history:
                    self.history.finish(inp, "skipped", error_msg=reason, integrity_time=integrity_time)
                continue

            space_error = check_disk_space(final.parent, tmp.parent, input_size, self.cfg)
            if space_error:
                self.ui.mark_failed(inp, space_error)
                if self.history:
                    self.history.finish(inp, "failed", error_msg=space_error, integrity_time=integrity_time)
                continue

            # Build ffmpeg command
            cmd, stage = build_transcode_cmd(inp, d, self.backend, tmp, log_path, self.cfg)
            dur_ms = probe_duration_ms(inp)

            if self.cfg.dryrun:
                self.ui.log(f"DRYRUN: {' '.join(cmd)}")
                reason = _("dryrun")
                self.ui.mark_skipped(inp, reason)
                if self.history:
                    self.history.finish(inp, "skipped", error_msg=reason, integrity_time=integrity_time)
                continue

            # Create encode job
            job = EncodeJob(
                inp=inp,
                decision=d,
                log_path=log_path,
                final=final,
                tmp=tmp,
                dur_ms=dur_ms,
                stage=stage,
                integrity_time=integrity_time,
            )
            self.encode_queue.put(job)

    def encode_worker(self, worker_id: int):
        """Worker that performs encoding.

        Consumes EncodeJobs, running ffmpeg with up to
        ``1 + cfg.retry_attempts`` attempts per job; the last retry may
        fall back to the CPU backend when ``cfg.retry_fallback_cpu`` is set.
        """
        while not self.stop_event.is_set():
            # Short timeout so stop_event is re-checked regularly.
            try:
                job = self.encode_queue.get(timeout=0.5)
            except Empty:
                continue

            if job is None:
                break

            filename = job.inp.name
            self.ui.start_encode(worker_id, filename, job.inp, job.final.name)

            attempts = max(0, self.cfg.retry_attempts)
            total_attempts = 1 + attempts
            attempt_backend = self.backend
            last_error = ""
            encode_time_total = 0.0

            for attempt in range(total_attempts):
                if attempt > 0:
                    self.ui.log(f"{job.inp.name}: retry {attempt}/{attempts}")
                    if self.cfg.retry_delay_sec > 0:
                        time.sleep(self.cfg.retry_delay_sec)

                # Rebuild command (backend may change); the original stage
                # label from analysis (job.stage) is kept for the UI.
                cmd, _stage = build_transcode_cmd(
                    job.inp, job.decision, attempt_backend, job.tmp, job.log_path, self.cfg
                )

                attempt_start = time.time()

                try:
                    rc = run_ffmpeg_with_progress(
                        cmd,
                        self.ui,
                        worker_id,
                        job.stage,
                        filename,
                        job.dur_ms,
                        job.log_path,
                        job.inp,
                        self.stop_event,
                    )
                    last_error = f"ffmpeg rc={rc}"
                except Exception as e:
                    rc = -1
                    last_error = _("encode error") + f": {e}"

                encode_time_total += time.time() - attempt_start

                if rc == 0:
                    # Success path: publish tmp -> final, then enforce the
                    # output size quota. Either way this job is finished,
                    # so the retry loop ends with the break below.
                    try:
                        shutil.move(str(job.tmp), str(job.final))
                        quota_error = enforce_output_quota(job.final, file_size(job.inp), self.cfg)
                        if quota_error:
                            job.final.unlink(missing_ok=True)
                            self.ui.mark_failed(job.inp, quota_error)
                            if self.history:
                                self.history.finish(
                                    job.inp,
                                    "failed",
                                    error_msg=quota_error,
                                    encode_time=encode_time_total,
                                    integrity_time=job.integrity_time,
                                )
                        else:
                            output_size = job.final.stat().st_size if job.final.exists() else 0
                            self.ui.mark_done(job.inp, final_path=job.final, output_size=output_size)
                            if self.history:
                                self.history.finish(
                                    job.inp,
                                    "done",
                                    output_path=job.final,
                                    output_size=output_size,
                                    encode_time=encode_time_total,
                                    integrity_time=job.integrity_time,
                                )
                    except Exception as e:
                        try:
                            job.tmp.unlink(missing_ok=True)
                        except Exception:
                            pass
                        reason = _("move error") + f": {e}"
                        self.ui.mark_failed(job.inp, reason)
                        if self.history:
                            self.history.finish(
                                job.inp,
                                "failed",
                                error_msg=reason,
                                encode_time=encode_time_total,
                                integrity_time=job.integrity_time,
                            )
                    break

                # Cleanup temp file before retry
                try:
                    job.tmp.unlink(missing_ok=True)
                except Exception:
                    pass

                if self.stop_event.is_set():
                    reason = _("interrupted")
                    self.ui.mark_failed(job.inp, reason)
                    if self.history:
                        self.history.finish(
                            job.inp,
                            "interrupted",
                            error_msg=reason,
                            encode_time=encode_time_total,
                            integrity_time=job.integrity_time,
                        )
                    break

                if attempt < total_attempts - 1:
                    # Before the final attempt, optionally fall back to CPU.
                    if self.cfg.retry_fallback_cpu and attempt_backend != "cpu" and attempt == total_attempts - 2:
                        attempt_backend = "cpu"
                    continue

                # All attempts exhausted.
                self.ui.mark_failed(job.inp, last_error)
                if self.history:
                    self.history.finish(
                        job.inp,
                        "failed",
                        error_msg=last_error,
                        encode_time=encode_time_total,
                        integrity_time=job.integrity_time,
                    )
                break

    def run(self) -> Tuple[int, int, int, bool]:
        """Run the pipeline. Returns (ok, skipped, failed, interrupted).

        Blocks until all worker threads finish (or the pipeline is
        interrupted via SIGINT). Installs a temporary SIGINT handler that
        sets stop_event and kills active ffmpeg processes; the previous
        handler is restored on exit.
        """
        # Create worker threads (daemon=True so a hard exit cannot hang)
        integrity_threads = []
        for i in range(self.integrity_workers_count):
            t = threading.Thread(target=self.integrity_worker, args=(i,), name=f"integrity_worker_{i}", daemon=True)
            integrity_threads.append(t)

        encode_threads = []
        for i in range(self.encode_workers_count):
            t = threading.Thread(target=self.encode_worker, args=(i,), name=f"encode_worker_{i}", daemon=True)
            encode_threads.append(t)

        # Signal handler
        def on_sigint(_sig, _frm):
            self.interrupted = True
            self.stop_event.set()
            terminate_all_processes()

        old_handler = signal.signal(signal.SIGINT, on_sigint)

        try:
            # Start UI
            self.ui.start()

            # Start all threads
            for t in integrity_threads:
                t.start()
            for t in encode_threads:
                t.start()

            # Wait for all threads
            for t in integrity_threads:
                t.join()
            for t in encode_threads:
                t.join()

        finally:
            # Restore handler, stop the UI, and reap any leftover ffmpeg.
            signal.signal(signal.SIGINT, old_handler)
            self.ui.stop()
            terminate_all_processes()

        if self.interrupted and self.history:
            self.history.interrupt_all()

        # Get final stats
        ok, skipped, failed, _processed = self.ui.get_stats()
        return (ok, skipped, failed, self.interrupted)
612
+
613
+
614
def auto_detect_workers() -> Tuple[int, int]:
    """Auto-detect optimal number of workers based on system resources.

    Returns (encode_workers, integrity_workers); both are always >= 1.
    Falls back to 4 CPUs / 8 GB RAM when the system cannot be queried
    (e.g. /proc/meminfo does not exist on non-Linux platforms).
    """
    try:
        cpu_count = os.cpu_count() or 4
    except Exception:
        cpu_count = 4

    # Read total RAM from /proc/meminfo (Linux only; default 8 GB elsewhere)
    ram_gb = 8
    try:
        with open("/proc/meminfo") as f:
            for line in f:
                if line.startswith("MemTotal:"):
                    parts = line.split()
                    if len(parts) >= 2:
                        # MemTotal is reported in kB; convert to whole GB.
                        ram_gb = int(parts[1]) // (1024 * 1024)
                    break
    except Exception:
        pass

    # Encode workers: limited by RAM (each encode can use 2-4GB)
    encode_workers = max(1, min(cpu_count // 2, ram_gb // 4))

    # Integrity workers: limited by I/O, typically 2-4 is good.
    # Clamped to >= 1: the bare min(...) yields 0 on single-core machines
    # (cpu_count // 2 == 0), which would leave the pipeline with no
    # integrity workers and therefore no jobs ever reaching the encoders.
    integrity_workers = max(1, min(4, cpu_count // 2, encode_workers * 2))

    return encode_workers, integrity_workers
@@ -0,0 +1,26 @@
1
+ """
2
+ User interface components for mkv2cast.
3
+
4
+ Provides both Rich-based and legacy text-based progress displays.
5
+ """
6
+
7
+ import importlib.util
8
+
9
+ from mkv2cast.ui.legacy_ui import LegacyProgressUI, UIState
10
+
11
+ # Check if Rich is available using importlib
12
+ RICH_AVAILABLE = importlib.util.find_spec("rich") is not None
13
+
14
+ __all__ = [
15
+ "RICH_AVAILABLE",
16
+ "LegacyProgressUI",
17
+ "UIState",
18
+ ]
19
+
20
+ # Conditionally export Rich UI classes
21
+ if RICH_AVAILABLE:
22
+ from mkv2cast.ui.rich_ui import RichProgressUI # noqa: F401
23
+ from mkv2cast.ui.simple_rich import SimpleRichUI # noqa: F401
24
+
25
+ __all__.append("RichProgressUI")
26
+ __all__.append("SimpleRichUI")