mkv2cast 1.2.7.post4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mkv2cast/integrity.py ADDED
@@ -0,0 +1,176 @@
1
+ """
2
+ File integrity checking for mkv2cast.
3
+
4
+ Verifies source files before processing to avoid corrupted outputs.
5
+ Includes:
6
+ - File size stability check (for files being downloaded/copied)
7
+ - Basic ffprobe validation
8
+ - Optional deep decode verification
9
+ """
10
+
11
+ import subprocess
12
+ import time
13
+ from pathlib import Path
14
+ from typing import Callable, Optional, Tuple
15
+
16
+
17
def file_size(path: Path) -> int:
    """Return the size of ``path`` in bytes.

    Any failure while stat-ing the path (for example a missing file) is
    swallowed and reported as a size of 0, so callers never see an
    exception from this helper.
    """
    try:
        size = path.stat().st_size
    except Exception:
        # Best-effort helper: an unreadable path simply counts as empty.
        return 0
    else:
        return size
23
+
24
+
25
def run_quiet(cmd: list, timeout: float = 10.0) -> bool:
    """Execute ``cmd`` with its output discarded and report success.

    Args:
        cmd: Command and arguments, passed to ``subprocess.run`` as a list.
        timeout: Maximum number of seconds to let the command run.

    Returns:
        True only when the command ran and exited with status 0. Any
        exception (command not found, timeout, ...) yields False.
    """
    try:
        completed = subprocess.run(
            cmd,
            timeout=timeout,
            stderr=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
        )
        succeeded = completed.returncode == 0
    except Exception:
        succeeded = False
    return succeeded
32
+
33
+
34
def check_file_stable(path: Path, wait_seconds: int = 3) -> bool:
    """
    Report whether the file's size stays the same across a short wait.

    Args:
        path: Path to the file.
        wait_seconds: Seconds to sleep between the two size samples. A
            value of zero or less disables the check.

    Returns:
        True when the check is disabled, or when both size samples are
        equal. False when the first sample is below 1 MiB (treated as
        suspicious) or when the size changed during the wait.
    """
    if wait_seconds > 0:
        before = file_size(path)
        if before >= 1024 * 1024:
            time.sleep(wait_seconds)
            after = file_size(path)
            return before == after
        # Less than 1MB is suspicious
        return False
    return True
56
+
57
+
58
def check_ffprobe_valid(path: Path, timeout: float = 8.0) -> bool:
    """
    Ask ffprobe to read the container duration of ``path``.

    Only ffprobe's exit status is inspected; the printed duration itself
    is discarded by ``run_quiet``.

    Args:
        path: Path to the file.
        timeout: Timeout in seconds for the ffprobe invocation.

    Returns:
        True if ffprobe exits with status 0, False otherwise (including
        when ffprobe cannot be started or times out).
    """
    probe_cmd = ["ffprobe", "-v", "error"]
    probe_cmd += ["-show_entries", "format=duration"]
    probe_cmd += ["-of", "default=nw=1:nk=1"]
    probe_cmd.append(str(path))
    return run_quiet(probe_cmd, timeout=timeout)
73
+
74
+
75
def check_deep_decode(
    path: Path,
    log_path: Optional[Path] = None,
    progress_callback: Optional[Callable[[int, str], None]] = None,
    dur_ms: int = 0,
) -> bool:
    """
    Perform deep decode verification by decoding the first video stream.

    ffmpeg decodes stream ``0:v:0`` into the null muxer. This is slow, but
    a non-zero exit status reveals files ffmpeg cannot decode to the end.

    Args:
        path: Path to the file.
        log_path: Optional log file. A header line is appended before the
            run; anything ffmpeg prints on stderr and the reason for a
            failure are appended afterwards.
        progress_callback: Accepted for interface compatibility; this
            function does not currently invoke it.
        dur_ms: Accepted for interface compatibility; currently unused.

    Returns:
        True if ffmpeg exits with status 0. False on a non-zero exit, on
        timeout, or when ffmpeg cannot be started.
    """
    timeout_seconds = 3600  # 1 hour timeout

    def _append_log(message: str) -> None:
        # Open per message so the log is never held open during the decode.
        if log_path:
            with log_path.open("a", encoding="utf-8", errors="replace") as lf:
                lf.write(message)

    _append_log("DEEP_CHECK: decode video stream\n")

    cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-i", str(path), "-map", "0:v:0", "-f", "null", "-"]

    try:
        # stdout carries nothing useful with the null muxer; only stderr is kept.
        result = subprocess.run(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            timeout=timeout_seconds,
        )
    except subprocess.TimeoutExpired:
        _append_log(f"DEEP_CHECK: timed out after {timeout_seconds}s\n")
        return False
    except Exception as exc:
        # Typically ffmpeg is not installed or not executable.
        _append_log(f"DEEP_CHECK: could not run ffmpeg: {exc}\n")
        return False

    # Record ffmpeg's own diagnostics so a failed check can be investigated.
    details = result.stderr.decode("utf-8", errors="replace").strip()
    if details:
        _append_log(f"DEEP_CHECK: ffmpeg output:\n{details}\n")

    if result.returncode != 0:
        _append_log(f"DEEP_CHECK: failed (exit code {result.returncode})\n")
        return False
    return True
108
+
109
+
110
def integrity_check(
    path: Path,
    enabled: bool = True,
    stable_wait: int = 3,
    deep_check: bool = False,
    log_path: Optional[Path] = None,
    progress_callback: Optional[Callable[[str, int, str], None]] = None,
) -> Tuple[bool, float]:
    """
    Perform complete integrity check on a file.

    The stages run in order and the first failure ends the check:
    minimum size, size stability, ffprobe validation and, optionally, a
    full decode of the video stream.

    Args:
        path: Path to the file to check.
        enabled: If False, skip all checks and return ``(True, 0.0)``.
        stable_wait: Seconds to watch the file size for changes. Zero or
            less skips the stability stage.
        deep_check: If True, perform full decode verification.
        log_path: Optional path to write logs (used by the deep check).
        progress_callback: Optional callback(stage, pct, message) for UI updates.

    Returns:
        Tuple of (success, elapsed_seconds).
    """
    if not enabled:
        return True, 0.0

    # Monotonic clock: the elapsed time must not jump if the wall clock is adjusted.
    started = time.monotonic()
    min_size = 1024 * 1024  # Files under 1MB are rejected outright

    def _finish(ok: bool) -> Tuple[bool, float]:
        return ok, time.monotonic() - started

    def _notify(stage: str, pct: int, message: str) -> None:
        if progress_callback:
            progress_callback(stage, pct, message)

    # Stage 1: Check file exists and has reasonable size
    _notify("CHECK", 0, "Checking file...")
    size = file_size(path)
    if size < min_size:
        return _finish(False)

    # Stage 2: File stability check. The size is sampled after every
    # one-second tick so a file that is still growing fails immediately
    # instead of after the full wait.
    for tick in range(stable_wait):
        pct = int(((tick + 1) * 50) / stable_wait)
        _notify("STABLE", pct, f"Waiting {stable_wait - tick - 1}s...")
        time.sleep(1)
        if file_size(path) != size:
            return _finish(False)

    # Stage 3: ffprobe validation
    _notify("FFPROBE", 60, "Validating with ffprobe...")
    if not check_ffprobe_valid(path):
        return _finish(False)

    # Stage 4: Deep check (optional)
    if deep_check:
        _notify("DECODE", 70, "Deep verification...")
        if not check_deep_decode(path, log_path):
            return _finish(False)

    _notify("DONE", 100, "OK")
    return _finish(True)
@@ -0,0 +1,311 @@
1
+ """
2
+ JSON progress output for mkv2cast.
3
+
4
+ This module provides structured JSON output for integration
5
+ with other applications (web UIs, monitoring tools, etc.).
6
+ """
7
+
8
+ import json
9
+ import sys
10
+ import time
11
+ from dataclasses import asdict, dataclass, field
12
+ from pathlib import Path
13
+ from typing import Any, Dict, List, Optional
14
+
15
+
16
@dataclass
class FileProgress:
    """Snapshot of where one input file stands in the pipeline.

    Only ``filename``, ``filepath`` and ``status`` are required; every
    other field starts at a neutral default.
    """

    # --- identity and lifecycle -------------------------------------
    filename: str
    filepath: str
    status: str  # "queued", "checking", "encoding", "done", "skipped", "failed"

    # --- encoder statistics -----------------------------------------
    progress_percent: float = 0.0
    current_frame: int = 0
    total_frames: int = 0
    current_time_ms: int = 0
    duration_ms: int = 0
    fps: float = 0.0
    speed: str = ""
    bitrate: str = ""
    size_bytes: int = 0
    eta_seconds: float = 0.0

    # --- outcome ----------------------------------------------------
    error: Optional[str] = None
    started_at: Optional[float] = None
    finished_at: Optional[float] = None
    output_path: Optional[str] = None
37
+
38
+
39
@dataclass
class OverallProgress:
    """Batch-wide counters and settings; every field has a default."""

    # --- file counters ----------------------------------------------
    total_files: int = 0
    processed_files: int = 0
    converted_files: int = 0
    skipped_files: int = 0
    failed_files: int = 0

    # --- live progress ----------------------------------------------
    current_file: Optional[str] = None
    overall_percent: float = 0.0
    eta_seconds: float = 0.0
    started_at: Optional[float] = None

    # --- run configuration ------------------------------------------
    backend: str = ""
    encode_workers: int = 1
    integrity_workers: int = 1
55
+
56
+
57
@dataclass
class JSONProgressState:
    """Everything that is serialized into one JSON progress event.

    Mutable members use ``default_factory`` so that each instance owns
    its own containers; ``timestamp`` defaults to the creation time.
    """

    # --- event envelope ---------------------------------------------
    version: str = "1.0"
    timestamp: float = field(default_factory=time.time)
    event: str = "progress"  # "start", "progress", "file_start", "file_done", "complete"

    # --- payload ----------------------------------------------------
    overall: OverallProgress = field(default_factory=OverallProgress)
    files: Dict[str, FileProgress] = field(default_factory=dict)
    current_encoding: List[str] = field(default_factory=list)
    current_checking: List[str] = field(default_factory=list)
68
+
69
+
70
class JSONProgressOutput:
    """Manages JSON progress output, one JSON document per line.

    Each public ``file_*`` / ``start`` / ``complete`` method mutates
    ``self.state``; most then print the whole state as a single JSON line
    to the stream. ``file_queued`` and ``file_check_done`` update the
    state without emitting an event.

    NOTE(review): the state is mutated without any locking. If the encode
    and integrity workers are threads, confirm that callers serialize
    access to this object.
    """

    def __init__(self, stream=None):
        """Create the emitter.

        Args:
            stream: Writable text stream. Defaults to ``sys.stdout`` as it
                is at construction time, so that redirected or replaced
                stdout is honoured rather than the object that happened
                to be ``sys.stdout`` when the module was imported.
        """
        self.stream = sys.stdout if stream is None else stream
        self.state = JSONProgressState()
        # Durations learned from probing, keyed by str(filepath).
        self._file_durations: Dict[str, int] = {}

    def _emit(self, event: str, extra: Optional[Dict[str, Any]] = None) -> None:
        """Emit a JSON progress event.

        The full state is serialized; keys in ``extra`` are merged on top
        of the top-level state keys.
        """
        self.state.timestamp = time.time()
        self.state.event = event
        output = asdict(self.state)
        if extra:
            output.update(extra)
        print(json.dumps(output), file=self.stream, flush=True)

    def start(
        self,
        total_files: int,
        backend: str,
        encode_workers: int,
        integrity_workers: int,
    ) -> None:
        """Signal start of processing and reset the overall counters."""
        self.state.overall = OverallProgress(
            total_files=total_files,
            backend=backend,
            encode_workers=encode_workers,
            integrity_workers=integrity_workers,
            started_at=time.time(),
        )
        self._emit("start")

    def set_file_duration(self, filepath: str, duration_ms: int) -> None:
        """Set the duration for a file (from probe)."""
        self._file_durations[filepath] = duration_ms

    def file_queued(self, filepath: Path, duration_ms: int = 0) -> None:
        """Signal a file has been queued (no event is emitted).

        When ``duration_ms`` is not given, a duration previously stored
        with ``set_file_duration`` is used.
        """
        key = str(filepath)
        resolved_ms = duration_ms or self._file_durations.get(key, 0)
        self.state.files[key] = FileProgress(
            filename=filepath.name,
            filepath=key,
            status="queued",
            duration_ms=resolved_ms,
        )
        # Store the resolved value: writing the raw argument would replace
        # a known probe duration with 0 when the caller omitted it.
        self._file_durations[key] = resolved_ms

    def file_checking(self, filepath: Path) -> None:
        """Signal a file integrity check has started."""
        key = str(filepath)
        entry = self.state.files.get(key)
        if entry is not None:
            entry.status = "checking"
            entry.started_at = time.time()
        self.state.current_checking.append(filepath.name)
        self._emit("file_checking", {"file": filepath.name})

    def file_check_done(self, filepath: Path) -> None:
        """Signal a file integrity check has finished (no event is emitted)."""
        if filepath.name in self.state.current_checking:
            self.state.current_checking.remove(filepath.name)

    def file_encoding_start(self, filepath: Path, duration_ms: int = 0) -> None:
        """Signal encoding has started for a file.

        A file that was never queued is registered on the fly.
        """
        key = str(filepath)
        now = time.time()
        entry = self.state.files.get(key)
        if entry is None:
            self.state.files[key] = FileProgress(
                filename=filepath.name,
                filepath=key,
                status="encoding",
                duration_ms=duration_ms or self._file_durations.get(key, 0),
                started_at=now,
            )
        else:
            entry.status = "encoding"
            entry.started_at = now
            if duration_ms:
                entry.duration_ms = duration_ms
            elif not entry.duration_ms:
                # Fall back to a probe duration recorded after queueing.
                entry.duration_ms = self._file_durations.get(key, 0)
        self.state.current_encoding.append(filepath.name)
        self.state.overall.current_file = filepath.name
        self._emit("file_start", {"file": filepath.name})

    def file_progress(
        self,
        filepath: Path,
        frame: int = 0,
        fps: float = 0.0,
        time_ms: int = 0,
        bitrate: str = "",
        speed: str = "",
        size_bytes: int = 0,
    ) -> None:
        """Update encoding progress for a file.

        Calls for files that are not tracked are ignored. Percentage and
        ETA are only recomputed when the file's duration is known.
        """
        key = str(filepath)
        fp = self.state.files.get(key)
        if fp is None:
            return

        fp.current_frame = frame
        fp.current_time_ms = time_ms
        fp.fps = fps
        fp.bitrate = bitrate
        fp.speed = speed
        fp.size_bytes = size_bytes

        if fp.duration_ms > 0:
            # Calculate progress percentage
            fp.progress_percent = min(100.0, (time_ms / fp.duration_ms) * 100)

            # Calculate ETA from the observed wall-clock cost per encoded
            # millisecond. Clamped at zero: the encoder may report a
            # position slightly past the probed duration.
            if time_ms > 0 and fp.started_at:
                elapsed = time.time() - fp.started_at
                remaining_ms = max(0, fp.duration_ms - time_ms)
                fp.eta_seconds = (remaining_ms * (elapsed / time_ms)) / 1000

        self._update_overall()
        self._emit("progress")

    def file_done(
        self,
        filepath: Path,
        output_path: Optional[Path] = None,
        skipped: bool = False,
        error: Optional[str] = None,
    ) -> None:
        """Signal a file has finished processing.

        ``error`` takes precedence over ``skipped``. For a file that is
        not tracked only ``processed_files`` is incremented and the
        emitted status is "unknown".
        """
        key = str(filepath)
        status = "unknown"
        fp = self.state.files.get(key)
        if fp is not None:
            fp.finished_at = time.time()
            if error:
                fp.status = "failed"
                fp.error = error
                self.state.overall.failed_files += 1
            elif skipped:
                fp.status = "skipped"
                self.state.overall.skipped_files += 1
            else:
                fp.status = "done"
                fp.progress_percent = 100.0
                self.state.overall.converted_files += 1
            if output_path:
                fp.output_path = str(output_path)
            status = fp.status

        if filepath.name in self.state.current_encoding:
            self.state.current_encoding.remove(filepath.name)

        self.state.overall.processed_files += 1
        self._update_overall()
        self._emit("file_done", {"file": filepath.name, "status": status})

    def complete(self) -> None:
        """Signal all processing is complete."""
        self.state.overall.overall_percent = 100.0
        self._emit("complete")

    def _update_overall(self) -> None:
        """Update overall progress statistics.

        Does nothing until ``total_files`` is positive.
        """
        overall = self.state.overall
        total = overall.total_files
        if total <= 0:
            return

        # Weight: completed files + partial progress of encoding files
        encoding_weight = sum(
            fp.progress_percent / 100.0 for fp in self.state.files.values() if fp.status == "encoding"
        )
        done_weight = overall.processed_files + encoding_weight
        # Clamp: more files may be reported done than were announced.
        overall.overall_percent = min(100.0, (done_weight / total) * 100)

        # Estimate overall ETA based on average processing time
        if overall.started_at and overall.processed_files > 0:
            elapsed = time.time() - overall.started_at
            avg_time = elapsed / done_weight
            remaining = max(0.0, total - done_weight)
            overall.eta_seconds = avg_time * remaining
246
+
247
+
248
# Size suffix (lower-cased) -> multiplier in bytes. Longer suffixes come
# first so that "KiB" is not mistaken for a plain "B" suffix.
_SIZE_SUFFIXES = (
    ("kib", 1024),
    ("mib", 1024 ** 2),
    ("gib", 1024 ** 3),
    ("kb", 1024),
    ("mb", 1024 ** 2),
    ("gb", 1024 ** 3),
    ("b", 1),
)


def _token_after(line: str, key: str) -> Optional[str]:
    """Return the first whitespace-delimited token after ``key``, or None."""
    _, found, tail = line.partition(key)
    if not found:
        return None
    tokens = tail.split(None, 1)
    return tokens[0] if tokens else None


def _size_to_bytes(token: str) -> int:
    """Convert a size token such as ``1024kB``, ``2048KiB`` or ``4096`` to bytes.

    Raises ValueError (or OverflowError) when the token is not a size.
    """
    lowered = token.lower()
    for suffix, factor in _SIZE_SUFFIXES:
        if lowered.endswith(suffix):
            return int(float(token[: -len(suffix)]) * factor)
    # No unit: a bare byte count, as in "total_size=4096".
    return int(token)


def parse_ffmpeg_progress_for_json(line: str) -> Dict[str, Any]:
    """Parse FFmpeg progress line for JSON output.

    Recognised fields are ``frame=``, ``fps=``, ``time=``, ``bitrate=``,
    ``speed=`` and ``size=``; whitespace between ``=`` and the value is
    allowed. A field that is absent or cannot be parsed (for example
    ``N/A``) is simply left out of the result.

    Returns:
        A dict with any of the keys ``frame`` (int), ``fps`` (float),
        ``time_ms`` (int), ``bitrate`` (str), ``speed`` (str) and
        ``size_bytes`` (int).
    """
    result: Dict[str, Any] = {}

    # frame=12345
    frame_token = _token_after(line, "frame=")
    if frame_token is not None:
        try:
            result["frame"] = int(frame_token)
        except ValueError:
            pass  # unparseable field: omit it

    # fps=123.45
    fps_token = _token_after(line, "fps=")
    if fps_token is not None:
        try:
            result["fps"] = float(fps_token)
        except ValueError:
            pass

    # time=00:01:23.45
    time_token = _token_after(line, "time=")
    if time_token is not None and time_token != "N/A":
        parts = time_token.split(":")
        if len(parts) == 3:
            hours, minutes, seconds = parts
            try:
                total_seconds = int(hours) * 3600 + int(minutes) * 60 + float(seconds)
                # round(), not int(): truncating the float product can
                # come out one millisecond short.
                result["time_ms"] = round(total_seconds * 1000)
            except (ValueError, OverflowError):
                pass

    # bitrate=1234.5kbits/s
    bitrate_token = _token_after(line, "bitrate=")
    if bitrate_token is not None:
        result["bitrate"] = bitrate_token

    # speed=1.23x
    speed_token = _token_after(line, "speed=")
    if speed_token is not None:
        result["speed"] = speed_token

    # size=12345kB, size=12345KiB, or a bare byte count
    size_token = _token_after(line, "size=")
    if size_token is not None:
        try:
            result["size_bytes"] = _size_to_bytes(size_token)
        except (ValueError, OverflowError):
            pass

    return result