mvw-cli 0.1.2__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/PKG-INFO +19 -1
  2. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/README.md +18 -0
  3. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/pyproject.toml +1 -1
  4. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/src/mvw/cli.py +46 -10
  5. mvw_cli-0.2.0/src/mvw/download.py +220 -0
  6. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/tests/test_cli.py +81 -0
  7. mvw_cli-0.2.0/tests/test_download.py +236 -0
  8. mvw_cli-0.1.2/src/mvw/download.py +0 -123
  9. mvw_cli-0.1.2/tests/test_download.py +0 -96
  10. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/.gitignore +0 -0
  11. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/LICENSE +0 -0
  12. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/src/mvw/__init__.py +0 -0
  13. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/src/mvw/api.py +0 -0
  14. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/src/mvw/config.py +0 -0
  15. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/src/mvw/display.py +0 -0
  16. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/src/mvw/episodes.py +0 -0
  17. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/src/mvw/filters.py +0 -0
  18. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/src/mvw/models.py +0 -0
  19. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/src/mvw/naming.py +0 -0
  20. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/src/mvw/query.py +0 -0
  21. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/tests/__init__.py +0 -0
  22. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/tests/test_api.py +0 -0
  23. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/tests/test_config.py +0 -0
  24. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/tests/test_display.py +0 -0
  25. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/tests/test_episodes.py +0 -0
  26. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/tests/test_filters.py +0 -0
  27. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/tests/test_live.py +0 -0
  28. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/tests/test_models.py +0 -0
  29. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/tests/test_naming.py +0 -0
  30. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/tests/test_query.py +0 -0
  31. {mvw_cli-0.1.2 → mvw_cli-0.2.0}/tests/test_smoke.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mvw-cli
3
- Version: 0.1.2
3
+ Version: 0.2.0
4
4
  Summary: Search and download German public-broadcasting media from MediathekViewWeb, with Plex-friendly season downloads.
5
5
  Project-URL: Homepage, https://github.com/maxboettinger/mvw-cli
6
6
  Project-URL: Repository, https://github.com/maxboettinger/mvw-cli
@@ -162,6 +162,24 @@ mvw download QUERY
162
162
  | `--season-number` | — | Override detected season number |
163
163
  | `--subtitles` | off | Also fetch subtitle files alongside each video |
164
164
  | `--limit` | 200 | Maximum number of entries to resolve |
165
+ | `--force` | off | Redownload every matched entry even if a valid file already exists |
166
+
167
+ #### Download behavior and verification
168
+
169
+ By default, `download` checks each target path before fetching:
170
+
171
+ - an existing file is checked by probing the actual size of the version that
172
+ would be downloaded (an HTTP HEAD, falling back to a ranged request) and
173
+ comparing it to the file on disk; a match → `✓ … already exists, skipped`,
174
+ a mismatch → `⚠ … corrupted (size mismatch), redownloading`;
175
+ - if the size cannot be determined (or for HLS `.m3u8`, which has no single
176
+ size), any non-empty existing file is treated as complete.
177
+
178
+ Each fresh download is verified against the server's `Content-Length` before
179
+ the temporary `.part` file is promoted to its final name. HLS has no single
180
+ size to verify, so it is muxed to a `.part` file that is promoted only after
181
+ ffmpeg succeeds. An interrupted transfer never leaves a corrupt final file. A run ends with a
182
+ summary line, e.g. `Done: 8 downloaded, 5 skipped, 2 redownloaded, 1 failed`.
165
183
 
166
184
  #### Filename template
167
185
 
@@ -135,6 +135,24 @@ mvw download QUERY
135
135
  | `--season-number` | — | Override detected season number |
136
136
  | `--subtitles` | off | Also fetch subtitle files alongside each video |
137
137
  | `--limit` | 200 | Maximum number of entries to resolve |
138
+ | `--force` | off | Redownload every matched entry even if a valid file already exists |
139
+
140
+ #### Download behavior and verification
141
+
142
+ By default, `download` checks each target path before fetching:
143
+
144
+ - an existing file is checked by probing the actual size of the version that
145
+ would be downloaded (an HTTP HEAD, falling back to a ranged request) and
146
+ comparing it to the file on disk; a match → `✓ … already exists, skipped`,
147
+ a mismatch → `⚠ … corrupted (size mismatch), redownloading`;
148
+ - if the size cannot be determined (or for HLS `.m3u8`, which has no single
149
+ size), any non-empty existing file is treated as complete.
150
+
151
+ Each fresh download is verified against the server's `Content-Length` before
152
+ the temporary `.part` file is promoted to its final name. HLS has no single
153
+ size to verify, so it is muxed to a `.part` file that is promoted only after
154
+ ffmpeg succeeds. An interrupted transfer never leaves a corrupt final file. A run ends with a
155
+ summary line, e.g. `Done: 8 downloaded, 5 skipped, 2 redownloaded, 1 failed`.
138
156
 
139
157
  #### Filename template
140
158
 
@@ -3,7 +3,7 @@ name = "mvw-cli"
3
3
  description = "Search and download German public-broadcasting media from MediathekViewWeb, with Plex-friendly season downloads."
4
4
  readme = "README.md"
5
5
  requires-python = ">=3.13"
6
- version = "0.1.2"
6
+ version = "0.2.0"
7
7
  license = "MIT"
8
8
  license-files = ["LICENSE"]
9
9
  authors = [{ name = "Max Boettinger", email = "perplexity@bttngr.de" }]
@@ -16,7 +16,8 @@ from mvw import config as configmod
16
16
  from mvw import display, episodes, filters, naming, query
17
17
  from mvw.api import MediathekClient, MediathekError
18
18
  from mvw.download import (
19
- DownloadError, FFmpegMissingError, download as download_file, download_hls, is_hls, pick_resolution,
19
+ DownloadError, FFmpegMissingError, download as download_file, download_hls,
20
+ is_hls, pick_resolution, remote_size, verify_size,
20
21
  )
21
22
 
22
23
  app = typer.Typer(help="Search and download from MediathekViewWeb.", no_args_is_help=True)
@@ -95,6 +96,7 @@ def download(
95
96
  latest_season: bool = typer.Option(False, "--latest-season"),
96
97
  season_number: Optional[int] = typer.Option(None, "--season-number"),
97
98
  subtitles: bool = typer.Option(False, "--subtitles"),
99
+ force: bool = typer.Option(False, "--force", help="redownload even if a valid file exists"),
98
100
  limit: int = typer.Option(200, "--limit", help="max entries to resolve"),
99
101
  ) -> None:
100
102
  cfg = configmod.load()
@@ -149,7 +151,7 @@ def download(
149
151
  console.print(f"[dim]{len(plan)} file(s) planned[/]")
150
152
  return
151
153
 
152
- _run_downloads(plan, subtitles=subtitles)
154
+ _run_downloads(plan, subtitles=subtitles, force=force)
153
155
 
154
156
 
155
157
  def _make_progress() -> tuple[Progress, Progress, Group]:
@@ -178,40 +180,74 @@ def _make_progress() -> tuple[Progress, Progress, Group]:
178
180
  return overall, per_file, Group(overall, per_file)
179
181
 
180
182
 
181
def _run_downloads(plan, *, subtitles: bool, force: bool = False) -> None:
    """Download every planned entry and print a per-file status plus a summary.

    ``plan`` is a sized collection of ``(dest, url, tier, sub_url)`` tuples.
    Unless ``force`` is set, an existing ``dest`` is checked first: for a
    direct download its size is compared with the probed remote size, for
    HLS (or when the probe yields nothing) any non-empty file is accepted.
    Accepted files are skipped; rejected ones are deleted and fetched again.

    When ``subtitles`` is true and the entry has a subtitle URL, the subtitle
    is fetched next to the video with an ``.xml`` suffix; a subtitle failure
    is reported but does not fail the entry.

    Raises:
        typer.Exit: code 4 if any entry needed ffmpeg and it was missing,
            otherwise code 5 if any download failed.
    """
    overall_progress, file_progress, group = _make_progress()
    counts = {"downloaded": 0, "skipped": 0, "redownloaded": 0, "failed": 0}
    ffmpeg_missing = False
    # Shared print options so status lines are never wrapped or cropped.
    pkw = dict(no_wrap=True, overflow="ignore", crop=False)
    with Live(group, console=console):
        overall = overall_progress.add_task("Overall", total=len(plan))
        for dest, url, _tier, sub_url in plan:
            hls = is_hls(url)
            probe: int | None = None
            is_redownload = False
            if not force and dest.exists():
                # Probe the real size of the tier we'd fetch; HLS has no single
                # size, so fall back to "exists and non-empty".
                probe = None if hls else remote_size(url)
                expected = 0 if (hls or probe is None) else probe
                if verify_size(dest, expected):
                    console.print(f"[green]✓[/] {dest.name} — already exists, skipped", **pkw)
                    counts["skipped"] += 1
                    overall_progress.advance(overall)
                    continue
                console.print(
                    f"[yellow]⚠[/] {dest.name} — corrupted (size mismatch), redownloading",
                    **pkw,
                )
                dest.unlink(missing_ok=True)
                is_redownload = True

            task = file_progress.add_task(dest.name, total=None)

            # Bind the task id as a default so each callback keeps its own task.
            def cb(done: int, total, _t=task):
                file_progress.update(_t, completed=done, total=total)

            try:
                if hls:
                    file_progress.update(task, description=f"{dest.name} (ffmpeg)")
                    download_hls(url, dest, on_progress=cb)
                else:
                    download_file(url, dest, on_progress=cb, expected_size=probe)
                counts["redownloaded" if is_redownload else "downloaded"] += 1
                console.print(f"[cyan]↓[/] {dest.name} — downloaded", **pkw)
                if subtitles and sub_url:
                    # The video is already on disk; a subtitle problem is only a warning.
                    try:
                        download_file(sub_url, dest.with_suffix(".xml"))
                    except DownloadError as exc:
                        err_console.print(
                            f"[yellow]⚠[/] {dest.name} (subtitle) — failed: {exc}", **pkw
                        )
            except FFmpegMissingError as exc:
                ffmpeg_missing = True
                # The entry was not downloaded, so it belongs in the summary's
                # "failed" bucket; the exit code below still reports 4.
                counts["failed"] += 1
                err_console.print(display.error_panel(
                    f"{dest.name}: {exc}\nInstall ffmpeg: https://ffmpeg.org/download.html"
                ))
            except DownloadError as exc:
                counts["failed"] += 1
                console.print(f"[red]✗[/] {dest.name} — failed: {exc}", **pkw)
            finally:
                file_progress.update(task, visible=False)
                overall_progress.advance(overall)
    console.print(
        f"[bold]Done:[/] {counts['downloaded']} downloaded, "
        f"{counts['skipped']} skipped, {counts['redownloaded']} redownloaded, "
        f"{counts['failed']} failed",
        **pkw,
    )
    if ffmpeg_missing:
        raise typer.Exit(4)
    elif counts["failed"]:
        raise typer.Exit(5)
216
252
 
217
253
 
@@ -0,0 +1,220 @@
1
+ from __future__ import annotations
2
+
3
+ import shutil
4
+ import subprocess
5
+ import tempfile
6
+ import time
7
+ from pathlib import Path
8
+ from typing import Callable
9
+
10
+ import httpx
11
+
12
+ from mvw.models import MediathekResult
13
+
14
+ ProgressCb = Callable[[int, "int | None"], None]
15
+
16
+
17
class DownloadError(Exception):
    """Signals that a download could not be completed."""
19
+
20
+
21
class FFmpegMissingError(DownloadError):
    """Signals that the ffmpeg executable needed for HLS downloads was not found."""
23
+
24
+
25
def is_hls(url: str) -> bool:
    """Report whether ``url`` points at an HLS playlist.

    Only the text before the first ``?`` is examined, case-insensitively,
    for the ``.m3u8`` marker, so query parameters never influence the result.
    """
    before_query, _sep, _query = url.partition("?")
    return ".m3u8" in before_query.lower()
27
+
28
+
29
def verify_size(path: Path, expected: int, *, tolerance: int = 0) -> bool:
    """Decide whether the file at ``path`` looks like a finished download.

    A positive ``expected`` is used as the reference byte count: the file
    passes only when its on-disk size is at most ``tolerance`` bytes away
    from it. A zero or negative ``expected`` means no reference size is
    available (HLS, for instance); then any non-empty file passes. A path
    that cannot be stat'ed never passes.
    """
    try:
        size_on_disk = path.stat().st_size
    except OSError:
        return False
    if expected <= 0:
        return size_on_disk > 0
    # Same test as abs(size_on_disk - expected) <= tolerance.
    return -tolerance <= size_on_disk - expected <= tolerance
44
+
45
+
46
+ def remote_size(url: str, *, client: httpx.Client | None = None) -> int | None:
47
+ """Best-effort probe of a URL's byte size without downloading the body.
48
+
49
+ Tries an HTTP HEAD first; if that gives no usable size (many CDNs reject
50
+ HEAD), falls back to a ranged GET and reads the total from Content-Range
51
+ WITHOUT reading the body. Returns the size in bytes, or None when it cannot
52
+ be determined or any network error occurs. Never raises — callers use it
53
+ only as a hint.
54
+ """
55
+ owns_client = client is None
56
+ client = client or httpx.Client(timeout=30.0, follow_redirects=True)
57
+ try:
58
+ try:
59
+ resp = client.head(url)
60
+ if resp.status_code == 200:
61
+ cl = resp.headers.get("content-length")
62
+ if cl is not None and cl.isdigit():
63
+ return int(cl)
64
+ except httpx.HTTPError:
65
+ pass
66
+ # Fallback: a 1-byte ranged GET exposes the full size via Content-Range.
67
+ # Use stream() and never read the body, so a server that ignores Range
68
+ # (returns 200) does not trigger a full download.
69
+ try:
70
+ with client.stream("GET", url, headers={"Range": "bytes=0-0"}) as resp:
71
+ if resp.status_code == 206:
72
+ cr = resp.headers.get("content-range") # "bytes 0-0/12345"
73
+ if cr and "/" in cr:
74
+ total = cr.rsplit("/", 1)[1]
75
+ if total.isdigit():
76
+ return int(total)
77
+ except httpx.HTTPError:
78
+ pass
79
+ return None
80
+ finally:
81
+ if owns_client:
82
+ client.close()
83
+
84
+
85
def pick_resolution(result: MediathekResult, preference: str) -> tuple[str, str]:
    """Resolve the video URL and tier for ``result`` under ``preference``.

    Delegates to ``result.resolve_video(preference)`` and returns its
    ``(url, tier)`` pair.

    Raises:
        DownloadError: if the resolved URL is empty.
    """
    video_url, tier = result.resolve_video(preference)
    if video_url:
        return video_url, tier
    raise DownloadError("no video URL available for this entry")
90
+
91
+
92
def download(
    url: str,
    dest: Path,
    *,
    on_progress: ProgressCb | None = None,
    resume: bool = True,
    expected_size: int | None = None,
    client: httpx.Client | None = None,
) -> Path:
    """Stream ``url`` into ``dest`` through a ``<dest>.part`` file.

    Bytes are written to the ``.part`` file, and it is renamed to ``dest``
    only after the byte count has been checked. When ``resume`` is true and
    a non-empty ``.part`` exists, a ``Range`` request asks for the remainder.
    The partial is discarded and the transfer restarted once from byte 0 when
    the server answers the resume attempt with 416, or with a 206 whose total
    disagrees with a positive ``expected_size``. A 200 reply always rewrites
    the ``.part`` from scratch.

    Args:
        url: Resource to fetch.
        dest: Final path; parent directories are created as needed.
        on_progress: Called as ``on_progress(bytes_so_far, total_or_None)``
            after every chunk.
        resume: Whether an existing ``.part`` may be continued.
        expected_size: Known full size in bytes; ignored unless positive.
        client: Optional client to reuse; otherwise a temporary one is
            created (60 s timeout, redirects followed) and closed.

    Returns:
        ``dest``.

    Raises:
        DownloadError: on an unexpected HTTP status, an ``httpx.TransportError``,
            or when the received byte count does not match the expected total
            (in which case the ``.part`` file is removed).
    """
    dest.parent.mkdir(parents=True, exist_ok=True)
    part = dest.with_name(dest.name + ".part")
    known_size = expected_size if expected_size and expected_size > 0 else None

    owns_client = client is None
    client = client or httpx.Client(timeout=60.0, follow_redirects=True)
    downloaded = 0
    total: int | None = None
    try:
        # At most two passes: one resume attempt, then a clean restart if the
        # partial turns out to be unusable. The second pass never sends a
        # Range header, so it cannot ask for another restart.
        for _attempt in range(2):
            existing = part.stat().st_size if (resume and part.exists()) else 0
            headers = {"Range": f"bytes={existing}-"} if existing else {}
            with client.stream("GET", url, headers=headers) as resp:
                status = resp.status_code
                if status == 416 and existing:
                    # The partial already covers (or exceeds) the remote file,
                    # e.g. a run died between the last chunk and the rename.
                    # Without this the same request would fail on every run.
                    part.unlink(missing_ok=True)
                    resume = False
                    continue
                if status not in (200, 206):
                    raise DownloadError(f"HTTP {status} downloading {url}")
                if status == 200:
                    # Server is sending the whole file; ignore any partial .part.
                    existing = 0
                total = None
                cl = resp.headers.get("content-length")
                if cl is not None:
                    try:
                        total = int(cl) + existing
                    except ValueError:
                        # Malformed header: treat the total as unknown.
                        total = None
                # Resume guard: a 206 whose total disagrees with the known
                # expected size means the remote changed under us. Drop the
                # stale partial and retry once from byte 0.
                if (
                    status == 206
                    and existing
                    and known_size is not None
                    and total is not None
                    and total != known_size
                ):
                    part.unlink(missing_ok=True)
                    resume = False
                    continue
                downloaded = existing
                with open(part, "ab" if existing else "wb") as fh:
                    for chunk in resp.iter_bytes():
                        fh.write(chunk)
                        downloaded += len(chunk)
                        if on_progress:
                            on_progress(downloaded, total)
            break
    except httpx.TransportError as exc:
        raise DownloadError(f"network error: {exc}") from exc
    finally:
        if owns_client:
            client.close()

    # Completeness check: prefer the server's Content-Length total for this
    # transfer; fall back to the caller's expected_size. Promote .part to the
    # final name only after this passes.
    effective = total if total is not None else known_size
    if effective is not None and downloaded != effective:
        part.unlink(missing_ok=True)
        raise DownloadError(
            f"size mismatch downloading {url}: got {downloaded}, expected {effective}"
        )

    part.replace(dest)
    return dest
172
+
173
+
174
# ffmpeg guesses the output container from the file extension. The temporary
# file ends in ".part", which it cannot map to a muxer, so the container is
# named explicitly for the destination suffixes listed here.
_FFMPEG_MUXERS = {
    ".mp4": "mp4",
    ".m4v": "mp4",
    ".mov": "mov",
    ".mkv": "matroska",
    ".webm": "webm",
    ".ts": "mpegts",
}


def download_hls(
    url: str,
    dest: Path,
    *,
    ffmpeg: str = "ffmpeg",
    on_progress: ProgressCb | None = None,
    poll_interval: float = 0.5,
) -> Path:
    """Mux the HLS stream at ``url`` into ``dest`` using ffmpeg stream copy.

    ffmpeg writes to ``<dest>.part`` (any stale one is removed first); the
    file is renamed to ``dest`` only after ffmpeg exits with status 0 and
    has produced a non-empty file.

    Args:
        url: Playlist URL handed to ffmpeg as input.
        dest: Final path; parent directories are created as needed.
        ffmpeg: Executable name or path, resolved with ``shutil.which``.
        on_progress: Called as ``on_progress(bytes_written, None)`` while
            ffmpeg runs and once more after it finishes; the total is always
            ``None`` because HLS has no known size.
        poll_interval: Seconds to sleep between progress polls.

    Returns:
        ``dest``.

    Raises:
        FFmpegMissingError: if the ffmpeg executable cannot be found.
        DownloadError: if ffmpeg exits non-zero (the message carries the
            last 300 characters of its stderr) or leaves no data behind.
    """
    if shutil.which(ffmpeg) is None:
        raise FFmpegMissingError(
            "ffmpeg not found on PATH; required for HLS (.m3u8) downloads"
        )
    dest.parent.mkdir(parents=True, exist_ok=True)
    part = dest.with_name(dest.name + ".part")
    part.unlink(missing_ok=True)

    cmd = [ffmpeg, "-y", "-i", url, "-c", "copy"]
    muxer = _FFMPEG_MUXERS.get(dest.suffix.lower())
    if muxer is not None:
        # Without -f, ffmpeg would try to infer the format from ".part".
        cmd += ["-f", muxer]
    cmd.append(str(part))

    def report() -> None:
        # HLS has no known total, so report bytes written so far against an
        # indeterminate total (None) to drive a live size/speed display.
        if on_progress is not None:
            try:
                on_progress(part.stat().st_size, None)
            except OSError:
                # The output file may not exist yet.
                pass

    # Stream ffmpeg's chatty stderr to a temp file rather than a PIPE we never
    # read; an unread PIPE can fill its buffer and deadlock the child.
    with tempfile.TemporaryFile(mode="w+", encoding="utf-8", errors="replace") as errf:
        proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=errf)
        try:
            while proc.poll() is None:
                report()
                time.sleep(poll_interval)
        finally:
            proc.wait()
        if proc.returncode != 0:
            errf.seek(0)
            tail = errf.read()[-300:]
            part.unlink(missing_ok=True)
            raise DownloadError(f"ffmpeg failed ({proc.returncode}): {tail}")
        report()

    # A zero exit status does not guarantee output; a missing file is treated
    # the same as an empty one.
    try:
        produced = part.stat().st_size
    except OSError:
        produced = 0
    if produced == 0:
        part.unlink(missing_ok=True)
        raise DownloadError("ffmpeg produced an empty output file")
    part.replace(dest)
    return dest
@@ -129,6 +129,87 @@ def test_season_flag_gates_foldering(monkeypatch, tmp_path):
129
129
  assert "s07e05" in result_flat.stdout.lower()
130
130
 
131
131
 
132
def _fake_writer(calls, *, nbytes):
    """Build a stand-in downloader that logs URLs and writes ``nbytes`` bytes.

    The returned callable appends each requested URL to ``calls``, creates
    the destination's parent directory, fills the destination with
    ``nbytes`` ``x`` bytes and returns the destination path.
    """
    def fake_dl(url, dest, **kwargs):
        # Extra keyword arguments are accepted and ignored.
        calls.append(url)
        target_dir = dest.parent
        target_dir.mkdir(parents=True, exist_ok=True)
        payload = b"x" * nbytes
        dest.write_bytes(payload)
        return dest

    return fake_dl
139
+
140
+
141
+ def test_download_skips_existing_valid_file(monkeypatch, tmp_path):
142
+ rows = [_r("Feuer und Flamme (S07/E05)", size=11,
143
+ url_video="https://v.mp4", url_video_hd="")]
144
+ monkeypatch.setattr(cli, "_make_client", lambda cfg: FakeClient(rows))
145
+ calls = []
146
+ monkeypatch.setattr(cli, "download_file", _fake_writer(calls, nbytes=11))
147
+ monkeypatch.setattr(cli, "remote_size", lambda url, **kw: 11)
148
+
149
+ args = ["download", "#Feuer und Flamme", "--resolution", "medium",
150
+ "--output", str(tmp_path)]
151
+ first = runner.invoke(cli.app, args)
152
+ assert first.exit_code == 0
153
+ assert len(calls) == 1
154
+
155
+ second = runner.invoke(cli.app, args)
156
+ assert second.exit_code == 0
157
+ assert len(calls) == 1 # not downloaded again
158
+ assert "already exists, skipped" in second.stdout
159
+
160
+
161
+ def test_download_redownloads_corrupt_file(monkeypatch, tmp_path):
162
+ rows = [_r("Feuer und Flamme (S07/E05)", size=999,
163
+ url_video="https://v.mp4", url_video_hd="")]
164
+ monkeypatch.setattr(cli, "_make_client", lambda cfg: FakeClient(rows))
165
+ calls = []
166
+ # Writes only 5 bytes although the server "really" has 11 -> always "corrupt".
167
+ monkeypatch.setattr(cli, "download_file", _fake_writer(calls, nbytes=5))
168
+ monkeypatch.setattr(cli, "remote_size", lambda url, **kw: 11)
169
+
170
+ args = ["download", "#Feuer und Flamme", "--resolution", "medium",
171
+ "--output", str(tmp_path)]
172
+ runner.invoke(cli.app, args) # first download leaves a 5-byte file
173
+ second = runner.invoke(cli.app, args) # sees size mismatch -> redownload
174
+ assert second.exit_code == 0
175
+ assert len(calls) == 2
176
+ assert "corrupted" in second.stdout
177
+ assert "redownloading" in second.stdout
178
+
179
+
180
+ def test_download_force_redownloads_valid_file(monkeypatch, tmp_path):
181
+ rows = [_r("Feuer und Flamme (S07/E05)", size=11,
182
+ url_video="https://v.mp4", url_video_hd="")]
183
+ monkeypatch.setattr(cli, "_make_client", lambda cfg: FakeClient(rows))
184
+ calls = []
185
+ monkeypatch.setattr(cli, "download_file", _fake_writer(calls, nbytes=11))
186
+
187
+ base = ["download", "#Feuer und Flamme", "--resolution", "medium",
188
+ "--output", str(tmp_path)]
189
+ runner.invoke(cli.app, base) # 1 download
190
+ forced = runner.invoke(cli.app, base + ["--force"])
191
+ assert forced.exit_code == 0
192
+ assert len(calls) == 2 # downloaded again
193
+ assert "already exists" not in forced.stdout
194
+
195
+
196
+ def test_download_prints_summary(monkeypatch, tmp_path):
197
+ rows = [_r("Feuer und Flamme (S07/E05)", size=11,
198
+ url_video="https://v.mp4", url_video_hd="")]
199
+ monkeypatch.setattr(cli, "_make_client", lambda cfg: FakeClient(rows))
200
+ monkeypatch.setattr(cli, "download_file", _fake_writer([], nbytes=11))
201
+ result = runner.invoke(
202
+ cli.app,
203
+ ["download", "#Feuer und Flamme", "--resolution", "medium",
204
+ "--output", str(tmp_path)],
205
+ )
206
+ assert result.exit_code == 0
207
+ assert "Done:" in result.stdout
208
+ assert "1 downloaded" in result.stdout
209
+ assert "0 skipped" in result.stdout
210
+ assert "0 redownloaded, 0 failed" in result.stdout
211
+
212
+
132
213
  def test_overall_progress_counts_files_not_bytes():
133
214
  """Regression: the overall bar tracks a file count, so it must use a
134
215
  count column (MofN) and never byte/speed columns, which produced the
@@ -0,0 +1,236 @@
1
+ from pathlib import Path
2
+
3
+ import httpx
4
+ import pytest
5
+ import respx
6
+
7
+ from mvw.download import DownloadError, download, download_hls, is_hls, pick_resolution, remote_size, verify_size
8
+ from mvw.models import MediathekResult
9
+
10
+
11
+ def _r(**over):
12
+ base = dict(channel="WDR", topic="T", title="x", timestamp=1, duration=1,
13
+ url_video="https://v/m.mp4", url_video_low="", url_video_hd="https://v/hd.mp4",
14
+ id="1")
15
+ base.update(over)
16
+ return MediathekResult.from_api(base)
17
+
18
+
19
+ def test_is_hls():
20
+ assert is_hls("https://x/playlist.m3u8")
21
+ assert is_hls("https://x/playlist.m3u8?token=1")
22
+ assert not is_hls("https://x/v.mp4")
23
+
24
+
25
+ def test_pick_resolution_best_and_missing():
26
+ assert pick_resolution(_r(), "best") == ("https://v/hd.mp4", "high")
27
+ with pytest.raises(DownloadError):
28
+ pick_resolution(_r(url_video="", url_video_hd="", url_video_low=""), "best")
29
+
30
+
31
+ @respx.mock
32
+ def test_download_streams_and_renames(tmp_path: Path):
33
+ respx.get("https://v/m.mp4").mock(
34
+ return_value=httpx.Response(200, content=b"hello world", headers={"content-length": "11"})
35
+ )
36
+ dest = tmp_path / "out.mp4"
37
+ seen = []
38
+ result = download("https://v/m.mp4", dest, on_progress=lambda d, t: seen.append((d, t)))
39
+ assert result == dest
40
+ assert dest.read_bytes() == b"hello world"
41
+ assert not dest.with_suffix(".mp4.part").exists()
42
+ assert seen[-1][0] == 11
43
+
44
+
45
+ @respx.mock
46
+ def test_download_resumes_with_range(tmp_path: Path):
47
+ dest = tmp_path / "out.mp4"
48
+ part = dest.with_suffix(".mp4.part")
49
+ part.write_bytes(b"hello ") # 6 bytes already present
50
+
51
+ def handler(request):
52
+ assert request.headers["range"] == "bytes=6-"
53
+ return httpx.Response(206, content=b"world", headers={"content-range": "bytes 6-10/11"})
54
+
55
+ respx.get("https://v/m.mp4").mock(side_effect=handler)
56
+ download("https://v/m.mp4", dest)
57
+ assert dest.read_bytes() == b"hello world"
58
+
59
+
60
def _fake_ffmpeg(tmp_path: Path, *, sleep: float = 0.0, rc: int = 0) -> Path:
    """Create an executable shell script that imitates ffmpeg.

    The script takes its final argument as the output path, sleeps for
    ``sleep`` seconds, writes two chunks to that path and exits with status
    ``rc``. Returns the path of the script inside ``tmp_path``.
    """
    script_lines = [
        "#!/bin/sh",
        'eval out="\\${$#}"',
        f"sleep {sleep}",
        'printf "chunk-a" > "$out"',
        'printf "chunk-b" >> "$out"',
        f"exit {rc}",
    ]
    script = tmp_path / "fake_ffmpeg"
    script.write_text("".join(f"{line}\n" for line in script_lines))
    script.chmod(0o755)
    return script
73
+
74
+
75
+ def test_download_hls_reports_progress_and_writes_output(tmp_path: Path):
76
+ ffmpeg = _fake_ffmpeg(tmp_path, sleep=0.6) # outlive one poll tick
77
+ dest = tmp_path / "out.mkv"
78
+ seen: list[tuple[int, object]] = []
79
+
80
+ result = download_hls(
81
+ "https://x/playlist.m3u8", dest, ffmpeg=str(ffmpeg),
82
+ on_progress=lambda d, t: seen.append((d, t)), poll_interval=0.1,
83
+ )
84
+
85
+ assert result == dest
86
+ assert dest.read_bytes() == b"chunk-achunk-b"
87
+ # HLS has no known total, so it is reported as indeterminate.
88
+ assert seen and all(t is None for _, t in seen)
89
+ # Final report reflects the fully-written file size.
90
+ assert seen[-1][0] == len(b"chunk-achunk-b")
91
+
92
+
93
+ def test_download_hls_raises_on_ffmpeg_failure(tmp_path: Path):
94
+ ffmpeg = _fake_ffmpeg(tmp_path, rc=1)
95
+ dest = tmp_path / "out.mkv"
96
+ with pytest.raises(DownloadError, match="ffmpeg failed"):
97
+ download_hls("https://x/playlist.m3u8", dest, ffmpeg=str(ffmpeg))
98
+ assert not dest.exists()
99
+ assert not dest.with_name("out.mkv.part").exists()
100
+
101
+
102
+ def test_verify_size_missing_file(tmp_path):
103
+ assert verify_size(tmp_path / "nope.mp4", 10) is False
104
+
105
+
106
+ def test_verify_size_zero_byte_file(tmp_path):
107
+ f = tmp_path / "empty.mp4"
108
+ f.write_bytes(b"")
109
+ assert verify_size(f, 10) is False
110
+ # expected<=0 path also rejects an empty file
111
+ assert verify_size(f, 0) is False
112
+
113
+
114
+ def test_verify_size_exact_match(tmp_path):
115
+ f = tmp_path / "ok.mp4"
116
+ f.write_bytes(b"0123456789") # 10 bytes
117
+ assert verify_size(f, 10) is True
118
+
119
+
120
+ def test_verify_size_off_by_one_mismatch(tmp_path):
121
+ f = tmp_path / "bad.mp4"
122
+ f.write_bytes(b"012345678") # 9 bytes
123
+ assert verify_size(f, 10) is False
124
+ assert verify_size(f, 10, tolerance=1) is True
125
+
126
+
127
+ def test_verify_size_unknown_expected_accepts_nonempty(tmp_path):
128
+ f = tmp_path / "hls.mkv"
129
+ f.write_bytes(b"x")
130
+ assert verify_size(f, 0) is True
131
+ assert verify_size(f, -1) is True
132
+
133
+
134
+ class _ShortStream(httpx.SyncByteStream):
135
+ """Yields fewer bytes than the caller's expected_size, with no length header."""
136
+
137
+ def __iter__(self):
138
+ yield b"hello" # 5 bytes
139
+
140
+ def close(self):
141
+ pass
142
+
143
+
144
+ @respx.mock
145
+ def test_download_raises_when_short_of_expected_size(tmp_path: Path):
146
+ respx.get("https://v/m.mp4").mock(
147
+ return_value=httpx.Response(200, stream=_ShortStream())
148
+ )
149
+ dest = tmp_path / "out.mp4"
150
+ with pytest.raises(DownloadError, match="size mismatch"):
151
+ download("https://v/m.mp4", dest, expected_size=11)
152
+ assert not dest.exists()
153
+ assert not dest.with_name("out.mp4.part").exists()
154
+
155
+
156
+ @respx.mock
157
+ def test_download_restarts_when_remote_total_differs(tmp_path: Path):
158
+ dest = tmp_path / "out.mp4"
159
+ part = dest.with_name("out.mp4.part")
160
+ part.write_bytes(b"OLDOLD") # 6 stale bytes from a previous run
161
+
162
+ seen_ranges: list[str | None] = []
163
+
164
+ def handler(request):
165
+ rng = request.headers.get("range")
166
+ seen_ranges.append(rng)
167
+ if rng is not None:
168
+ # Resume attempt: server reports a different total (11) than the
169
+ # caller's expected_size (20) -> remote changed, must restart.
170
+ return httpx.Response(
171
+ 206, content=b"world",
172
+ headers={"content-range": "bytes 6-10/11", "content-length": "5"},
173
+ )
174
+ # Restart attempt: full file from byte 0.
175
+ return httpx.Response(
176
+ 200, content=b"hello world", headers={"content-length": "11"}
177
+ )
178
+
179
+ respx.get("https://v/m.mp4").mock(side_effect=handler)
180
+ download("https://v/m.mp4", dest, expected_size=20)
181
+
182
+ assert dest.read_bytes() == b"hello world"
183
+ assert seen_ranges == ["bytes=6-", None] # tried resume, then restarted clean
184
+ assert not part.exists()
185
+
186
+
187
+ @respx.mock
188
+ def test_download_resume_continues_on_valid_206(tmp_path: Path):
189
+ dest = tmp_path / "out.mp4"
190
+ part = dest.with_name("out.mp4.part")
191
+ part.write_bytes(b"hello ") # 6 bytes already present
192
+
193
+ def handler(request):
194
+ assert request.headers["range"] == "bytes=6-"
195
+ return httpx.Response(
196
+ 206, content=b"world",
197
+ headers={"content-range": "bytes 6-10/11", "content-length": "5"},
198
+ )
199
+
200
+ respx.get("https://v/m.mp4").mock(side_effect=handler)
201
+ # expected_size matches the server's total (11), so the resume guard stays
202
+ # inert and the partial is continued, not restarted.
203
+ download("https://v/m.mp4", dest, expected_size=11)
204
+ assert dest.read_bytes() == b"hello world"
205
+ assert not part.exists()
206
+
207
+
208
+ @respx.mock
209
+ def test_remote_size_from_head(tmp_path: Path):
210
+ respx.head("https://v/m.mp4").mock(
211
+ return_value=httpx.Response(200, headers={"content-length": "4242"})
212
+ )
213
+ assert remote_size("https://v/m.mp4") == 4242
214
+
215
+
216
+ @respx.mock
217
+ def test_remote_size_falls_back_to_range_when_head_unsupported():
218
+ respx.head("https://v/m.mp4").mock(return_value=httpx.Response(405))
219
+ respx.get("https://v/m.mp4").mock(
220
+ return_value=httpx.Response(206, headers={"content-range": "bytes 0-0/9001"})
221
+ )
222
+ assert remote_size("https://v/m.mp4") == 9001
223
+
224
+
225
+ @respx.mock
226
+ def test_remote_size_none_when_unknown():
227
+ respx.head("https://v/m.mp4").mock(return_value=httpx.Response(200)) # no length
228
+ respx.get("https://v/m.mp4").mock(return_value=httpx.Response(200)) # ignores range
229
+ assert remote_size("https://v/m.mp4") is None
230
+
231
+
232
+ @respx.mock
233
+ def test_remote_size_none_on_network_error():
234
+ respx.head("https://v/m.mp4").mock(side_effect=httpx.ConnectError("boom"))
235
+ respx.get("https://v/m.mp4").mock(side_effect=httpx.ConnectError("boom"))
236
+ assert remote_size("https://v/m.mp4") is None
@@ -1,123 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import shutil
4
- import subprocess
5
- import tempfile
6
- import time
7
- from pathlib import Path
8
- from typing import Callable
9
-
10
- import httpx
11
-
12
- from mvw.models import MediathekResult
13
-
14
- ProgressCb = Callable[[int, "int | None"], None]
15
-
16
-
17
- class DownloadError(Exception):
18
- pass
19
-
20
-
21
- class FFmpegMissingError(DownloadError):
22
- pass
23
-
24
-
25
- def is_hls(url: str) -> bool:
26
- return ".m3u8" in url.split("?", 1)[0].lower()
27
-
28
-
29
- def pick_resolution(result: MediathekResult, preference: str) -> tuple[str, str]:
30
- url, tier = result.resolve_video(preference)
31
- if not url:
32
- raise DownloadError("no video URL available for this entry")
33
- return url, tier
34
-
35
-
36
- def download(
37
- url: str,
38
- dest: Path,
39
- *,
40
- on_progress: ProgressCb | None = None,
41
- resume: bool = True,
42
- client: httpx.Client | None = None,
43
- ) -> Path:
44
- dest.parent.mkdir(parents=True, exist_ok=True)
45
- part = dest.with_name(dest.name + ".part")
46
- existing = part.stat().st_size if (resume and part.exists()) else 0
47
-
48
- headers = {}
49
- mode = "wb"
50
- if existing:
51
- headers["Range"] = f"bytes={existing}-"
52
- mode = "ab"
53
-
54
- owns_client = client is None
55
- client = client or httpx.Client(timeout=60.0, follow_redirects=True)
56
- try:
57
- with client.stream("GET", url, headers=headers) as resp:
58
- if resp.status_code not in (200, 206):
59
- raise DownloadError(f"HTTP {resp.status_code} downloading {url}")
60
- if resp.status_code == 200:
61
- existing = 0
62
- mode = "wb"
63
- total: int | None = None
64
- cl = resp.headers.get("content-length")
65
- if cl is not None:
66
- total = int(cl) + existing
67
- downloaded = existing
68
- with open(part, mode) as fh:
69
- for chunk in resp.iter_bytes():
70
- fh.write(chunk)
71
- downloaded += len(chunk)
72
- if on_progress:
73
- on_progress(downloaded, total)
74
- except httpx.TransportError as exc:
75
- raise DownloadError(f"network error: {exc}") from exc
76
- finally:
77
- if owns_client:
78
- client.close()
79
-
80
- part.replace(dest)
81
- return dest
82
-
83
-
84
- def download_hls(
85
- url: str,
86
- dest: Path,
87
- *,
88
- ffmpeg: str = "ffmpeg",
89
- on_progress: ProgressCb | None = None,
90
- poll_interval: float = 0.5,
91
- ) -> Path:
92
- if shutil.which(ffmpeg) is None:
93
- raise FFmpegMissingError(
94
- "ffmpeg not found on PATH; required for HLS (.m3u8) downloads"
95
- )
96
- dest.parent.mkdir(parents=True, exist_ok=True)
97
- cmd = [ffmpeg, "-y", "-i", url, "-c", "copy", str(dest)]
98
-
99
- def report() -> None:
100
- # HLS has no known total, so report bytes written so far against an
101
- # indeterminate total (None) to drive a live size/speed display.
102
- if on_progress is not None:
103
- try:
104
- on_progress(dest.stat().st_size, None)
105
- except OSError:
106
- pass
107
-
108
- # Stream ffmpeg's chatty stderr to a temp file rather than a PIPE we never
109
- # read; an unread PIPE can fill its buffer and deadlock the child.
110
- with tempfile.TemporaryFile(mode="w+", encoding="utf-8", errors="replace") as errf:
111
- proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=errf)
112
- try:
113
- while proc.poll() is None:
114
- report()
115
- time.sleep(poll_interval)
116
- finally:
117
- proc.wait()
118
- if proc.returncode != 0:
119
- errf.seek(0)
120
- tail = errf.read()[-300:]
121
- raise DownloadError(f"ffmpeg failed ({proc.returncode}): {tail}")
122
- report()
123
- return dest
@@ -1,96 +0,0 @@
1
- from pathlib import Path
2
-
3
- import httpx
4
- import pytest
5
- import respx
6
-
7
- from mvw.download import DownloadError, download, download_hls, is_hls, pick_resolution
8
- from mvw.models import MediathekResult
9
-
10
-
11
- def _r(**over):
12
- base = dict(channel="WDR", topic="T", title="x", timestamp=1, duration=1,
13
- url_video="https://v/m.mp4", url_video_low="", url_video_hd="https://v/hd.mp4",
14
- id="1")
15
- base.update(over)
16
- return MediathekResult.from_api(base)
17
-
18
-
19
- def test_is_hls():
20
- assert is_hls("https://x/playlist.m3u8")
21
- assert is_hls("https://x/playlist.m3u8?token=1")
22
- assert not is_hls("https://x/v.mp4")
23
-
24
-
25
- def test_pick_resolution_best_and_missing():
26
- assert pick_resolution(_r(), "best") == ("https://v/hd.mp4", "high")
27
- with pytest.raises(DownloadError):
28
- pick_resolution(_r(url_video="", url_video_hd="", url_video_low=""), "best")
29
-
30
-
31
- @respx.mock
32
- def test_download_streams_and_renames(tmp_path: Path):
33
- respx.get("https://v/m.mp4").mock(
34
- return_value=httpx.Response(200, content=b"hello world", headers={"content-length": "11"})
35
- )
36
- dest = tmp_path / "out.mp4"
37
- seen = []
38
- result = download("https://v/m.mp4", dest, on_progress=lambda d, t: seen.append((d, t)))
39
- assert result == dest
40
- assert dest.read_bytes() == b"hello world"
41
- assert not dest.with_suffix(".mp4.part").exists()
42
- assert seen[-1][0] == 11
43
-
44
-
45
- @respx.mock
46
- def test_download_resumes_with_range(tmp_path: Path):
47
- dest = tmp_path / "out.mp4"
48
- part = dest.with_suffix(".mp4.part")
49
- part.write_bytes(b"hello ") # 6 bytes already present
50
-
51
- def handler(request):
52
- assert request.headers["range"] == "bytes=6-"
53
- return httpx.Response(206, content=b"world", headers={"content-range": "bytes 6-10/11"})
54
-
55
- respx.get("https://v/m.mp4").mock(side_effect=handler)
56
- download("https://v/m.mp4", dest)
57
- assert dest.read_bytes() == b"hello world"
58
-
59
-
60
- def _fake_ffmpeg(tmp_path: Path, *, sleep: float = 0.0, rc: int = 0) -> Path:
61
- """A stand-in 'ffmpeg' that writes growing bytes to its output (last arg)."""
62
- script = tmp_path / "fake_ffmpeg"
63
- script.write_text(
64
- "#!/bin/sh\n"
65
- 'eval out="\\${$#}"\n'
66
- f"sleep {sleep}\n"
67
- 'printf "chunk-a" > "$out"\n'
68
- 'printf "chunk-b" >> "$out"\n'
69
- f"exit {rc}\n"
70
- )
71
- script.chmod(0o755)
72
- return script
73
-
74
-
75
- def test_download_hls_reports_progress_and_writes_output(tmp_path: Path):
76
- ffmpeg = _fake_ffmpeg(tmp_path, sleep=0.6) # outlive one poll tick
77
- dest = tmp_path / "out.mkv"
78
- seen: list[tuple[int, object]] = []
79
-
80
- result = download_hls(
81
- "https://x/playlist.m3u8", dest, ffmpeg=str(ffmpeg),
82
- on_progress=lambda d, t: seen.append((d, t)), poll_interval=0.1,
83
- )
84
-
85
- assert result == dest
86
- assert dest.read_bytes() == b"chunk-achunk-b"
87
- # HLS has no known total, so it is reported as indeterminate.
88
- assert seen and all(t is None for _, t in seen)
89
- # Final report reflects the fully-written file size.
90
- assert seen[-1][0] == len(b"chunk-achunk-b")
91
-
92
-
93
- def test_download_hls_raises_on_ffmpeg_failure(tmp_path: Path):
94
- ffmpeg = _fake_ffmpeg(tmp_path, rc=1)
95
- with pytest.raises(DownloadError, match="ffmpeg failed"):
96
- download_hls("https://x/playlist.m3u8", tmp_path / "out.mkv", ffmpeg=str(ffmpeg))
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes