PyPI - mvw-cli - Versions diffs - 0.1.0__py3-none-any.whl - Mend

mvw-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

mvw/__init__.py +1 -0
mvw/api.py +69 -0
mvw/cli.py +236 -0
mvw/config.py +67 -0
mvw/display.py +68 -0
mvw/download.py +92 -0
mvw/episodes.py +94 -0
mvw/filters.py +66 -0
mvw/models.py +109 -0
mvw/naming.py +47 -0
mvw/query.py +105 -0
mvw_cli-0.1.0.dist-info/METADATA +256 -0
mvw_cli-0.1.0.dist-info/RECORD +16 -0
mvw_cli-0.1.0.dist-info/WHEEL +4 -0
mvw_cli-0.1.0.dist-info/entry_points.txt +2 -0
mvw_cli-0.1.0.dist-info/licenses/LICENSE +21 -0

mvw/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
# Package version string.
__version__ = "0.1.0"

mvw/api.py ADDED Viewed

@@ -0,0 +1,69 @@
+from __future__ import annotations
+import json
+from collections.abc import Iterator
+import httpx
+from mvw.models import MediathekResult, QueryInfo, QueryResult
class MediathekError(Exception):
    """Signals a failed MediathekViewWeb request.

    Used for errors reported by the API, non-200 HTTP responses, and
    transport-level failures.
    """
class MediathekClient:
    """Small client for the MediathekViewWeb query endpoint.

    The instance only stores configuration; every call to :meth:`query`
    issues an independent ``httpx.post`` request.
    """

    def __init__(
        self,
        user_agent: str = "mvw/0.1.0",
        timeout: float = 30.0,
        retries: int = 2,
        base_url: str = "https://mediathekviewweb.de/api/query",
    ) -> None:
        self.user_agent = user_agent
        self.timeout = timeout
        self.retries = retries
        self.base_url = base_url

    def query(self, payload: dict) -> QueryResult:
        """POST *payload* as JSON and return the parsed result page.

        Transport errors are retried up to ``self.retries`` additional times.

        Raises:
            MediathekError: on a non-200 status, a body that is not a JSON
                object, an ``err`` entry in the response, or when every
                attempt failed with a transport error.
        """
        headers = {"Content-Type": "text/plain", "User-Agent": self.user_agent}
        body = json.dumps(payload)
        transport_err: Exception | None = None
        for _ in range(self.retries + 1):
            try:
                resp = httpx.post(
                    self.base_url, content=body, headers=headers, timeout=self.timeout
                )
            except httpx.TransportError as exc:
                transport_err = exc
                continue
            if resp.status_code != 200:
                raise MediathekError(f"HTTP {resp.status_code}: {resp.text[:200]}")
            try:
                data = resp.json()
            except ValueError as exc:
                # A 200 response with a non-JSON body (e.g. a proxy error page)
                # must surface as MediathekError, which is what callers catch.
                raise MediathekError(f"invalid JSON in response: {exc}") from exc
            if not isinstance(data, dict):
                raise MediathekError("unexpected response: expected a JSON object")
            err = data.get("err")
            if err:
                if isinstance(err, (list, tuple)):
                    raise MediathekError("; ".join(str(e) for e in err))
                # A bare string must not be joined character by character.
                raise MediathekError(str(err))
            result = data.get("result") or {}
            results = [MediathekResult.from_api(r) for r in result.get("results", [])]
            info = QueryInfo.from_api(result.get("queryInfo", {}))
            return QueryResult(results=results, query_info=info)
        raise MediathekError(f"network error: {transport_err}") from transport_err

    def iter_all(
        self, payload: dict, page_size: int = 50, cap: int | None = None
    ) -> Iterator[MediathekResult]:
        """Yield results page by page, starting at ``payload["offset"]`` (default 0).

        Iteration stops when a page is empty, when the offset reaches the
        reported total, or after *cap* rows have been yielded. A *cap* of zero
        or less yields nothing and performs no request.
        """
        if cap is not None and cap <= 0:
            return
        offset = int(payload.get("offset", 0))
        yielded = 0
        while True:
            # Copy the payload so the caller's dict is never mutated.
            page = dict(payload, offset=offset, size=page_size)
            result = self.query(page)
            if not result.results:
                return
            for row in result.results:
                yield row
                yielded += 1
                if cap is not None and yielded >= cap:
                    return
            offset += len(result.results)
            if offset >= result.query_info.total_results:
                return

mvw/cli.py ADDED Viewed

@@ -0,0 +1,236 @@
+from __future__ import annotations
+import json as jsonlib
+from pathlib import Path
+from typing import Optional
+import typer
+from rich.console import Console
+from rich.progress import (
+    BarColumn, DownloadColumn, Progress, SpinnerColumn,
+    TextColumn, TransferSpeedColumn,
+)
+from mvw import config as configmod
+from mvw import display, episodes, filters, naming, query
+from mvw.api import MediathekClient, MediathekError
+from mvw.download import (
+    DownloadError, FFmpegMissingError, download as download_file, download_hls, is_hls, pick_resolution,
+)
# Root Typer application; running it without arguments prints the help text.
app = typer.Typer(
    help="Search and download from MediathekViewWeb.",
    no_args_is_help=True,
)
# Regular output uses `console`; error panels are printed through `err_console`.
console = Console()
err_console = Console(stderr=True)


def _make_client(cfg: dict) -> MediathekClient:
    """Build an API client from the ``user_agent`` and ``request_timeout`` settings."""
    agent = cfg["user_agent"]
    request_timeout = cfg["request_timeout"]
    return MediathekClient(user_agent=agent, timeout=request_timeout)
@app.command()
def search(
    query_str: str = typer.Argument("", help="MVW query string, e.g. '!ARD #Tatort >80'"),
    channel: Optional[str] = typer.Option(None, "--channel"),
    topic: Optional[str] = typer.Option(None, "--topic"),
    title: Optional[str] = typer.Option(None, "--title"),
    description: Optional[str] = typer.Option(None, "--description"),
    min_duration: Optional[int] = typer.Option(None, "--min-duration", help="minutes"),
    max_duration: Optional[int] = typer.Option(None, "--max-duration", help="minutes"),
    sort: str = typer.Option("timestamp", "--sort"),
    order: str = typer.Option("desc", "--order"),
    future: bool = typer.Option(False, "--future"),
    limit: int = typer.Option(15, "--limit"),
    offset: int = typer.Option(0, "--offset"),
    json_out: bool = typer.Option(False, "--json"),
) -> None:
    # Run one query and print the page either as JSON or as a table followed
    # by a "showing a–b of total" footer. API failures exit with code 2.
    settings = configmod.load()
    request = query.build_payload(
        query_str or None,
        channel=channel,
        topic=topic,
        title=title,
        description=description,
        min_duration=min_duration,
        max_duration=max_duration,
        sort_by=sort,
        sort_order=order,
        future=future,
        offset=offset,
        size=limit,
    )
    api = _make_client(settings)
    try:
        found = api.query(request)
    except MediathekError as exc:
        err_console.print(display.error_panel(str(exc)))
        raise typer.Exit(2)

    hits = found.results
    if json_out:
        console.print_json(jsonlib.dumps([hit.__dict__ for hit in hits]))
        return
    if not hits:
        console.print("No results.")
        return

    # 1-based positions of the first and last row shown on this page.
    first = offset + 1
    last = offset + len(hits)
    console.print(display.results_table(hits, start_index=first))
    meta = found.query_info
    console.print(
        f"[dim]showing {first}–{last} of {meta.total_results} · "
        f"{meta.search_engine_time:.1f} ms[/]"
    )
@app.command()
def download(
    query_str: str = typer.Argument("", help="MVW query string"),
    channel: Optional[str] = typer.Option(None, "--channel"),
    topic: Optional[str] = typer.Option(None, "--topic"),
    title: Optional[str] = typer.Option(None, "--title"),
    min_duration: Optional[int] = typer.Option(None, "--min-duration", help="minutes"),
    max_duration: Optional[int] = typer.Option(None, "--max-duration", help="minutes"),
    season: bool = typer.Option(False, "--season", help="group into Plex season folders"),
    dry_run: bool = typer.Option(False, "--dry-run"),
    resolution: Optional[str] = typer.Option(None, "--resolution"),
    output: Optional[Path] = typer.Option(None, "--output", "-o"),
    template: Optional[str] = typer.Option(None, "--template"),
    exclude: list[str] = typer.Option([], "--exclude", help="regex (repeatable)"),
    dedup: bool = typer.Option(False, "--dedup"),
    latest_season: bool = typer.Option(False, "--latest-season"),
    season_number: Optional[int] = typer.Option(None, "--season-number"),
    subtitles: bool = typer.Option(False, "--subtitles"),
    limit: int = typer.Option(200, "--limit", help="max entries to resolve"),
) -> None:
    # Resolve matching entries, filter them, assign season/episode numbers,
    # build a download plan and either print it (--dry-run) or execute it.
    cfg = configmod.load()
    # Command-line options win over the configured values.
    preferred = resolution or cfg["resolution"]
    name_template = template or cfg["template"]
    target_root = output or Path(cfg["download_dir"])

    payload = query.build_payload(
        query_str or None,
        channel=channel,
        topic=topic,
        title=title,
        min_duration=min_duration,
        max_duration=max_duration,
    )
    client = _make_client(cfg)
    try:
        with console.status("Searching…", spinner="dots"):
            rows = list(client.iter_all(payload, page_size=cfg["page_size"], cap=limit))
    except MediathekError as exc:
        err_console.print(display.error_panel(str(exc)))
        raise typer.Exit(2)

    rows = filters.exclude(rows, exclude)
    if dedup:
        rows = filters.dedup(rows)
    if latest_season:
        rows = filters.latest_season(rows)
    if not rows:
        console.print("No matching entries to download.")
        return

    assigned = episodes.assign(rows, season_override=season_number)
    if season:
        # Flatten the season groups in the order group_by_season returns them.
        queue = []
        for season_eps in episodes.group_by_season(assigned).values():
            queue.extend(season_eps)
    else:
        queue = assigned

    # Each plan entry is (dest_path, url, tier, subtitle_url).
    plan: list[tuple[Path, str, str, str]] = []
    for ep in queue:
        try:
            url, tier = pick_resolution(ep.result, preferred)
        except DownloadError as exc:
            err_console.print(display.error_panel(f"{ep.result.title}: {exc}"))
            continue
        relative = naming.render(ep, template=name_template, tier=tier, ext="mp4")
        # Without --season only the file name is used; folders are dropped.
        dest = target_root / (relative if season else relative.name)
        plan.append((dest, url, tier, ep.result.url_subtitle))

    if dry_run:
        console.print(display.dry_run_tree([entry[:3] for entry in plan]))
        console.print(f"[dim]{len(plan)} file(s) planned[/]")
        return
    _run_downloads(plan, subtitles=subtitles)
def _run_downloads(plan, *, subtitles: bool) -> None:
    """Execute every ``(dest, url, tier, subtitle_url)`` entry of *plan*.

    Shows one progress task per file plus an "Overall" task. After all
    entries were attempted, exits with code 4 if ffmpeg was missing for any
    entry, otherwise with code 5 if any download failed.
    """
    bar = Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        DownloadColumn(),
        TransferSpeedColumn(),
        console=console,
    )
    failed = 0
    missing_ffmpeg = False
    with bar:
        overall = bar.add_task("Overall", total=len(plan))
        for dest, url, _tier, sub_url in plan:
            task_id = bar.add_task(dest.name, total=None)

            # Bind the task id as a default so each callback keeps its own task.
            def report(done: int, total, _t=task_id):
                bar.update(_t, completed=done, total=total)

            try:
                if not is_hls(url):
                    download_file(url, dest, on_progress=report)
                else:
                    bar.update(task_id, description=f"{dest.name} (ffmpeg)")
                    download_hls(url, dest)
                if subtitles and sub_url:
                    download_file(sub_url, dest.with_suffix(".xml"))
            except FFmpegMissingError as exc:
                missing_ffmpeg = True
                err_console.print(display.error_panel(
                    f"{dest.name}: {exc}\nInstall ffmpeg: https://ffmpeg.org/download.html"
                ))
            except DownloadError as exc:
                failed += 1
                err_console.print(display.error_panel(f"{dest.name}: {exc}"))
            finally:
                # Hide the per-file task and count the entry as processed
                # whether it succeeded or not.
                bar.update(task_id, visible=False)
                bar.advance(overall)
    if missing_ffmpeg:
        raise typer.Exit(4)
    if failed:
        raise typer.Exit(5)
@app.command()
def info(target: str = typer.Argument(..., help="query string; shows the first match")) -> None:
    # Query for a single result and print its detail panel.
    # API failures exit with code 2; an empty result exits with code 0.
    settings = configmod.load()
    request = query.build_payload(target, size=1)
    api = _make_client(settings)
    try:
        outcome = api.query(request)
    except MediathekError as exc:
        err_console.print(display.error_panel(str(exc)))
        raise typer.Exit(2)
    if outcome.results:
        console.print(display.detail_panel(outcome.results[0]))
        return
    console.print("No results.")
    raise typer.Exit(0)
# Sub-application mounted on the root app as the `config` command group.
config_app = typer.Typer(help="Manage configuration.")
app.add_typer(config_app, name="config")


@config_app.command("show")
def config_show() -> None:
    # Print the loaded configuration, one `key = value` line per entry.
    settings = configmod.load()
    for name in settings:
        console.print(f"[cyan]{name}[/] = {settings[name]}")
@config_app.command("set")
def config_set(key: str, value: str) -> None:
    # Persist one setting through configmod.set_value.
    # set_value converts numeric keys with int()/float(), which raises
    # ValueError for input such as `page_size abc`. Report that as a regular
    # error panel and exit with code 2 instead of letting a traceback escape.
    try:
        configmod.set_value(key, value)
    except ValueError as exc:
        err_console.print(display.error_panel(f"cannot set {key}: {exc}"))
        raise typer.Exit(2)
    console.print(f"Set [cyan]{key}[/] = {value}")
@config_app.command("path")
def config_path_cmd() -> None:
    # Print the location of the configuration file.
    location = configmod.config_path()
    console.print(str(location))


# Allow running this module directly as a script.
if __name__ == "__main__":
    app()

mvw/config.py ADDED Viewed

@@ -0,0 +1,67 @@
+from __future__ import annotations
+import tomllib
+from pathlib import Path
+import platformdirs
+from mvw.naming import DEFAULT_TEMPLATE
# Built-in settings; load() starts from a copy of these and applies the
# values found in the config file on top.
DEFAULTS: dict = dict(
    download_dir=".",
    template=DEFAULT_TEMPLATE,
    resolution="best",
    user_agent="mvw/0.1.0",
    page_size=50,
    request_timeout=30.0,
)
# Keys whose string input _coerce() converts with int() / float().
_INT_KEYS = {"page_size"}
_FLOAT_KEYS = {"request_timeout"}
def config_path() -> Path:
    """Return the path of ``config.toml`` inside the platformdirs user config dir for "mvw"."""
    base_dir = Path(platformdirs.user_config_dir("mvw"))
    return base_dir / "config.toml"
def load(path: Path | None = None) -> dict:
    """Return the defaults overlaid with the values stored in the config file.

    *path* defaults to :func:`config_path`. A missing file yields a plain copy
    of ``DEFAULTS``.
    """
    source = path or config_path()
    if not source.exists():
        return dict(DEFAULTS)
    with open(source, "rb") as fh:
        overrides = tomllib.load(fh)
    return {**DEFAULTS, **overrides}
def _coerce(key: str, value: str):
    """Convert the string *value* to the type stored for *key*.

    Keys in ``_INT_KEYS`` / ``_FLOAT_KEYS`` go through ``int()`` / ``float()``
    (which may raise ``ValueError``). For any other key, "true"/"false" in any
    letter case become booleans and everything else stays a string.
    """
    if key in _INT_KEYS:
        return int(value)
    if key in _FLOAT_KEYS:
        return float(value)
    lowered = value.lower()
    return lowered == "true" if lowered in ("true", "false") else value
# Short escape sequences defined for TOML basic strings.
_TOML_SHORT_ESCAPES = {
    "\\": "\\\\",
    '"': '\\"',
    "\b": "\\b",
    "\t": "\\t",
    "\n": "\\n",
    "\f": "\\f",
    "\r": "\\r",
}
# Characters allowed in an unquoted ("bare") TOML key.
_TOML_BARE_KEY_CHARS = frozenset(
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-"
)


def _toml_string(text: str) -> str:
    """Render *text* as a quoted TOML basic string."""
    pieces = []
    for ch in text:
        if ch in _TOML_SHORT_ESCAPES:
            pieces.append(_TOML_SHORT_ESCAPES[ch])
        elif ord(ch) < 0x20 or ord(ch) == 0x7F:
            # Remaining control characters may not appear raw in a basic string.
            pieces.append(f"\\u{ord(ch):04X}")
        else:
            pieces.append(ch)
    return '"' + "".join(pieces) + '"'


def _toml_key(key) -> str:
    """Return *key* bare when TOML allows it, otherwise as a quoted key."""
    name = str(key)
    if name and all(ch in _TOML_BARE_KEY_CHARS for ch in name):
        return name
    return _toml_string(name)


def _dump_toml(data: dict) -> str:
    """Serialize a flat mapping to TOML text, one ``key = value`` line per entry.

    Booleans are written as ``true``/``false``, ints and floats verbatim, and
    every other value as a basic string built from ``str(value)``. Strings are
    fully escaped and keys are quoted when needed so the output stays valid
    TOML even for values containing newlines or keys containing spaces.
    """
    lines = []
    for k, v in data.items():
        key = _toml_key(k)
        # bool must be tested before int: bool is a subclass of int.
        if isinstance(v, bool):
            lines.append(f"{key} = {str(v).lower()}")
        elif isinstance(v, (int, float)):
            lines.append(f"{key} = {v}")
        else:
            lines.append(f"{key} = {_toml_string(str(v))}")
    return "\n".join(lines) + "\n"
def set_value(key: str, value: str, path: Path | None = None) -> None:
    """Store *value* under *key* in the config file, keeping existing entries.

    *path* defaults to :func:`config_path`; its parent directory is created
    when missing. The value is converted with :func:`_coerce` first, so an
    invalid number raises ``ValueError`` before anything is written.
    """
    path = path or config_path()
    # Validate before touching the filesystem.
    coerced = _coerce(key, value)
    path.parent.mkdir(parents=True, exist_ok=True)
    current: dict = {}
    if path.exists():
        with open(path, "rb") as fh:
            current = tomllib.load(fh)
    current[key] = coerced
    # tomllib decodes the file as UTF-8, so write it as UTF-8 explicitly
    # rather than with the platform's default encoding.
    path.write_text(_dump_toml(current), encoding="utf-8")

mvw/display.py ADDED Viewed

@@ -0,0 +1,68 @@
+from __future__ import annotations
+from pathlib import Path
+from rich.panel import Panel
+from rich.table import Table
+from rich.tree import Tree
+from mvw.models import MediathekResult
# Short display label for each resolution tier.
_TIER_BADGE = {"low": "LOW", "medium": "SD", "high": "HD"}


def res_badges(result: MediathekResult) -> str:
    """Return the space-separated badge labels for the tiers in ``result.resolutions``."""
    labels = []
    for tier in result.resolutions:
        labels.append(_TIER_BADGE[tier])
    return " ".join(labels)
def results_table(results: list[MediathekResult], *, start_index: int = 1) -> Table:
    """Build the result table, numbering rows from *start_index*.

    Channel, topic and title come straight from the API and are escaped so
    that square brackets in them are shown literally instead of being parsed
    as Rich console markup.
    """
    from rich.markup import escape

    table = Table(show_lines=False, expand=True)
    table.add_column("#", justify="right", style="dim", no_wrap=True)
    table.add_column("Channel", style="cyan", no_wrap=True)
    table.add_column("Topic", style="magenta")
    table.add_column("Title")
    table.add_column("Date", no_wrap=True)
    table.add_column("Dur", justify="right", no_wrap=True)
    table.add_column("Res", no_wrap=True)
    for i, r in enumerate(results, start=start_index):
        table.add_row(
            str(i),
            escape(r.channel),
            escape(r.topic),
            escape(r.title),
            r.aired.strftime("%Y-%m-%d"),
            r.duration_human,
            res_badges(r),
        )
    return table
def detail_panel(result: MediathekResult) -> Panel:
    """Build a panel listing every field of *result*.

    All API-provided text (including the panel title) is escaped so that
    bracketed fragments are shown literally and cannot raise a Rich markup
    error; only the labels written here carry markup.
    """
    from rich.markup import escape

    def shown(value: str, placeholder: str = "—") -> str:
        # Empty values are replaced by the placeholder, as before.
        return escape(value) if value else placeholder

    lines = [
        f"[bold]Channel:[/] {escape(result.channel)}",
        f"[bold]Topic:[/] {escape(result.topic)}",
        f"[bold]Title:[/] {escape(result.title)}",
        f"[bold]Aired:[/] {result.aired.strftime('%Y-%m-%d %H:%M')}",
        f"[bold]Duration:[/] {result.duration_human}",
        f"[bold]Size:[/] {result.size_human}",
        f"[bold]Resolutions:[/] {res_badges(result) or '—'}",
        "",
        shown(result.description, "(no description)"),
        "",
        f"[dim]Video:[/] {shown(result.url_video)}",
        f"[dim]HD:[/] {shown(result.url_video_hd)}",
        f"[dim]Low:[/] {shown(result.url_video_low)}",
        f"[dim]Subtitle:[/] {shown(result.url_subtitle)}",
        f"[dim]Website:[/] {shown(result.url_website)}",
    ]
    return Panel("\n".join(lines), title=escape(result.title), border_style="cyan")
def dry_run_tree(plans: list[tuple[Path, str, str]]) -> Tree:
    """Build a tree of planned downloads grouped by destination folder.

    Each plan is ``(dest, url, tier)``; the URL is not displayed. Folder and
    file names are escaped because they are derived from API text and may
    contain square brackets that Rich would otherwise parse as markup.
    """
    from rich.markup import escape

    root = Tree("[bold]Planned downloads[/]")
    folders: dict[str, Tree] = {}
    for dest, _url, tier in plans:
        parent_key = str(dest.parent)
        if parent_key not in folders:
            folders[parent_key] = root.add(f"[blue]{escape(parent_key)}[/]")
        folders[parent_key].add(f"{escape(dest.name)}  [dim]({tier})[/]")
    return root
def error_panel(message: str) -> Panel:
    """Wrap *message* in a red-bordered panel titled "Error"."""
    panel = Panel(message, title="Error", border_style="red")
    return panel

mvw/download.py ADDED Viewed

@@ -0,0 +1,92 @@
+from __future__ import annotations
+import shutil
+import subprocess
+from pathlib import Path
+from typing import Callable
+import httpx
+from mvw.models import MediathekResult
# Progress callback: called with (bytes written so far, expected total or None).
ProgressCb = Callable[[int, "int | None"], None]


class DownloadError(Exception):
    """Base class for download failures raised by this module."""


class FFmpegMissingError(DownloadError):
    """Raised when the ffmpeg executable required for HLS downloads is not found."""
def is_hls(url: str) -> bool:
    """Report whether the part of *url* before the first ``?`` contains ``.m3u8``.

    The comparison is case-insensitive; the query string is ignored.
    """
    without_query, _, _ = url.partition("?")
    return without_query.lower().find(".m3u8") != -1
def pick_resolution(result: MediathekResult, preference: str) -> tuple[str, str]:
    """Return ``(url, tier)`` chosen by ``result.resolve_video(preference)``.

    Raises:
        DownloadError: when no URL is available for the entry.
    """
    url, tier = result.resolve_video(preference)
    if url:
        return url, tier
    raise DownloadError("no video URL available for this entry")
def download(
    url: str,
    dest: Path,
    *,
    on_progress: ProgressCb | None = None,
    resume: bool = True,
    client: httpx.Client | None = None,
) -> Path:
    """Download *url* to *dest* through a ``<dest>.part`` file.

    With *resume* enabled, an existing ``.part`` file is continued using an
    HTTP ``Range`` request. If the server answers 200 the download restarts
    from the beginning; if it answers 416 (the stored offset cannot be
    satisfied, e.g. a stale or already complete partial file) the request is
    repeated once without ``Range``. The ``.part`` file is moved onto *dest*
    only after the body was written completely.

    Args:
        on_progress: optional callback receiving ``(bytes_done, total_or_None)``.
        client: optional ``httpx.Client`` to reuse; a temporary one is created
            and closed when omitted.

    Raises:
        DownloadError: on an unexpected status code or an httpx error.
    """
    dest.parent.mkdir(parents=True, exist_ok=True)
    part = dest.with_name(dest.name + ".part")
    existing = part.stat().st_size if (resume and part.exists()) else 0
    owns_client = client is None
    client = client or httpx.Client(timeout=60.0, follow_redirects=True)
    try:
        while True:
            headers = {"Range": f"bytes={existing}-"} if existing else {}
            with client.stream("GET", url, headers=headers) as resp:
                if resp.status_code == 416 and existing:
                    # The partial file cannot be continued; start over. The
                    # retry sends no Range header, so this loops at most once.
                    existing = 0
                    continue
                if resp.status_code not in (200, 206):
                    raise DownloadError(f"HTTP {resp.status_code} downloading {url}")
                if resp.status_code == 200:
                    # Full body returned: discard any partial data.
                    existing = 0
                total: int | None = None
                cl = resp.headers.get("content-length")
                if cl is not None:
                    # Content-Length covers only the bytes of this response.
                    total = int(cl) + existing
                downloaded = existing
                with open(part, "ab" if existing else "wb") as fh:
                    for chunk in resp.iter_bytes():
                        fh.write(chunk)
                        downloaded += len(chunk)
                        if on_progress:
                            on_progress(downloaded, total)
            break
    except httpx.HTTPError as exc:
        # HTTPError is the base of TransportError and of the other request
        # errors (e.g. too many redirects), so none of them escapes unwrapped.
        raise DownloadError(f"network error: {exc}") from exc
    finally:
        if owns_client:
            client.close()
    part.replace(dest)
    return dest
def download_hls(url: str, dest: Path, *, ffmpeg: str = "ffmpeg") -> Path:
    """Download an HLS stream to *dest* by running ffmpeg with stream copy.

    Raises:
        FFmpegMissingError: when *ffmpeg* cannot be found on PATH.
        DownloadError: when ffmpeg exits with a non-zero status; the message
            carries the last 300 characters of its stderr.
    """
    if shutil.which(ffmpeg) is None:
        raise FFmpegMissingError(
            "ffmpeg not found on PATH; required for HLS (.m3u8) downloads"
        )
    dest.parent.mkdir(parents=True, exist_ok=True)
    existed_before = dest.exists()
    cmd = [ffmpeg, "-y", "-i", url, "-c", "copy", str(dest)]
    # stdin is detached so ffmpeg cannot consume input meant for the terminal.
    proc = subprocess.run(
        cmd, capture_output=True, text=True, stdin=subprocess.DEVNULL
    )
    if proc.returncode != 0:
        # Remove a truncated output, but never a file that existed before
        # this call (ffmpeg may have failed before opening the output).
        if not existed_before:
            dest.unlink(missing_ok=True)
        raise DownloadError(f"ffmpeg failed ({proc.returncode}): {proc.stderr[-300:]}")
    return dest

mvw/episodes.py ADDED Viewed

@@ -0,0 +1,94 @@
+from __future__ import annotations
+import re
+from dataclasses import dataclass
+from mvw.models import MediathekResult
# "S01E02"-style markers, with optional whitespace and one optional separator.
_SE = re.compile(r"S\s*(\d{1,2})\s*[ _/.\-]?\s*E\s*(\d{1,3})", re.IGNORECASE)
# "Staffel 2 ... Folge 3" (German for season/episode).
_STAFFEL_FOLGE = re.compile(r"Staffel\s*(\d{1,2}).*?Folge\s*(\d{1,3})", re.IGNORECASE)
# "Folge 3" and "3. Folge".
_FOLGE = re.compile(r"\bFolge\s*(\d{1,3})\b", re.IGNORECASE)
_FOLGE_PRE = re.compile(r"\b(\d{1,3})\.\s*Folge\b", re.IGNORECASE)
# A parenthesised number at the very end of the title, e.g. "Title (14)".
_TRAILING_NUM = re.compile(r"\((\d{1,3})\)\s*$")


def parse_se(title: str) -> tuple[int | None, int | None]:
    """Extract ``(season, episode)`` from *title*; missing parts are ``None``.

    Patterns are tried in order: ``S..E..``, ``Staffel .. Folge ..``,
    ``Folge N`` / ``N. Folge``, then a trailing ``(N)``. Only the first two
    provide a season.
    """
    for pattern in (_SE, _STAFFEL_FOLGE):
        m = pattern.search(title)
        if m:
            return int(m.group(1)), int(m.group(2))
    m = _FOLGE.search(title) or _FOLGE_PRE.search(title) or _TRAILING_NUM.search(title)
    if m:
        return None, int(m.group(1))
    return None, None


def clean_episode_title(title: str) -> str:
    """Strip season/episode markers and surrounding punctuation from *title*.

    Every marker form that :func:`parse_se` recognises is removed, including
    the "N. Folge" form. If nothing is left, the stripped original title is
    returned.
    """
    t = title
    for pattern in (_SE, _STAFFEL_FOLGE, _FOLGE, _FOLGE_PRE, _TRAILING_NUM):
        t = pattern.sub("", t)
    # Drop separators left dangling at either end after marker removal.
    t = re.sub(r"[\s\-–—:|()]+$", "", t)
    t = re.sub(r"^[\s\-–—:|()]+", "", t)
    return t.strip() or title.strip()
@dataclass
class Episode:
    """A search result together with its assigned season and episode number."""

    # The underlying search result.
    result: MediathekResult
    # Season and episode numbers as resolved by assign().
    season: int
    episode: int
def assign(
    results: list[MediathekResult], *, season_override: int | None = None
) -> list[Episode]:
    """Give every result a season and an episode number.

    Season: *season_override* when given, else the season parsed from the
    title, else the year of ``aired``. Within a season, rows are ordered by
    timestamp; an episode number parsed from the title is kept as is, and
    rows without one receive the next free number above the running counter.
    """
    # Bucket rows by resolved season, remembering any explicit episode number.
    buckets: dict[int, list[tuple[MediathekResult, int | None]]] = {}
    for item in results:
        parsed_season, parsed_episode = parse_se(item.title)
        if season_override is not None:
            season_no = season_override
        elif parsed_season is not None:
            season_no = parsed_season
        else:
            season_no = item.aired.year
        buckets.setdefault(season_no, []).append((item, parsed_episode))

    numbered: list[Episode] = []
    for season_no, members in buckets.items():
        members.sort(key=lambda pair: pair[0].timestamp)
        # Numbers claimed explicitly must never be handed out again.
        taken = {num for _, num in members if num is not None}
        cursor = 0
        for item, explicit in members:
            if explicit is not None:
                chosen = explicit
                cursor = max(cursor, explicit)
            else:
                cursor += 1
                while cursor in taken:
                    cursor += 1
                taken.add(cursor)
                chosen = cursor
            numbered.append(Episode(result=item, season=season_no, episode=chosen))
    return numbered
def group_by_season(episodes: list[Episode]) -> dict[int, list[Episode]]:
    """Group *episodes* by season.

    The returned dict lists seasons in ascending order, each with its
    episodes sorted by episode number.
    """
    by_season: dict[int, list[Episode]] = {}
    for ep in episodes:
        if ep.season not in by_season:
            by_season[ep.season] = []
        by_season[ep.season].append(ep)

    ordered: dict[int, list[Episode]] = {}
    for season_no in sorted(by_season):
        ordered[season_no] = sorted(by_season[season_no], key=lambda item: item.episode)
    return ordered

mvw/filters.py ADDED Viewed

@@ -0,0 +1,66 @@
+from __future__ import annotations
+import re
+from mvw.models import MediathekResult
# Lower-case patterns for accessibility-variant markers in titles
# (audio description, sign language, audio version).
_VARIANT_MARKERS = [
    r"\(?audiodeskription\)?",
    r"\(?mit geb(ä|ae)rdensprache\)?",
    r"\(?geb(ä|ae)rdensprache\)?",
    r"\(?h(ö|oe)rfassung\)?",
]
# Anything that is neither a word character nor whitespace.
_PUNCT = re.compile(r"[^\w\s]", re.UNICODE)
_WS = re.compile(r"\s+")


def normalize_title(title: str) -> str:
    """Return a comparison key for *title*.

    The title is lower-cased, variant markers and punctuation are replaced by
    spaces, and whitespace runs are collapsed and trimmed.
    """
    lowered = title.lower()
    for pattern in _VARIANT_MARKERS:
        lowered = re.sub(pattern, " ", lowered)
    without_punct = _PUNCT.sub(" ", lowered)
    collapsed = _WS.sub(" ", without_punct)
    return collapsed.strip()
def exclude(results: list[MediathekResult], patterns: list[str]) -> list[MediathekResult]:
    """Return the results that match none of the regex *patterns*.

    Each pattern is searched case-insensitively in the title, topic and
    description joined by newlines. With no patterns a copy of *results* is
    returned.
    """
    if not patterns:
        return list(results)
    matchers = [re.compile(p, re.IGNORECASE) for p in patterns]

    def is_excluded(row: MediathekResult) -> bool:
        text = f"{row.title}\n{row.topic}\n{row.description}"
        return any(m.search(text) for m in matchers)

    return [row for row in results if not is_excluded(row)]
def _score(r: MediathekResult) -> tuple[int, int]:
    """Ranking key for duplicates: number of resolutions, then duration."""
    resolution_count = len(r.resolutions)
    return resolution_count, r.duration
def dedup(results: list[MediathekResult]) -> list[MediathekResult]:
    """Keep one result per (topic, normalized title) pair.

    Among duplicates the entry with the highest ``_score`` wins (the earlier
    one on ties). Output order follows the first occurrence of each key.
    """
    winners: dict[str, MediathekResult] = {}
    for candidate in results:
        key = f"{candidate.topic.lower().strip()}|{normalize_title(candidate.title)}"
        incumbent = winners.get(key)
        # Replacing a value keeps the key's original position in the dict.
        if incumbent is None or _score(candidate) > _score(incumbent):
            winners[key] = candidate
    return list(winners.values())
def latest_season(results: list[MediathekResult]) -> list[MediathekResult]:
    """Keep only the results whose title carries the highest season number.

    When no title yields a season, a copy of *results* is returned unchanged.
    """
    from mvw.episodes import parse_se

    tagged = [(row, parse_se(row.title)[0]) for row in results]
    known = [season for _, season in tagged if season is not None]
    if not known:
        return list(results)
    newest = max(known)
    return [row for row, season in tagged if season == newest]

mvw/models.py ADDED Viewed

@@ -0,0 +1,109 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from datetime import datetime, timezone
# Attribute holding the URL for each resolution tier.
_TIER_TO_FIELD = {"high": "url_video_hd", "medium": "url_video", "low": "url_video_low"}
# Tiers to try, in order, for each resolution preference.
_FALLBACK_ORDER = {
    "best": ["high", "medium", "low"],
    "high": ["high", "medium", "low"],
    "medium": ["medium", "low", "high"],
    "low": ["low", "medium", "high"],
}


@dataclass
class MediathekResult:
    """One entry of an API result list, with display and URL-selection helpers."""

    channel: str
    topic: str
    title: str
    description: str
    timestamp: int
    duration: int
    size: int
    url_website: str
    url_subtitle: str
    url_video: str
    url_video_low: str
    url_video_hd: str
    filmliste_timestamp: str
    id: str

    @classmethod
    def from_api(cls, d: dict) -> "MediathekResult":
        """Build a result from an API dict; missing or falsy values become ``""`` / ``0``."""

        def text(key: str):
            return d.get(key, "") or ""

        def number(key: str) -> int:
            return int(d.get(key, 0) or 0)

        return cls(
            channel=text("channel"),
            topic=text("topic"),
            title=text("title"),
            description=text("description"),
            timestamp=number("timestamp"),
            duration=number("duration"),
            size=number("size"),
            url_website=text("url_website"),
            url_subtitle=text("url_subtitle"),
            url_video=text("url_video"),
            url_video_low=text("url_video_low"),
            url_video_hd=text("url_video_hd"),
            filmliste_timestamp=str(text("filmlisteTimestamp")),
            id=str(text("id")),
        )

    @property
    def aired(self) -> datetime:
        """``timestamp`` as a timezone-aware UTC datetime."""
        return datetime.fromtimestamp(self.timestamp, tz=timezone.utc)

    @property
    def duration_human(self) -> str:
        """``duration`` formatted as ``H:MM:SS``, or ``M:SS`` below one hour."""
        total_minutes, seconds = divmod(self.duration, 60)
        hours, minutes = divmod(total_minutes, 60)
        if hours:
            return f"{hours}:{minutes:02d}:{seconds:02d}"
        return f"{minutes}:{seconds:02d}"

    @property
    def size_human(self) -> str:
        """``size`` scaled by 1024 steps up to TB; bytes without decimals, else one decimal."""
        units = ("B", "KB", "MB", "GB", "TB")
        amount = float(self.size)
        idx = 0
        while amount >= 1024 and idx < len(units) - 1:
            amount /= 1024
            idx += 1
        unit = units[idx]
        return f"{amount:.0f} {unit}" if unit == "B" else f"{amount:.1f} {unit}"

    def resolution_present(self, tier: str) -> bool:
        """Tell whether a non-empty URL is stored for *tier*."""
        field_name = _TIER_TO_FIELD[tier]
        return bool(getattr(self, field_name, ""))

    @property
    def resolutions(self) -> list[str]:
        """Tiers with a URL, listed in the order low, medium, high."""
        available = []
        for tier in ("low", "medium", "high"):
            if self.resolution_present(tier):
                available.append(tier)
        return available

    def resolve_video(self, preference: str) -> tuple[str | None, str | None]:
        """Return ``(url, tier)`` for the first available tier in the fallback order.

        An unknown *preference* uses the "best" order; ``(None, None)`` is
        returned when no URL exists at all.
        """
        order = _FALLBACK_ORDER.get(preference, _FALLBACK_ORDER["best"])
        for tier in order:
            candidate = getattr(self, _TIER_TO_FIELD[tier], "")
            if candidate:
                return candidate, tier
        return None, None
@dataclass
class QueryInfo:
    """Metadata read from the ``queryInfo`` part of an API response."""

    total_results: int
    result_count: int
    search_engine_time: float
    filmliste_timestamp: str

    @classmethod
    def from_api(cls, d: dict) -> "QueryInfo":
        """Build the info object; missing or falsy values become ``0`` / ``""``."""
        raw_total = d.get("totalResults", 0) or 0
        raw_count = d.get("resultCount", 0) or 0
        raw_time = d.get("searchEngineTime", 0) or 0
        raw_stamp = d.get("filmlisteTimestamp", "") or ""
        return cls(
            total_results=int(raw_total),
            result_count=int(raw_count),
            search_engine_time=float(raw_time),
            filmliste_timestamp=str(raw_stamp),
        )
@dataclass
class QueryResult:
    """One page of results together with the query metadata."""

    # Parsed entries of this page.
    results: list[MediathekResult]
    # Totals and timing reported alongside the page.
    query_info: QueryInfo

mvw/naming.py ADDED Viewed

@@ -0,0 +1,47 @@
+from __future__ import annotations
+import re
+from pathlib import Path
+from mvw.episodes import Episode, clean_episode_title
# Default layout: "<series> (<year>)/Season NN/<series> (<year>) - sNNeNN - <title> [<res>].<ext>"
DEFAULT_TEMPLATE = (
    "{series} ({year})/Season {s:02d}/"
    "{series} ({year}) - s{s:02d}e{e:02d} - {ep_title} [{res}].{ext}"
)
# Label substituted for {res} per resolution tier.
RES_LABELS = {"high": "1080p", "medium": "720p", "low": "480p"}
# Characters replaced by a space in path components.
_ILLEGAL = re.compile(r'[/\\:*?"<>|]')
_WS = re.compile(r"\s+")


def sanitize(component: str) -> str:
    """Make *component* safe to use as a single path component.

    Illegal characters become spaces, whitespace runs are collapsed, the
    result is limited to 150 characters, and trailing dots/spaces are removed.
    """
    s = _ILLEGAL.sub(" ", component)
    s = _WS.sub(" ", s).strip()
    # Trim after truncating: cutting at 150 characters can itself leave a
    # space or dot at the end of the component.
    return s[:150].rstrip(". ")
def render(
    episode: Episode, *, template: str = DEFAULT_TEMPLATE, tier: str, ext: str
) -> Path:
    """Render *template* for *episode* into a sanitized relative path.

    Available placeholders: ``series``, ``year``, ``s``, ``e``, ``ep_title``,
    ``res``, ``channel``, ``date`` and ``ext``. The rendered text is split on
    ``/`` and every component goes through :func:`sanitize`. If sanitizing
    shortened the file name so far that it lost its ``.<ext>`` suffix, the
    stem is shortened instead and the suffix is kept.
    """
    r = episode.result
    values = {
        "series": r.topic,
        "year": r.aired.year,
        "s": episode.season,
        "e": episode.episode,
        "ep_title": clean_episode_title(r.title),
        "res": RES_LABELS.get(tier, tier),
        "channel": r.channel,
        "date": r.aired.strftime("%Y-%m-%d"),
        "ext": ext,
    }
    # Scrub path separators from string values to prevent path injection.
    # Integers (year, s, e) are left unchanged to preserve format specs like :02d.
    for key in ("series", "ep_title", "res", "channel", "date", "ext"):
        if isinstance(values[key], str):
            values[key] = values[key].replace("/", " ").replace("\\", " ")
    rendered = template.format(**values)
    raw_parts = [p for p in rendered.split("/") if p]
    parts = [sanitize(p) for p in raw_parts]

    suffix = f".{values['ext']}" if values["ext"] else ""
    if (
        parts
        and suffix
        and raw_parts[-1].endswith(suffix)
        and not parts[-1].endswith(suffix)
        # Only re-attach a suffix that is itself a safe component ending.
        and sanitize(suffix) == suffix
    ):
        # Stay within the length sanitize() allowed for the whole file name.
        budget = max(len(parts[-1]) - len(suffix), 0)
        stem = sanitize(raw_parts[-1][: -len(suffix)])
        parts[-1] = stem[:budget].rstrip(". ") + suffix
    return Path(*parts)

mvw/query.py ADDED Viewed

@@ -0,0 +1,105 @@
+from __future__ import annotations
# Selector prefix character -> API field it searches.
_PREFIX_FIELD = {"!": "channel", "#": "topic", "+": "title", "*": "description"}


def parse_raw(raw: str) -> tuple[list[dict], int | None, int | None]:
    """Convert an MVW query string into API query dicts plus duration bounds.

    Returns ``(queries, duration_min, duration_max)`` with durations in
    seconds. ``>N`` / ``<N`` set the minimum / maximum duration in minutes.
    A prefixed token (``!``, ``#``, ``+``, ``*``) selects one field; values of
    a repeated selector are joined by spaces. A ``#`` selector additionally
    absorbs the bare words that directly follow it. Remaining bare words
    form one query over topic and title.
    """

    def is_duration(tok: str) -> bool:
        return tok[0] in "<>" and tok[1:].isdigit()

    def is_selector(tok: str) -> bool:
        return tok[0] in _PREFIX_FIELD and len(tok) > 1

    selectors: dict[str, list[str]] = {}
    loose: list[str] = []
    shortest: int | None = None
    longest: int | None = None

    words = raw.split()
    pos = 0
    while pos < len(words):
        word = words[pos]
        pos += 1
        if is_duration(word):
            seconds = int(word[1:]) * 60
            if word[0] == ">":
                shortest = seconds
            else:
                longest = seconds
            continue
        if not is_selector(word):
            loose.append(word)
            continue
        field = _PREFIX_FIELD[word[0]]
        collected = [word[1:]]
        if field == "topic":
            # Absorb following words until the next selector or duration token.
            while pos < len(words) and not (
                is_selector(words[pos]) or is_duration(words[pos])
            ):
                collected.append(words[pos])
                pos += 1
        selectors.setdefault(field, []).extend(collected)

    queries: list[dict] = [
        {"fields": [field], "query": " ".join(terms)}
        for field, terms in selectors.items()
    ]
    if loose:
        queries.append({"fields": ["topic", "title"], "query": " ".join(loose)})
    return queries, shortest, longest
def build_payload(
    raw: str | None = None,
    *,
    channel: str | None = None,
    topic: str | None = None,
    title: str | None = None,
    description: str | None = None,
    min_duration: int | None = None,
    max_duration: int | None = None,
    sort_by: str = "timestamp",
    sort_order: str = "desc",
    future: bool = False,
    offset: int = 0,
    size: int = 15,
) -> dict:
    """Assemble the JSON payload for the query endpoint.

    Queries parsed from *raw* come first, followed by one query per non-empty
    field argument. *min_duration* / *max_duration* are given in minutes and
    override the bounds parsed from *raw*; duration keys are only included
    when a bound is set.
    """
    queries, dur_min, dur_max = parse_raw(raw) if raw else ([], None, None)

    explicit = {
        "channel": channel,
        "topic": topic,
        "title": title,
        "description": description,
    }
    queries.extend(
        {"fields": [field], "query": value}
        for field, value in explicit.items()
        if value
    )

    # Explicit options are in minutes; the payload uses seconds.
    if min_duration is not None:
        dur_min = min_duration * 60
    if max_duration is not None:
        dur_max = max_duration * 60

    payload: dict = {
        "queries": queries,
        "sortBy": sort_by,
        "sortOrder": sort_order,
        "future": future,
        "offset": offset,
        "size": size,
    }
    if dur_min is not None:
        payload["duration_min"] = dur_min
    if dur_max is not None:
        payload["duration_max"] = dur_max
    return payload

mvw_cli-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,256 @@
+Metadata-Version: 2.4
+Name: mvw-cli
+Version: 0.1.0
+Summary: Search and download German public-broadcasting media from MediathekViewWeb, with Plex-friendly season downloads.
+Author-email: Max Boettinger <perplexity@bttngr.de>
+License-Expression: MIT
+License-File: LICENSE
+Keywords: ard,cli,download,german,mediathek,mediathekviewweb,plex,television,zdf
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Console
+Classifier: Intended Audience :: End Users/Desktop
+Classifier: Natural Language :: German
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Multimedia :: Video
+Classifier: Topic :: Utilities
+Requires-Python: >=3.13
+Requires-Dist: httpx>=0.27
+Requires-Dist: platformdirs>=4.2
+Requires-Dist: rich>=13.7
+Requires-Dist: typer>=0.12
+Description-Content-Type: text/markdown
+# mvw
+A command-line tool for searching and downloading content from
+[MediathekViewWeb](https://mediathekviewweb.de/) (MVW), the index of German
+public-broadcasting media libraries (ARD, ZDF, WDR, and more). Built for
+automation: the headline feature is reliable, Plex-friendly **season**
+downloads.
+## Install
+Requires Python ≥ 3.13. The distribution is published as **`mvw-cli`** (the
+PyPI name `mvw` was already taken); the installed command is `mvw`.
+Install as a standalone tool with [uv](https://github.com/astral-sh/uv):
+```bash
+uv tool install mvw-cli      # adds the `mvw` command to your PATH
+```
+Or run it once without installing:
+```bash
+uvx --from mvw-cli mvw search "#Tatort"
+```
+With pip:
+```bash
+pip install mvw-cli
+```
+> **Note:** HLS (`.m3u8`) downloads require [ffmpeg](https://ffmpeg.org/download.html)
+> on your `PATH`. It is an external (non-Python) dependency and is not installed
+> automatically.
+### From source
+```bash
+uv sync                      # create the dev environment
+uv run mvw --help            # run from the working tree
+```
+## Query grammar
+The query string follows the MediathekViewWeb syntax:
+| Prefix | Field searched | Example |
+|--------|---------------|---------|
+| `!` | channel | `!ARD` |
+| `#` | topic | `#Tatort` |
+| `+` | title | `+Schokolade` |
+| `*` | description | `*Berlin` |
+| (none) | topic and title | `feuer flamme` |
+| `>N` | duration > N minutes | `>80` |
+| `<N` | duration < N minutes | `<10` |
+Combination rules:
+- **Space between different selectors** → AND: `!WDR #Tatort` means channel=WDR
+  AND topic=Tatort.
+- **Same selector repeated** → OR: `!ARD !ZDF` means ARD or ZDF.
+- **Comma within a selector's value** → AND of words: `#Olympia,Tokio` matches
+  topic containing both "Olympia" and "Tokio".
+- **No negation operator.** Exclusion is done client-side with `--exclude`.
+> Note: the API is case-insensitive and flexible with umlauts
+> (`ö` ≈ `oe` ≈ `OE`).
+## Commands
+### `mvw search`
+Search MVW and display a Rich results table.
+```
+mvw search QUERY
+           [--channel C] [--topic T] [--title T] [--description D]
+           [--min-duration MIN] [--max-duration MAX]
+           [--sort timestamp|duration|channel] [--order asc|desc]
+           [--future] [--limit N] [--offset N] [--json]
+```
+| Option | Default | Description |
+|--------|---------|-------------|
+| `--channel` | — | Filter by channel (structured flag, not query syntax) |
+| `--topic` | — | Filter by topic |
+| `--title` | — | Filter by title |
+| `--description` | — | Filter by description |
+| `--min-duration` | — | Minimum duration in minutes |
+| `--max-duration` | — | Maximum duration in minutes |
+| `--sort` | `timestamp` | Sort field |
+| `--order` | `desc` | Sort order (`asc` or `desc`) |
+| `--future` | off | Include not-yet-aired entries |
+| `--limit` | 15 | Number of results to fetch |
+| `--offset` | 0 | Pagination offset |
+| `--json` | off | Emit raw JSON to stdout (scripting-friendly) |
+Example:
+```bash
+mvw search "#Tatort !ARD >80"
+```
+### `mvw download`
+Search and download matching entries. Run with `--dry-run` first to preview the
+exact file tree before downloading anything.
+```
+mvw download QUERY
+             [--channel C] [--topic T] [--title T]
+             [--min-duration MIN] [--max-duration MAX]
+             [--season] [--dry-run]
+             [--resolution low|medium|high|best]
+             [--output DIR] [-o DIR] [--template STR]
+             [--exclude TERM ...] [--dedup] [--latest-season]
+             [--season-number N] [--subtitles] [--limit N]
+```
+| Option | Default | Description |
+|--------|---------|-------------|
+| `--channel` | — | Filter by channel |
+| `--topic` | — | Filter by topic |
+| `--title` | — | Filter by title |
+| `--min-duration` | — | Minimum duration in minutes |
+| `--max-duration` | — | Maximum duration in minutes |
+| `--season` | off | Group into Plex season folders using `S##E##` numbering |
+| `--dry-run` | off | Preview the file tree and source URLs; download nothing |
+| `--resolution` | `best` | Resolution preference: `low`, `medium`, `high`, or `best` |
+| `--output`, `-o` | config default | Output directory |
+| `--template` | Plex default | Custom filename template (see below) |
+| `--exclude` | — | Regex matched against title/topic/description; matching entries are excluded (repeatable) |
+| `--dedup` | off | Remove near-duplicate entries, keeping the highest-quality copy |
+| `--latest-season` | off | Keep only entries from the highest detected season |
+| `--season-number` | — | Override detected season number |
+| `--subtitles` | off | Also fetch subtitle files alongside each video |
+| `--limit` | 200 | Maximum number of entries to resolve |
+#### Filename template
+The default template produces Plex/Jellyfin-compatible paths:
+```
+{series} ({year})/Season {s:02d}/{series} ({year}) - s{s:02d}e{e:02d} - {ep_title} [{res}].{ext}
+```
+Override with `--template`. Available tokens:
+| Token | Value |
+|-------|-------|
+| `{series}` | Topic (show name) |
+| `{year}` | Broadcast year |
+| `{s}` | Season number (supports `:02d` formatting) |
+| `{e}` | Episode number (supports `:02d` formatting) |
+| `{ep_title}` | Cleaned episode title |
+| `{res}` | Resolution label (see note below) |
+| `{channel}` | Broadcaster |
+| `{date}` | Broadcast date (`YYYY-MM-DD`) |
+| `{ext}` | File extension |
+**`{res}` label note:** MVW exposes only three tiers (`low` / `medium` / `high`),
+not measured pixel heights. The `{res}` token maps these to conventional labels —
+`high → "1080p"`, `medium → "720p"`, `low → "480p"` — because Plex parses these
+and they reflect typical public-broadcast encodes. These are labels, not
+guarantees of exact resolution.
+#### ffmpeg requirement for HLS
+Some entries serve `.m3u8` HLS playlists instead of direct `.mp4` files. Those
+are downloaded via `ffmpeg -i <url> -c copy <dest>`. If ffmpeg is not on your
+PATH and an HLS entry is encountered, `mvw` exits with code 4 and prints an
+install hint. Install from <https://ffmpeg.org/download.html>.
+#### Flagship example: Feuer und Flamme
+```bash
+# Preview the newest season, deduped, without audio-description or sign-language versions
+mvw download "#Feuer und Flamme" --season --latest-season --dedup \
+    --exclude Audiodeskription --exclude "Gebärdensprache" \
+    --output ~/Media/TV --dry-run
+# Then download for real with the same filters (resolution defaults to best)
+mvw download "#Feuer und Flamme" --season --latest-season --dedup \
+    --exclude Audiodeskription --exclude "Gebärdensprache" \
+    --output ~/Media/TV
+```
+### `mvw info`
+Show a Rich detail panel for the first match of a query.
+```
+mvw info QUERY
+```
+Displays: topic, title, description, channel, aired datetime, duration, size,
+available resolutions with URLs, subtitle URL, website URL, and detected
+season/episode.
+### `mvw config`
+Manage persistent configuration stored in `config.toml`
+(location: `platformdirs.user_config_dir("mvw")`).
+```
+mvw config show              # Print the effective config (key = value)
+mvw config set KEY VALUE     # Write a key to config.toml
+mvw config path              # Print the path to config.toml
+```
+Available keys: `download_dir`, `template`, `resolution`, `user_agent`,
+`page_size`, `request_timeout`.
+CLI flags always override config file values, which override built-in defaults.
+## Exit codes
+| Code | Condition |
+|------|-----------|
+| 0 | Success or no results |
+| 2 | API error (non-null `err`), HTTP error, or network/timeout after retries |
+| 4 | HLS entry encountered but ffmpeg is not installed |
+| 5 | Partial/interrupted download failure |
+## Running tests
+```bash
+# Unit and mocked tests (default)
+uv run pytest -q
+# Run only the live API test (requires network access)
+uv run pytest -m live tests/test_live.py -v
+```

mvw_cli-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,16 @@
+mvw/__init__.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
+mvw/api.py,sha256=umJQipBJf28uOxaAfaFObg46t_f6k1wfx2ihUffFiBo,2434
+mvw/cli.py,sha256=jfAFGccyHEqrr_ufOE2_PaYYkXjAQfzCT8C5Ovy-3mc,8519
+mvw/config.py,sha256=sHQjkHt22Z0-2sRO6Wdlo8l-F283Y3Nzs7KtGqzyjdM,1718
+mvw/display.py,sha256=96Plt3Nzcb549r7Df4CT6YNVpFqrxpjqfwsgkXatgew,2466
+mvw/download.py,sha256=MCbXATjGv4ZDH4mbaAmM4IuybW_r4IEqR8AG4Qm8EE8,2719
+mvw/episodes.py,sha256=QjrRf3L2ihgzAJNWDqzaC2g1N5rOY5x2LDsAGEbhEv0,2932
+mvw/filters.py,sha256=mySPklWFk6l94YGC9duoe1qw-YXwTqC8xvJ2_iquk3w,1861
+mvw/models.py,sha256=GfgtqdOgWunAkTk5e2HocedkagTRBIqz4_6TcAYKkVo,3287
+mvw/naming.py,sha256=sP41a395jwookebM360G4j6zYx2scosw51m1yv3fQ5Y,1462
+mvw/query.py,sha256=l__5Ae0xtchr1iRU2ng-TmEfJZafZyU-BYK_s7zmc5A,3495
+mvw_cli-0.1.0.dist-info/METADATA,sha256=3appoB6h8ZHdqSEa7dkyU7deYNK8hasjcbK5bYjSGXc,8635
+mvw_cli-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+mvw_cli-0.1.0.dist-info/entry_points.txt,sha256=YkBabN7J7BLpn37C8tmFf1E7kWUHBAo11wxjhRspSi8,36
+mvw_cli-0.1.0.dist-info/licenses/LICENSE,sha256=tT1uuoa2LujVevO7EDuNARVNvTUzbEhhOuAnNGsx2cQ,1071
+mvw_cli-0.1.0.dist-info/RECORD,,

mvw_cli-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any

mvw_cli-0.1.0.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ mvw = mvw.cli:app

mvw_cli-0.1.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Max Boettinger
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.