PyPI - slidecast - Versions diffs - 0.1.0__py3-none-any.whl - Mend

slidecast 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

slidecast/__init__.py +67 -0
slidecast/cli.py +112 -0
slidecast/ffmpeg.py +42 -0
slidecast/models.py +34 -0
slidecast/reel.py +123 -0
slidecast/render.py +131 -0
slidecast/tts.py +144 -0
slidecast/video.py +110 -0
slidecast-0.1.0.dist-info/METADATA +123 -0
slidecast-0.1.0.dist-info/RECORD +13 -0
slidecast-0.1.0.dist-info/WHEEL +4 -0
slidecast-0.1.0.dist-info/entry_points.txt +2 -0
slidecast-0.1.0.dist-info/licenses/LICENSE +21 -0

slidecast/__init__.py ADDED Viewed

@@ -0,0 +1,67 @@
+"""slidecast — turn a list of HTML slides + narration into a narrated MP4.
+You bring the slide design (any HTML you like) and the words; slidecast
+screenshots each slide with a headless browser, narrates it with a pluggable
+text-to-speech provider, and stitches the frames into one MP4 with ffmpeg.
+Quick start
+-----------
+    from slidecast import Reel, KokoroTTS
+    reel = Reel(width=1280, height=720, tts=KokoroTTS(voice="af_heart"))
+    reel.add("<!doctype html><h1>Hello</h1>", "Hello, and welcome.")
+    reel.add("<!doctype html><h1>Bye</h1>", "Thanks for watching.", tail_pad=0.8)
+    reel.render("out.mp4", make_poster=True)
+Pieces (all swappable)
+----------------------
+Model:
+    Slide(html, narration="", tail_pad=0.0, min_duration=0.0)
+    Reel(width, height, fps, tts=..., renderer=...).add(...).render(out)
+Text-to-speech (``synthesize(text, path) -> seconds | None``):
+    KokoroTTS  — any OpenAI-compatible /v1/audio/speech endpoint
+    GTTSTTS    — Google Translate TTS (mp3)
+    SilentTTS  — silent track, no deps (default)
+Renderers (HTML -> PNG, used as a context manager):
+    PlaywrightRenderer    — headless Chromium (default)
+    ChromeBinaryRenderer  — drive an existing Chrome binary by path
+ffmpeg steps (injectable runner, for direct use/testing):
+    build_segment(...) / concat(...) / poster(...)
+    find_ffmpeg() -> path   ($SLIDECAST_FFMPEG, then PATH, then imageio-ffmpeg)
+"""
+from .ffmpeg import FFmpegNotFound, find_ffmpeg
+from .models import Slide
+from .reel import Reel
+from .render import ChromeBinaryRenderer, PlaywrightRenderer, Renderer
+from .tts import (
+    GTTSTTS,
+    KokoroTTS,
+    SilentTTS,
+    TTSProvider,
+    apply_phonetic,
+    wav_duration,
+)
+from .video import build_segment, concat, poster
+__version__ = "0.1.0"
+__all__ = [
+    "Slide",
+    "Reel",
+    "Renderer",
+    "PlaywrightRenderer",
+    "ChromeBinaryRenderer",
+    "TTSProvider",
+    "KokoroTTS",
+    "GTTSTTS",
+    "SilentTTS",
+    "apply_phonetic",
+    "wav_duration",
+    "build_segment",
+    "concat",
+    "poster",
+    "find_ffmpeg",
+    "FFmpegNotFound",
+    "__version__",
+]

slidecast/cli.py ADDED Viewed

@@ -0,0 +1,112 @@
+"""Command line: render a reel from a spec file.
+    slidecast render reel.yaml -o out.mp4 --poster
+A spec is YAML or JSON::
+    width: 1280
+    height: 720
+    fps: 25
+    tts:
+      provider: kokoro        # kokoro | gtts | silent
+      url: http://127.0.0.1:8021/v1/audio/speech
+      voice: af_heart
+      response_format: wav
+    slides:
+      - html_file: intro.html       # path (relative to the spec) ...
+        narration: "Welcome."
+        tail_pad: 0.8
+      - html: "<!doctype html>..."  # ... or inline HTML
+        narration: ""               # empty => silent slide
+        min_duration: 3
+"""
+from __future__ import annotations
+import argparse
+import json
+import sys
+from pathlib import Path
+from typing import Any, Dict
+from .reel import Reel
+from .render import ChromeBinaryRenderer
+from .tts import GTTSTTS, KokoroTTS, SilentTTS
+def _load_spec(path: Path) -> Dict[str, Any]:
+    text = path.read_text()
+    if path.suffix in (".yaml", ".yml"):
+        try:
+            import yaml
+        except ImportError:  # noqa: BLE001
+            raise SystemExit("YAML spec needs PyYAML — `pip install slidecast[yaml]`")
+        return yaml.safe_load(text)
+    return json.loads(text)
+def _build_tts(cfg: Dict[str, Any]):
+    cfg = dict(cfg or {})
+    provider = (cfg.pop("provider", "silent") or "silent").lower()
+    if provider == "kokoro":
+        return KokoroTTS(**cfg)
+    if provider == "gtts":
+        return GTTSTTS(**cfg)
+    if provider == "silent":
+        return SilentTTS(**cfg) if cfg else SilentTTS()
+    raise SystemExit(f"Unknown tts provider: {provider!r}")
+def _build_reel(spec: Dict[str, Any], base: Path) -> Reel:
+    reel = Reel(
+        width=int(spec.get("width", 1920)),
+        height=int(spec.get("height", 1080)),
+        fps=int(spec.get("fps", 25)),
+        tts=_build_tts(spec.get("tts", {})),
+        silent_slide_seconds=float(spec.get("silent_slide_seconds", 3.0)),
+    )
+    chrome = spec.get("chrome")
+    if chrome:
+        reel.renderer = ChromeBinaryRenderer(chrome=chrome)
+    for s in spec.get("slides", []):
+        html = s.get("html")
+        if not html and s.get("html_file"):
+            html = (base / s["html_file"]).read_text()
+        if not html:
+            raise SystemExit("each slide needs 'html' or 'html_file'")
+        reel.add(html, s.get("narration", ""),
+                 tail_pad=float(s.get("tail_pad", 0.0)),
+                 min_duration=float(s.get("min_duration", 0.0)))
+    return reel
+def main(argv=None) -> int:
+    parser = argparse.ArgumentParser(prog="slidecast", description=__doc__,
+                                     formatter_class=argparse.RawDescriptionHelpFormatter)
+    sub = parser.add_subparsers(dest="cmd", required=True)
+    r = sub.add_parser("render", help="render a reel spec to an MP4")
+    r.add_argument("spec", type=Path, help="YAML or JSON reel spec")
+    r.add_argument("-o", "--out", type=Path, required=True, help="output .mp4 path")
+    r.add_argument("--poster", action="store_true", help="also write <stem>_poster.jpg")
+    r.add_argument("--keep-work", type=Path, default=None,
+                   help="keep intermediate frames/audio in this directory")
+    args = parser.parse_args(argv)
+    if args.cmd == "render":
+        spec = _load_spec(args.spec)
+        reel = _build_reel(spec, args.spec.resolve().parent)
+        n = len(reel.slides)
+        def progress(i, total, slide):
+            head = slide.narration[:48].replace("\n", " ") or "(silent)"
+            print(f"  [{i}/{total}] {head}", file=sys.stderr)
+        out = reel.render(args.out, make_poster=args.poster,
+                          workdir=args.keep_work, on_progress=progress)
+        size_mb = out.stat().st_size / 1e6
+        print(f"✓ wrote {out} ({size_mb:.1f} MB) from {n} slides")
+    return 0
+if __name__ == "__main__":
+    raise SystemExit(main())

slidecast/ffmpeg.py ADDED Viewed

@@ -0,0 +1,42 @@
+"""Locate an ffmpeg binary without forcing a system install.
+Resolution order:
+1. ``$SLIDECAST_FFMPEG`` — an explicit path, wins over everything.
+2. ``ffmpeg`` on ``$PATH`` — the normal case on a dev box or CI image.
+3. The binary bundled with ``imageio-ffmpeg`` (the ``[ffmpeg]`` extra), so a pure
+   ``pip install`` with no system package still works.
+"""
+from __future__ import annotations
+import os
+import shutil
+class FFmpegNotFound(RuntimeError):
+    """Raised when no ffmpeg binary can be located by any strategy."""
+def find_ffmpeg() -> str:
+    """Return a path to an ffmpeg executable, or raise :class:`FFmpegNotFound`."""
+    explicit = os.environ.get("SLIDECAST_FFMPEG")
+    if explicit:
+        if shutil.which(explicit) or os.path.isfile(explicit):
+            return explicit
+        raise FFmpegNotFound(f"SLIDECAST_FFMPEG={explicit!r} is not an executable")
+    on_path = shutil.which("ffmpeg")
+    if on_path:
+        return on_path
+    try:
+        import imageio_ffmpeg  # type: ignore
+        return imageio_ffmpeg.get_ffmpeg_exe()
+    except Exception:  # noqa: BLE001 — any failure here means "not available"
+        pass
+    raise FFmpegNotFound(
+        "ffmpeg not found. Install it on PATH, set $SLIDECAST_FFMPEG, or "
+        "`pip install slidecast[ffmpeg]` to use the bundled binary."
+    )

slidecast/models.py ADDED Viewed

@@ -0,0 +1,34 @@
+"""Core data model: a slide is one screen of HTML plus what the voice says over it."""
+from __future__ import annotations
+from dataclasses import dataclass
+@dataclass
+class Slide:
+    """One frame of a reel.
+    Attributes:
+        html: A complete, self-contained HTML document. slidecast does not style
+            your slides — it screenshots exactly what you hand it, so the design,
+            fonts, and layout are entirely yours.
+        narration: What the voice reads over this slide. Empty string => a silent
+            slide that holds for ``min_duration`` seconds.
+        tail_pad: Seconds of silence appended after the narration so the last word
+            is never clipped. Only applied when the narration duration is known.
+        min_duration: A floor on the segment length in seconds. For silent slides
+            this *is* the duration; for narrated slides the segment is at least
+            this long even if the narration is shorter.
+    """
+    html: str
+    narration: str = ""
+    tail_pad: float = 0.0
+    min_duration: float = 0.0
+    def __post_init__(self) -> None:
+        if not isinstance(self.html, str) or not self.html.strip():
+            raise ValueError("Slide.html must be a non-empty HTML string")
+        if self.tail_pad < 0 or self.min_duration < 0:
+            raise ValueError("tail_pad and min_duration must be non-negative")

slidecast/reel.py ADDED Viewed

@@ -0,0 +1,123 @@
+"""The orchestrator: a list of slides in, one narrated MP4 out.
+A :class:`Reel` ties the three pluggable pieces together — a renderer (HTML ->
+PNG), a TTS provider (text -> audio + duration), and the ffmpeg steps (segment +
+concat). Per slide it screenshots the HTML, narrates the text, and builds a
+segment whose length fits the speech; then it concatenates every segment.
+"""
+from __future__ import annotations
+import shutil
+import tempfile
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Callable, List, Optional
+from . import video as _video
+from .ffmpeg import find_ffmpeg
+from .models import Slide
+from .render import PlaywrightRenderer, Renderer
+from .tts import SilentTTS, TTSProvider
+# Called as on_progress(index, total, slide) before each slide is built.
+ProgressHook = Callable[[int, int, Slide], None]
+@dataclass
+class Reel:
+    """A narrated slide reel.
+    Args:
+        width / height: Output resolution in pixels.
+        fps: Output frame rate.
+        tts: A text-to-speech provider. Defaults to silent (:class:`SilentTTS`),
+            so a reel renders end to end with no audio backend configured.
+        renderer: An HTML screenshotter. Defaults to :class:`PlaywrightRenderer`.
+        silent_slide_seconds: Hold time for a slide with empty narration when it
+            sets no ``min_duration`` of its own.
+    """
+    width: int = 1920
+    height: int = 1080
+    fps: int = 25
+    tts: TTSProvider = field(default_factory=SilentTTS)
+    renderer: Optional[Renderer] = None
+    silent_slide_seconds: float = 3.0
+    slides: List[Slide] = field(default_factory=list)
+    def add(self, html: str, narration: str = "", *,
+            tail_pad: float = 0.0, min_duration: float = 0.0) -> Slide:
+        """Append a slide and return it (chainable-ish convenience over the list)."""
+        slide = Slide(html=html, narration=narration,
+                      tail_pad=tail_pad, min_duration=min_duration)
+        self.slides.append(slide)
+        return slide
+    def _segment_duration(self, slide: Slide, audio: Path) -> Optional[float]:
+        """Narrate ``slide`` into ``audio`` and return the segment's target length.
+        Returns a concrete duration when it's known (so the segment is padded to
+        fit), or None to let the audio drive the length (ffmpeg ``-shortest``).
+        """
+        if not slide.narration.strip():
+            seconds = slide.min_duration or self.silent_slide_seconds
+            SilentTTS(seconds=seconds).synthesize("", audio)
+            return seconds
+        narrated = self.tts.synthesize(slide.narration, audio)
+        if narrated is None:
+            # Provider couldn't measure (e.g. MP3) — audio drives the length.
+            return None
+        return max(narrated + slide.tail_pad, slide.min_duration)
+    def render(
+        self,
+        out_path,
+        *,
+        make_poster: bool = False,
+        workdir: Optional[Path] = None,
+        ffmpeg: Optional[str] = None,
+        on_progress: Optional[ProgressHook] = None,
+    ) -> Path:
+        """Render the whole reel to ``out_path`` (an .mp4). Returns the path.
+        If ``make_poster`` is set, also writes ``<stem>_poster.jpg`` next to it.
+        """
+        if not self.slides:
+            raise ValueError("Reel has no slides")
+        out_path = Path(out_path)
+        out_path.parent.mkdir(parents=True, exist_ok=True)
+        ffmpeg = ffmpeg or find_ffmpeg()
+        keep_work = workdir is not None
+        work = Path(workdir) if workdir else Path(tempfile.mkdtemp(prefix="slidecast_"))
+        work.mkdir(parents=True, exist_ok=True)
+        renderer = self.renderer or PlaywrightRenderer()
+        segments: List[Path] = []
+        total = len(self.slides)
+        try:
+            with renderer as r:
+                for i, slide in enumerate(self.slides, start=1):
+                    if on_progress:
+                        on_progress(i, total, slide)
+                    png = work / f"s{i:03d}.png"
+                    audio = work / f"s{i:03d}.wav"
+                    seg = work / f"s{i:03d}.mp4"
+                    r.screenshot(slide.html, png, width=self.width, height=self.height)
+                    duration = self._segment_duration(slide, audio)
+                    _video.build_segment(
+                        png, audio, seg,
+                        width=self.width, height=self.height, fps=self.fps,
+                        duration=duration, ffmpeg=ffmpeg,
+                    )
+                    segments.append(seg)
+            _video.concat(segments, out_path, ffmpeg=ffmpeg)
+            if make_poster:
+                _video.poster(out_path, out_path.with_name(out_path.stem + "_poster.jpg"),
+                              ffmpeg=ffmpeg)
+        finally:
+            if not keep_work:
+                shutil.rmtree(work, ignore_errors=True)
+        return out_path

slidecast/render.py ADDED Viewed

@@ -0,0 +1,131 @@
+"""Renderers turn an HTML string into a PNG screenshot at a fixed size.
+A renderer is a context manager (so an implementation can launch a browser once
+and reuse it across every slide) exposing::
+    screenshot(html: str, out_path: Path, *, width: int, height: int) -> None
+Two ship in the box:
+* :class:`PlaywrightRenderer` — the default. Needs the ``[playwright]`` extra and
+  a one-time ``playwright install chromium``. Launches Chromium once per reel.
+* :class:`ChromeBinaryRenderer` — drives an existing Chrome/Chromium binary by
+  path via ``--headless --screenshot``. No Python browser dep; handy when a
+  Chrome is already on the box.
+Bring your own by matching the same context-manager + ``screenshot`` shape.
+"""
+from __future__ import annotations
+import subprocess
+import tempfile
+from pathlib import Path
+from typing import Optional, Protocol, Sequence, runtime_checkable
+@runtime_checkable
+class Renderer(Protocol):
+    def __enter__(self) -> "Renderer": ...
+    def __exit__(self, *exc) -> None: ...
+    def screenshot(self, html: str, out_path: Path, *, width: int, height: int) -> None: ...
+class PlaywrightRenderer:
+    """Screenshot HTML with headless Chromium via Playwright.
+    The browser is launched on ``__enter__`` and reused for every ``screenshot``
+    call, so rendering a 20-slide reel pays the startup cost once.
+    """
+    def __init__(
+        self,
+        device_scale_factor: int = 1,
+        wait_ms: int = 250,
+        wait_until: str = "networkidle",
+        launch_args: Sequence[str] = ("--force-color-profile=srgb",),
+    ):
+        self.device_scale_factor = device_scale_factor
+        self.wait_ms = wait_ms
+        self.wait_until = wait_until
+        self.launch_args = list(launch_args)
+        self._pw = None
+        self._browser = None
+    def __enter__(self) -> "PlaywrightRenderer":
+        from playwright.sync_api import sync_playwright
+        self._pw = sync_playwright().start()
+        self._browser = self._pw.chromium.launch(args=self.launch_args)
+        return self
+    def __exit__(self, *exc) -> None:
+        if self._browser is not None:
+            self._browser.close()
+            self._browser = None
+        if self._pw is not None:
+            self._pw.stop()
+            self._pw = None
+    def screenshot(self, html: str, out_path: Path, *, width: int, height: int) -> None:
+        if self._browser is None:
+            raise RuntimeError("PlaywrightRenderer must be used as a context manager")
+        page = self._browser.new_context(
+            viewport={"width": width, "height": height},
+            device_scale_factor=self.device_scale_factor,
+        ).new_page()
+        try:
+            page.set_content(html, wait_until=self.wait_until)
+            if self.wait_ms:
+                page.wait_for_timeout(self.wait_ms)
+            page.screenshot(path=str(out_path))
+        finally:
+            page.context.close()
+class ChromeBinaryRenderer:
+    """Screenshot HTML by shelling out to a Chrome/Chromium binary.
+    Stateless: writes the HTML to a temp file and runs the binary headless with
+    ``--screenshot``. Use when you already have a Chrome on the box and would
+    rather not add the Playwright dependency.
+    """
+    def __init__(
+        self,
+        chrome: str,
+        device_scale_factor: int = 1,
+        virtual_time_budget_ms: int = 2000,
+        extra_args: Sequence[str] = (),
+        runner=subprocess.run,
+    ):
+        self.chrome = chrome
+        self.device_scale_factor = device_scale_factor
+        self.virtual_time_budget_ms = virtual_time_budget_ms
+        self.extra_args = list(extra_args)
+        self.runner = runner
+    def __enter__(self) -> "ChromeBinaryRenderer":
+        return self
+    def __exit__(self, *exc) -> None:
+        return None
+    def screenshot(self, html: str, out_path: Path, *, width: int, height: int) -> None:
+        with tempfile.NamedTemporaryFile("w", suffix=".html", delete=False) as f:
+            f.write(html)
+            html_path = Path(f.name)
+        try:
+            cmd = [
+                self.chrome, "--headless=new", "--no-sandbox", "--disable-gpu",
+                "--hide-scrollbars",
+                f"--force-device-scale-factor={self.device_scale_factor}",
+                f"--window-size={width},{height}",
+                "--default-background-color=00000000",
+                f"--virtual-time-budget={self.virtual_time_budget_ms}",
+                *self.extra_args,
+                f"--screenshot={out_path}", html_path.as_uri(),
+            ]
+            self.runner(cmd, check=True, capture_output=True)
+        finally:
+            html_path.unlink(missing_ok=True)

slidecast/tts.py ADDED Viewed

@@ -0,0 +1,144 @@
+"""Text-to-speech providers.
+A provider is anything with::
+    synthesize(text: str, out_path: Path) -> float | None
+It writes an audio file to ``out_path`` and returns the clip duration in seconds,
+or ``None`` if it can't measure it (e.g. it emitted MP3 and you don't want a
+probe dependency). When the duration is unknown, the video builder lets the audio
+drive the segment length (ffmpeg ``-shortest``) instead of padding to a target.
+Three providers ship in the box:
+* :class:`KokoroTTS` — any OpenAI-compatible ``/v1/audio/speech`` endpoint
+  (Kokoro, OpenAI, LocalAI, …). Defaults to WAV so duration is measurable.
+* :class:`GTTSTTS` — Google Translate TTS via the ``gtts`` package (MP3, no
+  measurable duration → ``-shortest``).
+* :class:`SilentTTS` — a silent track of a fixed length. No dependencies; used
+  for silent slides, muted reels, and deterministic tests.
+Bring your own by implementing the same one-method shape.
+"""
+from __future__ import annotations
+import re
+import struct
+import wave
+from pathlib import Path
+from typing import Dict, Optional, Protocol, runtime_checkable
+@runtime_checkable
+class TTSProvider(Protocol):
+    def synthesize(self, text: str, out_path: Path) -> Optional[float]:
+        """Write audio for ``text`` to ``out_path``; return its duration or None."""
+        ...
+def wav_duration(path: Path) -> Optional[float]:
+    """Duration of a WAV file in seconds, or None if it isn't a readable WAV."""
+    try:
+        with wave.open(str(path)) as w:
+            rate = w.getframerate()
+            return w.getnframes() / float(rate) if rate else None
+    except Exception:  # noqa: BLE001
+        return None
+def apply_phonetic(text: str, rules: Optional[Dict[str, str]]) -> str:
+    """Rewrite spoken text by regex so TTS pronounces tricky tokens correctly.
+    ``rules`` maps a regex pattern to its spoken replacement, e.g.
+    ``{r"\\bSOC\\b": "sock"}`` so "SOC" is said as a word, not spelled out. The
+    on-screen slide text is untouched — this only changes the audio.
+    """
+    if not rules:
+        return text
+    for pattern, repl in rules.items():
+        text = re.sub(pattern, repl, text)
+    return text
+class SilentTTS:
+    """Emit a silent WAV of a fixed length. Pure stdlib, fully deterministic."""
+    def __init__(self, seconds: float = 3.0, sample_rate: int = 24000):
+        if seconds <= 0:
+            raise ValueError("seconds must be > 0")
+        self.seconds = float(seconds)
+        self.sample_rate = int(sample_rate)
+    def synthesize(self, text: str, out_path: Path) -> float:
+        n_frames = int(self.seconds * self.sample_rate)
+        with wave.open(str(out_path), "w") as w:
+            w.setnchannels(1)
+            w.setsampwidth(2)  # 16-bit
+            w.setframerate(self.sample_rate)
+            w.writeframes(struct.pack("<h", 0) * n_frames)
+        return self.seconds
+class KokoroTTS:
+    """Client for any OpenAI-compatible ``/v1/audio/speech`` endpoint.
+    Defaults to WAV so the clip duration is measurable (lets the reel pad each
+    segment to fit the speech exactly). Set ``response_format="mp3"`` for smaller
+    files; duration then reads as unknown and the segment uses ``-shortest``.
+    """
+    def __init__(
+        self,
+        url: str = "http://127.0.0.1:8021/v1/audio/speech",
+        voice: str = "af_heart",
+        model: str = "kokoro",
+        response_format: str = "wav",
+        timeout: float = 180.0,
+        phonetic: Optional[Dict[str, str]] = None,
+        session=None,
+    ):
+        self.url = url
+        self.voice = voice
+        self.model = model
+        self.response_format = response_format
+        self.timeout = timeout
+        self.phonetic = phonetic
+        self._session = session
+    def synthesize(self, text: str, out_path: Path) -> Optional[float]:
+        import requests
+        spoken = apply_phonetic(text, self.phonetic)
+        poster = self._session or requests
+        resp = poster.post(
+            self.url,
+            json={
+                "model": self.model,
+                "voice": self.voice,
+                "input": spoken,
+                "response_format": self.response_format,
+            },
+            timeout=self.timeout,
+        )
+        resp.raise_for_status()
+        Path(out_path).write_bytes(resp.content)
+        return wav_duration(out_path) if self.response_format == "wav" else None
+class GTTSTTS:
+    """Google Translate TTS via the ``gtts`` package. Emits MP3 (duration unknown)."""
+    def __init__(self, lang: str = "en", tld: str = "com", slow: bool = False,
+                 phonetic: Optional[Dict[str, str]] = None):
+        self.lang = lang
+        self.tld = tld
+        self.slow = slow
+        self.phonetic = phonetic
+    def synthesize(self, text: str, out_path: Path) -> Optional[float]:
+        from gtts import gTTS
+        spoken = apply_phonetic(text, self.phonetic)
+        gTTS(text=spoken, lang=self.lang, tld=self.tld, slow=self.slow).save(str(out_path))
+        return None

slidecast/video.py ADDED Viewed

@@ -0,0 +1,110 @@
+"""ffmpeg steps: still image + audio -> segment, concat segments, grab a poster.
+Every function takes an injectable ``runner`` (defaults to ``subprocess.run``) and
+an optional ``ffmpeg`` path, so callers can swap in the bundled binary and tests
+can assert the exact command line without invoking ffmpeg.
+"""
+from __future__ import annotations
+import subprocess
+import tempfile
+from pathlib import Path
+from typing import List, Optional
+from .ffmpeg import find_ffmpeg
+def build_segment(
+    image: Path,
+    audio: Path,
+    out: Path,
+    *,
+    width: int,
+    height: int,
+    fps: int = 25,
+    duration: Optional[float] = None,
+    audio_bitrate: str = "192k",
+    ffmpeg: Optional[str] = None,
+    runner=None,
+) -> Path:
+    """Render one still image + its audio into an H.264/AAC MP4 segment.
+    If ``duration`` is given, the segment is exactly that long and the audio is
+    padded with silence to fill it (so narration is never clipped). If it's None,
+    the audio drives the length (``-shortest``).
+    """
+    ffmpeg = ffmpeg or find_ffmpeg()
+    runner = runner or subprocess.run
+    cmd: List[str] = [
+        ffmpeg, "-y", "-loglevel", "error",
+        "-loop", "1", "-i", str(image),
+        "-i", str(audio),
+    ]
+    if duration is not None:
+        cmd += ["-t", f"{duration:.3f}", "-af", "apad"]
+    else:
+        cmd += ["-shortest"]
+    cmd += [
+        "-r", str(fps),
+        "-c:v", "libx264", "-tune", "stillimage", "-pix_fmt", "yuv420p",
+        "-vf", f"scale={width}:{height}",
+        "-c:a", "aac", "-b:a", audio_bitrate,
+        "-movflags", "+faststart",
+        str(out),
+    ]
+    runner(cmd, check=True)
+    return out
+def concat(
+    segments: List[Path],
+    out: Path,
+    *,
+    ffmpeg: Optional[str] = None,
+    runner=None,
+) -> Path:
+    """Concatenate pre-encoded segments losslessly via the concat demuxer."""
+    if not segments:
+        raise ValueError("concat() needs at least one segment")
+    ffmpeg = ffmpeg or find_ffmpeg()
+    runner = runner or subprocess.run
+    with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
+        for seg in segments:
+            f.write(f"file '{Path(seg).resolve()}'\n")
+        list_path = Path(f.name)
+    try:
+        runner(
+            [
+                ffmpeg, "-y", "-loglevel", "error",
+                "-f", "concat", "-safe", "0",
+                "-i", str(list_path),
+                "-c", "copy", "-movflags", "+faststart",
+                str(out),
+            ],
+            check=True,
+        )
+    finally:
+        list_path.unlink(missing_ok=True)
+    return out
+def poster(
+    video: Path,
+    out: Path,
+    *,
+    quality: int = 3,
+    ffmpeg: Optional[str] = None,
+    runner=None,
+) -> Path:
+    """Write the first frame of ``video`` as a JPEG (a <video> poster image)."""
+    ffmpeg = ffmpeg or find_ffmpeg()
+    runner = runner or subprocess.run
+    runner(
+        [
+            ffmpeg, "-y", "-loglevel", "error",
+            "-i", str(video), "-frames:v", "1", "-q:v", str(quality), str(out),
+        ],
+        check=True,
+    )
+    return out

slidecast-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,123 @@
+Metadata-Version: 2.4
+Name: slidecast
+Version: 0.1.0
+Summary: Turn a list of HTML slides + narration into a narrated MP4. Headless-browser screenshots, pluggable text-to-speech, ffmpeg stitching. Bring your own slide design and voice.
+Project-URL: Homepage, https://github.com/vinayvobbili/slidecast
+Project-URL: Source, https://github.com/vinayvobbili/slidecast
+Author: Vinay Vobbilichetty
+License: MIT
+License-File: LICENSE
+Keywords: ffmpeg,kokoro,narration,playwright,screencast,slides,tts,video
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
+Classifier: Topic :: Multimedia :: Video
+Requires-Python: >=3.10
+Requires-Dist: requests>=2
+Provides-Extra: dev
+Requires-Dist: pytest>=7; extra == 'dev'
+Requires-Dist: pyyaml>=6; extra == 'dev'
+Provides-Extra: ffmpeg
+Requires-Dist: imageio-ffmpeg>=0.4; extra == 'ffmpeg'
+Provides-Extra: gtts
+Requires-Dist: gtts>=2.3; extra == 'gtts'
+Provides-Extra: playwright
+Requires-Dist: playwright>=1.40; extra == 'playwright'
+Provides-Extra: yaml
+Requires-Dist: pyyaml>=6; extra == 'yaml'
+Description-Content-Type: text/markdown
+# slidecast
+Turn a list of HTML slides + narration into a narrated MP4.
+You bring the slide design — any HTML you like — and the words. slidecast
+screenshots each slide with a headless browser, narrates it with a pluggable
+text-to-speech provider, and stitches the frames into one MP4 with ffmpeg. It
+has no opinion about how your slides look and no hard dependency on a specific
+voice or browser: every piece is swappable.
+## Install
+```
+pip install slidecast              # core (requests only)
+pip install slidecast[playwright]  # default renderer (headless Chromium)
+pip install slidecast[gtts]        # Google Translate TTS
+pip install slidecast[ffmpeg]      # bundled ffmpeg binary (no system install)
+```
+After installing the Playwright extra, fetch the browser once:
+```
+playwright install chromium
+```
+You also need ffmpeg — on `PATH`, via `$SLIDECAST_FFMPEG`, or the `[ffmpeg]`
+extra's bundled binary.
+## Library
+```python
+from slidecast import Reel, KokoroTTS
+reel = Reel(width=1280, height=720, tts=KokoroTTS(voice="af_heart"))
+reel.add("<!doctype html><h1>Hello</h1>", "Hello, and welcome.")
+reel.add("<!doctype html><h1>Goodbye</h1>", "Thanks for watching.", tail_pad=0.8)
+reel.render("out.mp4", make_poster=True)
+```
+A slide with empty narration becomes a silent hold: it lasts `min_duration`
+seconds, or the reel's `silent_slide_seconds` (3 by default) when `min_duration`
+is not set. When the TTS provider reports a clip duration, the segment is padded
+to fit the speech exactly; when it can't (e.g. MP3), the audio drives the length.
+## CLI
+```
+slidecast render reel.yaml -o out.mp4 --poster
+```
+```yaml
+width: 1280
+height: 720
+fps: 25
+tts:
+  provider: kokoro        # kokoro | gtts | silent
+  url: http://127.0.0.1:8021/v1/audio/speech
+  voice: af_heart
+  response_format: wav
+slides:
+  - html_file: intro.html
+    narration: "Before any of this, here's why it matters."
+    tail_pad: 0.8
+  - html: "<!doctype html><h1>Step one</h1>"
+    narration: ""          # silent slide
+    min_duration: 3
+```
+## The swappable pieces
+**Text-to-speech** — anything with `synthesize(text, path) -> seconds | None`:
+- `KokoroTTS` — any OpenAI-compatible `/v1/audio/speech` endpoint (Kokoro,
+  OpenAI, LocalAI, …). Defaults to WAV so the clip length is measurable.
+- `GTTSTTS` — Google Translate TTS (`gtts`).
+- `SilentTTS` — a silent track of a fixed length. No dependencies; the default,
+  so a reel renders end to end with nothing configured.
+Pass `phonetic={r"\bSOC\b": "sock"}` to rewrite how tricky tokens are spoken
+without changing the on-screen text.
+**Renderer** — a context manager exposing `screenshot(html, path, *, width, height)`:
+- `PlaywrightRenderer` — headless Chromium, launched once per reel (default).
+- `ChromeBinaryRenderer` — drive an existing Chrome/Chromium binary by path.
+**ffmpeg steps** are exposed directly (`build_segment`, `concat`, `poster`) and
+take an injectable `runner`, so you can compose your own pipeline or test command
+construction without invoking ffmpeg.
+## License
+MIT

slidecast-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,13 @@
+slidecast/__init__.py,sha256=Bs9DxGs4QqtWlserg1jINorsxko8WmlpZ_DZiWGnEgk,2110
+slidecast/cli.py,sha256=avAqBlCdorNzoh0bo7Dfl03QJWMbQduAD_b1GWsroIg,3870
+slidecast/ffmpeg.py,sha256=IRJNG7LXxYyMWiQT5iX2WBQJH_vIKP_LXxRgcEye-14,1338
+slidecast/models.py,sha256=qpZQfjxbaSH4L4JBV2sepxByPT7y43jp8RybZfz9Xkw,1385
+slidecast/reel.py,sha256=rq9kfENgCe3Amt_EG15ECIvXoTtqysQZtMxy0GXgM8U,4890
+slidecast/render.py,sha256=vq770U6FowOwzDXm_bC9UOjPJ6Gbu1s-ysEebk9Xr_g,4674
+slidecast/tts.py,sha256=KN9yjmYmGasoR54yy0niwfUdH1bPN80gFQt9ItNy67o,5025
+slidecast/video.py,sha256=XyNj4O2iJIjh7AkcOdmiRjYFiOimUP7E1gJuf3rtEKU,3093
+slidecast-0.1.0.dist-info/METADATA,sha256=B8jZcIwML31an6N26rieWSRZ_QNScNZLTNulVzp3h80,4237
+slidecast-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+slidecast-0.1.0.dist-info/entry_points.txt,sha256=Cg0RI0Em73vEFYtes8I7YQCePngmIIfc1JsMzk0VTK4,49
+slidecast-0.1.0.dist-info/licenses/LICENSE,sha256=Q7CXTchzC9hqR2Dr-9cRh3bM2kXTgXGwk-dO0rGvQsE,1076
+slidecast-0.1.0.dist-info/RECORD,,

slidecast-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any

slidecast-0.1.0.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+slidecast = slidecast.cli:main

slidecast-0.1.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Vinay Vobbilichetty
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.