slidecast 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
slidecast/__init__.py ADDED
@@ -0,0 +1,67 @@
1
+ """slidecast — turn a list of HTML slides + narration into a narrated MP4.
2
+
3
+ You bring the slide design (any HTML you like) and the words; slidecast
4
+ screenshots each slide with a headless browser, narrates it with a pluggable
5
+ text-to-speech provider, and stitches the frames into one MP4 with ffmpeg.
6
+
7
+ Quick start
8
+ -----------
9
+ from slidecast import Reel, KokoroTTS
10
+
11
+ reel = Reel(width=1280, height=720, tts=KokoroTTS(voice="af_heart"))
12
+ reel.add("<!doctype html><h1>Hello</h1>", "Hello, and welcome.")
13
+ reel.add("<!doctype html><h1>Bye</h1>", "Thanks for watching.", tail_pad=0.8)
14
+ reel.render("out.mp4", make_poster=True)
15
+
16
+ Pieces (all swappable)
17
+ ----------------------
18
+ Model:
19
+ Slide(html, narration="", tail_pad=0.0, min_duration=0.0)
20
+ Reel(width, height, fps, tts=..., renderer=...).add(...).render(out)
21
+ Text-to-speech (``synthesize(text, path) -> seconds | None``):
22
+ KokoroTTS — any OpenAI-compatible /v1/audio/speech endpoint
23
+ GTTSTTS — Google Translate TTS (mp3)
24
+ SilentTTS — silent track, no deps (default)
25
+ Renderers (HTML -> PNG, used as a context manager):
26
+ PlaywrightRenderer — headless Chromium (default)
27
+ ChromeBinaryRenderer — drive an existing Chrome binary by path
28
+ ffmpeg steps (injectable runner, for direct use/testing):
29
+ build_segment(...) / concat(...) / poster(...)
30
+ find_ffmpeg() -> path (PATH, $SLIDECAST_FFMPEG, or imageio-ffmpeg)
31
+ """
32
+
33
+ from .ffmpeg import FFmpegNotFound, find_ffmpeg
34
+ from .models import Slide
35
+ from .reel import Reel
36
+ from .render import ChromeBinaryRenderer, PlaywrightRenderer, Renderer
37
+ from .tts import (
38
+ GTTSTTS,
39
+ KokoroTTS,
40
+ SilentTTS,
41
+ TTSProvider,
42
+ apply_phonetic,
43
+ wav_duration,
44
+ )
45
+ from .video import build_segment, concat, poster
46
+
47
+ __version__ = "0.1.0"
48
+
49
+ __all__ = [
50
+ "Slide",
51
+ "Reel",
52
+ "Renderer",
53
+ "PlaywrightRenderer",
54
+ "ChromeBinaryRenderer",
55
+ "TTSProvider",
56
+ "KokoroTTS",
57
+ "GTTSTTS",
58
+ "SilentTTS",
59
+ "apply_phonetic",
60
+ "wav_duration",
61
+ "build_segment",
62
+ "concat",
63
+ "poster",
64
+ "find_ffmpeg",
65
+ "FFmpegNotFound",
66
+ "__version__",
67
+ ]
slidecast/cli.py ADDED
@@ -0,0 +1,112 @@
1
+ """Command line: render a reel from a spec file.
2
+
3
+ slidecast render reel.yaml -o out.mp4 --poster
4
+
5
+ A spec is YAML or JSON::
6
+
7
+ width: 1280
8
+ height: 720
9
+ fps: 25
10
+ tts:
11
+ provider: kokoro # kokoro | gtts | silent
12
+ url: http://127.0.0.1:8021/v1/audio/speech
13
+ voice: af_heart
14
+ response_format: wav
15
+ slides:
16
+ - html_file: intro.html # path (relative to the spec) ...
17
+ narration: "Welcome."
18
+ tail_pad: 0.8
19
+ - html: "<!doctype html>..." # ... or inline HTML
20
+ narration: "" # empty => silent slide
21
+ min_duration: 3
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import argparse
27
+ import json
28
+ import sys
29
+ from pathlib import Path
30
+ from typing import Any, Dict
31
+
32
+ from .reel import Reel
33
+ from .render import ChromeBinaryRenderer
34
+ from .tts import GTTSTTS, KokoroTTS, SilentTTS
35
+
36
+
37
def _load_spec(path: Path) -> Dict[str, Any]:
    """Read a reel spec from ``path`` and return it as a mapping.

    Files whose suffix is ``.yaml`` or ``.yml`` (any letter case) are parsed
    with PyYAML's ``safe_load``; everything else is parsed as JSON. The file is
    always decoded as UTF-8 so a spec loads the same way regardless of the
    platform's locale encoding.

    Args:
        path: Location of the spec file.

    Returns:
        The parsed top-level mapping.

    Raises:
        SystemExit: If a YAML spec is given but PyYAML is not installed, or if
            the parsed document is not a mapping (an empty file, a list, ...).
    """
    text = path.read_text(encoding="utf-8")
    if path.suffix.lower() in (".yaml", ".yml"):
        try:
            import yaml
        except ImportError:
            # The missing-module traceback adds nothing for a CLI user.
            raise SystemExit(
                "YAML spec needs PyYAML — `pip install slidecast[yaml]`"
            ) from None
        spec = yaml.safe_load(text)
    else:
        spec = json.loads(text)
    # An empty YAML file parses to None and a JSON array to a list; fail here
    # with a readable message instead of on the first ``spec.get`` downstream.
    if not isinstance(spec, dict):
        raise SystemExit(f"{path}: the spec must be a mapping at the top level")
    return spec
46
+
47
+
48
def _build_tts(cfg: Dict[str, Any]):
    """Build the TTS provider described by the spec's ``tts`` section.

    ``provider`` selects the class (``kokoro``, ``gtts`` or ``silent``,
    case-insensitive; missing or empty means ``silent``). Every other key is
    forwarded to that class's constructor as a keyword argument.

    Args:
        cfg: The ``tts`` mapping from the spec; ``None`` or empty is allowed.

    Returns:
        A ``KokoroTTS``, ``GTTSTTS`` or ``SilentTTS`` instance.

    Raises:
        SystemExit: If the section is not a mapping, the provider name is
            unknown, or the constructor rejects the supplied options.
    """
    try:
        options = dict(cfg or {})
    except (TypeError, ValueError):
        raise SystemExit("'tts' must be a mapping of provider options") from None
    provider = str(options.pop("provider", None) or "silent").lower()
    try:
        if provider == "kokoro":
            return KokoroTTS(**options)
        if provider == "gtts":
            return GTTSTTS(**options)
        if provider == "silent":
            return SilentTTS(**options)
    except (TypeError, ValueError) as err:
        # A misspelled or invalid option is a user error in the spec file, so
        # report it as a CLI message rather than a traceback.
        raise SystemExit(
            f"Bad options for tts provider {provider!r}: {err}"
        ) from None
    raise SystemExit(f"Unknown tts provider: {provider!r}")
58
+
59
+
60
def _build_reel(spec: Dict[str, Any], base: Path) -> Reel:
    """Turn a parsed spec mapping into a populated :class:`Reel`.

    Recognised top-level keys are ``width``, ``height``, ``fps``, ``tts``,
    ``silent_slide_seconds``, ``chrome`` (path to a Chrome binary, which
    switches the renderer to :class:`ChromeBinaryRenderer`) and ``slides``.
    Each slide supplies either inline ``html`` or an ``html_file`` path that is
    resolved against ``base`` and read as UTF-8.

    Keys that are present but null (as YAML produces for ``narration:`` with no
    value) are treated the same as missing keys.

    Args:
        spec: The parsed spec mapping.
        base: Directory that relative ``html_file`` paths are resolved against.

    Returns:
        The reel with one slide added per ``slides`` entry.

    Raises:
        SystemExit: If a slide has neither ``html`` nor ``html_file``.
    """
    reel = Reel(
        width=int(spec.get("width", 1920)),
        height=int(spec.get("height", 1080)),
        fps=int(spec.get("fps", 25)),
        tts=_build_tts(spec.get("tts", {})),
        silent_slide_seconds=float(spec.get("silent_slide_seconds", 3.0)),
    )
    chrome = spec.get("chrome")
    if chrome:
        reel.renderer = ChromeBinaryRenderer(chrome=chrome)
    # ``slides:`` with no entries parses to None in YAML; treat it as empty.
    for entry in spec.get("slides") or []:
        html = entry.get("html")
        if not html and entry.get("html_file"):
            html = (base / entry["html_file"]).read_text(encoding="utf-8")
        if not html:
            raise SystemExit("each slide needs 'html' or 'html_file'")
        reel.add(
            html,
            entry.get("narration") or "",
            tail_pad=float(entry.get("tail_pad") or 0.0),
            min_duration=float(entry.get("min_duration") or 0.0),
        )
    return reel
81
+
82
+
83
def main(argv=None) -> int:
    """Entry point for the ``slidecast`` command.

    Parses ``argv`` (argparse falls back to ``sys.argv`` when it is ``None``),
    and for the ``render`` subcommand loads the spec, builds the reel, renders
    it, and prints a one-line summary. Per-slide progress goes to stderr.

    Returns:
        ``0`` on success. Argument errors exit through argparse.
    """
    parser = argparse.ArgumentParser(
        prog="slidecast",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    commands = parser.add_subparsers(dest="cmd", required=True)

    render_cmd = commands.add_parser("render", help="render a reel spec to an MP4")
    render_cmd.add_argument("spec", type=Path, help="YAML or JSON reel spec")
    render_cmd.add_argument(
        "-o", "--out", type=Path, required=True, help="output .mp4 path"
    )
    render_cmd.add_argument(
        "--poster", action="store_true", help="also write <stem>_poster.jpg"
    )
    render_cmd.add_argument(
        "--keep-work",
        type=Path,
        default=None,
        help="keep intermediate frames/audio in this directory",
    )
    args = parser.parse_args(argv)

    if args.cmd == "render":
        reel = _build_reel(_load_spec(args.spec), args.spec.resolve().parent)
        slide_count = len(reel.slides)

        def report(index, total, slide):
            # Show the start of the narration so the user can follow along.
            preview = slide.narration[:48].replace("\n", " ")
            if not preview:
                preview = "(silent)"
            print(f" [{index}/{total}] {preview}", file=sys.stderr)

        written = reel.render(
            args.out,
            make_poster=args.poster,
            workdir=args.keep_work,
            on_progress=report,
        )
        megabytes = written.stat().st_size / 1e6
        print(f"✓ wrote {written} ({megabytes:.1f} MB) from {slide_count} slides")
    return 0
109
+
110
+
111
+ if __name__ == "__main__":
112
+ raise SystemExit(main())
slidecast/ffmpeg.py ADDED
@@ -0,0 +1,42 @@
1
+ """Locate an ffmpeg binary without forcing a system install.
2
+
3
+ Resolution order:
4
+ 1. ``$SLIDECAST_FFMPEG`` — an explicit path, wins over everything.
5
+ 2. ``ffmpeg`` on ``$PATH`` — the normal case on a dev box or CI image.
6
+ 3. The binary bundled with ``imageio-ffmpeg`` (the ``[ffmpeg]`` extra), so a pure
7
+ ``pip install`` with no system package still works.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import os
13
+ import shutil
14
+
15
+
16
class FFmpegNotFound(RuntimeError):
    """Raised when no ffmpeg binary can be located by any strategy."""


def find_ffmpeg() -> str:
    """Return a path to an ffmpeg executable.

    Resolution order:
      1. ``$SLIDECAST_FFMPEG`` — used as-is when it names an executable, and an
         error when it is set but does not (no fallback is attempted).
      2. ``ffmpeg`` found on ``$PATH``.
      3. The binary reported by ``imageio_ffmpeg.get_ffmpeg_exe()``, when that
         package is importable.

    Raises:
        FFmpegNotFound: If ``$SLIDECAST_FFMPEG`` is set but is not an
            executable, or if none of the strategies finds a binary.
    """
    explicit = os.environ.get("SLIDECAST_FFMPEG")
    if explicit:
        # shutil.which() handles bare command names and executable paths. The
        # fallback accepts a plain file path too, but only when it is actually
        # executable, so a bad value fails here rather than at the first
        # subprocess call.
        if shutil.which(explicit) or (
            os.path.isfile(explicit) and os.access(explicit, os.X_OK)
        ):
            return explicit
        raise FFmpegNotFound(f"SLIDECAST_FFMPEG={explicit!r} is not an executable")

    on_path = shutil.which("ffmpeg")
    if on_path:
        return on_path

    try:
        import imageio_ffmpeg  # type: ignore

        return imageio_ffmpeg.get_ffmpeg_exe()
    except Exception:  # noqa: BLE001 — any failure here means "not available"
        # Deliberately broad: a missing package and a failed lookup both fall
        # through to the FFmpegNotFound below.
        pass

    raise FFmpegNotFound(
        "ffmpeg not found. Install it on PATH, set $SLIDECAST_FFMPEG, or "
        "`pip install slidecast[ffmpeg]` to use the bundled binary."
    )
slidecast/models.py ADDED
@@ -0,0 +1,34 @@
1
+ """Core data model: a slide is one screen of HTML plus what the voice says over it."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+
8
@dataclass
class Slide:
    """A single screen of a reel: the HTML to show and the words spoken over it.

    Attributes:
        html: A complete, self-contained HTML document. It is screenshotted
            exactly as given; slidecast adds no styling of its own.
        narration: Text read aloud over the slide. An empty string makes the
            slide silent; it then holds for ``min_duration`` seconds (a
            ``Reel`` substitutes its ``silent_slide_seconds`` when
            ``min_duration`` is 0).
        tail_pad: Seconds of silence added after the narration so the final
            word is not cut off. Only used when the narration length is known.
        min_duration: Lower bound on the segment length in seconds. For a
            narrated slide the segment is at least this long even if the
            speech is shorter.

    Raises:
        ValueError: If ``html`` is not a non-blank string, or if ``tail_pad``
            or ``min_duration`` is negative.
    """

    html: str
    narration: str = ""
    tail_pad: float = 0.0
    min_duration: float = 0.0

    def __post_init__(self) -> None:
        # Reject missing or whitespace-only markup up front.
        has_markup = isinstance(self.html, str) and self.html.strip()
        if not has_markup:
            raise ValueError("Slide.html must be a non-empty HTML string")
        if any(value < 0 for value in (self.tail_pad, self.min_duration)):
            raise ValueError("tail_pad and min_duration must be non-negative")
slidecast/reel.py ADDED
@@ -0,0 +1,123 @@
1
+ """The orchestrator: a list of slides in, one narrated MP4 out.
2
+
3
+ A :class:`Reel` ties the three pluggable pieces together — a renderer (HTML ->
4
+ PNG), a TTS provider (text -> audio + duration), and the ffmpeg steps (segment +
5
+ concat). Per slide it screenshots the HTML, narrates the text, and builds a
6
+ segment whose length fits the speech; then it concatenates every segment.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import shutil
12
+ import tempfile
13
+ from dataclasses import dataclass, field
14
+ from pathlib import Path
15
+ from typing import Callable, List, Optional
16
+
17
+ from . import video as _video
18
+ from .ffmpeg import find_ffmpeg
19
+ from .models import Slide
20
+ from .render import PlaywrightRenderer, Renderer
21
+ from .tts import SilentTTS, TTSProvider
22
+
23
+ # Called as on_progress(index, total, slide) before each slide is built.
24
+ ProgressHook = Callable[[int, int, Slide], None]
25
+
26
+
27
@dataclass
class Reel:
    """A narrated slide reel: an ordered list of slides rendered to one MP4.

    Args:
        width / height: Output resolution in pixels.
        fps: Output frame rate.
        tts: A text-to-speech provider. Defaults to :class:`SilentTTS`, so a
            reel renders end to end with no audio backend configured.
        renderer: An HTML screenshotter. When left as ``None``,
            :class:`PlaywrightRenderer` is created at render time.
        silent_slide_seconds: Hold time for a slide with empty narration that
            sets no ``min_duration`` of its own.
        slides: The slides, in playback order. Usually filled via :meth:`add`.
    """

    width: int = 1920
    height: int = 1080
    fps: int = 25
    tts: TTSProvider = field(default_factory=SilentTTS)
    renderer: Optional[Renderer] = None
    silent_slide_seconds: float = 3.0
    slides: List[Slide] = field(default_factory=list)

    def add(self, html: str, narration: str = "", *,
            tail_pad: float = 0.0, min_duration: float = 0.0) -> Slide:
        """Create a :class:`Slide`, append it to the reel, and return it.

        Raises:
            ValueError: Propagated from :class:`Slide` validation.
        """
        slide = Slide(html=html, narration=narration,
                      tail_pad=tail_pad, min_duration=min_duration)
        self.slides.append(slide)
        return slide

    def _segment_duration(self, slide: Slide, audio: Path) -> Optional[float]:
        """Write the audio for ``slide`` to ``audio`` and return its target length.

        Returns a concrete duration when it is known (so the segment is padded
        to fit), or ``None`` to let the audio drive the length.
        """
        if not slide.narration.strip():
            # Silent slide: always use a generated silent track, whatever
            # provider the reel is configured with.
            seconds = slide.min_duration or self.silent_slide_seconds
            SilentTTS(seconds=seconds).synthesize("", audio)
            return seconds

        narrated = self.tts.synthesize(slide.narration, audio)
        if narrated is None:
            # Provider couldn't measure (e.g. MP3) — audio drives the length,
            # so tail_pad and min_duration are not applied.
            return None
        return max(narrated + slide.tail_pad, slide.min_duration)

    def render(
        self,
        out_path,
        *,
        make_poster: bool = False,
        workdir: Optional[Path] = None,
        ffmpeg: Optional[str] = None,
        on_progress: Optional[ProgressHook] = None,
    ) -> Path:
        """Render the whole reel to ``out_path`` (an .mp4) and return that path.

        Args:
            out_path: Destination file; its parent directory is created.
            make_poster: Also write ``<stem>_poster.jpg`` next to the video.
            workdir: Directory for intermediate frames, audio and segments.
                When given it is kept afterwards; when ``None`` a temporary
                directory is used and removed, even if rendering fails.
            ffmpeg: Path to the ffmpeg binary; located with ``find_ffmpeg()``
                when omitted.
            on_progress: Called as ``on_progress(index, total, slide)`` before
                each slide is built (``index`` starts at 1).

        Raises:
            ValueError: If the reel has no slides.
        """
        if not self.slides:
            raise ValueError("Reel has no slides")
        out_path = Path(out_path)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        ffmpeg = ffmpeg or find_ffmpeg()

        # Decide "keep" and "which directory" from the same test, so a
        # caller-supplied workdir is always the one used and a temp directory
        # is always the one removed.
        keep_work = workdir is not None
        if keep_work:
            work = Path(workdir)
        else:
            work = Path(tempfile.mkdtemp(prefix="slidecast_"))

        segments: List[Path] = []
        total = len(self.slides)
        try:
            work.mkdir(parents=True, exist_ok=True)
            renderer = self.renderer if self.renderer is not None else PlaywrightRenderer()
            with renderer as r:
                for i, slide in enumerate(self.slides, start=1):
                    if on_progress:
                        on_progress(i, total, slide)
                    png = work / f"s{i:03d}.png"
                    # The audio file is named .wav whatever format the
                    # provider actually writes into it.
                    audio = work / f"s{i:03d}.wav"
                    seg = work / f"s{i:03d}.mp4"
                    r.screenshot(slide.html, png, width=self.width, height=self.height)
                    duration = self._segment_duration(slide, audio)
                    _video.build_segment(
                        png, audio, seg,
                        width=self.width, height=self.height, fps=self.fps,
                        duration=duration, ffmpeg=ffmpeg,
                    )
                    segments.append(seg)
            _video.concat(segments, out_path, ffmpeg=ffmpeg)
            if make_poster:
                _video.poster(out_path, out_path.with_name(out_path.stem + "_poster.jpg"),
                              ffmpeg=ffmpeg)
        finally:
            if not keep_work:
                shutil.rmtree(work, ignore_errors=True)
        return out_path
slidecast/render.py ADDED
@@ -0,0 +1,131 @@
1
+ """Renderers turn an HTML string into a PNG screenshot at a fixed size.
2
+
3
+ A renderer is a context manager (so an implementation can launch a browser once
4
+ and reuse it across every slide) exposing::
5
+
6
+ screenshot(html: str, out_path: Path, *, width: int, height: int) -> None
7
+
8
+ Two ship in the box:
9
+
10
+ * :class:`PlaywrightRenderer` — the default. Needs the ``[playwright]`` extra and
11
+ a one-time ``playwright install chromium``. Launches Chromium once per reel.
12
+ * :class:`ChromeBinaryRenderer` — drives an existing Chrome/Chromium binary by
13
+ path via ``--headless --screenshot``. No Python browser dep; handy when a
14
+ Chrome is already on the box.
15
+
16
+ Bring your own by matching the same context-manager + ``screenshot`` shape.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import subprocess
22
+ import tempfile
23
+ from pathlib import Path
24
+ from typing import Optional, Protocol, Sequence, runtime_checkable
25
+
26
+
27
@runtime_checkable
class Renderer(Protocol):
    """Structural interface every HTML-to-PNG renderer satisfies.

    A renderer is a context manager that exposes ``screenshot``. Any object
    with these three methods qualifies; no inheritance is required.
    """

    def __enter__(self) -> "Renderer":
        """Enter the context and return the renderer to call."""
        ...

    def __exit__(self, *exc) -> None:
        """Leave the context."""
        ...

    def screenshot(
        self, html: str, out_path: Path, *, width: int, height: int
    ) -> None:
        """Write a ``width`` x ``height`` screenshot of ``html`` to ``out_path``."""
        ...
32
+
33
+
34
class PlaywrightRenderer:
    """Screenshot HTML with headless Chromium via Playwright.

    The browser is launched on ``__enter__`` and reused for every
    ``screenshot`` call, so a multi-slide reel pays the startup cost once.
    Each screenshot uses a fresh browser context that is closed afterwards.

    Args:
        device_scale_factor: Passed to the browser context for each screenshot.
        wait_ms: Extra wait after the content is set, in milliseconds; ``0``
            skips the wait.
        wait_until: Passed as ``wait_until`` to ``page.set_content``.
        launch_args: Command-line arguments passed to ``chromium.launch``.
    """

    def __init__(
        self,
        device_scale_factor: int = 1,
        wait_ms: int = 250,
        wait_until: str = "networkidle",
        launch_args: Sequence[str] = ("--force-color-profile=srgb",),
    ):
        self.device_scale_factor = device_scale_factor
        self.wait_ms = wait_ms
        self.wait_until = wait_until
        self.launch_args = list(launch_args)
        self._pw = None
        self._browser = None

    def __enter__(self) -> "PlaywrightRenderer":
        """Start Playwright and launch Chromium; return ``self``."""
        from playwright.sync_api import sync_playwright

        pw = sync_playwright().start()
        try:
            browser = pw.chromium.launch(args=self.launch_args)
        except BaseException:
            # __exit__ is not called when __enter__ raises, so stop the
            # already-started driver here instead of leaking it.
            pw.stop()
            raise
        self._pw = pw
        self._browser = browser
        return self

    def __exit__(self, *exc) -> None:
        """Close the browser and stop Playwright; safe to call when unopened."""
        browser, self._browser = self._browser, None
        pw, self._pw = self._pw, None
        try:
            if browser is not None:
                browser.close()
        finally:
            # Stop the driver even if closing the browser raised.
            if pw is not None:
                pw.stop()

    def screenshot(self, html: str, out_path: Path, *, width: int, height: int) -> None:
        """Render ``html`` in a ``width`` x ``height`` viewport and save a PNG.

        Raises:
            RuntimeError: If called outside a ``with`` block (no browser yet).
        """
        if self._browser is None:
            raise RuntimeError("PlaywrightRenderer must be used as a context manager")
        context = self._browser.new_context(
            viewport={"width": width, "height": height},
            device_scale_factor=self.device_scale_factor,
        )
        try:
            # The page is created inside the try so the context is closed even
            # if page creation itself fails.
            page = context.new_page()
            page.set_content(html, wait_until=self.wait_until)
            if self.wait_ms:
                page.wait_for_timeout(self.wait_ms)
            page.screenshot(path=str(out_path))
        finally:
            context.close()
84
+
85
+
86
class ChromeBinaryRenderer:
    """Screenshot HTML by shelling out to a Chrome/Chromium binary.

    Stateless: each ``screenshot`` call writes the HTML to a temporary file,
    runs the binary headless with ``--screenshot``, and removes the file.

    Args:
        chrome: Path or command name of the Chrome/Chromium binary.
        device_scale_factor: Value for ``--force-device-scale-factor``.
        virtual_time_budget_ms: Value for ``--virtual-time-budget``.
        extra_args: Additional command-line flags, placed before the
            ``--screenshot`` flag.
        runner: Callable invoked as ``runner(cmd, check=True,
            capture_output=True)``; defaults to ``subprocess.run``.
    """

    def __init__(
        self,
        chrome: str,
        device_scale_factor: int = 1,
        virtual_time_budget_ms: int = 2000,
        extra_args: Sequence[str] = (),
        runner=subprocess.run,
    ):
        self.chrome = chrome
        self.device_scale_factor = device_scale_factor
        self.virtual_time_budget_ms = virtual_time_budget_ms
        self.extra_args = list(extra_args)
        self.runner = runner

    def __enter__(self) -> "ChromeBinaryRenderer":
        """Return ``self``; there is nothing to start."""
        return self

    def __exit__(self, *exc) -> None:
        """Nothing to clean up."""
        return None

    def screenshot(self, html: str, out_path: Path, *, width: int, height: int) -> None:
        """Write a ``width`` x ``height`` screenshot of ``html`` to ``out_path``.

        Errors raised by ``runner`` propagate unchanged (with the default
        runner and ``check=True``, a non-zero exit raises
        ``subprocess.CalledProcessError``).
        """
        # Always write UTF-8: the platform's locale encoding may be unable to
        # represent characters in the slide.
        with tempfile.NamedTemporaryFile(
            "w", suffix=".html", delete=False, encoding="utf-8"
        ) as f:
            f.write(html)
            html_path = Path(f.name)
        try:
            # NOTE(review): --no-sandbox is hard-coded; it turns off Chrome's
            # sandbox, so only render HTML you trust. Consider making it
            # opt-in.
            cmd = [
                self.chrome, "--headless=new", "--no-sandbox", "--disable-gpu",
                "--hide-scrollbars",
                f"--force-device-scale-factor={self.device_scale_factor}",
                f"--window-size={width},{height}",
                "--default-background-color=00000000",
                f"--virtual-time-budget={self.virtual_time_budget_ms}",
                *self.extra_args,
                f"--screenshot={out_path}", html_path.as_uri(),
            ]
            self.runner(cmd, check=True, capture_output=True)
        finally:
            html_path.unlink(missing_ok=True)
slidecast/tts.py ADDED
@@ -0,0 +1,144 @@
1
+ """Text-to-speech providers.
2
+
3
+ A provider is anything with::
4
+
5
+ synthesize(text: str, out_path: Path) -> float | None
6
+
7
+ It writes an audio file to ``out_path`` and returns the clip duration in seconds,
8
+ or ``None`` if it can't measure it (e.g. it emitted MP3 and you don't want a
9
+ probe dependency). When the duration is unknown, the video builder lets the audio
10
+ drive the segment length (ffmpeg ``-shortest``) instead of padding to a target.
11
+
12
+ Three providers ship in the box:
13
+
14
+ * :class:`KokoroTTS` — any OpenAI-compatible ``/v1/audio/speech`` endpoint
15
+ (Kokoro, OpenAI, LocalAI, …). Defaults to WAV so duration is measurable.
16
+ * :class:`GTTSTTS` — Google Translate TTS via the ``gtts`` package (MP3, no
17
+ measurable duration → ``-shortest``).
18
+ * :class:`SilentTTS` — a silent track of a fixed length. No dependencies; used
19
+ for silent slides, muted reels, and deterministic tests.
20
+
21
+ Bring your own by implementing the same one-method shape.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import re
27
+ import struct
28
+ import wave
29
+ from pathlib import Path
30
+ from typing import Dict, Optional, Protocol, runtime_checkable
31
+
32
+
33
@runtime_checkable
class TTSProvider(Protocol):
    """Structural interface for a text-to-speech backend.

    Any object with a matching ``synthesize`` method qualifies; no inheritance
    is required.
    """

    def synthesize(self, text: str, out_path: Path) -> Optional[float]:
        """Write speech for ``text`` to ``out_path``.

        Returns the clip length in seconds, or ``None`` when the provider
        cannot measure it.
        """
        ...
38
+
39
+
40
def wav_duration(path: Path) -> Optional[float]:
    """Return the length of the WAV file at ``path`` in seconds.

    Returns ``None`` when the file cannot be read as a WAV (missing, not a
    WAV, ...) or when its header reports a frame rate of zero.
    """
    try:
        with wave.open(str(path)) as reader:
            frame_rate = reader.getframerate()
            if not frame_rate:
                return None
            return reader.getnframes() / float(frame_rate)
    except Exception:  # noqa: BLE001
        # Any failure to open or parse means "duration unknown".
        return None
48
+
49
+
50
def apply_phonetic(text: str, rules: Optional[Dict[str, str]]) -> str:
    """Apply pronunciation rewrites to ``text`` before it is sent to a TTS.

    ``rules`` maps a regular-expression pattern to the replacement that should
    be spoken, e.g. ``{r"\\bSOC\\b": "sock"}`` so "SOC" is read as a word. The
    rules are applied one after another in the mapping's iteration order, each
    on the output of the previous one. ``None`` or an empty mapping returns
    ``text`` unchanged.
    """
    spoken = text
    if rules:
        for pattern, replacement in rules.items():
            spoken = re.sub(pattern, replacement, spoken)
    return spoken
62
+
63
+
64
class SilentTTS:
    """Provider that writes silence: a mono 16-bit WAV of a fixed length.

    Uses only the standard library and produces the same output every time.

    Args:
        seconds: Length of the silent clip; must be greater than zero.
        sample_rate: Frames per second of the generated WAV.

    Raises:
        ValueError: If ``seconds`` is not greater than zero.
    """

    def __init__(self, seconds: float = 3.0, sample_rate: int = 24000):
        if seconds <= 0:
            raise ValueError("seconds must be > 0")
        self.sample_rate = int(sample_rate)
        self.seconds = float(seconds)

    def synthesize(self, text: str, out_path: Path) -> float:
        """Write the silent clip to ``out_path``; ``text`` is ignored.

        Returns the configured ``seconds``.
        """
        frame_count = int(self.seconds * self.sample_rate)
        # One 16-bit little-endian zero sample per frame.
        silence = b"\x00\x00" * frame_count
        with wave.open(str(out_path), "w") as writer:
            writer.setframerate(self.sample_rate)
            writer.setsampwidth(2)  # 16-bit
            writer.setnchannels(1)
            writer.writeframes(silence)
        return self.seconds
81
+
82
+
83
class KokoroTTS:
    """Client for any OpenAI-compatible ``/v1/audio/speech`` endpoint.

    Defaults to WAV so the clip duration is measurable. With any other
    ``response_format`` (e.g. ``"mp3"``) the duration is reported as unknown.

    Args:
        url: Full URL of the speech endpoint.
        voice: Voice name sent in the request body.
        model: Model name sent in the request body.
        response_format: Audio format requested from the server.
        timeout: Request timeout in seconds.
        phonetic: Optional regex -> replacement rules applied to the text
            before it is sent (see ``apply_phonetic``).
        session: Optional object with a ``post(url, json=..., timeout=...)``
            method, used in place of the ``requests`` module.
    """

    def __init__(
        self,
        url: str = "http://127.0.0.1:8021/v1/audio/speech",
        voice: str = "af_heart",
        model: str = "kokoro",
        response_format: str = "wav",
        timeout: float = 180.0,
        phonetic: Optional[Dict[str, str]] = None,
        session=None,
    ):
        self.url = url
        self.voice = voice
        self.model = model
        self.response_format = response_format
        self.timeout = timeout
        self.phonetic = phonetic
        self._session = session

    def synthesize(self, text: str, out_path: Path) -> Optional[float]:
        """Request speech for ``text`` and write the response body to ``out_path``.

        Returns:
            The clip length in seconds when ``response_format`` is ``"wav"``
            and the file is a readable WAV; otherwise ``None``.

        Raises:
            Whatever ``raise_for_status()`` raises for an error response, and
            any error raised by the POST itself.
        """
        client = self._session
        if not client:
            # Import lazily so an injected session does not need the
            # ``requests`` module to be importable.
            import requests

            client = requests

        spoken = apply_phonetic(text, self.phonetic)
        resp = client.post(
            self.url,
            json={
                "model": self.model,
                "voice": self.voice,
                "input": spoken,
                "response_format": self.response_format,
            },
            timeout=self.timeout,
        )
        resp.raise_for_status()
        Path(out_path).write_bytes(resp.content)
        return wav_duration(out_path) if self.response_format == "wav" else None
127
+
128
+
129
class GTTSTTS:
    """Google Translate TTS through the ``gtts`` package.

    Saves MP3 audio, so the clip duration is always reported as unknown.

    Args:
        lang: Language code passed to ``gTTS``.
        tld: Top-level domain passed to ``gTTS``.
        slow: Passed to ``gTTS`` as ``slow``.
        phonetic: Optional regex -> replacement rules applied to the text
            before synthesis (see ``apply_phonetic``).
    """

    def __init__(self, lang: str = "en", tld: str = "com", slow: bool = False,
                 phonetic: Optional[Dict[str, str]] = None):
        self.phonetic = phonetic
        self.slow = slow
        self.tld = tld
        self.lang = lang

    def synthesize(self, text: str, out_path: Path) -> Optional[float]:
        """Synthesize ``text`` and save it to ``out_path``; always returns ``None``."""
        from gtts import gTTS

        speech = gTTS(
            text=apply_phonetic(text, self.phonetic),
            lang=self.lang,
            tld=self.tld,
            slow=self.slow,
        )
        speech.save(str(out_path))
        return None
slidecast/video.py ADDED
@@ -0,0 +1,110 @@
1
+ """ffmpeg steps: still image + audio -> segment, concat segments, grab a poster.
2
+
3
+ Every function takes an injectable ``runner`` (defaults to ``subprocess.run``) and
4
+ an optional ``ffmpeg`` path, so callers can swap in the bundled binary and tests
5
+ can assert the exact command line without invoking ffmpeg.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import subprocess
11
+ import tempfile
12
+ from pathlib import Path
13
+ from typing import List, Optional
14
+
15
+ from .ffmpeg import find_ffmpeg
16
+
17
+
18
def build_segment(
    image: Path,
    audio: Path,
    out: Path,
    *,
    width: int,
    height: int,
    fps: int = 25,
    duration: Optional[float] = None,
    audio_bitrate: str = "192k",
    ffmpeg: Optional[str] = None,
    runner=None,
) -> Path:
    """Encode a still image plus an audio file into one H.264/AAC MP4 segment.

    With ``duration`` set, the command limits the output to that many seconds
    (``-t``) and pads the audio with silence (``-af apad``). With ``duration``
    left as ``None``, ``-shortest`` is used so the audio drives the length.

    Args:
        image: Still image shown for the whole segment.
        audio: Audio track for the segment.
        out: Path of the MP4 to write.
        width / height: Size the image is scaled to.
        fps: Output frame rate.
        duration: Target length in seconds, or ``None``.
        audio_bitrate: Value for ``-b:a``.
        ffmpeg: ffmpeg binary; located with ``find_ffmpeg()`` when omitted.
        runner: Callable invoked as ``runner(cmd, check=True)``; defaults to
            ``subprocess.run``.

    Returns:
        ``out``.
    """
    binary = ffmpeg or find_ffmpeg()
    run = runner or subprocess.run

    if duration is None:
        length_args = ["-shortest"]
    else:
        length_args = ["-t", f"{duration:.3f}", "-af", "apad"]

    cmd: List[str] = [
        binary, "-y", "-loglevel", "error",
        "-loop", "1", "-i", str(image),
        "-i", str(audio),
        *length_args,
        "-r", str(fps),
        "-c:v", "libx264", "-tune", "stillimage", "-pix_fmt", "yuv420p",
        "-vf", f"scale={width}:{height}",
        "-c:a", "aac", "-b:a", audio_bitrate,
        "-movflags", "+faststart",
        str(out),
    ]
    run(cmd, check=True)
    return out
58
+
59
+
60
def concat(
    segments: List[Path],
    out: Path,
    *,
    ffmpeg: Optional[str] = None,
    runner=None,
) -> Path:
    """Join already-encoded segments into ``out`` with ffmpeg's concat demuxer.

    The segments are stream-copied (``-c copy``), not re-encoded. Their
    absolute paths are written to a temporary list file that is removed
    afterwards, whether or not ffmpeg succeeds.

    Args:
        segments: Segment files, in playback order.
        out: Path of the combined MP4 to write.
        ffmpeg: ffmpeg binary; located with ``find_ffmpeg()`` when omitted.
        runner: Callable invoked as ``runner(cmd, check=True)``; defaults to
            ``subprocess.run``.

    Returns:
        ``out``.

    Raises:
        ValueError: If ``segments`` is empty.
    """
    if not segments:
        raise ValueError("concat() needs at least one segment")
    ffmpeg = ffmpeg or find_ffmpeg()
    runner = runner or subprocess.run
    with tempfile.NamedTemporaryFile(
        "w", suffix=".txt", delete=False, encoding="utf-8"
    ) as f:
        for seg in segments:
            # Inside a single-quoted entry a literal quote must be written as
            # '\'' (close, escaped quote, reopen); otherwise a path containing
            # an apostrophe ends the entry early.
            quoted = str(Path(seg).resolve()).replace("'", "'\\''")
            f.write(f"file '{quoted}'\n")
        list_path = Path(f.name)
    try:
        runner(
            [
                ffmpeg, "-y", "-loglevel", "error",
                "-f", "concat", "-safe", "0",
                "-i", str(list_path),
                "-c", "copy", "-movflags", "+faststart",
                str(out),
            ],
            check=True,
        )
    finally:
        list_path.unlink(missing_ok=True)
    return out
90
+
91
+
92
def poster(
    video: Path,
    out: Path,
    *,
    quality: int = 3,
    ffmpeg: Optional[str] = None,
    runner=None,
) -> Path:
    """Extract one frame from ``video`` into ``out`` as a poster image.

    Args:
        video: Source video file.
        out: Image path to write.
        quality: Value passed to ffmpeg's ``-q:v``.
        ffmpeg: ffmpeg binary; located with ``find_ffmpeg()`` when omitted.
        runner: Callable invoked as ``runner(cmd, check=True)``; defaults to
            ``subprocess.run``.

    Returns:
        ``out``.
    """
    binary = ffmpeg or find_ffmpeg()
    run = runner or subprocess.run
    cmd = [binary, "-y", "-loglevel", "error", "-i", str(video)]
    cmd += ["-frames:v", "1", "-q:v", str(quality), str(out)]
    run(cmd, check=True)
    return out
@@ -0,0 +1,123 @@
1
+ Metadata-Version: 2.4
2
+ Name: slidecast
3
+ Version: 0.1.0
4
+ Summary: Turn a list of HTML slides + narration into a narrated MP4. Headless-browser screenshots, pluggable text-to-speech, ffmpeg stitching. Bring your own slide design and voice.
5
+ Project-URL: Homepage, https://github.com/vinayvobbili/slidecast
6
+ Project-URL: Source, https://github.com/vinayvobbili/slidecast
7
+ Author: Vinay Vobbilichetty
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Keywords: ffmpeg,kokoro,narration,playwright,screencast,slides,tts,video
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
16
+ Classifier: Topic :: Multimedia :: Video
17
+ Requires-Python: >=3.10
18
+ Requires-Dist: requests>=2
19
+ Provides-Extra: dev
20
+ Requires-Dist: pytest>=7; extra == 'dev'
21
+ Requires-Dist: pyyaml>=6; extra == 'dev'
22
+ Provides-Extra: ffmpeg
23
+ Requires-Dist: imageio-ffmpeg>=0.4; extra == 'ffmpeg'
24
+ Provides-Extra: gtts
25
+ Requires-Dist: gtts>=2.3; extra == 'gtts'
26
+ Provides-Extra: playwright
27
+ Requires-Dist: playwright>=1.40; extra == 'playwright'
28
+ Provides-Extra: yaml
29
+ Requires-Dist: pyyaml>=6; extra == 'yaml'
30
+ Description-Content-Type: text/markdown
31
+
32
+ # slidecast
33
+
34
+ Turn a list of HTML slides + narration into a narrated MP4.
35
+
36
+ You bring the slide design — any HTML you like — and the words. slidecast
37
+ screenshots each slide with a headless browser, narrates it with a pluggable
38
+ text-to-speech provider, and stitches the frames into one MP4 with ffmpeg. It
39
+ has no opinion about how your slides look and no hard dependency on a specific
40
+ voice or browser: every piece is swappable.
41
+
42
+ ## Install
43
+
44
+ ```
45
+ pip install slidecast # core (requests only)
46
+ pip install slidecast[playwright] # default renderer (headless Chromium)
47
+ pip install slidecast[gtts] # Google Translate TTS
48
+ pip install slidecast[ffmpeg] # bundled ffmpeg binary (no system install)
+ pip install slidecast[yaml] # YAML reel specs for the CLI (PyYAML)
49
+ ```
50
+
51
+ After installing the Playwright extra, fetch the browser once:
52
+
53
+ ```
54
+ playwright install chromium
55
+ ```
56
+
57
+ You also need ffmpeg — on `PATH`, via `$SLIDECAST_FFMPEG`, or the `[ffmpeg]`
58
+ extra's bundled binary.
59
+
60
+ ## Library
61
+
62
+ ```python
63
+ from slidecast import Reel, KokoroTTS
64
+
65
+ reel = Reel(width=1280, height=720, tts=KokoroTTS(voice="af_heart"))
66
+ reel.add("<!doctype html><h1>Hello</h1>", "Hello, and welcome.")
67
+ reel.add("<!doctype html><h1>Goodbye</h1>", "Thanks for watching.", tail_pad=0.8)
68
+ reel.render("out.mp4", make_poster=True)
69
+ ```
70
+
71
+ A slide with empty narration becomes a silent hold (`min_duration` seconds, or the reel's `silent_slide_seconds` — 3 by default — when `min_duration` is unset). When
72
+ the TTS provider reports a clip duration, the segment is padded to fit the speech
73
+ exactly; when it can't (e.g. MP3), the audio drives the length.
74
+
75
+ ## CLI
76
+
77
+ ```
78
+ slidecast render reel.yaml -o out.mp4 --poster
79
+ ```
80
+
81
+ ```yaml
82
+ width: 1280
83
+ height: 720
84
+ fps: 25
85
+ tts:
86
+ provider: kokoro # kokoro | gtts | silent
87
+ url: http://127.0.0.1:8021/v1/audio/speech
88
+ voice: af_heart
89
+ response_format: wav
90
+ slides:
91
+ - html_file: intro.html
92
+ narration: "Before any of this, here's why it matters."
93
+ tail_pad: 0.8
94
+ - html: "<!doctype html><h1>Step one</h1>"
95
+ narration: "" # silent slide
96
+ min_duration: 3
97
+ ```
98
+
99
+ ## The swappable pieces
100
+
101
+ **Text-to-speech** — anything with `synthesize(text, path) -> seconds | None`:
102
+
103
+ - `KokoroTTS` — any OpenAI-compatible `/v1/audio/speech` endpoint (Kokoro,
104
+ OpenAI, LocalAI, …). Defaults to WAV so the clip length is measurable.
105
+ - `GTTSTTS` — Google Translate TTS (`gtts`).
106
+ - `SilentTTS` — a silent track of a fixed length. No dependencies; the default,
107
+ so a reel renders end to end with nothing configured.
108
+
109
+ Pass `phonetic={r"\bSOC\b": "sock"}` to rewrite how tricky tokens are spoken
110
+ without changing the on-screen text.
111
+
112
+ **Renderer** — a context manager exposing `screenshot(html, path, *, width, height)`:
113
+
114
+ - `PlaywrightRenderer` — headless Chromium, launched once per reel (default).
115
+ - `ChromeBinaryRenderer` — drive an existing Chrome/Chromium binary by path.
116
+
117
+ **ffmpeg steps** are exposed directly (`build_segment`, `concat`, `poster`) and
118
+ take an injectable `runner`, so you can compose your own pipeline or test command
119
+ construction without invoking ffmpeg.
120
+
121
+ ## License
122
+
123
+ MIT
@@ -0,0 +1,13 @@
1
+ slidecast/__init__.py,sha256=Bs9DxGs4QqtWlserg1jINorsxko8WmlpZ_DZiWGnEgk,2110
2
+ slidecast/cli.py,sha256=avAqBlCdorNzoh0bo7Dfl03QJWMbQduAD_b1GWsroIg,3870
3
+ slidecast/ffmpeg.py,sha256=IRJNG7LXxYyMWiQT5iX2WBQJH_vIKP_LXxRgcEye-14,1338
4
+ slidecast/models.py,sha256=qpZQfjxbaSH4L4JBV2sepxByPT7y43jp8RybZfz9Xkw,1385
5
+ slidecast/reel.py,sha256=rq9kfENgCe3Amt_EG15ECIvXoTtqysQZtMxy0GXgM8U,4890
6
+ slidecast/render.py,sha256=vq770U6FowOwzDXm_bC9UOjPJ6Gbu1s-ysEebk9Xr_g,4674
7
+ slidecast/tts.py,sha256=KN9yjmYmGasoR54yy0niwfUdH1bPN80gFQt9ItNy67o,5025
8
+ slidecast/video.py,sha256=XyNj4O2iJIjh7AkcOdmiRjYFiOimUP7E1gJuf3rtEKU,3093
9
+ slidecast-0.1.0.dist-info/METADATA,sha256=B8jZcIwML31an6N26rieWSRZ_QNScNZLTNulVzp3h80,4237
10
+ slidecast-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
11
+ slidecast-0.1.0.dist-info/entry_points.txt,sha256=Cg0RI0Em73vEFYtes8I7YQCePngmIIfc1JsMzk0VTK4,49
12
+ slidecast-0.1.0.dist-info/licenses/LICENSE,sha256=Q7CXTchzC9hqR2Dr-9cRh3bM2kXTgXGwk-dO0rGvQsE,1076
13
+ slidecast-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ slidecast = slidecast.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Vinay Vobbilichetty
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.