PyPI - slidesonnet - Versions diffs - 0.1.0__py3-none-any.whl - Mend

slidesonnet 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

slidesonnet/__init__.py +3 -0
slidesonnet/actions.py +202 -0
slidesonnet/clean.py +289 -0
slidesonnet/cli.py +556 -0
slidesonnet/config.py +132 -0
slidesonnet/doctor.py +221 -0
slidesonnet/exceptions.py +25 -0
slidesonnet/hashing.py +99 -0
slidesonnet/init.py +69 -0
slidesonnet/models.py +222 -0
slidesonnet/parsers/__init__.py +0 -0
slidesonnet/parsers/base.py +24 -0
slidesonnet/parsers/beamer.py +361 -0
slidesonnet/parsers/expansion.py +169 -0
slidesonnet/parsers/marp.py +443 -0
slidesonnet/pipeline.py +849 -0
slidesonnet/playlist.py +63 -0
slidesonnet/preview.py +119 -0
slidesonnet/subtitles.py +344 -0
slidesonnet/tasks.py +423 -0
slidesonnet/templates/__init__.py +0 -0
slidesonnet/templates/env.txt +2 -0
slidesonnet/templates/example_playlist.yaml +27 -0
slidesonnet/templates/example_playlist_tex.yaml +27 -0
slidesonnet/templates/example_pronunciation.md +12 -0
slidesonnet/templates/example_slides_defs.md +27 -0
slidesonnet/templates/example_slides_defs.tex +31 -0
slidesonnet/templates/example_slides_intro.md +22 -0
slidesonnet/templates/example_slides_intro.tex +24 -0
slidesonnet/templates/gitignore.txt +8 -0
slidesonnet/tts/__init__.py +20 -0
slidesonnet/tts/base.py +37 -0
slidesonnet/tts/elevenlabs.py +114 -0
slidesonnet/tts/piper.py +103 -0
slidesonnet/tts/pronunciation.py +81 -0
slidesonnet/video/__init__.py +0 -0
slidesonnet/video/composer.py +444 -0
slidesonnet-0.1.0.dist-info/METADATA +383 -0
slidesonnet-0.1.0.dist-info/RECORD +43 -0
slidesonnet-0.1.0.dist-info/WHEEL +5 -0
slidesonnet-0.1.0.dist-info/entry_points.txt +2 -0
slidesonnet-0.1.0.dist-info/licenses/LICENSE +21 -0
slidesonnet-0.1.0.dist-info/top_level.txt +1 -0

slidesonnet/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""slideSonnet - Compile text-based presentations into narrated videos."""
+__version__ = "0.1.0"

slidesonnet/actions.py ADDED Viewed

@@ -0,0 +1,202 @@
+"""Action functions executed by doit tasks.
+These are the actual build steps (image extraction, TTS synthesis,
+video composition, concatenation, assembly) that doit invokes.
+"""
+from __future__ import annotations
+import json
+import logging
+import shutil
+from collections.abc import Callable
+from pathlib import Path
+from slidesonnet.models import ModuleType, ProjectConfig
+from slidesonnet.parsers.base import SlideParser
+from slidesonnet.tts.base import TTSEngine
+from slidesonnet.video import composer
+logger = logging.getLogger(__name__)
+def action_extract_images(
+    source: Path,
+    slides_dir: Path,
+    extract_fn: Callable[[Path, Path], list[Path]],
+    manifest_path: Path,
+) -> None:
+    """Extract slide images and record their paths in a JSON manifest.
+    ``slides_dir`` is created if missing, then ``extract_fn(source, slides_dir)``
+    is called. The paths it returns are written to ``manifest_path`` as a
+    JSON list of strings (UTF-8), creating the manifest's parent directory
+    when needed.
+    """
+    slides_dir.mkdir(parents=True, exist_ok=True)
+    extracted = extract_fn(source, slides_dir)
+    manifest_path.parent.mkdir(parents=True, exist_ok=True)
+    payload = json.dumps(list(map(str, extracted)))
+    manifest_path.write_text(payload, encoding="utf-8")
+def action_tts(
+    text: str,
+    output_path: Path,
+    tts: TTSEngine,
+    utterance_path: Path,
+    voice: str | None = None,
+) -> None:
+    """Save the utterance text to disk, then synthesize it to audio.
+    ``text`` is written to ``utterance_path`` (UTF-8) and passed to
+    ``tts.synthesize`` together with ``output_path`` and ``voice``. Parent
+    directories of both paths are created as needed.
+    Caching is handled by doit's uptodate/targets mechanism.
+    """
+    utterance_dir = utterance_path.parent
+    utterance_dir.mkdir(parents=True, exist_ok=True)
+    utterance_path.write_text(text, encoding="utf-8")
+    audio_dir = output_path.parent
+    audio_dir.mkdir(parents=True, exist_ok=True)
+    logger.info("  slide synthesizing...")
+    tts.synthesize(text, output_path, voice=voice)
+def action_concat_audio(audio_paths: list[Path], output_path: Path) -> None:
+    """Concatenate the given audio files into one file at ``output_path``.
+    The parent directory of ``output_path`` is created first; the actual
+    joining is delegated to ``composer.concatenate_audio``.
+    """
+    target_dir = output_path.parent
+    target_dir.mkdir(parents=True, exist_ok=True)
+    composer.concatenate_audio(audio_paths, output_path)
+def action_compose_narrated(
+    manifest_path: Path,
+    slide_index: int,
+    audio_path: Path,
+    output: Path,
+    config: ProjectConfig,
+) -> None:
+    """Compose a narrated slide segment.
+    Looks up the slide image in the JSON manifest, measures the narration
+    audio with ``composer.get_duration`` and passes both to
+    ``composer.compose_segment`` along with the video settings in ``config``.
+    Args:
+        manifest_path: JSON file holding the list of slide image paths.
+        slide_index: 1-based position of the slide in the manifest.
+        audio_path: Narration audio for the slide.
+        output: Destination of the composed segment.
+        config: Project configuration; only ``config.video`` is read.
+    Raises:
+        IndexError: If ``slide_index`` is not between 1 and the number of
+            images listed in the manifest.
+    """
+    images = json.loads(manifest_path.read_text(encoding="utf-8"))
+    # Reject 0 and negative indices explicitly: ``images[slide_index - 1]``
+    # would otherwise wrap around and silently pick an image from the end.
+    if not 1 <= slide_index <= len(images):
+        raise IndexError(
+            f"slide index {slide_index} is out of range: "
+            f"{manifest_path} lists {len(images)} image(s)"
+        )
+    image = Path(images[slide_index - 1])
+    duration = composer.get_duration(audio_path)
+    logger.debug("slide %d: audio=%.3fs image=%s", slide_index, duration, image.name)
+    composer.compose_segment(
+        image=image,
+        audio=audio_path,
+        output=output,
+        duration=duration,
+        pad_seconds=config.video.pad_seconds,
+        pre_silence=config.video.pre_silence,
+        resolution=config.video.resolution,
+        fps=config.video.fps,
+        crf=config.video.crf,
+        preset=config.video.preset,
+    )
+def action_compose_silent(
+    manifest_path: Path,
+    slide_index: int,
+    output: Path,
+    config: ProjectConfig,
+    silence_override: float | None = None,
+) -> None:
+    """Compose a silent slide segment.
+    Looks up the slide image in the JSON manifest and passes it to
+    ``composer.compose_silent_segment`` with the video settings in ``config``.
+    Args:
+        manifest_path: JSON file holding the list of slide image paths.
+        slide_index: 1-based position of the slide in the manifest.
+        output: Destination of the composed segment.
+        config: Project configuration; only ``config.video`` is read.
+        silence_override: Segment duration to use instead of
+            ``config.video.silence_duration``; ``None`` selects the configured
+            value (an explicit ``0`` is honoured).
+    Raises:
+        IndexError: If ``slide_index`` is not between 1 and the number of
+            images listed in the manifest.
+    """
+    images = json.loads(manifest_path.read_text(encoding="utf-8"))
+    # Reject 0 and negative indices explicitly: ``images[slide_index - 1]``
+    # would otherwise wrap around and silently pick an image from the end.
+    if not 1 <= slide_index <= len(images):
+        raise IndexError(
+            f"slide index {slide_index} is out of range: "
+            f"{manifest_path} lists {len(images)} image(s)"
+        )
+    image = Path(images[slide_index - 1])
+    duration = silence_override if silence_override is not None else config.video.silence_duration
+    composer.compose_silent_segment(
+        image=image,
+        output=output,
+        duration=duration,
+        resolution=config.video.resolution,
+        fps=config.video.fps,
+        crf=config.video.crf,
+        preset=config.video.preset,
+    )
+def action_assemble(segments: list[Path], output: Path, config: ProjectConfig) -> None:
+    """Merge all segments into the final output file.
+    Raises:
+        RuntimeError: If ``segments`` is empty.
+    """
+    if segments:
+        _merge_videos(segments, output, config)
+        return
+    raise RuntimeError("No segments to assemble — the playlist may be empty.")
+def _merge_videos(inputs: list[Path], output: Path, config: ProjectConfig) -> None:
+    """Merge one or more video files into a single output.
+    A single input is copied to ``output`` unchanged. Several inputs are
+    joined with ``composer.concatenate_segments_xfade`` when
+    ``config.video.crossfade`` is positive, otherwise with
+    ``composer.concatenate_segments``.
+    Args:
+        inputs: Video files to merge, in playback order.
+        output: Destination file; its parent directory is created if missing.
+        config: Project configuration; only ``config.video`` is read, and only
+            when there is more than one input.
+    """
+    # Create the destination directory for every path, not just the copy
+    # path: nothing visible here shows the composer helpers creating it.
+    output.parent.mkdir(parents=True, exist_ok=True)
+    if len(inputs) == 1:
+        shutil.copy2(inputs[0], output)
+    elif config.video.crossfade > 0:
+        composer.concatenate_segments_xfade(
+            inputs,
+            output,
+            crossfade=config.video.crossfade,
+            crf=config.video.crf,
+            preset=config.video.preset,
+            resolution=config.video.resolution,
+            fps=config.video.fps,
+        )
+    else:
+        composer.concatenate_segments(inputs, output)
+def action_compile_beamer(
+    source: Path,
+    slides_dir: Path,
+    pdf_path: Path,
+) -> None:
+    """Compile a Beamer source file and verify that the PDF was produced.
+    Calls ``compile_pdf(source, slides_dir)`` from the Beamer parser module.
+    Raises:
+        RuntimeError: If ``pdf_path`` does not exist after compilation.
+    """
+    from slidesonnet.parsers.beamer import compile_pdf
+    compile_pdf(source, slides_dir)
+    if pdf_path.exists():
+        return
+    raise RuntimeError(f"Expected PDF not produced: {pdf_path}")
+def action_extract_images_beamer(
+    pdf_path: Path,
+    slides_dir: Path,
+    manifest_path: Path,
+) -> None:
+    """Extract slide images from a compiled Beamer PDF and write the manifest.
+    The paths returned by ``extract_images_from_pdf(pdf_path, slides_dir)``
+    are stored in ``manifest_path`` as a JSON list of strings (UTF-8); the
+    manifest's parent directory is created when needed.
+    """
+    from slidesonnet.parsers.beamer import extract_images_from_pdf
+    extracted = extract_images_from_pdf(pdf_path, slides_dir)
+    manifest_path.parent.mkdir(parents=True, exist_ok=True)
+    payload = json.dumps(list(map(str, extracted)))
+    manifest_path.write_text(payload, encoding="utf-8")
+def action_export_pdf_marp(
+    source: Path,
+    output_path: Path,
+) -> None:
+    """Export a MARP presentation to PDF.
+    Thin wrapper that forwards ``source`` and ``output_path`` to
+    ``slidesonnet.parsers.marp.export_pdf``.
+    """
+    # Imported at call time rather than at module level.
+    from slidesonnet.parsers.marp import export_pdf
+    export_pdf(source, output_path)
+def action_export_pdf_beamer(
+    cache_pdf: Path,
+    output_path: Path,
+) -> None:
+    """Copy the compiled Beamer PDF from the cache to ``output_path``.
+    The destination's parent directory is created if it does not exist;
+    the copy is made with ``shutil.copy2``.
+    """
+    destination_dir = output_path.parent
+    destination_dir.mkdir(parents=True, exist_ok=True)
+    shutil.copy2(cache_pdf, output_path)
+def get_parser_and_extractor(
+    module_type: ModuleType,
+) -> tuple[type[SlideParser], Callable[[Path, Path], list[Path]]]:
+    """Return the parser class and image-extraction function for a module type.
+    The parser modules are imported only for the branch that is taken.
+    Raises:
+        ValueError: If ``module_type`` is neither MARP nor BEAMER.
+    """
+    if module_type == ModuleType.MARP:
+        from slidesonnet.parsers.marp import MarpParser, extract_images as marp_extract
+        return MarpParser, marp_extract
+    if module_type == ModuleType.BEAMER:
+        from slidesonnet.parsers.beamer import BeamerParser, extract_images as beamer_extract
+        return BeamerParser, beamer_extract
+    raise ValueError(f"No parser for module type: {module_type}")

slidesonnet/clean.py ADDED Viewed

@@ -0,0 +1,289 @@
+"""Selective cache cleanup with graduated preservation levels.
+Four --keep levels, each progressively more aggressive:
+  nothing — nuke entire cache directory
+  api     — keep all API-generated audio, remove build artifacts + piper audio
+  current — keep audio for current slide text (any engine), remove orphans
+  exact   — keep only audio matching current text + current TTS config
+"""
+from __future__ import annotations
+import logging
+import shutil
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Literal
+from slidesonnet.actions import get_parser_and_extractor
+from slidesonnet.config import load_config
+from slidesonnet.hashing import audio_filename, parse_audio_filename, text_hash
+from slidesonnet.models import API_BACKENDS, ModuleType, resolve_voice
+from slidesonnet.playlist import parse_playlist
+from slidesonnet.tts.pronunciation import apply_pronunciation, load_pronunciation_dict
+logger = logging.getLogger(__name__)
+# Accepted values for the ``keep`` argument of clean().
+KeepLevel = Literal["nothing", "api", "current", "exact"]
+# Module-private alias of models.API_BACKENDS; audio whose backend is in this
+# collection is kept by the "api" level.
+_API_BACKENDS = API_BACKENDS
+@dataclass
+class CleanResult:
+    """Outcome of a cleanup run: how much was deleted and how much remains."""
+    # Number of files deleted.
+    removed_files: int = 0
+    # Combined size, in bytes, of the deleted files.
+    removed_bytes: int = 0
+    # Number of files still present afterwards.
+    kept_files: int = 0
+    @property
+    def removed_mb(self) -> float:
+        """Size of the removed data in units of 1024 * 1024 bytes."""
+        bytes_per_mb = 1024 * 1024
+        return self.removed_bytes / bytes_per_mb
+def _count_dir(path: Path) -> tuple[int, int]:
+    """Return ``(file_count, total_bytes)`` for the directory tree at ``path``.
+    Only regular files are counted; a missing ``path`` yields ``(0, 0)``.
+    """
+    if not path.exists():
+        return 0, 0
+    sizes = [entry.stat().st_size for entry in path.rglob("*") if entry.is_file()]
+    return len(sizes), sum(sizes)
+def clean(playlist_path: Path, keep: KeepLevel = "api") -> CleanResult:
+    """Clean build artifacts with the given preservation level.
+    The cache is the ``cache`` directory next to the resolved playlist file.
+    Args:
+        playlist_path: Path to the playlist whose cache should be cleaned.
+        keep: Which cached audio to preserve: ``"nothing"``, ``"api"``,
+            ``"current"`` or ``"exact"``. Any other value removes nothing.
+    Returns:
+        A CleanResult with the number of files and bytes removed and the
+        number of files left in the cache. All fields are zero when the
+        cache directory does not exist.
+    """
+    build_dir = playlist_path.resolve().parent / "cache"
+    if not build_dir.exists():
+        return CleanResult()
+    files_before, bytes_before = _count_dir(build_dir)
+    if keep == "nothing":
+        _clean_all(build_dir)
+    elif keep == "api":
+        _clean_keep_api(build_dir)
+    elif keep == "current":
+        _clean_keep_current(build_dir, playlist_path)
+    elif keep == "exact":
+        _clean_keep_exact(build_dir, playlist_path)
+    # A single walk yields both figures; _count_dir reports (0, 0) when the
+    # directory itself has been removed.
+    files_after, bytes_after = _count_dir(build_dir)
+    return CleanResult(
+        removed_files=files_before - files_after,
+        removed_bytes=bytes_before - bytes_after,
+        kept_files=files_after,
+    )
+def _clean_all(build_dir: Path) -> None:
+    """Remove the entire cache directory.
+    Deletes ``build_dir`` and everything beneath it with ``shutil.rmtree``.
+    """
+    shutil.rmtree(build_dir)
+def _clean_keep_api(build_dir: Path) -> None:
+    """Delete build artifacts and all audio except API-generated files.
+    Everything outside ``audio/`` is removed. Inside ``audio/`` a file is
+    kept only if its name parses as an audio filename whose backend is in
+    ``_API_BACKENDS``; piper audio, concat files and old-format files go.
+    """
+    _remove_build_artifacts(build_dir)
+    audio_dir = build_dir / "audio"
+    if not audio_dir.exists():
+        return
+    for entry in audio_dir.iterdir():
+        if not entry.is_file():
+            continue
+        fields = parse_audio_filename(entry.name)
+        from_api = False
+        if fields is not None:
+            _, backend, _ = fields
+            from_api = backend in _API_BACKENDS
+        if not from_api:
+            entry.unlink()
+    _remove_empty_dir(audio_dir)
+def _clean_keep_current(build_dir: Path, playlist_path: Path) -> None:
+    """Delete build artifacts and audio that no current utterance refers to.
+    Everything outside ``audio/`` is removed. Inside ``audio/`` a file is
+    kept only if its name parses as an audio filename whose text hash is
+    among the hashes collected from the playlist (any engine); orphaned
+    audio, concat files and old-format files are deleted.
+    """
+    _remove_build_artifacts(build_dir)
+    audio_dir = build_dir / "audio"
+    if not audio_dir.exists():
+        return
+    live_hashes = _collect_current_text_hashes(playlist_path)
+    for entry in audio_dir.iterdir():
+        if not entry.is_file():
+            continue
+        fields = parse_audio_filename(entry.name)
+        matches_current = False
+        if fields is not None:
+            file_hash, _, _ = fields
+            matches_current = file_hash in live_hashes
+        if not matches_current:
+            entry.unlink()
+    _remove_empty_dir(audio_dir)
+def _clean_keep_exact(build_dir: Path, playlist_path: Path) -> None:
+    """Delete build artifacts and every audio file that is not an exact match.
+    Everything outside ``audio/`` is removed. Inside ``audio/`` only files
+    whose names are in the set expected for the current playlist and TTS
+    configuration survive.
+    """
+    _remove_build_artifacts(build_dir)
+    audio_dir = build_dir / "audio"
+    if not audio_dir.exists():
+        return
+    expected_names = _collect_current_audio_filenames(playlist_path)
+    for entry in audio_dir.iterdir():
+        if entry.is_file() and entry.name not in expected_names:
+            entry.unlink()
+    _remove_empty_dir(audio_dir)
+def _remove_build_artifacts(build_dir: Path) -> None:
+    """Delete every direct child of ``build_dir`` except the one named ``audio``.
+    Directories are removed recursively; anything else (.doit.db,
+    .doit.db.bak, etc.) is unlinked.
+    """
+    disposable = (entry for entry in build_dir.iterdir() if entry.name != "audio")
+    for entry in disposable:
+        if not entry.is_dir():
+            entry.unlink()
+            continue
+        shutil.rmtree(entry)
+def _remove_empty_dir(path: Path) -> None:
+    """Remove ``path`` if it is an empty directory; otherwise do nothing.
+    Best effort: any ``OSError`` raised by ``rmdir`` is swallowed.
+    """
+    try:
+        path.rmdir()  # fails with OSError unless the directory is empty
+    except OSError:
+        return
+def _collect_current_text_hashes(playlist_path: Path) -> set[str]:
+    """Return the text hashes of every utterance the playlist currently yields.
+    Each narrated slide is hashed once per combination of backend
+    pronunciation dictionary and candidate voice, so that cached audio from
+    any engine is preserved when its utterance content matches.
+    """
+    resolved_playlist = playlist_path.resolve()
+    project_dir = resolved_playlist.parent
+    cache_dir = project_dir / "cache"
+    raw_config, entries = parse_playlist(resolved_playlist)
+    config = load_config(raw_config, project_dir)
+    config.pronunciation = load_pronunciation_dict(config.pronunciation_files)
+    # One pronunciation dictionary per backend, so audio from any engine matches.
+    pronunciations = [config.pronunciation_for(name) for name in {"piper", "elevenlabs"}]
+    hashes: set[str] = set()
+    for entry in entries:
+        if entry.module_type == ModuleType.VIDEO:
+            continue  # video entries are skipped
+        source_path = project_dir / entry.path
+        parser_cls, _ = get_parser_and_extractor(entry.module_type)
+        slides_dir = cache_dir / entry.path.parent / entry.path.stem / "slides"
+        for slide in parser_cls().parse(source_path, slides_dir):
+            if not slide.has_narration:
+                continue
+            # Candidate voices: the default (None) plus every id of the slide's preset.
+            voices: set[str | None] = {None}
+            voice_cfg = config.voices.get(slide.voice) if slide.voice else None
+            if voice_cfg:
+                voices |= voice_cfg.all_voice_ids()
+            for pron in pronunciations:
+                whole = apply_pronunciation(slide.narration_raw, pron)
+                parts = [apply_pronunciation(part, pron) for part in slide.narration_parts]
+                # More than one part: each part is an utterance; otherwise the whole text.
+                utterances = parts if len(parts) > 1 else [whole]
+                hashes.update(
+                    text_hash(utterance, voice) for utterance in utterances for voice in voices
+                )
+    return hashes
+def _collect_current_audio_filenames(playlist_path: Path) -> set[str]:
+    """Parse the playlist and return expected audio filenames for the current TTS config.
+    Only considers the currently configured backend, unlike _collect_current_text_hashes
+    which considers all backends.
+    Side effects: loads ``.env`` from the playlist directory, creates a TTS
+    engine via ``create_tts`` and overwrites ``narration_processed`` and
+    ``narration_parts_processed`` on each narrated slide it parses.
+    """
+    from slidesonnet.tts import create_tts
+    from dotenv import load_dotenv
+    playlist_path = playlist_path.resolve()
+    playlist_dir = playlist_path.parent
+    build_dir = playlist_dir / "cache"
+    # Load the project's .env before the config and TTS engine are built.
+    # NOTE(review): presumably so create_tts can see API credentials; confirm.
+    load_dotenv(playlist_dir / ".env")
+    raw_config, entries = parse_playlist(playlist_path)
+    config = load_config(raw_config, playlist_dir)
+    config.pronunciation = load_pronunciation_dict(config.pronunciation_files)
+    tts = create_tts(config)
+    # Pronunciation dictionary of the configured backend only.
+    pron = config.pronunciation_for(config.tts.backend)
+    filenames: set[str] = set()
+    for entry in entries:
+        # Video entries are skipped.
+        if entry.module_type == ModuleType.VIDEO:
+            continue
+        source_path = playlist_dir / entry.path
+        parser_cls, _ = get_parser_and_extractor(entry.module_type)
+        # Slides directory under cache/<entry parent>/<entry stem>/slides.
+        module_dir = build_dir / entry.path.parent / entry.path.stem
+        slides_dir = module_dir / "slides"
+        parser = parser_cls()
+        slides = parser.parse(source_path, slides_dir)
+        for slide in slides:
+            if not slide.has_narration:
+                continue
+            slide.narration_processed = apply_pronunciation(slide.narration_raw, pron)
+            slide.narration_parts_processed = [
+                apply_pronunciation(part, pron) for part in slide.narration_parts
+            ]
+            voice = resolve_voice(slide.voice, config.voices, config.tts.backend)
+            parts = slide.narration_parts_processed
+            # More than one part: each part is its own utterance; otherwise
+            # the whole processed narration is the single utterance.
+            texts = parts if len(parts) > 1 else [slide.narration_processed]
+            for utterance_text in texts:
+                filenames.add(audio_filename(utterance_text, tts.name(), tts.cache_key(), voice))
+    return filenames