slidesonnet 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. slidesonnet/__init__.py +3 -0
  2. slidesonnet/actions.py +202 -0
  3. slidesonnet/clean.py +289 -0
  4. slidesonnet/cli.py +556 -0
  5. slidesonnet/config.py +132 -0
  6. slidesonnet/doctor.py +221 -0
  7. slidesonnet/exceptions.py +25 -0
  8. slidesonnet/hashing.py +99 -0
  9. slidesonnet/init.py +69 -0
  10. slidesonnet/models.py +222 -0
  11. slidesonnet/parsers/__init__.py +0 -0
  12. slidesonnet/parsers/base.py +24 -0
  13. slidesonnet/parsers/beamer.py +361 -0
  14. slidesonnet/parsers/expansion.py +169 -0
  15. slidesonnet/parsers/marp.py +443 -0
  16. slidesonnet/pipeline.py +849 -0
  17. slidesonnet/playlist.py +63 -0
  18. slidesonnet/preview.py +119 -0
  19. slidesonnet/subtitles.py +344 -0
  20. slidesonnet/tasks.py +423 -0
  21. slidesonnet/templates/__init__.py +0 -0
  22. slidesonnet/templates/env.txt +2 -0
  23. slidesonnet/templates/example_playlist.yaml +27 -0
  24. slidesonnet/templates/example_playlist_tex.yaml +27 -0
  25. slidesonnet/templates/example_pronunciation.md +12 -0
  26. slidesonnet/templates/example_slides_defs.md +27 -0
  27. slidesonnet/templates/example_slides_defs.tex +31 -0
  28. slidesonnet/templates/example_slides_intro.md +22 -0
  29. slidesonnet/templates/example_slides_intro.tex +24 -0
  30. slidesonnet/templates/gitignore.txt +8 -0
  31. slidesonnet/tts/__init__.py +20 -0
  32. slidesonnet/tts/base.py +37 -0
  33. slidesonnet/tts/elevenlabs.py +114 -0
  34. slidesonnet/tts/piper.py +103 -0
  35. slidesonnet/tts/pronunciation.py +81 -0
  36. slidesonnet/video/__init__.py +0 -0
  37. slidesonnet/video/composer.py +444 -0
  38. slidesonnet-0.1.0.dist-info/METADATA +383 -0
  39. slidesonnet-0.1.0.dist-info/RECORD +43 -0
  40. slidesonnet-0.1.0.dist-info/WHEEL +5 -0
  41. slidesonnet-0.1.0.dist-info/entry_points.txt +2 -0
  42. slidesonnet-0.1.0.dist-info/licenses/LICENSE +21 -0
  43. slidesonnet-0.1.0.dist-info/top_level.txt +1 -0
slidesonnet/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """slideSonnet - Compile text-based presentations into narrated videos."""
2
+
3
+ __version__ = "0.1.0"
slidesonnet/actions.py ADDED
@@ -0,0 +1,202 @@
1
+ """Action functions executed by doit tasks.
2
+
3
+ These are the actual build steps (image extraction, TTS synthesis,
4
+ video composition, concatenation, assembly) that doit invokes.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import logging
11
+ import shutil
12
+ from collections.abc import Callable
13
+ from pathlib import Path
14
+
15
+ from slidesonnet.models import ModuleType, ProjectConfig
16
+ from slidesonnet.parsers.base import SlideParser
17
+ from slidesonnet.tts.base import TTSEngine
18
+ from slidesonnet.video import composer
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
def action_extract_images(
    source: Path,
    slides_dir: Path,
    extract_fn: Callable[[Path, Path], list[Path]],
    manifest_path: Path,
) -> None:
    """Extract slide images from ``source`` and record them in a manifest.

    ``slides_dir`` is created first, then ``extract_fn(source, slides_dir)``
    is called. The paths it returns are written, in order and as strings,
    to ``manifest_path`` as a UTF-8 encoded JSON array.
    """
    slides_dir.mkdir(parents=True, exist_ok=True)
    extracted = extract_fn(source, slides_dir)

    manifest_dir = manifest_path.parent
    manifest_dir.mkdir(parents=True, exist_ok=True)
    manifest_json = json.dumps(list(map(str, extracted)))
    manifest_path.write_text(manifest_json, encoding="utf-8")
37
+
38
+
39
def action_tts(
    text: str,
    output_path: Path,
    tts: TTSEngine,
    utterance_path: Path,
    voice: str | None = None,
) -> None:
    """Write the utterance text to disk, then synthesize it to ``output_path``.

    No cache lookup is done here; caching is left to doit's
    uptodate/targets mechanism.
    """
    # Persist the text that is about to be synthesized.
    utterance_dir = utterance_path.parent
    utterance_dir.mkdir(parents=True, exist_ok=True)
    utterance_path.write_text(text, encoding="utf-8")

    audio_dir = output_path.parent
    audio_dir.mkdir(parents=True, exist_ok=True)
    logger.info(" slide synthesizing...")
    tts.synthesize(text, output_path, voice=voice)
56
+
57
+
58
def action_concat_audio(audio_paths: list[Path], output_path: Path) -> None:
    """Join ``audio_paths`` into one audio file at ``output_path``.

    The destination directory is created first; the concatenation itself is
    delegated to ``composer.concatenate_audio``.
    """
    destination_dir = output_path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    composer.concatenate_audio(audio_paths, output_path)
62
+
63
+
64
def action_compose_narrated(
    manifest_path: Path,
    slide_index: int,
    audio_path: Path,
    output: Path,
    config: ProjectConfig,
) -> None:
    """Compose a narrated slide segment.

    Args:
        manifest_path: JSON manifest holding the list of slide image paths.
        slide_index: 1-based position of the slide within the manifest.
        audio_path: Narration audio; its duration (as reported by
            ``composer.get_duration``) is passed on as the segment duration.
        output: Destination of the composed segment.
        config: Project configuration; only ``config.video`` is read.

    Raises:
        IndexError: If ``slide_index`` is not within ``1..len(manifest)``.
    """
    images = json.loads(manifest_path.read_text(encoding="utf-8"))
    # Reject 0 and negative values explicitly: ``images[slide_index - 1]``
    # would otherwise wrap around and silently pick an image from the end
    # of the list.
    if not 1 <= slide_index <= len(images):
        raise IndexError(
            f"Slide index {slide_index} is out of range for manifest "
            f"{manifest_path} ({len(images)} image(s))"
        )
    image = Path(images[slide_index - 1])
    duration = composer.get_duration(audio_path)
    logger.debug("slide %d: audio=%.3fs image=%s", slide_index, duration, image.name)
    composer.compose_segment(
        image=image,
        audio=audio_path,
        output=output,
        duration=duration,
        pad_seconds=config.video.pad_seconds,
        pre_silence=config.video.pre_silence,
        resolution=config.video.resolution,
        fps=config.video.fps,
        crf=config.video.crf,
        preset=config.video.preset,
    )
88
+
89
+
90
def action_compose_silent(
    manifest_path: Path,
    slide_index: int,
    output: Path,
    config: ProjectConfig,
    silence_override: float | None = None,
) -> None:
    """Compose a silent slide segment.

    Args:
        manifest_path: JSON manifest holding the list of slide image paths.
        slide_index: 1-based position of the slide within the manifest.
        output: Destination of the composed segment.
        config: Project configuration; only ``config.video`` is read.
        silence_override: Segment duration to use instead of
            ``config.video.silence_duration``. ``None`` means "use the
            configured value"; ``0`` is honoured as an explicit duration.

    Raises:
        IndexError: If ``slide_index`` is not within ``1..len(manifest)``.
    """
    images = json.loads(manifest_path.read_text(encoding="utf-8"))
    # Reject 0 and negative values explicitly: ``images[slide_index - 1]``
    # would otherwise wrap around and silently pick an image from the end
    # of the list.
    if not 1 <= slide_index <= len(images):
        raise IndexError(
            f"Slide index {slide_index} is out of range for manifest "
            f"{manifest_path} ({len(images)} image(s))"
        )
    image = Path(images[slide_index - 1])
    duration = silence_override if silence_override is not None else config.video.silence_duration
    composer.compose_silent_segment(
        image=image,
        output=output,
        duration=duration,
        resolution=config.video.resolution,
        fps=config.video.fps,
        crf=config.video.crf,
        preset=config.video.preset,
    )
110
+
111
+
112
def action_assemble(segments: list[Path], output: Path, config: ProjectConfig) -> None:
    """Combine every segment into the final output video.

    Raises:
        RuntimeError: If ``segments`` is empty.
    """
    if segments:
        _merge_videos(segments, output, config)
        return
    raise RuntimeError("No segments to assemble — the playlist may be empty.")
117
+
118
+
119
def _merge_videos(inputs: list[Path], output: Path, config: ProjectConfig) -> None:
    """Merge one or more video files into a single output.

    A single input is copied to ``output`` with ``shutil.copy2``. Several
    inputs are joined with a crossfade when ``config.video.crossfade`` is
    positive, and with a plain concatenation otherwise.

    Args:
        inputs: Video files to merge, in playback order.
        output: Destination file; its parent directory is created if needed.
        config: Project configuration; only ``config.video`` is read, and
            only when there is more than one input.
    """
    # Create the destination directory once, up front, so the copy and both
    # concatenation paths behave the same when it does not exist yet.
    output.parent.mkdir(parents=True, exist_ok=True)

    if len(inputs) == 1:
        shutil.copy2(inputs[0], output)
    elif config.video.crossfade > 0:
        composer.concatenate_segments_xfade(
            inputs,
            output,
            crossfade=config.video.crossfade,
            crf=config.video.crf,
            preset=config.video.preset,
            resolution=config.video.resolution,
            fps=config.video.fps,
        )
    else:
        composer.concatenate_segments(inputs, output)
137
+
138
+
139
def action_compile_beamer(source: Path, slides_dir: Path, pdf_path: Path) -> None:
    """Compile a Beamer source file and verify that the PDF was produced.

    Raises:
        RuntimeError: If ``pdf_path`` does not exist after compilation.
    """
    from slidesonnet.parsers.beamer import compile_pdf

    compile_pdf(source, slides_dir)
    if pdf_path.exists():
        return
    raise RuntimeError(f"Expected PDF not produced: {pdf_path}")
150
+
151
+
152
def action_extract_images_beamer(pdf_path: Path, slides_dir: Path, manifest_path: Path) -> None:
    """Extract slide images from a compiled Beamer PDF and write the manifest.

    The paths returned by ``extract_images_from_pdf`` are written, in order
    and as strings, to ``manifest_path`` as a UTF-8 encoded JSON array.
    """
    from slidesonnet.parsers.beamer import extract_images_from_pdf

    extracted = extract_images_from_pdf(pdf_path, slides_dir)

    manifest_dir = manifest_path.parent
    manifest_dir.mkdir(parents=True, exist_ok=True)
    manifest_json = json.dumps(list(map(str, extracted)))
    manifest_path.write_text(manifest_json, encoding="utf-8")
166
+
167
+
168
def action_export_pdf_marp(source: Path, output_path: Path) -> None:
    """Export the MARP presentation at ``source`` to a PDF at ``output_path``."""
    from slidesonnet.parsers import marp as marp_module

    marp_module.export_pdf(source, output_path)
176
+
177
+
178
def action_export_pdf_beamer(cache_pdf: Path, output_path: Path) -> None:
    """Copy the already compiled Beamer PDF from the cache to ``output_path``.

    The destination directory is created when it does not exist yet.
    """
    destination_dir = output_path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    shutil.copy2(cache_pdf, output_path)
185
+
186
+
187
def get_parser_and_extractor(
    module_type: ModuleType,
) -> tuple[type[SlideParser], Callable[[Path, Path], list[Path]]]:
    """Return the parser class and image extraction function for a module type.

    The parser modules are imported lazily, only for the requested type.

    Raises:
        ValueError: If ``module_type`` is neither MARP nor BEAMER.
    """
    if module_type == ModuleType.MARP:
        from slidesonnet.parsers import marp as marp_module

        return marp_module.MarpParser, marp_module.extract_images

    if module_type == ModuleType.BEAMER:
        from slidesonnet.parsers import beamer as beamer_module

        return beamer_module.BeamerParser, beamer_module.extract_images

    raise ValueError(f"No parser for module type: {module_type}")
slidesonnet/clean.py ADDED
@@ -0,0 +1,289 @@
1
+ """Selective cache cleanup with graduated preservation levels.
2
+
3
+ Four --keep levels, each progressively more aggressive:
4
+ nothing — nuke entire cache directory
5
+ api — keep all API-generated audio, remove build artifacts + piper audio
6
+ current — keep audio for current slide text (any engine), remove orphans
7
+ exact — keep only audio matching current text + current TTS config
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import logging
13
+ import shutil
14
+ from dataclasses import dataclass
15
+ from pathlib import Path
16
+ from typing import Literal
17
+
18
+ from slidesonnet.actions import get_parser_and_extractor
19
+ from slidesonnet.config import load_config
20
+ from slidesonnet.hashing import audio_filename, parse_audio_filename, text_hash
21
+ from slidesonnet.models import API_BACKENDS, ModuleType, resolve_voice
22
+ from slidesonnet.playlist import parse_playlist
23
+ from slidesonnet.tts.pronunciation import apply_pronunciation, load_pronunciation_dict
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+ KeepLevel = Literal["nothing", "api", "current", "exact"]
28
+
29
+ _API_BACKENDS = API_BACKENDS
30
+
31
+
32
@dataclass
class CleanResult:
    """Outcome of a cleanup run: how much was deleted and how much remains."""

    removed_files: int = 0  # number of files deleted
    removed_bytes: int = 0  # combined size of the deleted files, in bytes
    kept_files: int = 0  # number of files still present afterwards

    @property
    def removed_mb(self) -> float:
        """Removed size expressed in units of 1024 * 1024 bytes."""
        return self.removed_bytes / (1 << 20)
43
+
44
+
45
def _count_dir(path: Path) -> tuple[int, int]:
    """Return ``(file_count, total_bytes)`` for the tree rooted at ``path``.

    A path that does not exist counts as ``(0, 0)``. Only entries for which
    ``is_file()`` is true are counted.
    """
    if not path.exists():
        return 0, 0

    sizes = [entry.stat().st_size for entry in path.rglob("*") if entry.is_file()]
    return len(sizes), sum(sizes)
56
+
57
+
58
def clean(playlist_path: Path, keep: KeepLevel = "api") -> CleanResult:
    """Clean build artifacts with the given preservation level.

    The cache is the ``cache`` directory next to the resolved playlist file.

    Args:
        playlist_path: Path to the playlist whose cache should be cleaned.
        keep: One of ``"nothing"``, ``"api"``, ``"current"`` or ``"exact"``.

    Returns:
        A ``CleanResult`` with the number of files and bytes removed and the
        number of files left. An all-zero result is returned when the cache
        directory does not exist.

    Raises:
        ValueError: If ``keep`` is not one of the four known levels. This is
            raised before anything is deleted.
    """
    build_dir = playlist_path.resolve().parent / "cache"
    if not build_dir.exists():
        return CleanResult()

    files_before, bytes_before = _count_dir(build_dir)

    if keep == "nothing":
        _clean_all(build_dir)
    elif keep == "api":
        _clean_keep_api(build_dir)
    elif keep == "current":
        _clean_keep_current(build_dir, playlist_path)
    elif keep == "exact":
        _clean_keep_exact(build_dir, playlist_path)
    else:
        # A mistyped level must not look like a successful no-op cleanup.
        raise ValueError(f"Unknown keep level: {keep!r}")

    # One traversal yields both numbers; walking the tree a second time for
    # the byte count would only repeat the same work.
    files_after, bytes_after = _count_dir(build_dir)

    return CleanResult(
        removed_files=files_before - files_after,
        removed_bytes=bytes_before - bytes_after,
        kept_files=files_after,
    )
86
+
87
+
88
def _clean_all(build_dir: Path) -> None:
    """Delete ``build_dir`` and everything below it."""
    shutil.rmtree(build_dir)
91
+
92
+
93
def _clean_keep_api(build_dir: Path) -> None:
    """Drop build artifacts and every audio file not produced by an API backend.

    A file in ``audio/`` survives only when its name parses as an audio
    filename and the parsed backend is in ``_API_BACKENDS``. Everything else
    there (piper audio, concat files, old-format names) is deleted, and the
    ``audio/`` directory itself is removed if it ends up empty.
    """
    _remove_build_artifacts(build_dir)

    audio_dir = build_dir / "audio"
    if audio_dir.exists():
        for candidate in audio_dir.iterdir():
            if not candidate.is_file():
                continue

            is_api_audio = False
            info = parse_audio_filename(candidate.name)
            if info is not None:
                _, backend, _ = info
                is_api_audio = backend in _API_BACKENDS

            if not is_api_audio:
                candidate.unlink()

        _remove_empty_dir(audio_dir)
113
+
114
+
115
def _clean_keep_current(build_dir: Path, playlist_path: Path) -> None:
    """Drop build artifacts and audio that no current utterance refers to.

    A file in ``audio/`` survives when its name parses as an audio filename
    and its text hash is among the hashes collected from the playlist,
    whichever engine produced it. Orphaned audio, concat files and
    old-format names are deleted, and ``audio/`` is removed if left empty.
    """
    _remove_build_artifacts(build_dir)

    audio_dir = build_dir / "audio"
    if audio_dir.exists():
        live_hashes = _collect_current_text_hashes(playlist_path)

        for candidate in audio_dir.iterdir():
            if not candidate.is_file():
                continue

            still_used = False
            info = parse_audio_filename(candidate.name)
            if info is not None:
                utterance_hash, _, _ = info
                still_used = utterance_hash in live_hashes

            if not still_used:
                candidate.unlink()

        _remove_empty_dir(audio_dir)
137
+
138
+
139
def _clean_keep_exact(build_dir: Path, playlist_path: Path) -> None:
    """Drop build artifacts and all audio except exact current matches.

    A file in ``audio/`` survives only when its name is one of the filenames
    expected for the current playlist text and TTS configuration. ``audio/``
    is removed if it is left empty.
    """
    _remove_build_artifacts(build_dir)

    audio_dir = build_dir / "audio"
    if audio_dir.exists():
        expected_names = _collect_current_audio_filenames(playlist_path)

        for candidate in audio_dir.iterdir():
            if candidate.is_file() and candidate.name not in expected_names:
                candidate.unlink()

        _remove_empty_dir(audio_dir)
157
+
158
+
159
def _remove_build_artifacts(build_dir: Path) -> None:
    """Remove everything in ``build_dir`` except the ``audio`` entry.

    Real directories are deleted recursively. Plain files (``.doit.db``,
    ``.doit.db.bak``, etc.) and symbolic links are unlinked; a link's target
    is never touched.
    """
    for child in build_dir.iterdir():
        if child.name == "audio":
            continue
        # ``is_dir()`` follows symlinks, but ``shutil.rmtree`` refuses to
        # operate on a symlink to a directory, so links must go through
        # ``unlink()`` like regular files.
        if child.is_dir() and not child.is_symlink():
            shutil.rmtree(child)
        else:
            child.unlink()
169
+
170
+
171
def _remove_empty_dir(path: Path) -> None:
    """Delete ``path`` when it is an empty directory; otherwise do nothing."""
    try:
        path.rmdir()
    except OSError:
        # Best effort: rmdir refuses non-empty directories, and any other
        # OS-level failure is ignored in the same way.
        return
177
+
178
+
179
def _collect_current_text_hashes(playlist_path: Path) -> set[str]:
    """Return the text hashes of every utterance the playlist currently yields.

    Each narrated slide is hashed once per backend pronunciation dictionary
    and once per candidate voice (no voice, plus every voice id of the
    slide's voice preset when one is configured), so that audio produced by
    any engine can be matched against the result.
    """
    resolved_playlist = playlist_path.resolve()
    playlist_dir = resolved_playlist.parent
    build_dir = playlist_dir / "cache"

    raw_config, entries = parse_playlist(resolved_playlist)
    config = load_config(raw_config, playlist_dir)
    config.pronunciation = load_pronunciation_dict(config.pronunciation_files)

    # One pronunciation dictionary per backend, so audio from any engine is
    # preserved.
    pronunciations = {
        backend: config.pronunciation_for(backend) for backend in {"piper", "elevenlabs"}
    }

    hashes: set[str] = set()

    for entry in entries:
        if entry.module_type == ModuleType.VIDEO:
            continue

        source_path = playlist_dir / entry.path
        parser_cls = get_parser_and_extractor(entry.module_type)[0]
        slides_dir = build_dir / entry.path.parent / entry.path.stem / "slides"

        for slide in parser_cls().parse(source_path, slides_dir):
            if not slide.has_narration:
                continue

            # ``None`` stands for the default (no explicit voice) and is
            # always a candidate.
            candidate_voices: set[str | None] = {None}
            if slide.voice:
                preset = config.voices.get(slide.voice)
                if preset:
                    candidate_voices |= preset.all_voice_ids()

            for pronunciation in pronunciations.values():
                whole_text = apply_pronunciation(slide.narration_raw, pronunciation)
                part_texts = [
                    apply_pronunciation(part, pronunciation) for part in slide.narration_parts
                ]

                # Several parts are hashed individually; otherwise the whole
                # narration is the single utterance.
                utterances = part_texts if len(part_texts) > 1 else [whole_text]
                hashes.update(
                    text_hash(utterance, voice)
                    for utterance in utterances
                    for voice in candidate_voices
                )

    return hashes
235
+
236
+
237
def _collect_current_audio_filenames(playlist_path: Path) -> set[str]:
    """Return the audio filenames expected for the current TTS configuration.

    Only the configured backend is considered: its pronunciation dictionary,
    its resolved voice and the engine's name and cache key all go into each
    filename. ``.env`` next to the playlist is loaded before the engine is
    created.
    """
    from slidesonnet.tts import create_tts

    from dotenv import load_dotenv

    resolved_playlist = playlist_path.resolve()
    playlist_dir = resolved_playlist.parent
    build_dir = playlist_dir / "cache"

    load_dotenv(playlist_dir / ".env")

    raw_config, entries = parse_playlist(resolved_playlist)
    config = load_config(raw_config, playlist_dir)
    config.pronunciation = load_pronunciation_dict(config.pronunciation_files)
    tts = create_tts(config)

    pronunciation = config.pronunciation_for(config.tts.backend)
    expected: set[str] = set()

    for entry in entries:
        if entry.module_type == ModuleType.VIDEO:
            continue

        source_path = playlist_dir / entry.path
        parser_cls = get_parser_and_extractor(entry.module_type)[0]
        slides_dir = build_dir / entry.path.parent / entry.path.stem / "slides"

        for slide in parser_cls().parse(source_path, slides_dir):
            if not slide.has_narration:
                continue

            slide.narration_processed = apply_pronunciation(slide.narration_raw, pronunciation)
            slide.narration_parts_processed = [
                apply_pronunciation(part, pronunciation) for part in slide.narration_parts
            ]

            voice = resolve_voice(slide.voice, config.voices, config.tts.backend)

            # Several parts each get their own file; otherwise the whole
            # narration is the single utterance.
            part_texts = slide.narration_parts_processed
            utterances = part_texts if len(part_texts) > 1 else [slide.narration_processed]
            expected.update(
                audio_filename(utterance, tts.name(), tts.cache_key(), voice)
                for utterance in utterances
            )

    return expected