slidemovie 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
slidemovie/core.py ADDED
@@ -0,0 +1,1438 @@
1
+ import json
2
+ import os
3
+ import hashlib
4
+ import multiai_tts
5
+ import subprocess
6
+ import tempfile
7
+ import time
8
+ import wave
9
+ import sys
10
+ import logging
11
+ import shutil
12
+ from datetime import datetime
13
+
14
+ # Configure module logger
15
+ logger = logging.getLogger(__name__)
16
+
17
+ class Movie():
18
+ """
19
+ A class to automatically generate narration videos based on PowerPoint slides and Markdown notes.
20
+
21
+ It performs Text-to-Speech (TTS) synthesis, converts slides to images, and stitches them together
22
+ into video files. It supports incremental builds by detecting changes in source files (Markdown,
23
+ PPTX) and configuration settings.
24
+
25
+ Usage:
26
+ 1. Initialize the class (loads configuration).
27
+ 2. Configure paths using `configure_project_paths` or `configure_subproject_paths`.
28
+ 3. Run `build_slide_pptx()` to generate the PPTX file from Markdown (optional/drafting).
29
+ 4. Run `build_all()` to generate the audio, slide images, and the final video.
30
+ """
31
+
32
def __init__(self):
    """
    Prepares a new Movie instance.

    First verifies the required external tools, then loads the configuration.
    Configuration sources, from highest to lowest precedence:
        1. ./config.json (Current directory)
        2. ~/.config/slidemovie/config.json (User home directory)
        3. Default settings defined in `_get_default_settings()`
    """
    # The tool check runs first, exactly as before: it may terminate the
    # program before any configuration file is touched.
    for step_name in ("_check_external_tools", "_load_settings"):
        getattr(self, step_name)()
44
+
45
def _check_external_tools(self):
    """
    Verifies that the external command-line tools are available on the PATH.

    Tools looked up with `shutil.which`:
        - ffmpeg
        - ffprobe
        - pandoc

    If at least one of them cannot be found, the missing names are logged
    and the program exits with status 1.
    """
    missing_tools = []
    for tool in ('ffmpeg', 'ffprobe', 'pandoc'):
        if not shutil.which(tool):
            missing_tools.append(tool)

    # Guard clause: nothing to report when every tool was found.
    if not missing_tools:
        return

    logger.error(f"Required external commands not found: {', '.join(missing_tools)}")
    logger.error("Please install them before running this tool.")
    sys.exit(1)
62
+
63
def _get_default_settings(self):
    """
    Builds and returns a fresh dictionary holding the default configuration.

    Keys and their default values:
        tts_provider (str): TTS provider. Default: 'google'.
        tts_model (str): TTS model name. Default: 'gemini-2.5-flash-preview-tts'.
        tts_voice (str): Voice setting for TTS. Default: 'sadaltager'.
        tts_use_prompt (bool): Whether to use a system prompt for TTS. Default: True.
        prompt (str): System prompt for TTS generation.
        screen_size (list): Video resolution [width, height]. Default: [1280, 720].
        video_fps (int): Video frame rate. Default: 30.
        video_timescale (int): Video timescale. Default: 90000.
        video_pix_fmt (str): Pixel format. Default: 'yuv420p'.
        video_codec (str): Video codec. Default: 'libx264'.
        audio_codec (str): Audio codec. Default: 'aac'.
        sample_rate (int): Audio sample rate. Default: 44100.
        audio_bitrate (str): Audio bitrate. Default: '192k'.
        audio_channels (int): Audio channels. Default: 2.
        ffmpeg_loglevel (str): Log level for ffmpeg. Default: 'error'.
        silence_sec (float): Silence duration in seconds. Default: 2.5.
        show_skip (bool): Whether to log skipped tasks. Default: False.
        max_retry (int): Max retries. Default: 2.
        output_root (str): Root directory for video output. Default: None.
    """
    tts_section = {
        "tts_provider": 'google',
        "tts_model": 'gemini-2.5-flash-preview-tts',
        "tts_voice": 'sadaltager',
        "tts_use_prompt": True,
        "prompt": 'Please speak the following.',
    }
    # Kept as a list (not a tuple) so the value round-trips through JSON.
    screen_section = {"screen_size": [1280, 720]}
    video_section = {
        "video_fps": 30,
        "video_timescale": 90000,
        "video_pix_fmt": 'yuv420p',
        "video_codec": 'libx264',
    }
    audio_section = {
        "audio_codec": 'aac',
        "sample_rate": 44100,
        "audio_bitrate": '192k',
        "audio_channels": 2,
    }
    misc_section = {
        "ffmpeg_loglevel": 'error',
        "silence_sec": 2.5,
        "show_skip": False,
        "max_retry": 2,
    }
    # Output path setting (used if not provided by the caller).
    output_section = {"output_root": None}

    # Merge in a fixed order so the key order of the result stays stable.
    defaults = {}
    for section in (tts_section, screen_section, video_section,
                    audio_section, misc_section, output_section):
        defaults.update(section)
    return defaults
120
+
121
def _load_settings(self):
    """
    Loads settings from JSON files, merges them with the defaults and sets
    every resulting key as an instance attribute.

    Sources, applied in this order (later sources override earlier ones):
        1. Defaults from `_get_default_settings()`
        2. ~/.config/slidemovie/config.json
           (written with the defaults if it does not exist yet)
        3. ./config.json

    A config file that cannot be read, is not valid UTF-8/JSON, or whose
    top-level JSON value is not an object is skipped with a warning instead
    of aborting the program.
    """
    # 1. Get default settings
    config = self._get_default_settings()

    def _merge_from(path):
        """Merges the JSON object stored at `path` into `config`; returns True on success."""
        try:
            with open(path, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError, IOError) as e:
            logger.warning(f"Failed to load {path}: {e}")
            return False
        # dict.update() raises on lists, numbers, strings, ... so reject
        # anything that is not a JSON object up front.
        if not isinstance(loaded, dict):
            logger.warning(
                f"Failed to load {path}: top-level JSON value must be an object")
            return False
        config.update(loaded)
        return True

    # 2. Process ~/.config/slidemovie/config.json
    config_dir = os.path.expanduser("~/.config/slidemovie")
    if not os.path.exists(config_dir):
        try:
            os.makedirs(config_dir, exist_ok=True)
        except OSError as e:
            logger.warning(f"Failed to create config directory {config_dir}: {e}")

    home_config_path = os.path.join(config_dir, "config.json")

    if not os.path.exists(home_config_path):
        # Create default config file if it doesn't exist
        try:
            with open(home_config_path, 'w', encoding='utf-8') as f:
                json.dump(config, f, indent=4, ensure_ascii=False)
            logger.info(f"Created default config file: {home_config_path}")
        except IOError as e:
            logger.warning(f"Failed to create {home_config_path}: {e}")
    else:
        # Load and merge if exists
        _merge_from(home_config_path)

    # 3. Process ./config.json (Current directory)
    local_config_path = "./config.json"
    if os.path.exists(local_config_path):
        if _merge_from(local_config_path):
            logger.info(f"Loaded local config: {local_config_path}")

    # 4. Set attributes

    # Special handling: Convert screen_size from list to tuple
    if "screen_size" in config and isinstance(config["screen_size"], list):
        config["screen_size"] = tuple(config["screen_size"])

    # Set dictionary values as instance attributes
    for key, value in config.items():
        setattr(self, key, value)
182
+
183
def configure_project_paths(self, project_name, source_dir, output_root_dir=None, output_filename=None):
    """
    Sets up all path attributes for a standard (flat) project layout.

    Args:
        project_name (str): The name/ID of the project.
        source_dir (str): The directory containing source files (.md, .pptx).
        output_root_dir (str, optional): Root directory for video output.
            Falls back to `self.output_root`, then to `{source_dir}/movie`.
        output_filename (str, optional): Filename for the output video
            (without extension). Falls back to `project_name`.

    The output root must already exist; otherwise an error is logged and the
    program exits with status 1. The per-project directory below it is
    created when missing.
    """
    # Precedence: explicit argument > configured output_root > "<source_dir>/movie"
    target_root = os.path.expanduser(
        output_root_dir or self.output_root or f'{source_dir}/movie')
    if not os.path.isdir(target_root):
        logger.error(f'Directory {target_root} does not exist.')
        sys.exit(1)

    self.source_dir = source_dir
    self.project_id = project_name
    video_stem = output_filename if output_filename else project_name

    # Source-side files
    self.md_file = f'{self.source_dir}/{project_name}.md'
    self.status_file = f'{self.source_dir}/status.json'
    self.video_length_file = f'{self.source_dir}/video_length.csv'

    # Intermediate/output directory
    self.movie_dir = f'{target_root}/{project_name}'
    if not os.path.isdir(self.movie_dir):
        os.mkdir(self.movie_dir)

    self.slide_file = f'{self.source_dir}/{project_name}.pptx'
    self.video_file = f'{self.movie_dir}/{video_stem}.mp4'
228
+
229
+
230
def configure_subproject_paths(self, parent_project_name, subproject_name, source_parent_dir, output_root_dir=None, output_filename=None):
    """
    Sets up all path attributes for a nested project layout
    (Parent Folder -> Child Folder).

    Args:
        parent_project_name (str): The name of the parent project.
        subproject_name (str): The name of the subproject (child folder name).
        source_parent_dir (str): The directory containing the subproject folder.
        output_root_dir (str, optional): Root directory for video output.
            Falls back to `self.output_root`, then to `{source_parent_dir}/movie`.
        output_filename (str, optional): Filename for the output video
            (without extension). Falls back to the project ID
            `{parent_project_name}-{subproject_name}`.

    The output root must already exist; otherwise an error is logged and the
    program exits with status 1. The parent and child output directories
    below it are created when missing.
    """
    # Precedence: explicit argument > configured output_root > "<source_parent_dir>/movie"
    target_root = os.path.expanduser(
        output_root_dir or self.output_root or f'{source_parent_dir}/movie')
    if not os.path.isdir(target_root):
        logger.error(f'Directory {target_root} does not exist.')
        sys.exit(1)

    # Sources live in "<source_parent_dir>/<subproject_name>"; the ID is "Parent-Child".
    self.source_dir = f'{source_parent_dir}/{subproject_name}'
    self.project_id = f'{parent_project_name}-{subproject_name}'
    video_stem = output_filename if output_filename else self.project_id

    # Source-side files
    self.md_file = f'{self.source_dir}/{subproject_name}.md'
    self.status_file = f'{self.source_dir}/status.json'
    self.video_length_file = f'{self.source_dir}/video_length.csv'

    # Output hierarchy: <root>/<parent>/<child>, created one level at a time.
    parent_movie_dir = f'{target_root}/{parent_project_name}'
    self.movie_dir = f'{parent_movie_dir}/{subproject_name}'
    for directory in (parent_movie_dir, self.movie_dir):
        if not os.path.isdir(directory):
            os.mkdir(directory)

    self.slide_file = f'{self.source_dir}/{subproject_name}.pptx'
    self.video_file = f'{self.movie_dir}/{video_stem}.mp4'
280
+
281
def build_all(self):
    """
    Runs the complete build pipeline, from Markdown and PPTX to the final video.

    The external tools are re-checked first. If the Markdown file is missing,
    an error is logged and the program exits with status 1.

    Note: This does not update the PPTX file from Markdown.
    Run `build_slide_pptx()` beforehand if necessary.
    """
    self._check_external_tools()
    if not os.path.isfile(self.md_file):
        logger.error(f'{self.md_file} does not exist.')
        sys.exit(1)

    # Stages in execution order:
    #   narration audio -> slide images -> per-slide clips -> final video
    stage_names = (
        "build_slide_audio",
        "build_slide_images",
        "build_slide_videos",
        "build_final_video",
    )
    for stage_name in stage_names:
        getattr(self, stage_name)()
301
+
302
def build_slide_audio(self):
    """
    Synthesizes audio (TTS) from Markdown notes and saves it as WAV files.

    For every slide from `_extract_slides_list()`:
    - Slides with a `video_file` are skipped (no TTS needed).
    - Audio is (re)generated when the stored status is not "generated", the
      stored notes hash differs from the current one, or the WAV file is
      missing. After generation the slide state is updated and the state
      file is saved.

    Exits with status 1 if a slide that needs audio has empty notes.
    """
    self._ensure_slide_ids()
    state = self._load_audio_state()
    slides_list = self._extract_slides_list()

    # 1. Sync metadata and sort
    self._sync_slide_metadata(state, slides_list)

    # 2. Audio generation loop
    for slide in slides_list:
        slide_id = slide["id"]

        # Skip TTS if a video file is specified
        if slide.get("video_file"):
            if self.show_skip:
                logger.info(
                    f"[SKIP] {slide_id} (Movie Mode: {slide['video_file']})")
            continue

        norm = self._normalize_notes(slide["notes"])
        current_notes_hash = self._hash_notes(norm)

        slide_state = state["slides"][slide_id]
        audio_state = slide_state["audio"]

        # The WAV file name comes from the state entry, relative to movie_dir.
        wav_path = os.path.join(self.movie_dir, audio_state["wav_file"])

        up_to_date = (
            audio_state.get("status") == "generated"
            and slide_state.get("notes_hash") == current_notes_hash
            and os.path.isfile(wav_path)
        )
        if up_to_date:
            if self.show_skip:
                logger.info(f"[SKIP] {slide_id} (Audio: Unchanged)")
            continue

        logger.info(f"[TTS] regenerate {slide_id}")

        add_prompt = audio_state.get("additional_prompt", "")
        if norm == "":
            logger.error(f'Error: "::: notes" not found in {slide_id}.')
            # Exit with a non-zero status: a bare sys.exit() would report
            # success to the calling shell even though the build failed.
            sys.exit(1)

        self._speak_to_wav(norm, wav_path, additional_prompt=add_prompt)
        self.prepend_silence(wav_path)
        duration = self._get_wav_duration(wav_path)

        slide_state["notes_hash"] = current_notes_hash
        slide_state["notes_length"] = len(norm)

        audio_state["status"] = "generated"
        audio_state["generated_at"] = self._now()
        audio_state["duration_sec"] = duration

        # Save after every slide so finished work survives a later failure.
        state["last_checked"] = self._now()
        self._save_audio_state(state)
370
+
371
def build_slide_images(self):
    """
    Converts PPTX slides to images and renames them based on Markdown slide-ids.
    Uses the state file to detect PPTX changes and skip unnecessary processing.

    Prerequisites:
        - `self.slide_file` (pptx) must exist; otherwise an error is logged
          and the method returns without doing anything.
        - The `pptxtoimages` package must be importable.

    The converter output `slide_<n>.png` is ordered by `<n>` and renamed to
    `{slide_id}.png` in Markdown order. An image left over from a previous
    build with the same slide-id is overwritten.
    """
    from pptxtoimages.tools import PPTXToImageConverter
    import glob

    if not os.path.isfile(self.slide_file):
        logger.error(f"Slide file does not exist: {self.slide_file}")
        return

    # 1. Change detection (Check PPTX hash)
    state = self._load_audio_state()
    current_pptx_hash = self._hash_file(self.slide_file)

    images_task = state.get("images_task", {})
    if (images_task.get("status") == "generated" and
            images_task.get("source_hash") == current_pptx_hash):
        if self.show_skip:
            logger.info("[SKIP] Images (PPTX unchanged)")
        return

    # --- Start Generation ---
    os.makedirs(self.movie_dir, exist_ok=True)

    # Remove raw converter output left over from an earlier run so it cannot
    # be mistaken for freshly generated images.
    raw_pattern = os.path.join(self.movie_dir, "slide_*.png")
    for stale in glob.glob(raw_pattern):
        os.remove(stale)

    logger.info("Starting PPTX -> Image conversion.")

    # PPTX -> PNG
    converter = PPTXToImageConverter(self.slide_file, self.movie_dir)
    converter.convert()

    # Sort numerically (slide_2 before slide_10), not lexicographically.
    generated_files = sorted(
        glob.glob(raw_pattern),
        key=lambda x: int(os.path.splitext(
            os.path.basename(x))[0].split("_")[1])
    )

    # Slide IDs in Markdown order
    slide_ids = list(self._extract_slide_notes())

    if len(slide_ids) != len(generated_files):
        logger.warning(
            f"Generated image count ({len(generated_files)}) does not match slide_id count ({len(slide_ids)}).")

    # Rename using slide_id; zip() stops at the shorter of the two lists.
    for old_path, slide_id in zip(generated_files, slide_ids):
        new_path = os.path.join(self.movie_dir, f"{slide_id}.png")
        # os.replace overwrites an existing target on every platform, whereas
        # os.rename raises FileExistsError on Windows when rebuilding.
        os.replace(old_path, new_path)

    # 2. Save state
    state["images_task"] = {
        "status": "generated",
        "source_file": os.path.basename(self.slide_file),
        "source_hash": current_pptx_hash,
        "generated_at": self._now()
    }
    state["last_checked"] = self._now()
    self._save_audio_state(state)

    logger.info("Image conversion completed.")
449
+
450
def build_slide_videos(self):
    """
    Renders one MP4 clip per slide into `self.movie_dir`.

    - Video slide (has `video_file`): the source video is scaled and padded
      to the configured screen size, with audio re-encoded.
    - Normal slide: `{slide_id}.png` and `{slide_id}.wav` are combined.

    A clip is skipped when the state marks it as generated, the stored input
    hashes equal the current ones and the output file exists. Slides whose
    inputs are missing are logged and skipped. After each successful ffmpeg
    run the slide's video state is replaced and the state is saved; a failed
    ffmpeg run is logged and the loop continues with the next slide.
    """
    import subprocess

    width, height = self.screen_size
    state = self._load_audio_state()
    slides_list = self._extract_slides_list()
    self._sync_slide_metadata(state, slides_list)

    for slide in slides_list:
        slide_id = slide["id"]
        source_video = slide.get("video_file")
        clip_path = os.path.join(self.movie_dir, f"{slide_id}.mp4")

        if slide_id not in state["slides"]:
            state["slides"][slide_id] = self._init_slide_state(slide_id)
        slide_state = state["slides"][slide_id]

        # ------------------------------------------------ video slide ---
        if source_video:
            src_path = os.path.join(self.movie_dir, source_video)
            if not os.path.isfile(src_path):
                logger.error(
                    f"Original video not found: {src_path} (Slide: {slide_id})")
                continue

            src_hash = self._hash_file(src_path)
            previous = slide_state["video"]
            unchanged = (
                previous.get("status") == "generated"
                and previous.get("source_hash") == src_hash
                and os.path.isfile(clip_path)
            )
            if unchanged:
                if self.show_skip:
                    logger.info(
                        f"[SKIP] {slide_id} (Video: unchanged/Source:{source_video})")
                continue

            logger.info(f"Converting video: {source_video} -> {slide_id}.mp4")

            # Resize + pad to the target frame, re-encode video and audio.
            fit_filter = (
                f"scale={width}:{height}:force_original_aspect_ratio=decrease,"
                f"pad={width}:{height}:(ow-iw)/2:(oh-ih)/2"
            )
            cmd = (
                ["ffmpeg", "-y", "-v", self.ffmpeg_loglevel,
                 "-i", src_path,
                 "-vf", fit_filter,
                 "-c:v", self.video_codec]
                + ["-pix_fmt", self.video_pix_fmt,
                   "-r", str(self.video_fps),
                   "-video_track_timescale", str(self.video_timescale)]
                + ["-c:a", self.audio_codec,
                   "-ar", str(self.sample_rate),
                   "-ac", str(self.audio_channels),
                   "-b:a", self.audio_bitrate]
                + [clip_path]
            )

            try:
                subprocess.run(cmd, check=True)
                duration = self._get_mp4_duration(clip_path)

                slide_state["video"] = {
                    "status": "generated",
                    "source_video": source_video,
                    "source_hash": src_hash,
                    "duration_sec": duration,
                    "generated_at": self._now()
                }
                state["last_checked"] = self._now()
                self._save_audio_state(state)
                logger.info(f"Done: {clip_path} ({duration:.2f}s)")
            except subprocess.CalledProcessError:
                logger.error(f"Video conversion failed: {slide_id}")
            continue

        # ----------------------------------------------- normal slide ---
        png_file = os.path.join(self.movie_dir, f"{slide_id}.png")
        wav_file = os.path.join(self.movie_dir, f"{slide_id}.wav")

        if not (os.path.isfile(png_file) and os.path.isfile(wav_file)):
            logger.warning(f"Material missing, skipping: {slide_id}")
            continue

        png_hash = self._hash_file(png_file)
        wav_hash = self._hash_file(wav_file)

        previous = slide_state["video"]
        unchanged = (
            previous.get("status") == "generated"
            and previous.get("wav_hash") == wav_hash
            and previous.get("png_hash") == png_hash
            and os.path.isfile(clip_path)
        )
        if unchanged:
            if self.show_skip:
                logger.info(f"[SKIP] {slide_id} (Video: unchanged)")
            continue

        # Still image looped for the length of the audio ("-shortest").
        cmd = (
            ["ffmpeg", "-y", "-v", self.ffmpeg_loglevel,
             "-loop", "1",
             "-i", png_file,
             "-i", wav_file,
             "-c:v", self.video_codec,
             "-tune", "stillimage"]
            + ["-pix_fmt", self.video_pix_fmt,
               "-r", str(self.video_fps),
               "-video_track_timescale", str(self.video_timescale)]
            + ["-vf", f"scale={width}:{height}"]
            + ["-c:a", self.audio_codec,
               "-ar", str(self.sample_rate),
               "-ac", str(self.audio_channels),
               "-b:a", self.audio_bitrate]
            + ["-shortest", clip_path]
        )

        logger.info(f"Generating {slide_id}.mp4...")
        try:
            subprocess.run(cmd, check=True)
            duration = self._get_mp4_duration(clip_path)

            slide_state["video"] = {
                "status": "generated",
                "wav_hash": wav_hash,
                "png_hash": png_hash,
                "duration_sec": duration,
                "generated_at": self._now()
            }
            state["last_checked"] = self._now()
            self._save_audio_state(state)
            logger.info(f"Done: {clip_path} ({duration:.2f}s)")
        except subprocess.CalledProcessError:
            logger.error(f"MP4 creation failed: {slide_id}")
609
+
610
def build_final_video(self):
    """
    Concatenates all generated slide videos (MP4) into the final movie.

    Process:
        1. Take the slide order from the Markdown file.
        2. Calculate a source hash via `_calculate_source_hash(slide_ids)`.
        3. Skip if the "final_movie" state entry is "generated" with the same
           hash and the output file exists.
        4. Write an ffmpeg concat list for the existing clips (missing clips
           are logged and left out) and run ffmpeg with stream copy.
        5. Save the result to the state file.

    The concat list is written as UTF-8, single quotes in clip paths are
    escaped for the concat demuxer, and the temporary list file is removed
    in every case.
    """
    import subprocess

    state = self._load_audio_state()

    # Get correct order from Markdown
    slides_list = self._extract_slides_list()
    slide_ids = [s["id"] for s in slides_list]

    if not slide_ids:
        logger.error("No Slide IDs found.")
        return

    # 1. Calculate source hash
    current_source_hash = self._calculate_source_hash(slide_ids)

    # 2. Skip check
    final_movie_state = state.get("final_movie", {})
    if (final_movie_state.get("status") == "generated" and
            final_movie_state.get("source_hash") == current_source_hash and
            os.path.isfile(self.video_file)):
        if self.show_skip:
            logger.info("[SKIP] Final Video (unchanged)")
        return

    # 3. Collect the clips that actually exist, in slide order.
    # Absolute paths are used for Windows path compatibility.
    clip_paths = []
    for slide_id in slide_ids:
        mp4_path = os.path.join(self.movie_dir, f"{slide_id}.mp4")
        if os.path.isfile(mp4_path):
            clip_paths.append(os.path.abspath(mp4_path))
        else:
            logger.warning(f"MP4 not found: {mp4_path} (Skipping)")

    found_count = len(clip_paths)
    if found_count == 0:
        logger.error("No MP4s found for concatenation.")
        return

    concat_list = None
    try:
        # ffmpeg reads the list as UTF-8, so do not rely on the locale encoding.
        with tempfile.NamedTemporaryFile(
                mode="w", suffix=".txt", delete=False, encoding="utf-8") as f:
            concat_list = f.name
            for clip in clip_paths:
                # Inside a single-quoted string the concat demuxer has no
                # escape character; a literal quote is written as '\''.
                escaped = clip.replace("'", "'\\''")
                f.write(f"file '{escaped}'\n")

        logger.info("Starting final video concatenation...")

        # 4. Run FFmpeg
        cmd = [
            "ffmpeg",
            "-v", self.ffmpeg_loglevel,
            "-y",
            "-f", "concat",
            "-safe", "0",
            "-i", concat_list,
            "-c", "copy",
            self.video_file
        ]
        subprocess.run(cmd, check=True)

        # 5. Save results
        duration_sec = self._get_mp4_duration(self.video_file)

        state["final_movie"] = {
            "status": "generated",
            "file_name": os.path.basename(self.video_file),
            "generated_at": self._now(),
            "duration_min": duration_sec / 60.0,
            "slides": found_count,
            "source_hash": current_source_hash
        }
        state["last_checked"] = self._now()
        self._save_audio_state(state)

        logger.info(
            f"Final video created and saved: {self.video_file} ({duration_sec/60.0:.2f} min)")

    except subprocess.CalledProcessError:
        logger.error("MP4 concatenation failed.")
    finally:
        # Covers failures while writing the list as well as ffmpeg failures.
        if concat_list and os.path.exists(concat_list):
            os.remove(concat_list)
705
+
706
def _get_build_config(self):
    """
    Collects the build-relevant settings of this instance into a nested
    dictionary with the sections "screen", "video", "audio" and "common".
    """
    width, height = self.screen_size[0], self.screen_size[1]

    screen = dict(width=width, height=height)
    video = dict(
        fps=self.video_fps,
        timescale=self.video_timescale,
        pix_fmt=self.video_pix_fmt,
        codec=self.video_codec,
    )
    audio = dict(
        codec=self.audio_codec,
        sample_rate=self.sample_rate,
        bitrate=self.audio_bitrate,
        channels=self.audio_channels,
    )
    common = dict(silence_sec=self.silence_sec)

    return {"screen": screen, "video": video, "audio": audio, "common": common}
731
+
732
def _ensure_slide_ids(self):
    """
    Makes sure every line starting with "#" in the Markdown file is preceded
    by a `<!-- slide-id: ... -->` comment, inserting one where it is missing.
    Exits with status 1 if the file contains the same slide ID twice.

    Generated IDs have the form:
        <!-- slide-id: {project_id}-{seq} -->
    where {seq} is a two-digit-padded number that continues after the highest
    number already used with this project's prefix.

    Does nothing when the Markdown file does not exist. The file is rewritten
    only if at least one ID was inserted.
    """
    import re

    if not os.path.exists(self.md_file):
        return

    with open(self.md_file, encoding="utf-8") as f:
        lines = f.readlines()

    id_pattern = re.compile(r'<!--\s*slide-id:\s*(.+?)\s*-->')
    existing_ids = set()
    max_seq = 0

    # Pass 1: collect the IDs already present and the highest sequence number.
    for line in lines:
        found = id_pattern.search(line)
        if found is None:
            continue

        sid = found.group(1).strip()
        if sid in existing_ids:
            logger.error(f"Duplicate slide_id detected: {sid}")
            sys.exit(1)
        existing_ids.add(sid)

        prefix = f"{self.project_id}-"
        if not sid.startswith(prefix):
            continue
        try:
            max_seq = max(max_seq, int(sid[len(prefix):]))
        except ValueError:
            # Suffix is not a number (hand-written ID); irrelevant for numbering.
            pass

    # Pass 2: copy the lines, inserting an ID in front of headers lacking one.
    new_lines = []
    for line in lines:
        if line.strip().startswith("#"):
            # Nearest non-blank line already emitted (None at the file start).
            preceding = next(
                (p.strip() for p in reversed(new_lines) if p.strip() != ""),
                None,
            )
            has_id = preceding is not None and id_pattern.match(preceding) is not None

            if not has_id:
                # Advance the counter until an unused ID is found.
                while True:
                    max_seq += 1
                    new_id = f"{self.project_id}-{max_seq:02d}"
                    if new_id not in existing_ids:
                        existing_ids.add(new_id)
                        break
                new_lines.append(f"<!-- slide-id: {new_id} -->\n")

        new_lines.append(line)

    if new_lines != lines:
        logger.info("Adding missing slide-ids...")
        with open(self.md_file, "w", encoding="utf-8") as f:
            f.writelines(new_lines)
816
+
817
def _sync_slide_metadata(self, state, slides_list):
    """
    Brings the JSON state in line with the slides parsed from Markdown.

    For every slide (in list order) this creates a missing state entry via
    `_init_slide_state`, and updates `slide_index` (1-based), `title` and
    `video_file` when they differ. It also backfills an empty
    `additional_prompt` in an existing "audio" entry and adds a "video" entry
    with status "missing" when none exists.

    If anything changed, `last_checked` is refreshed and the state is saved.
    """
    def _assign(mapping, key, value):
        """Stores `value` under `key` if it differs; reports whether it did."""
        if mapping.get(key) != value:
            mapping[key] = value
            return True
        return False

    changed = False

    for position, slide in enumerate(slides_list, start=1):
        slide_id = slide["id"]
        known = state["slides"]

        if slide_id not in known:
            known[slide_id] = self._init_slide_state(slide_id)
            changed = True
        entry = known[slide_id]

        # `|=` (not `or`) so each field is synced even after an earlier change.
        changed |= _assign(entry, "slide_index", position)
        changed |= _assign(entry, "title", slide["title"])
        changed |= _assign(entry, "video_file", slide.get("video_file"))

        # Backfill for entries that predate the additional_prompt field.
        audio_entry = entry.get("audio") if "audio" in entry else None
        if "audio" in entry and "additional_prompt" not in audio_entry:
            audio_entry["additional_prompt"] = ""
            changed = True

        if "video" not in entry:
            entry["video"] = {"status": "missing"}
            changed = True

    if not changed:
        return

    state["last_checked"] = self._now()
    self._save_audio_state(state)
    logger.info("Slide order and metadata updated.")
866
+
867
def _extract_slide_notes(self):
    """
    Parses the Markdown file to extract slide-ids and corresponding notes.
    Exits with status 1 if duplicate slide_ids are found.

    A notes block is the text between a `::: notes` line and the next `:::`
    line. Notes that appear before the first slide-id are ignored, and an
    unterminated notes block ends at the next slide-id instead of leaking
    into the following slide.

    Returns:
        dict: {slide_id (str): notes_text (str)}, in file order. Slides
        without a completed notes block map to "".
    """
    slides = {}
    current_id = None
    in_notes = False
    buffer = []

    with open(self.md_file, encoding="utf-8") as f:
        for line in f:
            if line.startswith("<!-- slide-id:"):
                current_id = line.strip()[len(
                    "<!-- slide-id:"): -3].strip()

                if current_id in slides:
                    logger.error(
                        f"Duplicate slide_id detected: {current_id}")
                    sys.exit(1)

                slides[current_id] = ""
                # A new slide starts: drop any notes block left open by the
                # previous slide so its text cannot be attributed to this one.
                in_notes = False
                buffer = []
                continue

            stripped = line.strip()

            if stripped == "::: notes":
                in_notes = True
                buffer = []
                continue

            if stripped == ":::" and in_notes:
                # Notes seen before any slide-id belong to no slide; storing
                # them would create a bogus `None` key in the result.
                if current_id is not None:
                    slides[current_id] = "".join(buffer).strip()
                in_notes = False
                continue

            if in_notes:
                buffer.append(line)

    return slides
908
+
909
def _extract_slides_list(self):
    """
    Reads the Markdown file and returns its slides in file order.

    Each list item is a dict with the keys "id" (from the slide-id comment),
    "title" (first "# " line after the slide-id), "video_file" (from a
    video-file comment, else None) and "notes" (text of the `::: notes`
    block, stripped). Content before the first slide-id is not returned.

    Exits with status 1 on duplicate slide_ids.
    """
    id_tag = "<!-- slide-id:"
    video_tag = "<!-- video-file:"

    slides = []
    seen_ids = set()

    # State of the slide currently being collected.
    cur_id = None
    cur_title = ""
    cur_video = None
    cur_notes = []
    collecting = False

    def _flush():
        # Only slides with a (non-empty) ID are emitted.
        if cur_id:
            slides.append({
                "id": cur_id,
                "title": cur_title,
                "video_file": cur_video,
                "notes": "".join(cur_notes).strip()
            })

    with open(self.md_file, encoding="utf-8") as f:
        for line in f:
            stripped = line.strip()

            if line.startswith(id_tag):
                # A new slide begins: emit the previous one and reset.
                _flush()
                new_id = stripped[len(id_tag): -3].strip()
                if new_id in seen_ids:
                    logger.error(f"Duplicate slide_id detected: {new_id}")
                    sys.exit(1)
                seen_ids.add(new_id)

                cur_id = new_id
                cur_title = ""
                cur_video = None
                cur_notes = []
                collecting = False

            elif line.startswith(video_tag):
                cur_video = stripped[len(video_tag): -3].strip()

            elif cur_id and not cur_title and line.startswith("# "):
                # A line starting with "# " can never start with "##", so no
                # extra check is needed to exclude deeper heading levels.
                cur_title = line[2:].strip()

            elif stripped == "::: notes":
                collecting = True
                cur_notes = []

            elif stripped == ":::" and collecting:
                collecting = False

            elif collecting:
                cur_notes.append(line)

    _flush()
    return slides
987
+
988
+ def _load_audio_state(self):
989
+ """
990
+ Loads the audio generation state file (JSON).
991
+
992
+ Validation:
993
+ 1. build_config: If inconsistent with current settings (e.g., resolution change), exits with error.
994
+ 2. tts_config: If inconsistent, prompts the user to continue or abort.
995
+ """
996
+ if not os.path.isfile(self.status_file):
997
+ return self._init_audio_state(self.status_file)
998
+
999
+ with open(self.status_file, encoding="utf-8") as f:
1000
+ state = json.load(f)
1001
+
1002
+ # --- build_config check ---
1003
+ stored_config = state.get("build_config")
1004
+ current_config = self._get_build_config()
1005
+
1006
+ if stored_config is None:
1007
+ logger.info("No build_config in state file. Applying current settings.")
1008
+ state["build_config"] = current_config
1009
+ self._save_audio_state(state)
1010
+ stored_config = current_config
1011
+
1012
+ if stored_config != current_config:
1013
+ import pprint
1014
+ logger.error("build_config inconsistency detected.")
1015
+ logger.error("Changing resolution/FPS mid-process is not supported. Aborting.")
1016
+ logger.error("-" * 40)
1017
+ logger.error("[Stored Config]")
1018
+ logger.error(pprint.pformat(stored_config))
1019
+ logger.error("-" * 40)
1020
+ logger.error("[Current Config]")
1021
+ logger.error(pprint.pformat(current_config))
1022
+ logger.error("-" * 40)
1023
+ sys.exit(1)
1024
+
1025
+ # --- tts_config check ---
1026
+ stored_tts = state.get("tts_config")
1027
+ current_tts = self._get_tts_config()
1028
+
1029
+ # Auto-fill if missing (Migration)
1030
+ if stored_tts is None:
1031
+ logger.info("No TTS config in state file. Applying current settings.")
1032
+ state["tts_config"] = current_tts
1033
+ self._save_audio_state(state)
1034
+
1035
+ # Confirmation prompt if mismatched
1036
+ elif stored_tts != current_tts:
1037
+ import pprint
1038
+ logger.warning("=" * 60)
1039
+ logger.warning("TTS config change detected.")
1040
+ logger.warning("=" * 60)
1041
+ logger.warning("[Stored Config (Previous)]")
1042
+ logger.warning(pprint.pformat(stored_tts))
1043
+ logger.warning("-" * 40)
1044
+ logger.warning("[Current Config (Now)]")
1045
+ logger.warning(pprint.pformat(current_tts))
1046
+ logger.warning("=" * 60)
1047
+ logger.warning("Generating audio with different settings may result in inconsistent audio in the video.")
1048
+
1049
+ while True:
1050
+ choice = input(
1051
+ "Select action: 1) Ignore and Continue (Overwrite config) 2) Abort [1/2]: ").strip()
1052
+ if choice == '1':
1053
+ logger.info("Applying new settings and continuing. Updating state file.")
1054
+ state["tts_config"] = current_tts
1055
+ self._save_audio_state(state)
1056
+ break
1057
+ elif choice == '2':
1058
+ logger.info("Aborted by user.")
1059
+ sys.exit(0)
1060
+ else:
1061
+ logger.warning("Please enter 1 or 2.")
1062
+
1063
+ return state
1064
+
1065
+ def _save_audio_state(self, state):
1066
+ """
1067
+ Saves the audio generation state to the JSON file.
1068
+ Sorts slides by `slide_index` before saving to ensure order in the file.
1069
+ """
1070
+ if "slides" in state:
1071
+ # Sort dictionary by slide_index
1072
+ sorted_slides = dict(sorted(
1073
+ state["slides"].items(),
1074
+ key=lambda item: item[1].get("slide_index", 999999)
1075
+ ))
1076
+ state["slides"] = sorted_slides
1077
+
1078
+ with open(self.status_file, "w", encoding="utf-8") as f:
1079
+ json.dump(state, f, ensure_ascii=False, indent=2)
1080
+
1081
+ def _init_audio_state(self, path):
1082
+ """
1083
+ Creates and returns a new state management dictionary.
1084
+ Records current build_config and tts_config.
1085
+ """
1086
+ return {
1087
+ "schema_version": "1.0",
1088
+ "project_id": self.project_id,
1089
+ "last_checked": None,
1090
+ "build_config": self._get_build_config(),
1091
+ "tts_config": self._get_tts_config(),
1092
+ "pptx_task": {
1093
+ "status": "missing",
1094
+ "source_file": os.path.basename(self.md_file) if hasattr(self, 'md_file') else "",
1095
+ "source_hash": None,
1096
+ "generated_at": None
1097
+ },
1098
+ "images_task": {
1099
+ "status": "missing",
1100
+ "source_file": os.path.basename(self.slide_file) if hasattr(self, 'slide_file') else "",
1101
+ "source_hash": None,
1102
+ "generated_at": None
1103
+ },
1104
+ "tts_engine": { # Kept for legacy compatibility
1105
+ "provider": self.tts_provider,
1106
+ "model": self.tts_model,
1107
+ "voice": self.tts_voice
1108
+ },
1109
+ "slides": {}
1110
+ }
1111
+
1112
+ def _get_tts_config(self):
1113
+ """
1114
+ Returns a dictionary of current TTS configuration.
1115
+ """
1116
+ return {
1117
+ "provider": self.tts_provider,
1118
+ "model": self.tts_model,
1119
+ "voice": self.tts_voice,
1120
+ "use_prompt": self.tts_use_prompt,
1121
+ "prompt": self.prompt
1122
+ }
1123
+
1124
+ def _get_wav_duration(self, wav_path):
1125
+ """
1126
+ Gets the duration (seconds) of a WAV file.
1127
+ """
1128
+ if not os.path.isfile(wav_path):
1129
+ return 0.0
1130
+ try:
1131
+ with wave.open(wav_path, 'rb') as f:
1132
+ frames = f.getnframes()
1133
+ rate = f.getframerate()
1134
+ if rate > 0:
1135
+ return frames / float(rate)
1136
+ except Exception as e:
1137
+ logger.warning(f"WAV duration check failed: {e}")
1138
+ return 0.0
1139
+
1140
def prepend_silence(self, wav_file):
    """
    Inserts a silence period at the beginning of the specified WAV file.
    The duration is defined by `self.silence_sec`.

    ffmpeg writes the result to a temporary file in the same directory,
    which then replaces `wav_file`. The temporary file is removed again if
    ffmpeg fails or cannot be started, so no stray files are left next to
    the audio.

    Args:
        wav_file (str): Path to the target WAV file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        OSError: If ffmpeg cannot be executed or the file cannot be replaced.
    """
    # Created in the target directory so os.replace() stays on one filesystem.
    tmp = tempfile.NamedTemporaryFile(
        suffix=".wav", delete=False, dir=os.path.dirname(wav_file)
    )
    tmp.close()

    cmd = [
        "ffmpeg", "-y",
        "-v", self.ffmpeg_loglevel,
        "-f", "lavfi",
        "-t", str(self.silence_sec),
        "-i", f"anullsrc=r={self.sample_rate}:cl=mono",
        "-i", wav_file,
        "-filter_complex", "[0:a][1:a]concat=n=2:v=0:a=1",
        tmp.name
    ]

    try:
        subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL,
                       stderr=subprocess.DEVNULL)
        os.replace(tmp.name, wav_file)
    finally:
        # After a successful replace the temporary name no longer exists;
        # it is only still present when something above failed.
        if os.path.exists(tmp.name):
            try:
                os.remove(tmp.name)
            except OSError:
                # Best-effort cleanup; must not hide the original error.
                pass
1168
+
1169
def build_slide_pptx(self):
    """
    Generates a PowerPoint file (.pptx) from Markdown using Pandoc.
    Checks hash to skip generation if Markdown hasn't changed.

    Pandoc is started with an argument list instead of a shell command
    line, so paths containing spaces or shell metacharacters are passed
    through unchanged. On success the state file records the Markdown hash
    and the generation time.

    If the Markdown file is missing, an error is logged and the method
    returns without doing anything. If Pandoc fails or cannot be started,
    an error is logged and the process exits with status 1.

    Prerequisites:
        - `pandoc` must be installed.
    """
    if not os.path.exists(self.md_file):
        logger.error(f"Markdown file does not exist: {self.md_file}")
        return

    # 1. Change detection
    state = self._load_audio_state()
    current_md_hash = self._hash_file(self.md_file)

    pptx_task = state.get("pptx_task", {})

    # Skip if PPTX exists and hash matches
    if (pptx_task.get("status") == "generated" and
            pptx_task.get("source_hash") == current_md_hash and
            os.path.isfile(self.slide_file)):
        logger.info("[SKIP] PPTX Create (Markdown unchanged)")
        return

    # --- Start Generation ---
    command = [
        "pandoc", self.md_file,
        "--slide-level=1",
        f"--resource-path={self.source_dir}",
        "-o", self.slide_file,
    ]

    logger.info('Starting Markdown -> PPTX conversion.')

    try:
        subprocess.check_call(command)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a pandoc executable that cannot be started.
        logger.error('PPTX conversion error')
        # A failed conversion must not look like success to the caller.
        sys.exit(1)

    # 2. Save state
    state["pptx_task"] = {
        "status": "generated",
        "source_file": os.path.basename(self.md_file),
        "source_hash": current_md_hash,
        "generated_at": self._now()
    }
    state["last_checked"] = self._now()
    self._save_audio_state(state)

    logger.info("PPTX conversion completed and state saved.")
1224
+
1225
+ def _init_slide_state(self, slide_id):
1226
+ """
1227
+ Returns the initial state dictionary for a single slide.
1228
+ """
1229
+ return {
1230
+ "slide_index": None,
1231
+ "title": "",
1232
+ "notes_hash": None,
1233
+ "notes_length": 0,
1234
+ "audio": {
1235
+ "status": "missing",
1236
+ "wav_file": f"{slide_id}.wav",
1237
+ "generated_at": None,
1238
+ "duration_sec": None,
1239
+ "additional_prompt": ""
1240
+ }
1241
+ }
1242
+
1243
def _speak_to_wav(self, text, wav_path, additional_prompt=""):
    """
    Synthesizes text to speech using the configured TTS client and saves as WAV.

    A failed request is retried once after a 3-minute wait, provided
    `self.max_retry` allows a second attempt and the error message does
    not contain 'RESOURCE_EXHAUSTED'. When no further attempt is possible,
    the error is logged and the process exits with status 1, so a failed
    synthesis is never reported as success and never returns silently.

    Args:
        text (str): Text to synthesize.
        wav_path (str): Output WAV file path.
        additional_prompt (str): Additional prompt for specific slides.
    """
    client = multiai_tts.Prompt()
    client.set_tts_model(self.tts_provider, self.tts_model)
    if self.tts_provider == 'openai':
        client.tts_voice_openai = self.tts_voice
    if self.tts_provider == 'google':
        client.tts_voice_google = self.tts_voice

    if self.tts_use_prompt:
        full_prompt_text = f'{self.prompt}{additional_prompt}\n{text}'
    else:
        full_prompt_text = text

    # Always try at least once, even if max_retry is configured as 0.
    attempts = max(1, self.max_retry)

    for attempt in range(attempts):
        client.save_tts(full_prompt_text, wav_path)

        if not client.error:
            return

        # Only one retry is ever made; also stop when this was the last
        # configured attempt, instead of waiting for a retry that will
        # never happen.
        no_retry_left = attempt > 0 or attempt + 1 >= attempts
        if no_retry_left or 'RESOURCE_EXHAUSTED' in client.error_message:
            logger.error(client.error_message)
            sys.exit(1)

        logger.error(
            f'{full_prompt_text}\n{client.error_message}\nWaiting for 3 minutes and retry...')
        time.sleep(180)
1277
+
1278
+ def _normalize_notes(self, text):
1279
+ """
1280
+ Normalizes note text by stripping whitespace and empty lines.
1281
+ """
1282
+ return "\n".join(
1283
+ line.strip()
1284
+ for line in text.strip().splitlines()
1285
+ if line.strip()
1286
+ )
1287
+
1288
+ def _hash_notes(self, text):
1289
+ """
1290
+ Calculates SHA-256 hash of the text.
1291
+ """
1292
+ return "sha256:" + hashlib.sha256(
1293
+ text.encode("utf-8")
1294
+ ).hexdigest()
1295
+
1296
+ def _hash_file(self, filepath):
1297
+ """
1298
+ Calculates SHA-256 hash of a file.
1299
+ """
1300
+ if not os.path.isfile(filepath):
1301
+ return None
1302
+ h = hashlib.sha256()
1303
+ with open(filepath, "rb") as f:
1304
+ while chunk := f.read(8192):
1305
+ h.update(chunk)
1306
+ return "sha256:" + h.hexdigest()
1307
+
1308
+ def _calculate_source_hash(self, slide_ids):
1309
+ """
1310
+ Calculates a unique hash representing the entire sequence of source MP4s.
1311
+ Reads MP4 files in the order of `slide_ids`.
1312
+ """
1313
+ h = hashlib.sha256()
1314
+
1315
+ for sid in slide_ids:
1316
+ mp4_path = os.path.join(self.movie_dir, f"{sid}.mp4")
1317
+
1318
+ if os.path.isfile(mp4_path):
1319
+ # Update hash with file content
1320
+ with open(mp4_path, "rb") as f:
1321
+ while chunk := f.read(8192):
1322
+ h.update(chunk)
1323
+ else:
1324
+ # Mark as missing in hash
1325
+ h.update(f"{sid}:missing".encode("utf-8"))
1326
+
1327
+ return "sha256:" + h.hexdigest()
1328
+
1329
+ def _now(self):
1330
+ """
1331
+ Returns current datetime in ISO format (seconds precision).
1332
+ """
1333
+ return datetime.now().isoformat(timespec="seconds")
1334
+
1335
def write_video_length_csv(self):
    """
    Generates a CSV report comparing Markdown slide structure and generated videos.

    The Markdown file is scanned for slides (a `<!-- slide-id: ... -->`
    line followed by a `::: notes` block). One row is written for every
    such slide whose MP4 exists in `self.movie_dir`; slides without an MP4
    are logged as a warning and left out. A notes block that does not
    belong to a slide id is ignored.

    Columns:
        - slide_id
        - title
        - notes_length (length of the normalized notes text)
        - duration_sec (MP4 duration, two decimals)

    Output: `self.video_length_file`
    """
    import csv

    slides = []  # [(slide_id, title, notes_length)]

    current_id = None
    current_title = ""
    in_notes = False
    notes_buffer = []

    with open(self.md_file, encoding="utf-8") as f:
        for line in f:
            stripped = line.strip()

            # slide-id
            if line.startswith("<!-- slide-id:"):
                current_id = stripped[len("<!-- slide-id:"):-3].strip()
                current_title = ""
                notes_buffer = []
                in_notes = False
                continue

            # Title
            if current_id and not current_title and line.startswith("# "):
                current_title = line[2:].strip()
                continue

            # notes start
            if stripped == "::: notes":
                in_notes = True
                notes_buffer = []
                continue

            # notes end
            if stripped == ":::" and in_notes:
                in_notes = False
                # Without an active slide id there is no MP4 to look up,
                # so such a block must not produce a row.
                if current_id:
                    notes_length = len(
                        self._normalize_notes("".join(notes_buffer))
                    )
                    slides.append((current_id, current_title, notes_length))
                current_id = None
                continue

            # notes body
            if in_notes:
                notes_buffer.append(line)

    # os.makedirs("") raises, so only create a directory when the output
    # path actually has a directory component.
    output_dir = os.path.dirname(self.video_length_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Use utf-8-sig for Excel compatibility
    with open(
        self.video_length_file,
        "w",
        encoding="utf-8-sig",
        newline=""
    ) as f:
        writer = csv.writer(f)
        writer.writerow(
            ["slide_id", "title", "notes_length", "duration_sec"]
        )

        for slide_id, title, notes_length in slides:
            mp4 = os.path.join(self.movie_dir, f"{slide_id}.mp4")

            if not os.path.isfile(mp4):
                logger.warning(f"mp4 does not exist: {mp4}")
                continue

            duration = self._get_mp4_duration(mp4)
            writer.writerow(
                [slide_id, title, notes_length, f"{duration:.2f}"]
            )
1420
+
1421
+ def _get_mp4_duration(self, mp4_path):
1422
+ """
1423
+ Uses ffprobe to get the duration of an MP4 file in seconds.
1424
+ """
1425
+ cmd = [
1426
+ "ffprobe",
1427
+ "-v", "error",
1428
+ "-show_entries", "format=duration",
1429
+ "-of", "default=noprint_wrappers=1:nokey=1",
1430
+ mp4_path,
1431
+ ]
1432
+ result = subprocess.run(
1433
+ cmd,
1434
+ capture_output=True,
1435
+ text=True,
1436
+ check=True,
1437
+ )
1438
+ return float(result.stdout.strip())