PyPI - lyrics-transcriber - Versions diffs - 0.41.0__py3-none-any.whl → 0.42.0__py3-none-any.whl - Mend

lyrics-transcriber 0.41.0__py3-none-any.whl → 0.42.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77)

lyrics_transcriber/lyrics/base_lyrics_provider.py CHANGED Viewed

@@ -1,13 +1,14 @@
 from dataclasses import dataclass
 import logging
-from typing import Optional, Dict, Any
+from typing import Optional, Dict, Any, List
 import json
 import hashlib
 from pathlib import Path
 import os
 from abc import ABC, abstractmethod
-from lyrics_transcriber.types import LyricsData
+from lyrics_transcriber.types import LyricsData, LyricsSegment, Word
 from karaoke_lyrics_processor import KaraokeLyricsProcessor
+from lyrics_transcriber.utils.word_utils import WordUtils
 @dataclass
@@ -41,20 +42,31 @@ class BaseLyricsProvider(ABC):
         # Use artist and title for cache key instead of audio file hash
         cache_key = self._get_artist_title_hash(artist, title)
-        raw_cache_path = self._get_cache_path(cache_key, "raw")
-        # Try to load from cache first
+        # Check converted cache first
+        converted_cache_path = self._get_cache_path(cache_key, "converted")
+        converted_data = self._load_from_cache(converted_cache_path)
+        if converted_data:
+            self.logger.info(f"Using cached converted lyrics for {artist} - {title}")
+            return LyricsData.from_dict(converted_data)
+        # Check raw cache next
+        raw_cache_path = self._get_cache_path(cache_key, "raw")
         raw_data = self._load_from_cache(raw_cache_path)
-        if raw_data is not None:
-            self.logger.info(f"Using cached lyrics for {artist} - {title}")
-            return self._save_and_convert_result(cache_key, raw_data)
+        if raw_data:
+            self.logger.info(f"Using cached raw lyrics for {artist} - {title}")
+            converted_result = self._convert_result_format(raw_data)
+            self._save_to_cache(converted_cache_path, converted_result.to_dict())
+            return converted_result
         # If not in cache, fetch from source
         raw_result = self._fetch_data_from_source(artist, title)
         if raw_result:
             # Save raw API response
             self._save_to_cache(raw_cache_path, raw_result)
-            return self._save_and_convert_result(cache_key, raw_result)
+            converted_result = self._convert_result_format(raw_result)
+            self._save_to_cache(converted_cache_path, converted_result.to_dict())
+            return converted_result
         return None
@@ -100,18 +112,69 @@ class BaseLyricsProvider(ABC):
             self.logger.warning(f"Cache file {cache_path} is corrupted")
             return None
+    def _create_segments_with_words(self, text: str, is_synced: bool = False) -> List[LyricsSegment]:
+        """Create LyricsSegment objects with properly formatted words from text.
+        Args:
+            text: Raw lyrics text
+            is_synced: Whether timing information is available
+        Returns:
+            List of LyricsSegment objects with unique IDs and Word objects
+        """
+        segments = []
+        lines = text.strip().split("\n")
+        for line in lines:
+            if not line.strip():
+                continue
+            # Split line into words
+            word_texts = line.strip().split()
+            if not word_texts:
+                continue
+            words = []
+            for word_text in word_texts:
+                word = Word(
+                    id=WordUtils.generate_id(),
+                    text=word_text,
+                    start_time=0.0 if is_synced else None,
+                    end_time=0.0 if is_synced else None,
+                    confidence=1.0,  # Reference lyrics are considered ground truth
+                    created_during_correction=False,
+                )
+                words.append(word)
+            segment = LyricsSegment(
+                id=WordUtils.generate_id(),
+                text=line.strip(),
+                words=words,
+                start_time=words[0].start_time if is_synced else None,
+                end_time=words[-1].end_time if is_synced else None,
+            )
+            segments.append(segment)
+        return segments
     def _process_lyrics(self, lyrics_data: LyricsData) -> LyricsData:
-        """Process lyrics using KaraokeLyricsProcessor."""
+        """Process lyrics using KaraokeLyricsProcessor and create proper segments."""
+        # Concatenate all segment texts to get the full lyrics
+        full_lyrics = lyrics_data.get_full_text()
         processor = KaraokeLyricsProcessor(
             log_level=self.logger.getEffectiveLevel(),
             log_formatter=self.logger.handlers[0].formatter if self.logger.handlers else None,
-            input_lyrics_text=lyrics_data.lyrics,
+            input_lyrics_text=full_lyrics,
             max_line_length=self.max_line_length,
         )
         processed_text = processor.process()
-        # Create new LyricsData with processed text
-        return LyricsData(source=lyrics_data.source, lyrics=processed_text, segments=lyrics_data.segments, metadata=lyrics_data.metadata)
+        # Create segments with words from processed text
+        segments = self._create_segments_with_words(processed_text, is_synced=lyrics_data.metadata.is_synced)
+        # Create new LyricsData with processed text and segments
+        return LyricsData(source=lyrics_data.source, segments=segments, metadata=lyrics_data.metadata)
     def _save_and_convert_result(self, cache_key: str, raw_data: Dict[str, Any]) -> LyricsData:
         """Convert raw result to standardized format, process lyrics, save to cache, and return."""

lyrics_transcriber/lyrics/file_provider.py CHANGED Viewed

@@ -67,7 +67,7 @@ class FileProvider(BaseLyricsProvider):
         self.logger.debug(f"Converting raw data to LyricsData format: {raw_data}")
         try:
-            # Create metadata object like Genius provider does
+            # Create metadata object
             metadata = LyricsMetadata(
                 source="file",
                 track_name=self.title,
@@ -78,10 +78,11 @@ class FileProvider(BaseLyricsProvider):
                 provider_metadata={"filepath": raw_data["filepath"]},
             )
-            lyrics_data = LyricsData(
-                source="file", lyrics=raw_data["text"], segments=[], metadata=metadata  # No timing information from file
-            )
-            self.logger.debug(f"Created LyricsData object: {lyrics_data}")
+            # Create segments with words from the processed text
+            segments = self._create_segments_with_words(raw_data["text"], is_synced=False)
+            lyrics_data = LyricsData(source="file", segments=segments, metadata=metadata)
+            self.logger.debug(f"Created LyricsData object with {len(segments)} segments")
             return lyrics_data
         except Exception as e:

lyrics_transcriber/lyrics/genius.py CHANGED Viewed

@@ -77,8 +77,11 @@ class GeniusProvider(BaseLyricsProvider):
             },
         )
-        # Create result object
-        return LyricsData(source="genius", lyrics=lyrics, segments=[], metadata=metadata)
+        # Create segments with words from cleaned lyrics
+        segments = self._create_segments_with_words(lyrics, is_synced=False)
+        # Create result object with segments
+        return LyricsData(source="genius", segments=segments, metadata=metadata)
     def _clean_lyrics(self, lyrics: str) -> str:
         """Clean and process lyrics from Genius to remove unwanted content."""

lyrics_transcriber/lyrics/spotify.py CHANGED Viewed

@@ -1,9 +1,11 @@
 import logging
 from typing import Optional, Dict, Any
 import syrics.api
+import time
-from lyrics_transcriber.types import LyricsData, LyricsMetadata, LyricsSegment
+from lyrics_transcriber.types import LyricsData, LyricsMetadata, LyricsSegment, Word
 from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
+from lyrics_transcriber.utils.word_utils import WordUtils
 class SpotifyProvider(BaseLyricsProvider):
@@ -12,7 +14,22 @@ class SpotifyProvider(BaseLyricsProvider):
     def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
         super().__init__(config, logger)
         self.cookie = config.spotify_cookie
-        self.client = syrics.api.Spotify(self.cookie) if self.cookie else None
+        self.client = None
+        if self.cookie:
+            max_retries = 5
+            retry_delay = 5  # seconds
+            for attempt in range(max_retries):
+                try:
+                    self.client = syrics.api.Spotify(self.cookie)
+                    break  # Successfully initialized
+                except Exception as e:
+                    if attempt == max_retries - 1:  # Last attempt
+                        self.logger.error(f"Failed to initialize Spotify client after {max_retries} attempts: {str(e)}")
+                        break
+                    self.logger.warning(f"Attempt {attempt + 1}/{max_retries} failed, retrying in {retry_delay} seconds...")
+                    time.sleep(retry_delay)
     def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
         """Fetch raw data from Spotify APIs using syrics library."""
@@ -45,24 +62,6 @@ class SpotifyProvider(BaseLyricsProvider):
         track_data = raw_data["track_data"]
         lyrics_data = raw_data["lyrics_data"]["lyrics"]
-        # Convert raw lines to LyricsSegment objects
-        segments = []
-        for line in lyrics_data.get("lines", []):
-            if not line.get("words"):
-                continue
-            # Skip lines that are just musical notes
-            if not self._clean_lyrics(line["words"]):
-                continue
-            segment = LyricsSegment(
-                text=line["words"],
-                words=[],  # TODO: Could potentially split words if needed
-                start_time=float(line["startTimeMs"]) / 1000 if line["startTimeMs"] != "0" else None,
-                end_time=float(line["endTimeMs"]) / 1000 if line["endTimeMs"] != "0" else None,
-            )
-            segments.append(segment)
         # Create metadata object
         metadata = LyricsMetadata(
             source="spotify",
@@ -83,7 +82,45 @@ class SpotifyProvider(BaseLyricsProvider):
             },
         )
-        return LyricsData(source="spotify", lyrics="\n".join(segment.text for segment in segments), segments=segments, metadata=metadata)
+        # Create segments with timing information
+        segments = []
+        for line in lyrics_data.get("lines", []):
+            if not line.get("words"):
+                continue
+            # Skip lines that are just musical notes
+            if not self._clean_lyrics(line["words"]):
+                continue
+            # Split line into words
+            word_texts = line["words"].strip().split()
+            if not word_texts:
+                continue
+            # Calculate approximate timing for each word
+            start_time = float(line["startTimeMs"]) / 1000 if line["startTimeMs"] != "0" else 0.0
+            end_time = float(line["endTimeMs"]) / 1000 if line["endTimeMs"] != "0" else 0.0
+            duration = end_time - start_time
+            word_duration = duration / len(word_texts)
+            words = []
+            for i, word_text in enumerate(word_texts):
+                word = Word(
+                    id=WordUtils.generate_id(),
+                    text=word_text,
+                    start_time=start_time + (i * word_duration),
+                    end_time=start_time + ((i + 1) * word_duration),
+                    confidence=1.0,
+                    created_during_correction=False,
+                )
+                words.append(word)
+            segment = LyricsSegment(
+                id=WordUtils.generate_id(), text=line["words"].strip(), words=words, start_time=start_time, end_time=end_time
+            )
+            segments.append(segment)
+        return LyricsData(source="spotify", segments=segments, metadata=metadata)
     def _clean_lyrics(self, lyrics: str) -> str:
         """Clean and process lyrics from Spotify to remove unwanted content."""

lyrics_transcriber/output/ass/config.py CHANGED Viewed

@@ -4,7 +4,18 @@ from dataclasses import dataclass
 class ScreenConfig:
     """Configuration for screen timing and layout."""
-    def __init__(self, line_height: int = 50, max_visible_lines: int = 4, top_padding: int = None, video_width: int = 640, video_height: int = 360):
+    def __init__(
+        self,
+        line_height: int = 50,
+        max_visible_lines: int = 4,
+        top_padding: int = None,
+        video_width: int = 640,
+        video_height: int = 360,
+        screen_gap_threshold: float = 5.0,
+        post_roll_time: float = 1.0,
+        fade_in_ms: int = 200,
+        fade_out_ms: int = 300,
+    ):
         # Screen layout
         self.max_visible_lines = max_visible_lines
         self.line_height = line_height
@@ -12,10 +23,10 @@ class ScreenConfig:
         self.video_height = video_height
         self.video_width = video_width
         # Timing configuration
-        self.screen_gap_threshold = 5.0
-        self.post_roll_time = 1.0
-        self.fade_in_ms = 200
-        self.fade_out_ms = 300
+        self.screen_gap_threshold = screen_gap_threshold
+        self.post_roll_time = post_roll_time
+        self.fade_in_ms = fade_in_ms
+        self.fade_out_ms = fade_out_ms
 @dataclass

lyrics_transcriber/output/cdg.py CHANGED Viewed

@@ -496,7 +496,7 @@ class CDGGenerator:
                 text = text[1:]
             current_line += text + " "
-            self.logger.debug(f"format_lyrics: Current line: '{current_line}'")
+            # self.logger.debug(f"format_lyrics: Current line: '{current_line}'")
             is_last_before_instrumental = any(
                 inst["sync"] > sync_times[i] and (i == len(sync_times) - 1 or sync_times[i + 1] > inst["sync"]) for inst in instrumentals

lyrics_transcriber/output/generator.py CHANGED Viewed

@@ -97,28 +97,42 @@ class OutputGenerator:
     def generate_outputs(
         self,
         transcription_corrected: Optional[CorrectionResult],
-        lyrics_results: List[LyricsData],
+        lyrics_results: dict[str, LyricsData],
         output_prefix: str,
         audio_filepath: str,
         artist: Optional[str] = None,
         title: Optional[str] = None,
+        preview_mode: bool = False,
     ) -> OutputPaths:
         """Generate all requested output formats."""
         outputs = OutputPaths()
         try:
-            # Generate plain lyrics files for each provider
-            for lyrics_data in lyrics_results:
-                self.plain_text.write_lyrics(lyrics_data, output_prefix)
             # Only process transcription-related outputs if we have transcription data
             if transcription_corrected:
-                # Write original (uncorrected) transcription
-                outputs.original_txt = self.plain_text.write_original_transcription(transcription_corrected, output_prefix)
-                # Resize corrected segments to ensure none are longer than max_line_length
+                # Resize corrected segments
                 resized_segments = self.segment_resizer.resize_segments(transcription_corrected.corrected_segments)
                 transcription_corrected.resized_segments = resized_segments
+                # For preview, we only need to generate ASS and video
+                if preview_mode:
+                    # Generate ASS subtitles for preview
+                    outputs.ass = self.subtitle.generate_ass(transcription_corrected.resized_segments, output_prefix, audio_filepath)
+                    # Generate preview video
+                    outputs.video = self.video.generate_preview_video(outputs.ass, audio_filepath, output_prefix)
+                    return outputs
+                # Normal output generation (non-preview mode)
+                # Generate plain lyrics files for each provider
+                for name, lyrics_data in lyrics_results.items():
+                    self.plain_text.write_lyrics(lyrics_data, output_prefix)
+                # Write original (uncorrected) transcription
+                outputs.original_txt = self.plain_text.write_original_transcription(transcription_corrected, output_prefix)
                 outputs.corrections_json = self.write_corrections_data(transcription_corrected, output_prefix)
                 # Write corrected lyrics as plain text

lyrics_transcriber/output/plain_text.py CHANGED Viewed

@@ -5,12 +5,13 @@ from typing import List, Optional
 from lyrics_transcriber.types import LyricsData, LyricsSegment
 from lyrics_transcriber.correction.corrector import CorrectionResult
 class PlainTextGenerator:
     """Handles generation of plain text output files for lyrics and transcriptions."""
     def __init__(self, output_dir: str, logger: Optional[logging.Logger] = None):
         """Initialize PlainTextGenerator.
         Args:
             output_dir: Directory where output files will be written
             logger: Optional logger instance
@@ -24,11 +25,11 @@ class PlainTextGenerator:
     def write_lyrics(self, lyrics_data: LyricsData, output_prefix: str) -> str:
         """Write plain text lyrics file from provider data.
         Args:
             lyrics_data: LyricsData from a lyrics provider
             output_prefix: Prefix for output filename
         Returns:
             Path to generated file
         """
@@ -38,7 +39,9 @@ class PlainTextGenerator:
         try:
             with open(output_path, "w", encoding="utf-8") as f:
-                f.write(lyrics_data.lyrics)
+                # Join segment texts with newlines
+                lyrics_text = "\n".join(segment.text for segment in lyrics_data.segments)
+                f.write(lyrics_text)
             self.logger.info(f"Plain lyrics file generated: {output_path}")
             return output_path
         except Exception as e:
@@ -47,11 +50,11 @@ class PlainTextGenerator:
     def write_corrected_lyrics(self, segments: List[LyricsSegment], output_prefix: str) -> str:
         """Write corrected lyrics as plain text file.
         Args:
             segments: List of corrected LyricsSegment objects
             output_prefix: Prefix for output filename
         Returns:
             Path to generated file
         """
@@ -70,22 +73,24 @@ class PlainTextGenerator:
     def write_original_transcription(self, correction_result: CorrectionResult, output_prefix: str) -> str:
         """Write original (uncorrected) transcription as plain text.
         Args:
             correction_result: CorrectionResult containing original transcription
             output_prefix: Prefix for output filename
         Returns:
             Path to generated file
         """
         self.logger.info("Writing original transcription file")
         output_path = self._get_output_path(f"{output_prefix} (Lyrics Uncorrected)", "txt")
+        transcribed_text = " ".join(" ".join(w.text for w in segment.words) for segment in correction_result.original_segments)
         try:
             with open(output_path, "w", encoding="utf-8") as f:
-                f.write(correction_result.transcribed_text)
+                f.write(transcribed_text)
             self.logger.info(f"Original transcription file generated: {output_path}")
             return output_path
         except Exception as e:
             self.logger.error(f"Failed to write original transcription file: {str(e)}")
-            raise
+            raise

lyrics_transcriber/output/segment_resizer.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import logging
 import re
-from typing import List, Optional, Tuple
+from typing import List, Optional
 from lyrics_transcriber.types import LyricsSegment, Word
+from lyrics_transcriber.utils.word_utils import WordUtils
 class SegmentResizer:
@@ -101,7 +102,13 @@ class SegmentResizer:
             Output: LyricsSegment(text="Hello World", words=[...])
         """
         cleaned_text = self._clean_text(segment.text)
-        return LyricsSegment(text=cleaned_text, words=segment.words, start_time=segment.start_time, end_time=segment.end_time)
+        return LyricsSegment(
+            id=segment.id,  # Preserve the original segment ID
+            text=cleaned_text,
+            words=segment.words,
+            start_time=segment.start_time,
+            end_time=segment.end_time,
+        )
     def _create_cleaned_word(self, word: Word) -> Word:
         """Create a new word with cleaned text."""
@@ -226,7 +233,13 @@ class SegmentResizer:
     def _create_segment_from_words(self, line: str, words: List[Word]) -> LyricsSegment:
         """Create a new segment from a list of words."""
         cleaned_text = self._clean_text(line)
-        return LyricsSegment(text=cleaned_text, words=words, start_time=words[0].start_time, end_time=words[-1].end_time)
+        return LyricsSegment(
+            id=WordUtils.generate_id(),  # Generate new ID for split segments
+            text=cleaned_text,
+            words=words,
+            start_time=words[0].start_time,
+            end_time=words[-1].end_time,
+        )
     def _process_segment_text(self, text: str) -> List[str]:
         """Process segment text to determine optimal split points."""

lyrics_transcriber/output/subtitles.py CHANGED Viewed

@@ -44,7 +44,30 @@ class SubtitlesGenerator:
         self.font_size = font_size
         self.styles = styles
         self.subtitle_offset_ms = subtitle_offset_ms
-        self.config = ScreenConfig(line_height=line_height, video_width=video_resolution[0], video_height=video_resolution[1])
+        # Create ScreenConfig with potential overrides from styles
+        karaoke_styles = styles.get("karaoke", {})
+        config_params = {
+            "line_height": line_height,
+            "video_width": video_resolution[0],
+            "video_height": video_resolution[1]
+        }
+        # Add any overrides from styles
+        screen_config_props = [
+            "max_visible_lines",
+            "top_padding",
+            "screen_gap_threshold",
+            "post_roll_time",
+            "fade_in_ms",
+            "fade_out_ms"
+        ]
+        for prop in screen_config_props:
+            if prop in karaoke_styles:
+                config_params[prop] = karaoke_styles[prop]
+        self.config = ScreenConfig(**config_params)
         self.logger = logger or logging.getLogger(__name__)
     def _get_output_path(self, output_prefix: str, extension: str) -> str:
@@ -102,13 +125,16 @@ class SubtitlesGenerator:
             offset_seconds = self.subtitle_offset_ms / 1000.0
             segments = [
                 LyricsSegment(
+                    id=seg.id,  # Preserve original segment ID
                     text=seg.text,
                     words=[
                         Word(
+                            id=word.id,  # Preserve original word ID
                             text=word.text,
                             start_time=max(0, word.start_time + offset_seconds),
                             end_time=word.end_time + offset_seconds,
                             confidence=word.confidence,
+                            created_during_correction=getattr(word, "created_during_correction", False),  # Preserve correction flag
                         )
                         for word in seg.words
                     ],

lyrics_transcriber/output/video.py CHANGED Viewed

@@ -88,6 +88,52 @@ class VideoGenerator:
                     pass
             raise
+    def generate_preview_video(self, ass_path: str, audio_path: str, output_prefix: str) -> str:
+        """Generate lower resolution MP4 preview video with lyrics overlay.
+        Args:
+            ass_path: Path to ASS subtitles file
+            audio_path: Path to audio file
+            output_prefix: Prefix for output filename
+        Returns:
+            Path to generated preview video file
+        """
+        self.logger.info("Generating preview video with lyrics overlay")
+        output_path = os.path.join(self.cache_dir, f"{output_prefix}_preview.mp4")
+        # Check input files exist before running FFmpeg
+        if not os.path.isfile(ass_path):
+            raise FileNotFoundError(f"Subtitles file not found: {ass_path}")
+        if not os.path.isfile(audio_path):
+            raise FileNotFoundError(f"Audio file not found: {audio_path}")
+        try:
+            # Create a temporary copy of the ASS file with a safe filename
+            temp_ass_path = os.path.join(self.cache_dir, "temp_preview_subtitles.ass")
+            import shutil
+            shutil.copy2(ass_path, temp_ass_path)
+            self.logger.debug(f"Created temporary ASS file: {temp_ass_path}")
+            cmd = self._build_preview_ffmpeg_command(temp_ass_path, audio_path, output_path)
+            self._run_ffmpeg_command(cmd)
+            self.logger.info(f"Preview video generated: {output_path}")
+            # Clean up temporary file
+            os.remove(temp_ass_path)
+            return output_path
+        except Exception as e:
+            self.logger.error(f"Failed to generate preview video: {str(e)}")
+            # Clean up temporary file in case of error
+            if "temp_ass_path" in locals():
+                try:
+                    os.remove(temp_ass_path)
+                except:
+                    pass
+            raise
     def _get_output_path(self, output_prefix: str, extension: str) -> str:
         """Generate full output path for a file."""
         return os.path.join(self.output_dir, f"{output_prefix}.{extension}")
@@ -181,7 +227,7 @@ class VideoGenerator:
             "-vf", f"ass={ass_path}",  # Add subtitles
             "-c:v", self._get_video_codec(),
             # Video quality settings
-            "-preset", "slow",  # Better compression efficiency
+            "-preset", "fast",  # Better compression efficiency
             "-b:v", "5000k",  # Base video bitrate
             "-minrate", "5000k",  # Minimum bitrate
             "-maxrate", "20000k",  # Maximum bitrate
@@ -196,6 +242,66 @@ class VideoGenerator:
         return cmd
+    def _build_preview_ffmpeg_command(self, ass_path: str, audio_path: str, output_path: str) -> List[str]:
+        """Build FFmpeg command for preview video generation with optimized settings."""
+        # Use 360p resolution for preview
+        width, height = 640, 360
+        # fmt: off
+        cmd = [
+            "ffmpeg",
+            "-hide_banner",
+            "-loglevel", "error",
+            "-r", "30",  # Set frame rate to 30 fps
+        ]
+        # Input source (background)
+        if self.background_image:
+            # Resize background image first
+            resized_bg = self._resize_background_image(self.background_image)
+            self.logger.debug(f"Using resized background image: {resized_bg}")
+            cmd.extend([
+                "-loop", "1",  # Loop the image
+                "-i", resized_bg,
+            ])
+        else:
+            self.logger.debug(
+                f"Using solid {self.background_color} background "
+                f"with resolution: {width}x{height}"
+            )
+            cmd.extend([
+                "-f", "lavfi",
+                "-i", f"color=c={self.background_color}:s={width}x{height}:r=30"
+            ])
+        # Add audio input and subtitle overlay
+        cmd.extend([
+            "-i", audio_path,
+            "-c:a", "aac",  # Use AAC for audio
+            "-b:a", "128k",  # Audio bitrate
+            "-vf", f"ass={ass_path}",  # Add subtitles
+            "-c:v", "libx264",  # Use H.264 codec
+            "-profile:v", "baseline",  # Most compatible H.264 profile
+            "-level", "3.0",  # Compatibility level
+            "-pix_fmt", "yuv420p",  # Required for browser compatibility
+            "-preset", "ultrafast",
+            "-b:v", "1000k",  # Slightly higher bitrate
+            "-maxrate", "1500k",
+            "-bufsize", "2000k",
+            "-movflags", "+faststart+frag_keyframe+empty_moov",  # Enhanced streaming flags
+            "-g", "30",  # Keyframe every 30 frames (1 second)
+            "-keyint_min", "30",  # Minimum keyframe interval
+            "-sc_threshold", "0",  # Disable scene change detection
+            "-shortest",
+            "-y"
+        ])
+        # fmt: on
+        # Add output path
+        cmd.append(output_path)
+        return cmd
     def _get_video_codec(self) -> str:
         """Determine the best available video codec."""
         # try:

lyrics-transcriber 0.41.0__py3-none-any.whl → 0.42.0__py3-none-any.whl

lyrics-transcriber 0.41.0__py3-none-any.whl → 0.42.0__py3-none-any.whl