PyPI - lyrics-transcriber - Versions diffs - 0.40.0__py3-none-any.whl → 0.42.0__py3-none-any.whl - Mend

lyrics-transcriber 0.40.0__py3-none-any.whl → 0.42.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.

Files changed (79) hide show

lyrics_transcriber/lyrics/base_lyrics_provider.py CHANGED Viewed

@@ -1,13 +1,14 @@
 from dataclasses import dataclass
 import logging
-from typing import Optional, Dict, Any
+from typing import Optional, Dict, Any, List
 import json
 import hashlib
 from pathlib import Path
 import os
 from abc import ABC, abstractmethod
-from lyrics_transcriber.types import LyricsData
+from lyrics_transcriber.types import LyricsData, LyricsSegment, Word
 from karaoke_lyrics_processor import KaraokeLyricsProcessor
+from lyrics_transcriber.utils.word_utils import WordUtils
 @dataclass
@@ -41,20 +42,31 @@ class BaseLyricsProvider(ABC):
         # Use artist and title for cache key instead of audio file hash
         cache_key = self._get_artist_title_hash(artist, title)
-        raw_cache_path = self._get_cache_path(cache_key, "raw")
-        # Try to load from cache first
+        # Check converted cache first
+        converted_cache_path = self._get_cache_path(cache_key, "converted")
+        converted_data = self._load_from_cache(converted_cache_path)
+        if converted_data:
+            self.logger.info(f"Using cached converted lyrics for {artist} - {title}")
+            return LyricsData.from_dict(converted_data)
+        # Check raw cache next
+        raw_cache_path = self._get_cache_path(cache_key, "raw")
         raw_data = self._load_from_cache(raw_cache_path)
-        if raw_data is not None:
-            self.logger.info(f"Using cached lyrics for {artist} - {title}")
-            return self._save_and_convert_result(cache_key, raw_data)
+        if raw_data:
+            self.logger.info(f"Using cached raw lyrics for {artist} - {title}")
+            converted_result = self._convert_result_format(raw_data)
+            self._save_to_cache(converted_cache_path, converted_result.to_dict())
+            return converted_result
         # If not in cache, fetch from source
         raw_result = self._fetch_data_from_source(artist, title)
         if raw_result:
             # Save raw API response
             self._save_to_cache(raw_cache_path, raw_result)
-            return self._save_and_convert_result(cache_key, raw_result)
+            converted_result = self._convert_result_format(raw_result)
+            self._save_to_cache(converted_cache_path, converted_result.to_dict())
+            return converted_result
         return None
@@ -100,18 +112,69 @@ class BaseLyricsProvider(ABC):
             self.logger.warning(f"Cache file {cache_path} is corrupted")
             return None
+    def _create_segments_with_words(self, text: str, is_synced: bool = False) -> List[LyricsSegment]:
+        """Create LyricsSegment objects with properly formatted words from text.
+        Args:
+            text: Raw lyrics text
+            is_synced: Whether timing information is available
+        Returns:
+            List of LyricsSegment objects with unique IDs and Word objects
+        """
+        segments = []
+        lines = text.strip().split("\n")
+        for line in lines:
+            if not line.strip():
+                continue
+            # Split line into words
+            word_texts = line.strip().split()
+            if not word_texts:
+                continue
+            words = []
+            for word_text in word_texts:
+                word = Word(
+                    id=WordUtils.generate_id(),
+                    text=word_text,
+                    start_time=0.0 if is_synced else None,
+                    end_time=0.0 if is_synced else None,
+                    confidence=1.0,  # Reference lyrics are considered ground truth
+                    created_during_correction=False,
+                )
+                words.append(word)
+            segment = LyricsSegment(
+                id=WordUtils.generate_id(),
+                text=line.strip(),
+                words=words,
+                start_time=words[0].start_time if is_synced else None,
+                end_time=words[-1].end_time if is_synced else None,
+            )
+            segments.append(segment)
+        return segments
     def _process_lyrics(self, lyrics_data: LyricsData) -> LyricsData:
-        """Process lyrics using KaraokeLyricsProcessor."""
+        """Process lyrics using KaraokeLyricsProcessor and create proper segments."""
+        # Concatenate all segment texts to get the full lyrics
+        full_lyrics = lyrics_data.get_full_text()
         processor = KaraokeLyricsProcessor(
             log_level=self.logger.getEffectiveLevel(),
             log_formatter=self.logger.handlers[0].formatter if self.logger.handlers else None,
-            input_lyrics_text=lyrics_data.lyrics,
+            input_lyrics_text=full_lyrics,
             max_line_length=self.max_line_length,
         )
         processed_text = processor.process()
-        # Create new LyricsData with processed text
-        return LyricsData(source=lyrics_data.source, lyrics=processed_text, segments=lyrics_data.segments, metadata=lyrics_data.metadata)
+        # Create segments with words from processed text
+        segments = self._create_segments_with_words(processed_text, is_synced=lyrics_data.metadata.is_synced)
+        # Create new LyricsData with processed text and segments
+        return LyricsData(source=lyrics_data.source, segments=segments, metadata=lyrics_data.metadata)
     def _save_and_convert_result(self, cache_key: str, raw_data: Dict[str, Any]) -> LyricsData:
         """Convert raw result to standardized format, process lyrics, save to cache, and return."""

lyrics_transcriber/lyrics/file_provider.py CHANGED Viewed

@@ -67,7 +67,7 @@ class FileProvider(BaseLyricsProvider):
         self.logger.debug(f"Converting raw data to LyricsData format: {raw_data}")
         try:
-            # Create metadata object like Genius provider does
+            # Create metadata object
             metadata = LyricsMetadata(
                 source="file",
                 track_name=self.title,
@@ -78,10 +78,11 @@ class FileProvider(BaseLyricsProvider):
                 provider_metadata={"filepath": raw_data["filepath"]},
             )
-            lyrics_data = LyricsData(
-                source="file", lyrics=raw_data["text"], segments=[], metadata=metadata  # No timing information from file
-            )
-            self.logger.debug(f"Created LyricsData object: {lyrics_data}")
+            # Create segments with words from the processed text
+            segments = self._create_segments_with_words(raw_data["text"], is_synced=False)
+            lyrics_data = LyricsData(source="file", segments=segments, metadata=metadata)
+            self.logger.debug(f"Created LyricsData object with {len(segments)} segments")
             return lyrics_data
         except Exception as e:

lyrics_transcriber/lyrics/genius.py CHANGED Viewed

@@ -77,8 +77,11 @@ class GeniusProvider(BaseLyricsProvider):
             },
         )
-        # Create result object
-        return LyricsData(source="genius", lyrics=lyrics, segments=[], metadata=metadata)
+        # Create segments with words from cleaned lyrics
+        segments = self._create_segments_with_words(lyrics, is_synced=False)
+        # Create result object with segments
+        return LyricsData(source="genius", segments=segments, metadata=metadata)
     def _clean_lyrics(self, lyrics: str) -> str:
         """Clean and process lyrics from Genius to remove unwanted content."""

lyrics_transcriber/lyrics/spotify.py CHANGED Viewed

@@ -1,9 +1,11 @@
 import logging
 from typing import Optional, Dict, Any
 import syrics.api
+import time
-from lyrics_transcriber.types import LyricsData, LyricsMetadata, LyricsSegment
+from lyrics_transcriber.types import LyricsData, LyricsMetadata, LyricsSegment, Word
 from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
+from lyrics_transcriber.utils.word_utils import WordUtils
 class SpotifyProvider(BaseLyricsProvider):
@@ -12,7 +14,22 @@ class SpotifyProvider(BaseLyricsProvider):
     def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
         super().__init__(config, logger)
         self.cookie = config.spotify_cookie
-        self.client = syrics.api.Spotify(self.cookie) if self.cookie else None
+        self.client = None
+        if self.cookie:
+            max_retries = 5
+            retry_delay = 5  # seconds
+            for attempt in range(max_retries):
+                try:
+                    self.client = syrics.api.Spotify(self.cookie)
+                    break  # Successfully initialized
+                except Exception as e:
+                    if attempt == max_retries - 1:  # Last attempt
+                        self.logger.error(f"Failed to initialize Spotify client after {max_retries} attempts: {str(e)}")
+                        break
+                    self.logger.warning(f"Attempt {attempt + 1}/{max_retries} failed, retrying in {retry_delay} seconds...")
+                    time.sleep(retry_delay)
     def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
         """Fetch raw data from Spotify APIs using syrics library."""
@@ -45,24 +62,6 @@ class SpotifyProvider(BaseLyricsProvider):
         track_data = raw_data["track_data"]
         lyrics_data = raw_data["lyrics_data"]["lyrics"]
-        # Convert raw lines to LyricsSegment objects
-        segments = []
-        for line in lyrics_data.get("lines", []):
-            if not line.get("words"):
-                continue
-            # Skip lines that are just musical notes
-            if not self._clean_lyrics(line["words"]):
-                continue
-            segment = LyricsSegment(
-                text=line["words"],
-                words=[],  # TODO: Could potentially split words if needed
-                start_time=float(line["startTimeMs"]) / 1000 if line["startTimeMs"] != "0" else None,
-                end_time=float(line["endTimeMs"]) / 1000 if line["endTimeMs"] != "0" else None,
-            )
-            segments.append(segment)
         # Create metadata object
         metadata = LyricsMetadata(
             source="spotify",
@@ -83,7 +82,45 @@ class SpotifyProvider(BaseLyricsProvider):
             },
         )
-        return LyricsData(source="spotify", lyrics="\n".join(segment.text for segment in segments), segments=segments, metadata=metadata)
+        # Create segments with timing information
+        segments = []
+        for line in lyrics_data.get("lines", []):
+            if not line.get("words"):
+                continue
+            # Skip lines that are just musical notes
+            if not self._clean_lyrics(line["words"]):
+                continue
+            # Split line into words
+            word_texts = line["words"].strip().split()
+            if not word_texts:
+                continue
+            # Calculate approximate timing for each word
+            start_time = float(line["startTimeMs"]) / 1000 if line["startTimeMs"] != "0" else 0.0
+            end_time = float(line["endTimeMs"]) / 1000 if line["endTimeMs"] != "0" else 0.0
+            duration = end_time - start_time
+            word_duration = duration / len(word_texts)
+            words = []
+            for i, word_text in enumerate(word_texts):
+                word = Word(
+                    id=WordUtils.generate_id(),
+                    text=word_text,
+                    start_time=start_time + (i * word_duration),
+                    end_time=start_time + ((i + 1) * word_duration),
+                    confidence=1.0,
+                    created_during_correction=False,
+                )
+                words.append(word)
+            segment = LyricsSegment(
+                id=WordUtils.generate_id(), text=line["words"].strip(), words=words, start_time=start_time, end_time=end_time
+            )
+            segments.append(segment)
+        return LyricsData(source="spotify", segments=segments, metadata=metadata)
     def _clean_lyrics(self, lyrics: str) -> str:
         """Clean and process lyrics from Spotify to remove unwanted content."""

lyrics_transcriber/output/ass/config.py CHANGED Viewed

@@ -4,7 +4,18 @@ from dataclasses import dataclass
 class ScreenConfig:
     """Configuration for screen timing and layout."""
-    def __init__(self, line_height: int = 50, max_visible_lines: int = 4, top_padding: int = None, video_width: int = 640, video_height: int = 360):
+    def __init__(
+        self,
+        line_height: int = 50,
+        max_visible_lines: int = 4,
+        top_padding: int = None,
+        video_width: int = 640,
+        video_height: int = 360,
+        screen_gap_threshold: float = 5.0,
+        post_roll_time: float = 1.0,
+        fade_in_ms: int = 200,
+        fade_out_ms: int = 300,
+    ):
         # Screen layout
         self.max_visible_lines = max_visible_lines
         self.line_height = line_height
@@ -12,10 +23,10 @@ class ScreenConfig:
         self.video_height = video_height
         self.video_width = video_width
         # Timing configuration
-        self.screen_gap_threshold = 5.0
-        self.post_roll_time = 1.0
-        self.fade_in_ms = 200
-        self.fade_out_ms = 300
+        self.screen_gap_threshold = screen_gap_threshold
+        self.post_roll_time = post_roll_time
+        self.fade_in_ms = fade_in_ms
+        self.fade_out_ms = fade_out_ms
 @dataclass

lyrics_transcriber/output/cdg.py CHANGED Viewed

@@ -126,7 +126,7 @@ class CDGGenerator:
         cdg_styles: dict,
     ) -> str:
         """Create TOML configuration file for CDG generation."""
-        safe_filename = self._get_safe_filename(artist, title, "Karaoke CDG", "toml")
+        safe_filename = self._get_safe_filename(artist, title, "Karaoke", "toml")
         toml_file = os.path.join(self.output_dir, safe_filename)
         self.logger.debug(f"Generating TOML file: {toml_file}")
@@ -161,7 +161,7 @@ class CDGGenerator:
             title=title,
             artist=artist,
             audio_file=audio_file,
-            output_name=f"{artist} - {title} (Karaoke CDG)",
+            output_name=f"{artist} - {title} (Karaoke)",
             sync_times=sync_times,
             instrumentals=instrumentals,
             formatted_lyrics=formatted_lyrics,
@@ -190,11 +190,11 @@ class CDGGenerator:
         """Compose CDG using KaraokeComposer."""
         kc = KaraokeComposer.from_file(toml_file)
         kc.compose()
-        kc.create_mp4(height=1080, fps=30)
+        # kc.create_mp4(height=1080, fps=30)
     def _find_cdg_zip(self, artist: str, title: str) -> str:
         """Find the generated CDG ZIP file."""
-        safe_filename = self._get_safe_filename(artist, title, "Karaoke CDG", "zip")
+        safe_filename = self._get_safe_filename(artist, title, "Karaoke", "zip")
         output_zip = os.path.join(self.output_dir, safe_filename)
         self.logger.info(f"Looking for CDG ZIP file in output directory: {output_zip}")
@@ -216,12 +216,12 @@ class CDGGenerator:
     def _get_cdg_path(self, artist: str, title: str) -> str:
         """Get the path to the CDG file."""
-        safe_filename = self._get_safe_filename(artist, title, "Karaoke CDG", "cdg")
+        safe_filename = self._get_safe_filename(artist, title, "Karaoke", "cdg")
         return os.path.join(self.output_dir, safe_filename)
     def _get_mp3_path(self, artist: str, title: str) -> str:
         """Get the path to the MP3 file."""
-        safe_filename = self._get_safe_filename(artist, title, "Karaoke CDG", "mp3")
+        safe_filename = self._get_safe_filename(artist, title, "Karaoke", "mp3")
         return os.path.join(self.output_dir, safe_filename)
     def _verify_output_files(self, cdg_file: str, mp3_file: str) -> None:
@@ -376,7 +376,7 @@ class CDGGenerator:
         cdg_styles: dict,
     ) -> dict:
         """Create TOML data structure."""
-        safe_output_name = self._get_safe_filename(artist, title, "Karaoke CDG")
+        safe_output_name = self._get_safe_filename(artist, title, "Karaoke")
         return {
             "title": title,
             "artist": artist,
@@ -496,7 +496,7 @@ class CDGGenerator:
                 text = text[1:]
             current_line += text + " "
-            self.logger.debug(f"format_lyrics: Current line: '{current_line}'")
+            # self.logger.debug(f"format_lyrics: Current line: '{current_line}'")
             is_last_before_instrumental = any(
                 inst["sync"] > sync_times[i] and (i == len(sync_times) - 1 or sync_times[i + 1] > inst["sync"]) for inst in instrumentals

lyrics_transcriber/output/generator.py CHANGED Viewed

@@ -78,6 +78,7 @@ class OutputGenerator:
                 font_size=self.font_size,
                 line_height=self.line_height,
                 styles=self.config.styles,
+                subtitle_offset_ms=self.config.subtitle_offset_ms,
                 logger=self.logger,
             )
@@ -96,28 +97,42 @@ class OutputGenerator:
     def generate_outputs(
         self,
         transcription_corrected: Optional[CorrectionResult],
-        lyrics_results: List[LyricsData],
+        lyrics_results: dict[str, LyricsData],
         output_prefix: str,
         audio_filepath: str,
         artist: Optional[str] = None,
         title: Optional[str] = None,
+        preview_mode: bool = False,
     ) -> OutputPaths:
         """Generate all requested output formats."""
         outputs = OutputPaths()
         try:
-            # Generate plain lyrics files for each provider
-            for lyrics_data in lyrics_results:
-                self.plain_text.write_lyrics(lyrics_data, output_prefix)
             # Only process transcription-related outputs if we have transcription data
             if transcription_corrected:
-                # Write original (uncorrected) transcription
-                outputs.original_txt = self.plain_text.write_original_transcription(transcription_corrected, output_prefix)
-                # Resize corrected segments to ensure none are longer than max_line_length
+                # Resize corrected segments
                 resized_segments = self.segment_resizer.resize_segments(transcription_corrected.corrected_segments)
                 transcription_corrected.resized_segments = resized_segments
+                # For preview, we only need to generate ASS and video
+                if preview_mode:
+                    # Generate ASS subtitles for preview
+                    outputs.ass = self.subtitle.generate_ass(transcription_corrected.resized_segments, output_prefix, audio_filepath)
+                    # Generate preview video
+                    outputs.video = self.video.generate_preview_video(outputs.ass, audio_filepath, output_prefix)
+                    return outputs
+                # Normal output generation (non-preview mode)
+                # Generate plain lyrics files for each provider
+                for name, lyrics_data in lyrics_results.items():
+                    self.plain_text.write_lyrics(lyrics_data, output_prefix)
+                # Write original (uncorrected) transcription
+                outputs.original_txt = self.plain_text.write_original_transcription(transcription_corrected, output_prefix)
                 outputs.corrections_json = self.write_corrections_data(transcription_corrected, output_prefix)
                 # Write corrected lyrics as plain text
@@ -161,12 +176,12 @@ class OutputGenerator:
             "720p": (1280, 720),
             "360p": (640, 360),
         }
         if resolution not in resolution_map:
             raise ValueError("Invalid video_resolution value. Must be one of: 4k, 1080p, 720p, 360p")
         resolution_dims = resolution_map[resolution]
         # Default font sizes for each resolution
         default_font_sizes = {
             "4k": 250,
@@ -174,13 +189,13 @@ class OutputGenerator:
             "720p": 100,
             "360p": 40,
         }
         # Get font size from styles if available, otherwise use default
         font_size = self.config.styles.get("karaoke", {}).get("font_size", default_font_sizes[resolution])
         # Line height matches font size for all except 360p
         line_height = 50 if resolution == "360p" else font_size
         return resolution_dims, font_size, line_height
     def write_corrections_data(self, correction_result: CorrectionResult, output_prefix: str) -> str:

lyrics_transcriber/output/plain_text.py CHANGED Viewed

@@ -5,12 +5,13 @@ from typing import List, Optional
 from lyrics_transcriber.types import LyricsData, LyricsSegment
 from lyrics_transcriber.correction.corrector import CorrectionResult
 class PlainTextGenerator:
     """Handles generation of plain text output files for lyrics and transcriptions."""
     def __init__(self, output_dir: str, logger: Optional[logging.Logger] = None):
         """Initialize PlainTextGenerator.
         Args:
             output_dir: Directory where output files will be written
             logger: Optional logger instance
@@ -24,11 +25,11 @@ class PlainTextGenerator:
     def write_lyrics(self, lyrics_data: LyricsData, output_prefix: str) -> str:
         """Write plain text lyrics file from provider data.
         Args:
             lyrics_data: LyricsData from a lyrics provider
             output_prefix: Prefix for output filename
         Returns:
             Path to generated file
         """
@@ -38,7 +39,9 @@ class PlainTextGenerator:
         try:
             with open(output_path, "w", encoding="utf-8") as f:
-                f.write(lyrics_data.lyrics)
+                # Join segment texts with newlines
+                lyrics_text = "\n".join(segment.text for segment in lyrics_data.segments)
+                f.write(lyrics_text)
             self.logger.info(f"Plain lyrics file generated: {output_path}")
             return output_path
         except Exception as e:
@@ -47,11 +50,11 @@ class PlainTextGenerator:
     def write_corrected_lyrics(self, segments: List[LyricsSegment], output_prefix: str) -> str:
         """Write corrected lyrics as plain text file.
         Args:
             segments: List of corrected LyricsSegment objects
             output_prefix: Prefix for output filename
         Returns:
             Path to generated file
         """
@@ -70,22 +73,24 @@ class PlainTextGenerator:
     def write_original_transcription(self, correction_result: CorrectionResult, output_prefix: str) -> str:
         """Write original (uncorrected) transcription as plain text.
         Args:
             correction_result: CorrectionResult containing original transcription
             output_prefix: Prefix for output filename
         Returns:
             Path to generated file
         """
         self.logger.info("Writing original transcription file")
         output_path = self._get_output_path(f"{output_prefix} (Lyrics Uncorrected)", "txt")
+        transcribed_text = " ".join(" ".join(w.text for w in segment.words) for segment in correction_result.original_segments)
         try:
             with open(output_path, "w", encoding="utf-8") as f:
-                f.write(correction_result.transcribed_text)
+                f.write(transcribed_text)
             self.logger.info(f"Original transcription file generated: {output_path}")
             return output_path
         except Exception as e:
             self.logger.error(f"Failed to write original transcription file: {str(e)}")
-            raise
+            raise

lyrics_transcriber/output/segment_resizer.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import logging
 import re
-from typing import List, Optional, Tuple
+from typing import List, Optional
 from lyrics_transcriber.types import LyricsSegment, Word
+from lyrics_transcriber.utils.word_utils import WordUtils
 class SegmentResizer:
@@ -101,7 +102,13 @@ class SegmentResizer:
             Output: LyricsSegment(text="Hello World", words=[...])
         """
         cleaned_text = self._clean_text(segment.text)
-        return LyricsSegment(text=cleaned_text, words=segment.words, start_time=segment.start_time, end_time=segment.end_time)
+        return LyricsSegment(
+            id=segment.id,  # Preserve the original segment ID
+            text=cleaned_text,
+            words=segment.words,
+            start_time=segment.start_time,
+            end_time=segment.end_time,
+        )
     def _create_cleaned_word(self, word: Word) -> Word:
         """Create a new word with cleaned text."""
@@ -226,7 +233,13 @@ class SegmentResizer:
     def _create_segment_from_words(self, line: str, words: List[Word]) -> LyricsSegment:
         """Create a new segment from a list of words."""
         cleaned_text = self._clean_text(line)
-        return LyricsSegment(text=cleaned_text, words=words, start_time=words[0].start_time, end_time=words[-1].end_time)
+        return LyricsSegment(
+            id=WordUtils.generate_id(),  # Generate new ID for split segments
+            text=cleaned_text,
+            words=words,
+            start_time=words[0].start_time,
+            end_time=words[-1].end_time,
+        )
     def _process_segment_text(self, text: str) -> List[str]:
         """Process segment text to determine optimal split points."""

lyrics-transcriber 0.40.0__py3-none-any.whl → 0.42.0__py3-none-any.whl

lyrics-transcriber 0.40.0__py3-none-any.whl → 0.42.0__py3-none-any.whl