PyPI - lyrics-transcriber - Versions diffs - 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl - Mend

lyrics-transcriber 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

lyrics_transcriber/cli/{main.py → cli_main.py} +15 -3
lyrics_transcriber/core/controller.py +129 -95
lyrics_transcriber/correction/base_strategy.py +29 -0
lyrics_transcriber/correction/corrector.py +52 -0
lyrics_transcriber/correction/strategy_diff.py +263 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +201 -0
lyrics_transcriber/lyrics/genius.py +70 -0
lyrics_transcriber/lyrics/spotify.py +82 -0
lyrics_transcriber/output/generator.py +158 -97
lyrics_transcriber/output/subtitles.py +12 -12
lyrics_transcriber/storage/dropbox.py +110 -134
lyrics_transcriber/transcribers/audioshake.py +170 -105
lyrics_transcriber/transcribers/base_transcriber.py +186 -0
lyrics_transcriber/transcribers/whisper.py +268 -133
{lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/METADATA +1 -1
lyrics_transcriber-0.30.1.dist-info/RECORD +25 -0
lyrics_transcriber-0.30.1.dist-info/entry_points.txt +3 -0
lyrics_transcriber/core/corrector.py +0 -56
lyrics_transcriber/core/fetcher.py +0 -143
lyrics_transcriber/storage/tokens.py +0 -116
lyrics_transcriber/transcribers/base.py +0 -31
lyrics_transcriber-0.30.0.dist-info/RECORD +0 -22
lyrics_transcriber-0.30.0.dist-info/entry_points.txt +0 -3
{lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/LICENSE +0 -0
{lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/WHEEL +0 -0

lyrics_transcriber/cli/{main.py → cli_main.py} RENAMED Viewed

@@ -57,10 +57,10 @@ def create_arg_parser() -> argparse.ArgumentParser:
     # Output options
     output_group = parser.add_argument_group("Output Options")
     output_group.add_argument("--output_dir", type=Path, help="Directory where output files will be saved. Default: current directory")
     output_group.add_argument(
         "--cache_dir",
         type=Path,
-        default=Path("/tmp/lyrics-transcriber-cache/"),
         help="Directory to cache downloaded/generated files. Default: /tmp/lyrics-transcriber-cache/",
     )
@@ -80,6 +80,18 @@ def create_arg_parser() -> argparse.ArgumentParser:
     return parser
+def parse_args(parser: argparse.ArgumentParser, args_list: list[str] | None = None) -> argparse.Namespace:
+    """Parse and process command line arguments."""
+    # Use provided args_list for testing, otherwise use sys.argv
+    args = parser.parse_args(args_list)
+    # Set default cache_dir if not provided
+    if not hasattr(args, "cache_dir") or args.cache_dir is None:
+        args.cache_dir = Path(os.getenv("LYRICS_TRANSCRIBER_CACHE_DIR", "/tmp/lyrics-transcriber-cache/"))
+    return args
 def get_config_from_env() -> Dict[str, str]:
     """Load configuration from environment variables."""
     load_dotenv()
@@ -121,7 +133,7 @@ def create_configs(args: argparse.Namespace, env_config: Dict[str, str]) -> tupl
     )
     output_config = OutputConfig(
-        output_dir=str(args.output_dir) if args.output_dir else None,
+        output_dir=str(args.output_dir) if args.output_dir else os.getcwd(),
         cache_dir=str(args.cache_dir),
         render_video=args.render_video,
         video_resolution=args.video_resolution,
@@ -151,7 +163,7 @@ def validate_args(args: argparse.Namespace, parser: argparse.ArgumentParser, log
 def main() -> None:
     """Main entry point for the CLI."""
     parser = create_arg_parser()
-    args = parser.parse_args()
+    args = parse_args(parser)
     # Set up logging first
     logger = setup_logging(args.log_level)

lyrics_transcriber/core/controller.py CHANGED Viewed

@@ -1,13 +1,15 @@
 import os
 import logging
-from dataclasses import dataclass
-from typing import Dict, Optional, List
-from ..transcribers.base import BaseTranscriber
-from ..transcribers.audioshake import AudioShakeTranscriber
-from ..transcribers.whisper import WhisperTranscriber
-from .fetcher import LyricsFetcher
-from ..output.generator import OutputGenerator
-from .corrector import LyricsTranscriptionCorrector
+from dataclasses import dataclass, field
+from typing import Dict, Optional, Any, List
+from ..transcribers.base_transcriber import BaseTranscriber, TranscriptionResult
+from ..transcribers.audioshake import AudioShakeTranscriber, AudioShakeConfig
+from ..transcribers.whisper import WhisperTranscriber, WhisperConfig
+from ..lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig, LyricsData
+from ..lyrics.genius import GeniusProvider
+from ..lyrics.spotify import SpotifyProvider
+from ..output.generator import OutputGenerator, OutputGeneratorConfig
+from ..correction.corrector import LyricsCorrector, CorrectionResult
 @dataclass
@@ -31,8 +33,8 @@ class LyricsConfig:
 class OutputConfig:
     """Configuration for output generation."""
-    output_dir: Optional[str] = None
-    cache_dir: str = "/tmp/lyrics-transcriber-cache/"
+    output_dir: Optional[str] = os.getcwd()
+    cache_dir: str = os.getenv("LYRICS_TRANSCRIBER_CACHE_DIR", "/tmp/lyrics-transcriber-cache/")
     render_video: bool = False
     video_resolution: str = "360p"
     video_background_image: Optional[str] = None
@@ -40,21 +42,15 @@ class OutputConfig:
 @dataclass
-class TranscriptionResult:
+class LyricsControllerResult:
     """Holds the results of the transcription and correction process."""
-    # Lyrics from internet sources
-    lyrics_text: Optional[str] = None
-    lyrics_source: Optional[str] = None
-    lyrics_genius: Optional[str] = None
-    lyrics_spotify: Optional[str] = None
-    spotify_lyrics_data: Optional[Dict] = None
+    # Results from different sources
+    lyrics_results: List[LyricsData] = field(default_factory=list)
+    transcription_results: List[TranscriptionResult] = field(default_factory=list)
-    # Transcription results
-    transcription_whisper: Optional[Dict] = None
-    transcription_audioshake: Optional[Dict] = None
-    transcription_primary: Optional[Dict] = None
-    transcription_corrected: Optional[Dict] = None
+    # Corrected results
+    transcription_corrected: Optional[CorrectionResult] = None
     # Output files
     lrc_filepath: Optional[str] = None
@@ -79,8 +75,9 @@ class LyricsTranscriber:
         transcriber_config: Optional[TranscriberConfig] = None,
         lyrics_config: Optional[LyricsConfig] = None,
         output_config: Optional[OutputConfig] = None,
-        lyrics_fetcher: Optional[LyricsFetcher] = None,
-        corrector: Optional[LyricsTranscriptionCorrector] = None,
+        transcribers: Optional[Dict[str, BaseTranscriber]] = None,
+        lyrics_providers: Optional[Dict[str, BaseLyricsProvider]] = None,
+        corrector: Optional[LyricsCorrector] = None,
         output_generator: Optional[OutputGenerator] = None,
         logger: Optional[logging.Logger] = None,
         log_level: int = logging.DEBUG,
@@ -109,46 +106,92 @@ class LyricsTranscriber:
         self.title = title
         self.output_prefix = f"{artist} - {title}" if artist and title else os.path.splitext(os.path.basename(audio_filepath))[0]
+        # Add after creating necessary folders
+        self.logger.debug(f"Using cache directory: {self.output_config.cache_dir}")
+        self.logger.debug(f"Using output directory: {self.output_config.output_dir}")
         # Create necessary folders
         os.makedirs(self.output_config.cache_dir, exist_ok=True)
-        if self.output_config.output_dir:
-            os.makedirs(self.output_config.output_dir, exist_ok=True)
+        os.makedirs(self.output_config.output_dir, exist_ok=True)
         # Initialize results
-        self.results = TranscriptionResult()
+        self.results = LyricsControllerResult()
         # Initialize components (with dependency injection)
-        self.transcribers = self._initialize_transcribers()
-        self.lyrics_fetcher = lyrics_fetcher or self._initialize_lyrics_fetcher()
-        self.corrector = corrector or LyricsTranscriptionCorrector(logger=self.logger)
+        self.transcribers = transcribers or self._initialize_transcribers()
+        self.lyrics_providers = lyrics_providers or self._initialize_lyrics_providers()
+        self.corrector = corrector or LyricsCorrector(logger=self.logger)
         self.output_generator = output_generator or self._initialize_output_generator()
     def _initialize_transcribers(self) -> Dict[str, BaseTranscriber]:
         """Initialize available transcription services."""
         transcribers = {}
+        # Add debug logging for config values
+        self.logger.debug(f"Initializing transcribers with config: {self.transcriber_config}")
+        self.logger.debug(f"Using cache directory for transcribers: {self.output_config.cache_dir}")
         if self.transcriber_config.audioshake_api_token:
-            transcribers["audioshake"] = AudioShakeTranscriber(api_token=self.transcriber_config.audioshake_api_token, logger=self.logger)
+            self.logger.debug("Initializing AudioShake transcriber")
+            transcribers["audioshake"] = {
+                "instance": AudioShakeTranscriber(
+                    cache_dir=self.output_config.cache_dir,
+                    config=AudioShakeConfig(api_token=self.transcriber_config.audioshake_api_token),
+                    logger=self.logger,
+                ),
+                "priority": 1,  # AudioShake has highest priority
+            }
+        else:
+            self.logger.debug("Skipping AudioShake transcriber - no API token provided")
         if self.transcriber_config.runpod_api_key and self.transcriber_config.whisper_runpod_id:
-            transcribers["whisper"] = WhisperTranscriber(
-                logger=self.logger,
-                runpod_api_key=self.transcriber_config.runpod_api_key,
-                endpoint_id=self.transcriber_config.whisper_runpod_id,
-            )
+            self.logger.debug("Initializing Whisper transcriber")
+            transcribers["whisper"] = {
+                "instance": WhisperTranscriber(
+                    cache_dir=self.output_config.cache_dir,
+                    config=WhisperConfig(
+                        runpod_api_key=self.transcriber_config.runpod_api_key, endpoint_id=self.transcriber_config.whisper_runpod_id
+                    ),
+                    logger=self.logger,
+                ),
+                "priority": 2,  # Whisper has lower priority
+            }
+        else:
+            self.logger.debug("Skipping Whisper transcriber - missing runpod_api_key or whisper_runpod_id")
         return transcribers
-    def _initialize_lyrics_fetcher(self) -> LyricsFetcher:
-        """Initialize lyrics fetching service."""
-        return LyricsFetcher(
-            genius_api_token=self.lyrics_config.genius_api_token, spotify_cookie=self.lyrics_config.spotify_cookie, logger=self.logger
+    def _initialize_lyrics_providers(self) -> Dict[str, BaseLyricsProvider]:
+        """Initialize available lyrics providers."""
+        providers = {}
+        # Create provider config with all necessary parameters
+        provider_config = LyricsProviderConfig(
+            genius_api_token=self.lyrics_config.genius_api_token,
+            spotify_cookie=self.lyrics_config.spotify_cookie,
+            cache_dir=self.output_config.cache_dir,
+            audio_filepath=self.audio_filepath,
         )
+        if provider_config.genius_api_token:
+            self.logger.debug("Initializing Genius lyrics provider")
+            providers["genius"] = GeniusProvider(config=provider_config, logger=self.logger)
+        else:
+            self.logger.debug("Skipping Genius provider - no API token provided")
+        if provider_config.spotify_cookie:
+            self.logger.debug("Initializing Spotify lyrics provider")
+            providers["spotify"] = SpotifyProvider(config=provider_config, logger=self.logger)
+        else:
+            self.logger.debug("Skipping Spotify provider - no cookie provided")
+        return providers
     def _initialize_output_generator(self) -> OutputGenerator:
         """Initialize output generation service."""
-        return OutputGenerator(
-            logger=self.logger,
+        # Convert OutputConfig to OutputGeneratorConfig
+        generator_config = OutputGeneratorConfig(
             output_dir=self.output_config.output_dir,
             cache_dir=self.output_config.cache_dir,
             video_resolution=self.output_config.video_resolution,
@@ -156,12 +199,15 @@ class LyricsTranscriber:
             video_background_color=self.output_config.video_background_color,
         )
-    def process(self) -> TranscriptionResult:
+        # Initialize output generator
+        return OutputGenerator(config=generator_config, logger=self.logger)
+    def process(self) -> LyricsControllerResult:
         """
         Main processing method that orchestrates the entire workflow.
         Returns:
-            TranscriptionResult containing all outputs and generated files.
+            LyricsControllerResult containing all outputs and generated files.
         Raises:
             Exception: If a critical error occurs during processing.
@@ -175,12 +221,10 @@ class LyricsTranscriber:
             self.transcribe()
             # Step 3: Process and correct lyrics
-            if self.results.transcription_primary:
-                self.correct_lyrics()
+            self.correct_lyrics()
             # Step 4: Generate outputs
-            if self.results.transcription_corrected:
-                self.generate_outputs()
+            self.generate_outputs()
             self.logger.info("Processing completed successfully")
             return self.results
@@ -190,22 +234,22 @@ class LyricsTranscriber:
             raise
     def fetch_lyrics(self) -> None:
-        """Fetch lyrics from online sources."""
+        """Fetch lyrics from available providers."""
         self.logger.info(f"Fetching lyrics for {self.artist} - {self.title}")
         try:
-            lyrics_result = self.lyrics_fetcher.fetch_lyrics(self.artist, self.title)
-            # Update results
-            self.results.lyrics_text = lyrics_result["lyrics"]
-            self.results.lyrics_source = lyrics_result["source"]
-            self.results.lyrics_genius = lyrics_result["genius_lyrics"]
-            self.results.lyrics_spotify = lyrics_result["spotify_lyrics"]
-            self.results.spotify_lyrics_data = lyrics_result.get("spotify_lyrics_data")
-            if lyrics_result["lyrics"]:
-                self.logger.info(f"Successfully fetched lyrics from {lyrics_result['source']}")
-            else:
+            for name, provider in self.lyrics_providers.items():
+                try:
+                    result = provider.fetch_lyrics(self.artist, self.title)
+                    if result:
+                        self.results.lyrics_results.append(result)
+                        self.logger.info(f"Successfully fetched lyrics from {name}")
+                except Exception as e:
+                    self.logger.error(f"Failed to fetch lyrics from {name}: {str(e)}")
+                    continue
+            if not self.results.lyrics_results:
                 self.logger.warning("No lyrics found from any source")
         except Exception as e:
@@ -214,52 +258,42 @@ class LyricsTranscriber:
     def transcribe(self) -> None:
         """Run transcription using all available transcribers."""
-        self.logger.info("Starting transcription process")
+        self.logger.info(f"Starting transcription with providers: {list(self.transcribers.keys())}")
-        for name, transcriber in self.transcribers.items():
+        for name, transcriber_info in self.transcribers.items():
+            self.logger.info(f"Running transcription with {name}")
             try:
-                result = transcriber.transcribe(self.audio_filepath)
-                # Store result based on transcriber type
-                if name == "whisper":
-                    self.results.transcription_whisper = result
-                elif name == "audioshake":
-                    self.results.transcription_audioshake = result
-                # Use first successful transcription as primary
-                if not self.results.transcription_primary:
-                    self.results.transcription_primary = result
+                result = transcriber_info["instance"].transcribe(self.audio_filepath)
+                if result:
+                    # Add the transcriber name and priority to the result
+                    self.results.transcription_results.append(
+                        TranscriptionResult(name=name, priority=transcriber_info["priority"], result=result)
+                    )
+                    self.logger.debug(f"Transcription completed for {name}")
             except Exception as e:
-                self.logger.error(f"Transcription failed for {name}: {str(e)}")
+                self.logger.error(f"Transcription failed for {name}: {str(e)}", exc_info=True)
                 continue
+        if not self.results.transcription_results:
+            self.logger.warning("No successful transcriptions from any provider")
     def correct_lyrics(self) -> None:
         """Run lyrics correction using transcription and internet lyrics."""
         self.logger.info("Starting lyrics correction process")
         try:
-            # Set input data for correction
-            self.corrector.set_input_data(
-                spotify_lyrics_data_dict=self.results.spotify_lyrics_data,
-                spotify_lyrics_text=self.results.lyrics_spotify,
-                genius_lyrics_text=self.results.lyrics_genius,
-                transcription_data_dict_whisper=self.results.transcription_whisper,
-                transcription_data_dict_audioshake=self.results.transcription_audioshake,
-            )
             # Run correction
-            corrected_data = self.corrector.run_corrector()
+            corrected_data = self.corrector.run(
+                transcription_results=self.results.transcription_results, lyrics_results=self.results.lyrics_results
+            )
             # Store corrected results
             self.results.transcription_corrected = corrected_data
             self.logger.info("Lyrics correction completed")
         except Exception as e:
-            self.logger.error(f"Failed to correct lyrics: {str(e)}")
-            # Use uncorrected transcription as fallback
-            self.results.transcription_corrected = self.results.transcription_primary
-            self.logger.warning("Using uncorrected transcription as fallback")
+            self.logger.error(f"Failed to correct lyrics: {str(e)}", exc_info=True)
     def generate_outputs(self) -> None:
         """Generate output files."""
@@ -267,16 +301,16 @@ class LyricsTranscriber:
         try:
             output_files = self.output_generator.generate_outputs(
-                transcription_data=self.results.transcription_corrected,
+                transcription_corrected=self.results.transcription_corrected,
+                lyrics_results=self.results.lyrics_results,
                 output_prefix=self.output_prefix,
                 audio_filepath=self.audio_filepath,
-                render_video=self.output_config.render_video,
             )
-            # Store output paths
-            self.results.lrc_filepath = output_files.get("lrc")
-            self.results.ass_filepath = output_files.get("ass")
-            self.results.video_filepath = output_files.get("video")
+            # Store output paths - access attributes directly instead of using .get()
+            self.results.lrc_filepath = output_files.lrc
+            self.results.ass_filepath = output_files.ass
+            self.results.video_filepath = output_files.video
         except Exception as e:
             self.logger.error(f"Failed to generate outputs: {str(e)}")

lyrics_transcriber/correction/base_strategy.py ADDED Viewed

@@ -0,0 +1,29 @@
+from dataclasses import dataclass
+from typing import Any, Dict, List, Protocol
+from lyrics_transcriber.lyrics.base_lyrics_provider import LyricsData
+from ..transcribers.base_transcriber import LyricsSegment, TranscriptionResult
+@dataclass
+class CorrectionResult:
+    """Container for correction results."""
+    segments: List[LyricsSegment]
+    text: str
+    confidence: float
+    corrections_made: int
+    source_mapping: Dict[str, str]  # Maps corrected words to their source
+    metadata: Dict[str, Any]
+class CorrectionStrategy(Protocol):
+    """Interface for different lyrics correction strategies."""
+    def correct(
+        self,
+        transcription_results: List[TranscriptionResult],
+        lyrics_results: List[LyricsData],
+    ) -> CorrectionResult:
+        """Apply correction strategy to transcribed lyrics."""
+        ...  # pragma: no cover

lyrics_transcriber/correction/corrector.py ADDED Viewed

@@ -0,0 +1,52 @@
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, Protocol
+import logging
+from lyrics_transcriber.transcribers.base_transcriber import TranscriptionResult
+from lyrics_transcriber.lyrics.base_lyrics_provider import LyricsData
+from .strategy_diff import DiffBasedCorrector
+from .base_strategy import CorrectionResult, CorrectionStrategy
+class LyricsCorrector:
+    """
+    Coordinates lyrics correction process using multiple data sources
+    and correction strategies.
+    """
+    def __init__(
+        self,
+        correction_strategy: Optional[CorrectionStrategy] = None,
+        logger: Optional[logging.Logger] = None,
+    ):
+        self.logger = logger or logging.getLogger(__name__)
+        self.correction_strategy = correction_strategy or DiffBasedCorrector(logger=self.logger)
+    def run(self, transcription_results: List[TranscriptionResult], lyrics_results: List[LyricsData]) -> CorrectionResult:
+        """Execute the correction process using configured strategy."""
+        if not transcription_results:
+            self.logger.error("No transcription results available")
+            raise ValueError("No primary transcription data available")
+        try:
+            self.logger.debug(f"Running correction with strategy: {self.correction_strategy.__class__.__name__}")
+            result = self.correction_strategy.correct(
+                transcription_results=transcription_results,
+                lyrics_results=lyrics_results,
+            )
+            self.logger.debug(f"Correction completed. Made {result.corrections_made} corrections")
+            return result
+        except Exception as e:
+            self.logger.error(f"Correction failed: {str(e)}", exc_info=True)
+            # Return uncorrected transcription as fallback
+            return CorrectionResult(
+                segments=transcription_results[0].result.segments,
+                text=transcription_results[0].result.text,
+                confidence=1.0,
+                corrections_made=0,
+                source_mapping={},
+                metadata=transcription_results[0].result.metadata or {},
+            )

lyrics-transcriber 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl

lyrics-transcriber 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl