PyPI - lyrics-transcriber - Versions diffs - 0.35.1__py3-none-any.whl → 0.37.0__py3-none-any.whl - Mend

lyrics-transcriber 0.35.1__py3-none-any.whl → 0.37.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41)

lyrics_transcriber/cli/cli_main.py CHANGED Viewed

@@ -39,6 +39,7 @@ def create_arg_parser() -> argparse.ArgumentParser:
     song_group = parser.add_argument_group("Song Identification")
     song_group.add_argument("--artist", help="Song artist for lyrics lookup and auto-correction")
     song_group.add_argument("--title", help="Song title for lyrics lookup and auto-correction")
+    song_group.add_argument("--lyrics_file", help="Path to file containing lyrics (txt, docx, or rtf format)")
     # API Credentials
     api_group = parser.add_argument_group("API Credentials")
@@ -134,6 +135,7 @@ def create_configs(args: argparse.Namespace, env_config: Dict[str, str]) -> tupl
     lyrics_config = LyricsConfig(
         genius_api_token=args.genius_api_token or env_config.get("genius_api_token"),
         spotify_cookie=args.spotify_cookie or env_config.get("spotify_cookie"),
+        lyrics_file=args.lyrics_file,
     )
     output_config = OutputConfig(

lyrics_transcriber/core/config.py CHANGED Viewed

@@ -18,7 +18,7 @@ class LyricsConfig:
     genius_api_token: Optional[str] = None
     spotify_cookie: Optional[str] = None
+    lyrics_file: Optional[str] = None
 @dataclass
 class OutputConfig:

lyrics_transcriber/core/controller.py CHANGED Viewed

@@ -13,6 +13,7 @@ from lyrics_transcriber.lyrics.spotify import SpotifyProvider
 from lyrics_transcriber.output.generator import OutputGenerator
 from lyrics_transcriber.correction.corrector import LyricsCorrector
 from lyrics_transcriber.core.config import TranscriberConfig, LyricsConfig, OutputConfig
+from lyrics_transcriber.lyrics.file_provider import FileProvider
 @dataclass
@@ -90,11 +91,11 @@ class LyricsTranscriber:
                 self.output_config.generate_cdg = False
                 self.output_config.render_video = False
-        # Basic settings
+        # Basic settings with sanitized filenames
         self.audio_filepath = audio_filepath
         self.artist = artist
         self.title = title
-        self.output_prefix = f"{artist} - {title}" if artist and title else os.path.splitext(os.path.basename(audio_filepath))[0]
+        self.output_prefix = self._create_sanitized_output_prefix(artist, title)
         # Add after creating necessary folders
         self.logger.debug(f"Using cache directory: {self.output_config.cache_dir}")
@@ -125,6 +126,26 @@ class LyricsTranscriber:
         if self.output_config.render_video:
             self.logger.info(f"    Video resolution: {self.output_config.video_resolution}")
+    def _sanitize_filename(self, filename: str) -> str:
+        """Replace or remove characters that are unsafe for filenames."""
+        if not filename:
+            return ""
+        # Replace problematic characters with underscores
+        for char in ["\\", "/", ":", "*", "?", '"', "<", ">", "|"]:
+            filename = filename.replace(char, "_")
+        # Remove any trailing spaces
+        filename = filename.rstrip(" ")
+        return filename
+    def _create_sanitized_output_prefix(self, artist: Optional[str], title: Optional[str]) -> str:
+        """Create a sanitized output prefix from artist and title."""
+        if artist and title:
+            sanitized_artist = self._sanitize_filename(artist)
+            sanitized_title = self._sanitize_filename(title)
+            return f"{sanitized_artist} - {sanitized_title}"
+        else:
+            return self._sanitize_filename(os.path.splitext(os.path.basename(self.audio_filepath))[0])
     def _initialize_transcribers(self) -> Dict[str, BaseTranscriber]:
         """Initialize available transcription services."""
         transcribers = {}
@@ -171,10 +192,16 @@ class LyricsTranscriber:
         provider_config = LyricsProviderConfig(
             genius_api_token=self.lyrics_config.genius_api_token,
             spotify_cookie=self.lyrics_config.spotify_cookie,
+            lyrics_file=self.lyrics_config.lyrics_file,
             cache_dir=self.output_config.cache_dir,
             audio_filepath=self.audio_filepath,
         )
+        if provider_config.lyrics_file and os.path.exists(provider_config.lyrics_file):
+            self.logger.debug(f"Initializing File lyrics provider with file: {provider_config.lyrics_file}")
+            providers["file"] = FileProvider(config=provider_config, logger=self.logger)
+            return providers
         if provider_config.genius_api_token:
             self.logger.debug("Initializing Genius lyrics provider")
             providers["genius"] = GeniusProvider(config=provider_config, logger=self.logger)
@@ -196,13 +223,19 @@ class LyricsTranscriber:
     def process(self) -> LyricsControllerResult:
         """Main processing method that orchestrates the entire workflow."""
+        self.logger.info(f"LyricsTranscriber controller beginning processing for {self.artist} - {self.title}")
         # Step 1: Fetch lyrics if enabled and artist/title are provided
         if self.output_config.fetch_lyrics and self.artist and self.title:
             self.fetch_lyrics()
+        else:
+            self.logger.info("Skipping lyrics fetching - no artist/title provided or fetching disabled")
         # Step 2: Run transcription if enabled
         if self.output_config.run_transcription:
             self.transcribe()
+        else:
+            self.logger.info("Skipping transcription - transcription disabled")
         # Step 3: Process and correct lyrics if enabled AND we have transcription results
         if self.output_config.run_correction and self.results.transcription_results:

lyrics_transcriber/correction/corrector.py CHANGED Viewed

@@ -33,14 +33,14 @@ class LyricsCorrector:
         # Default handlers in order of preference
         self.handlers = handlers or [
-            WordCountMatchHandler(),
-            RelaxedWordCountMatchHandler(),
-            NoSpacePunctuationMatchHandler(),
-            SyllablesMatchHandler(),
-            ExtendAnchorHandler(),
-            # RepeatCorrectionHandler(),
-            # SoundAlikeHandler(),
-            # LevenshteinHandler(),
+            # WordCountMatchHandler(logger=self.logger),
+            # RelaxedWordCountMatchHandler(logger=self.logger),
+            # NoSpacePunctuationMatchHandler(logger=self.logger),
+            # SyllablesMatchHandler(logger=self.logger),
+            ExtendAnchorHandler(logger=self.logger),
+            # RepeatCorrectionHandler(logger=self.logger),
+            # SoundAlikeHandler(logger=self.logger),
+            # LevenshteinHandler(logger=self.logger),
         ]
     @property

lyrics_transcriber/correction/handlers/base.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from abc import ABC, abstractmethod
 from typing import List, Optional, Tuple, Dict, Any
+import logging
 from lyrics_transcriber.types import GapSequence, WordCorrection
@@ -7,6 +8,9 @@ from lyrics_transcriber.types import GapSequence, WordCorrection
 class GapCorrectionHandler(ABC):
     """Base class for gap correction handlers."""
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        self.logger = logger or logging.getLogger(__name__)
     @abstractmethod
     def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
         """Determine if this handler can process the given gap.

lyrics_transcriber/correction/handlers/extend_anchor.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from typing import List, Optional, Tuple, Dict, Any
+import logging
 from lyrics_transcriber.types import GapSequence, WordCorrection
 from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
@@ -38,13 +39,19 @@ class ExtendAnchorHandler(GapCorrectionHandler):
             - Leave "youre" and "a" unchanged
     """
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        super().__init__(logger)
+        self.logger = logger or logging.getLogger(__name__)
     def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
         # Must have reference words
         if not gap.reference_words:
+            self.logger.debug("No reference words available.")
             return False, {}
         # Gap must have words
         if not gap.words:
+            self.logger.debug("No words in the gap to process.")
             return False, {}
         # At least one word must match between gap and any reference source
@@ -55,6 +62,7 @@ class ExtendAnchorHandler(GapCorrectionHandler):
             for i in range(min(len(gap.words), len(ref_words)))
         )
+        self.logger.debug(f"Can handle gap: {has_match}")
         return has_match, {}
     def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
@@ -86,6 +94,7 @@ class ExtendAnchorHandler(GapCorrectionHandler):
                         reference_positions=reference_positions,
                     )
                 )
+                self.logger.debug(f"Validated word '{word}' with confidence {confidence} from sources: {sources}")
             # No else clause - non-matching words are left unchanged
         return corrections

lyrics_transcriber/correction/handlers/no_space_punct_match.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from typing import List, Optional, Tuple, Dict, Any
+import logging
 import re
 from lyrics_transcriber.types import GapSequence, WordCorrection
@@ -9,6 +10,10 @@ from lyrics_transcriber.correction.handlers.word_operations import WordOperation
 class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
     """Handles gaps where reference text matches when spaces and punctuation are removed."""
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        super().__init__(logger)
+        self.logger = logger or logging.getLogger(__name__)
     def _remove_spaces_and_punct(self, words: List[str]) -> str:
         """Join words and remove all whitespace and punctuation."""
         text = "".join(words).lower()
@@ -18,6 +23,7 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
     def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
         # Must have reference words
         if not gap.reference_words:
+            self.logger.debug("No reference words available.")
             return False, {}
         # Get the gap text without spaces and punctuation
@@ -27,8 +33,10 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
         for words in gap.reference_words.values():
             ref_text = self._remove_spaces_and_punct(words)
             if gap_text == ref_text:
+                self.logger.debug("Found a matching reference source with spaces and punctuation removed.")
                 return True, {}
+        self.logger.debug("No matching reference source found with spaces and punctuation removed.")
         return False, {}
     def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
@@ -44,6 +52,7 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
                 matching_source = source
                 reference_words = words
                 reference_words_original = gap.reference_words_original[source]
+                self.logger.debug(f"Using source '{source}' for corrections.")
                 break
         # Calculate reference positions for the matching source
@@ -64,6 +73,7 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
                     reference_positions=reference_positions,
                 )
             )
+            self.logger.debug(f"Combined words into '{reference_words_original[0]}'.")
         elif len(gap.words) < len(reference_words):
             # Single transcribed word -> multiple reference words
@@ -78,21 +88,22 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
                     reference_positions=reference_positions,
                 )
             )
+            self.logger.debug(f"Split word '{gap.words[0]}' into {reference_words_original}.")
         else:
             # One-to-one replacement
             for i, (orig_word, ref_word, ref_word_original) in enumerate(zip(gap.words, reference_words, reference_words_original)):
                 if orig_word.lower() != ref_word.lower():
-                    corrections.append(
-                        WordOperations.create_word_replacement_correction(
-                            original_word=orig_word,
-                            corrected_word=ref_word_original,
-                            original_position=gap.transcription_position + i,
-                            source=matching_source,
-                            confidence=1.0,
-                            reason=f"NoSpacePunctuationMatchHandler: Source '{matching_source}' matched when spaces and punctuation removed",
-                            reference_positions=reference_positions,
-                        )
+                    correction = WordOperations.create_word_replacement_correction(
+                        original_word=orig_word,
+                        corrected_word=ref_word_original,
+                        original_position=gap.transcription_position + i,
+                        source=matching_source,
+                        confidence=1.0,
+                        reason=f"NoSpacePunctuationMatchHandler: Source '{matching_source}' matched when spaces and punctuation removed",
+                        reference_positions=reference_positions,
                     )
+                    corrections.append(correction)
+                    self.logger.debug(f"Correction made: {correction}")
         return corrections

lyrics_transcriber/correction/handlers/relaxed_word_count_match.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from typing import List, Tuple, Dict, Any, Optional
+import logging
 from lyrics_transcriber.types import GapSequence, WordCorrection
 from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
@@ -8,16 +9,23 @@ from lyrics_transcriber.correction.handlers.word_operations import WordOperation
 class RelaxedWordCountMatchHandler(GapCorrectionHandler):
     """Handles gaps where at least one reference source has matching word count."""
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        super().__init__(logger)
+        self.logger = logger
     def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
         # Must have reference words
         if not gap.reference_words:
+            self.logger.debug("No reference words available.")
             return False, {}
         # Check if any source has matching word count
-        for words in gap.reference_words.values():
+        for source, words in gap.reference_words.items():
             if len(words) == gap.length:
+                self.logger.debug(f"Source '{source}' has matching word count.")
                 return True, {}
+        self.logger.debug("No source with matching word count found.")
         return False, {}
     def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
@@ -32,24 +40,26 @@ class RelaxedWordCountMatchHandler(GapCorrectionHandler):
                 matching_source = source
                 reference_words = words
                 reference_words_original = gap.reference_words_original[source]
+                self.logger.debug(f"Using source '{source}' for corrections.")
                 break
         # Use the centralized method to calculate reference positions for the matching source
         reference_positions = WordOperations.calculate_reference_positions(gap, [matching_source])
+        self.logger.debug(f"Calculated reference positions: {reference_positions}")
         # Since we found a source with matching word count, we can correct using that source
         for i, (orig_word, ref_word, ref_word_original) in enumerate(zip(gap.words, reference_words, reference_words_original)):
             if orig_word.lower() != ref_word.lower():
-                corrections.append(
-                    WordOperations.create_word_replacement_correction(
-                        original_word=orig_word,
-                        corrected_word=ref_word_original,
-                        original_position=gap.transcription_position + i,
-                        source=matching_source,
-                        confidence=1.0,
-                        reason=f"RelaxedWordCountMatchHandler: Source '{matching_source}' had matching word count",
-                        reference_positions=reference_positions,
-                    )
+                correction = WordOperations.create_word_replacement_correction(
+                    original_word=orig_word,
+                    corrected_word=ref_word_original,
+                    original_position=gap.transcription_position + i,
+                    source=matching_source,
+                    confidence=1.0,
+                    reason=f"RelaxedWordCountMatchHandler: Source '{matching_source}' had matching word count",
+                    reference_positions=reference_positions,
                 )
+                corrections.append(correction)
+                self.logger.debug(f"Correction made: {correction}")
         return corrections

lyrics_transcriber/correction/handlers/syllables_match.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Tuple, Dict, Any
+from typing import List, Tuple, Dict, Any, Optional
 import spacy
 import logging
 import pyphen
@@ -15,9 +15,9 @@ from lyrics_transcriber.correction.handlers.word_operations import WordOperation
 class SyllablesMatchHandler(GapCorrectionHandler):
     """Handles gaps where number of syllables in reference text matches number of syllables in transcription."""
-    def __init__(self):
-        # Initialize logger first
-        self.logger = logging.getLogger(__name__)
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        super().__init__(logger)
+        self.logger = logger or logging.getLogger(__name__)
         # Marking SpacySyllables as used to prevent unused import warning
         _ = SpacySyllables

lyrics_transcriber/correction/handlers/word_count_match.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from typing import List, Tuple, Dict, Any, Optional
+import logging
 from lyrics_transcriber.types import GapSequence, WordCorrection
 from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
@@ -8,21 +9,29 @@ from lyrics_transcriber.correction.handlers.word_operations import WordOperation
 class WordCountMatchHandler(GapCorrectionHandler):
     """Handles gaps where reference sources agree and have matching word counts."""
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        super().__init__(logger)
+        self.logger = logger or logging.getLogger(__name__)
     def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
         # Must have reference words
         if not gap.reference_words:
+            self.logger.debug("No reference words available.")
             return False, {}
         ref_words_lists = list(gap.reference_words.values())
         # All sources must have same number of words as gap
         if not all(len(words) == gap.length for words in ref_words_lists):
+            self.logger.debug("Not all sources have the same number of words as the gap.")
             return False, {}
         # If we have multiple sources, they must all agree
         if len(ref_words_lists) > 1 and not all(words == ref_words_lists[0] for words in ref_words_lists[1:]):
+            self.logger.debug("Not all sources agree on the words.")
             return False, {}
+        self.logger.debug("All sources agree and have matching word counts.")
         return True, {}
     def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
@@ -39,16 +48,16 @@ class WordCountMatchHandler(GapCorrectionHandler):
         # Since we know all reference sources agree, we can correct all words in the gap
         for i, (orig_word, ref_word, ref_word_original) in enumerate(zip(gap.words, reference_words, reference_words_original)):
             if orig_word.lower() != ref_word.lower():
-                corrections.append(
-                    WordOperations.create_word_replacement_correction(
-                        original_word=orig_word,
-                        corrected_word=ref_word_original,
-                        original_position=gap.transcription_position + i,
-                        source=sources,
-                        confidence=1.0,
-                        reason="WordCountMatchHandler: Reference sources had same word count as gap",
-                        reference_positions=reference_positions,
-                    )
+                correction = WordOperations.create_word_replacement_correction(
+                    original_word=orig_word,
+                    corrected_word=ref_word_original,
+                    original_position=gap.transcription_position + i,
+                    source=sources,
+                    confidence=1.0,
+                    reason="WordCountMatchHandler: Reference sources had same word count as gap",
+                    reference_positions=reference_positions,
                 )
+                corrections.append(correction)
+                self.logger.debug(f"Correction made: {correction}")
         return corrections

lyrics-transcriber 0.35.1__py3-none-any.whl → 0.37.0__py3-none-any.whl

lyrics-transcriber 0.35.1__py3-none-any.whl → 0.37.0__py3-none-any.whl