PyPI - lyrics-transcriber - Versions diffs - 0.36.1__py3-none-any.whl → 0.39.0__py3-none-any.whl - Mend

lyrics-transcriber 0.36.1__py3-none-any.whl → 0.39.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

lyrics_transcriber/core/controller.py CHANGED Viewed

@@ -91,11 +91,11 @@ class LyricsTranscriber:
                 self.output_config.generate_cdg = False
                 self.output_config.render_video = False
-        # Basic settings
+        # Basic settings with sanitized filenames
         self.audio_filepath = audio_filepath
         self.artist = artist
         self.title = title
-        self.output_prefix = f"{artist} - {title}" if artist and title else os.path.splitext(os.path.basename(audio_filepath))[0]
+        self.output_prefix = self._create_sanitized_output_prefix(artist, title)
         # Add after creating necessary folders
         self.logger.debug(f"Using cache directory: {self.output_config.cache_dir}")
@@ -126,6 +126,26 @@ class LyricsTranscriber:
         if self.output_config.render_video:
             self.logger.info(f"    Video resolution: {self.output_config.video_resolution}")
+    def _sanitize_filename(self, filename: str) -> str:
+        """Return ``filename`` with filesystem-unsafe characters turned into underscores.
+        Backslash, slash, colon, asterisk, question mark, double quote, angle
+        brackets and pipe are each replaced by ``_``; trailing spaces are then
+        dropped. A falsy input (``None`` or an empty string) yields ``""``.
+        """
+        if not filename:
+            return ""
+        # A single translate() pass maps every unsafe character to an underscore.
+        unsafe_to_underscore = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})
+        cleaned = filename.translate(unsafe_to_underscore)
+        # Only trailing spaces are stripped; leading spaces and other whitespace stay.
+        return cleaned.rstrip(" ")
+    def _create_sanitized_output_prefix(self, artist: Optional[str], title: Optional[str]) -> str:
+        """Build the sanitized output prefix from ``artist`` and ``title``.
+        When both are truthy the result is ``"<artist> - <title>"`` with each part
+        sanitized separately; otherwise it is the sanitized base name of
+        ``self.audio_filepath`` with its extension removed.
+        """
+        if not (artist and title):
+            # Either part missing: derive the prefix from the audio file name instead.
+            audio_basename = os.path.basename(self.audio_filepath)
+            stem, _extension = os.path.splitext(audio_basename)
+            return self._sanitize_filename(stem)
+        clean_artist = self._sanitize_filename(artist)
+        clean_title = self._sanitize_filename(title)
+        return " - ".join((clean_artist, clean_title))
     def _initialize_transcribers(self) -> Dict[str, BaseTranscriber]:
         """Initialize available transcription services."""
         transcribers = {}

lyrics_transcriber/correction/corrector.py CHANGED Viewed

@@ -33,14 +33,14 @@ class LyricsCorrector:
         # Default handlers in order of preference
         self.handlers = handlers or [
-            WordCountMatchHandler(),
-            RelaxedWordCountMatchHandler(),
-            NoSpacePunctuationMatchHandler(),
-            SyllablesMatchHandler(),
-            ExtendAnchorHandler(),
-            # RepeatCorrectionHandler(),
-            # SoundAlikeHandler(),
-            # LevenshteinHandler(),
+            # WordCountMatchHandler(logger=self.logger),
+            # RelaxedWordCountMatchHandler(logger=self.logger),
+            # NoSpacePunctuationMatchHandler(logger=self.logger),
+            # SyllablesMatchHandler(logger=self.logger),
+            ExtendAnchorHandler(logger=self.logger),
+            # RepeatCorrectionHandler(logger=self.logger),
+            # SoundAlikeHandler(logger=self.logger),
+            # LevenshteinHandler(logger=self.logger),
         ]
     @property

lyrics_transcriber/correction/handlers/base.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from abc import ABC, abstractmethod
 from typing import List, Optional, Tuple, Dict, Any
+import logging
 from lyrics_transcriber.types import GapSequence, WordCorrection
@@ -7,6 +8,9 @@ from lyrics_transcriber.types import GapSequence, WordCorrection
 class GapCorrectionHandler(ABC):
     """Base class for gap correction handlers."""
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        """Store the logger used by this handler, defaulting to this module's logger."""
+        if not logger:
+            logger = logging.getLogger(__name__)
+        self.logger = logger
     @abstractmethod
     def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
         """Determine if this handler can process the given gap.

lyrics_transcriber/correction/handlers/extend_anchor.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from typing import List, Optional, Tuple, Dict, Any
+import logging
 from lyrics_transcriber.types import GapSequence, WordCorrection
 from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
@@ -38,13 +39,19 @@ class ExtendAnchorHandler(GapCorrectionHandler):
             - Leave "youre" and "a" unchanged
     """
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        """Initialise the handler with ``logger``, or with this module's logger when none is given."""
+        super().__init__(logger)
+        # Re-bind after the base initialiser so the fallback logger carries this
+        # module's ``__name__``.
+        if not logger:
+            logger = logging.getLogger(__name__)
+        self.logger = logger
     def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
         # Must have reference words
         if not gap.reference_words:
+            self.logger.debug("No reference words available.")
             return False, {}
         # Gap must have words
         if not gap.words:
+            self.logger.debug("No words in the gap to process.")
             return False, {}
         # At least one word must match between gap and any reference source
@@ -55,6 +62,7 @@ class ExtendAnchorHandler(GapCorrectionHandler):
             for i in range(min(len(gap.words), len(ref_words)))
         )
+        self.logger.debug(f"Can handle gap: {has_match}")
         return has_match, {}
     def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
@@ -72,8 +80,19 @@ class ExtendAnchorHandler(GapCorrectionHandler):
                 confidence = len(matching_sources) / len(gap.reference_words)
                 sources = ", ".join(matching_sources)
-                # Calculate reference positions for matching sources
-                reference_positions = WordOperations.calculate_reference_positions(gap, matching_sources)
+                # Get base reference positions
+                base_reference_positions = WordOperations.calculate_reference_positions(gap, matching_sources)
+                # Adjust reference positions based on the word's position in the reference text
+                reference_positions = {}
+                for source in matching_sources:
+                    if source in base_reference_positions:
+                        # Find this word's position in the reference text
+                        ref_words = gap.reference_words[source]
+                        for ref_idx, ref_word in enumerate(ref_words):
+                            if ref_word.lower() == word.lower():
+                                reference_positions[source] = base_reference_positions[source] + ref_idx
+                                break
                 corrections.append(
                     WordOperations.create_word_replacement_correction(
@@ -86,6 +105,7 @@ class ExtendAnchorHandler(GapCorrectionHandler):
                         reference_positions=reference_positions,
                     )
                 )
+                self.logger.debug(f"Validated word '{word}' with confidence {confidence} from sources: {sources}")
             # No else clause - non-matching words are left unchanged
         return corrections

lyrics_transcriber/correction/handlers/no_space_punct_match.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from typing import List, Optional, Tuple, Dict, Any
+import logging
 import re
 from lyrics_transcriber.types import GapSequence, WordCorrection
@@ -9,6 +10,10 @@ from lyrics_transcriber.correction.handlers.word_operations import WordOperation
 class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
     """Handles gaps where reference text matches when spaces and punctuation are removed."""
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        """Initialise the handler with ``logger``, or with this module's logger when none is given."""
+        super().__init__(logger)
+        # Re-bind after the base initialiser so the fallback logger carries this
+        # module's ``__name__``.
+        if not logger:
+            logger = logging.getLogger(__name__)
+        self.logger = logger
     def _remove_spaces_and_punct(self, words: List[str]) -> str:
         """Join words and remove all whitespace and punctuation."""
         text = "".join(words).lower()
@@ -18,6 +23,7 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
     def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
         # Must have reference words
         if not gap.reference_words:
+            self.logger.debug("No reference words available.")
             return False, {}
         # Get the gap text without spaces and punctuation
@@ -27,8 +33,10 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
         for words in gap.reference_words.values():
             ref_text = self._remove_spaces_and_punct(words)
             if gap_text == ref_text:
+                self.logger.debug("Found a matching reference source with spaces and punctuation removed.")
                 return True, {}
+        self.logger.debug("No matching reference source found with spaces and punctuation removed.")
         return False, {}
     def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
@@ -44,6 +52,7 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
                 matching_source = source
                 reference_words = words
                 reference_words_original = gap.reference_words_original[source]
+                self.logger.debug(f"Using source '{source}' for corrections.")
                 break
         # Calculate reference positions for the matching source
@@ -64,6 +73,7 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
                     reference_positions=reference_positions,
                 )
             )
+            self.logger.debug(f"Combined words into '{reference_words_original[0]}'.")
         elif len(gap.words) < len(reference_words):
             # Single transcribed word -> multiple reference words
@@ -78,21 +88,22 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
                     reference_positions=reference_positions,
                 )
             )
+            self.logger.debug(f"Split word '{gap.words[0]}' into {reference_words_original}.")
         else:
             # One-to-one replacement
             for i, (orig_word, ref_word, ref_word_original) in enumerate(zip(gap.words, reference_words, reference_words_original)):
                 if orig_word.lower() != ref_word.lower():
-                    corrections.append(
-                        WordOperations.create_word_replacement_correction(
-                            original_word=orig_word,
-                            corrected_word=ref_word_original,
-                            original_position=gap.transcription_position + i,
-                            source=matching_source,
-                            confidence=1.0,
-                            reason=f"NoSpacePunctuationMatchHandler: Source '{matching_source}' matched when spaces and punctuation removed",
-                            reference_positions=reference_positions,
-                        )
+                    correction = WordOperations.create_word_replacement_correction(
+                        original_word=orig_word,
+                        corrected_word=ref_word_original,
+                        original_position=gap.transcription_position + i,
+                        source=matching_source,
+                        confidence=1.0,
+                        reason=f"NoSpacePunctuationMatchHandler: Source '{matching_source}' matched when spaces and punctuation removed",
+                        reference_positions=reference_positions,
                     )
+                    corrections.append(correction)
+                    self.logger.debug(f"Correction made: {correction}")
         return corrections

lyrics_transcriber/correction/handlers/relaxed_word_count_match.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from typing import List, Tuple, Dict, Any, Optional
+import logging
 from lyrics_transcriber.types import GapSequence, WordCorrection
 from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
@@ -8,16 +9,23 @@ from lyrics_transcriber.correction.handlers.word_operations import WordOperation
 class RelaxedWordCountMatchHandler(GapCorrectionHandler):
     """Handles gaps where at least one reference source has matching word count."""
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        """Initialise the handler with ``logger``, or with this module's logger when none is given."""
+        super().__init__(logger)
+        # Keep a usable logger when none is passed: assigning the bare argument
+        # here would store None and make every self.logger.debug() call in this
+        # handler raise AttributeError.
+        self.logger = logger or logging.getLogger(__name__)
     def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
         # Must have reference words
         if not gap.reference_words:
+            self.logger.debug("No reference words available.")
             return False, {}
         # Check if any source has matching word count
-        for words in gap.reference_words.values():
+        for source, words in gap.reference_words.items():
             if len(words) == gap.length:
+                self.logger.debug(f"Source '{source}' has matching word count.")
                 return True, {}
+        self.logger.debug("No source with matching word count found.")
         return False, {}
     def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
@@ -32,24 +40,26 @@ class RelaxedWordCountMatchHandler(GapCorrectionHandler):
                 matching_source = source
                 reference_words = words
                 reference_words_original = gap.reference_words_original[source]
+                self.logger.debug(f"Using source '{source}' for corrections.")
                 break
         # Use the centralized method to calculate reference positions for the matching source
         reference_positions = WordOperations.calculate_reference_positions(gap, [matching_source])
+        self.logger.debug(f"Calculated reference positions: {reference_positions}")
         # Since we found a source with matching word count, we can correct using that source
         for i, (orig_word, ref_word, ref_word_original) in enumerate(zip(gap.words, reference_words, reference_words_original)):
             if orig_word.lower() != ref_word.lower():
-                corrections.append(
-                    WordOperations.create_word_replacement_correction(
-                        original_word=orig_word,
-                        corrected_word=ref_word_original,
-                        original_position=gap.transcription_position + i,
-                        source=matching_source,
-                        confidence=1.0,
-                        reason=f"RelaxedWordCountMatchHandler: Source '{matching_source}' had matching word count",
-                        reference_positions=reference_positions,
-                    )
+                correction = WordOperations.create_word_replacement_correction(
+                    original_word=orig_word,
+                    corrected_word=ref_word_original,
+                    original_position=gap.transcription_position + i,
+                    source=matching_source,
+                    confidence=1.0,
+                    reason=f"RelaxedWordCountMatchHandler: Source '{matching_source}' had matching word count",
+                    reference_positions=reference_positions,
                 )
+                corrections.append(correction)
+                self.logger.debug(f"Correction made: {correction}")
         return corrections

lyrics_transcriber/correction/handlers/syllables_match.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Tuple, Dict, Any
+from typing import List, Tuple, Dict, Any, Optional
 import spacy
 import logging
 import pyphen
@@ -15,9 +15,9 @@ from lyrics_transcriber.correction.handlers.word_operations import WordOperation
 class SyllablesMatchHandler(GapCorrectionHandler):
     """Handles gaps where number of syllables in reference text matches number of syllables in transcription."""
-    def __init__(self):
-        # Initialize logger first
-        self.logger = logging.getLogger(__name__)
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        super().__init__(logger)
+        self.logger = logger or logging.getLogger(__name__)
         # Marking SpacySyllables as used to prevent unused import warning
         _ = SpacySyllables

lyrics_transcriber/correction/handlers/word_count_match.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from typing import List, Tuple, Dict, Any, Optional
+import logging
 from lyrics_transcriber.types import GapSequence, WordCorrection
 from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
@@ -8,21 +9,29 @@ from lyrics_transcriber.correction.handlers.word_operations import WordOperation
 class WordCountMatchHandler(GapCorrectionHandler):
     """Handles gaps where reference sources agree and have matching word counts."""
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        """Initialise the handler with ``logger``, or with this module's logger when none is given."""
+        super().__init__(logger)
+        # Re-bind after the base initialiser so the fallback logger carries this
+        # module's ``__name__``.
+        if not logger:
+            logger = logging.getLogger(__name__)
+        self.logger = logger
     def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
         # Must have reference words
         if not gap.reference_words:
+            self.logger.debug("No reference words available.")
             return False, {}
         ref_words_lists = list(gap.reference_words.values())
         # All sources must have same number of words as gap
         if not all(len(words) == gap.length for words in ref_words_lists):
+            self.logger.debug("Not all sources have the same number of words as the gap.")
             return False, {}
         # If we have multiple sources, they must all agree
         if len(ref_words_lists) > 1 and not all(words == ref_words_lists[0] for words in ref_words_lists[1:]):
+            self.logger.debug("Not all sources agree on the words.")
             return False, {}
+        self.logger.debug("All sources agree and have matching word counts.")
         return True, {}
     def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
@@ -39,16 +48,16 @@ class WordCountMatchHandler(GapCorrectionHandler):
         # Since we know all reference sources agree, we can correct all words in the gap
         for i, (orig_word, ref_word, ref_word_original) in enumerate(zip(gap.words, reference_words, reference_words_original)):
             if orig_word.lower() != ref_word.lower():
-                corrections.append(
-                    WordOperations.create_word_replacement_correction(
-                        original_word=orig_word,
-                        corrected_word=ref_word_original,
-                        original_position=gap.transcription_position + i,
-                        source=sources,
-                        confidence=1.0,
-                        reason="WordCountMatchHandler: Reference sources had same word count as gap",
-                        reference_positions=reference_positions,
-                    )
+                correction = WordOperations.create_word_replacement_correction(
+                    original_word=orig_word,
+                    corrected_word=ref_word_original,
+                    original_position=gap.transcription_position + i,
+                    source=sources,
+                    confidence=1.0,
+                    reason="WordCountMatchHandler: Reference sources had same word count as gap",
+                    reference_positions=reference_positions,
                 )
+                corrections.append(correction)
+                self.logger.debug(f"Correction made: {correction}")
         return corrections

lyrics_transcriber/correction/handlers/word_operations.py CHANGED Viewed

@@ -23,9 +23,15 @@ class WordOperations:
             for source in sources_to_check:
                 if source in gap.preceding_anchor.reference_positions:
-                    # Calculate position based on anchor position and offset
+                    # Calculate base position from anchor
                     anchor_pos = gap.preceding_anchor.reference_positions[source]
-                    ref_pos = anchor_pos + len(gap.preceding_anchor.words)
+                    base_ref_pos = anchor_pos + len(gap.preceding_anchor.words)
+                    # Calculate word offset within the gap
+                    word_offset = gap.words.index(gap.words[gap.transcription_position - gap.transcription_position])
+                    # Add word offset to base position
+                    ref_pos = base_ref_pos + word_offset
                     reference_positions[source] = ref_pos
         return reference_positions

lyrics-transcriber 0.36.1__py3-none-any.whl → 0.39.0__py3-none-any.whl

lyrics-transcriber 0.36.1__py3-none-any.whl → 0.39.0__py3-none-any.whl