PyPI - karaoke-gen - Versions diffs - 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl - Mend

karaoke-gen 0.57.0 (py3-none-any.whl) → 0.71.27 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (268) hide show

karaoke_gen/audio_fetcher.py +461 -0
karaoke_gen/audio_processor.py +407 -30
karaoke_gen/config.py +62 -113
karaoke_gen/file_handler.py +32 -59
karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
karaoke_gen/karaoke_gen.py +270 -61
karaoke_gen/lyrics_processor.py +13 -1
karaoke_gen/metadata.py +78 -73
karaoke_gen/pipeline/__init__.py +87 -0
karaoke_gen/pipeline/base.py +215 -0
karaoke_gen/pipeline/context.py +230 -0
karaoke_gen/pipeline/executors/__init__.py +21 -0
karaoke_gen/pipeline/executors/local.py +159 -0
karaoke_gen/pipeline/executors/remote.py +257 -0
karaoke_gen/pipeline/stages/__init__.py +27 -0
karaoke_gen/pipeline/stages/finalize.py +202 -0
karaoke_gen/pipeline/stages/render.py +165 -0
karaoke_gen/pipeline/stages/screens.py +139 -0
karaoke_gen/pipeline/stages/separation.py +191 -0
karaoke_gen/pipeline/stages/transcription.py +191 -0
karaoke_gen/style_loader.py +531 -0
karaoke_gen/utils/bulk_cli.py +6 -0
karaoke_gen/utils/cli_args.py +424 -0
karaoke_gen/utils/gen_cli.py +26 -261
karaoke_gen/utils/remote_cli.py +1965 -0
karaoke_gen/video_background_processor.py +351 -0
karaoke_gen-0.71.27.dist-info/METADATA +610 -0
karaoke_gen-0.71.27.dist-info/RECORD +275 -0
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/WHEEL +1 -1
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/entry_points.txt +1 -0
lyrics_transcriber/__init__.py +10 -0
lyrics_transcriber/cli/__init__.py +0 -0
lyrics_transcriber/cli/cli_main.py +285 -0
lyrics_transcriber/core/__init__.py +0 -0
lyrics_transcriber/core/config.py +50 -0
lyrics_transcriber/core/controller.py +520 -0
lyrics_transcriber/correction/__init__.py +0 -0
lyrics_transcriber/correction/agentic/__init__.py +9 -0
lyrics_transcriber/correction/agentic/adapter.py +71 -0
lyrics_transcriber/correction/agentic/agent.py +313 -0
lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
lyrics_transcriber/correction/agentic/models/enums.py +38 -0
lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
lyrics_transcriber/correction/agentic/models/utils.py +19 -0
lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
lyrics_transcriber/correction/agentic/providers/base.py +36 -0
lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
lyrics_transcriber/correction/agentic/providers/config.py +73 -0
lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
lyrics_transcriber/correction/agentic/providers/health.py +28 -0
lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
lyrics_transcriber/correction/agentic/router.py +35 -0
lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
lyrics_transcriber/correction/anchor_sequence.py +1043 -0
lyrics_transcriber/correction/corrector.py +760 -0
lyrics_transcriber/correction/feedback/__init__.py +2 -0
lyrics_transcriber/correction/feedback/schemas.py +107 -0
lyrics_transcriber/correction/feedback/store.py +236 -0
lyrics_transcriber/correction/handlers/__init__.py +0 -0
lyrics_transcriber/correction/handlers/base.py +52 -0
lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
lyrics_transcriber/correction/handlers/llm.py +293 -0
lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
lyrics_transcriber/correction/handlers/repeat.py +88 -0
lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
lyrics_transcriber/correction/handlers/word_operations.py +187 -0
lyrics_transcriber/correction/operations.py +352 -0
lyrics_transcriber/correction/phrase_analyzer.py +435 -0
lyrics_transcriber/correction/text_utils.py +30 -0
lyrics_transcriber/frontend/.gitignore +23 -0
lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
lyrics_transcriber/frontend/.yarnrc.yml +3 -0
lyrics_transcriber/frontend/README.md +50 -0
lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
lyrics_transcriber/frontend/__init__.py +25 -0
lyrics_transcriber/frontend/eslint.config.js +28 -0
lyrics_transcriber/frontend/index.html +18 -0
lyrics_transcriber/frontend/package.json +42 -0
lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/public/favicon.ico +0 -0
lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/src/App.tsx +212 -0
lyrics_transcriber/frontend/src/api.ts +239 -0
lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
lyrics_transcriber/frontend/src/main.tsx +17 -0
lyrics_transcriber/frontend/src/theme.ts +177 -0
lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
lyrics_transcriber/frontend/src/types.js +2 -0
lyrics_transcriber/frontend/src/types.ts +199 -0
lyrics_transcriber/frontend/src/validation.ts +132 -0
lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
lyrics_transcriber/frontend/tsconfig.app.json +26 -0
lyrics_transcriber/frontend/tsconfig.json +25 -0
lyrics_transcriber/frontend/tsconfig.node.json +23 -0
lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
lyrics_transcriber/frontend/update_version.js +11 -0
lyrics_transcriber/frontend/vite.config.d.ts +2 -0
lyrics_transcriber/frontend/vite.config.js +10 -0
lyrics_transcriber/frontend/vite.config.ts +11 -0
lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
lyrics_transcriber/frontend/web_assets/index.html +18 -0
lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/yarn.lock +3752 -0
lyrics_transcriber/lyrics/__init__.py +0 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
lyrics_transcriber/lyrics/file_provider.py +95 -0
lyrics_transcriber/lyrics/genius.py +384 -0
lyrics_transcriber/lyrics/lrclib.py +231 -0
lyrics_transcriber/lyrics/musixmatch.py +156 -0
lyrics_transcriber/lyrics/spotify.py +290 -0
lyrics_transcriber/lyrics/user_input_provider.py +44 -0
lyrics_transcriber/output/__init__.py +0 -0
lyrics_transcriber/output/ass/__init__.py +21 -0
lyrics_transcriber/output/ass/ass.py +2088 -0
lyrics_transcriber/output/ass/ass_specs.txt +732 -0
lyrics_transcriber/output/ass/config.py +180 -0
lyrics_transcriber/output/ass/constants.py +23 -0
lyrics_transcriber/output/ass/event.py +94 -0
lyrics_transcriber/output/ass/formatters.py +132 -0
lyrics_transcriber/output/ass/lyrics_line.py +265 -0
lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
lyrics_transcriber/output/ass/section_detector.py +89 -0
lyrics_transcriber/output/ass/section_screen.py +106 -0
lyrics_transcriber/output/ass/style.py +187 -0
lyrics_transcriber/output/cdg.py +619 -0
lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
lyrics_transcriber/output/cdgmaker/config.py +151 -0
lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
lyrics_transcriber/output/cdgmaker/pack.py +507 -0
lyrics_transcriber/output/cdgmaker/render.py +346 -0
lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
lyrics_transcriber/output/cdgmaker/utils.py +132 -0
lyrics_transcriber/output/countdown_processor.py +267 -0
lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
lyrics_transcriber/output/fonts/arial.ttf +0 -0
lyrics_transcriber/output/fonts/georgia.ttf +0 -0
lyrics_transcriber/output/fonts/verdana.ttf +0 -0
lyrics_transcriber/output/generator.py +257 -0
lyrics_transcriber/output/lrc_to_cdg.py +61 -0
lyrics_transcriber/output/lyrics_file.py +102 -0
lyrics_transcriber/output/plain_text.py +96 -0
lyrics_transcriber/output/segment_resizer.py +431 -0
lyrics_transcriber/output/subtitles.py +397 -0
lyrics_transcriber/output/video.py +544 -0
lyrics_transcriber/review/__init__.py +0 -0
lyrics_transcriber/review/server.py +676 -0
lyrics_transcriber/storage/__init__.py +0 -0
lyrics_transcriber/storage/dropbox.py +225 -0
lyrics_transcriber/transcribers/__init__.py +0 -0
lyrics_transcriber/transcribers/audioshake.py +290 -0
lyrics_transcriber/transcribers/base_transcriber.py +157 -0
lyrics_transcriber/transcribers/whisper.py +330 -0
lyrics_transcriber/types.py +648 -0
lyrics_transcriber/utils/__init__.py +0 -0
lyrics_transcriber/utils/word_utils.py +27 -0
karaoke_gen-0.57.0.dist-info/METADATA +0 -167
karaoke_gen-0.57.0.dist-info/RECORD +0 -23
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info/licenses}/LICENSE +0 -0

lyrics_transcriber/correction/handlers/no_space_punct_match.py ADDED Viewed

@@ -0,0 +1,154 @@
+from typing import List, Optional, Tuple, Dict, Any
+import logging
+import re
+from lyrics_transcriber.types import GapSequence, WordCorrection
+from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
+from lyrics_transcriber.correction.handlers.word_operations import WordOperations
+class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
+    """Handles gaps where reference text matches when spaces and punctuation are removed."""
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        super().__init__(logger)
+        self.logger = logger or logging.getLogger(__name__)
+    def _remove_spaces_and_punct(self, words: List[str]) -> str:
+        """Join words and remove all whitespace and punctuation."""
+        text = "".join(words).lower()
+        # Remove all punctuation including apostrophes
+        return re.sub(r"[^\w\s]", "", text)
+    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
+        # Must have reference words
+        if not gap.reference_word_ids:
+            self.logger.debug("No reference word IDs available.")
+            return False, {}
+        # Get word lookup map from data
+        if not data or "word_map" not in data:
+            self.logger.error("No word_map provided in data")
+            return False, {}
+        word_map = data["word_map"]
+        # Get the actual words from word IDs
+        gap_words = []
+        for word_id in gap.transcribed_word_ids:
+            if word_id not in word_map:
+                self.logger.error(f"Word ID {word_id} not found in word_map")
+                return False, {}
+            gap_words.append(word_map[word_id].text)
+        # Get the gap text without spaces and punctuation
+        gap_text = self._remove_spaces_and_punct(gap_words)
+        # Check if any reference source matches when spaces and punctuation are removed
+        for source, ref_word_ids in gap.reference_word_ids.items():
+            ref_words = []
+            for word_id in ref_word_ids:
+                if word_id not in word_map:
+                    self.logger.error(f"Reference word ID {word_id} not found in word_map")
+                    continue
+                ref_words.append(word_map[word_id].text)
+            if not ref_words:
+                continue
+            ref_text = self._remove_spaces_and_punct(ref_words)
+            if gap_text == ref_text:
+                self.logger.debug("Found a matching reference source with spaces and punctuation removed.")
+                return True, {
+                    "matching_source": source,
+                    "reference_word_ids": ref_word_ids,
+                    "word_map": word_map,
+                }
+        self.logger.debug("No matching reference source found with spaces and punctuation removed.")
+        return False, {}
+    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
+        """Handle the gap using no-space punctuation matching."""
+        if not data:
+            can_handle, data = self.can_handle(gap)
+            if not can_handle:
+                return []
+        corrections = []
+        matching_source = data["matching_source"]
+        reference_word_ids = data["reference_word_ids"]
+        word_map = data["word_map"]
+        # Calculate reference positions for the matching source
+        reference_positions = WordOperations.calculate_reference_positions(gap, [matching_source])
+        # Handle cases where number of words differ
+        if len(gap.transcribed_word_ids) > len(reference_word_ids):
+            # Multiple transcribed words -> fewer reference words
+            # Get the actual words from word IDs
+            gap_words = [word_map[word_id].text for word_id in gap.transcribed_word_ids]
+            ref_word = word_map[reference_word_ids[0]].text
+            corrections.extend(
+                WordOperations.create_word_combine_corrections(
+                    original_words=gap_words,
+                    reference_word=ref_word,
+                    original_position=gap.transcription_position,
+                    source=matching_source,
+                    confidence=1.0,
+                    combine_reason="Words combined based on text match",
+                    delete_reason="Word removed as part of text match combination",
+                    reference_positions=reference_positions,
+                    handler="NoSpacePunctuationMatchHandler",
+                    original_word_ids=gap.transcribed_word_ids,
+                    corrected_word_id=reference_word_ids[0],  # Use the reference word's ID
+                )
+            )
+            self.logger.debug(f"Combined words into '{ref_word}'.")
+        elif len(gap.transcribed_word_ids) < len(reference_word_ids):
+            # Single transcribed word -> multiple reference words
+            # Get the actual words
+            gap_word = word_map[gap.transcribed_word_ids[0]].text
+            ref_words = [word_map[word_id].text for word_id in reference_word_ids]
+            corrections.extend(
+                WordOperations.create_word_split_corrections(
+                    original_word=gap_word,
+                    reference_words=ref_words,
+                    original_position=gap.transcription_position,
+                    source=matching_source,
+                    confidence=1.0,
+                    reason="Split word based on text match",
+                    reference_positions=reference_positions,
+                    handler="NoSpacePunctuationMatchHandler",
+                    original_word_id=gap.transcribed_word_ids[0],
+                    corrected_word_ids=reference_word_ids,  # Use the reference word IDs
+                )
+            )
+            self.logger.debug(f"Split word '{gap_word}' into {ref_words}.")
+        else:
+            # One-to-one replacement
+            for i, (orig_word_id, ref_word_id) in enumerate(zip(gap.transcribed_word_ids, reference_word_ids)):
+                orig_word = word_map[orig_word_id]
+                ref_word = word_map[ref_word_id]
+                if orig_word.text.lower() != ref_word.text.lower():
+                    correction = WordOperations.create_word_replacement_correction(
+                        original_word=orig_word.text,
+                        corrected_word=ref_word.text,
+                        original_position=gap.transcription_position + i,
+                        source=matching_source,
+                        confidence=1.0,
+                        reason=f"Source '{matching_source}' matched when spaces and punctuation removed",
+                        reference_positions=reference_positions,
+                        handler="NoSpacePunctuationMatchHandler",
+                        original_word_id=orig_word_id,
+                        corrected_word_id=ref_word_id,
+                    )
+                    corrections.append(correction)
+                    self.logger.debug(f"Correction made: {correction}")
+        return corrections

lyrics_transcriber/correction/handlers/relaxed_word_count_match.py ADDED Viewed

@@ -0,0 +1,85 @@
+from typing import List, Tuple, Dict, Any, Optional
+import logging
+from lyrics_transcriber.types import GapSequence, WordCorrection
+from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
+from lyrics_transcriber.correction.handlers.word_operations import WordOperations
+class RelaxedWordCountMatchHandler(GapCorrectionHandler):
+    """Handles gaps where at least one reference source has matching word count."""
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        super().__init__(logger)
+        self.logger = logger or logging.getLogger(__name__)
+    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
+        # Must have reference words
+        if not gap.reference_word_ids:
+            self.logger.debug("No reference word IDs available.")
+            return False, {}
+        if not self._validate_data(data):
+            return False, {}
+        # Check if any source has matching word count
+        for source, ref_word_ids in gap.reference_word_ids.items():
+            if len(ref_word_ids) == gap.length:
+                self.logger.debug(f"Source '{source}' has matching word count.")
+                return True, {
+                    "matching_source": source,
+                    "reference_word_ids": ref_word_ids,
+                    "word_map": data["word_map"],
+                    "anchor_sequences": data.get("anchor_sequences", []),
+                }
+        self.logger.debug("No source with matching word count found.")
+        return False, {}
+    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
+        """Handle the gap using word count matching."""
+        if not self._validate_data(data):
+            return []
+        corrections = []
+        matching_source = data["matching_source"]
+        reference_word_ids = data["reference_word_ids"]
+        word_map = data["word_map"]
+        anchor_sequences = data.get("anchor_sequences", [])
+        # Use the centralized method to calculate reference positions
+        reference_positions = WordOperations.calculate_reference_positions(
+            gap, sources=[matching_source], anchor_sequences=anchor_sequences
+        )
+        self.logger.debug(f"Calculated reference positions: {reference_positions}")
+        # Since we found a source with matching word count, we can correct using that source
+        for i, (orig_word_id, ref_word_id) in enumerate(zip(gap.transcribed_word_ids, reference_word_ids)):
+            # Get the actual words from the word map
+            if orig_word_id not in word_map:
+                self.logger.error(f"Original word ID {orig_word_id} not found in word_map")
+                continue
+            orig_word = word_map[orig_word_id]
+            if ref_word_id not in word_map:
+                self.logger.error(f"Reference word ID {ref_word_id} not found in word_map")
+                continue
+            ref_word = word_map[ref_word_id]
+            if orig_word.text.lower() != ref_word.text.lower():
+                correction = WordOperations.create_word_replacement_correction(
+                    original_word=orig_word.text,
+                    corrected_word=ref_word.text,
+                    original_position=gap.transcription_position + i,
+                    source=matching_source,
+                    confidence=1.0,
+                    reason=f"Source '{matching_source}' had matching word count",
+                    reference_positions=reference_positions,
+                    handler="RelaxedWordCountMatchHandler",
+                    original_word_id=orig_word_id,
+                    corrected_word_id=ref_word_id,  # Use the reference word's ID
+                )
+                corrections.append(correction)
+                self.logger.debug(f"Correction made: {correction}")
+        return corrections

lyrics_transcriber/correction/handlers/repeat.py ADDED Viewed

@@ -0,0 +1,88 @@
+from typing import List, Dict, Optional, Tuple, Any
+from lyrics_transcriber.types import GapSequence, WordCorrection
+from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
+from lyrics_transcriber.correction.handlers.word_operations import WordOperations
+import logging
+class RepeatCorrectionHandler(GapCorrectionHandler):
+    """Handler that applies corrections that were previously made by other handlers."""
+    def __init__(self, logger: Optional[logging.Logger] = None, confidence_threshold: float = 0.7):
+        super().__init__(logger)
+        self.logger = logger or logging.getLogger(__name__)
+        self.confidence_threshold = confidence_threshold
+        self.previous_corrections: List[WordCorrection] = []
+    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
+        """Check if any words in the gap match previous corrections."""
+        if not self._validate_data(data):
+            return False, {}
+        return bool(self.previous_corrections), {"word_map": data["word_map"], "anchor_sequences": data.get("anchor_sequences", [])}
+    def set_previous_corrections(self, corrections: List[WordCorrection]) -> None:
+        """Store corrections from previous handlers to use as reference."""
+        self.previous_corrections = corrections
+    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
+        """Apply previous corrections to matching words in the current gap."""
+        if not self._validate_data(data):
+            return []
+        word_map = data["word_map"]
+        corrections = []
+        # Use the centralized method to calculate reference positions
+        reference_positions = WordOperations.calculate_reference_positions(gap, anchor_sequences=data.get("anchor_sequences", []))
+        # Build a map of original words to their corrections
+        correction_map: Dict[str, List[WordCorrection]] = {}
+        for corr in self.previous_corrections:
+            if corr.confidence >= self.confidence_threshold:
+                correction_map.setdefault(corr.original_word.lower(), []).append(corr)
+        # Check each word in the gap
+        for i, word_id in enumerate(gap.transcribed_word_ids):
+            if word_id not in word_map:
+                self.logger.error(f"Word ID {word_id} not found in word map")
+                continue
+            word = word_map[word_id]
+            word_lower = word.text.lower()
+            if word_lower in correction_map:
+                # Get the most common correction for this word
+                prev_corrections = correction_map[word_lower]
+                best_correction = max(
+                    prev_corrections,
+                    key=lambda c: (sum(1 for pc in prev_corrections if pc.corrected_word == c.corrected_word), c.confidence),
+                )
+                self.logger.debug(
+                    f"Applying previous correction: {word.text} -> {best_correction.corrected_word} "
+                    f"(confidence: {best_correction.confidence:.2f})"
+                )
+                corrections.append(
+                    WordCorrection(
+                        original_word=word.text,
+                        corrected_word=best_correction.corrected_word,
+                        segment_index=0,
+                        original_position=gap.transcription_position + i,
+                        confidence=best_correction.confidence * 0.9,  # Slightly lower confidence for repeats
+                        source=best_correction.source,
+                        reason=f"RepeatCorrectionHandler: Matches previous correction",
+                        alternatives={best_correction.corrected_word: 1},
+                        is_deletion=best_correction.is_deletion,
+                        reference_positions=reference_positions,
+                        length=best_correction.length,
+                        split_index=best_correction.split_index,
+                        split_total=best_correction.split_total,
+                        handler="RepeatCorrectionHandler",
+                        word_id=word_id,
+                        corrected_word_id=best_correction.corrected_word_id,
+                    )
+                )
+        return corrections

lyrics_transcriber/correction/handlers/sound_alike.py ADDED Viewed

@@ -0,0 +1,259 @@
+from typing import List, Dict, Tuple, Optional, Any
+import logging
+from metaphone import doublemetaphone
+from lyrics_transcriber.types import GapSequence, WordCorrection
+from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
+from lyrics_transcriber.correction.handlers.word_operations import WordOperations
+class SoundAlikeHandler(GapCorrectionHandler):
+    """Handles gaps where words sound similar to reference words but are spelled differently.
+    Uses Double Metaphone algorithm to detect sound-alike words. For each word in the gap,
+    it checks if its phonetic encoding matches any reference word's encoding.
+    The confidence of corrections is based on:
+    1. The ratio of reference sources agreeing on the correction
+    2. Whether the match was on primary (1.0) or secondary (0.8) metaphone code
+    Examples:
+        Gap: "shush look deep"
+        References:
+            genius: ["search", "look", "deep"]
+            spotify: ["search", "look", "deep"]
+        Result:
+            - Correct "shush" to "search" (confidence based on metaphone match type)
+            - Validate "look" and "deep" (exact matches)
+    """
+    def __init__(self, logger: Optional[logging.Logger] = None, similarity_threshold: float = 0.6):
+        """Initialize the handler.
+        Args:
+            logger: Optional logger instance
+            similarity_threshold: Minimum confidence threshold for matches (default: 0.6)
+        """
+        self.logger = logger or logging.getLogger(__name__)
+        self.similarity_threshold = similarity_threshold
+    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
+        """Check if any gap word has a metaphone match with any reference word."""
+        if not self._validate_data(data):
+            return False, {}
+        word_map = data["word_map"]
+        # Must have reference words
+        if not gap.reference_word_ids:
+            self.logger.debug("No reference words available")
+            return False, {}
+        # Gap must have words
+        if not gap.transcribed_word_ids:
+            self.logger.debug("No gap words available")
+            return False, {}
+        # Check if any gap word has a metaphone match with any reference word
+        for word_id in gap.transcribed_word_ids:
+            if word_id not in word_map:
+                continue
+            word = word_map[word_id]
+            word_codes = doublemetaphone(word.text)
+            self.logger.debug(f"Gap word '{word.text}' has metaphone codes: {word_codes}")
+            for source, ref_word_ids in gap.reference_word_ids.items():
+                for ref_word_id in ref_word_ids:
+                    if ref_word_id not in word_map:
+                        continue
+                    ref_word = word_map[ref_word_id]
+                    ref_codes = doublemetaphone(ref_word.text)
+                    self.logger.debug(f"Reference word '{ref_word.text}' has metaphone codes: {ref_codes}")
+                    if self._codes_match(word_codes, ref_codes):
+                        self.logger.debug(f"Found metaphone match between '{word.text}' and '{ref_word.text}'")
+                        return True, {}
+        self.logger.debug("No metaphone matches found")
+        return False, {}
+    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
+        """Process the gap and create corrections for sound-alike matches."""
+        if not self._validate_data(data):
+            return []
+        word_map = data["word_map"]
+        corrections = []
+        # Use the centralized method to calculate reference positions
+        reference_positions = WordOperations.calculate_reference_positions(gap, anchor_sequences=data.get("anchor_sequences", []))
+        # For each word in the gap
+        for i, word_id in enumerate(gap.transcribed_word_ids):
+            if word_id not in word_map:
+                continue
+            word = word_map[word_id]
+            word_codes = doublemetaphone(word.text)
+            self.logger.debug(f"Processing '{word.text}' (codes: {word_codes})")
+            # Skip if word exactly matches any reference
+            exact_match = False
+            for source, ref_word_ids in gap.reference_word_ids.items():
+                if i < len(ref_word_ids):
+                    ref_word_id = ref_word_ids[i]
+                    if ref_word_id in word_map:
+                        ref_word = word_map[ref_word_id]
+                        if word.text.lower() == ref_word.text.lower():
+                            exact_match = True
+                            break
+            if exact_match:
+                continue
+            # Find sound-alike matches in references
+            matches: Dict[str, Tuple[List[str], float, str]] = {}  # Added word_id to tuple
+            for source, ref_word_ids in gap.reference_word_ids.items():
+                for j, ref_word_id in enumerate(ref_word_ids):
+                    if ref_word_id not in word_map:
+                        continue
+                    ref_word = word_map[ref_word_id]
+                    ref_codes = doublemetaphone(ref_word.text)
+                    match_confidence = self._get_match_confidence(word_codes, ref_codes)
+                    if match_confidence >= self.similarity_threshold:
+                        # Special handling for short codes - don't apply position penalty
+                        is_short_code = any(len(c) <= 2 for c in word_codes if c) or any(len(c) <= 2 for c in ref_codes if c)
+                        position_multiplier = 1.0 if is_short_code or i == j else 0.8
+                        adjusted_confidence = match_confidence * position_multiplier
+                        if adjusted_confidence >= self.similarity_threshold:
+                            if ref_word.text not in matches:
+                                matches[ref_word.text] = ([], adjusted_confidence, ref_word_id)
+                            matches[ref_word.text][0].append(source)
+            # Create correction for best match if any found
+            if matches:
+                best_match, (sources, base_confidence, ref_word_id) = max(matches.items(), key=lambda x: (len(x[1][0]), x[1][1]))
+                source_confidence = len(sources) / len(gap.reference_word_ids)
+                final_confidence = base_confidence * source_confidence
+                self.logger.debug(f"Found match: {word.text} -> {best_match} " f"(confidence: {final_confidence:.2f}, sources: {sources})")
+                corrections.append(
+                    WordCorrection(
+                        original_word=word.text,
+                        corrected_word=best_match,
+                        segment_index=0,
+                        original_position=gap.transcription_position + i,
+                        confidence=final_confidence,
+                        source=", ".join(sources),
+                        reason=f"SoundAlikeHandler: Phonetic match ({final_confidence:.2f} confidence)",
+                        alternatives={k: len(v[0]) for k, v in matches.items()},
+                        is_deletion=False,
+                        reference_positions=reference_positions,
+                        length=1,
+                        handler="SoundAlikeHandler",
+                        word_id=word_id,
+                        corrected_word_id=ref_word_id,
+                    )
+                )
+        return corrections
+    def _codes_match(self, codes1: Tuple[str, str], codes2: Tuple[str, str]) -> float:
+        """Check if two sets of metaphone codes match and return match quality."""
+        # Get all non-empty codes
+        codes1_set = {c for c in codes1 if c}
+        codes2_set = {c for c in codes2 if c}
+        if not codes1_set or not codes2_set:
+            return 0.0
+        best_match = 0.0
+        for code1 in codes1_set:
+            for code2 in codes2_set:
+                # Special case for very short codes (like 'A' for 'you')
+                if len(code1) <= 2 or len(code2) <= 2:
+                    if code1 == code2:
+                        best_match = max(best_match, 1.0)
+                    elif code1 in code2 or code2 in code1:
+                        best_match = max(best_match, 0.8)
+                    elif code1[0] == code2[0]:  # Match first character
+                        best_match = max(best_match, 0.7)
+                    continue
+                # Skip if codes are too different in length
+                length_diff = abs(len(code1) - len(code2))
+                if length_diff > 3:
+                    continue
+                # Exact match
+                if code1 == code2:
+                    best_match = max(best_match, 1.0)
+                    continue
+                # Similar codes (allow 1-2 character differences)
+                if len(code1) >= 2 and len(code2) >= 2:
+                    # Compare first N characters where N is min length
+                    min_len = min(len(code1), len(code2))
+                    # Check for shared characters in any position
+                    shared_chars = sum(1 for c in code1 if c in code2)
+                    if shared_chars >= min(2, min_len):  # More lenient shared character requirement
+                        match_quality = 0.7 + (0.1 * shared_chars / max(len(code1), len(code2)))
+                        best_match = max(best_match, match_quality)
+                        continue
+                    # Compare aligned characters
+                    differences = sum(1 for a, b in zip(code1[:min_len], code2[:min_len]) if a != b)
+                    if differences <= 2:
+                        match_quality = 0.85 - (differences * 0.1)
+                        best_match = max(best_match, match_quality)
+                        continue
+                # Common prefix/suffix match with more lenient threshold
+                common_prefix_len = 0
+                for a, b in zip(code1, code2):
+                    if a != b:
+                        break
+                    common_prefix_len += 1
+                common_suffix_len = 0
+                for a, b in zip(code1[::-1], code2[::-1]):
+                    if a != b:
+                        break
+                    common_suffix_len += 1
+                if common_prefix_len >= 1 or common_suffix_len >= 1:  # Even more lenient prefix/suffix requirement
+                    match_quality = 0.7 + (0.1 * max(common_prefix_len, common_suffix_len))
+                    best_match = max(best_match, match_quality)
+                    continue
+                # Substring match
+                if len(code1) >= 2 and len(code2) >= 2:  # More lenient length requirement
+                    # Look for shared substrings of length 2 or more
+                    for length in range(min(len(code1), len(code2)), 1, -1):
+                        for i in range(len(code1) - length + 1):
+                            substring = code1[i : i + length]
+                            if substring in code2:
+                                match_quality = 0.7 + (0.1 * length / max(len(code1), len(code2)))
+                                best_match = max(best_match, match_quality)
+                                break
+        return best_match
+    def _get_match_confidence(self, codes1: Tuple[str, str], codes2: Tuple[str, str]) -> float:
+        """Calculate confidence score for a metaphone code match."""
+        match_quality = self._codes_match(codes1, codes2)
+        if match_quality == 0:
+            return 0.0
+        # Get primary codes (first code of each tuple)
+        code1, code2 = codes1[0], codes2[0]
+        # Boost confidence for codes that share prefixes
+        if code1 and code2 and len(code1) >= 2 and len(code2) >= 2:
+            if code1[:2] == code2[:2]:
+                match_quality = min(1.0, match_quality + 0.1)
+        return match_quality

karaoke-gen 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl

karaoke-gen 0.57.0py3-none-any.whl → 0.71.27py3-none-any.whl