PyPI - karaoke-gen - Versions diffs - 0.57.0__py3-none-any.whl → 0.71.23__py3-none-any.whl - Mend

karaoke-gen 0.57.0-py3-none-any.whl → 0.71.23-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (268) hide show

karaoke_gen/audio_fetcher.py +461 -0
karaoke_gen/audio_processor.py +407 -30
karaoke_gen/config.py +62 -113
karaoke_gen/file_handler.py +32 -59
karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
karaoke_gen/karaoke_gen.py +270 -61
karaoke_gen/lyrics_processor.py +13 -1
karaoke_gen/metadata.py +78 -73
karaoke_gen/pipeline/__init__.py +87 -0
karaoke_gen/pipeline/base.py +215 -0
karaoke_gen/pipeline/context.py +230 -0
karaoke_gen/pipeline/executors/__init__.py +21 -0
karaoke_gen/pipeline/executors/local.py +159 -0
karaoke_gen/pipeline/executors/remote.py +257 -0
karaoke_gen/pipeline/stages/__init__.py +27 -0
karaoke_gen/pipeline/stages/finalize.py +202 -0
karaoke_gen/pipeline/stages/render.py +165 -0
karaoke_gen/pipeline/stages/screens.py +139 -0
karaoke_gen/pipeline/stages/separation.py +191 -0
karaoke_gen/pipeline/stages/transcription.py +191 -0
karaoke_gen/style_loader.py +531 -0
karaoke_gen/utils/bulk_cli.py +6 -0
karaoke_gen/utils/cli_args.py +424 -0
karaoke_gen/utils/gen_cli.py +26 -261
karaoke_gen/utils/remote_cli.py +1815 -0
karaoke_gen/video_background_processor.py +351 -0
karaoke_gen-0.71.23.dist-info/METADATA +610 -0
karaoke_gen-0.71.23.dist-info/RECORD +275 -0
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info}/WHEEL +1 -1
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info}/entry_points.txt +1 -0
lyrics_transcriber/__init__.py +10 -0
lyrics_transcriber/cli/__init__.py +0 -0
lyrics_transcriber/cli/cli_main.py +285 -0
lyrics_transcriber/core/__init__.py +0 -0
lyrics_transcriber/core/config.py +50 -0
lyrics_transcriber/core/controller.py +520 -0
lyrics_transcriber/correction/__init__.py +0 -0
lyrics_transcriber/correction/agentic/__init__.py +9 -0
lyrics_transcriber/correction/agentic/adapter.py +71 -0
lyrics_transcriber/correction/agentic/agent.py +313 -0
lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
lyrics_transcriber/correction/agentic/models/enums.py +38 -0
lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
lyrics_transcriber/correction/agentic/models/utils.py +19 -0
lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
lyrics_transcriber/correction/agentic/providers/base.py +36 -0
lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
lyrics_transcriber/correction/agentic/providers/config.py +73 -0
lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
lyrics_transcriber/correction/agentic/providers/health.py +28 -0
lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
lyrics_transcriber/correction/agentic/router.py +35 -0
lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
lyrics_transcriber/correction/anchor_sequence.py +1043 -0
lyrics_transcriber/correction/corrector.py +760 -0
lyrics_transcriber/correction/feedback/__init__.py +2 -0
lyrics_transcriber/correction/feedback/schemas.py +107 -0
lyrics_transcriber/correction/feedback/store.py +236 -0
lyrics_transcriber/correction/handlers/__init__.py +0 -0
lyrics_transcriber/correction/handlers/base.py +52 -0
lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
lyrics_transcriber/correction/handlers/llm.py +293 -0
lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
lyrics_transcriber/correction/handlers/repeat.py +88 -0
lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
lyrics_transcriber/correction/handlers/word_operations.py +187 -0
lyrics_transcriber/correction/operations.py +352 -0
lyrics_transcriber/correction/phrase_analyzer.py +435 -0
lyrics_transcriber/correction/text_utils.py +30 -0
lyrics_transcriber/frontend/.gitignore +23 -0
lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
lyrics_transcriber/frontend/.yarnrc.yml +3 -0
lyrics_transcriber/frontend/README.md +50 -0
lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
lyrics_transcriber/frontend/__init__.py +25 -0
lyrics_transcriber/frontend/eslint.config.js +28 -0
lyrics_transcriber/frontend/index.html +18 -0
lyrics_transcriber/frontend/package.json +42 -0
lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/public/favicon.ico +0 -0
lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/src/App.tsx +212 -0
lyrics_transcriber/frontend/src/api.ts +239 -0
lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
lyrics_transcriber/frontend/src/main.tsx +17 -0
lyrics_transcriber/frontend/src/theme.ts +177 -0
lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
lyrics_transcriber/frontend/src/types.js +2 -0
lyrics_transcriber/frontend/src/types.ts +199 -0
lyrics_transcriber/frontend/src/validation.ts +132 -0
lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
lyrics_transcriber/frontend/tsconfig.app.json +26 -0
lyrics_transcriber/frontend/tsconfig.json +25 -0
lyrics_transcriber/frontend/tsconfig.node.json +23 -0
lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
lyrics_transcriber/frontend/update_version.js +11 -0
lyrics_transcriber/frontend/vite.config.d.ts +2 -0
lyrics_transcriber/frontend/vite.config.js +10 -0
lyrics_transcriber/frontend/vite.config.ts +11 -0
lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
lyrics_transcriber/frontend/web_assets/index.html +18 -0
lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/yarn.lock +3752 -0
lyrics_transcriber/lyrics/__init__.py +0 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
lyrics_transcriber/lyrics/file_provider.py +95 -0
lyrics_transcriber/lyrics/genius.py +384 -0
lyrics_transcriber/lyrics/lrclib.py +231 -0
lyrics_transcriber/lyrics/musixmatch.py +156 -0
lyrics_transcriber/lyrics/spotify.py +290 -0
lyrics_transcriber/lyrics/user_input_provider.py +44 -0
lyrics_transcriber/output/__init__.py +0 -0
lyrics_transcriber/output/ass/__init__.py +21 -0
lyrics_transcriber/output/ass/ass.py +2088 -0
lyrics_transcriber/output/ass/ass_specs.txt +732 -0
lyrics_transcriber/output/ass/config.py +180 -0
lyrics_transcriber/output/ass/constants.py +23 -0
lyrics_transcriber/output/ass/event.py +94 -0
lyrics_transcriber/output/ass/formatters.py +132 -0
lyrics_transcriber/output/ass/lyrics_line.py +265 -0
lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
lyrics_transcriber/output/ass/section_detector.py +89 -0
lyrics_transcriber/output/ass/section_screen.py +106 -0
lyrics_transcriber/output/ass/style.py +187 -0
lyrics_transcriber/output/cdg.py +619 -0
lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
lyrics_transcriber/output/cdgmaker/config.py +151 -0
lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
lyrics_transcriber/output/cdgmaker/pack.py +507 -0
lyrics_transcriber/output/cdgmaker/render.py +346 -0
lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
lyrics_transcriber/output/cdgmaker/utils.py +132 -0
lyrics_transcriber/output/countdown_processor.py +267 -0
lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
lyrics_transcriber/output/fonts/arial.ttf +0 -0
lyrics_transcriber/output/fonts/georgia.ttf +0 -0
lyrics_transcriber/output/fonts/verdana.ttf +0 -0
lyrics_transcriber/output/generator.py +257 -0
lyrics_transcriber/output/lrc_to_cdg.py +61 -0
lyrics_transcriber/output/lyrics_file.py +102 -0
lyrics_transcriber/output/plain_text.py +96 -0
lyrics_transcriber/output/segment_resizer.py +431 -0
lyrics_transcriber/output/subtitles.py +397 -0
lyrics_transcriber/output/video.py +544 -0
lyrics_transcriber/review/__init__.py +0 -0
lyrics_transcriber/review/server.py +676 -0
lyrics_transcriber/storage/__init__.py +0 -0
lyrics_transcriber/storage/dropbox.py +225 -0
lyrics_transcriber/transcribers/__init__.py +0 -0
lyrics_transcriber/transcribers/audioshake.py +290 -0
lyrics_transcriber/transcribers/base_transcriber.py +157 -0
lyrics_transcriber/transcribers/whisper.py +330 -0
lyrics_transcriber/types.py +648 -0
lyrics_transcriber/utils/__init__.py +0 -0
lyrics_transcriber/utils/word_utils.py +27 -0
karaoke_gen-0.57.0.dist-info/METADATA +0 -167
karaoke_gen-0.57.0.dist-info/RECORD +0 -23
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info/licenses}/LICENSE +0 -0

lyrics_transcriber/correction/handlers/levenshtein.py ADDED Viewed

@@ -0,0 +1,189 @@
+from typing import List, Optional, Tuple, Dict, Any
+import string
+import Levenshtein
+import logging
+from lyrics_transcriber.types import GapSequence, WordCorrection
+from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
+from lyrics_transcriber.correction.handlers.word_operations import WordOperations
+class LevenshteinHandler(GapCorrectionHandler):
+    """Handles corrections based on Levenshtein (edit distance) similarity between words.
+    This handler looks for words that are similar in spelling to reference words in the same position.
+    The similarity calculation includes:
+    1. Basic Levenshtein ratio
+    2. Bonus for words starting with the same letter
+    3. Penalty for words starting with different letters
+    4. Bonus for similar length words
+    Examples:
+        Gap: "wold" (misspelling)
+        References:
+            genius: ["world"]
+            spotify: ["world"]
+        Result:
+            - Correct "wold" to "world" (high confidence due to small edit distance)
+        Gap: "worde" (misspelling)
+        References:
+            genius: ["world"]
+            spotify: ["words"]
+        Result:
+            - Correct "worde" to "world" (lower confidence due to disagreeing sources)
+    """
+    def __init__(self, similarity_threshold: float = 0.65, logger: Optional[logging.Logger] = None):
+        self.similarity_threshold = similarity_threshold
+        self.logger = logger or logging.getLogger(__name__)
+    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
+        """Check if we can handle this gap - we'll try if there are reference words."""
+        if not data or "word_map" not in data:
+            self.logger.error("No word_map provided in data")
+            return False, {}
+        word_map = data["word_map"]
+        if not gap.reference_word_ids:
+            self.logger.debug("No reference words available")
+            return False, {}
+        if not gap.transcribed_word_ids:
+            self.logger.debug("No gap words available")
+            return False, {}
+        # Check if any word has sufficient similarity to reference
+        for i, word_id in enumerate(gap.transcribed_word_ids):
+            if word_id not in word_map:
+                continue
+            word = word_map[word_id]
+            for source, ref_word_ids in gap.reference_word_ids.items():
+                if i < len(ref_word_ids):
+                    ref_word_id = ref_word_ids[i]
+                    if ref_word_id not in word_map:
+                        continue
+                    ref_word = word_map[ref_word_id]
+                    similarity = self._get_string_similarity(word.text, ref_word.text)
+                    if similarity >= self.similarity_threshold:
+                        self.logger.debug(f"Found similar word: '{word.text}' -> '{ref_word.text}' ({similarity:.2f})")
+                        return True, {}
+        self.logger.debug("No words meet similarity threshold")
+        return False, {}
+    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
+        """Try to correct words based on string similarity."""
+        if not data or "word_map" not in data:
+            self.logger.error("No word_map provided in data")
+            return []
+        word_map = data["word_map"]
+        corrections = []
+        # Process each word in the gap
+        for i, word_id in enumerate(gap.transcribed_word_ids):
+            if word_id not in word_map:
+                continue
+            word = word_map[word_id]
+            # Skip if word is empty or just punctuation
+            if not word.text.strip():
+                continue
+            # Skip exact matches
+            exact_match = False
+            for source, ref_word_ids in gap.reference_word_ids.items():
+                if i < len(ref_word_ids):
+                    ref_word_id = ref_word_ids[i]
+                    if ref_word_id in word_map:
+                        ref_word = word_map[ref_word_id]
+                        if word.text.lower() == ref_word.text.lower():
+                            exact_match = True
+                            break
+            if exact_match:
+                continue
+            # Find matching reference words at this position
+            matches: Dict[str, Tuple[List[str], float, str]] = {}  # word -> (sources, similarity, word_id)
+            for source, ref_word_ids in gap.reference_word_ids.items():
+                if i >= len(ref_word_ids):
+                    continue
+                ref_word_id = ref_word_ids[i]
+                if ref_word_id not in word_map:
+                    continue
+                ref_word = word_map[ref_word_id]
+                similarity = self._get_string_similarity(word.text, ref_word.text)
+                if similarity >= self.similarity_threshold:
+                    self.logger.debug(f"Found match: '{word.text}' -> '{ref_word.text}' ({similarity:.2f})")
+                    if ref_word.text not in matches:
+                        matches[ref_word.text] = ([], similarity, ref_word_id)
+                    matches[ref_word.text][0].append(source)
+            # Create correction for best match if any found
+            if matches:
+                best_match, (sources, similarity, ref_word_id) = max(
+                    matches.items(), key=lambda x: (len(x[1][0]), x[1][1])  # Sort by number of sources, then similarity
+                )
+                source_confidence = len(sources) / len(gap.reference_word_ids)
+                final_confidence = similarity * source_confidence
+                # Calculate reference positions
+                reference_positions = WordOperations.calculate_reference_positions(gap, anchor_sequences=data.get("anchor_sequences", []))
+                self.logger.debug(f"Creating correction: {word.text} -> {best_match} (confidence: {final_confidence})")
+                corrections.append(
+                    WordCorrection(
+                        original_word=word.text,
+                        corrected_word=best_match,
+                        segment_index=0,
+                        original_position=gap.transcription_position + i,
+                        confidence=final_confidence,
+                        source=", ".join(sources),
+                        reason=f"String similarity ({final_confidence:.2f})",
+                        alternatives={k: len(v[0]) for k, v in matches.items()},
+                        is_deletion=False,
+                        reference_positions=reference_positions,
+                        length=1,
+                        handler="LevenshteinHandler",
+                        word_id=word_id,
+                        corrected_word_id=ref_word_id,
+                    )
+                )
+        return corrections
+    def _clean_word(self, word: str) -> str:
+        """Remove punctuation and standardize for comparison."""
+        return word.strip().lower().strip(string.punctuation)
+    def _get_string_similarity(self, word1: str, word2: str) -> float:
+        """Calculate string similarity using Levenshtein ratio with adjustments."""
+        # Clean words
+        w1, w2 = self._clean_word(word1), self._clean_word(word2)
+        if not w1 or not w2:
+            return 0.0
+        # Calculate Levenshtein ratio
+        similarity = Levenshtein.ratio(w1, w2)
+        # Boost similarity for words starting with the same letter
+        if w1[0] == w2[0]:
+            similarity = (similarity + 1) / 2
+        else:
+            # Penalize words starting with different letters
+            similarity = similarity * 0.9
+        # Boost for similar length words
+        length_ratio = min(len(w1), len(w2)) / max(len(w1), len(w2))
+        similarity = (similarity + length_ratio) / 2
+        return similarity

lyrics_transcriber/correction/handlers/llm.py ADDED Viewed

@@ -0,0 +1,293 @@
+from typing import List, Optional, Tuple, Dict, Any, Union
+import logging
+import json
+from datetime import datetime
+from pathlib import Path
+from lyrics_transcriber.types import GapSequence, WordCorrection
+from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
+from lyrics_transcriber.correction.handlers.word_operations import WordOperations
+from lyrics_transcriber.correction.handlers.llm_providers import LLMProvider
+class LLMHandler(GapCorrectionHandler):
+    """Uses an LLM to analyze and correct gaps by comparing with reference lyrics."""
+    def __init__(
+        self, provider: LLMProvider, name: str, logger: Optional[logging.Logger] = None, cache_dir: Optional[Union[str, Path]] = None
+    ):
+        super().__init__(logger)
+        self.logger = logger or logging.getLogger(__name__)
+        self.provider = provider
+        self.name = name
+        self.cache_dir = Path(cache_dir) if cache_dir else None
+    def _format_prompt(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> str:
+        """Format the prompt for the LLM with context about the gap and reference lyrics."""
+        word_map = data.get("word_map", {})
+        metadata = data.get("metadata", {}) if data else {}
+        if not word_map:
+            self.logger.error("No word_map provided in data")
+            return ""
+        # Format transcribed words with their IDs
+        transcribed_words = [{"id": word_id, "text": word_map[word_id].text} for word_id in gap.transcribed_word_ids if word_id in word_map]
+        prompt = (
+            "You are a lyrics correction expert. You will be given transcribed lyrics that may contain errors "
+            "and reference lyrics from multiple sources. Your task is to analyze each word in the transcribed text "
+            "and suggest specific corrections based on the reference lyrics.\n\n"
+            "Each word has a unique ID. When suggesting corrections, you must specify the ID of the word being corrected. "
+            "This ensures accuracy in applying your corrections.\n\n"
+            "For each correction, specify:\n"
+            "1. The word ID being corrected\n"
+            "2. The correction type ('replace', 'split', 'combine', or 'delete')\n"
+            "3. The corrected text\n"
+            "4. Your confidence level\n"
+            "5. The reason for the correction\n\n"
+        )
+        # Add song context if available
+        if metadata and metadata.get("artist") and metadata.get("title"):
+            prompt += f"Song: {metadata['title']}\nArtist: {metadata['artist']}\n\n"
+        # Format transcribed words with IDs
+        prompt += "Transcribed words:\n"
+        for word in transcribed_words:
+            prompt += f"- ID: {word['id']}, Text: '{word['text']}'\n"
+        prompt += "\nReference lyrics from different sources:\n"
+        # Add each reference source with words and their IDs
+        for source, word_ids in gap.reference_word_ids.items():
+            reference_words = [{"id": word_id, "text": word_map[word_id].text} for word_id in word_ids if word_id in word_map]
+            prompt += f"\n{source} immediate context:\n"
+            for word in reference_words:
+                prompt += f"- ID: {word['id']}, Text: '{word['text']}'\n"
+            # Add full lyrics if available
+            if metadata and metadata.get("full_reference_texts", {}).get(source):
+                prompt += f"\nFull {source} lyrics:\n{metadata['full_reference_texts'][source]}\n"
+        # Add context about surrounding anchors if available
+        if gap.preceding_anchor_id:
+            preceding_anchor = next((a.anchor for a in data.get("anchor_sequences", []) if a.anchor.id == gap.preceding_anchor_id), None)
+            if preceding_anchor:
+                anchor_words = [
+                    {"id": word_id, "text": word_map[word_id].text}
+                    for word_id in preceding_anchor.transcribed_word_ids
+                    if word_id in word_map
+                ]
+                prompt += "\nPreceding correct words:\n"
+                for word in anchor_words:
+                    prompt += f"- ID: {word['id']}, Text: '{word['text']}'\n"
+        prompt += (
+            "\nProvide corrections in the following JSON format:\n"
+            "{\n"
+            '  "corrections": [\n'
+            "    {\n"
+            '      "word_id": "id_of_word_to_correct",\n'
+            '      "type": "replace|split|combine|delete",\n'
+            '      "corrected_text": "new text",\n'
+            '      "reference_word_id": "id_from_reference_lyrics",  // Optional, use when matching a specific reference word\n'
+            '      "confidence": 0.9,\n'
+            '      "reason": "explanation of correction"\n'
+            "    }\n"
+            "  ]\n"
+            "}\n\n"
+            "Important rules:\n"
+            "1. Always include the word_id for each correction\n"
+            "2. For 'split' type, corrected_text should contain the space-separated words\n"
+            "3. For 'combine' type, word_id should be the first word to combine\n"
+            "4. Include reference_word_id when the correction matches a specific reference word\n"
+            "5. Only suggest corrections when you're confident they improve the lyrics\n"
+            "6. Preserve any existing words that match the reference lyrics\n"
+            "7. Respond ONLY with the JSON object, no other text"
+        )
+        return prompt
+    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
+        """LLM handler can attempt to handle any gap with reference words."""
+        if not gap.reference_word_ids:
+            self.logger.debug("No reference words available")
+            return False, {}
+        return True, {}
+    def _write_debug_info(self, prompt: str, response: str, gap_index: int, audio_file_hash: Optional[str] = None) -> None:
+        """Write prompt and response to debug files."""
+        if not self.cache_dir:
+            self.logger.warning("No cache directory provided, skipping LLM debug output")
+            return
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        debug_dir = self.cache_dir / "llm_debug"
+        debug_dir.mkdir(exist_ok=True, parents=True)
+        hash_prefix = f"{audio_file_hash}_" if audio_file_hash else ""
+        filename = debug_dir / f"llm_debug_{hash_prefix}{gap_index}_{timestamp}.txt"
+        debug_content = "=== LLM PROMPT ===\n" f"{prompt}\n\n" "=== LLM RESPONSE ===\n" f"{response}\n"
+        try:
+            with open(filename, "w", encoding="utf-8") as f:
+                f.write(debug_content)
+        except IOError as e:
+            self.logger.error(f"Failed to write LLM debug file: {e}")
+    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
+        """Ask the LLM to correct the gap and turn its JSON answer into corrections.
+        Each entry of the response's "corrections" list is processed independently:
+        an entry that is malformed (not an object, missing a required key, or naming
+        a word ID absent from ``word_map``) is logged and skipped, so it cannot
+        discard the corrections built from the other entries.
+        Args:
+            gap: Gap to correct; its transcribed/reference word IDs and
+                transcription position are read.
+            data: Must contain "word_map" (word ID -> object exposing ``.text``).
+                "anchor_sequences" and "audio_file_hash" are read when present.
+        Returns:
+            The corrections built from the LLM response. An empty list when there is
+            no word map, no prompt, an unparsable/empty/mis-shaped response, or an
+            unexpected error.
+        """
+        if not data or "word_map" not in data:
+            self.logger.error("No word_map provided in data")
+            return []
+        word_map = data["word_map"]
+        transcribed_words = [word_map[word_id].text for word_id in gap.transcribed_word_ids if word_id in word_map]
+        # Calculate reference positions using the centralized method
+        reference_positions = (
+            WordOperations.calculate_reference_positions(gap, anchor_sequences=data.get("anchor_sequences", [])) or {}
+        )  # Ensure empty dict if None
+        prompt = self._format_prompt(gap, data)
+        if not prompt:
+            return []
+        # Get a unique index for this gap based on its position
+        gap_index = gap.transcription_position
+        try:
+            self.logger.debug(f"Processing gap words: {transcribed_words}")
+            self.logger.debug(f"Reference word IDs: {gap.reference_word_ids}")
+            response = self.provider.generate_response(prompt)
+            # Write debug info to files
+            self._write_debug_info(prompt, response, gap_index, audio_file_hash=data.get("audio_file_hash"))
+            try:
+                corrections_data = json.loads(response)
+            except json.JSONDecodeError as e:
+                self.logger.error(f"Failed to parse LLM response as JSON: {e}")
+                self.logger.error(f"Raw response content: {response}")
+                return []
+            # Valid JSON is not necessarily an object (it may be a list, string, number...)
+            if not isinstance(corrections_data, dict):
+                self.logger.error(f"LLM response is not a JSON object: {response}")
+                return []
+            # Check if corrections exist and are non-empty
+            suggested = corrections_data.get("corrections")
+            if not suggested:
+                self.logger.debug("No corrections suggested by LLM")
+                return []
+            if not isinstance(suggested, list):
+                self.logger.error(f"LLM 'corrections' field is not a list: {suggested!r}")
+                return []
+            corrections = []
+            for correction in suggested:
+                try:
+                    corrections.extend(self._corrections_for_entry(correction, gap, word_map, reference_positions))
+                except (KeyError, TypeError, AttributeError) as e:
+                    # One malformed entry must not throw away the valid ones.
+                    self.logger.error(f"Skipping malformed LLM correction {correction!r}: {e!r}")
+            self.logger.debug(f"Created {len(corrections)} corrections: {[f'{c.original_word}->{c.corrected_word}' for c in corrections]}")
+            return corrections
+        except Exception as e:
+            self.logger.error(f"Unexpected error in LLM handler: {e}")
+            return []
+    def _corrections_for_entry(
+        self, correction: Dict[str, Any], gap: GapSequence, word_map: Dict[str, Any], reference_positions: Dict[str, Any]
+    ) -> List[WordCorrection]:
+        """Build the corrections for one entry of the LLM's "corrections" list.
+        Returns an empty list when the entry targets a word outside the gap, cannot
+        be combined for lack of words, or has an unrecognised "type". Missing keys or
+        wrongly-typed values raise KeyError/TypeError/AttributeError to the caller.
+        """
+        word_id = correction["word_id"]
+        # Validate word_id exists in gap
+        if word_id not in gap.transcribed_word_ids:
+            self.logger.error(f"LLM suggested correction for word_id {word_id} which is not in the gap")
+            return []
+        # Get original word from word map
+        original_word = word_map[word_id]
+        word_index = gap.transcribed_word_ids.index(word_id)
+        position = gap.transcription_position + word_index
+        self.logger.debug(f"Processing correction: {correction}")
+        correction_type = correction["type"]
+        if correction_type == "replace":
+            self.logger.debug(
+                f"Creating replacement: '{original_word.text}' -> '{correction['corrected_text']}' " f"at position {position}"
+            )
+            return [
+                WordOperations.create_word_replacement_correction(
+                    original_word=original_word.text,
+                    corrected_word=correction["corrected_text"],
+                    original_position=position,
+                    source="LLM",
+                    confidence=correction["confidence"],
+                    reason=correction["reason"],
+                    handler=self.name,
+                    reference_positions=reference_positions,
+                    original_word_id=word_id,
+                    corrected_word_id=correction.get("reference_word_id"),
+                )
+            ]
+        if correction_type == "split":
+            split_words = correction["corrected_text"].split()
+            self.logger.debug(f"Creating split: '{original_word.text}' -> {split_words} " f"at position {position}")
+            # Get reference word IDs if provided
+            reference_word_ids = correction.get("reference_word_ids", [None] * len(split_words))
+            return list(
+                WordOperations.create_word_split_corrections(
+                    original_word=original_word.text,
+                    reference_words=split_words,
+                    original_position=position,
+                    source="LLM",
+                    confidence=correction["confidence"],
+                    reason=correction["reason"],
+                    handler=self.name,
+                    reference_positions=reference_positions,
+                    original_word_id=word_id,
+                    corrected_word_ids=reference_word_ids,
+                )
+            )
+        if correction_type == "combine":
+            # NOTE(review): the number of words to merge is taken from the word count
+            # of corrected_text - confirm this matches the prompt's "combine" contract.
+            words_needed = len(correction["corrected_text"].split())
+            if word_index + words_needed > len(gap.transcribed_word_ids):
+                self.logger.error(f"Not enough words available to combine at position {position}")
+                return []
+            # Get all word IDs to combine
+            word_ids_to_combine = gap.transcribed_word_ids[word_index : word_index + words_needed]
+            words_to_combine = [word_map[combine_id].text for combine_id in word_ids_to_combine]
+            self.logger.debug(
+                f"Creating combine: {words_to_combine} -> '{correction['corrected_text']}' " f"at position {position}"
+            )
+            return list(
+                WordOperations.create_word_combine_corrections(
+                    original_words=words_to_combine,
+                    reference_word=correction["corrected_text"],
+                    original_position=position,
+                    source="LLM",
+                    confidence=correction["confidence"],
+                    combine_reason=correction["reason"],
+                    delete_reason=f"Part of combining words: {correction['reason']}",
+                    handler=self.name,
+                    reference_positions=reference_positions,
+                    original_word_ids=word_ids_to_combine,
+                    corrected_word_id=correction.get("reference_word_id"),
+                )
+            )
+        if correction_type == "delete":
+            self.logger.debug(f"Creating deletion: '{original_word.text}' at position {position}")
+            return [
+                WordCorrection(
+                    original_word=original_word.text,
+                    corrected_word="",
+                    segment_index=0,
+                    original_position=position,
+                    confidence=correction["confidence"],
+                    source="LLM",
+                    reason=correction["reason"],
+                    alternatives={},
+                    is_deletion=True,
+                    handler=self.name,
+                    reference_positions=reference_positions,
+                    word_id=word_id,
+                    corrected_word_id=None,
+                )
+            ]
+        # Any other type produces no correction; say so instead of dropping it silently.
+        self.logger.warning(f"Ignoring LLM correction with unknown type {correction_type!r}")
+        return []

lyrics_transcriber/correction/handlers/llm_providers.py ADDED Viewed

@@ -0,0 +1,60 @@
+from abc import ABC, abstractmethod
+from typing import Optional
+import logging
+from ollama import chat as ollama_chat
+import openai
+class LLMProvider(ABC):
+    """Common interface that every LLM backend implements."""
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        # Fall back to this module's logger when the caller does not supply one.
+        self.logger = logger if logger else logging.getLogger(__name__)
+    @abstractmethod
+    def generate_response(self, prompt: str, **kwargs) -> str:
+        """Send ``prompt`` to the LLM and return its reply.
+        Args:
+            prompt: Text to send to the model.
+            **kwargs: Extra parameters whose meaning is defined by the concrete provider.
+        Returns:
+            str: The reply text produced by the model.
+        """
+class OllamaProvider(LLMProvider):
+    """LLM provider backed by a local Ollama model."""
+    def __init__(self, model: str, logger: Optional[logging.Logger] = None):
+        super().__init__(logger)
+        self.model = model
+    def generate_response(self, prompt: str, **kwargs) -> str:
+        """Send ``prompt`` as a single user message and return the reply text.
+        The request asks Ollama for JSON-formatted output. ``kwargs`` is accepted
+        for interface compatibility with ``LLMProvider`` but is not forwarded.
+        Raises:
+            Exception: Any error from the Ollama call is logged and re-raised.
+        """
+        messages = [{"role": "user", "content": prompt}]
+        try:
+            reply = ollama_chat(model=self.model, messages=messages, format="json")
+            return reply.message.content
+        except Exception as err:
+            self.logger.error(f"Error generating Ollama response: {err}")
+            raise
+class OpenAIProvider(LLMProvider):
+    """Provider for OpenAI-compatible APIs (including OpenRouter)."""
+    def __init__(self, model: str, api_key: str, base_url: Optional[str] = None, logger: Optional[logging.Logger] = None):
+        super().__init__(logger)
+        self.model = model
+        self.client = openai.OpenAI(api_key=api_key, base_url=base_url)
+    def generate_response(self, prompt: str, **kwargs) -> str:
+        """Send ``prompt`` as a single user message and return the reply text.
+        Args:
+            prompt: Text to send to the model.
+            **kwargs: Forwarded to ``chat.completions.create``. ``response_format``
+                defaults to JSON-object mode and may be overridden here.
+        Returns:
+            str: The message content of the first choice, or an empty string when
+            the API returned no content.
+        Raises:
+            Exception: Any error from the API call is logged and re-raised.
+        """
+        # Default first, caller kwargs last: a caller-supplied response_format now
+        # overrides the default instead of raising "multiple values for keyword argument".
+        params = {"response_format": {"type": "json_object"}, **kwargs}
+        try:
+            response = self.client.chat.completions.create(
+                model=self.model, messages=[{"role": "user", "content": prompt}], **params
+            )
+            content = response.choices[0].message.content
+        except Exception as e:
+            self.logger.error(f"Error generating OpenAI response: {e}")
+            raise
+        if content is None:
+            # message.content is nullable in the API; honour the declared str return type.
+            self.logger.warning("OpenAI response contained no message content")
+            return ""
+        return content

karaoke-gen 0.57.0__py3-none-any.whl → 0.71.23__py3-none-any.whl

karaoke-gen 0.57.0py3-none-any.whl → 0.71.23py3-none-any.whl