PyPI - karaoke-gen - Versions diffs - 0.57.0__py3-none-any.whl → 0.71.23__py3-none-any.whl - Mend

karaoke-gen 0.57.0-py3-none-any.whl → 0.71.23-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (268) hide show

karaoke_gen/audio_fetcher.py +461 -0
karaoke_gen/audio_processor.py +407 -30
karaoke_gen/config.py +62 -113
karaoke_gen/file_handler.py +32 -59
karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
karaoke_gen/karaoke_gen.py +270 -61
karaoke_gen/lyrics_processor.py +13 -1
karaoke_gen/metadata.py +78 -73
karaoke_gen/pipeline/__init__.py +87 -0
karaoke_gen/pipeline/base.py +215 -0
karaoke_gen/pipeline/context.py +230 -0
karaoke_gen/pipeline/executors/__init__.py +21 -0
karaoke_gen/pipeline/executors/local.py +159 -0
karaoke_gen/pipeline/executors/remote.py +257 -0
karaoke_gen/pipeline/stages/__init__.py +27 -0
karaoke_gen/pipeline/stages/finalize.py +202 -0
karaoke_gen/pipeline/stages/render.py +165 -0
karaoke_gen/pipeline/stages/screens.py +139 -0
karaoke_gen/pipeline/stages/separation.py +191 -0
karaoke_gen/pipeline/stages/transcription.py +191 -0
karaoke_gen/style_loader.py +531 -0
karaoke_gen/utils/bulk_cli.py +6 -0
karaoke_gen/utils/cli_args.py +424 -0
karaoke_gen/utils/gen_cli.py +26 -261
karaoke_gen/utils/remote_cli.py +1815 -0
karaoke_gen/video_background_processor.py +351 -0
karaoke_gen-0.71.23.dist-info/METADATA +610 -0
karaoke_gen-0.71.23.dist-info/RECORD +275 -0
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info}/WHEEL +1 -1
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info}/entry_points.txt +1 -0
lyrics_transcriber/__init__.py +10 -0
lyrics_transcriber/cli/__init__.py +0 -0
lyrics_transcriber/cli/cli_main.py +285 -0
lyrics_transcriber/core/__init__.py +0 -0
lyrics_transcriber/core/config.py +50 -0
lyrics_transcriber/core/controller.py +520 -0
lyrics_transcriber/correction/__init__.py +0 -0
lyrics_transcriber/correction/agentic/__init__.py +9 -0
lyrics_transcriber/correction/agentic/adapter.py +71 -0
lyrics_transcriber/correction/agentic/agent.py +313 -0
lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
lyrics_transcriber/correction/agentic/models/enums.py +38 -0
lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
lyrics_transcriber/correction/agentic/models/utils.py +19 -0
lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
lyrics_transcriber/correction/agentic/providers/base.py +36 -0
lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
lyrics_transcriber/correction/agentic/providers/config.py +73 -0
lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
lyrics_transcriber/correction/agentic/providers/health.py +28 -0
lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
lyrics_transcriber/correction/agentic/router.py +35 -0
lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
lyrics_transcriber/correction/anchor_sequence.py +1043 -0
lyrics_transcriber/correction/corrector.py +760 -0
lyrics_transcriber/correction/feedback/__init__.py +2 -0
lyrics_transcriber/correction/feedback/schemas.py +107 -0
lyrics_transcriber/correction/feedback/store.py +236 -0
lyrics_transcriber/correction/handlers/__init__.py +0 -0
lyrics_transcriber/correction/handlers/base.py +52 -0
lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
lyrics_transcriber/correction/handlers/llm.py +293 -0
lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
lyrics_transcriber/correction/handlers/repeat.py +88 -0
lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
lyrics_transcriber/correction/handlers/word_operations.py +187 -0
lyrics_transcriber/correction/operations.py +352 -0
lyrics_transcriber/correction/phrase_analyzer.py +435 -0
lyrics_transcriber/correction/text_utils.py +30 -0
lyrics_transcriber/frontend/.gitignore +23 -0
lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
lyrics_transcriber/frontend/.yarnrc.yml +3 -0
lyrics_transcriber/frontend/README.md +50 -0
lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
lyrics_transcriber/frontend/__init__.py +25 -0
lyrics_transcriber/frontend/eslint.config.js +28 -0
lyrics_transcriber/frontend/index.html +18 -0
lyrics_transcriber/frontend/package.json +42 -0
lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/public/favicon.ico +0 -0
lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/src/App.tsx +212 -0
lyrics_transcriber/frontend/src/api.ts +239 -0
lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
lyrics_transcriber/frontend/src/main.tsx +17 -0
lyrics_transcriber/frontend/src/theme.ts +177 -0
lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
lyrics_transcriber/frontend/src/types.js +2 -0
lyrics_transcriber/frontend/src/types.ts +199 -0
lyrics_transcriber/frontend/src/validation.ts +132 -0
lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
lyrics_transcriber/frontend/tsconfig.app.json +26 -0
lyrics_transcriber/frontend/tsconfig.json +25 -0
lyrics_transcriber/frontend/tsconfig.node.json +23 -0
lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
lyrics_transcriber/frontend/update_version.js +11 -0
lyrics_transcriber/frontend/vite.config.d.ts +2 -0
lyrics_transcriber/frontend/vite.config.js +10 -0
lyrics_transcriber/frontend/vite.config.ts +11 -0
lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
lyrics_transcriber/frontend/web_assets/index.html +18 -0
lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/yarn.lock +3752 -0
lyrics_transcriber/lyrics/__init__.py +0 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
lyrics_transcriber/lyrics/file_provider.py +95 -0
lyrics_transcriber/lyrics/genius.py +384 -0
lyrics_transcriber/lyrics/lrclib.py +231 -0
lyrics_transcriber/lyrics/musixmatch.py +156 -0
lyrics_transcriber/lyrics/spotify.py +290 -0
lyrics_transcriber/lyrics/user_input_provider.py +44 -0
lyrics_transcriber/output/__init__.py +0 -0
lyrics_transcriber/output/ass/__init__.py +21 -0
lyrics_transcriber/output/ass/ass.py +2088 -0
lyrics_transcriber/output/ass/ass_specs.txt +732 -0
lyrics_transcriber/output/ass/config.py +180 -0
lyrics_transcriber/output/ass/constants.py +23 -0
lyrics_transcriber/output/ass/event.py +94 -0
lyrics_transcriber/output/ass/formatters.py +132 -0
lyrics_transcriber/output/ass/lyrics_line.py +265 -0
lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
lyrics_transcriber/output/ass/section_detector.py +89 -0
lyrics_transcriber/output/ass/section_screen.py +106 -0
lyrics_transcriber/output/ass/style.py +187 -0
lyrics_transcriber/output/cdg.py +619 -0
lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
lyrics_transcriber/output/cdgmaker/config.py +151 -0
lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
lyrics_transcriber/output/cdgmaker/pack.py +507 -0
lyrics_transcriber/output/cdgmaker/render.py +346 -0
lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
lyrics_transcriber/output/cdgmaker/utils.py +132 -0
lyrics_transcriber/output/countdown_processor.py +267 -0
lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
lyrics_transcriber/output/fonts/arial.ttf +0 -0
lyrics_transcriber/output/fonts/georgia.ttf +0 -0
lyrics_transcriber/output/fonts/verdana.ttf +0 -0
lyrics_transcriber/output/generator.py +257 -0
lyrics_transcriber/output/lrc_to_cdg.py +61 -0
lyrics_transcriber/output/lyrics_file.py +102 -0
lyrics_transcriber/output/plain_text.py +96 -0
lyrics_transcriber/output/segment_resizer.py +431 -0
lyrics_transcriber/output/subtitles.py +397 -0
lyrics_transcriber/output/video.py +544 -0
lyrics_transcriber/review/__init__.py +0 -0
lyrics_transcriber/review/server.py +676 -0
lyrics_transcriber/storage/__init__.py +0 -0
lyrics_transcriber/storage/dropbox.py +225 -0
lyrics_transcriber/transcribers/__init__.py +0 -0
lyrics_transcriber/transcribers/audioshake.py +290 -0
lyrics_transcriber/transcribers/base_transcriber.py +157 -0
lyrics_transcriber/transcribers/whisper.py +330 -0
lyrics_transcriber/types.py +648 -0
lyrics_transcriber/utils/__init__.py +0 -0
lyrics_transcriber/utils/word_utils.py +27 -0
karaoke_gen-0.57.0.dist-info/METADATA +0 -167
karaoke_gen-0.57.0.dist-info/RECORD +0 -23
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info/licenses}/LICENSE +0 -0

lyrics_transcriber/correction/handlers/syllables_match.py ADDED Viewed

@@ -0,0 +1,252 @@
+from typing import List, Tuple, Dict, Any, Optional
+import spacy
+import logging
+import pyphen
+import nltk
+from nltk.corpus import cmudict
+import syllables
+from spacy_syllables import SpacySyllables
+from lyrics_transcriber.types import GapSequence, WordCorrection
+from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
+from lyrics_transcriber.correction.handlers.word_operations import WordOperations
+class SyllablesMatchHandler(GapCorrectionHandler):
+    """Handles gaps where number of syllables in reference text matches number of syllables in transcription."""
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        super().__init__(logger)
+        self.logger = logger or logging.getLogger(__name__)
+        # Marking SpacySyllables as used to prevent unused import warning
+        _ = SpacySyllables
+        # Load spacy model with syllables pipeline
+        try:
+            self.nlp = spacy.load("en_core_web_sm")
+        except OSError:
+            self.logger.info("Language model 'en_core_web_sm' not found. Attempting to download...")
+            import subprocess
+            try:
+                subprocess.check_call(["python", "-m", "spacy", "download", "en_core_web_sm"])
+                self.nlp = spacy.load("en_core_web_sm")
+                self.logger.info("Successfully downloaded and loaded en_core_web_sm")
+            except subprocess.CalledProcessError as e:
+                raise OSError(
+                    "Language model 'en_core_web_sm' could not be downloaded. "
+                    "Please install it manually with: python -m spacy download en_core_web_sm"
+                ) from e
+        # Add syllables component to pipeline if not already present
+        if "syllables" not in self.nlp.pipe_names:
+            self.nlp.add_pipe("syllables", after="tagger")
+        # Initialize Pyphen for English
+        self.dic = pyphen.Pyphen(lang="en_US")
+        # Initialize NLTK's CMU dictionary
+        try:
+            self.cmudict = cmudict.dict()
+        except LookupError:
+            nltk.download("cmudict")
+            self.cmudict = cmudict.dict()
+    def _count_syllables_spacy(self, words: List[str]) -> int:
+        """Count syllables using spacy_syllables."""
+        text = " ".join(words)
+        doc = self.nlp(text)
+        total_syllables = sum(token._.syllables_count or 1 for token in doc)
+        return total_syllables
+    def _count_syllables_pyphen(self, words: List[str]) -> int:
+        """Count syllables using pyphen."""
+        total_syllables = 0
+        for word in words:
+            hyphenated = self.dic.inserted(word)
+            syllables_count = len(hyphenated.split("-")) if hyphenated else 1
+            total_syllables += syllables_count
+        return total_syllables
+    def _count_syllables_nltk(self, words: List[str]) -> int:
+        """Count syllables using NLTK's CMU dictionary."""
+        total_syllables = 0
+        for word in words:
+            word = word.lower()
+            if word in self.cmudict:
+                syllables_count = len([ph for ph in self.cmudict[word][0] if ph[-1].isdigit()])
+                total_syllables += syllables_count
+            else:
+                total_syllables += 1
+        return total_syllables
+    def _count_syllables_lib(self, words: List[str]) -> int:
+        """Count syllables using the syllables library."""
+        total_syllables = 0
+        for word in words:
+            syllables_count = syllables.estimate(word)
+            total_syllables += syllables_count
+        return total_syllables
+    def _count_syllables(self, words: List[str]) -> List[int]:
+        """Count syllables using multiple methods."""
+        spacy_count = self._count_syllables_spacy(words)
+        pyphen_count = self._count_syllables_pyphen(words)
+        nltk_count = self._count_syllables_nltk(words)
+        syllables_count = self._count_syllables_lib(words)
+        text = " ".join(words)
+        self.logger.debug(
+            f"Syllable counts for '{text}': spacy={spacy_count}, pyphen={pyphen_count}, nltk={nltk_count}, syllables={syllables_count}"
+        )
+        return [spacy_count, pyphen_count, nltk_count, syllables_count]
+    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
+        # Must have reference words
+        if not gap.reference_word_ids:
+            self.logger.debug("No reference word IDs available")
+            return False, {}
+        # Get word lookup map from data
+        if not data or "word_map" not in data:
+            self.logger.error("No word_map provided in data")
+            return False, {}
+        word_map = data["word_map"]
+        # Get actual words from word IDs
+        gap_words = []
+        for word_id in gap.transcribed_word_ids:
+            if word_id not in word_map:
+                self.logger.error(f"Word ID {word_id} not found in word_map")
+                return False, {}
+            gap_words.append(word_map[word_id].text)
+        # Get syllable counts for gap text using different methods
+        gap_syllables = self._count_syllables(gap_words)
+        # Check if any reference source has matching syllable count with any method
+        for source, ref_word_ids in gap.reference_word_ids.items():
+            # Get reference words from word map
+            ref_words = []
+            for word_id in ref_word_ids:
+                if word_id not in word_map:
+                    self.logger.error(f"Reference word ID {word_id} not found in word_map")
+                    continue
+                ref_words.append(word_map[word_id].text)
+            if not ref_words:
+                continue
+            ref_syllables = self._count_syllables(ref_words)
+            # If any counting method matches between gap and reference, we can handle it
+            if any(gap_count == ref_count for gap_count in gap_syllables for ref_count in ref_syllables):
+                self.logger.debug(f"Found matching syllable count in source '{source}'")
+                return True, {
+                    "gap_syllables": gap_syllables,
+                    "matching_source": source,
+                    "reference_word_ids": ref_word_ids,
+                    "word_map": word_map,
+                }
+        self.logger.debug("No reference source had matching syllable count")
+        return False, {}
+    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
+        """Handle the gap using syllable matching."""
+        if not data:
+            can_handle, data = self.can_handle(gap)
+            if not can_handle:
+                return []
+        corrections = []
+        matching_source = data["matching_source"]
+        reference_word_ids = data["reference_word_ids"]
+        word_map = data["word_map"]
+        # Get the actual words from word IDs
+        gap_words = [word_map[word_id].text for word_id in gap.transcribed_word_ids]
+        ref_words = [word_map[word_id].text for word_id in reference_word_ids]
+        # Use the centralized method to calculate reference positions
+        reference_positions = WordOperations.calculate_reference_positions(gap, [matching_source])
+        # Since we matched syllable counts for the entire gap, we should handle all words
+        if len(gap_words) > len(ref_words):
+            # Multiple transcribed words -> fewer reference words
+            # Try to distribute the reference words across the gap words
+            words_per_ref = len(gap_words) / len(ref_words)
+            for ref_idx, ref_word_id in enumerate(reference_word_ids):
+                start_idx = int(ref_idx * words_per_ref)
+                end_idx = int((ref_idx + 1) * words_per_ref)
+                # Get the group of words to combine
+                words_to_combine = gap_words[start_idx:end_idx]
+                word_ids_to_combine = gap.transcribed_word_ids[start_idx:end_idx]
+                corrections.extend(
+                    WordOperations.create_word_combine_corrections(
+                        original_words=words_to_combine,
+                        reference_word=word_map[ref_word_id].text,
+                        original_position=gap.transcription_position + start_idx,
+                        source=matching_source,
+                        confidence=0.8,
+                        combine_reason="Words combined based on syllable match",
+                        delete_reason="Word removed as part of syllable match combination",
+                        reference_positions=reference_positions,
+                        handler="SyllablesMatchHandler",
+                        original_word_ids=word_ids_to_combine,
+                        corrected_word_id=ref_word_id,
+                    )
+                )
+        elif len(gap_words) < len(ref_words):
+            # Single transcribed word -> multiple reference words
+            words_per_gap = len(ref_words) / len(gap_words)
+            for i, word_id in enumerate(gap.transcribed_word_ids):
+                start_idx = int(i * words_per_gap)
+                end_idx = int((i + 1) * words_per_gap)
+                ref_word_ids_for_split = reference_word_ids[start_idx:end_idx]
+                ref_words_for_split = [word_map[ref_id].text for ref_id in ref_word_ids_for_split]
+                corrections.extend(
+                    WordOperations.create_word_split_corrections(
+                        original_word=word_map[word_id].text,
+                        reference_words=ref_words_for_split,
+                        original_position=gap.transcription_position + i,
+                        source=matching_source,
+                        confidence=0.8,
+                        reason="Split word based on syllable match",
+                        reference_positions=reference_positions,
+                        handler="SyllablesMatchHandler",
+                        original_word_id=word_id,
+                        corrected_word_ids=ref_word_ids_for_split,
+                    )
+                )
+        else:
+            # One-to-one replacement
+            for i, (orig_word_id, ref_word_id) in enumerate(zip(gap.transcribed_word_ids, reference_word_ids)):
+                orig_word = word_map[orig_word_id]
+                ref_word = word_map[ref_word_id]
+                if orig_word.text.lower() != ref_word.text.lower():
+                    corrections.append(
+                        WordOperations.create_word_replacement_correction(
+                            original_word=orig_word.text,
+                            corrected_word=ref_word.text,
+                            original_position=gap.transcription_position + i,
+                            source=matching_source,
+                            confidence=0.8,
+                            reason=f"Source '{matching_source}' had matching syllable count",
+                            reference_positions=reference_positions,
+                            handler="SyllablesMatchHandler",
+                            original_word_id=orig_word_id,
+                            corrected_word_id=ref_word_id,
+                        )
+                    )
+        return corrections

lyrics_transcriber/correction/handlers/word_count_match.py ADDED Viewed

@@ -0,0 +1,80 @@
+from typing import List, Tuple, Dict, Any, Optional
+import logging
+from lyrics_transcriber.types import GapSequence, WordCorrection
+from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
+from lyrics_transcriber.correction.handlers.word_operations import WordOperations
+class WordCountMatchHandler(GapCorrectionHandler):
+    """Handles gaps where reference sources agree and have matching word counts."""
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        super().__init__(logger)
+        self.logger = logger or logging.getLogger(__name__)
+    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
+        # Must have reference words
+        if not gap.reference_word_ids:
+            self.logger.debug("No reference word IDs available.")
+            return False, {}
+        if not self._validate_data(data):
+            return False, {}
+        ref_word_lists = list(gap.reference_word_ids.values())
+        # All sources must have same number of words as gap
+        if not all(len(words) == gap.length for words in ref_word_lists):
+            self.logger.debug("Not all sources have the same number of words as the gap.")
+            return False, {}
+        # If we have multiple sources, they must all agree
+        if len(ref_word_lists) > 1 and not all(words == ref_word_lists[0] for words in ref_word_lists[1:]):
+            self.logger.debug("Not all sources agree on the words.")
+            return False, {}
+        self.logger.debug("All sources agree and have matching word counts.")
+        return True, {"word_map": data["word_map"]}
+    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
+        if not self._validate_data(data):
+            return []
+        corrections = []
+        word_map = data["word_map"]
+        source = list(gap.reference_word_ids.keys())[0]
+        reference_word_ids = gap.reference_word_ids[source]
+        sources = ", ".join(gap.reference_word_ids.keys())
+        reference_positions = WordOperations.calculate_reference_positions(gap)
+        for i, (orig_word_id, ref_word_id) in enumerate(zip(gap.transcribed_word_ids, reference_word_ids)):
+            # Get the actual words from the word map
+            if orig_word_id not in word_map:
+                self.logger.error(f"Original word ID {orig_word_id} not found in word_map")
+                continue
+            orig_word = word_map[orig_word_id]
+            if ref_word_id not in word_map:
+                self.logger.error(f"Reference word ID {ref_word_id} not found in word_map")
+                continue
+            ref_word = word_map[ref_word_id]
+            if orig_word.text.lower() != ref_word.text.lower():
+                correction = WordOperations.create_word_replacement_correction(
+                    original_word=orig_word.text,
+                    corrected_word=ref_word.text,
+                    original_position=gap.transcription_position + i,
+                    source=sources,
+                    confidence=1.0,
+                    reason="Reference sources had same word count as gap",
+                    reference_positions=reference_positions,
+                    handler="WordCountMatchHandler",
+                    original_word_id=orig_word_id,
+                    corrected_word_id=ref_word_id,  # Use the reference word's ID
+                )
+                corrections.append(correction)
+                self.logger.debug(f"Correction made: {correction}")
+        return corrections

lyrics_transcriber/correction/handlers/word_operations.py ADDED Viewed

@@ -0,0 +1,187 @@
+from typing import List, Optional, Dict, Any
+from lyrics_transcriber.types import WordCorrection, GapSequence
+from lyrics_transcriber.utils.word_utils import WordUtils
+class WordOperations:
+    """Utility class for common word manipulation operations used by correction handlers."""
+    @staticmethod
+    def calculate_reference_positions(
+        gap: GapSequence, sources: Optional[List[str]] = None, anchor_sequences: Optional[List[Any]] = None
+    ) -> Dict[str, int]:
+        """Calculate reference positions for given sources based on preceding anchor.
+        Args:
+            gap: The gap sequence containing the preceding anchor ID
+            sources: Optional list of sources to calculate positions for. If None, uses all sources.
+            anchor_sequences: List of anchor sequences to look up preceding anchor
+        Returns:
+            Dictionary mapping source names to their reference positions
+        """
+        reference_positions = {}
+        if not gap.preceding_anchor_id or not anchor_sequences:
+            return reference_positions
+        # Find the preceding anchor in the sequences
+        preceding_anchor = next(
+            (scored_anchor.anchor for scored_anchor in anchor_sequences if scored_anchor.anchor.id == gap.preceding_anchor_id), None
+        )
+        if not preceding_anchor:
+            return reference_positions
+        # If no sources specified, use all sources from reference words
+        sources_to_check = sources or list(gap.reference_word_ids.keys())
+        for source in sources_to_check:
+            # Get reference positions from the anchor
+            if source in preceding_anchor.reference_positions:
+                # Calculate base position from anchor
+                anchor_pos = preceding_anchor.reference_positions[source]
+                base_ref_pos = anchor_pos + len(preceding_anchor.reference_word_ids[source])
+                # Calculate word offset within the gap
+                word_offset = 0
+                # Add word offset to base position
+                ref_pos = base_ref_pos + word_offset
+                reference_positions[source] = ref_pos
+        return reference_positions
+    @staticmethod
+    def create_word_replacement_correction(
+        original_word: str,
+        corrected_word: str,
+        original_position: int,
+        source: str,
+        confidence: float,
+        reason: str,
+        handler: str,
+        reference_positions: Optional[Dict[str, int]] = None,
+        original_word_id: Optional[str] = None,
+        corrected_word_id: Optional[str] = None,
+    ) -> WordCorrection:
+        """Creates a correction for replacing a single word with another word."""
+        return WordCorrection(
+            original_word=original_word,
+            corrected_word=corrected_word,
+            segment_index=0,
+            original_position=original_position,
+            confidence=confidence,
+            source=source,
+            reason=reason,
+            alternatives={},
+            reference_positions=reference_positions,
+            length=1,
+            handler=handler,
+            word_id=original_word_id,
+            corrected_word_id=corrected_word_id if corrected_word_id is not None else (WordUtils.generate_id() if corrected_word else None),
+        )
+    @staticmethod
+    def create_word_split_corrections(
+        original_word: str,
+        reference_words: List[str],
+        original_position: int,
+        source: str,
+        confidence: float,
+        reason: str,
+        handler: str,
+        reference_positions: Optional[Dict[str, int]] = None,
+        original_word_id: Optional[str] = None,
+        corrected_word_ids: Optional[List[str]] = None,
+    ) -> List[WordCorrection]:
+        """Creates corrections for splitting a single word into multiple words."""
+        corrections = []
+        # Generate word IDs if none provided
+        if corrected_word_ids is None:
+            corrected_word_ids = [WordUtils.generate_id() for _ in reference_words]
+        for split_idx, (ref_word, word_id) in enumerate(zip(reference_words, corrected_word_ids)):
+            corrections.append(
+                WordCorrection(
+                    original_word=original_word,
+                    corrected_word=ref_word,
+                    segment_index=0,
+                    original_position=original_position,
+                    confidence=confidence,
+                    source=source,
+                    reason=reason,
+                    alternatives={},
+                    split_index=split_idx,
+                    split_total=len(reference_words),
+                    reference_positions=reference_positions,
+                    length=1,  # Each split word is length 1
+                    handler=handler,
+                    word_id=WordUtils.generate_id(),  # Generate new ID for each split
+                    corrected_word_id=word_id,
+                )
+            )
+        return corrections
+    @staticmethod
+    def create_word_combine_corrections(
+        original_words: List[str],
+        reference_word: str,
+        original_position: int,
+        source: str,
+        confidence: float,
+        combine_reason: str,
+        delete_reason: str,
+        handler: str,
+        reference_positions: Optional[Dict[str, int]] = None,
+        original_word_ids: Optional[List[str]] = None,
+        corrected_word_id: Optional[str] = None,
+    ) -> List[WordCorrection]:
+        """Creates corrections for combining multiple words into a single word."""
+        corrections = []
+        word_ids = original_word_ids or [None] * len(original_words)
+        final_word_id = corrected_word_id or WordUtils.generate_id()
+        # First word gets replaced
+        corrections.append(
+            WordCorrection(
+                original_word=original_words[0],
+                corrected_word=reference_word,
+                segment_index=0,
+                original_position=original_position,
+                confidence=confidence,
+                source=source,
+                reason=combine_reason,
+                alternatives={},
+                reference_positions=reference_positions,
+                length=len(original_words),  # Combined word spans all original words
+                handler=handler,
+                word_id=WordUtils.generate_id(),  # Generate new ID for combined word
+                corrected_word_id=final_word_id,
+            )
+        )
+        # Additional words get marked for deletion
+        for i, (word, word_id) in enumerate(zip(original_words[1:], word_ids[1:]), start=1):
+            corrections.append(
+                WordCorrection(
+                    original_word=word,
+                    corrected_word="",
+                    segment_index=0,
+                    original_position=original_position + i,
+                    confidence=confidence,
+                    source=source,
+                    reason=delete_reason,
+                    alternatives={},
+                    is_deletion=True,
+                    reference_positions=reference_positions,
+                    length=1,  # Deleted words are length 1
+                    handler=handler,
+                    word_id=WordUtils.generate_id(),  # Generate new ID for each deleted word
+                    corrected_word_id=None,  # Deleted words don't need a corrected ID
+                )
+            )
+        return corrections

karaoke-gen 0.57.0__py3-none-any.whl → 0.71.23__py3-none-any.whl

karaoke-gen 0.57.0py3-none-any.whl → 0.71.23py3-none-any.whl