PyPI - karaoke-gen - Versions diffs - 0.75.54__py3-none-any.whl - Mend

karaoke-gen 0.75.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of karaoke-gen might be problematic. Click here for more details.

Files changed (287)

karaoke_gen/__init__.py +38 -0
karaoke_gen/audio_fetcher.py +1614 -0
karaoke_gen/audio_processor.py +790 -0
karaoke_gen/config.py +83 -0
karaoke_gen/file_handler.py +387 -0
karaoke_gen/instrumental_review/__init__.py +45 -0
karaoke_gen/instrumental_review/analyzer.py +408 -0
karaoke_gen/instrumental_review/editor.py +322 -0
karaoke_gen/instrumental_review/models.py +171 -0
karaoke_gen/instrumental_review/server.py +475 -0
karaoke_gen/instrumental_review/static/index.html +1529 -0
karaoke_gen/instrumental_review/waveform.py +409 -0
karaoke_gen/karaoke_finalise/__init__.py +1 -0
karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
karaoke_gen/karaoke_gen.py +1026 -0
karaoke_gen/lyrics_processor.py +474 -0
karaoke_gen/metadata.py +160 -0
karaoke_gen/pipeline/__init__.py +87 -0
karaoke_gen/pipeline/base.py +215 -0
karaoke_gen/pipeline/context.py +230 -0
karaoke_gen/pipeline/executors/__init__.py +21 -0
karaoke_gen/pipeline/executors/local.py +159 -0
karaoke_gen/pipeline/executors/remote.py +257 -0
karaoke_gen/pipeline/stages/__init__.py +27 -0
karaoke_gen/pipeline/stages/finalize.py +202 -0
karaoke_gen/pipeline/stages/render.py +165 -0
karaoke_gen/pipeline/stages/screens.py +139 -0
karaoke_gen/pipeline/stages/separation.py +191 -0
karaoke_gen/pipeline/stages/transcription.py +191 -0
karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
karaoke_gen/resources/Oswald-Bold.ttf +0 -0
karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
karaoke_gen/style_loader.py +531 -0
karaoke_gen/utils/__init__.py +18 -0
karaoke_gen/utils/bulk_cli.py +492 -0
karaoke_gen/utils/cli_args.py +432 -0
karaoke_gen/utils/gen_cli.py +978 -0
karaoke_gen/utils/remote_cli.py +3268 -0
karaoke_gen/video_background_processor.py +351 -0
karaoke_gen/video_generator.py +424 -0
karaoke_gen-0.75.54.dist-info/METADATA +718 -0
karaoke_gen-0.75.54.dist-info/RECORD +287 -0
karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
lyrics_transcriber/__init__.py +10 -0
lyrics_transcriber/cli/__init__.py +0 -0
lyrics_transcriber/cli/cli_main.py +285 -0
lyrics_transcriber/core/__init__.py +0 -0
lyrics_transcriber/core/config.py +50 -0
lyrics_transcriber/core/controller.py +594 -0
lyrics_transcriber/correction/__init__.py +0 -0
lyrics_transcriber/correction/agentic/__init__.py +9 -0
lyrics_transcriber/correction/agentic/adapter.py +71 -0
lyrics_transcriber/correction/agentic/agent.py +313 -0
lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
lyrics_transcriber/correction/agentic/models/enums.py +38 -0
lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
lyrics_transcriber/correction/agentic/models/utils.py +19 -0
lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
lyrics_transcriber/correction/agentic/providers/base.py +36 -0
lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
lyrics_transcriber/correction/agentic/providers/config.py +73 -0
lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
lyrics_transcriber/correction/agentic/providers/health.py +28 -0
lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
lyrics_transcriber/correction/agentic/router.py +35 -0
lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
lyrics_transcriber/correction/anchor_sequence.py +919 -0
lyrics_transcriber/correction/corrector.py +760 -0
lyrics_transcriber/correction/feedback/__init__.py +2 -0
lyrics_transcriber/correction/feedback/schemas.py +107 -0
lyrics_transcriber/correction/feedback/store.py +236 -0
lyrics_transcriber/correction/handlers/__init__.py +0 -0
lyrics_transcriber/correction/handlers/base.py +52 -0
lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
lyrics_transcriber/correction/handlers/llm.py +293 -0
lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
lyrics_transcriber/correction/handlers/repeat.py +88 -0
lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
lyrics_transcriber/correction/handlers/word_operations.py +187 -0
lyrics_transcriber/correction/operations.py +352 -0
lyrics_transcriber/correction/phrase_analyzer.py +435 -0
lyrics_transcriber/correction/text_utils.py +30 -0
lyrics_transcriber/frontend/.gitignore +23 -0
lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
lyrics_transcriber/frontend/.yarnrc.yml +3 -0
lyrics_transcriber/frontend/README.md +50 -0
lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
lyrics_transcriber/frontend/__init__.py +25 -0
lyrics_transcriber/frontend/eslint.config.js +28 -0
lyrics_transcriber/frontend/index.html +18 -0
lyrics_transcriber/frontend/package.json +42 -0
lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/public/favicon.ico +0 -0
lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/src/App.tsx +214 -0
lyrics_transcriber/frontend/src/api.ts +254 -0
lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
lyrics_transcriber/frontend/src/main.tsx +17 -0
lyrics_transcriber/frontend/src/theme.ts +177 -0
lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
lyrics_transcriber/frontend/src/types.js +2 -0
lyrics_transcriber/frontend/src/types.ts +199 -0
lyrics_transcriber/frontend/src/validation.ts +132 -0
lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
lyrics_transcriber/frontend/tsconfig.app.json +26 -0
lyrics_transcriber/frontend/tsconfig.json +25 -0
lyrics_transcriber/frontend/tsconfig.node.json +23 -0
lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
lyrics_transcriber/frontend/update_version.js +11 -0
lyrics_transcriber/frontend/vite.config.d.ts +2 -0
lyrics_transcriber/frontend/vite.config.js +10 -0
lyrics_transcriber/frontend/vite.config.ts +11 -0
lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
lyrics_transcriber/frontend/web_assets/index.html +18 -0
lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/yarn.lock +3752 -0
lyrics_transcriber/lyrics/__init__.py +0 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
lyrics_transcriber/lyrics/file_provider.py +95 -0
lyrics_transcriber/lyrics/genius.py +384 -0
lyrics_transcriber/lyrics/lrclib.py +231 -0
lyrics_transcriber/lyrics/musixmatch.py +156 -0
lyrics_transcriber/lyrics/spotify.py +290 -0
lyrics_transcriber/lyrics/user_input_provider.py +44 -0
lyrics_transcriber/output/__init__.py +0 -0
lyrics_transcriber/output/ass/__init__.py +21 -0
lyrics_transcriber/output/ass/ass.py +2088 -0
lyrics_transcriber/output/ass/ass_specs.txt +732 -0
lyrics_transcriber/output/ass/config.py +180 -0
lyrics_transcriber/output/ass/constants.py +23 -0
lyrics_transcriber/output/ass/event.py +94 -0
lyrics_transcriber/output/ass/formatters.py +132 -0
lyrics_transcriber/output/ass/lyrics_line.py +265 -0
lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
lyrics_transcriber/output/ass/section_detector.py +89 -0
lyrics_transcriber/output/ass/section_screen.py +106 -0
lyrics_transcriber/output/ass/style.py +187 -0
lyrics_transcriber/output/cdg.py +619 -0
lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
lyrics_transcriber/output/cdgmaker/config.py +151 -0
lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
lyrics_transcriber/output/cdgmaker/pack.py +507 -0
lyrics_transcriber/output/cdgmaker/render.py +346 -0
lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
lyrics_transcriber/output/cdgmaker/utils.py +132 -0
lyrics_transcriber/output/countdown_processor.py +306 -0
lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
lyrics_transcriber/output/fonts/arial.ttf +0 -0
lyrics_transcriber/output/fonts/georgia.ttf +0 -0
lyrics_transcriber/output/fonts/verdana.ttf +0 -0
lyrics_transcriber/output/generator.py +257 -0
lyrics_transcriber/output/lrc_to_cdg.py +61 -0
lyrics_transcriber/output/lyrics_file.py +102 -0
lyrics_transcriber/output/plain_text.py +96 -0
lyrics_transcriber/output/segment_resizer.py +431 -0
lyrics_transcriber/output/subtitles.py +397 -0
lyrics_transcriber/output/video.py +544 -0
lyrics_transcriber/review/__init__.py +0 -0
lyrics_transcriber/review/server.py +676 -0
lyrics_transcriber/storage/__init__.py +0 -0
lyrics_transcriber/storage/dropbox.py +225 -0
lyrics_transcriber/transcribers/__init__.py +0 -0
lyrics_transcriber/transcribers/audioshake.py +379 -0
lyrics_transcriber/transcribers/base_transcriber.py +157 -0
lyrics_transcriber/transcribers/whisper.py +330 -0
lyrics_transcriber/types.py +650 -0
lyrics_transcriber/utils/__init__.py +0 -0
lyrics_transcriber/utils/word_utils.py +27 -0

lyrics_transcriber/correction/agentic/adapter.py ADDED Viewed

@@ -0,0 +1,71 @@
+from __future__ import annotations
+from typing import Dict, Any, List
+from .models.schemas import CorrectionProposal
+from lyrics_transcriber.types import WordCorrection, Word
+from lyrics_transcriber.utils.word_utils import WordUtils
+def adapt_proposals_to_word_corrections(
+    proposals: List[CorrectionProposal],
+    word_map: Dict[str, Word],
+    linear_position_map: Dict[str, int],
+) -> List[WordCorrection]:
+    """Translate agentic CorrectionProposal items into WordCorrection objects.
+
+    Only single-word "ReplaceWord" and "DeleteWord" actions are mapped (the
+    action name is compared case-insensitively). A proposal is skipped when
+    its target word id is missing or not present in ``word_map``, when the
+    action is anything else, or when a replacement carries no text.
+
+    Args:
+        proposals: Proposals to convert.
+        word_map: Lookup from word id to the original word object.
+        linear_position_map: Lookup from word id to its position; a missing
+            id falls back to position 0.
+
+    Returns:
+        The corrections, in the same order as the proposals that produced them.
+        Each reason string carries the gap category and confidence when set.
+    """
+    corrections: List[WordCorrection] = []
+    for proposal in proposals:
+        action_name = (proposal.action or "").lower()
+
+        # Prefer the explicit single id, otherwise the first of the id list.
+        word_id = proposal.word_id
+        if not word_id:
+            word_id = proposal.word_ids[0] if proposal.word_ids else None
+        if not word_id or word_id not in word_map:
+            continue
+
+        source_word = word_map[word_id]
+        position = linear_position_map.get(word_id, 0)
+
+        # Assemble the human-readable reason: base text, then optional
+        # "[category]" and "(confidence: NN%)" suffixes.
+        category_suffix = ""
+        if proposal.gap_category:
+            category_suffix = f" [{proposal.gap_category.value}]"
+        confidence_suffix = ""
+        if proposal.confidence:
+            confidence_suffix = f" (confidence: {proposal.confidence:.0%})"
+        reason_text = f"{proposal.reason or 'AI correction'}{category_suffix}{confidence_suffix}"
+
+        if action_name == "deleteword":
+            is_deletion = True
+        elif action_name == "replaceword" and proposal.replacement_text:
+            is_deletion = False
+        else:
+            # Unsupported action, or a replacement without text: ignore.
+            continue
+
+        corrections.append(
+            WordCorrection(
+                original_word=source_word.text,
+                corrected_word="" if is_deletion else proposal.replacement_text,
+                original_position=position,
+                source="agentic",
+                reason=reason_text,
+                confidence=float(proposal.confidence or 0.0),
+                is_deletion=is_deletion,
+                word_id=word_id,
+                # Deleted words get no corrected id; replacements get a fresh one.
+                corrected_word_id=None if is_deletion else WordUtils.generate_id(),
+                handler="AgenticCorrector",  # Required by frontend
+                reference_positions={},  # Required by frontend
+            )
+        )
+    return corrections

lyrics_transcriber/correction/agentic/agent.py ADDED Viewed

@@ -0,0 +1,313 @@
+from __future__ import annotations
+import logging
+import os
+import json
+from typing import Dict, Any, List, Optional
+from .providers.base import BaseAIProvider
+from .providers.langchain_bridge import LangChainBridge
+from .providers.config import ProviderConfig
+from .models.schemas import CorrectionProposal, GapClassification, GapCategory
+from .workflows.correction_graph import build_correction_graph
+from .prompts.classifier import build_classification_prompt
+from .handlers.registry import HandlerRegistry
+logger = logging.getLogger(__name__)
+class AgenticCorrector:
+    """Main entry for agentic AI correction using LangChain + LangGraph.
+
+    This orchestrates correction workflows using LangGraph for state management
+    and LangChain ChatModels for provider integration. Langfuse tracing is
+    automatic via LangChain callbacks.
+
+    Uses dependency injection for better testability - you can inject a
+    mock provider for testing.
+
+    Two entry points exist: propose_for_gap() (classify the gap, then route
+    to a category handler) and the deprecated single-step propose().
+    """
+    def __init__(
+        self,
+        provider: BaseAIProvider,
+        graph: Optional[Any] = None,
+        langfuse_handler: Optional[Any] = None,
+        session_id: Optional[str] = None
+    ):
+        """Initialize with injected dependencies.
+
+        Args:
+            provider: AI provider implementation (e.g., LangChainBridge)
+            graph: Optional LangGraph workflow (builds default if None)
+            langfuse_handler: Optional Langfuse callback handler (if None, will try to get from provider)
+            session_id: Optional Langfuse session ID to group related traces
+        """
+        # _provider must be assigned first: _get_provider_handler() reads it.
+        self._provider = provider
+        self._session_id = session_id
+        # Get Langfuse handler from provider if available (avoids duplication)
+        # An explicitly passed (truthy) handler takes precedence.
+        self._langfuse_handler = langfuse_handler or self._get_provider_handler()
+        # Build graph with Langfuse callback if available
+        # The default graph is only built when no graph was injected.
+        self._graph = graph if graph is not None else build_correction_graph(
+            callbacks=[self._langfuse_handler] if self._langfuse_handler else None
+        )
+    def _get_provider_handler(self) -> Optional[Any]:
+        """Get Langfuse handler from provider if it has one.
+
+        This avoids duplicating Langfuse initialization - if the provider
+        (e.g., LangChainBridge) already has a handler, we reuse it.
+
+        Detection is duck-typed via hasattr() on the provider's private
+        attributes rather than an isinstance check.
+
+        Returns:
+            CallbackHandler instance from provider, or None
+        """
+        # Check if provider is LangChainBridge and has a factory
+        if hasattr(self._provider, '_factory'):
+            factory = self._provider._factory
+            # Force initialization of Langfuse if keys are present
+            # This ensures the handler is available when we need it
+            if hasattr(factory, '_langfuse_initialized'):
+                if not factory._langfuse_initialized:
+                    # Initialize by calling _create_callbacks (which triggers _initialize_langfuse)
+                    # The return value is discarded; the call is made for its side effect only.
+                    factory._create_callbacks(self._provider._model)
+            # Now check if handler is available
+            if hasattr(factory, '_langfuse_handler'):
+                handler = factory._langfuse_handler
+                if handler:
+                    logger.debug("🤖 Reusing Langfuse handler from ModelFactory")
+                    return handler
+        logger.debug("🤖 No Langfuse handler from provider")
+        return None
+    @classmethod
+    def from_model(
+        cls,
+        model: str,
+        config: ProviderConfig | None = None,
+        session_id: Optional[str] = None,
+        cache_dir: Optional[str] = None
+    ) -> "AgenticCorrector":
+        """Factory method to create corrector from model specification.
+
+        This is a convenience method for the common case where you want
+        to use LangChainBridge with a model spec string.
+
+        Args:
+            model: Model identifier in format "provider/model"
+            config: Optional provider configuration
+            session_id: Optional Langfuse session ID to group related traces
+            cache_dir: Optional cache directory (uses default if not provided)
+
+        Returns:
+            AgenticCorrector instance with LangChainBridge provider
+        """
+        # cache_dir is only consulted when no config is supplied.
+        config = config or ProviderConfig.from_env(cache_dir=cache_dir)
+        provider = LangChainBridge(model=model, config=config)
+        return cls(provider=provider, session_id=session_id)
+    def classify_gap(
+        self,
+        gap_id: str,
+        gap_text: str,
+        preceding_words: str,
+        following_words: str,
+        reference_contexts: Dict[str, str],
+        artist: Optional[str] = None,
+        title: Optional[str] = None
+    ) -> Optional[GapClassification]:
+        """Classify a gap using the AI provider.
+
+        Args:
+            gap_id: Unique identifier for the gap
+            gap_text: The text of the gap
+            preceding_words: Text immediately before the gap
+            following_words: Text immediately after the gap
+            reference_contexts: Dictionary of reference lyrics from each source
+            artist: Song artist name
+            title: Song title
+
+        Returns:
+            GapClassification object or None if classification fails
+            (empty response, an error item, or any exception, which is
+            logged as a warning rather than raised)
+        """
+        # Build classification prompt
+        prompt = build_classification_prompt(
+            gap_text=gap_text,
+            preceding_words=preceding_words,
+            following_words=following_words,
+            reference_contexts=reference_contexts,
+            artist=artist,
+            title=title,
+            gap_id=gap_id
+        )
+        # Call AI provider to get classification
+        try:
+            data = self._provider.generate_correction_proposals(
+                prompt,
+                schema=GapClassification.model_json_schema(),
+                session_id=self._session_id
+            )
+            # Extract first result
+            # Only the first item is used; any further items are ignored.
+            if data and len(data) > 0:
+                item = data[0]
+                # A dict containing an "error" key is treated as a failed call.
+                if isinstance(item, dict) and "error" not in item:
+                    classification = GapClassification.model_validate(item)
+                    logger.debug(f"🤖 Classified gap {gap_id} as {classification.category} (confidence: {classification.confidence})")
+                    return classification
+        except Exception as e:
+            logger.warning(f"🤖 Failed to classify gap {gap_id}: {e}")
+        return None
+    def propose_for_gap(
+        self,
+        gap_id: str,
+        gap_words: List[Dict[str, Any]],
+        preceding_words: str,
+        following_words: str,
+        reference_contexts: Dict[str, str],
+        artist: Optional[str] = None,
+        title: Optional[str] = None
+    ) -> List[CorrectionProposal]:
+        """Generate correction proposals for a gap using two-step classification workflow.
+
+        If classification or the category handler fails, a single "Flag"
+        proposal covering every gap word is returned instead, with zero
+        confidence and requires_human_review set.
+
+        Args:
+            gap_id: Unique identifier for the gap
+            gap_words: List of word dictionaries with id, text, start_time, end_time
+            preceding_words: Text immediately before the gap
+            following_words: Text immediately after the gap
+            reference_contexts: Dictionary of reference lyrics from each source
+            artist: Song artist name
+            title: Song title
+
+        Returns:
+            List of CorrectionProposal objects
+        """
+        # Step 1: Classify the gap
+        # A missing 'text' key contributes an empty string to the joined text.
+        gap_text = ' '.join(w.get('text', '') for w in gap_words)
+        classification = self.classify_gap(
+            gap_id=gap_id,
+            gap_text=gap_text,
+            preceding_words=preceding_words,
+            following_words=following_words,
+            reference_contexts=reference_contexts,
+            artist=artist,
+            title=title
+        )
+        if not classification:
+            # Classification failed, flag for human review
+            logger.warning(f"🤖 Classification failed for gap {gap_id}, flagging for review")
+            # NOTE: unlike 'text' above, 'id' is read with w['id'] and must exist.
+            return [CorrectionProposal(
+                word_ids=[w['id'] for w in gap_words],
+                action="Flag",
+                confidence=0.0,
+                reason="Classification failed - unable to categorize gap",
+                requires_human_review=True,
+                artist=artist,
+                title=title
+            )]
+        # Step 2: Route to appropriate handler based on category
+        try:
+            handler = HandlerRegistry.get_handler(
+                category=classification.category,
+                artist=artist,
+                title=title
+            )
+            proposals = handler.handle(
+                gap_id=gap_id,
+                gap_words=gap_words,
+                preceding_words=preceding_words,
+                following_words=following_words,
+                reference_contexts=reference_contexts,
+                classification_reasoning=classification.reasoning
+            )
+            # Add classification metadata to proposals
+            # Values already set by the handler are left untouched.
+            for proposal in proposals:
+                if not proposal.gap_category:
+                    proposal.gap_category = classification.category
+                if not proposal.artist:
+                    proposal.artist = artist
+                if not proposal.title:
+                    proposal.title = title
+            return proposals
+        except Exception as e:
+            logger.error(f"🤖 Handler failed for gap {gap_id} (category: {classification.category}): {e}")
+            # Handler failed, flag for human review
+            return [CorrectionProposal(
+                word_ids=[w['id'] for w in gap_words],
+                action="Flag",
+                confidence=0.0,
+                reason=f"Handler error for category {classification.category}: {str(e)}",
+                gap_category=classification.category,
+                requires_human_review=True,
+                artist=artist,
+                title=title
+            )]
+    def propose(self, prompt: str) -> List[CorrectionProposal]:
+        """Generate correction proposals using LangGraph + LangChain.
+
+        DEPRECATED: This method uses the old single-step approach.
+        Use propose_for_gap() for the new two-step classification workflow.
+
+        Unlike classify_gap(), the provider call here is not wrapped in a
+        try block, so provider exceptions propagate to the caller.
+
+        Args:
+            prompt: The correction prompt with gap text and reference context
+
+        Returns:
+            List of validated CorrectionProposal objects
+        """
+        # Prepare config with session_id in metadata (Langfuse format)
+        # Callbacks and session metadata are only attached when a handler exists.
+        config = {}
+        if self._langfuse_handler:
+            config["callbacks"] = [self._langfuse_handler]
+            if self._session_id:
+                config["metadata"] = {"langfuse_session_id": self._session_id}
+                logger.debug(f"🤖 Set Langfuse session_id in metadata: {self._session_id}")
+        # Run LangGraph workflow (with Langfuse tracing if configured)
+        # The graph's result is discarded and failures are only logged at
+        # debug level; the returned proposals come from the provider call below.
+        if self._graph:
+            try:
+                self._graph.invoke(
+                    {"prompt": prompt, "proposals": []},
+                    config=config
+                )
+            except Exception as e:
+                logger.debug(f"🤖 LangGraph workflow invocation failed: {e}")
+        # Get proposals from LangChain ChatModel
+        # Pass the session_id via metadata to the provider
+        data = self._provider.generate_correction_proposals(
+            prompt,
+            schema=CorrectionProposal.model_json_schema(),
+            session_id=self._session_id
+        )
+        # Validate via Pydantic; invalid entries are dropped
+        proposals: List[CorrectionProposal] = []
+        for item in data:
+            # Check if this is an error response from the provider
+            if isinstance(item, dict) and "error" in item:
+                logger.warning(f"🤖 Provider returned error: {item}")
+                continue
+            try:
+                proposals.append(CorrectionProposal.model_validate(item))
+            except Exception as e:
+                # Log validation errors for debugging
+                logger.debug(f"🤖 Failed to validate proposal: {e}, item: {item}")
+                continue
+        return proposals

lyrics_transcriber/correction/agentic/feedback/aggregator.py ADDED Viewed

@@ -0,0 +1,12 @@
+from __future__ import annotations
+from typing import Dict, Any
+class FeedbackAggregator:
+    """Placeholder for learning data aggregation logic."""
+    def aggregate(self, session_id: str) -> Dict[str, Any]:
+        return {"session_id": session_id, "status": "ok"}

lyrics_transcriber/correction/agentic/feedback/collector.py ADDED Viewed

@@ -0,0 +1,17 @@
+from __future__ import annotations
+from typing import Dict, Any
+from .store import FeedbackStore
+class FeedbackCollector:
+    def __init__(self, store: FeedbackStore | None):
+        self._store = store
+    def collect(self, feedback_id: str, session_id: str | None, data_json: str) -> None:
+        if not self._store:
+            return
+        self._store.put_feedback(feedback_id, session_id, data_json)

lyrics_transcriber/correction/agentic/feedback/retention.py ADDED Viewed

@@ -0,0 +1,24 @@
+from __future__ import annotations
+import sqlite3
+from datetime import datetime, timedelta
+from typing import Optional
+def cleanup_expired(db_path: str, older_than_days: int = 365 * 3) -> int:
+    """Cleanup routine placeholder; returns number of deleted rows.
+    Note: This placeholder assumes `data` JSON contains an ISO timestamp under
+    key `createdAt`. For production, store timestamps as columns.
+    """
+    threshold = (datetime.utcnow() - timedelta(days=older_than_days)).isoformat()
+    with sqlite3.connect(db_path) as conn:
+        cur = conn.cursor()
+        # Delete sessions and feedback older than threshold by created_at
+        cur.execute("DELETE FROM sessions WHERE created_at < ?", (threshold,))
+        cur.execute("DELETE FROM feedback WHERE created_at < ?", (threshold,))
+        deleted = cur.rowcount
+        conn.commit()
+        return deleted

lyrics_transcriber/correction/agentic/feedback/store.py ADDED Viewed

@@ -0,0 +1,76 @@
+from __future__ import annotations
+import sqlite3
+from dataclasses import asdict
+from pathlib import Path
+from typing import Dict, Any, Iterable, Optional
+from datetime import datetime
+class FeedbackStore:
+    """SQLite-backed store for sessions, corrections, and feedback.
+    This is a minimal implementation to satisfy contract needs; schema may
+    evolve. All operations are simple and synchronous for local usage.
+    """
+    def __init__(self, db_path: str | Path):
+        self._db_path = str(db_path)
+        self._init()
+    def _init(self) -> None:
+        with sqlite3.connect(self._db_path) as conn:
+            cur = conn.cursor()
+            cur.execute(
+                """
+                CREATE TABLE IF NOT EXISTS sessions (
+                    id TEXT PRIMARY KEY,
+                    data TEXT NOT NULL,
+                    created_at TEXT NOT NULL
+                )
+                """
+            )
+            cur.execute(
+                """
+                CREATE TABLE IF NOT EXISTS feedback (
+                    id TEXT PRIMARY KEY,
+                    session_id TEXT,
+                    data TEXT NOT NULL,
+                    created_at TEXT NOT NULL
+                )
+                """
+            )
+            # Attempt to add created_at if upgrading from older schema
+            try:
+                cur.execute("ALTER TABLE sessions ADD COLUMN created_at TEXT")
+            except Exception:
+                pass
+            try:
+                cur.execute("ALTER TABLE feedback ADD COLUMN created_at TEXT")
+            except Exception:
+                pass
+            conn.commit()
+    def put_session(self, session_id: str, data_json: str) -> None:
+        with sqlite3.connect(self._db_path) as conn:
+            conn.execute(
+                "REPLACE INTO sessions (id, data, created_at) VALUES (?, ?, ?)",
+                (session_id, data_json, datetime.utcnow().isoformat()),
+            )
+            conn.commit()
+    def get_session(self, session_id: str) -> Optional[str]:
+        with sqlite3.connect(self._db_path) as conn:
+            cur = conn.execute("SELECT data FROM sessions WHERE id = ?", (session_id,))
+            row = cur.fetchone()
+            return row[0] if row else None
+    def put_feedback(self, feedback_id: str, session_id: Optional[str], data_json: str) -> None:
+        with sqlite3.connect(self._db_path) as conn:
+            conn.execute(
+                "REPLACE INTO feedback (id, session_id, data, created_at) VALUES (?, ?, ?, ?)",
+                (feedback_id, session_id, data_json, datetime.utcnow().isoformat()),
+            )
+            conn.commit()

lyrics_transcriber/correction/agentic/handlers/__init__.py ADDED Viewed

@@ -0,0 +1,24 @@
+"""Category-specific handlers for gap correction."""
+from .base import BaseHandler
+from .punctuation import PunctuationHandler
+from .sound_alike import SoundAlikeHandler
+from .background_vocals import BackgroundVocalsHandler
+from .extra_words import ExtraWordsHandler
+from .repeated_section import RepeatedSectionHandler
+from .complex_multi_error import ComplexMultiErrorHandler
+from .ambiguous import AmbiguousHandler
+from .no_error import NoErrorHandler
+__all__ = [
+    'BaseHandler',
+    'PunctuationHandler',
+    'SoundAlikeHandler',
+    'BackgroundVocalsHandler',
+    'ExtraWordsHandler',
+    'RepeatedSectionHandler',
+    'ComplexMultiErrorHandler',
+    'AmbiguousHandler',
+    'NoErrorHandler',
+]

lyrics_transcriber/correction/agentic/handlers/ambiguous.py ADDED Viewed

@@ -0,0 +1,44 @@
+"""Handler for ambiguous gaps that need human review."""
+from typing import List, Dict, Any
+from .base import BaseHandler
+from ..models.schemas import CorrectionProposal, GapCategory
+class AmbiguousHandler(BaseHandler):
+    """Handles ambiguous gaps where correct action is unclear without audio."""
+    @property
+    def category(self) -> GapCategory:
+        return GapCategory.AMBIGUOUS
+    def handle(
+        self,
+        gap_id: str,
+        gap_words: List[Dict[str, Any]],
+        preceding_words: str,
+        following_words: str,
+        reference_contexts: Dict[str, str],
+        classification_reasoning: str = ""
+    ) -> List[CorrectionProposal]:
+        """Flag ambiguous gaps for human review."""
+        if not gap_words:
+            return []
+        # Ambiguous cases always require human review with audio
+        gap_text = ' '.join(w.get('text', '') for w in gap_words)
+        proposal = CorrectionProposal(
+            word_ids=[w['id'] for w in gap_words],
+            action="Flag",
+            confidence=0.4,
+            reason=f"Ambiguous gap: '{gap_text[:100]}...'. Cannot determine correct action without listening to audio. {classification_reasoning}",
+            gap_category=self.category,
+            requires_human_review=True,
+            artist=self.artist,
+            title=self.title
+        )
+        return [proposal]

lyrics_transcriber/correction/agentic/handlers/background_vocals.py ADDED Viewed

@@ -0,0 +1,68 @@
+"""Handler for background vocals that should be removed."""
+from typing import List, Dict, Any
+from .base import BaseHandler
+from ..models.schemas import CorrectionProposal, GapCategory
+class BackgroundVocalsHandler(BaseHandler):
+    """Handles gaps containing background vocals (usually in parentheses)."""
+    @property
+    def category(self) -> GapCategory:
+        return GapCategory.BACKGROUND_VOCALS
+    def handle(
+        self,
+        gap_id: str,
+        gap_words: List[Dict[str, Any]],
+        preceding_words: str,
+        following_words: str,
+        reference_contexts: Dict[str, str],
+        classification_reasoning: str = ""
+    ) -> List[CorrectionProposal]:
+        """Propose deletion of words in parentheses."""
+        if not gap_words:
+            return []
+        proposals = []
+        # Find words that are in parentheses or are parentheses themselves
+        words_to_delete = []
+        for word in gap_words:
+            text = word.get('text', '')
+            # Check if word has parentheses or is just parentheses
+            if '(' in text or ')' in text:
+                words_to_delete.append(word)
+        if words_to_delete:
+            # Create delete proposals for parenthesized content
+            proposal = CorrectionProposal(
+                word_ids=[w['id'] for w in words_to_delete],
+                action="DeleteWord",
+                confidence=0.85,
+                reason=f"Background vocals in parentheses, not in reference lyrics. {classification_reasoning}",
+                gap_category=self.category,
+                requires_human_review=False,
+                artist=self.artist,
+                title=self.title
+            )
+            proposals.append(proposal)
+        else:
+            # If no parentheses found but classified as background vocals,
+            # flag for review as classifier may have other reasoning
+            proposal = CorrectionProposal(
+                word_ids=[w['id'] for w in gap_words],
+                action="Flag",
+                confidence=0.6,
+                reason=f"Classified as background vocals but no parentheses found. {classification_reasoning}",
+                gap_category=self.category,
+                requires_human_review=True,
+                artist=self.artist,
+                title=self.title
+            )
+            proposals.append(proposal)
+        return proposals