PyPI - karaoke-gen - Versions diffs - 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl - Mend

karaoke-gen 0.57.0-py3-none-any.whl → 0.71.27-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (268) hide show

karaoke_gen/audio_fetcher.py +461 -0
karaoke_gen/audio_processor.py +407 -30
karaoke_gen/config.py +62 -113
karaoke_gen/file_handler.py +32 -59
karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
karaoke_gen/karaoke_gen.py +270 -61
karaoke_gen/lyrics_processor.py +13 -1
karaoke_gen/metadata.py +78 -73
karaoke_gen/pipeline/__init__.py +87 -0
karaoke_gen/pipeline/base.py +215 -0
karaoke_gen/pipeline/context.py +230 -0
karaoke_gen/pipeline/executors/__init__.py +21 -0
karaoke_gen/pipeline/executors/local.py +159 -0
karaoke_gen/pipeline/executors/remote.py +257 -0
karaoke_gen/pipeline/stages/__init__.py +27 -0
karaoke_gen/pipeline/stages/finalize.py +202 -0
karaoke_gen/pipeline/stages/render.py +165 -0
karaoke_gen/pipeline/stages/screens.py +139 -0
karaoke_gen/pipeline/stages/separation.py +191 -0
karaoke_gen/pipeline/stages/transcription.py +191 -0
karaoke_gen/style_loader.py +531 -0
karaoke_gen/utils/bulk_cli.py +6 -0
karaoke_gen/utils/cli_args.py +424 -0
karaoke_gen/utils/gen_cli.py +26 -261
karaoke_gen/utils/remote_cli.py +1965 -0
karaoke_gen/video_background_processor.py +351 -0
karaoke_gen-0.71.27.dist-info/METADATA +610 -0
karaoke_gen-0.71.27.dist-info/RECORD +275 -0
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/WHEEL +1 -1
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/entry_points.txt +1 -0
lyrics_transcriber/__init__.py +10 -0
lyrics_transcriber/cli/__init__.py +0 -0
lyrics_transcriber/cli/cli_main.py +285 -0
lyrics_transcriber/core/__init__.py +0 -0
lyrics_transcriber/core/config.py +50 -0
lyrics_transcriber/core/controller.py +520 -0
lyrics_transcriber/correction/__init__.py +0 -0
lyrics_transcriber/correction/agentic/__init__.py +9 -0
lyrics_transcriber/correction/agentic/adapter.py +71 -0
lyrics_transcriber/correction/agentic/agent.py +313 -0
lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
lyrics_transcriber/correction/agentic/models/enums.py +38 -0
lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
lyrics_transcriber/correction/agentic/models/utils.py +19 -0
lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
lyrics_transcriber/correction/agentic/providers/base.py +36 -0
lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
lyrics_transcriber/correction/agentic/providers/config.py +73 -0
lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
lyrics_transcriber/correction/agentic/providers/health.py +28 -0
lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
lyrics_transcriber/correction/agentic/router.py +35 -0
lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
lyrics_transcriber/correction/anchor_sequence.py +1043 -0
lyrics_transcriber/correction/corrector.py +760 -0
lyrics_transcriber/correction/feedback/__init__.py +2 -0
lyrics_transcriber/correction/feedback/schemas.py +107 -0
lyrics_transcriber/correction/feedback/store.py +236 -0
lyrics_transcriber/correction/handlers/__init__.py +0 -0
lyrics_transcriber/correction/handlers/base.py +52 -0
lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
lyrics_transcriber/correction/handlers/llm.py +293 -0
lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
lyrics_transcriber/correction/handlers/repeat.py +88 -0
lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
lyrics_transcriber/correction/handlers/word_operations.py +187 -0
lyrics_transcriber/correction/operations.py +352 -0
lyrics_transcriber/correction/phrase_analyzer.py +435 -0
lyrics_transcriber/correction/text_utils.py +30 -0
lyrics_transcriber/frontend/.gitignore +23 -0
lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
lyrics_transcriber/frontend/.yarnrc.yml +3 -0
lyrics_transcriber/frontend/README.md +50 -0
lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
lyrics_transcriber/frontend/__init__.py +25 -0
lyrics_transcriber/frontend/eslint.config.js +28 -0
lyrics_transcriber/frontend/index.html +18 -0
lyrics_transcriber/frontend/package.json +42 -0
lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/public/favicon.ico +0 -0
lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/src/App.tsx +212 -0
lyrics_transcriber/frontend/src/api.ts +239 -0
lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
lyrics_transcriber/frontend/src/main.tsx +17 -0
lyrics_transcriber/frontend/src/theme.ts +177 -0
lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
lyrics_transcriber/frontend/src/types.js +2 -0
lyrics_transcriber/frontend/src/types.ts +199 -0
lyrics_transcriber/frontend/src/validation.ts +132 -0
lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
lyrics_transcriber/frontend/tsconfig.app.json +26 -0
lyrics_transcriber/frontend/tsconfig.json +25 -0
lyrics_transcriber/frontend/tsconfig.node.json +23 -0
lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
lyrics_transcriber/frontend/update_version.js +11 -0
lyrics_transcriber/frontend/vite.config.d.ts +2 -0
lyrics_transcriber/frontend/vite.config.js +10 -0
lyrics_transcriber/frontend/vite.config.ts +11 -0
lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
lyrics_transcriber/frontend/web_assets/index.html +18 -0
lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/yarn.lock +3752 -0
lyrics_transcriber/lyrics/__init__.py +0 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
lyrics_transcriber/lyrics/file_provider.py +95 -0
lyrics_transcriber/lyrics/genius.py +384 -0
lyrics_transcriber/lyrics/lrclib.py +231 -0
lyrics_transcriber/lyrics/musixmatch.py +156 -0
lyrics_transcriber/lyrics/spotify.py +290 -0
lyrics_transcriber/lyrics/user_input_provider.py +44 -0
lyrics_transcriber/output/__init__.py +0 -0
lyrics_transcriber/output/ass/__init__.py +21 -0
lyrics_transcriber/output/ass/ass.py +2088 -0
lyrics_transcriber/output/ass/ass_specs.txt +732 -0
lyrics_transcriber/output/ass/config.py +180 -0
lyrics_transcriber/output/ass/constants.py +23 -0
lyrics_transcriber/output/ass/event.py +94 -0
lyrics_transcriber/output/ass/formatters.py +132 -0
lyrics_transcriber/output/ass/lyrics_line.py +265 -0
lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
lyrics_transcriber/output/ass/section_detector.py +89 -0
lyrics_transcriber/output/ass/section_screen.py +106 -0
lyrics_transcriber/output/ass/style.py +187 -0
lyrics_transcriber/output/cdg.py +619 -0
lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
lyrics_transcriber/output/cdgmaker/config.py +151 -0
lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
lyrics_transcriber/output/cdgmaker/pack.py +507 -0
lyrics_transcriber/output/cdgmaker/render.py +346 -0
lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
lyrics_transcriber/output/cdgmaker/utils.py +132 -0
lyrics_transcriber/output/countdown_processor.py +267 -0
lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
lyrics_transcriber/output/fonts/arial.ttf +0 -0
lyrics_transcriber/output/fonts/georgia.ttf +0 -0
lyrics_transcriber/output/fonts/verdana.ttf +0 -0
lyrics_transcriber/output/generator.py +257 -0
lyrics_transcriber/output/lrc_to_cdg.py +61 -0
lyrics_transcriber/output/lyrics_file.py +102 -0
lyrics_transcriber/output/plain_text.py +96 -0
lyrics_transcriber/output/segment_resizer.py +431 -0
lyrics_transcriber/output/subtitles.py +397 -0
lyrics_transcriber/output/video.py +544 -0
lyrics_transcriber/review/__init__.py +0 -0
lyrics_transcriber/review/server.py +676 -0
lyrics_transcriber/storage/__init__.py +0 -0
lyrics_transcriber/storage/dropbox.py +225 -0
lyrics_transcriber/transcribers/__init__.py +0 -0
lyrics_transcriber/transcribers/audioshake.py +290 -0
lyrics_transcriber/transcribers/base_transcriber.py +157 -0
lyrics_transcriber/transcribers/whisper.py +330 -0
lyrics_transcriber/types.py +648 -0
lyrics_transcriber/utils/__init__.py +0 -0
lyrics_transcriber/utils/word_utils.py +27 -0
karaoke_gen-0.57.0.dist-info/METADATA +0 -167
karaoke_gen-0.57.0.dist-info/RECORD +0 -23
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info/licenses}/LICENSE +0 -0

lyrics_transcriber/core/config.py ADDED Viewed

@@ -0,0 +1,50 @@
+import os
+from dataclasses import dataclass, field
+from typing import Any, Dict, Optional
+@dataclass
+class TranscriberConfig:
+    """Configuration for transcription services.
+
+    All credentials default to ``None``. LyricsTranscriber only builds a
+    transcriber when the credential(s) it needs are set.
+    """
+    # Token handed to AudioShakeConfig(api_token=...); the AudioShake transcriber is skipped when unset.
+    audioshake_api_token: Optional[str] = None
+    # RunPod API key; the Whisper transcriber is only built when this AND whisper_runpod_id are set.
+    runpod_api_key: Optional[str] = None
+    # Passed to WhisperConfig as endpoint_id.
+    whisper_runpod_id: Optional[str] = None
+@dataclass
+class LyricsConfig:
+    """Configuration for lyrics services.
+
+    All values default to ``None``. LyricsTranscriber copies them into a
+    LyricsProviderConfig and enables each provider whose credential is set.
+    """
+    # Enables the Genius provider when set.
+    genius_api_token: Optional[str] = None
+    # Enables the Musixmatch provider when set.
+    rapidapi_key: Optional[str] = None
+    # Enables the Spotify provider when set.
+    spotify_cookie: Optional[str] = None
+    # Path to a local lyrics file; when it exists, the file provider is used and no other provider is built.
+    lyrics_file: Optional[str] = None
+@dataclass
+class OutputConfig:
+    """Configuration for output generation.
+
+    Every field has a default, so ``OutputConfig()`` is a valid "all defaults"
+    configuration. Directory defaults are computed when an instance is created
+    rather than when this module is imported, so they reflect the working
+    directory and environment at that moment.
+    """
+    # Path to the styles JSON file. Optional: the controller treats a missing or
+    # non-existent file as "no styles" and disables CDG and video generation.
+    output_styles_json: Optional[str] = None
+    # Fallback line length, used when the styles do not define karaoke.max_line_length.
+    default_max_line_length: int = 36
+    # Parsed contents of the styles JSON; filled in by the controller.
+    styles: Dict[str, Any] = field(default_factory=dict)
+    # default_factory defers os.getcwd() to instantiation time instead of import time.
+    output_dir: Optional[str] = field(default_factory=os.getcwd)
+    # LYRICS_TRANSCRIBER_CACHE_DIR overrides the per-user default cache location.
+    cache_dir: str = field(
+        default_factory=lambda: os.getenv(
+            "LYRICS_TRANSCRIBER_CACHE_DIR",
+            os.path.join(os.path.expanduser("~"), "lyrics-transcriber-cache"),
+        )
+    )
+    # Feature toggles, all enabled by default.
+    fetch_lyrics: bool = True
+    run_transcription: bool = True
+    run_correction: bool = True
+    enable_review: bool = True
+    generate_plain_text: bool = True
+    generate_lrc: bool = True
+    generate_cdg: bool = True
+    render_video: bool = True
+    video_resolution: str = "360p"
+    subtitle_offset_ms: int = 0
+    # Countdown feature for songs that start too quickly
+    add_countdown: bool = True

lyrics_transcriber/core/controller.py ADDED Viewed

@@ -0,0 +1,520 @@
+import os
+import logging
+import json
+from dataclasses import dataclass, field
+from typing import Dict, Optional, List
+from lyrics_transcriber.types import LyricsData, TranscriptionResult, CorrectionResult
+from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber
+from lyrics_transcriber.transcribers.audioshake import AudioShakeTranscriber, AudioShakeConfig
+from lyrics_transcriber.transcribers.whisper import WhisperTranscriber, WhisperConfig
+from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
+from lyrics_transcriber.lyrics.genius import GeniusProvider
+from lyrics_transcriber.lyrics.spotify import SpotifyProvider
+from lyrics_transcriber.lyrics.musixmatch import MusixmatchProvider
+from lyrics_transcriber.lyrics.lrclib import LRCLIBProvider
+from lyrics_transcriber.output.generator import OutputGenerator
+from lyrics_transcriber.correction.corrector import LyricsCorrector
+from lyrics_transcriber.core.config import TranscriberConfig, LyricsConfig, OutputConfig
+from lyrics_transcriber.lyrics.file_provider import FileProvider
+@dataclass
+class LyricsControllerResult:
+    """Holds the results of the transcription and correction process.
+
+    Every field has a default, so an empty instance can be created up front
+    (LyricsTranscriber.__init__ does this) and filled in as processing runs.
+    """
+    # Results from different sources
+    # NOTE(review): presumably keyed by lyrics provider name - confirm against the code that fills it.
+    lyrics_results: dict[str, LyricsData] = field(default_factory=dict)
+    transcription_results: List[TranscriptionResult] = field(default_factory=list)
+    # Corrected results (process() restores this from an existing corrections JSON when one is found)
+    transcription_corrected: Optional[CorrectionResult] = None
+    # Output files
+    # NOTE(review): presumably paths of generated files, None until produced - confirm against the output generator.
+    lrc_filepath: Optional[str] = None
+    ass_filepath: Optional[str] = None
+    video_filepath: Optional[str] = None
+    mp3_filepath: Optional[str] = None
+    cdg_filepath: Optional[str] = None
+    cdg_zip_filepath: Optional[str] = None
+    original_txt: Optional[str] = None
+    corrected_txt: Optional[str] = None
+    corrections_json: Optional[str] = None
+    # Countdown padding info (for applying same padding to other audio files)
+    countdown_padding_added: bool = False
+    countdown_padding_seconds: float = 0.0
+    padded_audio_filepath: Optional[str] = None
+class LyricsTranscriber:
+    """
+    Controller class that orchestrates the lyrics transcription workflow:
+    1. Fetch lyrics from internet sources
+    2. Run multiple transcription methods
+    3. Correct transcribed lyrics using fetched lyrics
+    4. Generate output formats (LRC, ASS, video)
+    """
+    def __init__(
+        self,
+        audio_filepath: str,
+        artist: Optional[str] = None,
+        title: Optional[str] = None,
+        transcriber_config: Optional[TranscriberConfig] = None,
+        lyrics_config: Optional[LyricsConfig] = None,
+        output_config: Optional[OutputConfig] = None,
+        transcribers: Optional[Dict[str, BaseTranscriber]] = None,
+        lyrics_providers: Optional[Dict[str, BaseLyricsProvider]] = None,
+        corrector: Optional[LyricsCorrector] = None,
+        output_generator: Optional[OutputGenerator] = None,
+        logger: Optional[logging.Logger] = None,
+        log_level: int = logging.DEBUG,
+        log_formatter: Optional[logging.Formatter] = None,
+    ):
+        """Set up logging, configuration, working folders and pipeline components.
+
+        Args:
+            audio_filepath: Path of the audio file to process.
+            artist: Artist name; with ``title`` it forms the output file prefix.
+            title: Song title; with ``artist`` it forms the output file prefix.
+            transcriber_config: Transcriber credentials; defaults to an empty config.
+            lyrics_config: Lyrics provider credentials; defaults to an empty config.
+            output_config: Output settings; defaults to a config without a styles file.
+                This object is modified in place (styles are loaded into it, and
+                CDG/video generation is switched off when no styles file exists).
+            transcribers: Pre-built transcribers; built from ``transcriber_config`` when falsy.
+                NOTE(review): the built-in initializer returns dicts with "instance" and
+                "priority" keys rather than bare transcribers - confirm which shape
+                injected values are expected to have.
+            lyrics_providers: Pre-built lyrics providers; built from ``lyrics_config`` when falsy.
+            corrector: Pre-built corrector; a LyricsCorrector using the cache dir otherwise.
+            output_generator: Pre-built output generator; built from ``output_config`` otherwise.
+            logger: Logger to use as-is. When omitted, the module logger is configured here.
+            log_level: Level applied to the module logger (only when ``logger`` is omitted).
+            log_formatter: Formatter for the stream handler created here (only when
+                ``logger`` is omitted and the module logger has no handlers yet).
+        """
+        # Set up logging
+        self.logger = logger or logging.getLogger(__name__)
+        if not logger:
+            self.logger.setLevel(log_level)
+            # Only attach a handler once, so repeated instantiation does not duplicate output
+            if not self.logger.handlers:
+                handler = logging.StreamHandler()
+                formatter = log_formatter or logging.Formatter("%(asctime)s - %(levelname)s - %(module)s - %(message)s")
+                handler.setFormatter(formatter)
+                self.logger.addHandler(handler)
+        self.logger.debug(f"LyricsTranscriber instantiating with input file: {audio_filepath}")
+        # Store configs (with defaults if not provided)
+        self.transcriber_config = transcriber_config or TranscriberConfig()
+        self.lyrics_config = lyrics_config or LyricsConfig()
+        # Pass output_styles_json explicitly so this fallback never depends on that field
+        # having a default; the empty value is handled below as "no styles file".
+        self.output_config = output_config or OutputConfig(output_styles_json="")
+        # output_dir is declared Optional; fall back to the current directory so the
+        # makedirs/path-join calls that use it never receive None.
+        if self.output_config.output_dir is None:
+            self.output_config.output_dir = os.getcwd()
+        # Check if styles JSON is available for CDG and video features
+        if not self.output_config.output_styles_json or not os.path.exists(self.output_config.output_styles_json):
+            if self.output_config.generate_cdg or self.output_config.render_video:
+                self.logger.warning(
+                    f"Output styles JSON file not found: {self.output_config.output_styles_json}. "
+                    "CDG and video generation will be disabled."
+                )
+                self.output_config.generate_cdg = False
+                self.output_config.render_video = False
+        # Basic settings with sanitized filenames
+        # (audio_filepath must be set before the prefix is built: it is the fallback name source)
+        self.audio_filepath = audio_filepath
+        self.artist = artist
+        self.title = title
+        self.output_prefix = self._create_sanitized_output_prefix(artist, title)
+        # Report the working directories, then make sure they exist
+        self.logger.debug(f"Using cache directory: {self.output_config.cache_dir}")
+        self.logger.debug(f"Using output directory: {self.output_config.output_dir}")
+        # Create necessary folders
+        os.makedirs(self.output_config.cache_dir, exist_ok=True)
+        os.makedirs(self.output_config.output_dir, exist_ok=True)
+        # Initialize results
+        self.results = LyricsControllerResult()
+        # Load styles early so lyrics providers can use them
+        self._load_styles()
+        # Initialize components (with dependency injection)
+        self.transcribers = transcribers or self._initialize_transcribers()
+        self.lyrics_providers = lyrics_providers or self._initialize_lyrics_providers()
+        self.corrector = corrector or LyricsCorrector(cache_dir=self.output_config.cache_dir, logger=self.logger)
+        self.output_generator = output_generator or self._initialize_output_generator()
+        # Log enabled features
+        self.logger.info("Enabled features:")
+        self.logger.info(f"  Lyrics fetching: {'enabled' if self.output_config.fetch_lyrics else 'disabled'}")
+        self.logger.info(f"  Transcription: {'enabled' if self.output_config.run_transcription else 'disabled'}")
+        self.logger.info(f"  Lyrics correction: {'enabled' if self.output_config.run_correction else 'disabled'}")
+        self.logger.info(f"  Plain text output: {'enabled' if self.output_config.generate_plain_text else 'disabled'}")
+        self.logger.info(f"  LRC file generation: {'enabled' if self.output_config.generate_lrc else 'disabled'}")
+        self.logger.info(f"  CDG file generation: {'enabled' if self.output_config.generate_cdg else 'disabled'}")
+        self.logger.info(f"  Video rendering: {'enabled' if self.output_config.render_video else 'disabled'}")
+        if self.output_config.render_video:
+            self.logger.info(f"    Video resolution: {self.output_config.video_resolution}")
+    def _load_styles(self) -> None:
+        """Load styles from JSON file if available.
+
+        Stores the parsed JSON in ``self.output_config.styles``. When no path is
+        configured, the file does not exist, or it cannot be read or parsed, the
+        styles are set to an empty dict instead (loading is best-effort and never
+        raises).
+        """
+        if self.output_config.output_styles_json and os.path.exists(self.output_config.output_styles_json):
+            try:
+                # Read as UTF-8 explicitly: the default encoding depends on the platform
+                # locale, and the corrections JSON in process() is read as UTF-8 too.
+                with open(self.output_config.output_styles_json, "r", encoding="utf-8") as f:
+                    self.output_config.styles = json.load(f)
+                self.logger.debug(f"Loaded output styles from: {self.output_config.output_styles_json}")
+            except Exception as e:
+                # Deliberately broad: a broken styles file downgrades to "no styles"
+                self.logger.warning(f"Failed to load output styles file: {str(e)}")
+                self.output_config.styles = {}
+        else:
+            self.logger.debug("No styles JSON file provided or file does not exist")
+            self.output_config.styles = {}
+    def _sanitize_filename(self, filename: str) -> str:
+        """Return ``filename`` with filesystem-unsafe characters neutralised.
+
+        Each of ``\\ / : * ? " < > |`` becomes an underscore, then trailing
+        spaces are stripped. A falsy input yields an empty string.
+        """
+        if not filename:
+            return ""
+        # One translation pass maps every problematic character to "_"
+        unsafe_to_underscore = str.maketrans({symbol: "_" for symbol in '\\/:*?"<>|'})
+        return filename.translate(unsafe_to_underscore).rstrip(" ")
+    def _create_sanitized_output_prefix(self, artist: Optional[str], title: Optional[str]) -> str:
+        """Build the filename prefix used for output files.
+
+        Returns "<artist> - <title>" (both sanitized) when both are given;
+        otherwise the sanitized audio file name without its extension.
+        """
+        if not (artist and title):
+            # Without both tags, name the outputs after the audio file itself
+            stem, _extension = os.path.splitext(os.path.basename(self.audio_filepath))
+            return self._sanitize_filename(stem)
+        return " - ".join(self._sanitize_filename(part) for part in (artist, title))
+    def _initialize_transcribers(self) -> Dict[str, dict]:
+        """Initialize available transcription services.
+
+        Returns:
+            A mapping from service name ("audioshake", "whisper") to a dict with
+            two keys: "instance" (the transcriber object) and "priority" (an int;
+            AudioShake is 1, Whisper is 2). A service is only included when its
+            credentials are present in ``self.transcriber_config``.
+        """
+        transcribers = {}
+        # Log only WHICH credentials are set: the config repr would write the full
+        # API tokens into the log output.
+        self.logger.debug(
+            "Initializing transcribers with config: "
+            f"audioshake_api_token set={bool(self.transcriber_config.audioshake_api_token)}, "
+            f"runpod_api_key set={bool(self.transcriber_config.runpod_api_key)}, "
+            f"whisper_runpod_id set={bool(self.transcriber_config.whisper_runpod_id)}"
+        )
+        self.logger.debug(f"Using cache directory for transcribers: {self.output_config.cache_dir}")
+        if self.transcriber_config.audioshake_api_token:
+            self.logger.debug("Initializing AudioShake transcriber")
+            transcribers["audioshake"] = {
+                "instance": AudioShakeTranscriber(
+                    cache_dir=self.output_config.cache_dir,
+                    config=AudioShakeConfig(api_token=self.transcriber_config.audioshake_api_token),
+                    logger=self.logger,
+                ),
+                "priority": 1,  # AudioShake has highest priority
+            }
+        else:
+            self.logger.debug("Skipping AudioShake transcriber - no API token provided")
+        # Whisper runs on RunPod and needs both the API key and the endpoint id
+        if self.transcriber_config.runpod_api_key and self.transcriber_config.whisper_runpod_id:
+            self.logger.debug("Initializing Whisper transcriber")
+            transcribers["whisper"] = {
+                "instance": WhisperTranscriber(
+                    cache_dir=self.output_config.cache_dir,
+                    config=WhisperConfig(
+                        runpod_api_key=self.transcriber_config.runpod_api_key, endpoint_id=self.transcriber_config.whisper_runpod_id
+                    ),
+                    logger=self.logger,
+                ),
+                "priority": 2,  # Whisper has lower priority
+            }
+        else:
+            self.logger.debug("Skipping Whisper transcriber - missing runpod_api_key or whisper_runpod_id")
+        return transcribers
+    def _initialize_lyrics_providers(self) -> Dict[str, BaseLyricsProvider]:
+        """Build the lyrics providers that the current configuration allows.
+
+        An existing local lyrics file takes over completely: only the "file"
+        provider is returned. Otherwise LRCLIB is always included, and Genius,
+        Spotify and Musixmatch are added when their credential is configured.
+        """
+        # Prefer the karaoke style's max_line_length, else the configured default
+        karaoke_styles = self.output_config.styles.get("karaoke", {})
+        max_line_length = karaoke_styles.get("max_line_length", self.output_config.default_max_line_length)
+        self.logger.info(f"Using max_line_length for lyrics providers: {max_line_length}")
+        # A single config object is shared by every provider
+        provider_config = LyricsProviderConfig(
+            genius_api_token=self.lyrics_config.genius_api_token,
+            rapidapi_key=self.lyrics_config.rapidapi_key,
+            spotify_cookie=self.lyrics_config.spotify_cookie,
+            lyrics_file=self.lyrics_config.lyrics_file,
+            cache_dir=self.output_config.cache_dir,
+            audio_filepath=self.audio_filepath,
+            max_line_length=max_line_length,
+        )
+        if provider_config.lyrics_file and os.path.exists(provider_config.lyrics_file):
+            self.logger.debug(f"Initializing File lyrics provider with file: {provider_config.lyrics_file}")
+            return {"file": FileProvider(config=provider_config, logger=self.logger)}
+        # LRCLIB - always enabled (no API key required)
+        self.logger.debug("Initializing LRCLIB lyrics provider")
+        providers = {"lrclib": LRCLIBProvider(config=provider_config, logger=self.logger)}
+        # (provider key, credential attribute, provider class, enabled message, skipped message)
+        credentialed_providers = (
+            (
+                "genius",
+                "genius_api_token",
+                GeniusProvider,
+                "Initializing Genius lyrics provider",
+                "Skipping Genius provider - no API token provided",
+            ),
+            (
+                "spotify",
+                "spotify_cookie",
+                SpotifyProvider,
+                "Initializing Spotify lyrics provider",
+                "Skipping Spotify provider - no cookie provided",
+            ),
+            (
+                "musixmatch",
+                "rapidapi_key",
+                MusixmatchProvider,
+                "Initializing Musixmatch lyrics provider",
+                "Skipping Musixmatch provider - no RapidAPI key provided",
+            ),
+        )
+        for provider_key, credential_attr, provider_cls, enabled_msg, skipped_msg in credentialed_providers:
+            if getattr(provider_config, credential_attr):
+                self.logger.debug(enabled_msg)
+                providers[provider_key] = provider_cls(config=provider_config, logger=self.logger)
+            else:
+                self.logger.debug(skipped_msg)
+        return providers
+    def _initialize_output_generator(self) -> OutputGenerator:
+        """Create the OutputGenerator from this controller's output config and logger."""
+        generator = OutputGenerator(config=self.output_config, logger=self.logger)
+        return generator
+    def process(self) -> LyricsControllerResult:
+        """Main processing method that orchestrates the entire workflow.
+        If "<output_prefix> (Lyrics Corrections).json" already exists in the output
+        directory, the saved corrections are loaded and outputs are generated straight
+        from them. Otherwise lyrics fetching, transcription and correction run in that
+        order (each only when enabled in output_config), followed by output generation
+        when there is a corrected transcription or any fetched lyrics.
+        Returns:
+            self.results, populated by whichever steps ran.
+        """
+        self.logger.info(f"LyricsTranscriber controller beginning processing for {self.artist} - {self.title}")
+        self._log_runtime_diagnostics()
+        # Check for existing corrections JSON
+        corrections_json_path = os.path.join(self.output_config.output_dir, f"{self.output_prefix} (Lyrics Corrections).json")
+        if os.path.exists(corrections_json_path):
+            self.logger.info(f"Found existing corrections JSON: {corrections_json_path}")
+            try:
+                with open(corrections_json_path, "r", encoding="utf-8") as f:
+                    corrections_data = json.load(f)
+                # Reconstruct CorrectionResult from JSON
+                self.results.transcription_corrected = CorrectionResult.from_dict(corrections_data)
+                self.logger.info("Successfully loaded existing corrections data")
+                # Skip to output generation
+                self.generate_outputs()
+                self.logger.info("Processing completed successfully using existing corrections")
+                return self.results
+            except Exception as e:
+                # NOTE: this handler also covers generate_outputs() above, so a failure there is
+                # reported with the same message and falls through to normal processing as well.
+                self.logger.error(f"Failed to load existing corrections JSON: {str(e)}")
+                # Continue with normal processing if loading fails
+        # Step 1: Fetch reference lyrics if enabled and the song is identified
+        if self.output_config.fetch_lyrics and self.artist and self.title:
+            self.fetch_lyrics()
+        else:
+            self.logger.info("Skipping lyrics fetching - no artist/title provided or fetching disabled")
+        # Step 2: Run transcription if enabled
+        if self.output_config.run_transcription:
+            self.transcribe()
+        else:
+            self.logger.info("Skipping transcription - transcription disabled")
+        # Step 3: Process and correct lyrics if enabled AND we have transcription results
+        if self.output_config.run_correction and self.results.transcription_results:
+            self.correct_lyrics()
+        elif self.output_config.run_correction:
+            self.logger.info("Skipping lyrics correction - no transcription results available")
+        # Step 4: Generate outputs based on what we have
+        if self.results.transcription_corrected or self.results.lyrics_results:
+            self.generate_outputs()
+        else:
+            self.logger.warning("No corrected transcription or lyrics available. Skipping output generation.")
+        self.logger.info("Processing completed successfully")
+        return self.results
+    def _log_runtime_diagnostics(self) -> None:
+        """Log the package version and a redacted summary of the process environment.
+        Environment variable values are never written to the log, not even as a prefix:
+        a truncated secret still leaks key material, and short values would be logged whole.
+        Only variable names and a "(set)" / "(empty)" marker are reported.
+        """
+        try:
+            import lyrics_transcriber
+            package_version = getattr(lyrics_transcriber, "__version__", "unknown")
+            self.logger.info(f"LyricsTranscriber package version: {package_version}")
+        except Exception as e:
+            self.logger.warning(f"Could not get package version: {e}")
+        env_status = {key: "(set)" if value else "(empty)" for key, value in os.environ.items()}
+        self.logger.info(f"Environment variables count: {len(env_status)}")
+        # Report which credential-looking variables exist, by name only
+        sensitive_keywords = ("API", "TOKEN", "KEY", "SECRET")
+        api_vars = {k: v for k, v in env_status.items() if any(keyword in k.upper() for keyword in sensitive_keywords)}
+        if api_vars:
+            self.logger.info(f"API-related environment variables: {api_vars}")
+        else:
+            self.logger.warning("No API-related environment variables found")
+        # Full name listing only when debug logging is active
+        if self.logger.getEffectiveLevel() <= logging.DEBUG:
+            self.logger.debug(f"All environment variables: {env_status}")
+    def fetch_lyrics(self) -> None:
+        """Query every configured lyrics provider for the current artist and title.
+        Each truthy result is stored in self.results.lyrics_results under the provider's
+        name. A provider that raises is logged as an error and skipped so the remaining
+        providers still run. A warning is logged if lyrics_results is empty afterwards.
+        """
+        self.logger.info(f"Fetching lyrics for {self.artist} - {self.title}")
+        for provider_name, lyrics_provider in self.lyrics_providers.items():
+            try:
+                fetched = lyrics_provider.fetch_lyrics(self.artist, self.title)
+                if not fetched:
+                    continue
+                self.results.lyrics_results[provider_name] = fetched
+                self.logger.info(f"Successfully fetched lyrics from {provider_name}")
+            except Exception as e:
+                # One failing provider must not stop the others from being tried
+                self.logger.error(f"Failed to fetch lyrics from {provider_name}: {str(e)}")
+        if not self.results.lyrics_results:
+            self.logger.warning("No lyrics found from any source")
+    def transcribe(self) -> None:
+        """Transcribe self.audio_filepath with every configured transcriber.
+        Each truthy result is wrapped in a TranscriptionResult carrying the transcriber's
+        name and priority and appended to self.results.transcription_results. Exceptions
+        raised by a transcriber are not caught here and propagate to the caller. A warning
+        is logged if transcription_results is empty afterwards.
+        """
+        self.logger.info(f"Starting transcription with providers: {list(self.transcribers.keys())}")
+        for provider_name, entry in self.transcribers.items():
+            self.logger.info(f"Running transcription with {provider_name}")
+            raw_result = entry["instance"].transcribe(self.audio_filepath)
+            if not raw_result:
+                continue
+            # Tag the raw result with where it came from and how much it is trusted
+            wrapped = TranscriptionResult(name=provider_name, priority=entry["priority"], result=raw_result)
+            self.results.transcription_results.append(wrapped)
+            self.logger.debug(f"Transcription completed for {provider_name}")
+        if not self.results.transcription_results:
+            self.logger.warning("No successful transcriptions from any provider")
+    def correct_lyrics(self) -> None:
+        """Fill self.results.transcription_corrected, then run the optional post-steps.
+        With reference lyrics available, LyricsCorrector is run over the transcription
+        results. Without them, the transcription result with the lowest priority value is
+        wrapped unchanged in a CorrectionResult that records zero corrections. Afterwards
+        the human review server runs when output_config.enable_review is set, and the
+        countdown processor runs when output_config.add_countdown is set and a corrected
+        result exists; the countdown step may replace self.audio_filepath.
+        """
+        self.logger.info("Starting lyrics correction process")
+        reference_lyrics = self.results.lyrics_results
+        if reference_lyrics:
+            # Song info handed to the corrector alongside the full reference texts
+            metadata = {
+                "artist": self.artist,
+                "title": self.title,
+                "full_reference_texts": {source: lyrics.get_full_text() for source, lyrics in reference_lyrics.items()},
+            }
+            # metadata is built just above without this key, so the lookup currently yields None
+            enabled_handlers = metadata.get("enabled_handlers")
+            corrector = LyricsCorrector(cache_dir=self.output_config.cache_dir, enabled_handlers=enabled_handlers, logger=self.logger)
+            self.results.transcription_corrected = corrector.run(
+                transcription_results=self.results.transcription_results,
+                lyrics_results=reference_lyrics,
+                metadata=metadata,
+            )
+            self.logger.info("Lyrics correction completed")
+        else:
+            self.logger.warning("No reference lyrics available for correction - using raw transcription")
+            if self.results.transcription_results:
+                # Lowest priority value wins; ties keep the earliest entry
+                best_transcription = min(self.results.transcription_results, key=lambda candidate: candidate.priority)
+                segments = best_transcription.result.segments
+                total_words = sum(len(segment.words) for segment in segments)
+                passthrough_metadata = {
+                    "correction_type": "none",
+                    "reason": "no_reference_lyrics",
+                    "audio_filepath": self.audio_filepath,
+                    "anchor_sequences_count": 0,
+                    "gap_sequences_count": 0,
+                    "total_words": total_words,
+                    "correction_ratio": 0.0,
+                    "available_handlers": [],
+                    "enabled_handlers": [],
+                }
+                # Same segments on both sides: nothing was corrected
+                self.results.transcription_corrected = CorrectionResult(
+                    original_segments=segments,
+                    corrected_segments=segments,
+                    corrections=[],
+                    corrections_made=0,
+                    confidence=1.0,  # Full confidence since we're using original
+                    reference_lyrics={},
+                    anchor_sequences=[],
+                    gap_sequences=[],
+                    resized_segments=[],
+                    correction_steps=[],
+                    word_id_map={},
+                    segment_id_map={},
+                    metadata=passthrough_metadata,
+                )
+        # Human review runs for both branches above
+        if self.output_config.enable_review:
+            from lyrics_transcriber.review.server import ReviewServer
+            self.logger.info("Starting human review process")
+            review_server = ReviewServer(
+                correction_result=self.results.transcription_corrected,
+                output_config=self.output_config,
+                audio_filepath=self.audio_filepath,
+                logger=self.logger,
+            )
+            reviewed_data = review_server.start()
+            self.logger.info("Human review completed, updated transcription_corrected with reviewed_data")
+            self.results.transcription_corrected = reviewed_data
+        # Countdown intro comes after review and before output generation
+        if self.output_config.add_countdown and self.results.transcription_corrected:
+            from lyrics_transcriber.output.countdown_processor import CountdownProcessor
+            self.logger.info("Processing countdown intro (if needed)")
+            countdown_processor = CountdownProcessor(
+                cache_dir=self.output_config.cache_dir,
+                logger=self.logger,
+            )
+            processed = countdown_processor.process(
+                correction_result=self.results.transcription_corrected,
+                audio_filepath=self.audio_filepath,
+            )
+            # The processor may hand back a modified correction result and audio path
+            updated_result, updated_audio, padding_added, padding_seconds = processed
+            self.results.transcription_corrected = updated_result
+            self.audio_filepath = updated_audio
+            # Store padding information in results for parent code to use
+            self.results.countdown_padding_added = padding_added
+            self.results.countdown_padding_seconds = padding_seconds
+            if padding_added:
+                self.results.padded_audio_filepath = self.audio_filepath
+                self.logger.info(
+                    f"Countdown padding applied: {padding_seconds}s added to audio. "
+                    f"Padded audio: {self.audio_filepath}"
+                )
+    def generate_outputs(self) -> None:
+        """Run the output generator and copy the produced file paths onto self.results.
+        The corrected transcription is passed through only when it is truthy; otherwise
+        None is passed so the generator works from the fetched lyrics alone.
+        """
+        self.logger.info("Generating output files")
+        corrected = self.results.transcription_corrected
+        generated = self.output_generator.generate_outputs(
+            transcription_corrected=corrected if corrected else None,
+            lyrics_results=self.results.lyrics_results,
+            output_prefix=self.output_prefix,
+            audio_filepath=self.audio_filepath,
+            artist=self.artist,
+            title=self.title,
+        )
+        # (attribute on self.results, attribute on the generator's return value)
+        path_fields = (
+            ("lrc_filepath", "lrc"),
+            ("ass_filepath", "ass"),
+            ("video_filepath", "video"),
+            ("original_txt", "original_txt"),
+            ("corrected_txt", "corrected_txt"),
+            ("corrections_json", "corrections_json"),
+            ("cdg_filepath", "cdg"),
+            ("mp3_filepath", "mp3"),
+            ("cdg_zip_filepath", "cdg_zip"),
+        )
+        for result_attr, output_attr in path_fields:
+            setattr(self.results, result_attr, getattr(generated, output_attr))

lyrics_transcriber/correction/__init__.py ADDED Viewed

File without changes

lyrics_transcriber/correction/agentic/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""Agentic AI correction system scaffold.
+This package will contain the semi-agentic correction workflows, providers,
+observability, and feedback modules. Implementation follows TDD; tests come first.
+"""
+# Nothing is re-exported at package level; import submodules (e.g. .adapter) directly.
+__all__ = []

lyrics_transcriber/correction/agentic/adapter.py ADDED Viewed

@@ -0,0 +1,71 @@
+from __future__ import annotations
+from typing import Dict, Any, List
+from .models.schemas import CorrectionProposal
+from lyrics_transcriber.types import WordCorrection, Word
+from lyrics_transcriber.utils.word_utils import WordUtils
+def adapt_proposals_to_word_corrections(
+    proposals: List[CorrectionProposal],
+    word_map: Dict[str, Word],
+    linear_position_map: Dict[str, int],
+) -> List[WordCorrection]:
+    """Convert CorrectionProposal items into WordCorrection objects.
+    Minimal mapping: supports ReplaceWord and DeleteWord actions (matched
+    case-insensitively) applied to a single word, taken from word_id or else the first
+    entry of word_ids. A proposal is ignored when its target word is missing from
+    word_map, when its action is unsupported, or when a ReplaceWord has no replacement text.
+    The reason field includes gap category and confidence for better UI feedback.
+    Args:
+        proposals: Proposals to convert, in the order the corrections should be returned.
+        word_map: Original words keyed by word ID.
+        linear_position_map: Position of each word ID; 0 is used when an ID is absent.
+    Returns:
+        One WordCorrection per supported proposal, with source "agentic".
+    """
+    results: List[WordCorrection] = []
+    for p in proposals:
+        action = (p.action or "").lower()
+        target_id = p.word_id or (p.word_ids[0] if p.word_ids else None)
+        if not target_id or target_id not in word_map:
+            continue
+        is_deletion = action == "deleteword"
+        is_replacement = action == "replaceword" and bool(p.replacement_text)
+        if not (is_deletion or is_replacement):
+            # Unsupported action, or a replacement with nothing to replace with
+            continue
+        original = word_map[target_id]
+        # Build a detailed reason including gap category
+        category_str = f" [{p.gap_category.value}]" if p.gap_category else ""
+        # Compare against None so that a genuine 0% confidence is still shown
+        confidence_str = f" (confidence: {p.confidence:.0%})" if p.confidence is not None else ""
+        detailed_reason = f"{p.reason or 'AI correction'}{category_str}{confidence_str}"
+        results.append(
+            WordCorrection(
+                original_word=original.text,
+                corrected_word="" if is_deletion else p.replacement_text,
+                original_position=linear_position_map.get(target_id, 0),
+                source="agentic",
+                reason=detailed_reason,
+                confidence=float(p.confidence or 0.0),
+                is_deletion=is_deletion,
+                word_id=target_id,
+                # Deleted words don't need a corrected ID; replacements get a fresh unique one
+                corrected_word_id=None if is_deletion else WordUtils.generate_id(),
+                handler="AgenticCorrector",  # Required by frontend
+                reference_positions={},  # Required by frontend
+            )
+        )
+    return results

karaoke-gen 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl

karaoke-gen 0.57.0py3-none-any.whl → 0.71.27py3-none-any.whl