PyPI - karaoke-gen - Versions diffs - 0.75.54__py3-none-any.whl - Mend

karaoke-gen 0.75.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of karaoke-gen might be problematic. Click here for more details.

Files changed (287) hide show

karaoke_gen/__init__.py +38 -0
karaoke_gen/audio_fetcher.py +1614 -0
karaoke_gen/audio_processor.py +790 -0
karaoke_gen/config.py +83 -0
karaoke_gen/file_handler.py +387 -0
karaoke_gen/instrumental_review/__init__.py +45 -0
karaoke_gen/instrumental_review/analyzer.py +408 -0
karaoke_gen/instrumental_review/editor.py +322 -0
karaoke_gen/instrumental_review/models.py +171 -0
karaoke_gen/instrumental_review/server.py +475 -0
karaoke_gen/instrumental_review/static/index.html +1529 -0
karaoke_gen/instrumental_review/waveform.py +409 -0
karaoke_gen/karaoke_finalise/__init__.py +1 -0
karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
karaoke_gen/karaoke_gen.py +1026 -0
karaoke_gen/lyrics_processor.py +474 -0
karaoke_gen/metadata.py +160 -0
karaoke_gen/pipeline/__init__.py +87 -0
karaoke_gen/pipeline/base.py +215 -0
karaoke_gen/pipeline/context.py +230 -0
karaoke_gen/pipeline/executors/__init__.py +21 -0
karaoke_gen/pipeline/executors/local.py +159 -0
karaoke_gen/pipeline/executors/remote.py +257 -0
karaoke_gen/pipeline/stages/__init__.py +27 -0
karaoke_gen/pipeline/stages/finalize.py +202 -0
karaoke_gen/pipeline/stages/render.py +165 -0
karaoke_gen/pipeline/stages/screens.py +139 -0
karaoke_gen/pipeline/stages/separation.py +191 -0
karaoke_gen/pipeline/stages/transcription.py +191 -0
karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
karaoke_gen/resources/Oswald-Bold.ttf +0 -0
karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
karaoke_gen/style_loader.py +531 -0
karaoke_gen/utils/__init__.py +18 -0
karaoke_gen/utils/bulk_cli.py +492 -0
karaoke_gen/utils/cli_args.py +432 -0
karaoke_gen/utils/gen_cli.py +978 -0
karaoke_gen/utils/remote_cli.py +3268 -0
karaoke_gen/video_background_processor.py +351 -0
karaoke_gen/video_generator.py +424 -0
karaoke_gen-0.75.54.dist-info/METADATA +718 -0
karaoke_gen-0.75.54.dist-info/RECORD +287 -0
karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
lyrics_transcriber/__init__.py +10 -0
lyrics_transcriber/cli/__init__.py +0 -0
lyrics_transcriber/cli/cli_main.py +285 -0
lyrics_transcriber/core/__init__.py +0 -0
lyrics_transcriber/core/config.py +50 -0
lyrics_transcriber/core/controller.py +594 -0
lyrics_transcriber/correction/__init__.py +0 -0
lyrics_transcriber/correction/agentic/__init__.py +9 -0
lyrics_transcriber/correction/agentic/adapter.py +71 -0
lyrics_transcriber/correction/agentic/agent.py +313 -0
lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
lyrics_transcriber/correction/agentic/models/enums.py +38 -0
lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
lyrics_transcriber/correction/agentic/models/utils.py +19 -0
lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
lyrics_transcriber/correction/agentic/providers/base.py +36 -0
lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
lyrics_transcriber/correction/agentic/providers/config.py +73 -0
lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
lyrics_transcriber/correction/agentic/providers/health.py +28 -0
lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
lyrics_transcriber/correction/agentic/router.py +35 -0
lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
lyrics_transcriber/correction/anchor_sequence.py +919 -0
lyrics_transcriber/correction/corrector.py +760 -0
lyrics_transcriber/correction/feedback/__init__.py +2 -0
lyrics_transcriber/correction/feedback/schemas.py +107 -0
lyrics_transcriber/correction/feedback/store.py +236 -0
lyrics_transcriber/correction/handlers/__init__.py +0 -0
lyrics_transcriber/correction/handlers/base.py +52 -0
lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
lyrics_transcriber/correction/handlers/llm.py +293 -0
lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
lyrics_transcriber/correction/handlers/repeat.py +88 -0
lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
lyrics_transcriber/correction/handlers/word_operations.py +187 -0
lyrics_transcriber/correction/operations.py +352 -0
lyrics_transcriber/correction/phrase_analyzer.py +435 -0
lyrics_transcriber/correction/text_utils.py +30 -0
lyrics_transcriber/frontend/.gitignore +23 -0
lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
lyrics_transcriber/frontend/.yarnrc.yml +3 -0
lyrics_transcriber/frontend/README.md +50 -0
lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
lyrics_transcriber/frontend/__init__.py +25 -0
lyrics_transcriber/frontend/eslint.config.js +28 -0
lyrics_transcriber/frontend/index.html +18 -0
lyrics_transcriber/frontend/package.json +42 -0
lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/public/favicon.ico +0 -0
lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/src/App.tsx +214 -0
lyrics_transcriber/frontend/src/api.ts +254 -0
lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
lyrics_transcriber/frontend/src/main.tsx +17 -0
lyrics_transcriber/frontend/src/theme.ts +177 -0
lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
lyrics_transcriber/frontend/src/types.js +2 -0
lyrics_transcriber/frontend/src/types.ts +199 -0
lyrics_transcriber/frontend/src/validation.ts +132 -0
lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
lyrics_transcriber/frontend/tsconfig.app.json +26 -0
lyrics_transcriber/frontend/tsconfig.json +25 -0
lyrics_transcriber/frontend/tsconfig.node.json +23 -0
lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
lyrics_transcriber/frontend/update_version.js +11 -0
lyrics_transcriber/frontend/vite.config.d.ts +2 -0
lyrics_transcriber/frontend/vite.config.js +10 -0
lyrics_transcriber/frontend/vite.config.ts +11 -0
lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
lyrics_transcriber/frontend/web_assets/index.html +18 -0
lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/yarn.lock +3752 -0
lyrics_transcriber/lyrics/__init__.py +0 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
lyrics_transcriber/lyrics/file_provider.py +95 -0
lyrics_transcriber/lyrics/genius.py +384 -0
lyrics_transcriber/lyrics/lrclib.py +231 -0
lyrics_transcriber/lyrics/musixmatch.py +156 -0
lyrics_transcriber/lyrics/spotify.py +290 -0
lyrics_transcriber/lyrics/user_input_provider.py +44 -0
lyrics_transcriber/output/__init__.py +0 -0
lyrics_transcriber/output/ass/__init__.py +21 -0
lyrics_transcriber/output/ass/ass.py +2088 -0
lyrics_transcriber/output/ass/ass_specs.txt +732 -0
lyrics_transcriber/output/ass/config.py +180 -0
lyrics_transcriber/output/ass/constants.py +23 -0
lyrics_transcriber/output/ass/event.py +94 -0
lyrics_transcriber/output/ass/formatters.py +132 -0
lyrics_transcriber/output/ass/lyrics_line.py +265 -0
lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
lyrics_transcriber/output/ass/section_detector.py +89 -0
lyrics_transcriber/output/ass/section_screen.py +106 -0
lyrics_transcriber/output/ass/style.py +187 -0
lyrics_transcriber/output/cdg.py +619 -0
lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
lyrics_transcriber/output/cdgmaker/config.py +151 -0
lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
lyrics_transcriber/output/cdgmaker/pack.py +507 -0
lyrics_transcriber/output/cdgmaker/render.py +346 -0
lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
lyrics_transcriber/output/cdgmaker/utils.py +132 -0
lyrics_transcriber/output/countdown_processor.py +306 -0
lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
lyrics_transcriber/output/fonts/arial.ttf +0 -0
lyrics_transcriber/output/fonts/georgia.ttf +0 -0
lyrics_transcriber/output/fonts/verdana.ttf +0 -0
lyrics_transcriber/output/generator.py +257 -0
lyrics_transcriber/output/lrc_to_cdg.py +61 -0
lyrics_transcriber/output/lyrics_file.py +102 -0
lyrics_transcriber/output/plain_text.py +96 -0
lyrics_transcriber/output/segment_resizer.py +431 -0
lyrics_transcriber/output/subtitles.py +397 -0
lyrics_transcriber/output/video.py +544 -0
lyrics_transcriber/review/__init__.py +0 -0
lyrics_transcriber/review/server.py +676 -0
lyrics_transcriber/storage/__init__.py +0 -0
lyrics_transcriber/storage/dropbox.py +225 -0
lyrics_transcriber/transcribers/__init__.py +0 -0
lyrics_transcriber/transcribers/audioshake.py +379 -0
lyrics_transcriber/transcribers/base_transcriber.py +157 -0
lyrics_transcriber/transcribers/whisper.py +330 -0
lyrics_transcriber/types.py +650 -0
lyrics_transcriber/utils/__init__.py +0 -0
lyrics_transcriber/utils/word_utils.py +27 -0

karaoke_gen/lyrics_processor.py ADDED Viewed

@@ -0,0 +1,474 @@
+import os
+import re
+import logging
+import shutil
+import json
+from lyrics_transcriber import LyricsTranscriber, OutputConfig, TranscriberConfig, LyricsConfig
+from lyrics_transcriber.core.controller import LyricsControllerResult
+from dotenv import load_dotenv
+from .utils import sanitize_filename
+# Lyrics processing for karaoke generation: countdown-padding detection, line splitting and transcription
+class LyricsProcessor:
+    # Standard countdown padding duration used by LyricsTranscriber
+    COUNTDOWN_PADDING_SECONDS = 3.0
+    def __init__(
+        self, logger, style_params_json, lyrics_file, skip_transcription, skip_transcription_review, render_video, subtitle_offset_ms
+    ):
+        self.logger = logger
+        self.style_params_json = style_params_json
+        self.lyrics_file = lyrics_file
+        self.skip_transcription = skip_transcription
+        self.skip_transcription_review = skip_transcription_review
+        self.render_video = render_video
+        self.subtitle_offset_ms = subtitle_offset_ms
+    def _detect_countdown_padding_from_lrc(self, lrc_filepath):
+        """
+        Detect if countdown padding was applied by checking for countdown text in the LRC file.
+        The countdown segment has the text "3... 2... 1..." at timestamp 0.1-2.9s.
+        We detect this by looking for the countdown text pattern.
+        Args:
+            lrc_filepath: Path to the LRC file
+        Returns:
+            Tuple of (countdown_padding_added: bool, countdown_padding_seconds: float)
+        """
+        try:
+            with open(lrc_filepath, 'r', encoding='utf-8') as f:
+                content = f.read()
+            # Method 1: Check for countdown text pattern "3... 2... 1..."
+            # This is the most reliable detection method since the countdown text is unique
+            countdown_text = "3... 2... 1..."
+            if countdown_text in content:
+                self.logger.info(f"Detected countdown padding from LRC: found countdown text '{countdown_text}'")
+                return (True, self.COUNTDOWN_PADDING_SECONDS)
+            # Method 2 (fallback): Check if first lyric timestamp is >= 3 seconds
+            # This handles cases where countdown text format might differ
+            # LRC timestamps: [mm:ss.xx] or [mm:ss.xxx]
+            timestamp_pattern = r'\[(\d{1,2}):(\d{2})\.(\d{2,3})\]'
+            matches = re.findall(timestamp_pattern, content)
+            if not matches:
+                self.logger.debug("No timestamps found in LRC file")
+                return (False, 0.0)
+            # Parse the first timestamp
+            first_timestamp = matches[0]
+            minutes = int(first_timestamp[0])
+            seconds = int(first_timestamp[1])
+            # Handle both .xx and .xxx formats
+            centiseconds = first_timestamp[2]
+            if len(centiseconds) == 2:
+                milliseconds = int(centiseconds) * 10
+            else:
+                milliseconds = int(centiseconds)
+            first_lyric_time = minutes * 60 + seconds + milliseconds / 1000.0
+            self.logger.debug(f"First lyric timestamp in LRC: {first_lyric_time:.3f}s")
+            # If first lyric is at or after 3 seconds, countdown padding was applied
+            # Use a small buffer (2.5s) to account for songs that naturally start a bit late
+            if first_lyric_time >= 2.5:
+                self.logger.info(f"Detected countdown padding from LRC: first lyric at {first_lyric_time:.2f}s")
+                return (True, self.COUNTDOWN_PADDING_SECONDS)
+            return (False, 0.0)
+        except Exception as e:
+            self.logger.warning(f"Failed to detect countdown padding from LRC file: {e}")
+            return (False, 0.0)
+    def find_best_split_point(self, line):
+        """
+        Find the best split point in a line based on the specified criteria.
+        """
+        self.logger.debug(f"Finding best_split_point for line: {line}")
+        words = line.split()
+        mid_word_index = len(words) // 2
+        self.logger.debug(f"words: {words} mid_word_index: {mid_word_index}")
+        # Check for a comma within one or two words of the middle word
+        if "," in line:
+            mid_point = len(" ".join(words[:mid_word_index]))
+            comma_indices = [i for i, char in enumerate(line) if char == ","]
+            for index in comma_indices:
+                if abs(mid_point - index) < 20 and len(line[: index + 1].strip()) <= 36:
+                    self.logger.debug(
+                        f"Found comma at index {index} which is within 20 characters of mid_point {mid_point} and results in a suitable line length, accepting as split point"
+                    )
+                    return index + 1  # Include the comma in the first line
+        # Check for 'and'
+        if " and " in line:
+            mid_point = len(line) // 2
+            and_indices = [m.start() for m in re.finditer(" and ", line)]
+            for index in sorted(and_indices, key=lambda x: abs(x - mid_point)):
+                if len(line[: index + len(" and ")].strip()) <= 36:
+                    self.logger.debug(f"Found 'and' at index {index} which results in a suitable line length, accepting as split point")
+                    return index + len(" and ")
+        # If no better split point is found, try splitting at the middle word
+        if len(words) > 2 and mid_word_index > 0:
+            split_at_middle = len(" ".join(words[:mid_word_index]))
+            if split_at_middle <= 36:
+                self.logger.debug(f"Splitting at middle word index: {mid_word_index}")
+                return split_at_middle
+        # If the line is still too long, forcibly split at the maximum length
+        forced_split_point = 36
+        if len(line) > forced_split_point:
+            self.logger.debug(f"Line is still too long, forcibly splitting at position {forced_split_point}")
+            return forced_split_point
+    def process_line(self, line):
+        """
+        Process a single line to ensure it's within the maximum length,
+        and handle parentheses.
+        """
+        processed_lines = []
+        iteration_count = 0
+        max_iterations = 100  # Failsafe limit
+        while len(line) > 36:
+            if iteration_count > max_iterations:
+                self.logger.error(f"Maximum iterations exceeded in process_line for line: {line}")
+                break
+            # Check if the line contains parentheses
+            if "(" in line and ")" in line:
+                start_paren = line.find("(")
+                end_paren = line.find(")") + 1
+                if end_paren < len(line) and line[end_paren] == ",":
+                    end_paren += 1
+                if start_paren > 0:
+                    processed_lines.append(line[:start_paren].strip())
+                processed_lines.append(line[start_paren:end_paren].strip())
+                line = line[end_paren:].strip()
+            else:
+                split_point = self.find_best_split_point(line)
+                processed_lines.append(line[:split_point].strip())
+                line = line[split_point:].strip()
+            iteration_count += 1
+        if line:  # Add the remaining part if not empty
+            processed_lines.append(line)
+        return processed_lines
+    def _check_transcription_providers(self) -> dict:
+        """
+        Check which transcription providers are configured and return their status.
+        Returns:
+            dict with 'configured' (list of provider names) and 'missing' (list of missing configs)
+        """
+        load_dotenv()
+        configured = []
+        missing = []
+        # Check AudioShake
+        audioshake_token = os.getenv("AUDIOSHAKE_API_TOKEN")
+        if audioshake_token:
+            configured.append("AudioShake")
+            self.logger.debug("AudioShake transcription provider: configured")
+        else:
+            missing.append("AudioShake (AUDIOSHAKE_API_TOKEN)")
+            self.logger.debug("AudioShake transcription provider: not configured (missing AUDIOSHAKE_API_TOKEN)")
+        # Check Whisper via RunPod
+        runpod_key = os.getenv("RUNPOD_API_KEY")
+        whisper_id = os.getenv("WHISPER_RUNPOD_ID")
+        if runpod_key and whisper_id:
+            configured.append("Whisper (RunPod)")
+            self.logger.debug("Whisper transcription provider: configured")
+        elif runpod_key:
+            missing.append("Whisper (missing WHISPER_RUNPOD_ID)")
+            self.logger.debug("Whisper transcription provider: partially configured (missing WHISPER_RUNPOD_ID)")
+        elif whisper_id:
+            missing.append("Whisper (missing RUNPOD_API_KEY)")
+            self.logger.debug("Whisper transcription provider: partially configured (missing RUNPOD_API_KEY)")
+        else:
+            missing.append("Whisper (RUNPOD_API_KEY + WHISPER_RUNPOD_ID)")
+            self.logger.debug("Whisper transcription provider: not configured")
+        return {"configured": configured, "missing": missing}
+    def _build_transcription_provider_error_message(self, missing_providers: list) -> str:
+        """Build a helpful error message when no transcription providers are configured."""
+        return (
+            "No transcription providers configured!\n"
+            "\n"
+            "Karaoke video generation requires at least one transcription provider to create "
+            "synchronized lyrics. Without a transcription provider, the system cannot generate "
+            "the word-level timing data needed for the karaoke video.\n"
+            "\n"
+            "AVAILABLE TRANSCRIPTION PROVIDERS:\n"
+            "\n"
+            "1. AudioShake (Recommended - Commercial, high-quality)\n"
+            "   - Set environment variable: AUDIOSHAKE_API_TOKEN=your_token\n"
+            "   - Get an API key at: https://www.audioshake.ai/\n"
+            "\n"
+            "2. Whisper via RunPod (Open-source alternative)\n"
+            "   - Set environment variables:\n"
+            "     RUNPOD_API_KEY=your_key\n"
+            "     WHISPER_RUNPOD_ID=your_endpoint_id\n"
+            "   - Set up a Whisper endpoint at: https://www.runpod.io/\n"
+            "\n"
+            "ALTERNATIVES:\n"
+            "\n"
+            "- Use --skip-lyrics flag to generate instrumental-only karaoke (no synchronized lyrics)\n"
+            "- Use --lyrics_file to provide pre-timed lyrics (still needs transcription for timing)\n"
+            "\n"
+            f"Missing provider configurations: {', '.join(missing_providers)}\n"
+            "\n"
+            "See README.md 'Transcription Providers' section for detailed setup instructions."
+        )
+    def transcribe_lyrics(self, input_audio_wav, artist, title, track_output_dir, lyrics_artist=None, lyrics_title=None):
+        """
+        Transcribe lyrics for a track.
+        Args:
+            input_audio_wav: Path to the audio file
+            artist: Original artist name (used for filename generation)
+            title: Original title (used for filename generation)
+            track_output_dir: Output directory path
+            lyrics_artist: Artist name for lyrics processing (defaults to artist if None)
+            lyrics_title: Title for lyrics processing (defaults to title if None)
+        Raises:
+            ValueError: If transcription is enabled but no providers are configured
+        """
+        # Use original artist/title for filename generation
+        filename_artist = artist
+        filename_title = title
+        # Use lyrics_artist/lyrics_title for actual lyrics processing, fall back to originals if not provided
+        processing_artist = lyrics_artist or artist
+        processing_title = lyrics_title or title
+        self.logger.info(
+            f"Transcribing lyrics for track {processing_artist} - {processing_title} from audio file: {input_audio_wav} with output directory: {track_output_dir}"
+        )
+        # Check for existing files first using sanitized names from ORIGINAL artist/title for consistency
+        sanitized_artist = sanitize_filename(filename_artist)
+        sanitized_title = sanitize_filename(filename_title)
+        parent_video_path = os.path.join(track_output_dir, f"{sanitized_artist} - {sanitized_title} (With Vocals).mkv")
+        parent_lrc_path = os.path.join(track_output_dir, f"{sanitized_artist} - {sanitized_title} (Karaoke).lrc")
+        # Check lyrics directory for existing files
+        lyrics_dir = os.path.join(track_output_dir, "lyrics")
+        lyrics_video_path = os.path.join(lyrics_dir, f"{sanitized_artist} - {sanitized_title} (With Vocals).mkv")
+        lyrics_lrc_path = os.path.join(lyrics_dir, f"{sanitized_artist} - {sanitized_title} (Karaoke).lrc")
+        # If files exist in parent directory, return early (but detect countdown padding first)
+        if os.path.exists(parent_video_path) and os.path.exists(parent_lrc_path):
+            self.logger.info("Found existing video and LRC files in parent directory, skipping transcription")
+            # Detect countdown padding from existing LRC file
+            countdown_padding_added, countdown_padding_seconds = self._detect_countdown_padding_from_lrc(parent_lrc_path)
+            if countdown_padding_added:
+                self.logger.info(f"Existing files have countdown padding: {countdown_padding_seconds}s")
+            return {
+                "lrc_filepath": parent_lrc_path,
+                "ass_filepath": parent_video_path,
+                "countdown_padding_added": countdown_padding_added,
+                "countdown_padding_seconds": countdown_padding_seconds,
+                "padded_audio_filepath": None,  # Original padded audio may not exist
+            }
+        # If files exist in lyrics directory, copy to parent and return (but detect countdown padding first)
+        if os.path.exists(lyrics_video_path) and os.path.exists(lyrics_lrc_path):
+            self.logger.info("Found existing video and LRC files in lyrics directory, copying to parent")
+            os.makedirs(track_output_dir, exist_ok=True)
+            shutil.copy2(lyrics_video_path, parent_video_path)
+            shutil.copy2(lyrics_lrc_path, parent_lrc_path)
+            # Detect countdown padding from existing LRC file
+            countdown_padding_added, countdown_padding_seconds = self._detect_countdown_padding_from_lrc(parent_lrc_path)
+            if countdown_padding_added:
+                self.logger.info(f"Existing files have countdown padding: {countdown_padding_seconds}s")
+            return {
+                "lrc_filepath": parent_lrc_path,
+                "ass_filepath": parent_video_path,
+                "countdown_padding_added": countdown_padding_added,
+                "countdown_padding_seconds": countdown_padding_seconds,
+                "padded_audio_filepath": None,  # Original padded audio may not exist
+            }
+        # Check transcription provider configuration if transcription is not being skipped
+        # Do this AFTER checking for existing files, since existing files don't need transcription
+        if not self.skip_transcription:
+            provider_status = self._check_transcription_providers()
+            if provider_status["configured"]:
+                self.logger.info(f"Transcription providers configured: {', '.join(provider_status['configured'])}")
+            else:
+                error_msg = self._build_transcription_provider_error_message(provider_status["missing"])
+                raise ValueError(error_msg)
+        # Create lyrics directory if it doesn't exist
+        os.makedirs(lyrics_dir, exist_ok=True)
+        self.logger.info(f"Created lyrics directory: {lyrics_dir}")
+        # Set render_video to False if explicitly disabled
+        render_video = self.render_video
+        if not render_video:
+            self.logger.info("Video rendering disabled, skipping video output")
+        # Load environment variables
+        load_dotenv()
+        env_config = {
+            "audioshake_api_token": os.getenv("AUDIOSHAKE_API_TOKEN"),
+            "genius_api_token": os.getenv("GENIUS_API_TOKEN"),
+            "spotify_cookie": os.getenv("SPOTIFY_COOKIE_SP_DC"),
+            "runpod_api_key": os.getenv("RUNPOD_API_KEY"),
+            "whisper_runpod_id": os.getenv("WHISPER_RUNPOD_ID"),
+            "rapidapi_key": os.getenv("RAPIDAPI_KEY"),  # Add missing RAPIDAPI_KEY
+        }
+        # Create config objects for LyricsTranscriber
+        transcriber_config = TranscriberConfig(
+            audioshake_api_token=env_config.get("audioshake_api_token"),
+        )
+        lyrics_config = LyricsConfig(
+            genius_api_token=env_config.get("genius_api_token"),
+            spotify_cookie=env_config.get("spotify_cookie"),
+            rapidapi_key=env_config.get("rapidapi_key"),
+            lyrics_file=self.lyrics_file,
+        )
+        # Debug logging for lyrics_config
+        self.logger.info(f"LyricsConfig created with:")
+        self.logger.info(f"  genius_api_token: {env_config.get('genius_api_token')[:3] + '...' if env_config.get('genius_api_token') else 'None'}")
+        self.logger.info(f"  spotify_cookie: {env_config.get('spotify_cookie')[:3] + '...' if env_config.get('spotify_cookie') else 'None'}")
+        self.logger.info(f"  rapidapi_key: {env_config.get('rapidapi_key')[:3] + '...' if env_config.get('rapidapi_key') else 'None'}")
+        self.logger.info(f"  lyrics_file: {self.lyrics_file}")
+        # Detect if we're running in a serverless environment (Modal)
+        # Modal sets specific environment variables we can check for
+        is_serverless = (
+            os.getenv("MODAL_TASK_ID") is not None or
+            os.getenv("MODAL_FUNCTION_NAME") is not None or
+            os.path.exists("/.modal")  # Modal creates this directory in containers
+        )
+        # In serverless environment, disable interactive review even if skip_transcription_review=False
+        # This preserves CLI behavior while fixing serverless hanging
+        enable_review_setting = not self.skip_transcription_review and not is_serverless
+        if is_serverless and not self.skip_transcription_review:
+            self.logger.info("Detected serverless environment - disabling interactive review to prevent hanging")
+        # In serverless environment, disable video generation during Phase 1 to save compute
+        # Video will be generated in Phase 2 after human review
+        serverless_render_video = render_video and not is_serverless
+        if is_serverless and render_video:
+            self.logger.info("Detected serverless environment - deferring video generation until after review")
+        output_config = OutputConfig(
+            output_styles_json=self.style_params_json,
+            output_dir=lyrics_dir,
+            render_video=serverless_render_video,  # Disable video in serverless Phase 1
+            fetch_lyrics=True,
+            run_transcription=not self.skip_transcription,
+            run_correction=True,
+            generate_plain_text=True,
+            generate_lrc=True,
+            generate_cdg=False,  # Also defer CDG generation to Phase 2
+            video_resolution="4k",
+            enable_review=enable_review_setting,
+            subtitle_offset_ms=self.subtitle_offset_ms,
+        )
+        # Add this log entry to debug the OutputConfig
+        self.logger.info(f"Instantiating LyricsTranscriber with OutputConfig: {output_config}")
+        # Initialize transcriber with new config objects - use PROCESSING artist/title for lyrics work
+        transcriber = LyricsTranscriber(
+            audio_filepath=input_audio_wav,
+            artist=processing_artist,  # Use lyrics_artist for processing
+            title=processing_title,   # Use lyrics_title for processing
+            transcriber_config=transcriber_config,
+            lyrics_config=lyrics_config,
+            output_config=output_config,
+            logger=self.logger,
+        )
+        # Process and get results
+        results: LyricsControllerResult = transcriber.process()
+        self.logger.info(f"Transcriber Results Filepaths:")
+        for key, value in results.__dict__.items():
+            if key.endswith("_filepath"):
+                self.logger.info(f"  {key}: {value}")
+        # Build output dictionary
+        transcriber_outputs = {}
+        if results.lrc_filepath:
+            transcriber_outputs["lrc_filepath"] = results.lrc_filepath
+            self.logger.info(f"Moving LRC file from {results.lrc_filepath} to {parent_lrc_path}")
+            shutil.copy2(results.lrc_filepath, parent_lrc_path)
+        if results.ass_filepath:
+            transcriber_outputs["ass_filepath"] = results.ass_filepath
+            self.logger.info(f"Moving video file from {results.video_filepath} to {parent_video_path}")
+            shutil.copy2(results.video_filepath, parent_video_path)
+        if results.transcription_corrected:
+            transcriber_outputs["corrected_lyrics_text"] = "\n".join(
+                segment.text for segment in results.transcription_corrected.corrected_segments
+            )
+            transcriber_outputs["corrected_lyrics_text_filepath"] = results.corrected_txt
+            # Save correction data to JSON file for review interface
+            # Use the expected filename format: "{artist} - {title} (Lyrics Corrections).json"
+            # Use sanitized names to be consistent with all other files created by lyrics_transcriber
+            corrections_filename = f"{sanitized_artist} - {sanitized_title} (Lyrics Corrections).json"
+            corrections_filepath = os.path.join(lyrics_dir, corrections_filename)
+            # Use the CorrectionResult's to_dict() method to serialize
+            correction_data = results.transcription_corrected.to_dict()
+            with open(corrections_filepath, 'w') as f:
+                json.dump(correction_data, f, indent=2)
+            self.logger.info(f"Saved correction data to {corrections_filepath}")
+        # Capture countdown padding information for syncing with instrumental audio
+        transcriber_outputs["countdown_padding_added"] = getattr(results, "countdown_padding_added", False)
+        transcriber_outputs["countdown_padding_seconds"] = getattr(results, "countdown_padding_seconds", 0.0)
+        transcriber_outputs["padded_audio_filepath"] = getattr(results, "padded_audio_filepath", None)
+        if transcriber_outputs["countdown_padding_added"]:
+            self.logger.info(
+                f"Countdown padding detected: {transcriber_outputs['countdown_padding_seconds']}s added to vocals. "
+                f"Instrumental audio will need to be padded accordingly."
+            )
+        if transcriber_outputs:
+            self.logger.info(f"*** Transcriber Filepath Outputs: ***")
+            for key, value in transcriber_outputs.items():
+                if key.endswith("_filepath"):
+                    self.logger.info(f"  {key}: {value}")
+        return transcriber_outputs

karaoke_gen/metadata.py ADDED Viewed

@@ -0,0 +1,160 @@
+import logging
+def extract_info_for_online_media(input_url, input_artist, input_title, logger, cookies_str=None):
+    """
+    Build the synthetic metadata dict describing a track by artist and title.
+    URL-based extraction is not supported by this function: any non-None
+    input_url is rejected. The returned dict is tagged with "flacfetch" as both
+    extractor key and source and carries no URL.
+    Args:
+        input_url: Deprecated - must be None, any other value raises ValueError
+        input_artist: The artist name (must be non-empty)
+        input_title: The track title (must be non-empty)
+        logger: Logger instance used for info-level progress messages
+        cookies_str: Deprecated - accepted for compatibility, never read
+    Returns:
+        A dict with the keys "title", "artist", "track_title", "extractor_key",
+        "id", "url" and "source"
+    Raises:
+        ValueError: If input_url is not None, or if artist or title is missing
+    """
+    logger.info(f"Extracting info for input_url: {input_url} input_artist: {input_artist} input_title: {input_title}")
+    # Guard 1: the URL workflow is rejected before anything else is inspected
+    url_was_given = input_url is not None
+    if url_was_given:
+        raise ValueError(
+            "URL-based audio fetching has been replaced with flacfetch. "
+            "Please provide a local file path instead, or use artist and title only "
+            "to search for audio via flacfetch."
+        )
+    # Guard 2: both names are required to describe the track
+    if not (input_artist and input_title):
+        raise ValueError(
+            f"Artist and title are required for audio search. "
+            f"Received artist: {input_artist}, title: {input_title}"
+        )
+    logger.info(f"Creating metadata for: {input_artist} - {input_title}")
+    display_title = f"{input_artist} - {input_title}"
+    # Spaces in the identifier are replaced by underscores
+    synthetic_id = f"flacfetch_{input_artist}_{input_title}".replace(" ", "_")
+    metadata = {
+        "title": display_title,
+        "artist": input_artist,
+        "track_title": input_title,
+        "extractor_key": "flacfetch",
+        "id": synthetic_id,
+        "url": None,  # No URL is known at this point
+        "source": "flacfetch",
+    }
+    return metadata
+def _split_artist_title(raw_title):
+    """Split an "Artist - Title" string into stripped (artist, title); ("", "") when it has no hyphen."""
+    # Prefer the spaced separator so hyphenated names ("Jay-Z - Song") keep their hyphen,
+    # then fall back to a bare hyphen ("Artist-Title")
+    for separator in (" - ", "-"):
+        if separator in raw_title:
+            artist_part, title_part = raw_title.split(separator, 1)
+            return artist_part.strip(), title_part.strip()
+    return "", ""
+def parse_track_metadata(extracted_info, current_artist, current_title, persistent_artist, logger):
+    """
+    Parses extracted_info to determine URL, extractor, ID, artist, and title.
+    Supports both flacfetch-style metadata (extracted_info["source"] == "flacfetch")
+    and legacy yt-dlp style metadata.
+    Args:
+        extracted_info: Metadata dict. For flacfetch the keys "id", "artist" and
+            "track_title" are read; for legacy metadata the keys "url"/"webpage_url",
+            "extractor_key"/"ie_key", "id", "title" and "uploader" are read.
+        current_artist: Artist supplied by the caller; takes precedence over an
+            artist parsed from legacy metadata
+        current_title: Title supplied by the caller; takes precedence over a
+            title parsed from legacy metadata
+        persistent_artist: If truthy, always overrides the resulting artist
+        logger: Logger instance
+    Returns:
+        A dict with the keys "url", "extractor", "media_id", "artist" and "title"
+    Raises:
+        Exception: If no artist or no title could be determined (legacy path only)
+    """
+    parsed_data = {
+        "url": None,
+        "extractor": None,
+        "media_id": None,
+        "artist": current_artist,
+        "title": current_title,
+    }
+    # Handle flacfetch-style metadata (no URL required)
+    if extracted_info.get("source") == "flacfetch":
+        parsed_data["url"] = None  # URL determined at download time
+        parsed_data["extractor"] = "flacfetch"
+        parsed_data["media_id"] = extracted_info.get("id")
+        # Use the provided artist/title directly
+        if extracted_info.get("artist"):
+            parsed_data["artist"] = extracted_info["artist"]
+        if extracted_info.get("track_title"):
+            parsed_data["title"] = extracted_info["track_title"]
+        if persistent_artist:
+            parsed_data["artist"] = persistent_artist
+        logger.info(f"Using flacfetch metadata: artist: {parsed_data['artist']}, title: {parsed_data['title']}")
+        return parsed_data
+    # Legacy yt-dlp style metadata handling (for backward compatibility)
+    if "url" in extracted_info:
+        parsed_data["url"] = extracted_info["url"]
+    elif "webpage_url" in extracted_info:
+        parsed_data["url"] = extracted_info["webpage_url"]
+    else:
+        # A missing URL is acceptable; parsed_data["url"] stays None
+        logger.debug("No URL in extracted info - will be determined at download time")
+    if "extractor_key" in extracted_info:
+        parsed_data["extractor"] = extracted_info["extractor_key"]
+    elif "ie_key" in extracted_info:
+        parsed_data["extractor"] = extracted_info["ie_key"]
+    else:
+        # Default to flacfetch if no extractor specified
+        parsed_data["extractor"] = "flacfetch"
+    if "id" in extracted_info:
+        parsed_data["media_id"] = extracted_info["id"]
+    # Treat a missing or None title/uploader as empty instead of crashing on it
+    raw_title = extracted_info.get("title") or ""
+    uploader = extracted_info.get("uploader") or ""
+    if "-" in raw_title:
+        # Example: "Artist - Title"
+        metadata_artist, metadata_title = _split_artist_title(raw_title)
+        # A title such as "- Song" leaves the artist side empty; use the uploader then
+        metadata_artist = metadata_artist or uploader
+    elif "uploader" in extracted_info:
+        # Fallback to uploader as artist if the title has no separator
+        metadata_artist = uploader
+        metadata_title = raw_title.strip()
+    else:
+        metadata_artist = ""
+        metadata_title = ""
+    # If unable to parse, log an appropriate message
+    if not metadata_artist or not metadata_title:
+        logger.warning("Could not parse artist and title from the input media metadata.")
+    # Caller-supplied values win; parsed metadata only fills the gaps
+    if not parsed_data["artist"] and metadata_artist:
+        logger.warning(f"Artist not provided as input, setting to {metadata_artist} from input media metadata...")
+        parsed_data["artist"] = metadata_artist
+    if not parsed_data["title"] and metadata_title:
+        logger.warning(f"Title not provided as input, setting to {metadata_title} from input media metadata...")
+        parsed_data["title"] = metadata_title
+    if persistent_artist:
+        logger.debug(
+            f"Resetting artist from {parsed_data['artist']} to persistent artist: {persistent_artist} for consistency while processing playlist..."
+        )
+        parsed_data["artist"] = persistent_artist
+    if parsed_data["artist"] and parsed_data["title"]:
+        logger.info(f"Parsed metadata - artist: {parsed_data['artist']}, title: {parsed_data['title']}")
+    else:
+        logger.debug(extracted_info)
+        raise Exception("Failed to extract artist and title from the input media metadata.")
+    return parsed_data