PyPI - karaoke-gen - Versions diffs - 0.71.42__py3-none-any.whl → 0.75.53__py3-none-any.whl - Mend

karaoke-gen 0.71.42__py3-none-any.whl → 0.75.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

karaoke_gen/__init__.py +32 -1
karaoke_gen/audio_fetcher.py +1220 -67
karaoke_gen/audio_processor.py +15 -3
karaoke_gen/instrumental_review/server.py +154 -860
karaoke_gen/instrumental_review/static/index.html +1529 -0
karaoke_gen/karaoke_finalise/karaoke_finalise.py +87 -2
karaoke_gen/karaoke_gen.py +131 -14
karaoke_gen/lyrics_processor.py +172 -4
karaoke_gen/utils/bulk_cli.py +3 -0
karaoke_gen/utils/cli_args.py +7 -4
karaoke_gen/utils/gen_cli.py +221 -5
karaoke_gen/utils/remote_cli.py +786 -43
{karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/METADATA +109 -4
{karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/RECORD +37 -31
lyrics_transcriber/core/controller.py +76 -2
lyrics_transcriber/frontend/package.json +1 -1
lyrics_transcriber/frontend/src/App.tsx +6 -4
lyrics_transcriber/frontend/src/api.ts +25 -10
lyrics_transcriber/frontend/src/components/Header.tsx +38 -12
lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +17 -3
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +190 -542
lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
lyrics_transcriber/frontend/web_assets/assets/{index-DdJTDWH3.js → index-BECn1o8Q.js} +1802 -553
lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
lyrics_transcriber/frontend/web_assets/index.html +1 -1
lyrics_transcriber/output/countdown_processor.py +39 -0
lyrics_transcriber/review/server.py +5 -5
lyrics_transcriber/transcribers/audioshake.py +96 -7
lyrics_transcriber/types.py +14 -12
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +0 -1
{karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/WHEEL +0 -0
{karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/entry_points.txt +0 -0
{karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/licenses/LICENSE +0 -0

karaoke_gen/karaoke_finalise/karaoke_finalise.py CHANGED Viewed

@@ -47,6 +47,7 @@ class KaraokeFinalise:
         user_youtube_credentials=None,  # Add support for pre-stored credentials
         server_side_mode=False,  # New parameter for server-side deployment
         selected_instrumental_file=None,  # Add support for pre-selected instrumental file
+        countdown_padding_seconds=None,  # Padding applied to vocals; instrumental must match
     ):
         self.log_level = log_level
         self.log_formatter = log_formatter
@@ -54,6 +55,9 @@ class KaraokeFinalise:
         if logger is None:
             self.logger = logging.getLogger(__name__)
             self.logger.setLevel(log_level)
+            # Prevent log propagation to root logger to avoid duplicate logs
+            # when external packages (like lyrics_converter) configure root logger handlers
+            self.logger.propagate = False
             self.log_handler = logging.StreamHandler()
@@ -105,6 +109,7 @@ class KaraokeFinalise:
         self.user_youtube_credentials = user_youtube_credentials
         self.server_side_mode = server_side_mode
         self.selected_instrumental_file = selected_instrumental_file
+        self.countdown_padding_seconds = countdown_padding_seconds
         self.suffixes = {
             "title_mov": " (Title).mov",
@@ -421,6 +426,15 @@ class KaraokeFinalise:
         # Check if any videos were found
         if "items" in response and len(response["items"]) > 0:
             for item in response["items"]:
+                # YouTube search API sometimes returns results from other channels even with channelId filter
+                # Verify the video actually belongs to our channel
+                result_channel_id = item["snippet"]["channelId"]
+                if result_channel_id != channel_id:
+                    self.logger.debug(
+                        f"Skipping video from different channel: {item['snippet']['title']} (channel: {result_channel_id})"
+                    )
+                    continue
                 found_title = item["snippet"]["title"]
                 # In server-side mode, require an exact match to avoid false positives.
@@ -640,7 +654,31 @@ class KaraokeFinalise:
                 else:
                     self.logger.warning(f"Unsupported file extension: {current_ext}")
-        raise Exception("No suitable files found for processing.")
+        raise Exception(
+            "No suitable files found for processing.\n"
+            "\n"
+            "WHAT THIS MEANS:\n"
+            "The finalisation step requires a '(With Vocals).mkv' video file, which is created "
+            "during the lyrics transcription phase. This file contains the karaoke video with "
+            "synchronized lyrics overlay.\n"
+            "\n"
+            "COMMON CAUSES:\n"
+            "1. Transcription provider not configured - No AUDIOSHAKE_API_TOKEN or RUNPOD_API_KEY set\n"
+            "2. Transcription failed - Check logs above for API errors or timeout messages\n"
+            "3. Invalid API credentials - Verify your API tokens are correct and active\n"
+            "4. Network issues - Unable to reach transcription service\n"
+            "5. Running in wrong directory - Make sure you're in the track output folder\n"
+            "\n"
+            "TROUBLESHOOTING STEPS:\n"
+            "1. Check environment variables:\n"
+            "   - AUDIOSHAKE_API_TOKEN (for AudioShake transcription)\n"
+            "   - RUNPOD_API_KEY + WHISPER_RUNPOD_ID (for Whisper transcription)\n"
+            "2. Review the log output above for transcription errors\n"
+            "3. Try running with --log_level debug for more detailed output\n"
+            "4. If you don't need synchronized lyrics, use --skip-lyrics for instrumental-only karaoke\n"
+            "\n"
+            "See README.md 'Transcription Providers' and 'Troubleshooting' sections for more details."
+        )
     def choose_instrumental_audio_file(self, base_name):
         self.logger.info(f"Choosing instrumental audio file to use as karaoke audio...")
@@ -720,6 +758,32 @@ class KaraokeFinalise:
         artist, title = base_name.split(" - ", 1)
         return base_name, artist, title
+    def _pad_audio_file(self, input_audio, output_audio, padding_seconds):
+        """
+        Pad an audio file by prepending silence at the beginning.
+        Uses the same ffmpeg approach as LyricsTranscriber's CountdownProcessor
+        to ensure consistent padding behavior.
+        Args:
+            input_audio: Path to input audio file
+            output_audio: Path for the padded output file
+            padding_seconds: Amount of silence to prepend (in seconds)
+        """
+        self.logger.info(f"Padding audio file with {padding_seconds}s of silence")
+        # Use ffmpeg to prepend silence - this matches the approach in audio_processor.py
+        # adelay filter adds delay in milliseconds
+        delay_ms = int(padding_seconds * 1000)
+        ffmpeg_command = (
+            f'{self.ffmpeg_base_command} -i "{input_audio}" '
+            f'-af "adelay={delay_ms}|{delay_ms}" '
+            f'"{output_audio}"'
+        )
+        self.execute_command(ffmpeg_command, f"Padding audio with {padding_seconds}s silence")
     def execute_command(self, command, description):
         """Execute a shell command and log the output. For general commands (rclone, etc.)"""
         self.logger.info(f"{description}")
@@ -764,11 +828,32 @@ class KaraokeFinalise:
     def remux_with_instrumental(self, with_vocals_file, instrumental_audio, output_file):
         """Remux the video with instrumental audio to create karaoke version"""
+        # Safety net: If countdown padding was applied to vocals, ensure instrumental is padded too
+        actual_instrumental = instrumental_audio
+        if self.countdown_padding_seconds and self.countdown_padding_seconds > 0:
+            # Check if the instrumental file is already padded (has "(Padded)" in name)
+            if "(Padded)" not in instrumental_audio:
+                self.logger.warning(
+                    f"Countdown padding ({self.countdown_padding_seconds}s) was applied to vocals, "
+                    f"but instrumental doesn't appear to be padded. Creating padded version..."
+                )
+                # Create a padded version of the instrumental
+                base, ext = os.path.splitext(instrumental_audio)
+                padded_instrumental = f"{base} (Padded){ext}"
+                if not os.path.exists(padded_instrumental):
+                    self._pad_audio_file(instrumental_audio, padded_instrumental, self.countdown_padding_seconds)
+                    self.logger.info(f"Created padded instrumental: {padded_instrumental}")
+                actual_instrumental = padded_instrumental
+            else:
+                self.logger.info(f"Using already-padded instrumental: {instrumental_audio}")
         # This operation is primarily I/O bound (remuxing), so hardware acceleration doesn't provide significant benefit
         # Keep the existing approach but use the new execute method
         ffmpeg_command = (
             f'{self.ffmpeg_base_command} -an -i "{with_vocals_file}" '
-            f'-vn -i "{instrumental_audio}" -c:v copy -c:a pcm_s16le "{output_file}"'
+            f'-vn -i "{actual_instrumental}" -c:v copy -c:a pcm_s16le "{output_file}"'
         )
         self.execute_command(ffmpeg_command, "Remuxing video with instrumental audio")

karaoke_gen/karaoke_gen.py CHANGED Viewed

@@ -29,7 +29,7 @@ from .audio_processor import AudioProcessor
 from .lyrics_processor import LyricsProcessor
 from .video_generator import VideoGenerator
 from .video_background_processor import VideoBackgroundProcessor
-from .audio_fetcher import create_audio_fetcher, AudioFetcherError, NoResultsError
+from .audio_fetcher import create_audio_fetcher, AudioFetcherError, NoResultsError, UserCancelledError
 class KaraokePrep:
@@ -74,7 +74,7 @@ class KaraokePrep:
         skip_separation=False,
         # Video Background Configuration
         background_video=None,
-        background_video_darkness=0,
+        background_video_darkness=50,
         # Audio Fetcher Configuration
         auto_download=False,
     ):
@@ -84,6 +84,9 @@ class KaraokePrep:
         if logger is None:
             self.logger = logging.getLogger(__name__)
             self.logger.setLevel(log_level)
+            # Prevent log propagation to root logger to avoid duplicate logs
+            # when external packages (like lyrics_converter) configure root logger handlers
+            self.logger.propagate = False
             self.log_handler = logging.StreamHandler()
@@ -256,6 +259,101 @@ class KaraokePrep:
         self.artist = metadata_result["artist"]
         self.title = metadata_result["title"]
+    def _scan_directory_for_instrumentals(self, track_output_dir, artist_title):
+        """
+        Scan the directory for existing instrumental files and build a separated_audio structure.
+        This is used when transcription was skipped (existing files found) but we need to
+        pad instrumentals due to countdown padding.
+        Args:
+            track_output_dir: The track output directory to scan
+            artist_title: The "{artist} - {title}" string for matching files
+        Returns:
+            Dictionary with separated_audio structure containing found instrumental paths
+        """
+        self.logger.info(f"Scanning directory for existing instrumentals: {track_output_dir}")
+        separated_audio = {
+            "clean_instrumental": {},
+            "backing_vocals": {},
+            "other_stems": {},
+            "combined_instrumentals": {},
+        }
+        # Search patterns for instrumental files
+        # Files are named like: "{artist} - {title} (Instrumental {model}).flac"
+        # Or with backing vocals: "{artist} - {title} (Instrumental +BV {model}).flac"
+        # Look for files in the track output directory
+        search_dir = track_output_dir
+        # Find all instrumental files (not padded ones - we want the originals)
+        instrumental_pattern = os.path.join(search_dir, f"{artist_title} (Instrumental*.flac")
+        instrumental_files = glob.glob(instrumental_pattern)
+        # Also check for wav files
+        instrumental_pattern_wav = os.path.join(search_dir, f"{artist_title} (Instrumental*.wav")
+        instrumental_files.extend(glob.glob(instrumental_pattern_wav))
+        self.logger.debug(f"Found {len(instrumental_files)} instrumental files")
+        for filepath in instrumental_files:
+            filename = os.path.basename(filepath)
+            # Skip already padded files
+            if "(Padded)" in filename:
+                self.logger.debug(f"Skipping already padded file: {filename}")
+                continue
+            # Determine if it's a combined instrumental (+BV) or clean instrumental
+            if "+BV" in filename or "+bv" in filename.lower():
+                # Combined instrumental with backing vocals
+                # Extract model name from filename
+                # Pattern: "(Instrumental +BV {model}).flac"
+                model_match = re.search(r'\(Instrumental \+BV ([^)]+)\)', filename)
+                if model_match:
+                    model_name = model_match.group(1).strip()
+                    separated_audio["combined_instrumentals"][model_name] = filepath
+                    self.logger.info(f"Found combined instrumental: {filename}")
+            else:
+                # Clean instrumental (no backing vocals)
+                # Pattern: "(Instrumental {model}).flac"
+                model_match = re.search(r'\(Instrumental ([^)]+)\)', filename)
+                if model_match:
+                    # Use as clean instrumental if we don't have one yet
+                    if not separated_audio["clean_instrumental"].get("instrumental"):
+                        separated_audio["clean_instrumental"]["instrumental"] = filepath
+                        self.logger.info(f"Found clean instrumental: {filename}")
+                    else:
+                        # Additional clean instrumentals go to combined_instrumentals for padding
+                        model_name = model_match.group(1).strip()
+                        separated_audio["combined_instrumentals"][model_name] = filepath
+                        self.logger.info(f"Found additional instrumental: {filename}")
+        # Also look for backing vocals files
+        backing_vocals_pattern = os.path.join(search_dir, f"{artist_title} (Backing Vocals*.flac")
+        backing_vocals_files = glob.glob(backing_vocals_pattern)
+        backing_vocals_pattern_wav = os.path.join(search_dir, f"{artist_title} (Backing Vocals*.wav")
+        backing_vocals_files.extend(glob.glob(backing_vocals_pattern_wav))
+        for filepath in backing_vocals_files:
+            filename = os.path.basename(filepath)
+            model_match = re.search(r'\(Backing Vocals ([^)]+)\)', filename)
+            if model_match:
+                model_name = model_match.group(1).strip()
+                if model_name not in separated_audio["backing_vocals"]:
+                    separated_audio["backing_vocals"][model_name] = {"backing_vocals": filepath}
+                    self.logger.info(f"Found backing vocals: {filename}")
+        # Log summary
+        clean_count = 1 if separated_audio["clean_instrumental"].get("instrumental") else 0
+        combined_count = len(separated_audio["combined_instrumentals"])
+        self.logger.info(f"Directory scan complete: {clean_count} clean instrumental, {combined_count} combined instrumentals")
+        return separated_audio
     async def prep_single_track(self):
         # Add signal handler at the start
         loop = asyncio.get_running_loop()
@@ -419,6 +517,9 @@ class KaraokePrep:
                         # No still image for audio-only downloads
                         processed_track["input_still_image"] = None
+                    except UserCancelledError:
+                        # User cancelled - propagate up to CLI for graceful exit
+                        raise
                     except NoResultsError as e:
                         self.logger.error(f"No audio found: {e}")
                         return None
@@ -761,6 +862,24 @@ class KaraokePrep:
                     f"Applying {padding_seconds}s padding to all instrumental files to sync with vocal countdown"
                 )
+                # If separated_audio is empty (e.g., transcription was skipped but existing files have countdown),
+                # scan the directory for existing instrumental files
+                # Note: also check for Custom instrumental (provided via --existing_instrumental)
+                has_instrumentals = (
+                    processed_track["separated_audio"].get("clean_instrumental", {}).get("instrumental") or
+                    processed_track["separated_audio"].get("combined_instrumentals") or
+                    processed_track["separated_audio"].get("Custom", {}).get("instrumental")
+                )
+                if not has_instrumentals:
+                    self.logger.info("No instrumentals in separated_audio, scanning directory for existing files...")
+                    # Preserve existing Custom key if present before overwriting
+                    custom_backup = processed_track["separated_audio"].get("Custom")
+                    processed_track["separated_audio"] = self._scan_directory_for_instrumentals(
+                        track_output_dir, artist_title
+                    )
+                    if custom_backup:
+                        processed_track["separated_audio"]["Custom"] = custom_backup
                 # Apply padding using AudioProcessor
                 padded_separation_result = self.audio_processor.apply_countdown_padding_to_instrumentals(
                     separation_result=processed_track["separated_audio"],
@@ -788,11 +907,11 @@ class KaraokePrep:
             for sig in (signal.SIGINT, signal.SIGTERM):
                 loop.remove_signal_handler(sig)
-    async def shutdown(self, signal):
+    async def shutdown(self, signal_received):
         """Handle shutdown signals gracefully."""
-        self.logger.info(f"Received exit signal {signal.name}...")
+        self.logger.info(f"Received exit signal {signal_received.name}...")
-        # Get all running tasks
+        # Get all running tasks except the current shutdown task
         tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
         if tasks:
@@ -801,17 +920,15 @@ class KaraokePrep:
             for task in tasks:
                 task.cancel()
-            self.logger.info("Received cancellation request, cleaning up...")
             # Wait for all tasks to complete with cancellation
-            try:
-                await asyncio.gather(*tasks, return_exceptions=True)
-            except asyncio.CancelledError:
-                pass
+            # Use return_exceptions=True to gather all results without raising
+            await asyncio.gather(*tasks, return_exceptions=True)
-        # Force exit after cleanup
-        self.logger.info("Cleanup complete, exiting...")
-        sys.exit(0)  # Add this line to force exit
+        self.logger.info("Cleanup complete")
+        # Raise KeyboardInterrupt to propagate the cancellation up the call stack
+        # This allows the main event loop to exit cleanly
+        raise KeyboardInterrupt()
     async def process_playlist(self):
         if self.artist is None or self.title is None:

karaoke_gen/lyrics_processor.py CHANGED Viewed

@@ -11,6 +11,9 @@ from .utils import sanitize_filename
 # Placeholder class or functions for lyrics processing
 class LyricsProcessor:
+    # Standard countdown padding duration used by LyricsTranscriber
+    COUNTDOWN_PADDING_SECONDS = 3.0
     def __init__(
         self, logger, style_params_json, lyrics_file, skip_transcription, skip_transcription_review, render_video, subtitle_offset_ms
     ):
@@ -22,6 +25,67 @@ class LyricsProcessor:
         self.render_video = render_video
         self.subtitle_offset_ms = subtitle_offset_ms
+    def _detect_countdown_padding_from_lrc(self, lrc_filepath):
+        """
+        Detect if countdown padding was applied by checking for countdown text in the LRC file.
+        The countdown segment has the text "3... 2... 1..." at timestamp 0.1-2.9s.
+        We detect this by looking for the countdown text pattern.
+        Args:
+            lrc_filepath: Path to the LRC file
+        Returns:
+            Tuple of (countdown_padding_added: bool, countdown_padding_seconds: float)
+        """
+        try:
+            with open(lrc_filepath, 'r', encoding='utf-8') as f:
+                content = f.read()
+            # Method 1: Check for countdown text pattern "3... 2... 1..."
+            # This is the most reliable detection method since the countdown text is unique
+            countdown_text = "3... 2... 1..."
+            if countdown_text in content:
+                self.logger.info(f"Detected countdown padding from LRC: found countdown text '{countdown_text}'")
+                return (True, self.COUNTDOWN_PADDING_SECONDS)
+            # Method 2 (fallback): Check if first lyric timestamp is >= 3 seconds
+            # This handles cases where countdown text format might differ
+            # LRC timestamps: [mm:ss.xx] or [mm:ss.xxx]
+            timestamp_pattern = r'\[(\d{1,2}):(\d{2})\.(\d{2,3})\]'
+            matches = re.findall(timestamp_pattern, content)
+            if not matches:
+                self.logger.debug("No timestamps found in LRC file")
+                return (False, 0.0)
+            # Parse the first timestamp
+            first_timestamp = matches[0]
+            minutes = int(first_timestamp[0])
+            seconds = int(first_timestamp[1])
+            # Handle both .xx and .xxx formats
+            centiseconds = first_timestamp[2]
+            if len(centiseconds) == 2:
+                milliseconds = int(centiseconds) * 10
+            else:
+                milliseconds = int(centiseconds)
+            first_lyric_time = minutes * 60 + seconds + milliseconds / 1000.0
+            self.logger.debug(f"First lyric timestamp in LRC: {first_lyric_time:.3f}s")
+            # If first lyric is at or after 3 seconds, countdown padding was applied
+            # Use a small buffer (2.5s) to account for songs that naturally start a bit late
+            if first_lyric_time >= 2.5:
+                self.logger.info(f"Detected countdown padding from LRC: first lyric at {first_lyric_time:.2f}s")
+                return (True, self.COUNTDOWN_PADDING_SECONDS)
+            return (False, 0.0)
+        except Exception as e:
+            self.logger.warning(f"Failed to detect countdown padding from LRC file: {e}")
+            return (False, 0.0)
     def find_best_split_point(self, line):
         """
         Find the best split point in a line based on the specified criteria.
@@ -103,6 +167,76 @@ class LyricsProcessor:
         return processed_lines
+    def _check_transcription_providers(self) -> dict:
+        """
+        Check which transcription providers are configured and return their status.
+        Returns:
+            dict with 'configured' (list of provider names) and 'missing' (list of missing configs)
+        """
+        load_dotenv()
+        configured = []
+        missing = []
+        # Check AudioShake
+        audioshake_token = os.getenv("AUDIOSHAKE_API_TOKEN")
+        if audioshake_token:
+            configured.append("AudioShake")
+            self.logger.debug("AudioShake transcription provider: configured")
+        else:
+            missing.append("AudioShake (AUDIOSHAKE_API_TOKEN)")
+            self.logger.debug("AudioShake transcription provider: not configured (missing AUDIOSHAKE_API_TOKEN)")
+        # Check Whisper via RunPod
+        runpod_key = os.getenv("RUNPOD_API_KEY")
+        whisper_id = os.getenv("WHISPER_RUNPOD_ID")
+        if runpod_key and whisper_id:
+            configured.append("Whisper (RunPod)")
+            self.logger.debug("Whisper transcription provider: configured")
+        elif runpod_key:
+            missing.append("Whisper (missing WHISPER_RUNPOD_ID)")
+            self.logger.debug("Whisper transcription provider: partially configured (missing WHISPER_RUNPOD_ID)")
+        elif whisper_id:
+            missing.append("Whisper (missing RUNPOD_API_KEY)")
+            self.logger.debug("Whisper transcription provider: partially configured (missing RUNPOD_API_KEY)")
+        else:
+            missing.append("Whisper (RUNPOD_API_KEY + WHISPER_RUNPOD_ID)")
+            self.logger.debug("Whisper transcription provider: not configured")
+        return {"configured": configured, "missing": missing}
+    def _build_transcription_provider_error_message(self, missing_providers: list) -> str:
+        """Build a helpful error message when no transcription providers are configured."""
+        return (
+            "No transcription providers configured!\n"
+            "\n"
+            "Karaoke video generation requires at least one transcription provider to create "
+            "synchronized lyrics. Without a transcription provider, the system cannot generate "
+            "the word-level timing data needed for the karaoke video.\n"
+            "\n"
+            "AVAILABLE TRANSCRIPTION PROVIDERS:\n"
+            "\n"
+            "1. AudioShake (Recommended - Commercial, high-quality)\n"
+            "   - Set environment variable: AUDIOSHAKE_API_TOKEN=your_token\n"
+            "   - Get an API key at: https://www.audioshake.ai/\n"
+            "\n"
+            "2. Whisper via RunPod (Open-source alternative)\n"
+            "   - Set environment variables:\n"
+            "     RUNPOD_API_KEY=your_key\n"
+            "     WHISPER_RUNPOD_ID=your_endpoint_id\n"
+            "   - Set up a Whisper endpoint at: https://www.runpod.io/\n"
+            "\n"
+            "ALTERNATIVES:\n"
+            "\n"
+            "- Use --skip-lyrics flag to generate instrumental-only karaoke (no synchronized lyrics)\n"
+            "- Use --lyrics_file to provide pre-timed lyrics (still needs transcription for timing)\n"
+            "\n"
+            f"Missing provider configurations: {', '.join(missing_providers)}\n"
+            "\n"
+            "See README.md 'Transcription Providers' section for detailed setup instructions."
+        )
     def transcribe_lyrics(self, input_audio_wav, artist, title, track_output_dir, lyrics_artist=None, lyrics_title=None):
         """
         Transcribe lyrics for a track.
@@ -114,6 +248,9 @@ class LyricsProcessor:
             track_output_dir: Output directory path
             lyrics_artist: Artist name for lyrics processing (defaults to artist if None)
             lyrics_title: Title for lyrics processing (defaults to title if None)
+        Raises:
+            ValueError: If transcription is enabled but no providers are configured
         """
         # Use original artist/title for filename generation
         filename_artist = artist
@@ -138,25 +275,56 @@ class LyricsProcessor:
         lyrics_video_path = os.path.join(lyrics_dir, f"{sanitized_artist} - {sanitized_title} (With Vocals).mkv")
         lyrics_lrc_path = os.path.join(lyrics_dir, f"{sanitized_artist} - {sanitized_title} (Karaoke).lrc")
-        # If files exist in parent directory, return early
+        # If files exist in parent directory, return early (but detect countdown padding first)
         if os.path.exists(parent_video_path) and os.path.exists(parent_lrc_path):
-            self.logger.info(f"Found existing video and LRC files in parent directory, skipping transcription")
+            self.logger.info("Found existing video and LRC files in parent directory, skipping transcription")
+            # Detect countdown padding from existing LRC file
+            countdown_padding_added, countdown_padding_seconds = self._detect_countdown_padding_from_lrc(parent_lrc_path)
+            if countdown_padding_added:
+                self.logger.info(f"Existing files have countdown padding: {countdown_padding_seconds}s")
             return {
                 "lrc_filepath": parent_lrc_path,
                 "ass_filepath": parent_video_path,
+                "countdown_padding_added": countdown_padding_added,
+                "countdown_padding_seconds": countdown_padding_seconds,
+                "padded_audio_filepath": None,  # Original padded audio may not exist
             }
-        # If files exist in lyrics directory, copy to parent and return
+        # If files exist in lyrics directory, copy to parent and return (but detect countdown padding first)
         if os.path.exists(lyrics_video_path) and os.path.exists(lyrics_lrc_path):
-            self.logger.info(f"Found existing video and LRC files in lyrics directory, copying to parent")
+            self.logger.info("Found existing video and LRC files in lyrics directory, copying to parent")
             os.makedirs(track_output_dir, exist_ok=True)
             shutil.copy2(lyrics_video_path, parent_video_path)
             shutil.copy2(lyrics_lrc_path, parent_lrc_path)
+            # Detect countdown padding from existing LRC file
+            countdown_padding_added, countdown_padding_seconds = self._detect_countdown_padding_from_lrc(parent_lrc_path)
+            if countdown_padding_added:
+                self.logger.info(f"Existing files have countdown padding: {countdown_padding_seconds}s")
             return {
                 "lrc_filepath": parent_lrc_path,
                 "ass_filepath": parent_video_path,
+                "countdown_padding_added": countdown_padding_added,
+                "countdown_padding_seconds": countdown_padding_seconds,
+                "padded_audio_filepath": None,  # Original padded audio may not exist
             }
+        # Check transcription provider configuration if transcription is not being skipped
+        # Do this AFTER checking for existing files, since existing files don't need transcription
+        if not self.skip_transcription:
+            provider_status = self._check_transcription_providers()
+            if provider_status["configured"]:
+                self.logger.info(f"Transcription providers configured: {', '.join(provider_status['configured'])}")
+            else:
+                error_msg = self._build_transcription_provider_error_message(provider_status["missing"])
+                raise ValueError(error_msg)
         # Create lyrics directory if it doesn't exist
         os.makedirs(lyrics_dir, exist_ok=True)
         self.logger.info(f"Created lyrics directory: {lyrics_dir}")

karaoke_gen/utils/bulk_cli.py CHANGED Viewed

@@ -19,6 +19,9 @@ from karaoke_gen.karaoke_finalise import KaraokeFinalise
 # Global logger
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)  # Set initial log level
+# Prevent log propagation to root logger to avoid duplicate logs
+# when external packages (like lyrics_converter) configure root logger handlers
+logger.propagate = False
 async def process_track_prep(row, args, logger, log_formatter):

karaoke_gen/utils/cli_args.py CHANGED Viewed

@@ -258,8 +258,8 @@ def create_parser(prog: str = "karaoke-gen") -> argparse.ArgumentParser:
     style_group.add_argument(
         "--background_video_darkness",
         type=int,
-        default=0,
-        help="Optional: Darkness overlay percentage (0-100) for video background (default: %(default)s). Example: --background_video_darkness=50",
+        default=50,
+        help="Optional: Darkness overlay percentage (0-100) for video background (default: %(default)s). Example: --background_video_darkness=20",
     )
     # Finalisation Configuration
@@ -352,8 +352,11 @@ def create_parser(prog: str = "karaoke-gen") -> argparse.ArgumentParser:
     )
     remote_group.add_argument(
         "--review-ui-url",
-        default=os.environ.get('REVIEW_UI_URL', 'https://lyrics.nomadkaraoke.com'),
-        help="Lyrics review UI URL (default: https://lyrics.nomadkaraoke.com)",
+        default=os.environ.get('REVIEW_UI_URL', os.environ.get('LYRICS_REVIEW_UI_URL', 'https://gen.nomadkaraoke.com/lyrics')),
+        help="Lyrics review UI URL. For remote mode: defaults to 'https://gen.nomadkaraoke.com/lyrics'. "
+             "For local mode: defaults to bundled frontend (from lyrics_transcriber/frontend/). "
+             "Use 'http://localhost:5173' to develop against Vite dev server. "
+             "(env: REVIEW_UI_URL or LYRICS_REVIEW_UI_URL)",
     )
     remote_group.add_argument(
         "--poll-interval",

karaoke-gen 0.71.42__py3-none-any.whl → 0.75.53__py3-none-any.whl

karaoke-gen 0.71.42__py3-none-any.whl → 0.75.53__py3-none-any.whl