PyPI - karaoke-gen - Versions diffs - 0.75.16__py3-none-any.whl → 0.75.53__py3-none-any.whl - Mend

karaoke-gen 0.75.16__py3-none-any.whl → 0.75.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

karaoke_gen/audio_fetcher.py +766 -33
karaoke_gen/audio_processor.py +4 -0
karaoke_gen/instrumental_review/static/index.html +37 -14
karaoke_gen/karaoke_finalise/karaoke_finalise.py +25 -1
karaoke_gen/karaoke_gen.py +18 -14
karaoke_gen/lyrics_processor.py +97 -6
karaoke_gen/utils/cli_args.py +6 -5
karaoke_gen/utils/gen_cli.py +30 -5
karaoke_gen/utils/remote_cli.py +269 -15
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/METADATA +106 -4
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/RECORD +24 -24
lyrics_transcriber/core/controller.py +76 -2
lyrics_transcriber/frontend/package.json +1 -1
lyrics_transcriber/frontend/src/App.tsx +6 -4
lyrics_transcriber/frontend/src/api.ts +25 -10
lyrics_transcriber/frontend/web_assets/assets/{index-COYImAcx.js → index-BECn1o8Q.js} +38 -22
lyrics_transcriber/frontend/web_assets/assets/{index-COYImAcx.js.map → index-BECn1o8Q.js.map} +1 -1
lyrics_transcriber/frontend/web_assets/index.html +1 -1
lyrics_transcriber/output/countdown_processor.py +39 -0
lyrics_transcriber/transcribers/audioshake.py +96 -7
lyrics_transcriber/types.py +14 -12
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/WHEEL +0 -0
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/entry_points.txt +0 -0
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/licenses/LICENSE +0 -0

karaoke_gen/audio_processor.py CHANGED Viewed

@@ -771,6 +771,10 @@ class AudioProcessor:
         padded_result["other_stems"] = separation_result.get("other_stems", {})
         padded_result["backing_vocals"] = separation_result.get("backing_vocals", {})
+        # Preserve Custom instrumental if present (already padded in karaoke_gen.py)
+        if "Custom" in separation_result:
+            padded_result["Custom"] = separation_result["Custom"]
         # Count actual padded files (don't assume clean instrumental was padded)
         padded_count = (1 if padded_result["clean_instrumental"].get("instrumental") else 0) + len(padded_result["combined_instrumentals"])

karaoke_gen/instrumental_review/static/index.html CHANGED Viewed

@@ -598,7 +598,22 @@
         let animationFrameId = null;
         let currentAudioElement = null;  // Track audio element reference for listener management
-        const API_BASE = '/api/jobs/local';
+        // Parse URL parameters for cloud mode
+        const urlParams = new URLSearchParams(window.location.search);
+        const encodedBaseApiUrl = urlParams.get('baseApiUrl');
+        const instrumentalToken = urlParams.get('instrumentalToken');
+        // Determine API base URL - cloud mode uses provided URL, local mode uses default
+        const API_BASE = encodedBaseApiUrl
+            ? decodeURIComponent(encodedBaseApiUrl)
+            : '/api/jobs/local';
+        // Helper to add token to URL if available
+        function addTokenToUrl(url) {
+            if (!instrumentalToken) return url;
+            const separator = url.includes('?') ? '&' : '?';
+            return `${url}${separator}instrumental_token=${encodeURIComponent(instrumentalToken)}`;
+        }
         // HTML escape helper to prevent XSS
         function escapeHtml(str) {
@@ -617,8 +632,8 @@
         async function init() {
             try {
                 const [analysisRes, waveformRes] = await Promise.all([
-                    fetch(`${API_BASE}/instrumental-analysis`),
-                    fetch(`${API_BASE}/waveform-data?num_points=1000`)
+                    fetch(addTokenToUrl(`${API_BASE}/instrumental-analysis`)),
+                    fetch(addTokenToUrl(`${API_BASE}/waveform-data?num_points=1000`))
                 ]);
                 if (!analysisRes.ok) throw new Error('Failed to load analysis');
@@ -1120,15 +1135,23 @@
         }
         function getAudioUrl() {
-            const urls = {
-                original: '/api/audio/original',
-                backing: '/api/audio/backing_vocals',
-                clean: '/api/audio/clean_instrumental',
-                with_backing: '/api/audio/with_backing',
-                custom: '/api/audio/custom_instrumental',
-                uploaded: '/api/audio/uploaded_instrumental'
+            const stemTypes = {
+                original: 'original',
+                backing: 'backing_vocals',
+                clean: 'clean_instrumental',
+                with_backing: 'with_backing',
+                custom: 'custom_instrumental',
+                uploaded: 'uploaded_instrumental'
             };
-            return urls[activeAudio] || urls.backing;
+            const stemType = stemTypes[activeAudio] || stemTypes.backing;
+            // Cloud mode uses /audio-stream/{stem_type}, local mode uses /api/audio/{stem_type}
+            const isCloudMode = !!encodedBaseApiUrl;
+            const url = isCloudMode
+                ? `${API_BASE}/audio-stream/${stemType}`
+                : `/api/audio/${stemType}`;
+            return addTokenToUrl(url);
         }
         function formatTime(seconds) {
@@ -1295,7 +1318,7 @@
                 const formData = new FormData();
                 formData.append('file', file);
-                const response = await fetch(`${API_BASE}/upload-instrumental`, {
+                const response = await fetch(addTokenToUrl(`${API_BASE}/upload-instrumental`), {
                     method: 'POST',
                     body: formData
                 });
@@ -1354,7 +1377,7 @@
             }
             try {
-                const response = await fetch(`${API_BASE}/create-custom-instrumental`, {
+                const response = await fetch(addTokenToUrl(`${API_BASE}/create-custom-instrumental`), {
                     method: 'POST',
                     headers: { 'Content-Type': 'application/json' },
                     body: JSON.stringify({ mute_regions: muteRegions })
@@ -1404,7 +1427,7 @@
             }
             try {
-                const response = await fetch(`${API_BASE}/select-instrumental`, {
+                const response = await fetch(addTokenToUrl(`${API_BASE}/select-instrumental`), {
                     method: 'POST',
                     headers: { 'Content-Type': 'application/json' },
                     body: JSON.stringify({ selection: selectedOption })

karaoke_gen/karaoke_finalise/karaoke_finalise.py CHANGED Viewed

@@ -654,7 +654,31 @@ class KaraokeFinalise:
                 else:
                     self.logger.warning(f"Unsupported file extension: {current_ext}")
-        raise Exception("No suitable files found for processing.")
+        raise Exception(
+            "No suitable files found for processing.\n"
+            "\n"
+            "WHAT THIS MEANS:\n"
+            "The finalisation step requires a '(With Vocals).mkv' video file, which is created "
+            "during the lyrics transcription phase. This file contains the karaoke video with "
+            "synchronized lyrics overlay.\n"
+            "\n"
+            "COMMON CAUSES:\n"
+            "1. Transcription provider not configured - No AUDIOSHAKE_API_TOKEN or RUNPOD_API_KEY set\n"
+            "2. Transcription failed - Check logs above for API errors or timeout messages\n"
+            "3. Invalid API credentials - Verify your API tokens are correct and active\n"
+            "4. Network issues - Unable to reach transcription service\n"
+            "5. Running in wrong directory - Make sure you're in the track output folder\n"
+            "\n"
+            "TROUBLESHOOTING STEPS:\n"
+            "1. Check environment variables:\n"
+            "   - AUDIOSHAKE_API_TOKEN (for AudioShake transcription)\n"
+            "   - RUNPOD_API_KEY + WHISPER_RUNPOD_ID (for Whisper transcription)\n"
+            "2. Review the log output above for transcription errors\n"
+            "3. Try running with --log_level debug for more detailed output\n"
+            "4. If you don't need synchronized lyrics, use --skip-lyrics for instrumental-only karaoke\n"
+            "\n"
+            "See README.md 'Transcription Providers' and 'Troubleshooting' sections for more details."
+        )
     def choose_instrumental_audio_file(self, base_name):
         self.logger.info(f"Choosing instrumental audio file to use as karaoke audio...")

karaoke_gen/karaoke_gen.py CHANGED Viewed

@@ -74,7 +74,7 @@ class KaraokePrep:
         skip_separation=False,
         # Video Background Configuration
         background_video=None,
-        background_video_darkness=0,
+        background_video_darkness=50,
         # Audio Fetcher Configuration
         auto_download=False,
     ):
@@ -864,15 +864,21 @@ class KaraokePrep:
                 # If separated_audio is empty (e.g., transcription was skipped but existing files have countdown),
                 # scan the directory for existing instrumental files
+                # Note: also check for Custom instrumental (provided via --existing_instrumental)
                 has_instrumentals = (
                     processed_track["separated_audio"].get("clean_instrumental", {}).get("instrumental") or
-                    processed_track["separated_audio"].get("combined_instrumentals")
+                    processed_track["separated_audio"].get("combined_instrumentals") or
+                    processed_track["separated_audio"].get("Custom", {}).get("instrumental")
                 )
                 if not has_instrumentals:
                     self.logger.info("No instrumentals in separated_audio, scanning directory for existing files...")
+                    # Preserve existing Custom key if present before overwriting
+                    custom_backup = processed_track["separated_audio"].get("Custom")
                     processed_track["separated_audio"] = self._scan_directory_for_instrumentals(
                         track_output_dir, artist_title
                     )
+                    if custom_backup:
+                        processed_track["separated_audio"]["Custom"] = custom_backup
                 # Apply padding using AudioProcessor
                 padded_separation_result = self.audio_processor.apply_countdown_padding_to_instrumentals(
@@ -901,11 +907,11 @@ class KaraokePrep:
             for sig in (signal.SIGINT, signal.SIGTERM):
                 loop.remove_signal_handler(sig)
-    async def shutdown(self, signal):
+    async def shutdown(self, signal_received):
         """Handle shutdown signals gracefully."""
-        self.logger.info(f"Received exit signal {signal.name}...")
+        self.logger.info(f"Received exit signal {signal_received.name}...")
-        # Get all running tasks
+        # Get all running tasks except the current shutdown task
         tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
         if tasks:
@@ -914,17 +920,15 @@ class KaraokePrep:
             for task in tasks:
                 task.cancel()
-            self.logger.info("Received cancellation request, cleaning up...")
             # Wait for all tasks to complete with cancellation
-            try:
-                await asyncio.gather(*tasks, return_exceptions=True)
-            except asyncio.CancelledError:
-                pass
+            # Use return_exceptions=True to gather all results without raising
+            await asyncio.gather(*tasks, return_exceptions=True)
-        # Force exit after cleanup
-        self.logger.info("Cleanup complete, exiting...")
-        sys.exit(0)  # Add this line to force exit
+        self.logger.info("Cleanup complete")
+        # Raise KeyboardInterrupt to propagate the cancellation up the call stack
+        # This allows the main event loop to exit cleanly
+        raise KeyboardInterrupt()
     async def process_playlist(self):
         if self.artist is None or self.title is None:

karaoke_gen/lyrics_processor.py CHANGED Viewed

@@ -27,10 +27,10 @@ class LyricsProcessor:
     def _detect_countdown_padding_from_lrc(self, lrc_filepath):
         """
-        Detect if countdown padding was applied by checking the first lyric timestamp in the LRC file.
+        Detect if countdown padding was applied by checking for countdown text in the LRC file.
-        LRC format timestamps look like: [mm:ss.xx] or [mm:ss.xxx]
-        If the first lyric timestamp is >= 3.0 seconds, countdown padding was likely applied.
+        The countdown segment has the text "3... 2... 1..." at timestamp 0.1-2.9s.
+        We detect this by looking for the countdown text pattern.
         Args:
             lrc_filepath: Path to the LRC file
@@ -42,7 +42,15 @@ class LyricsProcessor:
             with open(lrc_filepath, 'r', encoding='utf-8') as f:
                 content = f.read()
-            # Find all timestamp patterns in the LRC file
+            # Method 1: Check for countdown text pattern "3... 2... 1..."
+            # This is the most reliable detection method since the countdown text is unique
+            countdown_text = "3... 2... 1..."
+            if countdown_text in content:
+                self.logger.info(f"Detected countdown padding from LRC: found countdown text '{countdown_text}'")
+                return (True, self.COUNTDOWN_PADDING_SECONDS)
+            # Method 2 (fallback): Check if first lyric timestamp is >= 3 seconds
+            # This handles cases where countdown text format might differ
             # LRC timestamps: [mm:ss.xx] or [mm:ss.xxx]
             timestamp_pattern = r'\[(\d{1,2}):(\d{2})\.(\d{2,3})\]'
             matches = re.findall(timestamp_pattern, content)
@@ -51,8 +59,7 @@ class LyricsProcessor:
                 self.logger.debug("No timestamps found in LRC file")
                 return (False, 0.0)
-            # Find the first non-metadata timestamp (metadata like [ar:Artist] doesn't have decimal)
-            # We already filtered for decimal timestamps in our pattern
+            # Parse the first timestamp
             first_timestamp = matches[0]
             minutes = int(first_timestamp[0])
             seconds = int(first_timestamp[1])
@@ -160,6 +167,76 @@ class LyricsProcessor:
         return processed_lines
+    def _check_transcription_providers(self) -> dict:
+        """
+        Check which transcription providers are configured and return their status.
+        Returns:
+            dict with 'configured' (list of provider names) and 'missing' (list of missing configs)
+        """
+        load_dotenv()
+        configured = []
+        missing = []
+        # Check AudioShake
+        audioshake_token = os.getenv("AUDIOSHAKE_API_TOKEN")
+        if audioshake_token:
+            configured.append("AudioShake")
+            self.logger.debug("AudioShake transcription provider: configured")
+        else:
+            missing.append("AudioShake (AUDIOSHAKE_API_TOKEN)")
+            self.logger.debug("AudioShake transcription provider: not configured (missing AUDIOSHAKE_API_TOKEN)")
+        # Check Whisper via RunPod
+        runpod_key = os.getenv("RUNPOD_API_KEY")
+        whisper_id = os.getenv("WHISPER_RUNPOD_ID")
+        if runpod_key and whisper_id:
+            configured.append("Whisper (RunPod)")
+            self.logger.debug("Whisper transcription provider: configured")
+        elif runpod_key:
+            missing.append("Whisper (missing WHISPER_RUNPOD_ID)")
+            self.logger.debug("Whisper transcription provider: partially configured (missing WHISPER_RUNPOD_ID)")
+        elif whisper_id:
+            missing.append("Whisper (missing RUNPOD_API_KEY)")
+            self.logger.debug("Whisper transcription provider: partially configured (missing RUNPOD_API_KEY)")
+        else:
+            missing.append("Whisper (RUNPOD_API_KEY + WHISPER_RUNPOD_ID)")
+            self.logger.debug("Whisper transcription provider: not configured")
+        return {"configured": configured, "missing": missing}
+    def _build_transcription_provider_error_message(self, missing_providers: list) -> str:
+        """Build a helpful error message when no transcription providers are configured."""
+        return (
+            "No transcription providers configured!\n"
+            "\n"
+            "Karaoke video generation requires at least one transcription provider to create "
+            "synchronized lyrics. Without a transcription provider, the system cannot generate "
+            "the word-level timing data needed for the karaoke video.\n"
+            "\n"
+            "AVAILABLE TRANSCRIPTION PROVIDERS:\n"
+            "\n"
+            "1. AudioShake (Recommended - Commercial, high-quality)\n"
+            "   - Set environment variable: AUDIOSHAKE_API_TOKEN=your_token\n"
+            "   - Get an API key at: https://www.audioshake.ai/\n"
+            "\n"
+            "2. Whisper via RunPod (Open-source alternative)\n"
+            "   - Set environment variables:\n"
+            "     RUNPOD_API_KEY=your_key\n"
+            "     WHISPER_RUNPOD_ID=your_endpoint_id\n"
+            "   - Set up a Whisper endpoint at: https://www.runpod.io/\n"
+            "\n"
+            "ALTERNATIVES:\n"
+            "\n"
+            "- Use --skip-lyrics flag to generate instrumental-only karaoke (no synchronized lyrics)\n"
+            "- Use --lyrics_file to provide pre-timed lyrics (still needs transcription for timing)\n"
+            "\n"
+            f"Missing provider configurations: {', '.join(missing_providers)}\n"
+            "\n"
+            "See README.md 'Transcription Providers' section for detailed setup instructions."
+        )
     def transcribe_lyrics(self, input_audio_wav, artist, title, track_output_dir, lyrics_artist=None, lyrics_title=None):
         """
         Transcribe lyrics for a track.
@@ -171,6 +248,9 @@ class LyricsProcessor:
             track_output_dir: Output directory path
             lyrics_artist: Artist name for lyrics processing (defaults to artist if None)
             lyrics_title: Title for lyrics processing (defaults to title if None)
+        Raises:
+            ValueError: If transcription is enabled but no providers are configured
         """
         # Use original artist/title for filename generation
         filename_artist = artist
@@ -234,6 +314,17 @@ class LyricsProcessor:
                 "padded_audio_filepath": None,  # Original padded audio may not exist
             }
+        # Check transcription provider configuration if transcription is not being skipped
+        # Do this AFTER checking for existing files, since existing files don't need transcription
+        if not self.skip_transcription:
+            provider_status = self._check_transcription_providers()
+            if provider_status["configured"]:
+                self.logger.info(f"Transcription providers configured: {', '.join(provider_status['configured'])}")
+            else:
+                error_msg = self._build_transcription_provider_error_message(provider_status["missing"])
+                raise ValueError(error_msg)
         # Create lyrics directory if it doesn't exist
         os.makedirs(lyrics_dir, exist_ok=True)
         self.logger.info(f"Created lyrics directory: {lyrics_dir}")

karaoke_gen/utils/cli_args.py CHANGED Viewed

@@ -258,8 +258,8 @@ def create_parser(prog: str = "karaoke-gen") -> argparse.ArgumentParser:
     style_group.add_argument(
         "--background_video_darkness",
         type=int,
-        default=0,
-        help="Optional: Darkness overlay percentage (0-100) for video background (default: %(default)s). Example: --background_video_darkness=50",
+        default=50,
+        help="Optional: Darkness overlay percentage (0-100) for video background (default: %(default)s). Example: --background_video_darkness=20",
     )
     # Finalisation Configuration
@@ -352,9 +352,10 @@ def create_parser(prog: str = "karaoke-gen") -> argparse.ArgumentParser:
     )
     remote_group.add_argument(
         "--review-ui-url",
-        default=os.environ.get('REVIEW_UI_URL', os.environ.get('LYRICS_REVIEW_UI_URL', 'https://lyrics.nomadkaraoke.com')),
-        help="Lyrics review UI URL. Default: 'https://lyrics.nomadkaraoke.com'. "
-             "Use 'http://localhost:5173' for Vite dev server during development. "
+        default=os.environ.get('REVIEW_UI_URL', os.environ.get('LYRICS_REVIEW_UI_URL', 'https://gen.nomadkaraoke.com/lyrics')),
+        help="Lyrics review UI URL. For remote mode: defaults to 'https://gen.nomadkaraoke.com/lyrics'. "
+             "For local mode: defaults to bundled frontend (from lyrics_transcriber/frontend/). "
+             "Use 'http://localhost:5173' to develop against Vite dev server. "
              "(env: REVIEW_UI_URL or LYRICS_REVIEW_UI_URL)",
     )
     remote_group.add_argument(

karaoke_gen/utils/gen_cli.py CHANGED Viewed

@@ -313,9 +313,18 @@ async def async_main():
     args = parser.parse_args()
     # Set review UI URL environment variable for the lyrics transcriber review server
-    # This allows development against a local frontend dev server (e.g., http://localhost:5173)
+    # Only set this if the user explicitly wants to use a dev server (e.g., http://localhost:5173)
+    # By default, let the ReviewServer use its bundled local frontend (served from lyrics_transcriber/frontend/)
+    # This enables local iteration on the frontend without redeploying
     if hasattr(args, 'review_ui_url') and args.review_ui_url:
-        os.environ['LYRICS_REVIEW_UI_URL'] = args.review_ui_url
+        # Check if user provided a custom value (not the default hosted URL)
+        default_hosted_urls = [
+            'https://gen.nomadkaraoke.com/lyrics',
+            'https://lyrics.nomadkaraoke.com'
+        ]
+        if args.review_ui_url.rstrip('/') not in [url.rstrip('/') for url in default_hosted_urls]:
+            # User explicitly wants a specific URL (e.g., Vite dev server)
+            os.environ['LYRICS_REVIEW_UI_URL'] = args.review_ui_url
     # Process style overrides
     try:
@@ -746,7 +755,7 @@ async def async_main():
     except UserCancelledError:
         logger.info("Operation cancelled by user")
         return
-    except KeyboardInterrupt:
+    except (KeyboardInterrupt, asyncio.CancelledError):
         logger.info("Operation cancelled by user (Ctrl+C)")
         return
@@ -775,12 +784,28 @@ async def async_main():
         logger.info(f"Changing to directory: {track_dir}")
         os.chdir(track_dir)
-        # Select instrumental file - either via web UI or auto-selection
+        # Select instrumental file - either via web UI, auto-selection, or custom instrumental
         # This ALWAYS produces a selected file - no silent fallback to legacy code
         selected_instrumental_file = None
         skip_review = getattr(args, 'skip_instrumental_review', False)
-        if skip_review:
+        # Check if a custom instrumental was provided (via --existing_instrumental)
+        # In this case, the instrumental is already chosen - skip review entirely
+        separated_audio = track.get("separated_audio", {})
+        custom_instrumental = separated_audio.get("Custom", {}).get("instrumental")
+        if custom_instrumental:
+            # Custom instrumental was provided - use it directly, no review needed
+            resolved_path = _resolve_path_for_cwd(custom_instrumental, track_dir)
+            if os.path.exists(resolved_path):
+                logger.info(f"Using custom instrumental (--existing_instrumental): {resolved_path}")
+                selected_instrumental_file = resolved_path
+            else:
+                logger.error(f"Custom instrumental file not found: {resolved_path}")
+                logger.error("The file may have been moved or deleted after preparation.")
+                sys.exit(1)
+                return  # Explicit return for testing
+        elif skip_review:
             # Auto-select instrumental when review is skipped (non-interactive mode)
             logger.info("Instrumental review skipped (--skip_instrumental_review), auto-selecting instrumental file...")
             try:

karaoke-gen 0.75.16__py3-none-any.whl → 0.75.53__py3-none-any.whl

karaoke-gen 0.75.16__py3-none-any.whl → 0.75.53__py3-none-any.whl