PyPI - karaoke-gen - Versions diffs - 0.75.16__py3-none-any.whl → 0.76.20__py3-none-any.whl - Mend

karaoke-gen 0.75.16__py3-none-any.whl → 0.76.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

karaoke_gen/audio_fetcher.py +984 -33
karaoke_gen/audio_processor.py +4 -0
karaoke_gen/instrumental_review/static/index.html +37 -14
karaoke_gen/karaoke_finalise/karaoke_finalise.py +25 -1
karaoke_gen/karaoke_gen.py +208 -39
karaoke_gen/lyrics_processor.py +111 -31
karaoke_gen/utils/__init__.py +26 -0
karaoke_gen/utils/cli_args.py +15 -6
karaoke_gen/utils/gen_cli.py +30 -5
karaoke_gen/utils/remote_cli.py +301 -20
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.76.20.dist-info}/METADATA +107 -5
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.76.20.dist-info}/RECORD +47 -43
lyrics_transcriber/core/controller.py +76 -2
lyrics_transcriber/frontend/index.html +5 -1
lyrics_transcriber/frontend/package-lock.json +4553 -0
lyrics_transcriber/frontend/package.json +4 -1
lyrics_transcriber/frontend/playwright.config.ts +69 -0
lyrics_transcriber/frontend/public/nomad-karaoke-logo.svg +5 -0
lyrics_transcriber/frontend/src/App.tsx +94 -63
lyrics_transcriber/frontend/src/api.ts +25 -10
lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +55 -21
lyrics_transcriber/frontend/src/components/AppHeader.tsx +65 -0
lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +5 -5
lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +9 -9
lyrics_transcriber/frontend/src/components/EditModal.tsx +1 -1
lyrics_transcriber/frontend/src/components/EditWordList.tsx +1 -1
lyrics_transcriber/frontend/src/components/Header.tsx +34 -48
lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +22 -21
lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
lyrics_transcriber/frontend/src/components/WordDivider.tsx +3 -3
lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +2 -2
lyrics_transcriber/frontend/src/components/shared/constants.ts +15 -5
lyrics_transcriber/frontend/src/main.tsx +1 -7
lyrics_transcriber/frontend/src/theme.ts +337 -135
lyrics_transcriber/frontend/vite.config.ts +5 -0
lyrics_transcriber/frontend/web_assets/assets/{index-COYImAcx.js → index-BECn1o8Q.js} +38 -22
lyrics_transcriber/frontend/web_assets/assets/{index-COYImAcx.js.map → index-BECn1o8Q.js.map} +1 -1
lyrics_transcriber/frontend/web_assets/index.html +1 -1
lyrics_transcriber/frontend/yarn.lock +1005 -1046
lyrics_transcriber/output/countdown_processor.py +39 -0
lyrics_transcriber/review/server.py +1 -1
lyrics_transcriber/transcribers/audioshake.py +96 -7
lyrics_transcriber/types.py +14 -12
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.76.20.dist-info}/WHEEL +0 -0
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.76.20.dist-info}/entry_points.txt +0 -0
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.76.20.dist-info}/licenses/LICENSE +0 -0

karaoke_gen/audio_processor.py CHANGED Viewed

@@ -771,6 +771,10 @@ class AudioProcessor:
         padded_result["other_stems"] = separation_result.get("other_stems", {})
         padded_result["backing_vocals"] = separation_result.get("backing_vocals", {})
+        # Preserve Custom instrumental if present (already padded in karaoke_gen.py)
+        if "Custom" in separation_result:
+            padded_result["Custom"] = separation_result["Custom"]
         # Count actual padded files (don't assume clean instrumental was padded)
         padded_count = (1 if padded_result["clean_instrumental"].get("instrumental") else 0) + len(padded_result["combined_instrumentals"])

karaoke_gen/instrumental_review/static/index.html CHANGED Viewed

@@ -598,7 +598,22 @@
         let animationFrameId = null;
         let currentAudioElement = null;  // Track audio element reference for listener management
-        const API_BASE = '/api/jobs/local';
+        // Parse URL parameters for cloud mode
+        const urlParams = new URLSearchParams(window.location.search);
+        const encodedBaseApiUrl = urlParams.get('baseApiUrl');
+        const instrumentalToken = urlParams.get('instrumentalToken');
+        // Determine API base URL - cloud mode uses provided URL, local mode uses default
+        const API_BASE = encodedBaseApiUrl
+            ? decodeURIComponent(encodedBaseApiUrl)
+            : '/api/jobs/local';
+        // Helper to add token to URL if available
+        function addTokenToUrl(url) {
+            if (!instrumentalToken) return url;
+            const separator = url.includes('?') ? '&' : '?';
+            return `${url}${separator}instrumental_token=${encodeURIComponent(instrumentalToken)}`;
+        }
         // HTML escape helper to prevent XSS
         function escapeHtml(str) {
@@ -617,8 +632,8 @@
         async function init() {
             try {
                 const [analysisRes, waveformRes] = await Promise.all([
-                    fetch(`${API_BASE}/instrumental-analysis`),
-                    fetch(`${API_BASE}/waveform-data?num_points=1000`)
+                    fetch(addTokenToUrl(`${API_BASE}/instrumental-analysis`)),
+                    fetch(addTokenToUrl(`${API_BASE}/waveform-data?num_points=1000`))
                 ]);
                 if (!analysisRes.ok) throw new Error('Failed to load analysis');
@@ -1120,15 +1135,23 @@
         }
         function getAudioUrl() {
-            const urls = {
-                original: '/api/audio/original',
-                backing: '/api/audio/backing_vocals',
-                clean: '/api/audio/clean_instrumental',
-                with_backing: '/api/audio/with_backing',
-                custom: '/api/audio/custom_instrumental',
-                uploaded: '/api/audio/uploaded_instrumental'
+            const stemTypes = {
+                original: 'original',
+                backing: 'backing_vocals',
+                clean: 'clean_instrumental',
+                with_backing: 'with_backing',
+                custom: 'custom_instrumental',
+                uploaded: 'uploaded_instrumental'
             };
-            return urls[activeAudio] || urls.backing;
+            const stemType = stemTypes[activeAudio] || stemTypes.backing;
+            // Cloud mode uses /audio-stream/{stem_type}, local mode uses /api/audio/{stem_type}
+            const isCloudMode = !!encodedBaseApiUrl;
+            const url = isCloudMode
+                ? `${API_BASE}/audio-stream/${stemType}`
+                : `/api/audio/${stemType}`;
+            return addTokenToUrl(url);
         }
         function formatTime(seconds) {
@@ -1295,7 +1318,7 @@
                 const formData = new FormData();
                 formData.append('file', file);
-                const response = await fetch(`${API_BASE}/upload-instrumental`, {
+                const response = await fetch(addTokenToUrl(`${API_BASE}/upload-instrumental`), {
                     method: 'POST',
                     body: formData
                 });
@@ -1354,7 +1377,7 @@
             }
             try {
-                const response = await fetch(`${API_BASE}/create-custom-instrumental`, {
+                const response = await fetch(addTokenToUrl(`${API_BASE}/create-custom-instrumental`), {
                     method: 'POST',
                     headers: { 'Content-Type': 'application/json' },
                     body: JSON.stringify({ mute_regions: muteRegions })
@@ -1404,7 +1427,7 @@
             }
             try {
-                const response = await fetch(`${API_BASE}/select-instrumental`, {
+                const response = await fetch(addTokenToUrl(`${API_BASE}/select-instrumental`), {
                     method: 'POST',
                     headers: { 'Content-Type': 'application/json' },
                     body: JSON.stringify({ selection: selectedOption })

karaoke_gen/karaoke_finalise/karaoke_finalise.py CHANGED Viewed

@@ -654,7 +654,31 @@ class KaraokeFinalise:
                 else:
                     self.logger.warning(f"Unsupported file extension: {current_ext}")
-        raise Exception("No suitable files found for processing.")
+        raise Exception(
+            "No suitable files found for processing.\n"
+            "\n"
+            "WHAT THIS MEANS:\n"
+            "The finalisation step requires a '(With Vocals).mkv' video file, which is created "
+            "during the lyrics transcription phase. This file contains the karaoke video with "
+            "synchronized lyrics overlay.\n"
+            "\n"
+            "COMMON CAUSES:\n"
+            "1. Transcription provider not configured - No AUDIOSHAKE_API_TOKEN or RUNPOD_API_KEY set\n"
+            "2. Transcription failed - Check logs above for API errors or timeout messages\n"
+            "3. Invalid API credentials - Verify your API tokens are correct and active\n"
+            "4. Network issues - Unable to reach transcription service\n"
+            "5. Running in wrong directory - Make sure you're in the track output folder\n"
+            "\n"
+            "TROUBLESHOOTING STEPS:\n"
+            "1. Check environment variables:\n"
+            "   - AUDIOSHAKE_API_TOKEN (for AudioShake transcription)\n"
+            "   - RUNPOD_API_KEY + WHISPER_RUNPOD_ID (for Whisper transcription)\n"
+            "2. Review the log output above for transcription errors\n"
+            "3. Try running with --log_level debug for more detailed output\n"
+            "4. If you don't need synchronized lyrics, use --skip-lyrics for instrumental-only karaoke\n"
+            "\n"
+            "See README.md 'Transcription Providers' and 'Troubleshooting' sections for more details."
+        )
     def choose_instrumental_audio_file(self, base_name):
         self.logger.info(f"Choosing instrumental audio file to use as karaoke audio...")

karaoke_gen/karaoke_gen.py CHANGED Viewed

@@ -31,6 +31,12 @@ from .video_generator import VideoGenerator
 from .video_background_processor import VideoBackgroundProcessor
 from .audio_fetcher import create_audio_fetcher, AudioFetcherError, NoResultsError, UserCancelledError
+# Import lyrics_transcriber components for post-review countdown and video rendering
+from lyrics_transcriber.output.countdown_processor import CountdownProcessor
+from lyrics_transcriber.output.generator import OutputGenerator
+from lyrics_transcriber.types import CorrectionResult
+from lyrics_transcriber.core.config import OutputConfig as LyricsOutputConfig
 class KaraokePrep:
     def __init__(
@@ -74,7 +80,7 @@ class KaraokePrep:
         skip_separation=False,
         # Video Background Configuration
         background_video=None,
-        background_video_darkness=0,
+        background_video_darkness=50,
         # Audio Fetcher Configuration
         auto_download=False,
     ):
@@ -482,41 +488,56 @@ class KaraokePrep:
                     self.logger.info(f"Found existing media files matching extractor '{self.extractor}', skipping download/conversion.")
                 elif getattr(self, '_use_audio_fetcher', False):
-                    # Use flacfetch to search and download audio
-                    self.logger.info(f"Using flacfetch to search and download: {self.artist} - {self.title}")
                     try:
-                        # Search and download audio using the AudioFetcher
-                        fetch_result = self.audio_fetcher.search_and_download(
-                            artist=self.artist,
-                            title=self.title,
-                            output_dir=track_output_dir,
-                            output_filename=f"{artist_title} (flacfetch)",
-                            auto_select=self.auto_download,
-                        )
-                        # Update extractor to reflect the actual provider used
-                        self.extractor = f"flacfetch-{fetch_result.provider}"
+                        # Check if this is a URL download or search+download
+                        if getattr(self, '_use_url_download', False):
+                            # Direct URL download (e.g., YouTube URL)
+                            self.logger.info(f"Using flacfetch to download from URL: {self.url}")
+                            fetch_result = self.audio_fetcher.download_from_url(
+                                url=self.url,
+                                output_dir=track_output_dir,
+                                output_filename=f"{artist_title} (youtube)" if artist_title != "Unknown - Unknown" else None,
+                                artist=self.artist,
+                                title=self.title,
+                            )
+                            # Update extractor to reflect the source
+                            self.extractor = "youtube"
+                        else:
+                            # Use flacfetch to search and download audio
+                            self.logger.info(f"Using flacfetch to search and download: {self.artist} - {self.title}")
+                            fetch_result = self.audio_fetcher.search_and_download(
+                                artist=self.artist,
+                                title=self.title,
+                                output_dir=track_output_dir,
+                                output_filename=f"{artist_title} (flacfetch)",
+                                auto_select=self.auto_download,
+                            )
+                            # Update extractor to reflect the actual provider used
+                            self.extractor = f"flacfetch-{fetch_result.provider}"
                         # Set up the output paths
                         output_filename_no_extension = os.path.join(track_output_dir, f"{artist_title} ({self.extractor})")
                         # Copy/move the downloaded file to the expected location
                         processed_track["input_media"] = self.file_handler.download_audio_from_fetcher_result(
                             fetch_result.filepath, output_filename_no_extension
                         )
                         self.logger.info(f"Audio downloaded from {fetch_result.provider}: {processed_track['input_media']}")
                         # Convert to WAV for audio processing
                         self.logger.info("Converting downloaded audio to WAV for processing...")
                         processed_track["input_audio_wav"] = self.file_handler.convert_to_wav(
                             processed_track["input_media"], output_filename_no_extension
                         )
                         # No still image for audio-only downloads
                         processed_track["input_still_image"] = None
                     except UserCancelledError:
                         # User cancelled - propagate up to CLI for graceful exit
                         raise
@@ -692,6 +713,112 @@ class KaraokePrep:
                 self.logger.info("=== Parallel Processing Complete ===")
+            # === POST-TRANSCRIPTION: Add countdown and render video ===
+            # Since lyrics_processor.py now always defers countdown and video rendering,
+            # we handle it here after human review is complete. This ensures the review UI
+            # shows accurate, unshifted timestamps (same behavior as cloud backend).
+            if processed_track.get("lyrics") and self.render_video:
+                self.logger.info("=== Processing Countdown and Video Rendering ===")
+                from .utils import sanitize_filename
+                sanitized_artist = sanitize_filename(self.artist)
+                sanitized_title = sanitize_filename(self.title)
+                lyrics_dir = os.path.join(track_output_dir, "lyrics")
+                # Find the corrections JSON file
+                corrections_filename = f"{sanitized_artist} - {sanitized_title} (Lyrics Corrections).json"
+                corrections_filepath = os.path.join(lyrics_dir, corrections_filename)
+                if os.path.exists(corrections_filepath):
+                    self.logger.info(f"Loading corrections from: {corrections_filepath}")
+                    with open(corrections_filepath, 'r', encoding='utf-8') as f:
+                        corrections_data = json.load(f)
+                    # Convert to CorrectionResult
+                    correction_result = CorrectionResult.from_dict(corrections_data)
+                    self.logger.info(f"Loaded CorrectionResult with {len(correction_result.corrected_segments)} segments")
+                    # Get the audio file path
+                    audio_path = processed_track["input_audio_wav"]
+                    # Add countdown intro if needed (songs that start within 3 seconds)
+                    self.logger.info("Processing countdown intro (if needed)...")
+                    cache_dir = os.path.join(track_output_dir, "cache")
+                    os.makedirs(cache_dir, exist_ok=True)
+                    countdown_processor = CountdownProcessor(
+                        cache_dir=cache_dir,
+                        logger=self.logger,
+                    )
+                    correction_result, audio_path, padding_added, padding_seconds = countdown_processor.process(
+                        correction_result=correction_result,
+                        audio_filepath=audio_path,
+                    )
+                    # Update processed_track with countdown info
+                    processed_track["countdown_padding_added"] = padding_added
+                    processed_track["countdown_padding_seconds"] = padding_seconds
+                    if padding_added:
+                        processed_track["padded_vocals_audio"] = audio_path
+                        self.logger.info(
+                            f"=== COUNTDOWN PADDING ADDED ===\n"
+                            f"Added {padding_seconds}s padding to audio and shifted timestamps.\n"
+                            f"Instrumental tracks will be padded after separation to maintain sync."
+                        )
+                    else:
+                        self.logger.info("No countdown needed - song starts after 3 seconds")
+                    # Save the updated corrections with countdown timestamps
+                    updated_corrections_data = correction_result.to_dict()
+                    with open(corrections_filepath, 'w', encoding='utf-8') as f:
+                        json.dump(updated_corrections_data, f, indent=2)
+                    self.logger.info(f"Saved countdown-adjusted corrections to: {corrections_filepath}")
+                    # Render video with lyrics
+                    self.logger.info("Rendering karaoke video with synchronized lyrics...")
+                    output_config = LyricsOutputConfig(
+                        output_dir=lyrics_dir,
+                        cache_dir=cache_dir,
+                        output_styles_json=self.style_params_json,
+                        render_video=True,
+                        generate_cdg=False,
+                        generate_plain_text=True,
+                        generate_lrc=True,
+                        video_resolution="4k",
+                        subtitle_offset_ms=self.subtitle_offset_ms,
+                    )
+                    output_generator = OutputGenerator(output_config, self.logger)
+                    output_prefix = f"{sanitized_artist} - {sanitized_title}"
+                    outputs = output_generator.generate_outputs(
+                        transcription_corrected=correction_result,
+                        audio_filepath=audio_path,
+                        output_prefix=output_prefix,
+                    )
+                    # Copy video to expected location in parent directory
+                    if outputs and outputs.get("video_filepath"):
+                        source_video = outputs["video_filepath"]
+                        dest_video = os.path.join(track_output_dir, f"{artist_title} (With Vocals).mkv")
+                        shutil.copy2(source_video, dest_video)
+                        self.logger.info(f"Video rendered successfully: {dest_video}")
+                        processed_track["with_vocals_video"] = dest_video
+                        # Update ASS filepath for video background processing
+                        if outputs.get("ass_filepath"):
+                            processed_track["ass_filepath"] = outputs["ass_filepath"]
+                    else:
+                        self.logger.warning("Video rendering did not produce expected output")
+                else:
+                    self.logger.warning(f"Corrections file not found: {corrections_filepath}")
+                    self.logger.warning("Skipping countdown processing and video rendering")
+            elif not self.render_video:
+                self.logger.info("Video rendering disabled - skipping countdown and video generation")
             # Apply video background if requested and lyrics were processed
             if self.video_background_processor and processed_track.get("lyrics"):
                 self.logger.info("=== Processing Video Background ===")
@@ -864,15 +991,21 @@ class KaraokePrep:
                 # If separated_audio is empty (e.g., transcription was skipped but existing files have countdown),
                 # scan the directory for existing instrumental files
+                # Note: also check for Custom instrumental (provided via --existing_instrumental)
                 has_instrumentals = (
                     processed_track["separated_audio"].get("clean_instrumental", {}).get("instrumental") or
-                    processed_track["separated_audio"].get("combined_instrumentals")
+                    processed_track["separated_audio"].get("combined_instrumentals") or
+                    processed_track["separated_audio"].get("Custom", {}).get("instrumental")
                 )
                 if not has_instrumentals:
                     self.logger.info("No instrumentals in separated_audio, scanning directory for existing files...")
+                    # Preserve existing Custom key if present before overwriting
+                    custom_backup = processed_track["separated_audio"].get("Custom")
                     processed_track["separated_audio"] = self._scan_directory_for_instrumentals(
                         track_output_dir, artist_title
                     )
+                    if custom_backup:
+                        processed_track["separated_audio"]["Custom"] = custom_backup
                 # Apply padding using AudioProcessor
                 padded_separation_result = self.audio_processor.apply_countdown_padding_to_instrumentals(
@@ -901,11 +1034,11 @@ class KaraokePrep:
             for sig in (signal.SIGINT, signal.SIGTERM):
                 loop.remove_signal_handler(sig)
-    async def shutdown(self, signal):
+    async def shutdown(self, signal_received):
         """Handle shutdown signals gracefully."""
-        self.logger.info(f"Received exit signal {signal.name}...")
+        self.logger.info(f"Received exit signal {signal_received.name}...")
-        # Get all running tasks
+        # Get all running tasks except the current shutdown task
         tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
         if tasks:
@@ -914,17 +1047,15 @@ class KaraokePrep:
             for task in tasks:
                 task.cancel()
-            self.logger.info("Received cancellation request, cleaning up...")
             # Wait for all tasks to complete with cancellation
-            try:
-                await asyncio.gather(*tasks, return_exceptions=True)
-            except asyncio.CancelledError:
-                pass
+            # Use return_exceptions=True to gather all results without raising
+            await asyncio.gather(*tasks, return_exceptions=True)
-        # Force exit after cleanup
-        self.logger.info("Cleanup complete, exiting...")
-        sys.exit(0)  # Add this line to force exit
+        self.logger.info("Cleanup complete")
+        # Raise KeyboardInterrupt to propagate the cancellation up the call stack
+        # This allows the main event loop to exit cleanly
+        raise KeyboardInterrupt()
     async def process_playlist(self):
         if self.artist is None or self.title is None:
@@ -987,17 +1118,56 @@ class KaraokePrep:
         return tracks
+    def _is_url(self, string: str) -> bool:
+        """Check if a string is a URL."""
+        return string is not None and (string.startswith("http://") or string.startswith("https://"))
     async def process(self):
         if self.input_media is not None and os.path.isdir(self.input_media):
             self.logger.info(f"Input media {self.input_media} is a local folder, processing each file individually...")
             return await self.process_folder()
         elif self.input_media is not None and os.path.isfile(self.input_media):
             self.logger.info(f"Input media {self.input_media} is a local file, audio download will be skipped")
+            return [await self.prep_single_track()]
+        elif self.input_media is not None and self._is_url(self.input_media):
+            # URL provided - download directly via flacfetch
+            self.logger.info(f"Input media {self.input_media} is a URL, downloading via flacfetch...")
+            # Extract video ID for metadata if it's a YouTube URL
+            video_id = None
+            youtube_patterns = [
+                r'(?:youtube\.com/watch\?v=|youtu\.be/)([a-zA-Z0-9_-]{11})',
+                r'youtube\.com/embed/([a-zA-Z0-9_-]{11})',
+                r'youtube\.com/v/([a-zA-Z0-9_-]{11})',
+            ]
+            for pattern in youtube_patterns:
+                match = re.search(pattern, self.input_media)
+                if match:
+                    video_id = match.group(1)
+                    break
+            # Set up the extracted_info for metadata consistency
+            self.extracted_info = {
+                "title": f"{self.artist} - {self.title}" if self.artist and self.title else video_id or "Unknown",
+                "artist": self.artist or "",
+                "track_title": self.title or "",
+                "extractor_key": "youtube",
+                "id": video_id or self.input_media,
+                "url": self.input_media,
+                "source": "youtube",
+            }
+            self.extractor = "youtube"
+            self.url = self.input_media
+            # Mark that we need to use audio fetcher for URL download
+            self._use_audio_fetcher = True
+            self._use_url_download = True  # New flag for URL-based download
             return [await self.prep_single_track()]
         elif self.artist and self.title:
             # No input file provided - use flacfetch to search and download audio
             self.logger.info(f"No input file provided, using flacfetch to search for: {self.artist} - {self.title}")
             # Set up the extracted_info for metadata consistency
             self.extracted_info = {
                 "title": f"{self.artist} - {self.title}",
@@ -1010,13 +1180,12 @@ class KaraokePrep:
             }
             self.extractor = "flacfetch"
             self.url = None  # URL will be determined by flacfetch
             # Mark that we need to use audio fetcher for download
             self._use_audio_fetcher = True
             return [await self.prep_single_track()]
         else:
             raise ValueError(
-                "Either a local file path or both artist and title must be provided. "
-                "URL-based input has been replaced with flacfetch audio fetching."
+                "Either a local file path, a URL, or both artist and title must be provided."
             )

karaoke-gen 0.75.16__py3-none-any.whl → 0.76.20__py3-none-any.whl

karaoke-gen 0.75.16__py3-none-any.whl → 0.76.20__py3-none-any.whl