PyPI - karaoke-gen - Versions diffs - 0.75.16__py3-none-any.whl → 0.75.53__py3-none-any.whl - Mend

karaoke-gen 0.75.16__py3-none-any.whl → 0.75.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

karaoke_gen/audio_fetcher.py +766 -33
karaoke_gen/audio_processor.py +4 -0
karaoke_gen/instrumental_review/static/index.html +37 -14
karaoke_gen/karaoke_finalise/karaoke_finalise.py +25 -1
karaoke_gen/karaoke_gen.py +18 -14
karaoke_gen/lyrics_processor.py +97 -6
karaoke_gen/utils/cli_args.py +6 -5
karaoke_gen/utils/gen_cli.py +30 -5
karaoke_gen/utils/remote_cli.py +269 -15
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/METADATA +106 -4
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/RECORD +24 -24
lyrics_transcriber/core/controller.py +76 -2
lyrics_transcriber/frontend/package.json +1 -1
lyrics_transcriber/frontend/src/App.tsx +6 -4
lyrics_transcriber/frontend/src/api.ts +25 -10
lyrics_transcriber/frontend/web_assets/assets/{index-COYImAcx.js → index-BECn1o8Q.js} +38 -22
lyrics_transcriber/frontend/web_assets/assets/{index-COYImAcx.js.map → index-BECn1o8Q.js.map} +1 -1
lyrics_transcriber/frontend/web_assets/index.html +1 -1
lyrics_transcriber/output/countdown_processor.py +39 -0
lyrics_transcriber/transcribers/audioshake.py +96 -7
lyrics_transcriber/types.py +14 -12
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/WHEEL +0 -0
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/entry_points.txt +0 -0
{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/licenses/LICENSE +0 -0

karaoke_gen/utils/remote_cli.py CHANGED Viewed

@@ -31,13 +31,16 @@ import webbrowser
 from dataclasses import dataclass
 from enum import Enum
 from pathlib import Path
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional
 import requests
 from .cli_args import create_parser, process_style_overrides, is_url, is_file
 # Use flacfetch's shared display functions for consistent formatting
 from flacfetch import print_releases, Release
+from flacfetch.core.categorize import categorize_releases
+from flacfetch.core.models import TrackQuery
+from flacfetch.interface.cli import print_categorized_releases
 class JobStatus(str, Enum):
@@ -137,7 +140,18 @@ class RemoteKaraokeClient:
             return None
     def refresh_auth(self) -> bool:
-        """Refresh authentication token."""
+        """Refresh authentication token.
+        Only refreshes if we're using a gcloud-based token. If the user
+        provided a static token via KARAOKE_GEN_AUTH_TOKEN, we keep that
+        since it doesn't expire like gcloud identity tokens.
+        """
+        # Don't refresh if using a static admin token from env
+        if os.environ.get('KARAOKE_GEN_AUTH_TOKEN'):
+            # Already have a valid static token, no need to refresh
+            return True
+        # Try to refresh gcloud identity token
         token = self._get_auth_token_from_gcloud()
         if token:
             self.config.auth_token = token
@@ -1081,7 +1095,8 @@ class RemoteKaraokeClient:
             if url.startswith('/'):
                 url = f"{self.config.service_url}{url}"
-            response = requests.get(url, stream=True, timeout=600)
+            # Use session headers (includes Authorization) for authenticated downloads
+            response = self.session.get(url, stream=True, timeout=600)
             if response.status_code != 200:
                 return False
@@ -1192,6 +1207,7 @@ class RemoteKaraokeClient:
             artist: Artist name to search for
             title: Song title to search for
             auto_download: Automatically select best audio source (skip interactive selection)
+            style_params_path: Path to style_params.json (optional)
             ... other args same as submit_job()
         Returns:
@@ -1232,6 +1248,40 @@ class RemoteKaraokeClient:
         if other_stems_models:
             request_data['other_stems_models'] = other_stems_models
+        # Prepare style files for upload if provided
+        style_files = []
+        local_style_files: Dict[str, str] = {}  # file_type -> local_path
+        if style_params_path and os.path.isfile(style_params_path):
+            self.logger.info(f"Parsing style configuration: {style_params_path}")
+            # Add the style_params.json itself
+            style_files.append({
+                'filename': Path(style_params_path).name,
+                'content_type': 'application/json',
+                'file_type': 'style_params'
+            })
+            local_style_files['style_params'] = style_params_path
+            # Parse style params to find referenced files (backgrounds, fonts)
+            style_assets = self._parse_style_params(style_params_path)
+            for asset_key, asset_path in style_assets.items():
+                if os.path.isfile(asset_path):
+                    # Use full path for content type detection (not just extension)
+                    content_type = self._get_content_type(asset_path)
+                    style_files.append({
+                        'filename': Path(asset_path).name,
+                        'content_type': content_type,
+                        'file_type': asset_key  # e.g., 'style_intro_background'
+                    })
+                    local_style_files[asset_key] = asset_path
+                    self.logger.info(f"  Will upload style asset: {asset_key}")
+            if style_files:
+                request_data['style_files'] = style_files
+                self.logger.info(f"Including {len(style_files)} style files in request")
         response = self._request('POST', '/api/audio-search/search', json=request_data)
         if response.status_code == 404:
@@ -1248,7 +1298,52 @@ class RemoteKaraokeClient:
                 error_detail = response.text
             raise RuntimeError(f"Error searching for audio: {error_detail}")
-        return response.json()
+        result = response.json()
+        # Upload style files if we have signed URLs
+        style_upload_urls = result.get('style_upload_urls', [])
+        if style_upload_urls and local_style_files:
+            self.logger.info(f"Uploading {len(style_upload_urls)} style files...")
+            for url_info in style_upload_urls:
+                file_type = url_info['file_type']
+                upload_url = url_info['upload_url']
+                local_path = local_style_files.get(file_type)
+                if not local_path:
+                    self.logger.warning(f"No local file for {file_type}, skipping upload")
+                    continue
+                self.logger.info(f"  Uploading {file_type}: {Path(local_path).name}")
+                try:
+                    with open(local_path, 'rb') as f:
+                        file_content = f.read()
+                    # Re-derive the content type from the local path. It must match the
+                    # content type sent in the search request, because the signed URL
+                    # was generated for that content type
+                    content_type = self._get_content_type(local_path)
+                    # Use PUT to upload directly to signed URL
+                    upload_response = requests.put(
+                        upload_url,
+                        data=file_content,
+                        headers={'Content-Type': content_type},
+                        timeout=60
+                    )
+                    if upload_response.status_code not in (200, 201):
+                        self.logger.error(f"Failed to upload {file_type}: {upload_response.status_code}")
+                    else:
+                        self.logger.info(f"    ✓ Uploaded {file_type}")
+                except Exception as e:
+                    self.logger.error(f"Error uploading {file_type}: {e}")
+            self.logger.info("Style file uploads complete")
+        return result
     def get_audio_search_results(self, job_id: str) -> Dict[str, Any]:
         """Get audio search results for a job awaiting selection."""
@@ -1398,16 +1493,21 @@ class JobMonitor:
         base_api_url = f"{self.config.service_url}/api/review/{job_id}"
         encoded_api_url = urllib.parse.quote(base_api_url, safe='')
-        # Try to get audio hash from job data
+        # Try to get audio hash and review token from job data
+        audio_hash = ''
+        review_token = ''
         try:
             job_data = self.client.get_job(job_id)
             audio_hash = job_data.get('audio_hash', '')
+            review_token = job_data.get('review_token', '')
         except Exception:
-            audio_hash = ''
+            pass
         url = f"{self.config.review_ui_url}/?baseApiUrl={encoded_api_url}"
         if audio_hash:
             url += f"&audioHash={audio_hash}"
+        if review_token:
+            url += f"&reviewToken={review_token}"
         self.logger.info(f"Opening lyrics review UI: {url}")
         self.open_browser(url)
@@ -1608,8 +1708,34 @@ class JobMonitor:
             "quality_str": result.get('quality_str') or result.get('quality', ''),
         }
+    def _convert_to_release_objects(self, release_dicts: List[Dict[str, Any]]) -> List[Release]:
+        """
+        Convert API result dicts to Release objects for categorization.
+        Used by handle_audio_selection() to enable categorized display
+        for large result sets (10+ results).
+        Args:
+            release_dicts: List of dicts in Release-compatible format
+        Returns:
+            List of Release objects (skipping any that fail to convert)
+        """
+        releases = []
+        for d in release_dicts:
+            try:
+                releases.append(Release.from_dict(d))
+            except Exception as e:
+                self.logger.debug(f"Failed to convert result to Release: {e}")
+        return releases
     def handle_audio_selection(self, job_id: str) -> None:
-        """Handle audio source selection interaction (Batch 5)."""
+        """Handle audio source selection interaction (Batch 5).
+        For 10+ results, uses categorized display (grouped by Top Seeded,
+        Album Releases, Hi-Res, etc.) with a 'more' command to show full list.
+        For smaller result sets, uses flat list display.
+        """
         self.logger.info("=" * 60)
         self.logger.info("AUDIO SOURCE SELECTION NEEDED")
         self.logger.info("=" * 60)
@@ -1619,6 +1745,7 @@ class JobMonitor:
             results_data = self.client.get_audio_search_results(job_id)
             results = results_data.get('results', [])
             artist = results_data.get('artist', 'Unknown')
+            title = results_data.get('title', 'Unknown')
             if not results:
                 self.logger.error("No search results available")
@@ -1633,23 +1760,71 @@ class JobMonitor:
                 # This gives us the same rich, colorized output as the local CLI
                 release_dicts = [self._convert_api_result_to_release_dict(r) for r in results]
-                # Use flacfetch's shared display function
-                print_releases(release_dicts, target_artist=artist, use_colors=True)
+                # Convert to Release objects for categorization
+                release_objects = self._convert_to_release_objects(release_dicts)
+                # Use categorized display for large result sets (10+)
+                # This groups results into categories: Top Seeded, Album Releases, Hi-Res, etc.
+                use_categorized = len(release_objects) >= 10
+                if use_categorized:
+                    # Create query for categorization
+                    query = TrackQuery(artist=artist, title=title)
+                    categorized = categorize_releases(release_objects, query)
+                    # print_categorized_releases returns the flattened list of displayed releases
+                    display_releases = print_categorized_releases(categorized, target_artist=artist, use_colors=True)
+                    showing_categorized = True
+                else:
+                    # Small result set - use simple flat list
+                    print_releases(release_dicts, target_artist=artist, use_colors=True)
+                    display_releases = release_objects
+                    showing_categorized = False
                 selection_index = -1
                 while selection_index < 0:
                     try:
-                        choice = input(f"\nSelect a release (1-{len(results)}, 0 to cancel): ").strip()
+                        if showing_categorized:
+                            prompt = f"\nSelect (1-{len(display_releases)}), 'more' for full list, 0 to cancel: "
+                        else:
+                            prompt = f"\nSelect a release (1-{len(display_releases)}, 0 to cancel): "
+                        choice = input(prompt).strip().lower()
                         if choice == "0":
                             self.logger.info("Selection cancelled by user")
                             raise KeyboardInterrupt
+                        # Handle 'more' command to show full flat list
+                        if choice in ('more', 'm', 'all', 'a') and showing_categorized:
+                            print("\n" + "=" * 60)
+                            print("FULL LIST (all results)")
+                            print("=" * 60 + "\n")
+                            print_releases(release_dicts, target_artist=artist, use_colors=True)
+                            display_releases = release_objects
+                            showing_categorized = False
+                            continue
                         choice_num = int(choice)
-                        if 1 <= choice_num <= len(results):
-                            selection_index = choice_num - 1
+                        if 1 <= choice_num <= len(display_releases):
+                            # Map selection back to original results index for API call
+                            selected_release = display_releases[choice_num - 1]
+                            # Find matching index in original results by download_url
+                            selection_index = self._find_original_index(
+                                selected_release, results, release_objects
+                            )
+                            if selection_index < 0:
+                                # Fallback: use display index if mapping fails
+                                self.logger.warning("Could not map selection to original index, using display index")
+                                selection_index = choice_num - 1
                         else:
-                            print(f"Please enter a number between 0 and {len(results)}")
+                            print(f"Please enter a number between 0 and {len(display_releases)}")
                     except ValueError:
-                        print("Please enter a valid number")
+                        if showing_categorized:
+                            print("Please enter a number or 'more'")
+                        else:
+                            print("Please enter a valid number")
                     except KeyboardInterrupt:
                         print()
                         raise
@@ -1667,10 +1842,80 @@ class JobMonitor:
         except Exception as e:
             self.logger.error(f"Error handling audio selection: {e}")
+    def _find_original_index(
+        self,
+        selected_release: Release,
+        original_results: List[Dict[str, Any]],
+        release_objects: List[Release],
+    ) -> int:
+        """
+        Map a selected Release back to its index in the original API results.
+        This is needed because categorized display may reorder results,
+        but the API selection endpoint needs the original index.
+        Args:
+            selected_release: The Release object user selected
+            original_results: Original API results (list of dicts)
+            release_objects: Release objects in same order as original_results
+        Returns:
+            Index in original_results, or -1 if not found
+        """
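+        # First try: match by object identity in release_objects.
+        # NOTE(review): this returns an index into release_objects; it equals the
+        # index in original_results only if no result was skipped while
+        # converting the API results to Release objects — confirm.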
+        for i, release in enumerate(release_objects):
+            if release is selected_release:
+                return i
+        # Second try: match by download_url
+        selected_url = getattr(selected_release, 'download_url', None)
+        if selected_url:
+            for i, r in enumerate(original_results):
+                if r.get('url') == selected_url:
+                    return i
+        # Third try: match by info_hash (for torrent sources)
+        selected_hash = getattr(selected_release, 'info_hash', None)
+        if selected_hash:
+            for i, r in enumerate(original_results):
+                if r.get('source_id') == selected_hash:
+                    return i
+        # Fourth try: match by title + artist + provider
+        selected_title = getattr(selected_release, 'title', '')
+        selected_artist = getattr(selected_release, 'artist', '')
+        selected_source = getattr(selected_release, 'source_name', '')
+        for i, r in enumerate(original_results):
+            if (r.get('title') == selected_title and
+                r.get('artist') == selected_artist and
+                r.get('provider') == selected_source):
+                return i
+        return -1
     def _open_instrumental_review_and_wait(self, job_id: str) -> None:
         """Open browser to instrumental review UI and wait for selection."""
-        review_url = f"{self.config.review_ui_url}/jobs/{job_id}/instrumental-review"
+        # Get instrumental token from job data
+        instrumental_token = ''
+        try:
+            job_data = self.client.get_job(job_id)
+            instrumental_token = job_data.get('instrumental_token', '')
+        except Exception:
+            pass
+        # Build the review URL with API endpoint and token
+        # The instrumental UI is hosted at /instrumental/ on the frontend domain
+        base_api_url = f"{self.config.service_url}/api/jobs/{job_id}"
+        encoded_api_url = urllib.parse.quote(base_api_url, safe='')
+        # Use /instrumental/ path on the frontend (same domain as review_ui_url but different path)
+        # review_ui_url is like https://gen.nomadkaraoke.com/lyrics, we want /instrumental/
+        frontend_base = self.config.review_ui_url.rsplit('/', 1)[0]  # Remove /lyrics
+        review_url = f"{frontend_base}/instrumental/?baseApiUrl={encoded_api_url}"
+        if instrumental_token:
+            review_url += f"&instrumentalToken={instrumental_token}"
         self.logger.info("")
         self.logger.info("=" * 60)
@@ -2149,6 +2394,14 @@ class JobMonitor:
                         self.handle_instrumental_selection(job_id)
                         self._instrumental_prompted = True
+                elif status == 'instrumental_selected':
+                    # Check if this was auto-selected due to existing instrumental
+                    selection = job_data.get('state_data', {}).get('instrumental_selection', '')
+                    if selection == 'custom' and not self._instrumental_prompted:
+                        self.logger.info("")
+                        self.logger.info("Using user-provided instrumental (--existing_instrumental)")
+                        self._instrumental_prompted = True
                 elif status == 'complete':
                     self.logger.info("")
                     self.logger.info("=" * 60)
@@ -2802,6 +3055,7 @@ def main():
                 artist=artist,
                 title=title,
                 auto_download=auto_download,
+                style_params_path=args.style_params_json,
                 enable_cdg=args.enable_cdg,
                 enable_txt=args.enable_txt,
                 brand_prefix=args.brand_prefix,

{karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: karaoke-gen
-Version: 0.75.16
+Version: 0.75.53
 Summary: Generate karaoke videos with synchronized lyrics. Handles the entire process from downloading audio and lyrics to creating the final video with title screens.
 License: MIT
 License-File: LICENSE
@@ -22,7 +22,7 @@ Requires-Dist: dropbox (>=12)
 Requires-Dist: fastapi (>=0.104.0)
 Requires-Dist: fetch-lyrics-from-genius (>=0.1)
 Requires-Dist: ffmpeg-python (>=0.2.0,<0.3.0)
-Requires-Dist: flacfetch (>=0.3)
+Requires-Dist: flacfetch (>=0.9.0)
 Requires-Dist: fonttools (>=4.55)
 Requires-Dist: google-api-python-client
 Requires-Dist: google-auth
@@ -48,6 +48,7 @@ Requires-Dist: lyrics-converter (>=0.2.1)
 Requires-Dist: lyricsgenius (>=3)
 Requires-Dist: matplotlib (>=3)
 Requires-Dist: metaphone (>=0.6)
+Requires-Dist: mutagen (>=1.47)
 Requires-Dist: nest-asyncio (>=1.5)
 Requires-Dist: nltk (>=3.9)
 Requires-Dist: numpy (>=2)
@@ -94,7 +95,7 @@ Description-Content-Type: text/markdown
 # Karaoke Generator 🎶 🎥 🚀
 ![PyPI - Version](https://img.shields.io/pypi/v/karaoke-gen)
-![Python Version](https://img.shields.io/badge/python-3.10+-blue)
+![PyPI - Python Version](https://img.shields.io/pypi/pyversions/karaoke-gen)
 ![Tests](https://github.com/nomadkaraoke/karaoke-gen/workflows/Test%20and%20Publish/badge.svg)
 ![Test Coverage](https://codecov.io/gh/nomadkaraoke/karaoke-gen/branch/main/graph/badge.svg)
@@ -147,10 +148,44 @@ pip install karaoke-gen
 This installs both `karaoke-gen` (local) and `karaoke-gen-remote` (cloud) CLIs.
 ### Requirements
-- Python 3.10+
+- Python 3.10-3.13
 - FFmpeg
 - For local processing: CUDA-capable GPU or Apple Silicon CPU recommended
+### Transcription Provider Setup
+**Transcription is required** for creating karaoke videos with synchronized lyrics. The system needs word-level timing data to display lyrics in sync with the music.
+#### Option 1: AudioShake (Recommended)
+Commercial service with high-quality transcription. Best for production use.
+```bash
+export AUDIOSHAKE_API_TOKEN="your_audioshake_token"
+```
+Get an API key at [https://www.audioshake.ai/](https://www.audioshake.ai/) (available to business customers only at the time of writing).
+#### Option 2: Whisper via RunPod
+Open-source alternative using OpenAI's Whisper model on RunPod infrastructure.
+```bash
+export RUNPOD_API_KEY="your_runpod_key"
+export WHISPER_RUNPOD_ID="your_whisper_endpoint_id"
+```
+Set up a Whisper endpoint at [https://www.runpod.io/](https://www.runpod.io/)
+#### Without Transcription (Instrumental Only)
+If you don't need synchronized lyrics, use the `--skip-lyrics` flag:
+```bash
+karaoke-gen --skip-lyrics "Artist" "Title"
+```
+This creates an instrumental-only karaoke video without lyrics overlay.
+> **Note:** See `lyrics_transcriber_temp/README.md` for detailed transcription provider configuration options.
 ---
 ## 🖥️ Local CLI (`karaoke-gen`)
@@ -568,6 +603,73 @@ Check backend health status.
 ---
+## 🔧 Troubleshooting
+### "No suitable files found for processing"
+This error occurs during the finalisation step when the `(With Vocals).mkv` file is missing. This file is created during lyrics transcription.
+**Most common cause:** No transcription provider configured.
+**Quick fix:**
+1. Check if transcription providers are configured:
+   ```bash
+   echo $AUDIOSHAKE_API_TOKEN
+   echo $RUNPOD_API_KEY
+   ```
+2. If both are empty, set up a provider (see [Transcription Provider Setup](#transcription-provider-setup))
+3. Or use `--skip-lyrics` for instrumental-only karaoke:
+   ```bash
+   karaoke-gen --skip-lyrics "Artist" "Title"
+   ```
+**Other causes:**
+- Invalid API credentials - verify your tokens are correct and active
+- API service unavailable - check service status pages
+- Network connectivity issues - ensure you can reach the API endpoints
+- Transcription timeout - try again or use a different provider
+### Transcription Fails Silently
+If karaoke-gen runs without errors but produces no synchronized lyrics:
+1. **Check logs** - Run with `--log_level debug` for detailed output:
+   ```bash
+   karaoke-gen --log_level debug "Artist" "Title"
+   ```
+2. **Verify environment variables** - Ensure API tokens are exported in your shell:
+   ```bash
+   # Check if set
+   printenv | grep -E "(AUDIOSHAKE|RUNPOD|WHISPER)"
+   # Set in current session
+   export AUDIOSHAKE_API_TOKEN="your_token"
+   ```
+3. **Test API connectivity** - Verify you can reach the transcription service
+### "No lyrics found from any source"
+This warning means no reference lyrics were fetched from online sources (Genius, Spotify, Musixmatch). The transcription will still work, but auto-correction may be less accurate.
+**To fix:**
+- Set `GENIUS_API_TOKEN` for Genius lyrics
+- Set `SPOTIFY_COOKIE_SP_DC` for Spotify lyrics
+- Set `RAPIDAPI_KEY` for Musixmatch lyrics
+- Or provide lyrics manually with `--lyrics_file /path/to/lyrics.txt`
+### Video Quality Issues
+If the output video has quality problems:
+- Ensure FFmpeg is properly installed: `ffmpeg -version`
+- Check available codecs: `ffmpeg -codecs`
+- For 4K output, ensure sufficient disk space (10GB+ per track)
+---
 ## 🧪 Development
 ### Running Tests

karaoke-gen 0.75.16__py3-none-any.whl → 0.75.53__py3-none-any.whl

karaoke-gen 0.75.16__py3-none-any.whl → 0.75.53__py3-none-any.whl