karaoke-gen 0.75.16__py3-none-any.whl → 0.75.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -771,6 +771,10 @@ class AudioProcessor:
771
771
  padded_result["other_stems"] = separation_result.get("other_stems", {})
772
772
  padded_result["backing_vocals"] = separation_result.get("backing_vocals", {})
773
773
 
774
+ # Preserve Custom instrumental if present (already padded in karaoke_gen.py)
775
+ if "Custom" in separation_result:
776
+ padded_result["Custom"] = separation_result["Custom"]
777
+
774
778
  # Count actual padded files (don't assume clean instrumental was padded)
775
779
  padded_count = (1 if padded_result["clean_instrumental"].get("instrumental") else 0) + len(padded_result["combined_instrumentals"])
776
780
 
@@ -598,7 +598,22 @@
598
598
  let animationFrameId = null;
599
599
  let currentAudioElement = null; // Track audio element reference for listener management
600
600
 
601
- const API_BASE = '/api/jobs/local';
601
+ // Parse URL parameters for cloud mode
602
+ const urlParams = new URLSearchParams(window.location.search);
603
+ const encodedBaseApiUrl = urlParams.get('baseApiUrl');
604
+ const instrumentalToken = urlParams.get('instrumentalToken');
605
+
606
+ // Determine API base URL - cloud mode uses provided URL, local mode uses default
607
+ const API_BASE = encodedBaseApiUrl
608
+ ? decodeURIComponent(encodedBaseApiUrl)
609
+ : '/api/jobs/local';
610
+
611
+ // Helper to add token to URL if available
612
+ function addTokenToUrl(url) {
613
+ if (!instrumentalToken) return url;
614
+ const separator = url.includes('?') ? '&' : '?';
615
+ return `${url}${separator}instrumental_token=${encodeURIComponent(instrumentalToken)}`;
616
+ }
602
617
 
603
618
  // HTML escape helper to prevent XSS
604
619
  function escapeHtml(str) {
@@ -617,8 +632,8 @@
617
632
  async function init() {
618
633
  try {
619
634
  const [analysisRes, waveformRes] = await Promise.all([
620
- fetch(`${API_BASE}/instrumental-analysis`),
621
- fetch(`${API_BASE}/waveform-data?num_points=1000`)
635
+ fetch(addTokenToUrl(`${API_BASE}/instrumental-analysis`)),
636
+ fetch(addTokenToUrl(`${API_BASE}/waveform-data?num_points=1000`))
622
637
  ]);
623
638
 
624
639
  if (!analysisRes.ok) throw new Error('Failed to load analysis');
@@ -1120,15 +1135,23 @@
1120
1135
  }
1121
1136
 
1122
1137
  function getAudioUrl() {
1123
- const urls = {
1124
- original: '/api/audio/original',
1125
- backing: '/api/audio/backing_vocals',
1126
- clean: '/api/audio/clean_instrumental',
1127
- with_backing: '/api/audio/with_backing',
1128
- custom: '/api/audio/custom_instrumental',
1129
- uploaded: '/api/audio/uploaded_instrumental'
1138
+ const stemTypes = {
1139
+ original: 'original',
1140
+ backing: 'backing_vocals',
1141
+ clean: 'clean_instrumental',
1142
+ with_backing: 'with_backing',
1143
+ custom: 'custom_instrumental',
1144
+ uploaded: 'uploaded_instrumental'
1130
1145
  };
1131
- return urls[activeAudio] || urls.backing;
1146
+ const stemType = stemTypes[activeAudio] || stemTypes.backing;
1147
+
1148
+ // Cloud mode uses /audio-stream/{stem_type}, local mode uses /api/audio/{stem_type}
1149
+ const isCloudMode = !!encodedBaseApiUrl;
1150
+ const url = isCloudMode
1151
+ ? `${API_BASE}/audio-stream/${stemType}`
1152
+ : `/api/audio/${stemType}`;
1153
+
1154
+ return addTokenToUrl(url);
1132
1155
  }
1133
1156
 
1134
1157
  function formatTime(seconds) {
@@ -1295,7 +1318,7 @@
1295
1318
  const formData = new FormData();
1296
1319
  formData.append('file', file);
1297
1320
 
1298
- const response = await fetch(`${API_BASE}/upload-instrumental`, {
1321
+ const response = await fetch(addTokenToUrl(`${API_BASE}/upload-instrumental`), {
1299
1322
  method: 'POST',
1300
1323
  body: formData
1301
1324
  });
@@ -1354,7 +1377,7 @@
1354
1377
  }
1355
1378
 
1356
1379
  try {
1357
- const response = await fetch(`${API_BASE}/create-custom-instrumental`, {
1380
+ const response = await fetch(addTokenToUrl(`${API_BASE}/create-custom-instrumental`), {
1358
1381
  method: 'POST',
1359
1382
  headers: { 'Content-Type': 'application/json' },
1360
1383
  body: JSON.stringify({ mute_regions: muteRegions })
@@ -1404,7 +1427,7 @@
1404
1427
  }
1405
1428
 
1406
1429
  try {
1407
- const response = await fetch(`${API_BASE}/select-instrumental`, {
1430
+ const response = await fetch(addTokenToUrl(`${API_BASE}/select-instrumental`), {
1408
1431
  method: 'POST',
1409
1432
  headers: { 'Content-Type': 'application/json' },
1410
1433
  body: JSON.stringify({ selection: selectedOption })
@@ -654,7 +654,31 @@ class KaraokeFinalise:
654
654
  else:
655
655
  self.logger.warning(f"Unsupported file extension: {current_ext}")
656
656
 
657
- raise Exception("No suitable files found for processing.")
657
+ raise Exception(
658
+ "No suitable files found for processing.\n"
659
+ "\n"
660
+ "WHAT THIS MEANS:\n"
661
+ "The finalisation step requires a '(With Vocals).mkv' video file, which is created "
662
+ "during the lyrics transcription phase. This file contains the karaoke video with "
663
+ "synchronized lyrics overlay.\n"
664
+ "\n"
665
+ "COMMON CAUSES:\n"
666
+ "1. Transcription provider not configured - No AUDIOSHAKE_API_TOKEN or RUNPOD_API_KEY set\n"
667
+ "2. Transcription failed - Check logs above for API errors or timeout messages\n"
668
+ "3. Invalid API credentials - Verify your API tokens are correct and active\n"
669
+ "4. Network issues - Unable to reach transcription service\n"
670
+ "5. Running in wrong directory - Make sure you're in the track output folder\n"
671
+ "\n"
672
+ "TROUBLESHOOTING STEPS:\n"
673
+ "1. Check environment variables:\n"
674
+ " - AUDIOSHAKE_API_TOKEN (for AudioShake transcription)\n"
675
+ " - RUNPOD_API_KEY + WHISPER_RUNPOD_ID (for Whisper transcription)\n"
676
+ "2. Review the log output above for transcription errors\n"
677
+ "3. Try running with --log_level debug for more detailed output\n"
678
+ "4. If you don't need synchronized lyrics, use --skip-lyrics for instrumental-only karaoke\n"
679
+ "\n"
680
+ "See README.md 'Transcription Providers' and 'Troubleshooting' sections for more details."
681
+ )
658
682
 
659
683
  def choose_instrumental_audio_file(self, base_name):
660
684
  self.logger.info(f"Choosing instrumental audio file to use as karaoke audio...")
@@ -74,7 +74,7 @@ class KaraokePrep:
74
74
  skip_separation=False,
75
75
  # Video Background Configuration
76
76
  background_video=None,
77
- background_video_darkness=0,
77
+ background_video_darkness=50,
78
78
  # Audio Fetcher Configuration
79
79
  auto_download=False,
80
80
  ):
@@ -864,15 +864,21 @@ class KaraokePrep:
864
864
 
865
865
  # If separated_audio is empty (e.g., transcription was skipped but existing files have countdown),
866
866
  # scan the directory for existing instrumental files
867
+ # Note: also check for Custom instrumental (provided via --existing_instrumental)
867
868
  has_instrumentals = (
868
869
  processed_track["separated_audio"].get("clean_instrumental", {}).get("instrumental") or
869
- processed_track["separated_audio"].get("combined_instrumentals")
870
+ processed_track["separated_audio"].get("combined_instrumentals") or
871
+ processed_track["separated_audio"].get("Custom", {}).get("instrumental")
870
872
  )
871
873
  if not has_instrumentals:
872
874
  self.logger.info("No instrumentals in separated_audio, scanning directory for existing files...")
875
+ # Preserve existing Custom key if present before overwriting
876
+ custom_backup = processed_track["separated_audio"].get("Custom")
873
877
  processed_track["separated_audio"] = self._scan_directory_for_instrumentals(
874
878
  track_output_dir, artist_title
875
879
  )
880
+ if custom_backup:
881
+ processed_track["separated_audio"]["Custom"] = custom_backup
876
882
 
877
883
  # Apply padding using AudioProcessor
878
884
  padded_separation_result = self.audio_processor.apply_countdown_padding_to_instrumentals(
@@ -901,11 +907,11 @@ class KaraokePrep:
901
907
  for sig in (signal.SIGINT, signal.SIGTERM):
902
908
  loop.remove_signal_handler(sig)
903
909
 
904
- async def shutdown(self, signal):
910
+ async def shutdown(self, signal_received):
905
911
  """Handle shutdown signals gracefully."""
906
- self.logger.info(f"Received exit signal {signal.name}...")
912
+ self.logger.info(f"Received exit signal {signal_received.name}...")
907
913
 
908
- # Get all running tasks
914
+ # Get all running tasks except the current shutdown task
909
915
  tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
910
916
 
911
917
  if tasks:
@@ -914,17 +920,15 @@ class KaraokePrep:
914
920
  for task in tasks:
915
921
  task.cancel()
916
922
 
917
- self.logger.info("Received cancellation request, cleaning up...")
918
-
919
923
  # Wait for all tasks to complete with cancellation
920
- try:
921
- await asyncio.gather(*tasks, return_exceptions=True)
922
- except asyncio.CancelledError:
923
- pass
924
+ # Use return_exceptions=True to gather all results without raising
925
+ await asyncio.gather(*tasks, return_exceptions=True)
924
926
 
925
- # Force exit after cleanup
926
- self.logger.info("Cleanup complete, exiting...")
927
- sys.exit(0) # Add this line to force exit
927
+ self.logger.info("Cleanup complete")
928
+
929
+ # Raise KeyboardInterrupt to propagate the cancellation up the call stack
930
+ # This allows the main event loop to exit cleanly
931
+ raise KeyboardInterrupt()
928
932
 
929
933
  async def process_playlist(self):
930
934
  if self.artist is None or self.title is None:
@@ -27,10 +27,10 @@ class LyricsProcessor:
27
27
 
28
28
  def _detect_countdown_padding_from_lrc(self, lrc_filepath):
29
29
  """
30
- Detect if countdown padding was applied by checking the first lyric timestamp in the LRC file.
30
+ Detect if countdown padding was applied by checking for countdown text in the LRC file.
31
31
 
32
- LRC format timestamps look like: [mm:ss.xx] or [mm:ss.xxx]
33
- If the first lyric timestamp is >= 3.0 seconds, countdown padding was likely applied.
32
+ The countdown segment has the text "3... 2... 1..." at timestamp 0.1-2.9s.
33
+ We detect this by looking for the countdown text pattern.
34
34
 
35
35
  Args:
36
36
  lrc_filepath: Path to the LRC file
@@ -42,7 +42,15 @@ class LyricsProcessor:
42
42
  with open(lrc_filepath, 'r', encoding='utf-8') as f:
43
43
  content = f.read()
44
44
 
45
- # Find all timestamp patterns in the LRC file
45
+ # Method 1: Check for countdown text pattern "3... 2... 1..."
46
+ # This is the most reliable detection method since the countdown text is unique
47
+ countdown_text = "3... 2... 1..."
48
+ if countdown_text in content:
49
+ self.logger.info(f"Detected countdown padding from LRC: found countdown text '{countdown_text}'")
50
+ return (True, self.COUNTDOWN_PADDING_SECONDS)
51
+
52
+ # Method 2 (fallback): Check if first lyric timestamp is >= 3 seconds
53
+ # This handles cases where countdown text format might differ
46
54
  # LRC timestamps: [mm:ss.xx] or [mm:ss.xxx]
47
55
  timestamp_pattern = r'\[(\d{1,2}):(\d{2})\.(\d{2,3})\]'
48
56
  matches = re.findall(timestamp_pattern, content)
@@ -51,8 +59,7 @@ class LyricsProcessor:
51
59
  self.logger.debug("No timestamps found in LRC file")
52
60
  return (False, 0.0)
53
61
 
54
- # Find the first non-metadata timestamp (metadata like [ar:Artist] doesn't have decimal)
55
- # We already filtered for decimal timestamps in our pattern
62
+ # Parse the first timestamp
56
63
  first_timestamp = matches[0]
57
64
  minutes = int(first_timestamp[0])
58
65
  seconds = int(first_timestamp[1])
@@ -160,6 +167,76 @@ class LyricsProcessor:
160
167
 
161
168
  return processed_lines
162
169
 
170
+ def _check_transcription_providers(self) -> dict:
171
+ """
172
+ Check which transcription providers are configured and return their status.
173
+
174
+ Returns:
175
+ dict with 'configured' (list of provider names) and 'missing' (list of missing configs)
176
+ """
177
+ load_dotenv()
178
+
179
+ configured = []
180
+ missing = []
181
+
182
+ # Check AudioShake
183
+ audioshake_token = os.getenv("AUDIOSHAKE_API_TOKEN")
184
+ if audioshake_token:
185
+ configured.append("AudioShake")
186
+ self.logger.debug("AudioShake transcription provider: configured")
187
+ else:
188
+ missing.append("AudioShake (AUDIOSHAKE_API_TOKEN)")
189
+ self.logger.debug("AudioShake transcription provider: not configured (missing AUDIOSHAKE_API_TOKEN)")
190
+
191
+ # Check Whisper via RunPod
192
+ runpod_key = os.getenv("RUNPOD_API_KEY")
193
+ whisper_id = os.getenv("WHISPER_RUNPOD_ID")
194
+ if runpod_key and whisper_id:
195
+ configured.append("Whisper (RunPod)")
196
+ self.logger.debug("Whisper transcription provider: configured")
197
+ elif runpod_key:
198
+ missing.append("Whisper (missing WHISPER_RUNPOD_ID)")
199
+ self.logger.debug("Whisper transcription provider: partially configured (missing WHISPER_RUNPOD_ID)")
200
+ elif whisper_id:
201
+ missing.append("Whisper (missing RUNPOD_API_KEY)")
202
+ self.logger.debug("Whisper transcription provider: partially configured (missing RUNPOD_API_KEY)")
203
+ else:
204
+ missing.append("Whisper (RUNPOD_API_KEY + WHISPER_RUNPOD_ID)")
205
+ self.logger.debug("Whisper transcription provider: not configured")
206
+
207
+ return {"configured": configured, "missing": missing}
208
+
209
+ def _build_transcription_provider_error_message(self, missing_providers: list) -> str:
210
+ """Build a helpful error message when no transcription providers are configured."""
211
+ return (
212
+ "No transcription providers configured!\n"
213
+ "\n"
214
+ "Karaoke video generation requires at least one transcription provider to create "
215
+ "synchronized lyrics. Without a transcription provider, the system cannot generate "
216
+ "the word-level timing data needed for the karaoke video.\n"
217
+ "\n"
218
+ "AVAILABLE TRANSCRIPTION PROVIDERS:\n"
219
+ "\n"
220
+ "1. AudioShake (Recommended - Commercial, high-quality)\n"
221
+ " - Set environment variable: AUDIOSHAKE_API_TOKEN=your_token\n"
222
+ " - Get an API key at: https://www.audioshake.ai/\n"
223
+ "\n"
224
+ "2. Whisper via RunPod (Open-source alternative)\n"
225
+ " - Set environment variables:\n"
226
+ " RUNPOD_API_KEY=your_key\n"
227
+ " WHISPER_RUNPOD_ID=your_endpoint_id\n"
228
+ " - Set up a Whisper endpoint at: https://www.runpod.io/\n"
229
+ "\n"
230
+ "ALTERNATIVES:\n"
231
+ "\n"
232
+ "- Use --skip-lyrics flag to generate instrumental-only karaoke (no synchronized lyrics)\n"
233
+ "- Use --lyrics_file to provide pre-timed lyrics (still needs transcription for timing)\n"
234
+ "\n"
235
+ f"Missing provider configurations: {', '.join(missing_providers)}\n"
236
+ "\n"
237
+ "See README.md 'Transcription Providers' section for detailed setup instructions."
238
+ )
239
+
163
240
  def transcribe_lyrics(self, input_audio_wav, artist, title, track_output_dir, lyrics_artist=None, lyrics_title=None):
164
241
  """
165
242
  Transcribe lyrics for a track.
@@ -171,6 +248,9 @@ class LyricsProcessor:
171
248
  track_output_dir: Output directory path
172
249
  lyrics_artist: Artist name for lyrics processing (defaults to artist if None)
173
250
  lyrics_title: Title for lyrics processing (defaults to title if None)
251
+
252
+ Raises:
253
+ ValueError: If transcription is enabled but no providers are configured
174
254
  """
175
255
  # Use original artist/title for filename generation
176
256
  filename_artist = artist
@@ -234,6 +314,17 @@ class LyricsProcessor:
234
314
  "padded_audio_filepath": None, # Original padded audio may not exist
235
315
  }
236
316
 
317
+ # Check transcription provider configuration if transcription is not being skipped
318
+ # Do this AFTER checking for existing files, since existing files don't need transcription
319
+ if not self.skip_transcription:
320
+ provider_status = self._check_transcription_providers()
321
+
322
+ if provider_status["configured"]:
323
+ self.logger.info(f"Transcription providers configured: {', '.join(provider_status['configured'])}")
324
+ else:
325
+ error_msg = self._build_transcription_provider_error_message(provider_status["missing"])
326
+ raise ValueError(error_msg)
327
+
237
328
  # Create lyrics directory if it doesn't exist
238
329
  os.makedirs(lyrics_dir, exist_ok=True)
239
330
  self.logger.info(f"Created lyrics directory: {lyrics_dir}")
@@ -258,8 +258,8 @@ def create_parser(prog: str = "karaoke-gen") -> argparse.ArgumentParser:
258
258
  style_group.add_argument(
259
259
  "--background_video_darkness",
260
260
  type=int,
261
- default=0,
262
- help="Optional: Darkness overlay percentage (0-100) for video background (default: %(default)s). Example: --background_video_darkness=50",
261
+ default=50,
262
+ help="Optional: Darkness overlay percentage (0-100) for video background (default: %(default)s). Example: --background_video_darkness=20",
263
263
  )
264
264
 
265
265
  # Finalisation Configuration
@@ -352,9 +352,10 @@ def create_parser(prog: str = "karaoke-gen") -> argparse.ArgumentParser:
352
352
  )
353
353
  remote_group.add_argument(
354
354
  "--review-ui-url",
355
- default=os.environ.get('REVIEW_UI_URL', os.environ.get('LYRICS_REVIEW_UI_URL', 'https://lyrics.nomadkaraoke.com')),
356
- help="Lyrics review UI URL. Default: 'https://lyrics.nomadkaraoke.com'. "
357
- "Use 'http://localhost:5173' for Vite dev server during development. "
355
+ default=os.environ.get('REVIEW_UI_URL', os.environ.get('LYRICS_REVIEW_UI_URL', 'https://gen.nomadkaraoke.com/lyrics')),
356
+ help="Lyrics review UI URL. For remote mode: defaults to 'https://gen.nomadkaraoke.com/lyrics'. "
357
+ "For local mode: defaults to bundled frontend (from lyrics_transcriber/frontend/). "
358
+ "Use 'http://localhost:5173' to develop against Vite dev server. "
358
359
  "(env: REVIEW_UI_URL or LYRICS_REVIEW_UI_URL)",
359
360
  )
360
361
  remote_group.add_argument(
@@ -313,9 +313,18 @@ async def async_main():
313
313
  args = parser.parse_args()
314
314
 
315
315
  # Set review UI URL environment variable for the lyrics transcriber review server
316
- # This allows development against a local frontend dev server (e.g., http://localhost:5173)
316
+ # Only set this if the user explicitly wants to use a dev server (e.g., http://localhost:5173)
317
+ # By default, let the ReviewServer use its bundled local frontend (served from lyrics_transcriber/frontend/)
318
+ # This enables local iteration on the frontend without redeploying
317
319
  if hasattr(args, 'review_ui_url') and args.review_ui_url:
318
- os.environ['LYRICS_REVIEW_UI_URL'] = args.review_ui_url
320
+ # Check if user provided a custom value (not the default hosted URL)
321
+ default_hosted_urls = [
322
+ 'https://gen.nomadkaraoke.com/lyrics',
323
+ 'https://lyrics.nomadkaraoke.com'
324
+ ]
325
+ if args.review_ui_url.rstrip('/') not in [url.rstrip('/') for url in default_hosted_urls]:
326
+ # User explicitly wants a specific URL (e.g., Vite dev server)
327
+ os.environ['LYRICS_REVIEW_UI_URL'] = args.review_ui_url
319
328
 
320
329
  # Process style overrides
321
330
  try:
@@ -746,7 +755,7 @@ async def async_main():
746
755
  except UserCancelledError:
747
756
  logger.info("Operation cancelled by user")
748
757
  return
749
- except KeyboardInterrupt:
758
+ except (KeyboardInterrupt, asyncio.CancelledError):
750
759
  logger.info("Operation cancelled by user (Ctrl+C)")
751
760
  return
752
761
 
@@ -775,12 +784,28 @@ async def async_main():
775
784
  logger.info(f"Changing to directory: {track_dir}")
776
785
  os.chdir(track_dir)
777
786
 
778
- # Select instrumental file - either via web UI or auto-selection
787
+ # Select instrumental file - either via web UI, auto-selection, or custom instrumental
779
788
  # This ALWAYS produces a selected file - no silent fallback to legacy code
780
789
  selected_instrumental_file = None
781
790
  skip_review = getattr(args, 'skip_instrumental_review', False)
782
791
 
783
- if skip_review:
792
+ # Check if a custom instrumental was provided (via --existing_instrumental)
793
+ # In this case, the instrumental is already chosen - skip review entirely
794
+ separated_audio = track.get("separated_audio", {})
795
+ custom_instrumental = separated_audio.get("Custom", {}).get("instrumental")
796
+
797
+ if custom_instrumental:
798
+ # Custom instrumental was provided - use it directly, no review needed
799
+ resolved_path = _resolve_path_for_cwd(custom_instrumental, track_dir)
800
+ if os.path.exists(resolved_path):
801
+ logger.info(f"Using custom instrumental (--existing_instrumental): {resolved_path}")
802
+ selected_instrumental_file = resolved_path
803
+ else:
804
+ logger.error(f"Custom instrumental file not found: {resolved_path}")
805
+ logger.error("The file may have been moved or deleted after preparation.")
806
+ sys.exit(1)
807
+ return # Explicit return for testing
808
+ elif skip_review:
784
809
  # Auto-select instrumental when review is skipped (non-interactive mode)
785
810
  logger.info("Instrumental review skipped (--skip_instrumental_review), auto-selecting instrumental file...")
786
811
  try: