karaoke-gen 0.71.42__py3-none-any.whl → 0.75.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. karaoke_gen/__init__.py +32 -1
  2. karaoke_gen/audio_fetcher.py +1220 -67
  3. karaoke_gen/audio_processor.py +15 -3
  4. karaoke_gen/instrumental_review/server.py +154 -860
  5. karaoke_gen/instrumental_review/static/index.html +1529 -0
  6. karaoke_gen/karaoke_finalise/karaoke_finalise.py +87 -2
  7. karaoke_gen/karaoke_gen.py +131 -14
  8. karaoke_gen/lyrics_processor.py +172 -4
  9. karaoke_gen/utils/bulk_cli.py +3 -0
  10. karaoke_gen/utils/cli_args.py +7 -4
  11. karaoke_gen/utils/gen_cli.py +221 -5
  12. karaoke_gen/utils/remote_cli.py +786 -43
  13. {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/METADATA +109 -4
  14. {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/RECORD +37 -31
  15. lyrics_transcriber/core/controller.py +76 -2
  16. lyrics_transcriber/frontend/package.json +1 -1
  17. lyrics_transcriber/frontend/src/App.tsx +6 -4
  18. lyrics_transcriber/frontend/src/api.ts +25 -10
  19. lyrics_transcriber/frontend/src/components/Header.tsx +38 -12
  20. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +17 -3
  21. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
  22. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
  23. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
  24. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
  25. lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
  26. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +190 -542
  27. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  28. lyrics_transcriber/frontend/web_assets/assets/{index-DdJTDWH3.js → index-BECn1o8Q.js} +1802 -553
  29. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
  30. lyrics_transcriber/frontend/web_assets/index.html +1 -1
  31. lyrics_transcriber/output/countdown_processor.py +39 -0
  32. lyrics_transcriber/review/server.py +5 -5
  33. lyrics_transcriber/transcribers/audioshake.py +96 -7
  34. lyrics_transcriber/types.py +14 -12
  35. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +0 -1
  36. {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/WHEEL +0 -0
  37. {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/entry_points.txt +0 -0
  38. {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/licenses/LICENSE +0 -0
@@ -47,6 +47,7 @@ class KaraokeFinalise:
47
47
  user_youtube_credentials=None, # Add support for pre-stored credentials
48
48
  server_side_mode=False, # New parameter for server-side deployment
49
49
  selected_instrumental_file=None, # Add support for pre-selected instrumental file
50
+ countdown_padding_seconds=None, # Padding applied to vocals; instrumental must match
50
51
  ):
51
52
  self.log_level = log_level
52
53
  self.log_formatter = log_formatter
@@ -54,6 +55,9 @@ class KaraokeFinalise:
54
55
  if logger is None:
55
56
  self.logger = logging.getLogger(__name__)
56
57
  self.logger.setLevel(log_level)
58
+ # Prevent log propagation to root logger to avoid duplicate logs
59
+ # when external packages (like lyrics_converter) configure root logger handlers
60
+ self.logger.propagate = False
57
61
 
58
62
  self.log_handler = logging.StreamHandler()
59
63
 
@@ -105,6 +109,7 @@ class KaraokeFinalise:
105
109
  self.user_youtube_credentials = user_youtube_credentials
106
110
  self.server_side_mode = server_side_mode
107
111
  self.selected_instrumental_file = selected_instrumental_file
112
+ self.countdown_padding_seconds = countdown_padding_seconds
108
113
 
109
114
  self.suffixes = {
110
115
  "title_mov": " (Title).mov",
@@ -421,6 +426,15 @@ class KaraokeFinalise:
421
426
  # Check if any videos were found
422
427
  if "items" in response and len(response["items"]) > 0:
423
428
  for item in response["items"]:
429
+ # YouTube search API sometimes returns results from other channels even with channelId filter
430
+ # Verify the video actually belongs to our channel
431
+ result_channel_id = item["snippet"]["channelId"]
432
+ if result_channel_id != channel_id:
433
+ self.logger.debug(
434
+ f"Skipping video from different channel: {item['snippet']['title']} (channel: {result_channel_id})"
435
+ )
436
+ continue
437
+
424
438
  found_title = item["snippet"]["title"]
425
439
 
426
440
  # In server-side mode, require an exact match to avoid false positives.
@@ -640,7 +654,31 @@ class KaraokeFinalise:
640
654
  else:
641
655
  self.logger.warning(f"Unsupported file extension: {current_ext}")
642
656
 
643
- raise Exception("No suitable files found for processing.")
657
+ raise Exception(
658
+ "No suitable files found for processing.\n"
659
+ "\n"
660
+ "WHAT THIS MEANS:\n"
661
+ "The finalisation step requires a '(With Vocals).mkv' video file, which is created "
662
+ "during the lyrics transcription phase. This file contains the karaoke video with "
663
+ "synchronized lyrics overlay.\n"
664
+ "\n"
665
+ "COMMON CAUSES:\n"
666
+ "1. Transcription provider not configured - No AUDIOSHAKE_API_TOKEN or RUNPOD_API_KEY set\n"
667
+ "2. Transcription failed - Check logs above for API errors or timeout messages\n"
668
+ "3. Invalid API credentials - Verify your API tokens are correct and active\n"
669
+ "4. Network issues - Unable to reach transcription service\n"
670
+ "5. Running in wrong directory - Make sure you're in the track output folder\n"
671
+ "\n"
672
+ "TROUBLESHOOTING STEPS:\n"
673
+ "1. Check environment variables:\n"
674
+ " - AUDIOSHAKE_API_TOKEN (for AudioShake transcription)\n"
675
+ " - RUNPOD_API_KEY + WHISPER_RUNPOD_ID (for Whisper transcription)\n"
676
+ "2. Review the log output above for transcription errors\n"
677
+ "3. Try running with --log_level debug for more detailed output\n"
678
+ "4. If you don't need synchronized lyrics, use --skip-lyrics for instrumental-only karaoke\n"
679
+ "\n"
680
+ "See README.md 'Transcription Providers' and 'Troubleshooting' sections for more details."
681
+ )
644
682
 
645
683
  def choose_instrumental_audio_file(self, base_name):
646
684
  self.logger.info(f"Choosing instrumental audio file to use as karaoke audio...")
@@ -720,6 +758,32 @@ class KaraokeFinalise:
720
758
  artist, title = base_name.split(" - ", 1)
721
759
  return base_name, artist, title
722
760
 
761
def _pad_audio_file(self, input_audio, output_audio, padding_seconds):
    """
    Pad an audio file by prepending silence at the beginning.

    Builds an ffmpeg command that applies the ``adelay`` audio filter to
    ``input_audio`` and passes it to ``self.execute_command``.

    Args:
        input_audio: Path to input audio file
        output_audio: Path for the padded output file
        padding_seconds: Amount of silence to prepend (in seconds)
    """
    self.logger.info(f"Padding audio file with {padding_seconds}s of silence")

    # adelay expects whole milliseconds. round() is used instead of int():
    # a float product such as 1.001 * 1000 evaluates to 1000.9999999999999,
    # which int() would truncate to 1000 and leave the audio 1 ms short.
    delay_ms = round(padding_seconds * 1000)

    # One delay value per channel, separated by "|" (two values are given here).
    # NOTE(review): paths are wrapped in double quotes only; a path that itself
    # contains a double quote would break the command string - confirm inputs.
    ffmpeg_command = (
        f'{self.ffmpeg_base_command} -i "{input_audio}" '
        f'-af "adelay={delay_ms}|{delay_ms}" '
        f'"{output_audio}"'
    )

    self.execute_command(ffmpeg_command, f"Padding audio with {padding_seconds}s silence")
786
+
723
787
  def execute_command(self, command, description):
724
788
  """Execute a shell command and log the output. For general commands (rclone, etc.)"""
725
789
  self.logger.info(f"{description}")
@@ -764,11 +828,32 @@ class KaraokeFinalise:
764
828
 
765
829
  def remux_with_instrumental(self, with_vocals_file, instrumental_audio, output_file):
766
830
  """Remux the video with instrumental audio to create karaoke version"""
831
+ # Safety net: If countdown padding was applied to vocals, ensure instrumental is padded too
832
+ actual_instrumental = instrumental_audio
833
+ if self.countdown_padding_seconds and self.countdown_padding_seconds > 0:
834
+ # Check if the instrumental file is already padded (has "(Padded)" in name)
835
+ if "(Padded)" not in instrumental_audio:
836
+ self.logger.warning(
837
+ f"Countdown padding ({self.countdown_padding_seconds}s) was applied to vocals, "
838
+ f"but instrumental doesn't appear to be padded. Creating padded version..."
839
+ )
840
+ # Create a padded version of the instrumental
841
+ base, ext = os.path.splitext(instrumental_audio)
842
+ padded_instrumental = f"{base} (Padded){ext}"
843
+
844
+ if not os.path.exists(padded_instrumental):
845
+ self._pad_audio_file(instrumental_audio, padded_instrumental, self.countdown_padding_seconds)
846
+ self.logger.info(f"Created padded instrumental: {padded_instrumental}")
847
+
848
+ actual_instrumental = padded_instrumental
849
+ else:
850
+ self.logger.info(f"Using already-padded instrumental: {instrumental_audio}")
851
+
767
852
  # This operation is primarily I/O bound (remuxing), so hardware acceleration doesn't provide significant benefit
768
853
  # Keep the existing approach but use the new execute method
769
854
  ffmpeg_command = (
770
855
  f'{self.ffmpeg_base_command} -an -i "{with_vocals_file}" '
771
- f'-vn -i "{instrumental_audio}" -c:v copy -c:a pcm_s16le "{output_file}"'
856
+ f'-vn -i "{actual_instrumental}" -c:v copy -c:a pcm_s16le "{output_file}"'
772
857
  )
773
858
  self.execute_command(ffmpeg_command, "Remuxing video with instrumental audio")
774
859
 
@@ -29,7 +29,7 @@ from .audio_processor import AudioProcessor
29
29
  from .lyrics_processor import LyricsProcessor
30
30
  from .video_generator import VideoGenerator
31
31
  from .video_background_processor import VideoBackgroundProcessor
32
- from .audio_fetcher import create_audio_fetcher, AudioFetcherError, NoResultsError
32
+ from .audio_fetcher import create_audio_fetcher, AudioFetcherError, NoResultsError, UserCancelledError
33
33
 
34
34
 
35
35
  class KaraokePrep:
@@ -74,7 +74,7 @@ class KaraokePrep:
74
74
  skip_separation=False,
75
75
  # Video Background Configuration
76
76
  background_video=None,
77
- background_video_darkness=0,
77
+ background_video_darkness=50,
78
78
  # Audio Fetcher Configuration
79
79
  auto_download=False,
80
80
  ):
@@ -84,6 +84,9 @@ class KaraokePrep:
84
84
  if logger is None:
85
85
  self.logger = logging.getLogger(__name__)
86
86
  self.logger.setLevel(log_level)
87
+ # Prevent log propagation to root logger to avoid duplicate logs
88
+ # when external packages (like lyrics_converter) configure root logger handlers
89
+ self.logger.propagate = False
87
90
 
88
91
  self.log_handler = logging.StreamHandler()
89
92
 
@@ -256,6 +259,101 @@ class KaraokePrep:
256
259
  self.artist = metadata_result["artist"]
257
260
  self.title = metadata_result["title"]
258
261
 
262
def _scan_directory_for_instrumentals(self, track_output_dir, artist_title):
    """
    Scan the directory for existing instrumental files and build a separated_audio structure.

    Looks for "{artist_title} (Instrumental ...)" and
    "{artist_title} (Backing Vocals ...)" files with a .flac or .wav extension
    directly inside ``track_output_dir``. Files whose name contains "(Padded)"
    are ignored so that only the originals are returned.

    Args:
        track_output_dir: The track output directory to scan
        artist_title: The "{artist} - {title}" string for matching files

    Returns:
        Dictionary with the keys "clean_instrumental", "backing_vocals",
        "other_stems" and "combined_instrumentals". The first clean
        instrumental found is stored under
        ``["clean_instrumental"]["instrumental"]``; "+BV" files and any further
        clean instrumentals are stored in "combined_instrumentals" keyed by
        model name. "other_stems" is always left empty by this scan.
    """
    self.logger.info(f"Scanning directory for existing instrumentals: {track_output_dir}")

    separated_audio = {
        "clean_instrumental": {},
        "backing_vocals": {},
        "other_stems": {},
        "combined_instrumentals": {},
    }

    # Artist/title strings and directory names may contain glob metacharacters
    # ("[", "]", "*", "?"); escape them so they are matched literally.
    safe_dir = glob.escape(os.fspath(track_output_dir))
    safe_title = glob.escape(artist_title)

    def find_stems(label):
        """Return matching .flac files, then .wav files, each group sorted by path."""
        found = []
        for extension in ("flac", "wav"):
            pattern = os.path.join(safe_dir, f"{safe_title} ({label}*.{extension}")
            # glob order is arbitrary; sorting makes the choice of the
            # "first" clean instrumental reproducible between runs.
            found.extend(sorted(glob.glob(pattern)))
        return found

    # Files are named like: "{artist} - {title} (Instrumental {model}).flac"
    # Or with backing vocals: "{artist} - {title} (Instrumental +BV {model}).flac"
    instrumental_files = find_stems("Instrumental")
    self.logger.debug(f"Found {len(instrumental_files)} instrumental files")

    for filepath in instrumental_files:
        filename = os.path.basename(filepath)

        # Skip already padded files
        if "(Padded)" in filename:
            self.logger.debug(f"Skipping already padded file: {filename}")
            continue

        if "+bv" in filename.lower():
            # Combined instrumental with backing vocals. The regex is
            # case-insensitive so it agrees with the case-insensitive test above.
            model_match = re.search(r'\(Instrumental \+BV ([^)]+)\)', filename, re.IGNORECASE)
            if model_match:
                model_name = model_match.group(1).strip()
                separated_audio["combined_instrumentals"][model_name] = filepath
                self.logger.info(f"Found combined instrumental: {filename}")
            continue

        # Clean instrumental (no backing vocals): "(Instrumental {model})"
        model_match = re.search(r'\(Instrumental ([^)]+)\)', filename)
        if not model_match:
            continue

        if not separated_audio["clean_instrumental"].get("instrumental"):
            # Use as clean instrumental if we don't have one yet
            separated_audio["clean_instrumental"]["instrumental"] = filepath
            self.logger.info(f"Found clean instrumental: {filename}")
        else:
            # Additional clean instrumentals go to combined_instrumentals for padding
            model_name = model_match.group(1).strip()
            separated_audio["combined_instrumentals"][model_name] = filepath
            self.logger.info(f"Found additional instrumental: {filename}")

    # Also look for backing vocals files; the first file per model name wins
    for filepath in find_stems("Backing Vocals"):
        filename = os.path.basename(filepath)
        model_match = re.search(r'\(Backing Vocals ([^)]+)\)', filename)
        if model_match:
            model_name = model_match.group(1).strip()
            if model_name not in separated_audio["backing_vocals"]:
                separated_audio["backing_vocals"][model_name] = {"backing_vocals": filepath}
                self.logger.info(f"Found backing vocals: {filename}")

    # Log summary
    clean_count = 1 if separated_audio["clean_instrumental"].get("instrumental") else 0
    combined_count = len(separated_audio["combined_instrumentals"])
    self.logger.info(f"Directory scan complete: {clean_count} clean instrumental, {combined_count} combined instrumentals")

    return separated_audio
356
+
259
357
  async def prep_single_track(self):
260
358
  # Add signal handler at the start
261
359
  loop = asyncio.get_running_loop()
@@ -419,6 +517,9 @@ class KaraokePrep:
419
517
  # No still image for audio-only downloads
420
518
  processed_track["input_still_image"] = None
421
519
 
520
+ except UserCancelledError:
521
+ # User cancelled - propagate up to CLI for graceful exit
522
+ raise
422
523
  except NoResultsError as e:
423
524
  self.logger.error(f"No audio found: {e}")
424
525
  return None
@@ -761,6 +862,24 @@ class KaraokePrep:
761
862
  f"Applying {padding_seconds}s padding to all instrumental files to sync with vocal countdown"
762
863
  )
763
864
 
865
+ # If separated_audio is empty (e.g., transcription was skipped but existing files have countdown),
866
+ # scan the directory for existing instrumental files
867
+ # Note: also check for Custom instrumental (provided via --existing_instrumental)
868
+ has_instrumentals = (
869
+ processed_track["separated_audio"].get("clean_instrumental", {}).get("instrumental") or
870
+ processed_track["separated_audio"].get("combined_instrumentals") or
871
+ processed_track["separated_audio"].get("Custom", {}).get("instrumental")
872
+ )
873
+ if not has_instrumentals:
874
+ self.logger.info("No instrumentals in separated_audio, scanning directory for existing files...")
875
+ # Preserve existing Custom key if present before overwriting
876
+ custom_backup = processed_track["separated_audio"].get("Custom")
877
+ processed_track["separated_audio"] = self._scan_directory_for_instrumentals(
878
+ track_output_dir, artist_title
879
+ )
880
+ if custom_backup:
881
+ processed_track["separated_audio"]["Custom"] = custom_backup
882
+
764
883
  # Apply padding using AudioProcessor
765
884
  padded_separation_result = self.audio_processor.apply_countdown_padding_to_instrumentals(
766
885
  separation_result=processed_track["separated_audio"],
@@ -788,11 +907,11 @@ class KaraokePrep:
788
907
  for sig in (signal.SIGINT, signal.SIGTERM):
789
908
  loop.remove_signal_handler(sig)
790
909
 
791
- async def shutdown(self, signal):
910
+ async def shutdown(self, signal_received):
792
911
  """Handle shutdown signals gracefully."""
793
- self.logger.info(f"Received exit signal {signal.name}...")
912
+ self.logger.info(f"Received exit signal {signal_received.name}...")
794
913
 
795
- # Get all running tasks
914
+ # Get all running tasks except the current shutdown task
796
915
  tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
797
916
 
798
917
  if tasks:
@@ -801,17 +920,15 @@ class KaraokePrep:
801
920
  for task in tasks:
802
921
  task.cancel()
803
922
 
804
- self.logger.info("Received cancellation request, cleaning up...")
805
-
806
923
  # Wait for all tasks to complete with cancellation
807
- try:
808
- await asyncio.gather(*tasks, return_exceptions=True)
809
- except asyncio.CancelledError:
810
- pass
924
+ # Use return_exceptions=True to gather all results without raising
925
+ await asyncio.gather(*tasks, return_exceptions=True)
811
926
 
812
- # Force exit after cleanup
813
- self.logger.info("Cleanup complete, exiting...")
814
- sys.exit(0) # Add this line to force exit
927
+ self.logger.info("Cleanup complete")
928
+
929
+ # Raise KeyboardInterrupt to propagate the cancellation up the call stack
930
+ # This allows the main event loop to exit cleanly
931
+ raise KeyboardInterrupt()
815
932
 
816
933
  async def process_playlist(self):
817
934
  if self.artist is None or self.title is None:
@@ -11,6 +11,9 @@ from .utils import sanitize_filename
11
11
 
12
12
  # Placeholder class or functions for lyrics processing
13
13
  class LyricsProcessor:
14
+ # Standard countdown padding duration used by LyricsTranscriber
15
+ COUNTDOWN_PADDING_SECONDS = 3.0
16
+
14
17
  def __init__(
15
18
  self, logger, style_params_json, lyrics_file, skip_transcription, skip_transcription_review, render_video, subtitle_offset_ms
16
19
  ):
@@ -22,6 +25,67 @@ class LyricsProcessor:
22
25
  self.render_video = render_video
23
26
  self.subtitle_offset_ms = subtitle_offset_ms
24
27
 
28
def _detect_countdown_padding_from_lrc(self, lrc_filepath):
    """
    Detect if countdown padding was applied by inspecting an existing LRC file.

    Two checks are made, in this order:
      1. The literal countdown text "3... 2... 1..." appears anywhere in the file.
      2. Fallback: the first LRC timestamp ([mm:ss.xx] or [mm:ss.xxx]) is at
         or after 2.5 seconds.

    Args:
        lrc_filepath: Path to the LRC file

    Returns:
        Tuple of (countdown_padding_added: bool, countdown_padding_seconds: float).
        When padding is detected the seconds value is COUNTDOWN_PADDING_SECONDS;
        otherwise, including when the file cannot be read, (False, 0.0).
    """
    try:
        with open(lrc_filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        # Method 1: Check for countdown text pattern "3... 2... 1..."
        countdown_text = "3... 2... 1..."
        if countdown_text in content:
            self.logger.info(f"Detected countdown padding from LRC: found countdown text '{countdown_text}'")
            return (True, self.COUNTDOWN_PADDING_SECONDS)

        # Method 2 (fallback): look at the first timestamp only. re.search stops
        # at the first match instead of collecting every timestamp in the file.
        first_match = re.search(r'\[(\d{1,2}):(\d{2})\.(\d{2,3})\]', content)
        if first_match is None:
            self.logger.debug("No timestamps found in LRC file")
            return (False, 0.0)

        minutes, seconds, fraction = first_match.groups()
        # Two digits are centiseconds (.xx), three digits are milliseconds (.xxx)
        milliseconds = int(fraction) * 10 if len(fraction) == 2 else int(fraction)
        first_lyric_time = int(minutes) * 60 + int(seconds) + milliseconds / 1000.0

        self.logger.debug(f"First lyric timestamp in LRC: {first_lyric_time:.3f}s")

        # NOTE(review): this reports padding for ANY file whose first timestamp
        # is >= 2.5s, so a track with a long unpadded intro is also reported as
        # padded - confirm that callers can tolerate this false positive.
        if first_lyric_time >= 2.5:
            self.logger.info(f"Detected countdown padding from LRC: first lyric at {first_lyric_time:.2f}s")
            return (True, self.COUNTDOWN_PADDING_SECONDS)

        return (False, 0.0)

    except Exception as e:
        # Best effort: an unreadable file is treated as "no padding detected"
        self.logger.warning(f"Failed to detect countdown padding from LRC file: {e}")
        return (False, 0.0)
88
+
25
89
  def find_best_split_point(self, line):
26
90
  """
27
91
  Find the best split point in a line based on the specified criteria.
@@ -103,6 +167,76 @@ class LyricsProcessor:
103
167
 
104
168
  return processed_lines
105
169
 
170
def _check_transcription_providers(self) -> dict:
    """
    Report which transcription providers have their environment variables set.

    Calls load_dotenv() first, then reads AUDIOSHAKE_API_TOKEN for AudioShake
    and RUNPOD_API_KEY plus WHISPER_RUNPOD_ID for Whisper via RunPod.

    Returns:
        dict with 'configured' (list of provider names) and 'missing'
        (list of descriptions of the missing configuration)
    """
    load_dotenv()

    available = []
    absent = []

    # AudioShake: a single token is enough
    if os.getenv("AUDIOSHAKE_API_TOKEN"):
        available.append("AudioShake")
        self.logger.debug("AudioShake transcription provider: configured")
    else:
        absent.append("AudioShake (AUDIOSHAKE_API_TOKEN)")
        self.logger.debug("AudioShake transcription provider: not configured (missing AUDIOSHAKE_API_TOKEN)")

    # Whisper via RunPod: both the API key and the endpoint id are required.
    # Each (key present, endpoint present) combination maps to the list it is
    # recorded in, the entry to record and the debug line to emit.
    has_key = bool(os.getenv("RUNPOD_API_KEY"))
    has_endpoint = bool(os.getenv("WHISPER_RUNPOD_ID"))
    whisper_outcomes = {
        (True, True): (
            available,
            "Whisper (RunPod)",
            "Whisper transcription provider: configured",
        ),
        (True, False): (
            absent,
            "Whisper (missing WHISPER_RUNPOD_ID)",
            "Whisper transcription provider: partially configured (missing WHISPER_RUNPOD_ID)",
        ),
        (False, True): (
            absent,
            "Whisper (missing RUNPOD_API_KEY)",
            "Whisper transcription provider: partially configured (missing RUNPOD_API_KEY)",
        ),
        (False, False): (
            absent,
            "Whisper (RUNPOD_API_KEY + WHISPER_RUNPOD_ID)",
            "Whisper transcription provider: not configured",
        ),
    }
    target, entry, debug_line = whisper_outcomes[(has_key, has_endpoint)]
    target.append(entry)
    self.logger.debug(debug_line)

    return {"configured": available, "missing": absent}
208
+
209
def _build_transcription_provider_error_message(self, missing_providers: list) -> str:
    """
    Build the multi-line error text shown when no transcription provider is configured.

    Args:
        missing_providers: Descriptions of the missing provider configurations;
            they are joined with ", " into the "Missing provider configurations" line

    Returns:
        The complete message, with lines separated by newlines and no trailing newline
    """
    missing_summary = ', '.join(missing_providers)

    # One entry per output line; empty strings are the blank separator lines
    message_lines = [
        "No transcription providers configured!",
        "",
        "Karaoke video generation requires at least one transcription provider to create "
        "synchronized lyrics. Without a transcription provider, the system cannot generate "
        "the word-level timing data needed for the karaoke video.",
        "",
        "AVAILABLE TRANSCRIPTION PROVIDERS:",
        "",
        "1. AudioShake (Recommended - Commercial, high-quality)",
        " - Set environment variable: AUDIOSHAKE_API_TOKEN=your_token",
        " - Get an API key at: https://www.audioshake.ai/",
        "",
        "2. Whisper via RunPod (Open-source alternative)",
        " - Set environment variables:",
        " RUNPOD_API_KEY=your_key",
        " WHISPER_RUNPOD_ID=your_endpoint_id",
        " - Set up a Whisper endpoint at: https://www.runpod.io/",
        "",
        "ALTERNATIVES:",
        "",
        "- Use --skip-lyrics flag to generate instrumental-only karaoke (no synchronized lyrics)",
        "- Use --lyrics_file to provide pre-timed lyrics (still needs transcription for timing)",
        "",
        f"Missing provider configurations: {missing_summary}",
        "",
        "See README.md 'Transcription Providers' section for detailed setup instructions.",
    ]
    return "\n".join(message_lines)
239
+
106
240
  def transcribe_lyrics(self, input_audio_wav, artist, title, track_output_dir, lyrics_artist=None, lyrics_title=None):
107
241
  """
108
242
  Transcribe lyrics for a track.
@@ -114,6 +248,9 @@ class LyricsProcessor:
114
248
  track_output_dir: Output directory path
115
249
  lyrics_artist: Artist name for lyrics processing (defaults to artist if None)
116
250
  lyrics_title: Title for lyrics processing (defaults to title if None)
251
+
252
+ Raises:
253
+ ValueError: If transcription is enabled but no providers are configured
117
254
  """
118
255
  # Use original artist/title for filename generation
119
256
  filename_artist = artist
@@ -138,25 +275,56 @@ class LyricsProcessor:
138
275
  lyrics_video_path = os.path.join(lyrics_dir, f"{sanitized_artist} - {sanitized_title} (With Vocals).mkv")
139
276
  lyrics_lrc_path = os.path.join(lyrics_dir, f"{sanitized_artist} - {sanitized_title} (Karaoke).lrc")
140
277
 
141
- # If files exist in parent directory, return early
278
+ # If files exist in parent directory, return early (but detect countdown padding first)
142
279
  if os.path.exists(parent_video_path) and os.path.exists(parent_lrc_path):
143
- self.logger.info(f"Found existing video and LRC files in parent directory, skipping transcription")
280
+ self.logger.info("Found existing video and LRC files in parent directory, skipping transcription")
281
+
282
+ # Detect countdown padding from existing LRC file
283
+ countdown_padding_added, countdown_padding_seconds = self._detect_countdown_padding_from_lrc(parent_lrc_path)
284
+
285
+ if countdown_padding_added:
286
+ self.logger.info(f"Existing files have countdown padding: {countdown_padding_seconds}s")
287
+
144
288
  return {
145
289
  "lrc_filepath": parent_lrc_path,
146
290
  "ass_filepath": parent_video_path,
291
+ "countdown_padding_added": countdown_padding_added,
292
+ "countdown_padding_seconds": countdown_padding_seconds,
293
+ "padded_audio_filepath": None, # Original padded audio may not exist
147
294
  }
148
295
 
149
- # If files exist in lyrics directory, copy to parent and return
296
+ # If files exist in lyrics directory, copy to parent and return (but detect countdown padding first)
150
297
  if os.path.exists(lyrics_video_path) and os.path.exists(lyrics_lrc_path):
151
- self.logger.info(f"Found existing video and LRC files in lyrics directory, copying to parent")
298
+ self.logger.info("Found existing video and LRC files in lyrics directory, copying to parent")
152
299
  os.makedirs(track_output_dir, exist_ok=True)
153
300
  shutil.copy2(lyrics_video_path, parent_video_path)
154
301
  shutil.copy2(lyrics_lrc_path, parent_lrc_path)
302
+
303
+ # Detect countdown padding from existing LRC file
304
+ countdown_padding_added, countdown_padding_seconds = self._detect_countdown_padding_from_lrc(parent_lrc_path)
305
+
306
+ if countdown_padding_added:
307
+ self.logger.info(f"Existing files have countdown padding: {countdown_padding_seconds}s")
308
+
155
309
  return {
156
310
  "lrc_filepath": parent_lrc_path,
157
311
  "ass_filepath": parent_video_path,
312
+ "countdown_padding_added": countdown_padding_added,
313
+ "countdown_padding_seconds": countdown_padding_seconds,
314
+ "padded_audio_filepath": None, # Original padded audio may not exist
158
315
  }
159
316
 
317
+ # Check transcription provider configuration if transcription is not being skipped
318
+ # Do this AFTER checking for existing files, since existing files don't need transcription
319
+ if not self.skip_transcription:
320
+ provider_status = self._check_transcription_providers()
321
+
322
+ if provider_status["configured"]:
323
+ self.logger.info(f"Transcription providers configured: {', '.join(provider_status['configured'])}")
324
+ else:
325
+ error_msg = self._build_transcription_provider_error_message(provider_status["missing"])
326
+ raise ValueError(error_msg)
327
+
160
328
  # Create lyrics directory if it doesn't exist
161
329
  os.makedirs(lyrics_dir, exist_ok=True)
162
330
  self.logger.info(f"Created lyrics directory: {lyrics_dir}")
@@ -19,6 +19,9 @@ from karaoke_gen.karaoke_finalise import KaraokeFinalise
19
19
  # Global logger
20
20
  logger = logging.getLogger(__name__)
21
21
  logger.setLevel(logging.INFO) # Set initial log level
22
+ # Prevent log propagation to root logger to avoid duplicate logs
23
+ # when external packages (like lyrics_converter) configure root logger handlers
24
+ logger.propagate = False
22
25
 
23
26
 
24
27
  async def process_track_prep(row, args, logger, log_formatter):
@@ -258,8 +258,8 @@ def create_parser(prog: str = "karaoke-gen") -> argparse.ArgumentParser:
258
258
  style_group.add_argument(
259
259
  "--background_video_darkness",
260
260
  type=int,
261
- default=0,
262
- help="Optional: Darkness overlay percentage (0-100) for video background (default: %(default)s). Example: --background_video_darkness=50",
261
+ default=50,
262
+ help="Optional: Darkness overlay percentage (0-100) for video background (default: %(default)s). Example: --background_video_darkness=20",
263
263
  )
264
264
 
265
265
  # Finalisation Configuration
@@ -352,8 +352,11 @@ def create_parser(prog: str = "karaoke-gen") -> argparse.ArgumentParser:
352
352
  )
353
353
  remote_group.add_argument(
354
354
  "--review-ui-url",
355
- default=os.environ.get('REVIEW_UI_URL', 'https://lyrics.nomadkaraoke.com'),
356
- help="Lyrics review UI URL (default: https://lyrics.nomadkaraoke.com)",
355
+ default=os.environ.get('REVIEW_UI_URL', os.environ.get('LYRICS_REVIEW_UI_URL', 'https://gen.nomadkaraoke.com/lyrics')),
356
+ help="Lyrics review UI URL. For remote mode: defaults to 'https://gen.nomadkaraoke.com/lyrics'. "
357
+ "For local mode: defaults to bundled frontend (from lyrics_transcriber/frontend/). "
358
+ "Use 'http://localhost:5173' to develop against Vite dev server. "
359
+ "(env: REVIEW_UI_URL or LYRICS_REVIEW_UI_URL)",
357
360
  )
358
361
  remote_group.add_argument(
359
362
  "--poll-interval",