karaoke-gen 0.71.42__py3-none-any.whl → 0.75.53__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- karaoke_gen/__init__.py +32 -1
- karaoke_gen/audio_fetcher.py +1220 -67
- karaoke_gen/audio_processor.py +15 -3
- karaoke_gen/instrumental_review/server.py +154 -860
- karaoke_gen/instrumental_review/static/index.html +1529 -0
- karaoke_gen/karaoke_finalise/karaoke_finalise.py +87 -2
- karaoke_gen/karaoke_gen.py +131 -14
- karaoke_gen/lyrics_processor.py +172 -4
- karaoke_gen/utils/bulk_cli.py +3 -0
- karaoke_gen/utils/cli_args.py +7 -4
- karaoke_gen/utils/gen_cli.py +221 -5
- karaoke_gen/utils/remote_cli.py +786 -43
- {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/METADATA +109 -4
- {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/RECORD +37 -31
- lyrics_transcriber/core/controller.py +76 -2
- lyrics_transcriber/frontend/package.json +1 -1
- lyrics_transcriber/frontend/src/App.tsx +6 -4
- lyrics_transcriber/frontend/src/api.ts +25 -10
- lyrics_transcriber/frontend/src/components/Header.tsx +38 -12
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +17 -3
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
- lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
- lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +190 -542
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/frontend/web_assets/assets/{index-DdJTDWH3.js → index-BECn1o8Q.js} +1802 -553
- lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
- lyrics_transcriber/frontend/web_assets/index.html +1 -1
- lyrics_transcriber/output/countdown_processor.py +39 -0
- lyrics_transcriber/review/server.py +5 -5
- lyrics_transcriber/transcribers/audioshake.py +96 -7
- lyrics_transcriber/types.py +14 -12
- lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +0 -1
- {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/WHEEL +0 -0
- {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/entry_points.txt +0 -0
- {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/licenses/LICENSE +0 -0
|
@@ -47,6 +47,7 @@ class KaraokeFinalise:
|
|
|
47
47
|
user_youtube_credentials=None, # Add support for pre-stored credentials
|
|
48
48
|
server_side_mode=False, # New parameter for server-side deployment
|
|
49
49
|
selected_instrumental_file=None, # Add support for pre-selected instrumental file
|
|
50
|
+
countdown_padding_seconds=None, # Padding applied to vocals; instrumental must match
|
|
50
51
|
):
|
|
51
52
|
self.log_level = log_level
|
|
52
53
|
self.log_formatter = log_formatter
|
|
@@ -54,6 +55,9 @@ class KaraokeFinalise:
|
|
|
54
55
|
if logger is None:
|
|
55
56
|
self.logger = logging.getLogger(__name__)
|
|
56
57
|
self.logger.setLevel(log_level)
|
|
58
|
+
# Prevent log propagation to root logger to avoid duplicate logs
|
|
59
|
+
# when external packages (like lyrics_converter) configure root logger handlers
|
|
60
|
+
self.logger.propagate = False
|
|
57
61
|
|
|
58
62
|
self.log_handler = logging.StreamHandler()
|
|
59
63
|
|
|
@@ -105,6 +109,7 @@ class KaraokeFinalise:
|
|
|
105
109
|
self.user_youtube_credentials = user_youtube_credentials
|
|
106
110
|
self.server_side_mode = server_side_mode
|
|
107
111
|
self.selected_instrumental_file = selected_instrumental_file
|
|
112
|
+
self.countdown_padding_seconds = countdown_padding_seconds
|
|
108
113
|
|
|
109
114
|
self.suffixes = {
|
|
110
115
|
"title_mov": " (Title).mov",
|
|
@@ -421,6 +426,15 @@ class KaraokeFinalise:
|
|
|
421
426
|
# Check if any videos were found
|
|
422
427
|
if "items" in response and len(response["items"]) > 0:
|
|
423
428
|
for item in response["items"]:
|
|
429
|
+
# YouTube search API sometimes returns results from other channels even with channelId filter
|
|
430
|
+
# Verify the video actually belongs to our channel
|
|
431
|
+
result_channel_id = item["snippet"]["channelId"]
|
|
432
|
+
if result_channel_id != channel_id:
|
|
433
|
+
self.logger.debug(
|
|
434
|
+
f"Skipping video from different channel: {item['snippet']['title']} (channel: {result_channel_id})"
|
|
435
|
+
)
|
|
436
|
+
continue
|
|
437
|
+
|
|
424
438
|
found_title = item["snippet"]["title"]
|
|
425
439
|
|
|
426
440
|
# In server-side mode, require an exact match to avoid false positives.
|
|
@@ -640,7 +654,31 @@ class KaraokeFinalise:
|
|
|
640
654
|
else:
|
|
641
655
|
self.logger.warning(f"Unsupported file extension: {current_ext}")
|
|
642
656
|
|
|
643
|
-
raise Exception(
|
|
657
|
+
raise Exception(
|
|
658
|
+
"No suitable files found for processing.\n"
|
|
659
|
+
"\n"
|
|
660
|
+
"WHAT THIS MEANS:\n"
|
|
661
|
+
"The finalisation step requires a '(With Vocals).mkv' video file, which is created "
|
|
662
|
+
"during the lyrics transcription phase. This file contains the karaoke video with "
|
|
663
|
+
"synchronized lyrics overlay.\n"
|
|
664
|
+
"\n"
|
|
665
|
+
"COMMON CAUSES:\n"
|
|
666
|
+
"1. Transcription provider not configured - No AUDIOSHAKE_API_TOKEN or RUNPOD_API_KEY set\n"
|
|
667
|
+
"2. Transcription failed - Check logs above for API errors or timeout messages\n"
|
|
668
|
+
"3. Invalid API credentials - Verify your API tokens are correct and active\n"
|
|
669
|
+
"4. Network issues - Unable to reach transcription service\n"
|
|
670
|
+
"5. Running in wrong directory - Make sure you're in the track output folder\n"
|
|
671
|
+
"\n"
|
|
672
|
+
"TROUBLESHOOTING STEPS:\n"
|
|
673
|
+
"1. Check environment variables:\n"
|
|
674
|
+
" - AUDIOSHAKE_API_TOKEN (for AudioShake transcription)\n"
|
|
675
|
+
" - RUNPOD_API_KEY + WHISPER_RUNPOD_ID (for Whisper transcription)\n"
|
|
676
|
+
"2. Review the log output above for transcription errors\n"
|
|
677
|
+
"3. Try running with --log_level debug for more detailed output\n"
|
|
678
|
+
"4. If you don't need synchronized lyrics, use --skip-lyrics for instrumental-only karaoke\n"
|
|
679
|
+
"\n"
|
|
680
|
+
"See README.md 'Transcription Providers' and 'Troubleshooting' sections for more details."
|
|
681
|
+
)
|
|
644
682
|
|
|
645
683
|
def choose_instrumental_audio_file(self, base_name):
|
|
646
684
|
self.logger.info(f"Choosing instrumental audio file to use as karaoke audio...")
|
|
@@ -720,6 +758,32 @@ class KaraokeFinalise:
|
|
|
720
758
|
artist, title = base_name.split(" - ", 1)
|
|
721
759
|
return base_name, artist, title
|
|
722
760
|
|
|
761
|
+
def _pad_audio_file(self, input_audio, output_audio, padding_seconds):
    """
    Pad an audio file by prepending silence at the beginning.

    Builds an ffmpeg command around the ``adelay`` filter and hands it to
    ``self.execute_command``. The original note on this method says the
    approach mirrors LyricsTranscriber's CountdownProcessor and
    audio_processor.py; that cannot be confirmed from this block alone.

    Args:
        input_audio: Path to input audio file
        output_audio: Path for the padded output file
        padding_seconds: Amount of silence to prepend (in seconds)

    Raises:
        ValueError: If padding_seconds is negative (adelay cannot express a
            negative delay, so fail before launching ffmpeg).
    """
    if padding_seconds < 0:
        raise ValueError(f"padding_seconds must be non-negative, got {padding_seconds}")

    self.logger.info(f"Padding audio file with {padding_seconds}s of silence")

    # adelay takes its delays in milliseconds. round() is used instead of int()
    # because float products such as 1.005 * 1000 land just below the intended
    # integer, and truncation would silently drop a millisecond.
    delay_ms = round(padding_seconds * 1000)

    # NOTE(review): only two delays are listed, so per the ffmpeg adelay docs
    # channels beyond the second would not be delayed - confirm inputs are
    # always mono/stereo.
    ffmpeg_command = (
        f'{self.ffmpeg_base_command} -i "{input_audio}" '
        f'-af "adelay={delay_ms}|{delay_ms}" '
        f'"{output_audio}"'
    )

    self.execute_command(ffmpeg_command, f"Padding audio with {padding_seconds}s silence")
|
|
786
|
+
|
|
723
787
|
def execute_command(self, command, description):
|
|
724
788
|
"""Execute a shell command and log the output. For general commands (rclone, etc.)"""
|
|
725
789
|
self.logger.info(f"{description}")
|
|
@@ -764,11 +828,32 @@ class KaraokeFinalise:
|
|
|
764
828
|
|
|
765
829
|
def remux_with_instrumental(self, with_vocals_file, instrumental_audio, output_file):
    """Remux the video with instrumental audio to create karaoke version.

    Takes the video stream from ``with_vocals_file`` (copied as-is) and the
    audio from the instrumental file (written as pcm_s16le) and combines them
    into ``output_file`` through ``self.execute_command``.

    Safety net: when ``self.countdown_padding_seconds`` is a positive number
    and the instrumental's file name carries no "(Padded)" marker, a padded
    sibling file "<name> (Padded)<ext>" is used instead. It is created with
    ``self._pad_audio_file`` if it does not exist yet.

    Args:
        with_vocals_file: Path to the video that supplies the video stream
        instrumental_audio: Path to the instrumental audio file
        output_file: Path of the remuxed karaoke video to write
    """
    actual_instrumental = instrumental_audio

    if self.countdown_padding_seconds and self.countdown_padding_seconds > 0:
        # Look at the file name only: a parent directory that happens to contain
        # "(Padded)" must not make an unpadded instrumental look padded.
        if "(Padded)" in os.path.basename(instrumental_audio):
            self.logger.info(f"Using already-padded instrumental: {instrumental_audio}")
        else:
            self.logger.warning(
                f"Countdown padding ({self.countdown_padding_seconds}s) was applied to vocals, "
                f"but instrumental doesn't appear to be padded. Creating padded version..."
            )

            # Padded copy lives next to the original: "<base> (Padded)<ext>"
            base, ext = os.path.splitext(instrumental_audio)
            padded_instrumental = f"{base} (Padded){ext}"

            # Reuse a padded copy left behind by an earlier run
            if not os.path.exists(padded_instrumental):
                self._pad_audio_file(instrumental_audio, padded_instrumental, self.countdown_padding_seconds)
                self.logger.info(f"Created padded instrumental: {padded_instrumental}")

            actual_instrumental = padded_instrumental

    # This operation is primarily I/O bound (remuxing), so hardware acceleration doesn't provide significant benefit
    ffmpeg_command = (
        f'{self.ffmpeg_base_command} -an -i "{with_vocals_file}" '
        f'-vn -i "{actual_instrumental}" -c:v copy -c:a pcm_s16le "{output_file}"'
    )
    self.execute_command(ffmpeg_command, "Remuxing video with instrumental audio")
|
|
774
859
|
|
karaoke_gen/karaoke_gen.py
CHANGED
|
@@ -29,7 +29,7 @@ from .audio_processor import AudioProcessor
|
|
|
29
29
|
from .lyrics_processor import LyricsProcessor
|
|
30
30
|
from .video_generator import VideoGenerator
|
|
31
31
|
from .video_background_processor import VideoBackgroundProcessor
|
|
32
|
-
from .audio_fetcher import create_audio_fetcher, AudioFetcherError, NoResultsError
|
|
32
|
+
from .audio_fetcher import create_audio_fetcher, AudioFetcherError, NoResultsError, UserCancelledError
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
class KaraokePrep:
|
|
@@ -74,7 +74,7 @@ class KaraokePrep:
|
|
|
74
74
|
skip_separation=False,
|
|
75
75
|
# Video Background Configuration
|
|
76
76
|
background_video=None,
|
|
77
|
-
background_video_darkness=
|
|
77
|
+
background_video_darkness=50,
|
|
78
78
|
# Audio Fetcher Configuration
|
|
79
79
|
auto_download=False,
|
|
80
80
|
):
|
|
@@ -84,6 +84,9 @@ class KaraokePrep:
|
|
|
84
84
|
if logger is None:
|
|
85
85
|
self.logger = logging.getLogger(__name__)
|
|
86
86
|
self.logger.setLevel(log_level)
|
|
87
|
+
# Prevent log propagation to root logger to avoid duplicate logs
|
|
88
|
+
# when external packages (like lyrics_converter) configure root logger handlers
|
|
89
|
+
self.logger.propagate = False
|
|
87
90
|
|
|
88
91
|
self.log_handler = logging.StreamHandler()
|
|
89
92
|
|
|
@@ -256,6 +259,101 @@ class KaraokePrep:
|
|
|
256
259
|
self.artist = metadata_result["artist"]
|
|
257
260
|
self.title = metadata_result["title"]
|
|
258
261
|
|
|
262
|
+
def _scan_directory_for_instrumentals(self, track_output_dir, artist_title):
    """
    Scan the directory for existing instrumental files and build a separated_audio structure.

    This is used when transcription was skipped (existing files found) but we need to
    pad instrumentals due to countdown padding.

    Files are matched by name in ``track_output_dir`` (``.flac`` first, then ``.wav``):
      - "{artist_title} (Instrumental {model})"      -> first one found becomes the clean
        instrumental; further ones are stored under "combined_instrumentals"
      - "{artist_title} (Instrumental +BV {model})"  -> "combined_instrumentals"
      - "{artist_title} (Backing Vocals {model})"    -> "backing_vocals"
    Files whose name contains "(Padded)" are ignored so only originals are returned.

    Args:
        track_output_dir: The track output directory to scan
        artist_title: The "{artist} - {title}" string for matching files

    Returns:
        Dictionary with the keys "clean_instrumental", "backing_vocals", "other_stems"
        and "combined_instrumentals" containing the found file paths
        ("other_stems" is always left empty by this scan).
    """
    self.logger.info(f"Scanning directory for existing instrumentals: {track_output_dir}")

    separated_audio = {
        "clean_instrumental": {},
        "backing_vocals": {},
        "other_stems": {},
        "combined_instrumentals": {},
    }

    # Escape glob metacharacters: artist/title strings such as "Band [Live] - Song"
    # would otherwise be read as a character class and match nothing.
    pattern_prefix = os.path.join(glob.escape(track_output_dir), glob.escape(artist_title))

    def find_stems(label):
        # flac results first, then wav; each group sorted so the outcome does not
        # depend on filesystem listing order.
        found = []
        for extension in (".flac", ".wav"):
            found.extend(sorted(glob.glob(f"{pattern_prefix} ({label}*{extension}")))
        return found

    instrumental_files = find_stems("Instrumental")
    self.logger.debug(f"Found {len(instrumental_files)} instrumental files")

    for filepath in instrumental_files:
        filename = os.path.basename(filepath)

        # Skip already padded files - we want the originals
        if "(Padded)" in filename:
            self.logger.debug(f"Skipping already padded file: {filename}")
            continue

        if "+bv" in filename.lower():
            # Combined instrumental with backing vocals: "(Instrumental +BV {model})".
            # Case-insensitive so it agrees with the branch test above.
            model_match = re.search(r'\(Instrumental \+BV ([^)]+)\)', filename, re.IGNORECASE)
            if model_match:
                model_name = model_match.group(1).strip()
                separated_audio["combined_instrumentals"][model_name] = filepath
                self.logger.info(f"Found combined instrumental: {filename}")
            continue

        # Clean instrumental (no backing vocals): "(Instrumental {model})"
        model_match = re.search(r'\(Instrumental ([^)]+)\)', filename)
        if not model_match:
            continue

        if not separated_audio["clean_instrumental"].get("instrumental"):
            # First clean instrumental wins
            separated_audio["clean_instrumental"]["instrumental"] = filepath
            self.logger.info(f"Found clean instrumental: {filename}")
        else:
            # Additional clean instrumentals go to combined_instrumentals for padding
            model_name = model_match.group(1).strip()
            separated_audio["combined_instrumentals"][model_name] = filepath
            self.logger.info(f"Found additional instrumental: {filename}")

    # Also look for backing vocals files; the first file per model is kept
    for filepath in find_stems("Backing Vocals"):
        filename = os.path.basename(filepath)
        model_match = re.search(r'\(Backing Vocals ([^)]+)\)', filename)
        if model_match:
            model_name = model_match.group(1).strip()
            if model_name not in separated_audio["backing_vocals"]:
                separated_audio["backing_vocals"][model_name] = {"backing_vocals": filepath}
                self.logger.info(f"Found backing vocals: {filename}")

    # Log summary
    clean_count = 1 if separated_audio["clean_instrumental"].get("instrumental") else 0
    combined_count = len(separated_audio["combined_instrumentals"])
    self.logger.info(f"Directory scan complete: {clean_count} clean instrumental, {combined_count} combined instrumentals")

    return separated_audio
|
|
356
|
+
|
|
259
357
|
async def prep_single_track(self):
|
|
260
358
|
# Add signal handler at the start
|
|
261
359
|
loop = asyncio.get_running_loop()
|
|
@@ -419,6 +517,9 @@ class KaraokePrep:
|
|
|
419
517
|
# No still image for audio-only downloads
|
|
420
518
|
processed_track["input_still_image"] = None
|
|
421
519
|
|
|
520
|
+
except UserCancelledError:
|
|
521
|
+
# User cancelled - propagate up to CLI for graceful exit
|
|
522
|
+
raise
|
|
422
523
|
except NoResultsError as e:
|
|
423
524
|
self.logger.error(f"No audio found: {e}")
|
|
424
525
|
return None
|
|
@@ -761,6 +862,24 @@ class KaraokePrep:
|
|
|
761
862
|
f"Applying {padding_seconds}s padding to all instrumental files to sync with vocal countdown"
|
|
762
863
|
)
|
|
763
864
|
|
|
865
|
+
# If separated_audio is empty (e.g., transcription was skipped but existing files have countdown),
|
|
866
|
+
# scan the directory for existing instrumental files
|
|
867
|
+
# Note: also check for Custom instrumental (provided via --existing_instrumental)
|
|
868
|
+
has_instrumentals = (
|
|
869
|
+
processed_track["separated_audio"].get("clean_instrumental", {}).get("instrumental") or
|
|
870
|
+
processed_track["separated_audio"].get("combined_instrumentals") or
|
|
871
|
+
processed_track["separated_audio"].get("Custom", {}).get("instrumental")
|
|
872
|
+
)
|
|
873
|
+
if not has_instrumentals:
|
|
874
|
+
self.logger.info("No instrumentals in separated_audio, scanning directory for existing files...")
|
|
875
|
+
# Preserve existing Custom key if present before overwriting
|
|
876
|
+
custom_backup = processed_track["separated_audio"].get("Custom")
|
|
877
|
+
processed_track["separated_audio"] = self._scan_directory_for_instrumentals(
|
|
878
|
+
track_output_dir, artist_title
|
|
879
|
+
)
|
|
880
|
+
if custom_backup:
|
|
881
|
+
processed_track["separated_audio"]["Custom"] = custom_backup
|
|
882
|
+
|
|
764
883
|
# Apply padding using AudioProcessor
|
|
765
884
|
padded_separation_result = self.audio_processor.apply_countdown_padding_to_instrumentals(
|
|
766
885
|
separation_result=processed_track["separated_audio"],
|
|
@@ -788,11 +907,11 @@ class KaraokePrep:
|
|
|
788
907
|
for sig in (signal.SIGINT, signal.SIGTERM):
|
|
789
908
|
loop.remove_signal_handler(sig)
|
|
790
909
|
|
|
791
|
-
async def shutdown(self,
|
|
910
|
+
async def shutdown(self, signal_received):
|
|
792
911
|
"""Handle shutdown signals gracefully."""
|
|
793
|
-
self.logger.info(f"Received exit signal {
|
|
912
|
+
self.logger.info(f"Received exit signal {signal_received.name}...")
|
|
794
913
|
|
|
795
|
-
# Get all running tasks
|
|
914
|
+
# Get all running tasks except the current shutdown task
|
|
796
915
|
tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
|
|
797
916
|
|
|
798
917
|
if tasks:
|
|
@@ -801,17 +920,15 @@ class KaraokePrep:
|
|
|
801
920
|
for task in tasks:
|
|
802
921
|
task.cancel()
|
|
803
922
|
|
|
804
|
-
self.logger.info("Received cancellation request, cleaning up...")
|
|
805
|
-
|
|
806
923
|
# Wait for all tasks to complete with cancellation
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
except asyncio.CancelledError:
|
|
810
|
-
pass
|
|
924
|
+
# Use return_exceptions=True to gather all results without raising
|
|
925
|
+
await asyncio.gather(*tasks, return_exceptions=True)
|
|
811
926
|
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
927
|
+
self.logger.info("Cleanup complete")
|
|
928
|
+
|
|
929
|
+
# Raise KeyboardInterrupt to propagate the cancellation up the call stack
|
|
930
|
+
# This allows the main event loop to exit cleanly
|
|
931
|
+
raise KeyboardInterrupt()
|
|
815
932
|
|
|
816
933
|
async def process_playlist(self):
|
|
817
934
|
if self.artist is None or self.title is None:
|
karaoke_gen/lyrics_processor.py
CHANGED
|
@@ -11,6 +11,9 @@ from .utils import sanitize_filename
|
|
|
11
11
|
|
|
12
12
|
# Placeholder class or functions for lyrics processing
|
|
13
13
|
class LyricsProcessor:
|
|
14
|
+
# Standard countdown padding duration used by LyricsTranscriber
|
|
15
|
+
COUNTDOWN_PADDING_SECONDS = 3.0
|
|
16
|
+
|
|
14
17
|
def __init__(
|
|
15
18
|
self, logger, style_params_json, lyrics_file, skip_transcription, skip_transcription_review, render_video, subtitle_offset_ms
|
|
16
19
|
):
|
|
@@ -22,6 +25,67 @@ class LyricsProcessor:
|
|
|
22
25
|
self.render_video = render_video
|
|
23
26
|
self.subtitle_offset_ms = subtitle_offset_ms
|
|
24
27
|
|
|
28
|
+
def _detect_countdown_padding_from_lrc(self, lrc_filepath):
    """
    Decide from an LRC file whether countdown padding was applied.

    Two checks run in order:
      1. The literal countdown text "3... 2... 1..." appears anywhere in the file.
      2. Otherwise, the first "[mm:ss.xx]" / "[mm:ss.xxx]" timestamp in the file
         is at 2.5 seconds or later.

    Any exception while reading or parsing is logged as a warning and treated
    as "no padding".

    Args:
        lrc_filepath: Path to the LRC file

    Returns:
        Tuple of (countdown_padding_added: bool, countdown_padding_seconds: float);
        the seconds value is COUNTDOWN_PADDING_SECONDS when padding is detected
        and 0.0 otherwise.
    """
    countdown_marker = "3... 2... 1..."

    try:
        with open(lrc_filepath, 'r', encoding='utf-8') as lrc_file:
            lrc_text = lrc_file.read()

        # Check 1: the countdown text itself
        if countdown_marker in lrc_text:
            self.logger.info(f"Detected countdown padding from LRC: found countdown text '{countdown_marker}'")
            return (True, self.COUNTDOWN_PADDING_SECONDS)

        # Check 2 (fallback): timing of the first timestamp in the file
        first_stamp = re.search(r'\[(\d{1,2}):(\d{2})\.(\d{2,3})\]', lrc_text)
        if first_stamp is None:
            self.logger.debug("No timestamps found in LRC file")
            return (False, 0.0)

        minute_part, second_part, fraction_part = first_stamp.groups()
        # A two-digit fraction is centiseconds, a three-digit one is milliseconds
        milliseconds = int(fraction_part) * 10 if len(fraction_part) == 2 else int(fraction_part)
        first_lyric_time = int(minute_part) * 60 + int(second_part) + milliseconds / 1000.0

        self.logger.debug(f"First lyric timestamp in LRC: {first_lyric_time:.3f}s")

        # NOTE(review): any file whose first timestamp is >= 2.5s passes this check,
        # including songs with a naturally long intro - confirm the fallback cannot
        # misreport padding for such files.
        if first_lyric_time < 2.5:
            return (False, 0.0)

        self.logger.info(f"Detected countdown padding from LRC: first lyric at {first_lyric_time:.2f}s")
        return (True, self.COUNTDOWN_PADDING_SECONDS)

    except Exception as e:
        self.logger.warning(f"Failed to detect countdown padding from LRC file: {e}")
        return (False, 0.0)
|
|
88
|
+
|
|
25
89
|
def find_best_split_point(self, line):
|
|
26
90
|
"""
|
|
27
91
|
Find the best split point in a line based on the specified criteria.
|
|
@@ -103,6 +167,76 @@ class LyricsProcessor:
|
|
|
103
167
|
|
|
104
168
|
return processed_lines
|
|
105
169
|
|
|
170
|
+
def _check_transcription_providers(self) -> dict:
    """
    Report which transcription providers have their environment variables set.

    Calls load_dotenv() and then reads AUDIOSHAKE_API_TOKEN, RUNPOD_API_KEY and
    WHISPER_RUNPOD_ID from the environment. AudioShake counts as configured when
    its token is non-empty; Whisper needs both RunPod values.

    Returns:
        dict with 'configured' (list of provider names) and 'missing'
        (list of descriptions of what is not configured)
    """
    load_dotenv()

    ready_providers = []
    absent_providers = []

    # AudioShake: a single token
    if os.getenv("AUDIOSHAKE_API_TOKEN"):
        ready_providers.append("AudioShake")
        self.logger.debug("AudioShake transcription provider: configured")
    else:
        absent_providers.append("AudioShake (AUDIOSHAKE_API_TOKEN)")
        self.logger.debug("AudioShake transcription provider: not configured (missing AUDIOSHAKE_API_TOKEN)")

    # Whisper via RunPod: API key plus endpoint id
    has_runpod_key = bool(os.getenv("RUNPOD_API_KEY"))
    has_whisper_id = bool(os.getenv("WHISPER_RUNPOD_ID"))

    if has_runpod_key and has_whisper_id:
        ready_providers.append("Whisper (RunPod)")
        self.logger.debug("Whisper transcription provider: configured")
    else:
        # (key present, id present) -> (entry for the missing list, debug message)
        whisper_gaps = {
            (True, False): (
                "Whisper (missing WHISPER_RUNPOD_ID)",
                "Whisper transcription provider: partially configured (missing WHISPER_RUNPOD_ID)",
            ),
            (False, True): (
                "Whisper (missing RUNPOD_API_KEY)",
                "Whisper transcription provider: partially configured (missing RUNPOD_API_KEY)",
            ),
            (False, False): (
                "Whisper (RUNPOD_API_KEY + WHISPER_RUNPOD_ID)",
                "Whisper transcription provider: not configured",
            ),
        }
        missing_entry, debug_message = whisper_gaps[(has_runpod_key, has_whisper_id)]
        absent_providers.append(missing_entry)
        self.logger.debug(debug_message)

    return {"configured": ready_providers, "missing": absent_providers}
|
|
208
|
+
|
|
209
|
+
def _build_transcription_provider_error_message(self, missing_providers: list) -> str:
    """
    Compose the multi-line help text shown when no transcription provider is configured.

    Args:
        missing_providers: Descriptions of the missing provider configurations;
            they are listed comma-separated near the end of the message.

    Returns:
        The complete message as a single string (no trailing newline).
    """
    # The explanatory paragraph is one long line in the output
    overview = (
        "Karaoke video generation requires at least one transcription provider to create "
        "synchronized lyrics. Without a transcription provider, the system cannot generate "
        "the word-level timing data needed for the karaoke video."
    )
    missing_summary = ", ".join(missing_providers)

    message_lines = [
        "No transcription providers configured!",
        "",
        overview,
        "",
        "AVAILABLE TRANSCRIPTION PROVIDERS:",
        "",
        "1. AudioShake (Recommended - Commercial, high-quality)",
        " - Set environment variable: AUDIOSHAKE_API_TOKEN=your_token",
        " - Get an API key at: https://www.audioshake.ai/",
        "",
        "2. Whisper via RunPod (Open-source alternative)",
        " - Set environment variables:",
        " RUNPOD_API_KEY=your_key",
        " WHISPER_RUNPOD_ID=your_endpoint_id",
        " - Set up a Whisper endpoint at: https://www.runpod.io/",
        "",
        "ALTERNATIVES:",
        "",
        "- Use --skip-lyrics flag to generate instrumental-only karaoke (no synchronized lyrics)",
        "- Use --lyrics_file to provide pre-timed lyrics (still needs transcription for timing)",
        "",
        f"Missing provider configurations: {missing_summary}",
        "",
        "See README.md 'Transcription Providers' section for detailed setup instructions.",
    ]
    return "\n".join(message_lines)
|
|
239
|
+
|
|
106
240
|
def transcribe_lyrics(self, input_audio_wav, artist, title, track_output_dir, lyrics_artist=None, lyrics_title=None):
|
|
107
241
|
"""
|
|
108
242
|
Transcribe lyrics for a track.
|
|
@@ -114,6 +248,9 @@ class LyricsProcessor:
|
|
|
114
248
|
track_output_dir: Output directory path
|
|
115
249
|
lyrics_artist: Artist name for lyrics processing (defaults to artist if None)
|
|
116
250
|
lyrics_title: Title for lyrics processing (defaults to title if None)
|
|
251
|
+
|
|
252
|
+
Raises:
|
|
253
|
+
ValueError: If transcription is enabled but no providers are configured
|
|
117
254
|
"""
|
|
118
255
|
# Use original artist/title for filename generation
|
|
119
256
|
filename_artist = artist
|
|
@@ -138,25 +275,56 @@ class LyricsProcessor:
|
|
|
138
275
|
lyrics_video_path = os.path.join(lyrics_dir, f"{sanitized_artist} - {sanitized_title} (With Vocals).mkv")
|
|
139
276
|
lyrics_lrc_path = os.path.join(lyrics_dir, f"{sanitized_artist} - {sanitized_title} (Karaoke).lrc")
|
|
140
277
|
|
|
141
|
-
# If files exist in parent directory, return early
|
|
278
|
+
# If files exist in parent directory, return early (but detect countdown padding first)
|
|
142
279
|
if os.path.exists(parent_video_path) and os.path.exists(parent_lrc_path):
|
|
143
|
-
self.logger.info(
|
|
280
|
+
self.logger.info("Found existing video and LRC files in parent directory, skipping transcription")
|
|
281
|
+
|
|
282
|
+
# Detect countdown padding from existing LRC file
|
|
283
|
+
countdown_padding_added, countdown_padding_seconds = self._detect_countdown_padding_from_lrc(parent_lrc_path)
|
|
284
|
+
|
|
285
|
+
if countdown_padding_added:
|
|
286
|
+
self.logger.info(f"Existing files have countdown padding: {countdown_padding_seconds}s")
|
|
287
|
+
|
|
144
288
|
return {
|
|
145
289
|
"lrc_filepath": parent_lrc_path,
|
|
146
290
|
"ass_filepath": parent_video_path,
|
|
291
|
+
"countdown_padding_added": countdown_padding_added,
|
|
292
|
+
"countdown_padding_seconds": countdown_padding_seconds,
|
|
293
|
+
"padded_audio_filepath": None, # Original padded audio may not exist
|
|
147
294
|
}
|
|
148
295
|
|
|
149
|
-
# If files exist in lyrics directory, copy to parent and return
|
|
296
|
+
# If files exist in lyrics directory, copy to parent and return (but detect countdown padding first)
|
|
150
297
|
if os.path.exists(lyrics_video_path) and os.path.exists(lyrics_lrc_path):
|
|
151
|
-
self.logger.info(
|
|
298
|
+
self.logger.info("Found existing video and LRC files in lyrics directory, copying to parent")
|
|
152
299
|
os.makedirs(track_output_dir, exist_ok=True)
|
|
153
300
|
shutil.copy2(lyrics_video_path, parent_video_path)
|
|
154
301
|
shutil.copy2(lyrics_lrc_path, parent_lrc_path)
|
|
302
|
+
|
|
303
|
+
# Detect countdown padding from existing LRC file
|
|
304
|
+
countdown_padding_added, countdown_padding_seconds = self._detect_countdown_padding_from_lrc(parent_lrc_path)
|
|
305
|
+
|
|
306
|
+
if countdown_padding_added:
|
|
307
|
+
self.logger.info(f"Existing files have countdown padding: {countdown_padding_seconds}s")
|
|
308
|
+
|
|
155
309
|
return {
|
|
156
310
|
"lrc_filepath": parent_lrc_path,
|
|
157
311
|
"ass_filepath": parent_video_path,
|
|
312
|
+
"countdown_padding_added": countdown_padding_added,
|
|
313
|
+
"countdown_padding_seconds": countdown_padding_seconds,
|
|
314
|
+
"padded_audio_filepath": None, # Original padded audio may not exist
|
|
158
315
|
}
|
|
159
316
|
|
|
317
|
+
# Check transcription provider configuration if transcription is not being skipped
|
|
318
|
+
# Do this AFTER checking for existing files, since existing files don't need transcription
|
|
319
|
+
if not self.skip_transcription:
|
|
320
|
+
provider_status = self._check_transcription_providers()
|
|
321
|
+
|
|
322
|
+
if provider_status["configured"]:
|
|
323
|
+
self.logger.info(f"Transcription providers configured: {', '.join(provider_status['configured'])}")
|
|
324
|
+
else:
|
|
325
|
+
error_msg = self._build_transcription_provider_error_message(provider_status["missing"])
|
|
326
|
+
raise ValueError(error_msg)
|
|
327
|
+
|
|
160
328
|
# Create lyrics directory if it doesn't exist
|
|
161
329
|
os.makedirs(lyrics_dir, exist_ok=True)
|
|
162
330
|
self.logger.info(f"Created lyrics directory: {lyrics_dir}")
|
karaoke_gen/utils/bulk_cli.py
CHANGED
|
@@ -19,6 +19,9 @@ from karaoke_gen.karaoke_finalise import KaraokeFinalise
|
|
|
19
19
|
# Global logger
|
|
20
20
|
logger = logging.getLogger(__name__)
|
|
21
21
|
logger.setLevel(logging.INFO) # Set initial log level
|
|
22
|
+
# Prevent log propagation to root logger to avoid duplicate logs
|
|
23
|
+
# when external packages (like lyrics_converter) configure root logger handlers
|
|
24
|
+
logger.propagate = False
|
|
22
25
|
|
|
23
26
|
|
|
24
27
|
async def process_track_prep(row, args, logger, log_formatter):
|
karaoke_gen/utils/cli_args.py
CHANGED
|
@@ -258,8 +258,8 @@ def create_parser(prog: str = "karaoke-gen") -> argparse.ArgumentParser:
|
|
|
258
258
|
style_group.add_argument(
|
|
259
259
|
"--background_video_darkness",
|
|
260
260
|
type=int,
|
|
261
|
-
default=
|
|
262
|
-
help="Optional: Darkness overlay percentage (0-100) for video background (default: %(default)s). Example: --background_video_darkness=
|
|
261
|
+
default=50,
|
|
262
|
+
help="Optional: Darkness overlay percentage (0-100) for video background (default: %(default)s). Example: --background_video_darkness=20",
|
|
263
263
|
)
|
|
264
264
|
|
|
265
265
|
# Finalisation Configuration
|
|
@@ -352,8 +352,11 @@ def create_parser(prog: str = "karaoke-gen") -> argparse.ArgumentParser:
|
|
|
352
352
|
)
|
|
353
353
|
remote_group.add_argument(
|
|
354
354
|
"--review-ui-url",
|
|
355
|
-
default=os.environ.get('REVIEW_UI_URL', 'https://
|
|
356
|
-
help="Lyrics review UI URL
|
|
355
|
+
default=os.environ.get('REVIEW_UI_URL', os.environ.get('LYRICS_REVIEW_UI_URL', 'https://gen.nomadkaraoke.com/lyrics')),
|
|
356
|
+
help="Lyrics review UI URL. For remote mode: defaults to 'https://gen.nomadkaraoke.com/lyrics'. "
|
|
357
|
+
"For local mode: defaults to bundled frontend (from lyrics_transcriber/frontend/). "
|
|
358
|
+
"Use 'http://localhost:5173' to develop against Vite dev server. "
|
|
359
|
+
"(env: REVIEW_UI_URL or LYRICS_REVIEW_UI_URL)",
|
|
357
360
|
)
|
|
358
361
|
remote_group.add_argument(
|
|
359
362
|
"--poll-interval",
|