karaoke-gen 0.75.53__py3-none-any.whl → 0.81.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- karaoke_gen/audio_fetcher.py +218 -0
- karaoke_gen/instrumental_review/static/index.html +179 -16
- karaoke_gen/karaoke_gen.py +191 -25
- karaoke_gen/lyrics_processor.py +39 -31
- karaoke_gen/utils/__init__.py +26 -0
- karaoke_gen/utils/cli_args.py +9 -1
- karaoke_gen/utils/gen_cli.py +1 -1
- karaoke_gen/utils/remote_cli.py +33 -6
- {karaoke_gen-0.75.53.dist-info → karaoke_gen-0.81.1.dist-info}/METADATA +80 -4
- {karaoke_gen-0.75.53.dist-info → karaoke_gen-0.81.1.dist-info}/RECORD +50 -43
- lyrics_transcriber/core/config.py +8 -0
- lyrics_transcriber/core/controller.py +43 -1
- lyrics_transcriber/correction/agentic/providers/config.py +6 -0
- lyrics_transcriber/correction/agentic/providers/model_factory.py +24 -1
- lyrics_transcriber/correction/agentic/router.py +17 -13
- lyrics_transcriber/frontend/.gitignore +1 -0
- lyrics_transcriber/frontend/e2e/agentic-corrections.spec.ts +207 -0
- lyrics_transcriber/frontend/e2e/fixtures/agentic-correction-data.json +226 -0
- lyrics_transcriber/frontend/index.html +5 -1
- lyrics_transcriber/frontend/package-lock.json +4553 -0
- lyrics_transcriber/frontend/package.json +7 -1
- lyrics_transcriber/frontend/playwright.config.ts +69 -0
- lyrics_transcriber/frontend/public/nomad-karaoke-logo.svg +5 -0
- lyrics_transcriber/frontend/src/App.tsx +88 -59
- lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +55 -21
- lyrics_transcriber/frontend/src/components/AppHeader.tsx +65 -0
- lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +39 -35
- lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +9 -9
- lyrics_transcriber/frontend/src/components/EditModal.tsx +1 -1
- lyrics_transcriber/frontend/src/components/EditWordList.tsx +1 -1
- lyrics_transcriber/frontend/src/components/Header.tsx +96 -3
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +120 -3
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +22 -21
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +12 -2
- lyrics_transcriber/frontend/src/components/WordDivider.tsx +3 -3
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +122 -35
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +2 -2
- lyrics_transcriber/frontend/src/components/shared/constants.ts +15 -5
- lyrics_transcriber/frontend/src/components/shared/types.ts +6 -0
- lyrics_transcriber/frontend/src/main.tsx +1 -7
- lyrics_transcriber/frontend/src/theme.ts +337 -135
- lyrics_transcriber/frontend/vite.config.ts +5 -0
- lyrics_transcriber/frontend/yarn.lock +1005 -1046
- lyrics_transcriber/output/generator.py +50 -3
- lyrics_transcriber/review/server.py +1 -1
- lyrics_transcriber/transcribers/local_whisper.py +260 -0
- {karaoke_gen-0.75.53.dist-info → karaoke_gen-0.81.1.dist-info}/WHEEL +0 -0
- {karaoke_gen-0.75.53.dist-info → karaoke_gen-0.81.1.dist-info}/entry_points.txt +0 -0
- {karaoke_gen-0.75.53.dist-info → karaoke_gen-0.81.1.dist-info}/licenses/LICENSE +0 -0
karaoke_gen/karaoke_gen.py
CHANGED
|
@@ -31,6 +31,12 @@ from .video_generator import VideoGenerator
|
|
|
31
31
|
from .video_background_processor import VideoBackgroundProcessor
|
|
32
32
|
from .audio_fetcher import create_audio_fetcher, AudioFetcherError, NoResultsError, UserCancelledError
|
|
33
33
|
|
|
34
|
+
# Import lyrics_transcriber components for post-review countdown and video rendering
|
|
35
|
+
from lyrics_transcriber.output.countdown_processor import CountdownProcessor
|
|
36
|
+
from lyrics_transcriber.output.generator import OutputGenerator
|
|
37
|
+
from lyrics_transcriber.types import CorrectionResult
|
|
38
|
+
from lyrics_transcriber.core.config import OutputConfig as LyricsOutputConfig
|
|
39
|
+
|
|
34
40
|
|
|
35
41
|
class KaraokePrep:
|
|
36
42
|
def __init__(
|
|
@@ -482,41 +488,56 @@ class KaraokePrep:
|
|
|
482
488
|
self.logger.info(f"Found existing media files matching extractor '{self.extractor}', skipping download/conversion.")
|
|
483
489
|
|
|
484
490
|
elif getattr(self, '_use_audio_fetcher', False):
|
|
485
|
-
# Use flacfetch to search and download audio
|
|
486
|
-
self.logger.info(f"Using flacfetch to search and download: {self.artist} - {self.title}")
|
|
487
|
-
|
|
488
491
|
try:
|
|
489
|
-
#
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
492
|
+
# Check if this is a URL download or search+download
|
|
493
|
+
if getattr(self, '_use_url_download', False):
|
|
494
|
+
# Direct URL download (e.g., YouTube URL)
|
|
495
|
+
self.logger.info(f"Using flacfetch to download from URL: {self.url}")
|
|
496
|
+
|
|
497
|
+
fetch_result = self.audio_fetcher.download_from_url(
|
|
498
|
+
url=self.url,
|
|
499
|
+
output_dir=track_output_dir,
|
|
500
|
+
output_filename=f"{artist_title} (youtube)" if artist_title != "Unknown - Unknown" else None,
|
|
501
|
+
artist=self.artist,
|
|
502
|
+
title=self.title,
|
|
503
|
+
)
|
|
504
|
+
|
|
505
|
+
# Update extractor to reflect the source
|
|
506
|
+
self.extractor = "youtube"
|
|
507
|
+
else:
|
|
508
|
+
# Use flacfetch to search and download audio
|
|
509
|
+
self.logger.info(f"Using flacfetch to search and download: {self.artist} - {self.title}")
|
|
510
|
+
|
|
511
|
+
fetch_result = self.audio_fetcher.search_and_download(
|
|
512
|
+
artist=self.artist,
|
|
513
|
+
title=self.title,
|
|
514
|
+
output_dir=track_output_dir,
|
|
515
|
+
output_filename=f"{artist_title} (flacfetch)",
|
|
516
|
+
auto_select=self.auto_download,
|
|
517
|
+
)
|
|
518
|
+
|
|
519
|
+
# Update extractor to reflect the actual provider used
|
|
520
|
+
self.extractor = f"flacfetch-{fetch_result.provider}"
|
|
521
|
+
|
|
501
522
|
# Set up the output paths
|
|
502
523
|
output_filename_no_extension = os.path.join(track_output_dir, f"{artist_title} ({self.extractor})")
|
|
503
|
-
|
|
524
|
+
|
|
504
525
|
# Copy/move the downloaded file to the expected location
|
|
505
526
|
processed_track["input_media"] = self.file_handler.download_audio_from_fetcher_result(
|
|
506
527
|
fetch_result.filepath, output_filename_no_extension
|
|
507
528
|
)
|
|
508
|
-
|
|
529
|
+
|
|
509
530
|
self.logger.info(f"Audio downloaded from {fetch_result.provider}: {processed_track['input_media']}")
|
|
510
|
-
|
|
531
|
+
|
|
511
532
|
# Convert to WAV for audio processing
|
|
512
533
|
self.logger.info("Converting downloaded audio to WAV for processing...")
|
|
513
534
|
processed_track["input_audio_wav"] = self.file_handler.convert_to_wav(
|
|
514
535
|
processed_track["input_media"], output_filename_no_extension
|
|
515
536
|
)
|
|
516
|
-
|
|
537
|
+
|
|
517
538
|
# No still image for audio-only downloads
|
|
518
539
|
processed_track["input_still_image"] = None
|
|
519
|
-
|
|
540
|
+
|
|
520
541
|
except UserCancelledError:
|
|
521
542
|
# User cancelled - propagate up to CLI for graceful exit
|
|
522
543
|
raise
|
|
@@ -692,6 +713,113 @@ class KaraokePrep:
|
|
|
692
713
|
|
|
693
714
|
self.logger.info("=== Parallel Processing Complete ===")
|
|
694
715
|
|
|
716
|
+
# === POST-TRANSCRIPTION: Add countdown and render video ===
|
|
717
|
+
# Since lyrics_processor.py now always defers countdown and video rendering,
|
|
718
|
+
# we handle it here after human review is complete. This ensures the review UI
|
|
719
|
+
# shows accurate, unshifted timestamps (same behavior as cloud backend).
|
|
720
|
+
if processed_track.get("lyrics") and self.render_video:
|
|
721
|
+
self.logger.info("=== Processing Countdown and Video Rendering ===")
|
|
722
|
+
|
|
723
|
+
from .utils import sanitize_filename
|
|
724
|
+
sanitized_artist = sanitize_filename(self.artist)
|
|
725
|
+
sanitized_title = sanitize_filename(self.title)
|
|
726
|
+
lyrics_dir = os.path.join(track_output_dir, "lyrics")
|
|
727
|
+
|
|
728
|
+
# Find the corrections JSON file
|
|
729
|
+
corrections_filename = f"{sanitized_artist} - {sanitized_title} (Lyrics Corrections).json"
|
|
730
|
+
corrections_filepath = os.path.join(lyrics_dir, corrections_filename)
|
|
731
|
+
|
|
732
|
+
if os.path.exists(corrections_filepath):
|
|
733
|
+
self.logger.info(f"Loading corrections from: {corrections_filepath}")
|
|
734
|
+
|
|
735
|
+
with open(corrections_filepath, 'r', encoding='utf-8') as f:
|
|
736
|
+
corrections_data = json.load(f)
|
|
737
|
+
|
|
738
|
+
# Convert to CorrectionResult
|
|
739
|
+
correction_result = CorrectionResult.from_dict(corrections_data)
|
|
740
|
+
self.logger.info(f"Loaded CorrectionResult with {len(correction_result.corrected_segments)} segments")
|
|
741
|
+
|
|
742
|
+
# Get the audio file path
|
|
743
|
+
audio_path = processed_track["input_audio_wav"]
|
|
744
|
+
|
|
745
|
+
# Add countdown intro if needed (songs that start within 3 seconds)
|
|
746
|
+
self.logger.info("Processing countdown intro (if needed)...")
|
|
747
|
+
cache_dir = os.path.join(track_output_dir, "cache")
|
|
748
|
+
os.makedirs(cache_dir, exist_ok=True)
|
|
749
|
+
|
|
750
|
+
countdown_processor = CountdownProcessor(
|
|
751
|
+
cache_dir=cache_dir,
|
|
752
|
+
logger=self.logger,
|
|
753
|
+
)
|
|
754
|
+
|
|
755
|
+
correction_result, audio_path, padding_added, padding_seconds = countdown_processor.process(
|
|
756
|
+
correction_result=correction_result,
|
|
757
|
+
audio_filepath=audio_path,
|
|
758
|
+
)
|
|
759
|
+
|
|
760
|
+
# Update processed_track with countdown info
|
|
761
|
+
processed_track["countdown_padding_added"] = padding_added
|
|
762
|
+
processed_track["countdown_padding_seconds"] = padding_seconds
|
|
763
|
+
if padding_added:
|
|
764
|
+
processed_track["padded_vocals_audio"] = audio_path
|
|
765
|
+
self.logger.info(
|
|
766
|
+
f"=== COUNTDOWN PADDING ADDED ===\n"
|
|
767
|
+
f"Added {padding_seconds}s padding to audio and shifted timestamps.\n"
|
|
768
|
+
f"Instrumental tracks will be padded after separation to maintain sync."
|
|
769
|
+
)
|
|
770
|
+
else:
|
|
771
|
+
self.logger.info("No countdown needed - song starts after 3 seconds")
|
|
772
|
+
|
|
773
|
+
# Save the updated corrections with countdown timestamps
|
|
774
|
+
updated_corrections_data = correction_result.to_dict()
|
|
775
|
+
with open(corrections_filepath, 'w', encoding='utf-8') as f:
|
|
776
|
+
json.dump(updated_corrections_data, f, indent=2)
|
|
777
|
+
self.logger.info(f"Saved countdown-adjusted corrections to: {corrections_filepath}")
|
|
778
|
+
|
|
779
|
+
# Render video with lyrics
|
|
780
|
+
self.logger.info("Rendering karaoke video with synchronized lyrics...")
|
|
781
|
+
|
|
782
|
+
output_config = LyricsOutputConfig(
|
|
783
|
+
output_dir=lyrics_dir,
|
|
784
|
+
cache_dir=cache_dir,
|
|
785
|
+
output_styles_json=self.style_params_json,
|
|
786
|
+
render_video=True,
|
|
787
|
+
generate_cdg=False,
|
|
788
|
+
generate_plain_text=True,
|
|
789
|
+
generate_lrc=True,
|
|
790
|
+
video_resolution="4k",
|
|
791
|
+
subtitle_offset_ms=self.subtitle_offset_ms,
|
|
792
|
+
)
|
|
793
|
+
|
|
794
|
+
output_generator = OutputGenerator(output_config, self.logger)
|
|
795
|
+
output_prefix = f"{sanitized_artist} - {sanitized_title}"
|
|
796
|
+
|
|
797
|
+
outputs = output_generator.generate_outputs(
|
|
798
|
+
transcription_corrected=correction_result,
|
|
799
|
+
lyrics_results={}, # Lyrics already written during transcription phase
|
|
800
|
+
audio_filepath=audio_path,
|
|
801
|
+
output_prefix=output_prefix,
|
|
802
|
+
)
|
|
803
|
+
|
|
804
|
+
# Copy video to expected location in parent directory
|
|
805
|
+
if outputs and outputs.video:
|
|
806
|
+
source_video = outputs.video
|
|
807
|
+
dest_video = os.path.join(track_output_dir, f"{artist_title} (With Vocals).mkv")
|
|
808
|
+
shutil.copy2(source_video, dest_video)
|
|
809
|
+
self.logger.info(f"Video rendered successfully: {dest_video}")
|
|
810
|
+
processed_track["with_vocals_video"] = dest_video
|
|
811
|
+
|
|
812
|
+
# Update ASS filepath for video background processing
|
|
813
|
+
if outputs.ass:
|
|
814
|
+
processed_track["ass_filepath"] = outputs.ass
|
|
815
|
+
else:
|
|
816
|
+
self.logger.warning("Video rendering did not produce expected output")
|
|
817
|
+
else:
|
|
818
|
+
self.logger.warning(f"Corrections file not found: {corrections_filepath}")
|
|
819
|
+
self.logger.warning("Skipping countdown processing and video rendering")
|
|
820
|
+
elif not self.render_video:
|
|
821
|
+
self.logger.info("Video rendering disabled - skipping countdown and video generation")
|
|
822
|
+
|
|
695
823
|
# Apply video background if requested and lyrics were processed
|
|
696
824
|
if self.video_background_processor and processed_track.get("lyrics"):
|
|
697
825
|
self.logger.info("=== Processing Video Background ===")
|
|
@@ -991,17 +1119,56 @@ class KaraokePrep:
|
|
|
991
1119
|
|
|
992
1120
|
return tracks
|
|
993
1121
|
|
|
1122
|
+
def _is_url(self, string: str) -> bool:
|
|
1123
|
+
"""Check if a string is a URL."""
|
|
1124
|
+
return string is not None and (string.startswith("http://") or string.startswith("https://"))
|
|
1125
|
+
|
|
994
1126
|
async def process(self):
|
|
995
1127
|
if self.input_media is not None and os.path.isdir(self.input_media):
|
|
996
1128
|
self.logger.info(f"Input media {self.input_media} is a local folder, processing each file individually...")
|
|
997
1129
|
return await self.process_folder()
|
|
998
1130
|
elif self.input_media is not None and os.path.isfile(self.input_media):
|
|
999
1131
|
self.logger.info(f"Input media {self.input_media} is a local file, audio download will be skipped")
|
|
1132
|
+
return [await self.prep_single_track()]
|
|
1133
|
+
elif self.input_media is not None and self._is_url(self.input_media):
|
|
1134
|
+
# URL provided - download directly via flacfetch
|
|
1135
|
+
self.logger.info(f"Input media {self.input_media} is a URL, downloading via flacfetch...")
|
|
1136
|
+
|
|
1137
|
+
# Extract video ID for metadata if it's a YouTube URL
|
|
1138
|
+
video_id = None
|
|
1139
|
+
youtube_patterns = [
|
|
1140
|
+
r'(?:youtube\.com/watch\?v=|youtu\.be/)([a-zA-Z0-9_-]{11})',
|
|
1141
|
+
r'youtube\.com/embed/([a-zA-Z0-9_-]{11})',
|
|
1142
|
+
r'youtube\.com/v/([a-zA-Z0-9_-]{11})',
|
|
1143
|
+
]
|
|
1144
|
+
for pattern in youtube_patterns:
|
|
1145
|
+
match = re.search(pattern, self.input_media)
|
|
1146
|
+
if match:
|
|
1147
|
+
video_id = match.group(1)
|
|
1148
|
+
break
|
|
1149
|
+
|
|
1150
|
+
# Set up the extracted_info for metadata consistency
|
|
1151
|
+
self.extracted_info = {
|
|
1152
|
+
"title": f"{self.artist} - {self.title}" if self.artist and self.title else video_id or "Unknown",
|
|
1153
|
+
"artist": self.artist or "",
|
|
1154
|
+
"track_title": self.title or "",
|
|
1155
|
+
"extractor_key": "youtube",
|
|
1156
|
+
"id": video_id or self.input_media,
|
|
1157
|
+
"url": self.input_media,
|
|
1158
|
+
"source": "youtube",
|
|
1159
|
+
}
|
|
1160
|
+
self.extractor = "youtube"
|
|
1161
|
+
self.url = self.input_media
|
|
1162
|
+
|
|
1163
|
+
# Mark that we need to use audio fetcher for URL download
|
|
1164
|
+
self._use_audio_fetcher = True
|
|
1165
|
+
self._use_url_download = True # New flag for URL-based download
|
|
1166
|
+
|
|
1000
1167
|
return [await self.prep_single_track()]
|
|
1001
1168
|
elif self.artist and self.title:
|
|
1002
1169
|
# No input file provided - use flacfetch to search and download audio
|
|
1003
1170
|
self.logger.info(f"No input file provided, using flacfetch to search for: {self.artist} - {self.title}")
|
|
1004
|
-
|
|
1171
|
+
|
|
1005
1172
|
# Set up the extracted_info for metadata consistency
|
|
1006
1173
|
self.extracted_info = {
|
|
1007
1174
|
"title": f"{self.artist} - {self.title}",
|
|
@@ -1014,13 +1181,12 @@ class KaraokePrep:
|
|
|
1014
1181
|
}
|
|
1015
1182
|
self.extractor = "flacfetch"
|
|
1016
1183
|
self.url = None # URL will be determined by flacfetch
|
|
1017
|
-
|
|
1184
|
+
|
|
1018
1185
|
# Mark that we need to use audio fetcher for download
|
|
1019
1186
|
self._use_audio_fetcher = True
|
|
1020
|
-
|
|
1187
|
+
|
|
1021
1188
|
return [await self.prep_single_track()]
|
|
1022
1189
|
else:
|
|
1023
1190
|
raise ValueError(
|
|
1024
|
-
"Either a local file path or both artist and title must be provided.
|
|
1025
|
-
"URL-based input has been replaced with flacfetch audio fetching."
|
|
1191
|
+
"Either a local file path, a URL, or both artist and title must be provided."
|
|
1026
1192
|
)
|
karaoke_gen/lyrics_processor.py
CHANGED
|
@@ -170,15 +170,15 @@ class LyricsProcessor:
|
|
|
170
170
|
def _check_transcription_providers(self) -> dict:
|
|
171
171
|
"""
|
|
172
172
|
Check which transcription providers are configured and return their status.
|
|
173
|
-
|
|
173
|
+
|
|
174
174
|
Returns:
|
|
175
175
|
dict with 'configured' (list of provider names) and 'missing' (list of missing configs)
|
|
176
176
|
"""
|
|
177
177
|
load_dotenv()
|
|
178
|
-
|
|
178
|
+
|
|
179
179
|
configured = []
|
|
180
180
|
missing = []
|
|
181
|
-
|
|
181
|
+
|
|
182
182
|
# Check AudioShake
|
|
183
183
|
audioshake_token = os.getenv("AUDIOSHAKE_API_TOKEN")
|
|
184
184
|
if audioshake_token:
|
|
@@ -187,7 +187,7 @@ class LyricsProcessor:
|
|
|
187
187
|
else:
|
|
188
188
|
missing.append("AudioShake (AUDIOSHAKE_API_TOKEN)")
|
|
189
189
|
self.logger.debug("AudioShake transcription provider: not configured (missing AUDIOSHAKE_API_TOKEN)")
|
|
190
|
-
|
|
190
|
+
|
|
191
191
|
# Check Whisper via RunPod
|
|
192
192
|
runpod_key = os.getenv("RUNPOD_API_KEY")
|
|
193
193
|
whisper_id = os.getenv("WHISPER_RUNPOD_ID")
|
|
@@ -203,7 +203,16 @@ class LyricsProcessor:
|
|
|
203
203
|
else:
|
|
204
204
|
missing.append("Whisper (RUNPOD_API_KEY + WHISPER_RUNPOD_ID)")
|
|
205
205
|
self.logger.debug("Whisper transcription provider: not configured")
|
|
206
|
-
|
|
206
|
+
|
|
207
|
+
# Check Local Whisper (whisper-timestamped)
|
|
208
|
+
try:
|
|
209
|
+
import whisper_timestamped
|
|
210
|
+
configured.append("Local Whisper")
|
|
211
|
+
self.logger.debug("Local Whisper transcription provider: configured (whisper-timestamped installed)")
|
|
212
|
+
except ImportError:
|
|
213
|
+
missing.append("Local Whisper (pip install karaoke-gen[local-whisper])")
|
|
214
|
+
self.logger.debug("Local Whisper transcription provider: not configured (whisper-timestamped not installed)")
|
|
215
|
+
|
|
207
216
|
return {"configured": configured, "missing": missing}
|
|
208
217
|
|
|
209
218
|
def _build_transcription_provider_error_message(self, missing_providers: list) -> str:
|
|
@@ -221,12 +230,18 @@ class LyricsProcessor:
|
|
|
221
230
|
" - Set environment variable: AUDIOSHAKE_API_TOKEN=your_token\n"
|
|
222
231
|
" - Get an API key at: https://www.audioshake.ai/\n"
|
|
223
232
|
"\n"
|
|
224
|
-
"2. Whisper via RunPod (
|
|
233
|
+
"2. Whisper via RunPod (Cloud-based open-source)\n"
|
|
225
234
|
" - Set environment variables:\n"
|
|
226
235
|
" RUNPOD_API_KEY=your_key\n"
|
|
227
236
|
" WHISPER_RUNPOD_ID=your_endpoint_id\n"
|
|
228
237
|
" - Set up a Whisper endpoint at: https://www.runpod.io/\n"
|
|
229
238
|
"\n"
|
|
239
|
+
"3. Local Whisper (No cloud required - runs on your machine)\n"
|
|
240
|
+
" - Install with: pip install karaoke-gen[local-whisper]\n"
|
|
241
|
+
" - For CPU-only: pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu\n"
|
|
242
|
+
" pip install karaoke-gen[local-whisper]\n"
|
|
243
|
+
" - Requires 2-10GB RAM depending on model size\n"
|
|
244
|
+
"\n"
|
|
230
245
|
"ALTERNATIVES:\n"
|
|
231
246
|
"\n"
|
|
232
247
|
"- Use --skip-lyrics flag to generate instrumental-only karaoke (no synchronized lyrics)\n"
|
|
@@ -348,6 +363,10 @@ class LyricsProcessor:
|
|
|
348
363
|
# Create config objects for LyricsTranscriber
|
|
349
364
|
transcriber_config = TranscriberConfig(
|
|
350
365
|
audioshake_api_token=env_config.get("audioshake_api_token"),
|
|
366
|
+
runpod_api_key=env_config.get("runpod_api_key"),
|
|
367
|
+
whisper_runpod_id=env_config.get("whisper_runpod_id"),
|
|
368
|
+
# Local Whisper is enabled by default as a fallback when no cloud providers are configured
|
|
369
|
+
enable_local_whisper=True,
|
|
351
370
|
)
|
|
352
371
|
|
|
353
372
|
lyrics_config = LyricsConfig(
|
|
@@ -364,41 +383,30 @@ class LyricsProcessor:
|
|
|
364
383
|
self.logger.info(f" rapidapi_key: {env_config.get('rapidapi_key')[:3] + '...' if env_config.get('rapidapi_key') else 'None'}")
|
|
365
384
|
self.logger.info(f" lyrics_file: {self.lyrics_file}")
|
|
366
385
|
|
|
367
|
-
#
|
|
368
|
-
#
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
enable_review_setting = not self.skip_transcription_review and not is_serverless
|
|
378
|
-
|
|
379
|
-
if is_serverless and not self.skip_transcription_review:
|
|
380
|
-
self.logger.info("Detected serverless environment - disabling interactive review to prevent hanging")
|
|
381
|
-
|
|
382
|
-
# In serverless environment, disable video generation during Phase 1 to save compute
|
|
383
|
-
# Video will be generated in Phase 2 after human review
|
|
384
|
-
serverless_render_video = render_video and not is_serverless
|
|
385
|
-
|
|
386
|
-
if is_serverless and render_video:
|
|
387
|
-
self.logger.info("Detected serverless environment - deferring video generation until after review")
|
|
388
|
-
|
|
386
|
+
# Always defer countdown and video rendering to a later phase.
|
|
387
|
+
# This ensures the review UI (both local and cloud) shows original timing
|
|
388
|
+
# without the 3-second countdown shift. The caller is responsible for:
|
|
389
|
+
# - Local CLI: karaoke_gen.py adds countdown and renders video after transcription
|
|
390
|
+
# - Cloud backend: render_video_worker.py adds countdown and renders video
|
|
391
|
+
#
|
|
392
|
+
# This design ensures consistent behavior regardless of environment,
|
|
393
|
+
# and the review UI always shows accurate, unshifted timestamps.
|
|
394
|
+
self.logger.info("Deferring countdown and video rendering to post-review phase")
|
|
395
|
+
|
|
389
396
|
output_config = OutputConfig(
|
|
390
397
|
output_styles_json=self.style_params_json,
|
|
391
398
|
output_dir=lyrics_dir,
|
|
392
|
-
render_video=
|
|
399
|
+
render_video=False, # Always defer - caller handles video rendering after countdown
|
|
393
400
|
fetch_lyrics=True,
|
|
394
401
|
run_transcription=not self.skip_transcription,
|
|
395
402
|
run_correction=True,
|
|
396
403
|
generate_plain_text=True,
|
|
397
404
|
generate_lrc=True,
|
|
398
|
-
generate_cdg=False, #
|
|
405
|
+
generate_cdg=False, # CDG generation disabled (not currently supported)
|
|
399
406
|
video_resolution="4k",
|
|
400
|
-
enable_review=
|
|
407
|
+
enable_review=not self.skip_transcription_review, # Honor the caller's setting
|
|
401
408
|
subtitle_offset_ms=self.subtitle_offset_ms,
|
|
409
|
+
add_countdown=False, # Always defer - caller handles countdown after review
|
|
402
410
|
)
|
|
403
411
|
|
|
404
412
|
# Add this log entry to debug the OutputConfig
|
karaoke_gen/utils/__init__.py
CHANGED
|
@@ -1,9 +1,35 @@
|
|
|
1
1
|
import re
|
|
2
2
|
|
|
3
|
+
# Unicode character replacements for ASCII-safe filenames
|
|
4
|
+
# These characters cause issues with HTTP headers (latin-1 encoding) and some filesystems
|
|
5
|
+
UNICODE_REPLACEMENTS = {
|
|
6
|
+
# Curly/smart quotes -> straight quotes
|
|
7
|
+
"\u2018": "'", # LEFT SINGLE QUOTATION MARK
|
|
8
|
+
"\u2019": "'", # RIGHT SINGLE QUOTATION MARK (the one causing the bug)
|
|
9
|
+
"\u201A": "'", # SINGLE LOW-9 QUOTATION MARK
|
|
10
|
+
"\u201B": "'", # SINGLE HIGH-REVERSED-9 QUOTATION MARK
|
|
11
|
+
"\u201C": '"', # LEFT DOUBLE QUOTATION MARK
|
|
12
|
+
"\u201D": '"', # RIGHT DOUBLE QUOTATION MARK
|
|
13
|
+
"\u201E": '"', # DOUBLE LOW-9 QUOTATION MARK
|
|
14
|
+
"\u201F": '"', # DOUBLE HIGH-REVERSED-9 QUOTATION MARK
|
|
15
|
+
# Other common problematic characters
|
|
16
|
+
"\u2013": "-", # EN DASH
|
|
17
|
+
"\u2014": "-", # EM DASH
|
|
18
|
+
"\u2026": "...", # HORIZONTAL ELLIPSIS
|
|
19
|
+
"\u00A0": " ", # NON-BREAKING SPACE
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
|
|
3
23
|
def sanitize_filename(filename):
|
|
4
24
|
"""Replace or remove characters that are unsafe for filenames."""
|
|
5
25
|
if filename is None:
|
|
6
26
|
return None
|
|
27
|
+
|
|
28
|
+
# First, normalize Unicode characters that cause HTTP header encoding issues
|
|
29
|
+
# (e.g., curly quotes from macOS/Word that can't be encoded in latin-1)
|
|
30
|
+
for unicode_char, ascii_replacement in UNICODE_REPLACEMENTS.items():
|
|
31
|
+
filename = filename.replace(unicode_char, ascii_replacement)
|
|
32
|
+
|
|
7
33
|
# Replace problematic characters with underscores
|
|
8
34
|
for char in ["\\", "/", ":", "*", "?", '"', "<", ">", "|"]:
|
|
9
35
|
filename = filename.replace(char, "_")
|
karaoke_gen/utils/cli_args.py
CHANGED
|
@@ -242,9 +242,17 @@ def create_parser(prog: str = "karaoke-gen") -> argparse.ArgumentParser:
|
|
|
242
242
|
|
|
243
243
|
# Style Configuration
|
|
244
244
|
style_group = parser.add_argument_group("Style Configuration")
|
|
245
|
+
style_group.add_argument(
|
|
246
|
+
"--theme",
|
|
247
|
+
help="Optional: Theme ID for pre-made styles stored in GCS (e.g., 'nomad', 'default'). "
|
|
248
|
+
"When using a theme, CDG/TXT are enabled by default. "
|
|
249
|
+
"Example: --theme=nomad",
|
|
250
|
+
)
|
|
245
251
|
style_group.add_argument(
|
|
246
252
|
"--style_params_json",
|
|
247
|
-
help="Optional: Path to JSON file containing style configuration.
|
|
253
|
+
help="Optional: Path to JSON file containing style configuration. "
|
|
254
|
+
"Takes precedence over --theme if both are provided. "
|
|
255
|
+
"Example: --style_params_json='/path/to/style_params.json'",
|
|
248
256
|
)
|
|
249
257
|
style_group.add_argument(
|
|
250
258
|
"--style_override",
|
karaoke_gen/utils/gen_cli.py
CHANGED
|
@@ -320,7 +320,7 @@ async def async_main():
|
|
|
320
320
|
# Check if user provided a custom value (not the default hosted URL)
|
|
321
321
|
default_hosted_urls = [
|
|
322
322
|
'https://gen.nomadkaraoke.com/lyrics',
|
|
323
|
-
'https://
|
|
323
|
+
'https://gen.nomadkaraoke.com/lyrics/'
|
|
324
324
|
]
|
|
325
325
|
if args.review_ui_url.rstrip('/') not in [url.rstrip('/') for url in default_hosted_urls]:
|
|
326
326
|
# User explicitly wants a specific URL (e.g., Vite dev server)
|
karaoke_gen/utils/remote_cli.py
CHANGED
|
@@ -292,6 +292,8 @@ class RemoteKaraokeClient:
|
|
|
292
292
|
# Two-phase workflow (Batch 6)
|
|
293
293
|
prep_only: bool = False,
|
|
294
294
|
keep_brand_code: Optional[str] = None,
|
|
295
|
+
# Theme system
|
|
296
|
+
theme_id: Optional[str] = None,
|
|
295
297
|
) -> Dict[str, Any]:
|
|
296
298
|
"""
|
|
297
299
|
Submit a new karaoke generation job from a YouTube/online URL.
|
|
@@ -322,6 +324,7 @@ class RemoteKaraokeClient:
|
|
|
322
324
|
clean_instrumental_model: Model for clean instrumental separation
|
|
323
325
|
backing_vocals_models: List of models for backing vocals separation
|
|
324
326
|
other_stems_models: List of models for other stems (bass, drums, etc.)
|
|
327
|
+
theme_id: Theme ID from GCS themes (e.g., 'nomad', 'default')
|
|
325
328
|
"""
|
|
326
329
|
self.logger.info(f"Submitting URL-based job: {url}")
|
|
327
330
|
|
|
@@ -367,7 +370,10 @@ class RemoteKaraokeClient:
|
|
|
367
370
|
create_request['prep_only'] = prep_only
|
|
368
371
|
if keep_brand_code:
|
|
369
372
|
create_request['keep_brand_code'] = keep_brand_code
|
|
370
|
-
|
|
373
|
+
# Theme system
|
|
374
|
+
if theme_id:
|
|
375
|
+
create_request['theme_id'] = theme_id
|
|
376
|
+
|
|
371
377
|
self.logger.info(f"Creating URL-based job at {self.config.service_url}/api/jobs/create-from-url")
|
|
372
378
|
|
|
373
379
|
response = self._request('POST', '/api/jobs/create-from-url', json=create_request)
|
|
@@ -396,9 +402,9 @@ class RemoteKaraokeClient:
|
|
|
396
402
|
return result
|
|
397
403
|
|
|
398
404
|
def submit_job(
|
|
399
|
-
self,
|
|
400
|
-
filepath: str,
|
|
401
|
-
artist: str,
|
|
405
|
+
self,
|
|
406
|
+
filepath: str,
|
|
407
|
+
artist: str,
|
|
402
408
|
title: str,
|
|
403
409
|
style_params_path: Optional[str] = None,
|
|
404
410
|
enable_cdg: bool = True,
|
|
@@ -425,6 +431,8 @@ class RemoteKaraokeClient:
|
|
|
425
431
|
# Two-phase workflow (Batch 6)
|
|
426
432
|
prep_only: bool = False,
|
|
427
433
|
keep_brand_code: Optional[str] = None,
|
|
434
|
+
# Theme system
|
|
435
|
+
theme_id: Optional[str] = None,
|
|
428
436
|
) -> Dict[str, Any]:
|
|
429
437
|
"""
|
|
430
438
|
Submit a new karaoke generation job with optional style configuration.
|
|
@@ -456,6 +464,7 @@ class RemoteKaraokeClient:
|
|
|
456
464
|
backing_vocals_models: List of models for backing vocals separation
|
|
457
465
|
other_stems_models: List of models for other stems (bass, drums, etc.)
|
|
458
466
|
existing_instrumental: Path to existing instrumental file to use instead of AI separation
|
|
467
|
+
theme_id: Theme ID from GCS themes (e.g., 'nomad', 'default')
|
|
459
468
|
"""
|
|
460
469
|
file_path = Path(filepath)
|
|
461
470
|
|
|
@@ -574,7 +583,10 @@ class RemoteKaraokeClient:
|
|
|
574
583
|
create_request['prep_only'] = prep_only
|
|
575
584
|
if keep_brand_code:
|
|
576
585
|
create_request['keep_brand_code'] = keep_brand_code
|
|
577
|
-
|
|
586
|
+
# Theme system
|
|
587
|
+
if theme_id:
|
|
588
|
+
create_request['theme_id'] = theme_id
|
|
589
|
+
|
|
578
590
|
response = self._request('POST', '/api/jobs/create-with-upload-urls', json=create_request)
|
|
579
591
|
|
|
580
592
|
if response.status_code != 200:
|
|
@@ -1195,6 +1207,8 @@ class RemoteKaraokeClient:
|
|
|
1195
1207
|
clean_instrumental_model: Optional[str] = None,
|
|
1196
1208
|
backing_vocals_models: Optional[list] = None,
|
|
1197
1209
|
other_stems_models: Optional[list] = None,
|
|
1210
|
+
# Theme system
|
|
1211
|
+
theme_id: Optional[str] = None,
|
|
1198
1212
|
) -> Dict[str, Any]:
|
|
1199
1213
|
"""
|
|
1200
1214
|
Search for audio by artist and title (Batch 5 - Flacfetch integration).
|
|
@@ -1247,7 +1261,10 @@ class RemoteKaraokeClient:
|
|
|
1247
1261
|
request_data['backing_vocals_models'] = backing_vocals_models
|
|
1248
1262
|
if other_stems_models:
|
|
1249
1263
|
request_data['other_stems_models'] = other_stems_models
|
|
1250
|
-
|
|
1264
|
+
# Theme system
|
|
1265
|
+
if theme_id:
|
|
1266
|
+
request_data['theme_id'] = theme_id
|
|
1267
|
+
|
|
1251
1268
|
# Prepare style files for upload if provided
|
|
1252
1269
|
style_files = []
|
|
1253
1270
|
local_style_files: Dict[str, str] = {} # file_type -> local_path
|
|
@@ -3029,6 +3046,8 @@ def main():
|
|
|
3029
3046
|
logger.info(f"Searching for: {artist} - {title}")
|
|
3030
3047
|
if getattr(args, 'auto_download', False) or config.non_interactive:
|
|
3031
3048
|
logger.info(f"Auto-download: enabled (will auto-select best source)")
|
|
3049
|
+
if getattr(args, 'theme', None):
|
|
3050
|
+
logger.info(f"Theme: {args.theme}")
|
|
3032
3051
|
if args.style_params_json:
|
|
3033
3052
|
logger.info(f"Style: {args.style_params_json}")
|
|
3034
3053
|
logger.info(f"CDG: {args.enable_cdg}, TXT: {args.enable_txt}")
|
|
@@ -3070,6 +3089,8 @@ def main():
|
|
|
3070
3089
|
clean_instrumental_model=getattr(args, 'clean_instrumental_model', None),
|
|
3071
3090
|
backing_vocals_models=getattr(args, 'backing_vocals_models', None),
|
|
3072
3091
|
other_stems_models=getattr(args, 'other_stems_models', None),
|
|
3092
|
+
# Theme system
|
|
3093
|
+
theme_id=getattr(args, 'theme', None),
|
|
3073
3094
|
)
|
|
3074
3095
|
|
|
3075
3096
|
job_id = result.get('job_id')
|
|
@@ -3106,6 +3127,8 @@ def main():
|
|
|
3106
3127
|
logger.info(f"Title: {title}")
|
|
3107
3128
|
if not artist and not title and is_url_input:
|
|
3108
3129
|
logger.info(f"Artist/Title: (will be auto-detected from URL)")
|
|
3130
|
+
if getattr(args, 'theme', None):
|
|
3131
|
+
logger.info(f"Theme: {args.theme}")
|
|
3109
3132
|
if args.style_params_json:
|
|
3110
3133
|
logger.info(f"Style: {args.style_params_json}")
|
|
3111
3134
|
logger.info(f"CDG: {args.enable_cdg}, TXT: {args.enable_txt}")
|
|
@@ -3206,6 +3229,8 @@ def main():
|
|
|
3206
3229
|
# Two-phase workflow (Batch 6)
|
|
3207
3230
|
prep_only=getattr(args, 'prep_only', False),
|
|
3208
3231
|
keep_brand_code=keep_brand_code_value,
|
|
3232
|
+
# Theme system
|
|
3233
|
+
theme_id=getattr(args, 'theme', None),
|
|
3209
3234
|
)
|
|
3210
3235
|
else:
|
|
3211
3236
|
# File-based job submission
|
|
@@ -3238,6 +3263,8 @@ def main():
|
|
|
3238
3263
|
# Two-phase workflow (Batch 6)
|
|
3239
3264
|
prep_only=getattr(args, 'prep_only', False),
|
|
3240
3265
|
keep_brand_code=keep_brand_code_value,
|
|
3266
|
+
# Theme system
|
|
3267
|
+
theme_id=getattr(args, 'theme', None),
|
|
3241
3268
|
)
|
|
3242
3269
|
job_id = result.get('job_id')
|
|
3243
3270
|
style_assets = result.get('style_assets_uploaded', [])
|