karaoke-gen 0.75.53__py3-none-any.whl → 0.81.1__py3-none-any.whl

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (50)
  1. karaoke_gen/audio_fetcher.py +218 -0
  2. karaoke_gen/instrumental_review/static/index.html +179 -16
  3. karaoke_gen/karaoke_gen.py +191 -25
  4. karaoke_gen/lyrics_processor.py +39 -31
  5. karaoke_gen/utils/__init__.py +26 -0
  6. karaoke_gen/utils/cli_args.py +9 -1
  7. karaoke_gen/utils/gen_cli.py +1 -1
  8. karaoke_gen/utils/remote_cli.py +33 -6
  9. {karaoke_gen-0.75.53.dist-info → karaoke_gen-0.81.1.dist-info}/METADATA +80 -4
  10. {karaoke_gen-0.75.53.dist-info → karaoke_gen-0.81.1.dist-info}/RECORD +50 -43
  11. lyrics_transcriber/core/config.py +8 -0
  12. lyrics_transcriber/core/controller.py +43 -1
  13. lyrics_transcriber/correction/agentic/providers/config.py +6 -0
  14. lyrics_transcriber/correction/agentic/providers/model_factory.py +24 -1
  15. lyrics_transcriber/correction/agentic/router.py +17 -13
  16. lyrics_transcriber/frontend/.gitignore +1 -0
  17. lyrics_transcriber/frontend/e2e/agentic-corrections.spec.ts +207 -0
  18. lyrics_transcriber/frontend/e2e/fixtures/agentic-correction-data.json +226 -0
  19. lyrics_transcriber/frontend/index.html +5 -1
  20. lyrics_transcriber/frontend/package-lock.json +4553 -0
  21. lyrics_transcriber/frontend/package.json +7 -1
  22. lyrics_transcriber/frontend/playwright.config.ts +69 -0
  23. lyrics_transcriber/frontend/public/nomad-karaoke-logo.svg +5 -0
  24. lyrics_transcriber/frontend/src/App.tsx +88 -59
  25. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +55 -21
  26. lyrics_transcriber/frontend/src/components/AppHeader.tsx +65 -0
  27. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +39 -35
  28. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +9 -9
  29. lyrics_transcriber/frontend/src/components/EditModal.tsx +1 -1
  30. lyrics_transcriber/frontend/src/components/EditWordList.tsx +1 -1
  31. lyrics_transcriber/frontend/src/components/Header.tsx +96 -3
  32. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +120 -3
  33. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +22 -21
  34. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  35. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +12 -2
  36. lyrics_transcriber/frontend/src/components/WordDivider.tsx +3 -3
  37. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +122 -35
  38. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +2 -2
  39. lyrics_transcriber/frontend/src/components/shared/constants.ts +15 -5
  40. lyrics_transcriber/frontend/src/components/shared/types.ts +6 -0
  41. lyrics_transcriber/frontend/src/main.tsx +1 -7
  42. lyrics_transcriber/frontend/src/theme.ts +337 -135
  43. lyrics_transcriber/frontend/vite.config.ts +5 -0
  44. lyrics_transcriber/frontend/yarn.lock +1005 -1046
  45. lyrics_transcriber/output/generator.py +50 -3
  46. lyrics_transcriber/review/server.py +1 -1
  47. lyrics_transcriber/transcribers/local_whisper.py +260 -0
  48. {karaoke_gen-0.75.53.dist-info → karaoke_gen-0.81.1.dist-info}/WHEEL +0 -0
  49. {karaoke_gen-0.75.53.dist-info → karaoke_gen-0.81.1.dist-info}/entry_points.txt +0 -0
  50. {karaoke_gen-0.75.53.dist-info → karaoke_gen-0.81.1.dist-info}/licenses/LICENSE +0 -0
@@ -31,6 +31,12 @@ from .video_generator import VideoGenerator
31
31
  from .video_background_processor import VideoBackgroundProcessor
32
32
  from .audio_fetcher import create_audio_fetcher, AudioFetcherError, NoResultsError, UserCancelledError
33
33
 
34
+ # Import lyrics_transcriber components for post-review countdown and video rendering
35
+ from lyrics_transcriber.output.countdown_processor import CountdownProcessor
36
+ from lyrics_transcriber.output.generator import OutputGenerator
37
+ from lyrics_transcriber.types import CorrectionResult
38
+ from lyrics_transcriber.core.config import OutputConfig as LyricsOutputConfig
39
+
34
40
 
35
41
  class KaraokePrep:
36
42
  def __init__(
@@ -482,41 +488,56 @@ class KaraokePrep:
482
488
  self.logger.info(f"Found existing media files matching extractor '{self.extractor}', skipping download/conversion.")
483
489
 
484
490
  elif getattr(self, '_use_audio_fetcher', False):
485
- # Use flacfetch to search and download audio
486
- self.logger.info(f"Using flacfetch to search and download: {self.artist} - {self.title}")
487
-
488
491
  try:
489
- # Search and download audio using the AudioFetcher
490
- fetch_result = self.audio_fetcher.search_and_download(
491
- artist=self.artist,
492
- title=self.title,
493
- output_dir=track_output_dir,
494
- output_filename=f"{artist_title} (flacfetch)",
495
- auto_select=self.auto_download,
496
- )
497
-
498
- # Update extractor to reflect the actual provider used
499
- self.extractor = f"flacfetch-{fetch_result.provider}"
500
-
492
+ # Check if this is a URL download or search+download
493
+ if getattr(self, '_use_url_download', False):
494
+ # Direct URL download (e.g., YouTube URL)
495
+ self.logger.info(f"Using flacfetch to download from URL: {self.url}")
496
+
497
+ fetch_result = self.audio_fetcher.download_from_url(
498
+ url=self.url,
499
+ output_dir=track_output_dir,
500
+ output_filename=f"{artist_title} (youtube)" if artist_title != "Unknown - Unknown" else None,
501
+ artist=self.artist,
502
+ title=self.title,
503
+ )
504
+
505
+ # Update extractor to reflect the source
506
+ self.extractor = "youtube"
507
+ else:
508
+ # Use flacfetch to search and download audio
509
+ self.logger.info(f"Using flacfetch to search and download: {self.artist} - {self.title}")
510
+
511
+ fetch_result = self.audio_fetcher.search_and_download(
512
+ artist=self.artist,
513
+ title=self.title,
514
+ output_dir=track_output_dir,
515
+ output_filename=f"{artist_title} (flacfetch)",
516
+ auto_select=self.auto_download,
517
+ )
518
+
519
+ # Update extractor to reflect the actual provider used
520
+ self.extractor = f"flacfetch-{fetch_result.provider}"
521
+
501
522
  # Set up the output paths
502
523
  output_filename_no_extension = os.path.join(track_output_dir, f"{artist_title} ({self.extractor})")
503
-
524
+
504
525
  # Copy/move the downloaded file to the expected location
505
526
  processed_track["input_media"] = self.file_handler.download_audio_from_fetcher_result(
506
527
  fetch_result.filepath, output_filename_no_extension
507
528
  )
508
-
529
+
509
530
  self.logger.info(f"Audio downloaded from {fetch_result.provider}: {processed_track['input_media']}")
510
-
531
+
511
532
  # Convert to WAV for audio processing
512
533
  self.logger.info("Converting downloaded audio to WAV for processing...")
513
534
  processed_track["input_audio_wav"] = self.file_handler.convert_to_wav(
514
535
  processed_track["input_media"], output_filename_no_extension
515
536
  )
516
-
537
+
517
538
  # No still image for audio-only downloads
518
539
  processed_track["input_still_image"] = None
519
-
540
+
520
541
  except UserCancelledError:
521
542
  # User cancelled - propagate up to CLI for graceful exit
522
543
  raise
@@ -692,6 +713,113 @@ class KaraokePrep:
692
713
 
693
714
  self.logger.info("=== Parallel Processing Complete ===")
694
715
 
716
+ # === POST-TRANSCRIPTION: Add countdown and render video ===
717
+ # Since lyrics_processor.py now always defers countdown and video rendering,
718
+ # we handle it here after human review is complete. This ensures the review UI
719
+ # shows accurate, unshifted timestamps (same behavior as cloud backend).
720
+ if processed_track.get("lyrics") and self.render_video:
721
+ self.logger.info("=== Processing Countdown and Video Rendering ===")
722
+
723
+ from .utils import sanitize_filename
724
+ sanitized_artist = sanitize_filename(self.artist)
725
+ sanitized_title = sanitize_filename(self.title)
726
+ lyrics_dir = os.path.join(track_output_dir, "lyrics")
727
+
728
+ # Find the corrections JSON file
729
+ corrections_filename = f"{sanitized_artist} - {sanitized_title} (Lyrics Corrections).json"
730
+ corrections_filepath = os.path.join(lyrics_dir, corrections_filename)
731
+
732
+ if os.path.exists(corrections_filepath):
733
+ self.logger.info(f"Loading corrections from: {corrections_filepath}")
734
+
735
+ with open(corrections_filepath, 'r', encoding='utf-8') as f:
736
+ corrections_data = json.load(f)
737
+
738
+ # Convert to CorrectionResult
739
+ correction_result = CorrectionResult.from_dict(corrections_data)
740
+ self.logger.info(f"Loaded CorrectionResult with {len(correction_result.corrected_segments)} segments")
741
+
742
+ # Get the audio file path
743
+ audio_path = processed_track["input_audio_wav"]
744
+
745
+ # Add countdown intro if needed (songs that start within 3 seconds)
746
+ self.logger.info("Processing countdown intro (if needed)...")
747
+ cache_dir = os.path.join(track_output_dir, "cache")
748
+ os.makedirs(cache_dir, exist_ok=True)
749
+
750
+ countdown_processor = CountdownProcessor(
751
+ cache_dir=cache_dir,
752
+ logger=self.logger,
753
+ )
754
+
755
+ correction_result, audio_path, padding_added, padding_seconds = countdown_processor.process(
756
+ correction_result=correction_result,
757
+ audio_filepath=audio_path,
758
+ )
759
+
760
+ # Update processed_track with countdown info
761
+ processed_track["countdown_padding_added"] = padding_added
762
+ processed_track["countdown_padding_seconds"] = padding_seconds
763
+ if padding_added:
764
+ processed_track["padded_vocals_audio"] = audio_path
765
+ self.logger.info(
766
+ f"=== COUNTDOWN PADDING ADDED ===\n"
767
+ f"Added {padding_seconds}s padding to audio and shifted timestamps.\n"
768
+ f"Instrumental tracks will be padded after separation to maintain sync."
769
+ )
770
+ else:
771
+ self.logger.info("No countdown needed - song starts after 3 seconds")
772
+
773
+ # Save the updated corrections with countdown timestamps
774
+ updated_corrections_data = correction_result.to_dict()
775
+ with open(corrections_filepath, 'w', encoding='utf-8') as f:
776
+ json.dump(updated_corrections_data, f, indent=2)
777
+ self.logger.info(f"Saved countdown-adjusted corrections to: {corrections_filepath}")
778
+
779
+ # Render video with lyrics
780
+ self.logger.info("Rendering karaoke video with synchronized lyrics...")
781
+
782
+ output_config = LyricsOutputConfig(
783
+ output_dir=lyrics_dir,
784
+ cache_dir=cache_dir,
785
+ output_styles_json=self.style_params_json,
786
+ render_video=True,
787
+ generate_cdg=False,
788
+ generate_plain_text=True,
789
+ generate_lrc=True,
790
+ video_resolution="4k",
791
+ subtitle_offset_ms=self.subtitle_offset_ms,
792
+ )
793
+
794
+ output_generator = OutputGenerator(output_config, self.logger)
795
+ output_prefix = f"{sanitized_artist} - {sanitized_title}"
796
+
797
+ outputs = output_generator.generate_outputs(
798
+ transcription_corrected=correction_result,
799
+ lyrics_results={}, # Lyrics already written during transcription phase
800
+ audio_filepath=audio_path,
801
+ output_prefix=output_prefix,
802
+ )
803
+
804
+ # Copy video to expected location in parent directory
805
+ if outputs and outputs.video:
806
+ source_video = outputs.video
807
+ dest_video = os.path.join(track_output_dir, f"{artist_title} (With Vocals).mkv")
808
+ shutil.copy2(source_video, dest_video)
809
+ self.logger.info(f"Video rendered successfully: {dest_video}")
810
+ processed_track["with_vocals_video"] = dest_video
811
+
812
+ # Update ASS filepath for video background processing
813
+ if outputs.ass:
814
+ processed_track["ass_filepath"] = outputs.ass
815
+ else:
816
+ self.logger.warning("Video rendering did not produce expected output")
817
+ else:
818
+ self.logger.warning(f"Corrections file not found: {corrections_filepath}")
819
+ self.logger.warning("Skipping countdown processing and video rendering")
820
+ elif not self.render_video:
821
+ self.logger.info("Video rendering disabled - skipping countdown and video generation")
822
+
695
823
  # Apply video background if requested and lyrics were processed
696
824
  if self.video_background_processor and processed_track.get("lyrics"):
697
825
  self.logger.info("=== Processing Video Background ===")
@@ -991,17 +1119,56 @@ class KaraokePrep:
991
1119
 
992
1120
  return tracks
993
1121
 
1122
+ def _is_url(self, string: str) -> bool:
1123
+ """Check if a string is a URL."""
1124
+ return string is not None and (string.startswith("http://") or string.startswith("https://"))
1125
+
994
1126
  async def process(self):
995
1127
  if self.input_media is not None and os.path.isdir(self.input_media):
996
1128
  self.logger.info(f"Input media {self.input_media} is a local folder, processing each file individually...")
997
1129
  return await self.process_folder()
998
1130
  elif self.input_media is not None and os.path.isfile(self.input_media):
999
1131
  self.logger.info(f"Input media {self.input_media} is a local file, audio download will be skipped")
1132
+ return [await self.prep_single_track()]
1133
+ elif self.input_media is not None and self._is_url(self.input_media):
1134
+ # URL provided - download directly via flacfetch
1135
+ self.logger.info(f"Input media {self.input_media} is a URL, downloading via flacfetch...")
1136
+
1137
+ # Extract video ID for metadata if it's a YouTube URL
1138
+ video_id = None
1139
+ youtube_patterns = [
1140
+ r'(?:youtube\.com/watch\?v=|youtu\.be/)([a-zA-Z0-9_-]{11})',
1141
+ r'youtube\.com/embed/([a-zA-Z0-9_-]{11})',
1142
+ r'youtube\.com/v/([a-zA-Z0-9_-]{11})',
1143
+ ]
1144
+ for pattern in youtube_patterns:
1145
+ match = re.search(pattern, self.input_media)
1146
+ if match:
1147
+ video_id = match.group(1)
1148
+ break
1149
+
1150
+ # Set up the extracted_info for metadata consistency
1151
+ self.extracted_info = {
1152
+ "title": f"{self.artist} - {self.title}" if self.artist and self.title else video_id or "Unknown",
1153
+ "artist": self.artist or "",
1154
+ "track_title": self.title or "",
1155
+ "extractor_key": "youtube",
1156
+ "id": video_id or self.input_media,
1157
+ "url": self.input_media,
1158
+ "source": "youtube",
1159
+ }
1160
+ self.extractor = "youtube"
1161
+ self.url = self.input_media
1162
+
1163
+ # Mark that we need to use audio fetcher for URL download
1164
+ self._use_audio_fetcher = True
1165
+ self._use_url_download = True # New flag for URL-based download
1166
+
1000
1167
  return [await self.prep_single_track()]
1001
1168
  elif self.artist and self.title:
1002
1169
  # No input file provided - use flacfetch to search and download audio
1003
1170
  self.logger.info(f"No input file provided, using flacfetch to search for: {self.artist} - {self.title}")
1004
-
1171
+
1005
1172
  # Set up the extracted_info for metadata consistency
1006
1173
  self.extracted_info = {
1007
1174
  "title": f"{self.artist} - {self.title}",
@@ -1014,13 +1181,12 @@ class KaraokePrep:
1014
1181
  }
1015
1182
  self.extractor = "flacfetch"
1016
1183
  self.url = None # URL will be determined by flacfetch
1017
-
1184
+
1018
1185
  # Mark that we need to use audio fetcher for download
1019
1186
  self._use_audio_fetcher = True
1020
-
1187
+
1021
1188
  return [await self.prep_single_track()]
1022
1189
  else:
1023
1190
  raise ValueError(
1024
- "Either a local file path or both artist and title must be provided. "
1025
- "URL-based input has been replaced with flacfetch audio fetching."
1191
+ "Either a local file path, a URL, or both artist and title must be provided."
1026
1192
  )
@@ -170,15 +170,15 @@ class LyricsProcessor:
170
170
  def _check_transcription_providers(self) -> dict:
171
171
  """
172
172
  Check which transcription providers are configured and return their status.
173
-
173
+
174
174
  Returns:
175
175
  dict with 'configured' (list of provider names) and 'missing' (list of missing configs)
176
176
  """
177
177
  load_dotenv()
178
-
178
+
179
179
  configured = []
180
180
  missing = []
181
-
181
+
182
182
  # Check AudioShake
183
183
  audioshake_token = os.getenv("AUDIOSHAKE_API_TOKEN")
184
184
  if audioshake_token:
@@ -187,7 +187,7 @@ class LyricsProcessor:
187
187
  else:
188
188
  missing.append("AudioShake (AUDIOSHAKE_API_TOKEN)")
189
189
  self.logger.debug("AudioShake transcription provider: not configured (missing AUDIOSHAKE_API_TOKEN)")
190
-
190
+
191
191
  # Check Whisper via RunPod
192
192
  runpod_key = os.getenv("RUNPOD_API_KEY")
193
193
  whisper_id = os.getenv("WHISPER_RUNPOD_ID")
@@ -203,7 +203,16 @@ class LyricsProcessor:
203
203
  else:
204
204
  missing.append("Whisper (RUNPOD_API_KEY + WHISPER_RUNPOD_ID)")
205
205
  self.logger.debug("Whisper transcription provider: not configured")
206
-
206
+
207
+ # Check Local Whisper (whisper-timestamped)
208
+ try:
209
+ import whisper_timestamped
210
+ configured.append("Local Whisper")
211
+ self.logger.debug("Local Whisper transcription provider: configured (whisper-timestamped installed)")
212
+ except ImportError:
213
+ missing.append("Local Whisper (pip install karaoke-gen[local-whisper])")
214
+ self.logger.debug("Local Whisper transcription provider: not configured (whisper-timestamped not installed)")
215
+
207
216
  return {"configured": configured, "missing": missing}
208
217
 
209
218
  def _build_transcription_provider_error_message(self, missing_providers: list) -> str:
@@ -221,12 +230,18 @@ class LyricsProcessor:
221
230
  " - Set environment variable: AUDIOSHAKE_API_TOKEN=your_token\n"
222
231
  " - Get an API key at: https://www.audioshake.ai/\n"
223
232
  "\n"
224
- "2. Whisper via RunPod (Open-source alternative)\n"
233
+ "2. Whisper via RunPod (Cloud-based open-source)\n"
225
234
  " - Set environment variables:\n"
226
235
  " RUNPOD_API_KEY=your_key\n"
227
236
  " WHISPER_RUNPOD_ID=your_endpoint_id\n"
228
237
  " - Set up a Whisper endpoint at: https://www.runpod.io/\n"
229
238
  "\n"
239
+ "3. Local Whisper (No cloud required - runs on your machine)\n"
240
+ " - Install with: pip install karaoke-gen[local-whisper]\n"
241
+ " - For CPU-only: pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu\n"
242
+ " pip install karaoke-gen[local-whisper]\n"
243
+ " - Requires 2-10GB RAM depending on model size\n"
244
+ "\n"
230
245
  "ALTERNATIVES:\n"
231
246
  "\n"
232
247
  "- Use --skip-lyrics flag to generate instrumental-only karaoke (no synchronized lyrics)\n"
@@ -348,6 +363,10 @@ class LyricsProcessor:
348
363
  # Create config objects for LyricsTranscriber
349
364
  transcriber_config = TranscriberConfig(
350
365
  audioshake_api_token=env_config.get("audioshake_api_token"),
366
+ runpod_api_key=env_config.get("runpod_api_key"),
367
+ whisper_runpod_id=env_config.get("whisper_runpod_id"),
368
+ # Local Whisper is enabled by default as a fallback when no cloud providers are configured
369
+ enable_local_whisper=True,
351
370
  )
352
371
 
353
372
  lyrics_config = LyricsConfig(
@@ -364,41 +383,30 @@ class LyricsProcessor:
364
383
  self.logger.info(f" rapidapi_key: {env_config.get('rapidapi_key')[:3] + '...' if env_config.get('rapidapi_key') else 'None'}")
365
384
  self.logger.info(f" lyrics_file: {self.lyrics_file}")
366
385
 
367
- # Detect if we're running in a serverless environment (Modal)
368
- # Modal sets specific environment variables we can check for
369
- is_serverless = (
370
- os.getenv("MODAL_TASK_ID") is not None or
371
- os.getenv("MODAL_FUNCTION_NAME") is not None or
372
- os.path.exists("/.modal") # Modal creates this directory in containers
373
- )
374
-
375
- # In serverless environment, disable interactive review even if skip_transcription_review=False
376
- # This preserves CLI behavior while fixing serverless hanging
377
- enable_review_setting = not self.skip_transcription_review and not is_serverless
378
-
379
- if is_serverless and not self.skip_transcription_review:
380
- self.logger.info("Detected serverless environment - disabling interactive review to prevent hanging")
381
-
382
- # In serverless environment, disable video generation during Phase 1 to save compute
383
- # Video will be generated in Phase 2 after human review
384
- serverless_render_video = render_video and not is_serverless
385
-
386
- if is_serverless and render_video:
387
- self.logger.info("Detected serverless environment - deferring video generation until after review")
388
-
386
+ # Always defer countdown and video rendering to a later phase.
387
+ # This ensures the review UI (both local and cloud) shows original timing
388
+ # without the 3-second countdown shift. The caller is responsible for:
389
+ # - Local CLI: karaoke_gen.py adds countdown and renders video after transcription
390
+ # - Cloud backend: render_video_worker.py adds countdown and renders video
391
+ #
392
+ # This design ensures consistent behavior regardless of environment,
393
+ # and the review UI always shows accurate, unshifted timestamps.
394
+ self.logger.info("Deferring countdown and video rendering to post-review phase")
395
+
389
396
  output_config = OutputConfig(
390
397
  output_styles_json=self.style_params_json,
391
398
  output_dir=lyrics_dir,
392
- render_video=serverless_render_video, # Disable video in serverless Phase 1
399
+ render_video=False, # Always defer - caller handles video rendering after countdown
393
400
  fetch_lyrics=True,
394
401
  run_transcription=not self.skip_transcription,
395
402
  run_correction=True,
396
403
  generate_plain_text=True,
397
404
  generate_lrc=True,
398
- generate_cdg=False, # Also defer CDG generation to Phase 2
405
+ generate_cdg=False, # CDG generation disabled (not currently supported)
399
406
  video_resolution="4k",
400
- enable_review=enable_review_setting,
407
+ enable_review=not self.skip_transcription_review, # Honor the caller's setting
401
408
  subtitle_offset_ms=self.subtitle_offset_ms,
409
+ add_countdown=False, # Always defer - caller handles countdown after review
402
410
  )
403
411
 
404
412
  # Add this log entry to debug the OutputConfig
@@ -1,9 +1,35 @@
1
1
  import re
2
2
 
3
+ # Unicode character replacements for ASCII-safe filenames
4
+ # These characters cause issues with HTTP headers (latin-1 encoding) and some filesystems
5
+ UNICODE_REPLACEMENTS = {
6
+ # Curly/smart quotes -> straight quotes
7
+ "\u2018": "'", # LEFT SINGLE QUOTATION MARK
8
+ "\u2019": "'", # RIGHT SINGLE QUOTATION MARK (the one causing the bug)
9
+ "\u201A": "'", # SINGLE LOW-9 QUOTATION MARK
10
+ "\u201B": "'", # SINGLE HIGH-REVERSED-9 QUOTATION MARK
11
+ "\u201C": '"', # LEFT DOUBLE QUOTATION MARK
12
+ "\u201D": '"', # RIGHT DOUBLE QUOTATION MARK
13
+ "\u201E": '"', # DOUBLE LOW-9 QUOTATION MARK
14
+ "\u201F": '"', # DOUBLE HIGH-REVERSED-9 QUOTATION MARK
15
+ # Other common problematic characters
16
+ "\u2013": "-", # EN DASH
17
+ "\u2014": "-", # EM DASH
18
+ "\u2026": "...", # HORIZONTAL ELLIPSIS
19
+ "\u00A0": " ", # NON-BREAKING SPACE
20
+ }
21
+
22
+
3
23
  def sanitize_filename(filename):
4
24
  """Replace or remove characters that are unsafe for filenames."""
5
25
  if filename is None:
6
26
  return None
27
+
28
+ # First, normalize Unicode characters that cause HTTP header encoding issues
29
+ # (e.g., curly quotes from macOS/Word that can't be encoded in latin-1)
30
+ for unicode_char, ascii_replacement in UNICODE_REPLACEMENTS.items():
31
+ filename = filename.replace(unicode_char, ascii_replacement)
32
+
7
33
  # Replace problematic characters with underscores
8
34
  for char in ["\\", "/", ":", "*", "?", '"', "<", ">", "|"]:
9
35
  filename = filename.replace(char, "_")
@@ -242,9 +242,17 @@ def create_parser(prog: str = "karaoke-gen") -> argparse.ArgumentParser:
242
242
 
243
243
  # Style Configuration
244
244
  style_group = parser.add_argument_group("Style Configuration")
245
+ style_group.add_argument(
246
+ "--theme",
247
+ help="Optional: Theme ID for pre-made styles stored in GCS (e.g., 'nomad', 'default'). "
248
+ "When using a theme, CDG/TXT are enabled by default. "
249
+ "Example: --theme=nomad",
250
+ )
245
251
  style_group.add_argument(
246
252
  "--style_params_json",
247
- help="Optional: Path to JSON file containing style configuration. Example: --style_params_json='/path/to/style_params.json'",
253
+ help="Optional: Path to JSON file containing style configuration. "
254
+ "Takes precedence over --theme if both are provided. "
255
+ "Example: --style_params_json='/path/to/style_params.json'",
248
256
  )
249
257
  style_group.add_argument(
250
258
  "--style_override",
@@ -320,7 +320,7 @@ async def async_main():
320
320
  # Check if user provided a custom value (not the default hosted URL)
321
321
  default_hosted_urls = [
322
322
  'https://gen.nomadkaraoke.com/lyrics',
323
- 'https://lyrics.nomadkaraoke.com'
323
+ 'https://gen.nomadkaraoke.com/lyrics/'
324
324
  ]
325
325
  if args.review_ui_url.rstrip('/') not in [url.rstrip('/') for url in default_hosted_urls]:
326
326
  # User explicitly wants a specific URL (e.g., Vite dev server)
@@ -292,6 +292,8 @@ class RemoteKaraokeClient:
292
292
  # Two-phase workflow (Batch 6)
293
293
  prep_only: bool = False,
294
294
  keep_brand_code: Optional[str] = None,
295
+ # Theme system
296
+ theme_id: Optional[str] = None,
295
297
  ) -> Dict[str, Any]:
296
298
  """
297
299
  Submit a new karaoke generation job from a YouTube/online URL.
@@ -322,6 +324,7 @@ class RemoteKaraokeClient:
322
324
  clean_instrumental_model: Model for clean instrumental separation
323
325
  backing_vocals_models: List of models for backing vocals separation
324
326
  other_stems_models: List of models for other stems (bass, drums, etc.)
327
+ theme_id: Theme ID from GCS themes (e.g., 'nomad', 'default')
325
328
  """
326
329
  self.logger.info(f"Submitting URL-based job: {url}")
327
330
 
@@ -367,7 +370,10 @@ class RemoteKaraokeClient:
367
370
  create_request['prep_only'] = prep_only
368
371
  if keep_brand_code:
369
372
  create_request['keep_brand_code'] = keep_brand_code
370
-
373
+ # Theme system
374
+ if theme_id:
375
+ create_request['theme_id'] = theme_id
376
+
371
377
  self.logger.info(f"Creating URL-based job at {self.config.service_url}/api/jobs/create-from-url")
372
378
 
373
379
  response = self._request('POST', '/api/jobs/create-from-url', json=create_request)
@@ -396,9 +402,9 @@ class RemoteKaraokeClient:
396
402
  return result
397
403
 
398
404
  def submit_job(
399
- self,
400
- filepath: str,
401
- artist: str,
405
+ self,
406
+ filepath: str,
407
+ artist: str,
402
408
  title: str,
403
409
  style_params_path: Optional[str] = None,
404
410
  enable_cdg: bool = True,
@@ -425,6 +431,8 @@ class RemoteKaraokeClient:
425
431
  # Two-phase workflow (Batch 6)
426
432
  prep_only: bool = False,
427
433
  keep_brand_code: Optional[str] = None,
434
+ # Theme system
435
+ theme_id: Optional[str] = None,
428
436
  ) -> Dict[str, Any]:
429
437
  """
430
438
  Submit a new karaoke generation job with optional style configuration.
@@ -456,6 +464,7 @@ class RemoteKaraokeClient:
456
464
  backing_vocals_models: List of models for backing vocals separation
457
465
  other_stems_models: List of models for other stems (bass, drums, etc.)
458
466
  existing_instrumental: Path to existing instrumental file to use instead of AI separation
467
+ theme_id: Theme ID from GCS themes (e.g., 'nomad', 'default')
459
468
  """
460
469
  file_path = Path(filepath)
461
470
 
@@ -574,7 +583,10 @@ class RemoteKaraokeClient:
574
583
  create_request['prep_only'] = prep_only
575
584
  if keep_brand_code:
576
585
  create_request['keep_brand_code'] = keep_brand_code
577
-
586
+ # Theme system
587
+ if theme_id:
588
+ create_request['theme_id'] = theme_id
589
+
578
590
  response = self._request('POST', '/api/jobs/create-with-upload-urls', json=create_request)
579
591
 
580
592
  if response.status_code != 200:
@@ -1195,6 +1207,8 @@ class RemoteKaraokeClient:
1195
1207
  clean_instrumental_model: Optional[str] = None,
1196
1208
  backing_vocals_models: Optional[list] = None,
1197
1209
  other_stems_models: Optional[list] = None,
1210
+ # Theme system
1211
+ theme_id: Optional[str] = None,
1198
1212
  ) -> Dict[str, Any]:
1199
1213
  """
1200
1214
  Search for audio by artist and title (Batch 5 - Flacfetch integration).
@@ -1247,7 +1261,10 @@ class RemoteKaraokeClient:
1247
1261
  request_data['backing_vocals_models'] = backing_vocals_models
1248
1262
  if other_stems_models:
1249
1263
  request_data['other_stems_models'] = other_stems_models
1250
-
1264
+ # Theme system
1265
+ if theme_id:
1266
+ request_data['theme_id'] = theme_id
1267
+
1251
1268
  # Prepare style files for upload if provided
1252
1269
  style_files = []
1253
1270
  local_style_files: Dict[str, str] = {} # file_type -> local_path
@@ -3029,6 +3046,8 @@ def main():
3029
3046
  logger.info(f"Searching for: {artist} - {title}")
3030
3047
  if getattr(args, 'auto_download', False) or config.non_interactive:
3031
3048
  logger.info(f"Auto-download: enabled (will auto-select best source)")
3049
+ if getattr(args, 'theme', None):
3050
+ logger.info(f"Theme: {args.theme}")
3032
3051
  if args.style_params_json:
3033
3052
  logger.info(f"Style: {args.style_params_json}")
3034
3053
  logger.info(f"CDG: {args.enable_cdg}, TXT: {args.enable_txt}")
@@ -3070,6 +3089,8 @@ def main():
3070
3089
  clean_instrumental_model=getattr(args, 'clean_instrumental_model', None),
3071
3090
  backing_vocals_models=getattr(args, 'backing_vocals_models', None),
3072
3091
  other_stems_models=getattr(args, 'other_stems_models', None),
3092
+ # Theme system
3093
+ theme_id=getattr(args, 'theme', None),
3073
3094
  )
3074
3095
 
3075
3096
  job_id = result.get('job_id')
@@ -3106,6 +3127,8 @@ def main():
3106
3127
  logger.info(f"Title: {title}")
3107
3128
  if not artist and not title and is_url_input:
3108
3129
  logger.info(f"Artist/Title: (will be auto-detected from URL)")
3130
+ if getattr(args, 'theme', None):
3131
+ logger.info(f"Theme: {args.theme}")
3109
3132
  if args.style_params_json:
3110
3133
  logger.info(f"Style: {args.style_params_json}")
3111
3134
  logger.info(f"CDG: {args.enable_cdg}, TXT: {args.enable_txt}")
@@ -3206,6 +3229,8 @@ def main():
3206
3229
  # Two-phase workflow (Batch 6)
3207
3230
  prep_only=getattr(args, 'prep_only', False),
3208
3231
  keep_brand_code=keep_brand_code_value,
3232
+ # Theme system
3233
+ theme_id=getattr(args, 'theme', None),
3209
3234
  )
3210
3235
  else:
3211
3236
  # File-based job submission
@@ -3238,6 +3263,8 @@ def main():
3238
3263
  # Two-phase workflow (Batch 6)
3239
3264
  prep_only=getattr(args, 'prep_only', False),
3240
3265
  keep_brand_code=keep_brand_code_value,
3266
+ # Theme system
3267
+ theme_id=getattr(args, 'theme', None),
3241
3268
  )
3242
3269
  job_id = result.get('job_id')
3243
3270
  style_assets = result.get('style_assets_uploaded', [])