karaoke-gen 0.75.16__py3-none-any.whl → 0.76.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. karaoke_gen/audio_fetcher.py +984 -33
  2. karaoke_gen/audio_processor.py +4 -0
  3. karaoke_gen/instrumental_review/static/index.html +37 -14
  4. karaoke_gen/karaoke_finalise/karaoke_finalise.py +25 -1
  5. karaoke_gen/karaoke_gen.py +208 -39
  6. karaoke_gen/lyrics_processor.py +111 -31
  7. karaoke_gen/utils/__init__.py +26 -0
  8. karaoke_gen/utils/cli_args.py +15 -6
  9. karaoke_gen/utils/gen_cli.py +30 -5
  10. karaoke_gen/utils/remote_cli.py +301 -20
  11. {karaoke_gen-0.75.16.dist-info → karaoke_gen-0.76.20.dist-info}/METADATA +107 -5
  12. {karaoke_gen-0.75.16.dist-info → karaoke_gen-0.76.20.dist-info}/RECORD +47 -43
  13. lyrics_transcriber/core/controller.py +76 -2
  14. lyrics_transcriber/frontend/index.html +5 -1
  15. lyrics_transcriber/frontend/package-lock.json +4553 -0
  16. lyrics_transcriber/frontend/package.json +4 -1
  17. lyrics_transcriber/frontend/playwright.config.ts +69 -0
  18. lyrics_transcriber/frontend/public/nomad-karaoke-logo.svg +5 -0
  19. lyrics_transcriber/frontend/src/App.tsx +94 -63
  20. lyrics_transcriber/frontend/src/api.ts +25 -10
  21. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +55 -21
  22. lyrics_transcriber/frontend/src/components/AppHeader.tsx +65 -0
  23. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +5 -5
  24. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +9 -9
  25. lyrics_transcriber/frontend/src/components/EditModal.tsx +1 -1
  26. lyrics_transcriber/frontend/src/components/EditWordList.tsx +1 -1
  27. lyrics_transcriber/frontend/src/components/Header.tsx +34 -48
  28. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +22 -21
  29. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  30. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
  31. lyrics_transcriber/frontend/src/components/WordDivider.tsx +3 -3
  32. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +2 -2
  33. lyrics_transcriber/frontend/src/components/shared/constants.ts +15 -5
  34. lyrics_transcriber/frontend/src/main.tsx +1 -7
  35. lyrics_transcriber/frontend/src/theme.ts +337 -135
  36. lyrics_transcriber/frontend/vite.config.ts +5 -0
  37. lyrics_transcriber/frontend/web_assets/assets/{index-COYImAcx.js → index-BECn1o8Q.js} +38 -22
  38. lyrics_transcriber/frontend/web_assets/assets/{index-COYImAcx.js.map → index-BECn1o8Q.js.map} +1 -1
  39. lyrics_transcriber/frontend/web_assets/index.html +1 -1
  40. lyrics_transcriber/frontend/yarn.lock +1005 -1046
  41. lyrics_transcriber/output/countdown_processor.py +39 -0
  42. lyrics_transcriber/review/server.py +1 -1
  43. lyrics_transcriber/transcribers/audioshake.py +96 -7
  44. lyrics_transcriber/types.py +14 -12
  45. {karaoke_gen-0.75.16.dist-info → karaoke_gen-0.76.20.dist-info}/WHEEL +0 -0
  46. {karaoke_gen-0.75.16.dist-info → karaoke_gen-0.76.20.dist-info}/entry_points.txt +0 -0
  47. {karaoke_gen-0.75.16.dist-info → karaoke_gen-0.76.20.dist-info}/licenses/LICENSE +0 -0
@@ -31,13 +31,16 @@ import webbrowser
31
31
  from dataclasses import dataclass
32
32
  from enum import Enum
33
33
  from pathlib import Path
34
- from typing import Any, Dict, Optional
34
+ from typing import Any, Dict, List, Optional
35
35
 
36
36
  import requests
37
37
 
38
38
  from .cli_args import create_parser, process_style_overrides, is_url, is_file
39
39
  # Use flacfetch's shared display functions for consistent formatting
40
40
  from flacfetch import print_releases, Release
41
+ from flacfetch.core.categorize import categorize_releases
42
+ from flacfetch.core.models import TrackQuery
43
+ from flacfetch.interface.cli import print_categorized_releases
41
44
 
42
45
 
43
46
  class JobStatus(str, Enum):
@@ -137,7 +140,18 @@ class RemoteKaraokeClient:
137
140
  return None
138
141
 
139
142
  def refresh_auth(self) -> bool:
140
- """Refresh authentication token."""
143
+ """Refresh authentication token.
144
+
145
+ Only refreshes if we're using a gcloud-based token. If the user
146
+ provided a static token via KARAOKE_GEN_AUTH_TOKEN, we keep that
147
+ since it doesn't expire like gcloud identity tokens.
148
+ """
149
+ # Don't refresh if using a static admin token from env
150
+ if os.environ.get('KARAOKE_GEN_AUTH_TOKEN'):
151
+ # Already have a valid static token, no need to refresh
152
+ return True
153
+
154
+ # Try to refresh gcloud identity token
141
155
  token = self._get_auth_token_from_gcloud()
142
156
  if token:
143
157
  self.config.auth_token = token
@@ -278,6 +292,8 @@ class RemoteKaraokeClient:
278
292
  # Two-phase workflow (Batch 6)
279
293
  prep_only: bool = False,
280
294
  keep_brand_code: Optional[str] = None,
295
+ # Theme system
296
+ theme_id: Optional[str] = None,
281
297
  ) -> Dict[str, Any]:
282
298
  """
283
299
  Submit a new karaoke generation job from a YouTube/online URL.
@@ -308,6 +324,7 @@ class RemoteKaraokeClient:
308
324
  clean_instrumental_model: Model for clean instrumental separation
309
325
  backing_vocals_models: List of models for backing vocals separation
310
326
  other_stems_models: List of models for other stems (bass, drums, etc.)
327
+ theme_id: Theme ID from GCS themes (e.g., 'nomad', 'default')
311
328
  """
312
329
  self.logger.info(f"Submitting URL-based job: {url}")
313
330
 
@@ -353,7 +370,10 @@ class RemoteKaraokeClient:
353
370
  create_request['prep_only'] = prep_only
354
371
  if keep_brand_code:
355
372
  create_request['keep_brand_code'] = keep_brand_code
356
-
373
+ # Theme system
374
+ if theme_id:
375
+ create_request['theme_id'] = theme_id
376
+
357
377
  self.logger.info(f"Creating URL-based job at {self.config.service_url}/api/jobs/create-from-url")
358
378
 
359
379
  response = self._request('POST', '/api/jobs/create-from-url', json=create_request)
@@ -382,9 +402,9 @@ class RemoteKaraokeClient:
382
402
  return result
383
403
 
384
404
  def submit_job(
385
- self,
386
- filepath: str,
387
- artist: str,
405
+ self,
406
+ filepath: str,
407
+ artist: str,
388
408
  title: str,
389
409
  style_params_path: Optional[str] = None,
390
410
  enable_cdg: bool = True,
@@ -411,6 +431,8 @@ class RemoteKaraokeClient:
411
431
  # Two-phase workflow (Batch 6)
412
432
  prep_only: bool = False,
413
433
  keep_brand_code: Optional[str] = None,
434
+ # Theme system
435
+ theme_id: Optional[str] = None,
414
436
  ) -> Dict[str, Any]:
415
437
  """
416
438
  Submit a new karaoke generation job with optional style configuration.
@@ -442,6 +464,7 @@ class RemoteKaraokeClient:
442
464
  backing_vocals_models: List of models for backing vocals separation
443
465
  other_stems_models: List of models for other stems (bass, drums, etc.)
444
466
  existing_instrumental: Path to existing instrumental file to use instead of AI separation
467
+ theme_id: Theme ID from GCS themes (e.g., 'nomad', 'default')
445
468
  """
446
469
  file_path = Path(filepath)
447
470
 
@@ -560,7 +583,10 @@ class RemoteKaraokeClient:
560
583
  create_request['prep_only'] = prep_only
561
584
  if keep_brand_code:
562
585
  create_request['keep_brand_code'] = keep_brand_code
563
-
586
+ # Theme system
587
+ if theme_id:
588
+ create_request['theme_id'] = theme_id
589
+
564
590
  response = self._request('POST', '/api/jobs/create-with-upload-urls', json=create_request)
565
591
 
566
592
  if response.status_code != 200:
@@ -1081,7 +1107,8 @@ class RemoteKaraokeClient:
1081
1107
  if url.startswith('/'):
1082
1108
  url = f"{self.config.service_url}{url}"
1083
1109
 
1084
- response = requests.get(url, stream=True, timeout=600)
1110
+ # Use session headers (includes Authorization) for authenticated downloads
1111
+ response = self.session.get(url, stream=True, timeout=600)
1085
1112
  if response.status_code != 200:
1086
1113
  return False
1087
1114
 
@@ -1180,6 +1207,8 @@ class RemoteKaraokeClient:
1180
1207
  clean_instrumental_model: Optional[str] = None,
1181
1208
  backing_vocals_models: Optional[list] = None,
1182
1209
  other_stems_models: Optional[list] = None,
1210
+ # Theme system
1211
+ theme_id: Optional[str] = None,
1183
1212
  ) -> Dict[str, Any]:
1184
1213
  """
1185
1214
  Search for audio by artist and title (Batch 5 - Flacfetch integration).
@@ -1192,6 +1221,7 @@ class RemoteKaraokeClient:
1192
1221
  artist: Artist name to search for
1193
1222
  title: Song title to search for
1194
1223
  auto_download: Automatically select best audio source (skip interactive selection)
1224
+ style_params_path: Path to style_params.json (optional)
1195
1225
  ... other args same as submit_job()
1196
1226
 
1197
1227
  Returns:
@@ -1231,6 +1261,43 @@ class RemoteKaraokeClient:
1231
1261
  request_data['backing_vocals_models'] = backing_vocals_models
1232
1262
  if other_stems_models:
1233
1263
  request_data['other_stems_models'] = other_stems_models
1264
+ # Theme system
1265
+ if theme_id:
1266
+ request_data['theme_id'] = theme_id
1267
+
1268
+ # Prepare style files for upload if provided
1269
+ style_files = []
1270
+ local_style_files: Dict[str, str] = {} # file_type -> local_path
1271
+
1272
+ if style_params_path and os.path.isfile(style_params_path):
1273
+ self.logger.info(f"Parsing style configuration: {style_params_path}")
1274
+
1275
+ # Add the style_params.json itself
1276
+ style_files.append({
1277
+ 'filename': Path(style_params_path).name,
1278
+ 'content_type': 'application/json',
1279
+ 'file_type': 'style_params'
1280
+ })
1281
+ local_style_files['style_params'] = style_params_path
1282
+
1283
+ # Parse style params to find referenced files (backgrounds, fonts)
1284
+ style_assets = self._parse_style_params(style_params_path)
1285
+
1286
+ for asset_key, asset_path in style_assets.items():
1287
+ if os.path.isfile(asset_path):
1288
+ # Use full path for content type detection (not just extension)
1289
+ content_type = self._get_content_type(asset_path)
1290
+ style_files.append({
1291
+ 'filename': Path(asset_path).name,
1292
+ 'content_type': content_type,
1293
+ 'file_type': asset_key # e.g., 'style_intro_background'
1294
+ })
1295
+ local_style_files[asset_key] = asset_path
1296
+ self.logger.info(f" Will upload style asset: {asset_key}")
1297
+
1298
+ if style_files:
1299
+ request_data['style_files'] = style_files
1300
+ self.logger.info(f"Including {len(style_files)} style files in request")
1234
1301
 
1235
1302
  response = self._request('POST', '/api/audio-search/search', json=request_data)
1236
1303
 
@@ -1248,7 +1315,52 @@ class RemoteKaraokeClient:
1248
1315
  error_detail = response.text
1249
1316
  raise RuntimeError(f"Error searching for audio: {error_detail}")
1250
1317
 
1251
- return response.json()
1318
+ result = response.json()
1319
+
1320
+ # Upload style files if we have signed URLs
1321
+ style_upload_urls = result.get('style_upload_urls', [])
1322
+ if style_upload_urls and local_style_files:
1323
+ self.logger.info(f"Uploading {len(style_upload_urls)} style files...")
1324
+
1325
+ for url_info in style_upload_urls:
1326
+ file_type = url_info['file_type']
1327
+ upload_url = url_info['upload_url']
1328
+
1329
+ local_path = local_style_files.get(file_type)
1330
+ if not local_path:
1331
+ self.logger.warning(f"No local file for {file_type}, skipping upload")
1332
+ continue
1333
+
1334
+ self.logger.info(f" Uploading {file_type}: {Path(local_path).name}")
1335
+
1336
+ try:
1337
+ with open(local_path, 'rb') as f:
1338
+ file_content = f.read()
1339
+
1340
+ # Use the content type from the original file info, not re-derived
1341
+ # This ensures it matches the signed URL which was generated with
1342
+ # the same content type we specified in the request
1343
+ content_type = self._get_content_type(local_path)
1344
+
1345
+ # Use PUT to upload directly to signed URL
1346
+ upload_response = requests.put(
1347
+ upload_url,
1348
+ data=file_content,
1349
+ headers={'Content-Type': content_type},
1350
+ timeout=60
1351
+ )
1352
+
1353
+ if upload_response.status_code not in (200, 201):
1354
+ self.logger.error(f"Failed to upload {file_type}: {upload_response.status_code}")
1355
+ else:
1356
+ self.logger.info(f" ✓ Uploaded {file_type}")
1357
+
1358
+ except Exception as e:
1359
+ self.logger.error(f"Error uploading {file_type}: {e}")
1360
+
1361
+ self.logger.info("Style file uploads complete")
1362
+
1363
+ return result
1252
1364
 
1253
1365
  def get_audio_search_results(self, job_id: str) -> Dict[str, Any]:
1254
1366
  """Get audio search results for a job awaiting selection."""
@@ -1398,16 +1510,21 @@ class JobMonitor:
1398
1510
  base_api_url = f"{self.config.service_url}/api/review/{job_id}"
1399
1511
  encoded_api_url = urllib.parse.quote(base_api_url, safe='')
1400
1512
 
1401
- # Try to get audio hash from job data
1513
+ # Try to get audio hash and review token from job data
1514
+ audio_hash = ''
1515
+ review_token = ''
1402
1516
  try:
1403
1517
  job_data = self.client.get_job(job_id)
1404
1518
  audio_hash = job_data.get('audio_hash', '')
1519
+ review_token = job_data.get('review_token', '')
1405
1520
  except Exception:
1406
- audio_hash = ''
1521
+ pass
1407
1522
 
1408
1523
  url = f"{self.config.review_ui_url}/?baseApiUrl={encoded_api_url}"
1409
1524
  if audio_hash:
1410
1525
  url += f"&audioHash={audio_hash}"
1526
+ if review_token:
1527
+ url += f"&reviewToken={review_token}"
1411
1528
 
1412
1529
  self.logger.info(f"Opening lyrics review UI: {url}")
1413
1530
  self.open_browser(url)
@@ -1608,8 +1725,34 @@ class JobMonitor:
1608
1725
  "quality_str": result.get('quality_str') or result.get('quality', ''),
1609
1726
  }
1610
1727
 
1728
+ def _convert_to_release_objects(self, release_dicts: List[Dict[str, Any]]) -> List[Release]:
1729
+ """
1730
+ Convert API result dicts to Release objects for categorization.
1731
+
1732
+ Used by handle_audio_selection() to enable categorized display
1733
+ for large result sets (10+ results).
1734
+
1735
+ Args:
1736
+ release_dicts: List of dicts in Release-compatible format
1737
+
1738
+ Returns:
1739
+ List of Release objects (skipping any that fail to convert)
1740
+ """
1741
+ releases = []
1742
+ for d in release_dicts:
1743
+ try:
1744
+ releases.append(Release.from_dict(d))
1745
+ except Exception as e:
1746
+ self.logger.debug(f"Failed to convert result to Release: {e}")
1747
+ return releases
1748
+
1611
1749
  def handle_audio_selection(self, job_id: str) -> None:
1612
- """Handle audio source selection interaction (Batch 5)."""
1750
+ """Handle audio source selection interaction (Batch 5).
1751
+
1752
+ For 10+ results, uses categorized display (grouped by Top Seeded,
1753
+ Album Releases, Hi-Res, etc.) with a 'more' command to show full list.
1754
+ For smaller result sets, uses flat list display.
1755
+ """
1613
1756
  self.logger.info("=" * 60)
1614
1757
  self.logger.info("AUDIO SOURCE SELECTION NEEDED")
1615
1758
  self.logger.info("=" * 60)
@@ -1619,6 +1762,7 @@ class JobMonitor:
1619
1762
  results_data = self.client.get_audio_search_results(job_id)
1620
1763
  results = results_data.get('results', [])
1621
1764
  artist = results_data.get('artist', 'Unknown')
1765
+ title = results_data.get('title', 'Unknown')
1622
1766
 
1623
1767
  if not results:
1624
1768
  self.logger.error("No search results available")
@@ -1633,23 +1777,71 @@ class JobMonitor:
1633
1777
  # This gives us the same rich, colorized output as the local CLI
1634
1778
  release_dicts = [self._convert_api_result_to_release_dict(r) for r in results]
1635
1779
 
1636
- # Use flacfetch's shared display function
1637
- print_releases(release_dicts, target_artist=artist, use_colors=True)
1780
+ # Convert to Release objects for categorization
1781
+ release_objects = self._convert_to_release_objects(release_dicts)
1782
+
1783
+ # Use categorized display for large result sets (10+)
1784
+ # This groups results into categories: Top Seeded, Album Releases, Hi-Res, etc.
1785
+ use_categorized = len(release_objects) >= 10
1786
+
1787
+ if use_categorized:
1788
+ # Create query for categorization
1789
+ query = TrackQuery(artist=artist, title=title)
1790
+ categorized = categorize_releases(release_objects, query)
1791
+ # print_categorized_releases returns the flattened list of displayed releases
1792
+ display_releases = print_categorized_releases(categorized, target_artist=artist, use_colors=True)
1793
+ showing_categorized = True
1794
+ else:
1795
+ # Small result set - use simple flat list
1796
+ print_releases(release_dicts, target_artist=artist, use_colors=True)
1797
+ display_releases = release_objects
1798
+ showing_categorized = False
1638
1799
 
1639
1800
  selection_index = -1
1640
1801
  while selection_index < 0:
1641
1802
  try:
1642
- choice = input(f"\nSelect a release (1-{len(results)}, 0 to cancel): ").strip()
1803
+ if showing_categorized:
1804
+ prompt = f"\nSelect (1-{len(display_releases)}), 'more' for full list, 0 to cancel: "
1805
+ else:
1806
+ prompt = f"\nSelect a release (1-{len(display_releases)}, 0 to cancel): "
1807
+
1808
+ choice = input(prompt).strip().lower()
1809
+
1643
1810
  if choice == "0":
1644
1811
  self.logger.info("Selection cancelled by user")
1645
1812
  raise KeyboardInterrupt
1813
+
1814
+ # Handle 'more' command to show full flat list
1815
+ if choice in ('more', 'm', 'all', 'a') and showing_categorized:
1816
+ print("\n" + "=" * 60)
1817
+ print("FULL LIST (all results)")
1818
+ print("=" * 60 + "\n")
1819
+ print_releases(release_dicts, target_artist=artist, use_colors=True)
1820
+ display_releases = release_objects
1821
+ showing_categorized = False
1822
+ continue
1823
+
1646
1824
  choice_num = int(choice)
1647
- if 1 <= choice_num <= len(results):
1648
- selection_index = choice_num - 1
1825
+ if 1 <= choice_num <= len(display_releases):
1826
+ # Map selection back to original results index for API call
1827
+ selected_release = display_releases[choice_num - 1]
1828
+
1829
+ # Find matching index in original results by download_url
1830
+ selection_index = self._find_original_index(
1831
+ selected_release, results, release_objects
1832
+ )
1833
+
1834
+ if selection_index < 0:
1835
+ # Fallback: use display index if mapping fails
1836
+ self.logger.warning("Could not map selection to original index, using display index")
1837
+ selection_index = choice_num - 1
1649
1838
  else:
1650
- print(f"Please enter a number between 0 and {len(results)}")
1839
+ print(f"Please enter a number between 0 and {len(display_releases)}")
1651
1840
  except ValueError:
1652
- print("Please enter a valid number")
1841
+ if showing_categorized:
1842
+ print("Please enter a number or 'more'")
1843
+ else:
1844
+ print("Please enter a valid number")
1653
1845
  except KeyboardInterrupt:
1654
1846
  print()
1655
1847
  raise
@@ -1667,10 +1859,80 @@ class JobMonitor:
1667
1859
 
1668
1860
  except Exception as e:
1669
1861
  self.logger.error(f"Error handling audio selection: {e}")
1862
+
1863
+ def _find_original_index(
1864
+ self,
1865
+ selected_release: Release,
1866
+ original_results: List[Dict[str, Any]],
1867
+ release_objects: List[Release],
1868
+ ) -> int:
1869
+ """
1870
+ Map a selected Release back to its index in the original API results.
1871
+
1872
+ This is needed because categorized display may reorder results,
1873
+ but the API selection endpoint needs the original index.
1874
+
1875
+ Args:
1876
+ selected_release: The Release object user selected
1877
+ original_results: Original API results (list of dicts)
1878
+ release_objects: Release objects in same order as original_results
1879
+
1880
+ Returns:
1881
+ Index in original_results, or -1 if not found
1882
+ """
1883
+ # First try: match by object identity in release_objects
1884
+ for i, release in enumerate(release_objects):
1885
+ if release is selected_release:
1886
+ return i
1887
+
1888
+ # Second try: match by download_url
1889
+ selected_url = getattr(selected_release, 'download_url', None)
1890
+ if selected_url:
1891
+ for i, r in enumerate(original_results):
1892
+ if r.get('url') == selected_url:
1893
+ return i
1894
+
1895
+ # Third try: match by info_hash (for torrent sources)
1896
+ selected_hash = getattr(selected_release, 'info_hash', None)
1897
+ if selected_hash:
1898
+ for i, r in enumerate(original_results):
1899
+ if r.get('source_id') == selected_hash:
1900
+ return i
1901
+
1902
+ # Fourth try: match by title + artist + provider
1903
+ selected_title = getattr(selected_release, 'title', '')
1904
+ selected_artist = getattr(selected_release, 'artist', '')
1905
+ selected_source = getattr(selected_release, 'source_name', '')
1906
+
1907
+ for i, r in enumerate(original_results):
1908
+ if (r.get('title') == selected_title and
1909
+ r.get('artist') == selected_artist and
1910
+ r.get('provider') == selected_source):
1911
+ return i
1912
+
1913
+ return -1
1670
1914
 
1671
1915
  def _open_instrumental_review_and_wait(self, job_id: str) -> None:
1672
1916
  """Open browser to instrumental review UI and wait for selection."""
1673
- review_url = f"{self.config.review_ui_url}/jobs/{job_id}/instrumental-review"
1917
+ # Get instrumental token from job data
1918
+ instrumental_token = ''
1919
+ try:
1920
+ job_data = self.client.get_job(job_id)
1921
+ instrumental_token = job_data.get('instrumental_token', '')
1922
+ except Exception:
1923
+ pass
1924
+
1925
+ # Build the review URL with API endpoint and token
1926
+ # The instrumental UI is hosted at /instrumental/ on the frontend domain
1927
+ base_api_url = f"{self.config.service_url}/api/jobs/{job_id}"
1928
+ encoded_api_url = urllib.parse.quote(base_api_url, safe='')
1929
+
1930
+ # Use /instrumental/ path on the frontend (same domain as review_ui_url but different path)
1931
+ # review_ui_url is like https://gen.nomadkaraoke.com/lyrics, we want /instrumental/
1932
+ frontend_base = self.config.review_ui_url.rsplit('/', 1)[0] # Remove /lyrics
1933
+ review_url = f"{frontend_base}/instrumental/?baseApiUrl={encoded_api_url}"
1934
+ if instrumental_token:
1935
+ review_url += f"&instrumentalToken={instrumental_token}"
1674
1936
 
1675
1937
  self.logger.info("")
1676
1938
  self.logger.info("=" * 60)
@@ -2149,6 +2411,14 @@ class JobMonitor:
2149
2411
  self.handle_instrumental_selection(job_id)
2150
2412
  self._instrumental_prompted = True
2151
2413
 
2414
+ elif status == 'instrumental_selected':
2415
+ # Check if this was auto-selected due to existing instrumental
2416
+ selection = job_data.get('state_data', {}).get('instrumental_selection', '')
2417
+ if selection == 'custom' and not self._instrumental_prompted:
2418
+ self.logger.info("")
2419
+ self.logger.info("Using user-provided instrumental (--existing_instrumental)")
2420
+ self._instrumental_prompted = True
2421
+
2152
2422
  elif status == 'complete':
2153
2423
  self.logger.info("")
2154
2424
  self.logger.info("=" * 60)
@@ -2776,6 +3046,8 @@ def main():
2776
3046
  logger.info(f"Searching for: {artist} - {title}")
2777
3047
  if getattr(args, 'auto_download', False) or config.non_interactive:
2778
3048
  logger.info(f"Auto-download: enabled (will auto-select best source)")
3049
+ if getattr(args, 'theme', None):
3050
+ logger.info(f"Theme: {args.theme}")
2779
3051
  if args.style_params_json:
2780
3052
  logger.info(f"Style: {args.style_params_json}")
2781
3053
  logger.info(f"CDG: {args.enable_cdg}, TXT: {args.enable_txt}")
@@ -2802,6 +3074,7 @@ def main():
2802
3074
  artist=artist,
2803
3075
  title=title,
2804
3076
  auto_download=auto_download,
3077
+ style_params_path=args.style_params_json,
2805
3078
  enable_cdg=args.enable_cdg,
2806
3079
  enable_txt=args.enable_txt,
2807
3080
  brand_prefix=args.brand_prefix,
@@ -2816,6 +3089,8 @@ def main():
2816
3089
  clean_instrumental_model=getattr(args, 'clean_instrumental_model', None),
2817
3090
  backing_vocals_models=getattr(args, 'backing_vocals_models', None),
2818
3091
  other_stems_models=getattr(args, 'other_stems_models', None),
3092
+ # Theme system
3093
+ theme_id=getattr(args, 'theme', None),
2819
3094
  )
2820
3095
 
2821
3096
  job_id = result.get('job_id')
@@ -2852,6 +3127,8 @@ def main():
2852
3127
  logger.info(f"Title: {title}")
2853
3128
  if not artist and not title and is_url_input:
2854
3129
  logger.info(f"Artist/Title: (will be auto-detected from URL)")
3130
+ if getattr(args, 'theme', None):
3131
+ logger.info(f"Theme: {args.theme}")
2855
3132
  if args.style_params_json:
2856
3133
  logger.info(f"Style: {args.style_params_json}")
2857
3134
  logger.info(f"CDG: {args.enable_cdg}, TXT: {args.enable_txt}")
@@ -2952,6 +3229,8 @@ def main():
2952
3229
  # Two-phase workflow (Batch 6)
2953
3230
  prep_only=getattr(args, 'prep_only', False),
2954
3231
  keep_brand_code=keep_brand_code_value,
3232
+ # Theme system
3233
+ theme_id=getattr(args, 'theme', None),
2955
3234
  )
2956
3235
  else:
2957
3236
  # File-based job submission
@@ -2984,6 +3263,8 @@ def main():
2984
3263
  # Two-phase workflow (Batch 6)
2985
3264
  prep_only=getattr(args, 'prep_only', False),
2986
3265
  keep_brand_code=keep_brand_code_value,
3266
+ # Theme system
3267
+ theme_id=getattr(args, 'theme', None),
2987
3268
  )
2988
3269
  job_id = result.get('job_id')
2989
3270
  style_assets = result.get('style_assets_uploaded', [])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: karaoke-gen
3
- Version: 0.75.16
3
+ Version: 0.76.20
4
4
  Summary: Generate karaoke videos with synchronized lyrics. Handles the entire process from downloading audio and lyrics to creating the final video with title screens.
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -22,7 +22,7 @@ Requires-Dist: dropbox (>=12)
22
22
  Requires-Dist: fastapi (>=0.104.0)
23
23
  Requires-Dist: fetch-lyrics-from-genius (>=0.1)
24
24
  Requires-Dist: ffmpeg-python (>=0.2.0,<0.3.0)
25
- Requires-Dist: flacfetch (>=0.3)
25
+ Requires-Dist: flacfetch (>=0.9.0)
26
26
  Requires-Dist: fonttools (>=4.55)
27
27
  Requires-Dist: google-api-python-client
28
28
  Requires-Dist: google-auth
@@ -48,6 +48,7 @@ Requires-Dist: lyrics-converter (>=0.2.1)
48
48
  Requires-Dist: lyricsgenius (>=3)
49
49
  Requires-Dist: matplotlib (>=3)
50
50
  Requires-Dist: metaphone (>=0.6)
51
+ Requires-Dist: mutagen (>=1.47)
51
52
  Requires-Dist: nest-asyncio (>=1.5)
52
53
  Requires-Dist: nltk (>=3.9)
53
54
  Requires-Dist: numpy (>=2)
@@ -94,7 +95,7 @@ Description-Content-Type: text/markdown
94
95
  # Karaoke Generator 🎶 🎥 🚀
95
96
 
96
97
  ![PyPI - Version](https://img.shields.io/pypi/v/karaoke-gen)
97
- ![Python Version](https://img.shields.io/badge/python-3.10+-blue)
98
+ ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/karaoke-gen)
98
99
  ![Tests](https://github.com/nomadkaraoke/karaoke-gen/workflows/Test%20and%20Publish/badge.svg)
99
100
  ![Test Coverage](https://codecov.io/gh/nomadkaraoke/karaoke-gen/branch/main/graph/badge.svg)
100
101
 
@@ -147,10 +148,44 @@ pip install karaoke-gen
147
148
  This installs both `karaoke-gen` (local) and `karaoke-gen-remote` (cloud) CLIs.
148
149
 
149
150
  ### Requirements
150
- - Python 3.10+
151
+ - Python 3.10-3.13
151
152
  - FFmpeg
152
153
  - For local processing: CUDA-capable GPU or Apple Silicon CPU recommended
153
154
 
155
+ ### Transcription Provider Setup
156
+
157
+ **Transcription is required** for creating karaoke videos with synchronized lyrics. The system needs word-level timing data to display lyrics in sync with the music.
158
+
159
+ #### Option 1: AudioShake (Recommended)
160
+ Commercial service with high-quality transcription. Best for production use.
161
+
162
+ ```bash
163
+ export AUDIOSHAKE_API_TOKEN="your_audioshake_token"
164
+ ```
165
+
166
+ Get an API key at [https://www.audioshake.ai/](https://www.audioshake.ai/) (available to business customers only at the time of writing).
167
+
168
+ #### Option 2: Whisper via RunPod
169
+ Open-source alternative using OpenAI's Whisper model on RunPod infrastructure.
170
+
171
+ ```bash
172
+ export RUNPOD_API_KEY="your_runpod_key"
173
+ export WHISPER_RUNPOD_ID="your_whisper_endpoint_id"
174
+ ```
175
+
176
+ Set up a Whisper endpoint at [https://www.runpod.io/](https://www.runpod.io/)
177
+
178
+ #### Without Transcription (Instrumental Only)
179
+ If you don't need synchronized lyrics, use the `--skip-lyrics` flag:
180
+
181
+ ```bash
182
+ karaoke-gen --skip-lyrics "Artist" "Title"
183
+ ```
184
+
185
+ This creates an instrumental-only karaoke video without lyrics overlay.
186
+
187
+ > **Note:** See `lyrics_transcriber_temp/README.md` for detailed transcription provider configuration options.
188
+
154
189
  ---
155
190
 
156
191
  ## 🖥️ Local CLI (`karaoke-gen`)
@@ -273,7 +308,7 @@ karaoke-gen-remote \
273
308
  |----------|-------------|---------|
274
309
  | `KARAOKE_GEN_URL` | Backend service URL | Required |
275
310
  | `KARAOKE_GEN_AUTH_TOKEN` | Admin auth token (for protected endpoints) | Optional |
276
- | `REVIEW_UI_URL` | Lyrics review UI URL | `https://lyrics.nomadkaraoke.com` |
311
+ | `REVIEW_UI_URL` | Lyrics review UI URL | `https://gen.nomadkaraoke.com/lyrics/` |
277
312
  | `POLL_INTERVAL` | Seconds between status polls | `5` |
278
313
 
279
314
  **Note:** The `REVIEW_UI_URL` defaults to the hosted lyrics review UI. For local development, set it to `http://localhost:5173` if you're running the frontend dev server.
@@ -568,6 +603,73 @@ Check backend health status.
568
603
 
569
604
  ---
570
605
 
606
+ ## 🔧 Troubleshooting
607
+
608
+ ### "No suitable files found for processing"
609
+
610
+ This error occurs during the finalisation step when the `(With Vocals).mkv` file is missing. This file is created during lyrics transcription.
611
+
612
+ **Most common cause:** No transcription provider configured.
613
+
614
+ **Quick fix:**
615
+ 1. Check if transcription providers are configured:
616
+ ```bash
617
+ echo $AUDIOSHAKE_API_TOKEN
618
+ echo $RUNPOD_API_KEY
619
+ ```
620
+
621
+ 2. If both are empty, set up a provider (see [Transcription Provider Setup](#transcription-provider-setup))
622
+
623
+ 3. Or use `--skip-lyrics` for instrumental-only karaoke:
624
+ ```bash
625
+ karaoke-gen --skip-lyrics "Artist" "Title"
626
+ ```
627
+
628
+ **Other causes:**
629
+ - Invalid API credentials - verify your tokens are correct and active
630
+ - API service unavailable - check service status pages
631
+ - Network connectivity issues - ensure you can reach the API endpoints
632
+ - Transcription timeout - try again or use a different provider
633
+
634
+ ### Transcription Fails Silently
635
+
636
+ If karaoke-gen runs without errors but produces no synchronized lyrics:
637
+
638
+ 1. **Check logs** - Run with `--log_level debug` for detailed output:
639
+ ```bash
640
+ karaoke-gen --log_level debug "Artist" "Title"
641
+ ```
642
+
643
+ 2. **Verify environment variables** - Ensure API tokens are exported in your shell:
644
+ ```bash
645
+ # Check if set
646
+ printenv | grep -E "(AUDIOSHAKE|RUNPOD|WHISPER)"
647
+
648
+ # Set in current session
649
+ export AUDIOSHAKE_API_TOKEN="your_token"
650
+ ```
651
+
652
+ 3. **Test API connectivity** - Verify you can reach the transcription service
653
+
654
+ ### "No lyrics found from any source"
655
+
656
+ This warning means no reference lyrics were fetched from online sources (Genius, Spotify, Musixmatch). The transcription will still work, but auto-correction may be less accurate.
657
+
658
+ **To fix:**
659
+ - Set `GENIUS_API_TOKEN` for Genius lyrics
660
+ - Set `SPOTIFY_COOKIE_SP_DC` for Spotify lyrics
661
+ - Set `RAPIDAPI_KEY` for Musixmatch lyrics
662
+ - Or provide lyrics manually with `--lyrics_file /path/to/lyrics.txt`
663
+
664
+ ### Video Quality Issues
665
+
666
+ If the output video has quality problems:
667
+ - Ensure FFmpeg is properly installed: `ffmpeg -version`
668
+ - Check available codecs: `ffmpeg -codecs`
669
+ - For 4K output, ensure sufficient disk space (10GB+ per track)
670
+
671
+ ---
672
+
571
673
  ## 🧪 Development
572
674
 
573
675
  ### Running Tests