karaoke-gen 0.75.16__py3-none-any.whl → 0.75.53__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- karaoke_gen/audio_fetcher.py +766 -33
- karaoke_gen/audio_processor.py +4 -0
- karaoke_gen/instrumental_review/static/index.html +37 -14
- karaoke_gen/karaoke_finalise/karaoke_finalise.py +25 -1
- karaoke_gen/karaoke_gen.py +18 -14
- karaoke_gen/lyrics_processor.py +97 -6
- karaoke_gen/utils/cli_args.py +6 -5
- karaoke_gen/utils/gen_cli.py +30 -5
- karaoke_gen/utils/remote_cli.py +269 -15
- {karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/METADATA +106 -4
- {karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/RECORD +24 -24
- lyrics_transcriber/core/controller.py +76 -2
- lyrics_transcriber/frontend/package.json +1 -1
- lyrics_transcriber/frontend/src/App.tsx +6 -4
- lyrics_transcriber/frontend/src/api.ts +25 -10
- lyrics_transcriber/frontend/web_assets/assets/{index-COYImAcx.js → index-BECn1o8Q.js} +38 -22
- lyrics_transcriber/frontend/web_assets/assets/{index-COYImAcx.js.map → index-BECn1o8Q.js.map} +1 -1
- lyrics_transcriber/frontend/web_assets/index.html +1 -1
- lyrics_transcriber/output/countdown_processor.py +39 -0
- lyrics_transcriber/transcribers/audioshake.py +96 -7
- lyrics_transcriber/types.py +14 -12
- {karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/WHEEL +0 -0
- {karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/entry_points.txt +0 -0
- {karaoke_gen-0.75.16.dist-info → karaoke_gen-0.75.53.dist-info}/licenses/LICENSE +0 -0
karaoke_gen/utils/remote_cli.py
CHANGED
|
@@ -31,13 +31,16 @@ import webbrowser
|
|
|
31
31
|
from dataclasses import dataclass
|
|
32
32
|
from enum import Enum
|
|
33
33
|
from pathlib import Path
|
|
34
|
-
from typing import Any, Dict, Optional
|
|
34
|
+
from typing import Any, Dict, List, Optional
|
|
35
35
|
|
|
36
36
|
import requests
|
|
37
37
|
|
|
38
38
|
from .cli_args import create_parser, process_style_overrides, is_url, is_file
|
|
39
39
|
# Use flacfetch's shared display functions for consistent formatting
|
|
40
40
|
from flacfetch import print_releases, Release
|
|
41
|
+
from flacfetch.core.categorize import categorize_releases
|
|
42
|
+
from flacfetch.core.models import TrackQuery
|
|
43
|
+
from flacfetch.interface.cli import print_categorized_releases
|
|
41
44
|
|
|
42
45
|
|
|
43
46
|
class JobStatus(str, Enum):
|
|
@@ -137,7 +140,18 @@ class RemoteKaraokeClient:
|
|
|
137
140
|
return None
|
|
138
141
|
|
|
139
142
|
def refresh_auth(self) -> bool:
|
|
140
|
-
"""Refresh authentication token.
|
|
143
|
+
"""Refresh authentication token.
|
|
144
|
+
|
|
145
|
+
Only refreshes if we're using a gcloud-based token. If the user
|
|
146
|
+
provided a static token via KARAOKE_GEN_AUTH_TOKEN, we keep that
|
|
147
|
+
since it doesn't expire like gcloud identity tokens.
|
|
148
|
+
"""
|
|
149
|
+
# Don't refresh if using a static admin token from env
|
|
150
|
+
if os.environ.get('KARAOKE_GEN_AUTH_TOKEN'):
|
|
151
|
+
# Already have a valid static token, no need to refresh
|
|
152
|
+
return True
|
|
153
|
+
|
|
154
|
+
# Try to refresh gcloud identity token
|
|
141
155
|
token = self._get_auth_token_from_gcloud()
|
|
142
156
|
if token:
|
|
143
157
|
self.config.auth_token = token
|
|
@@ -1081,7 +1095,8 @@ class RemoteKaraokeClient:
|
|
|
1081
1095
|
if url.startswith('/'):
|
|
1082
1096
|
url = f"{self.config.service_url}{url}"
|
|
1083
1097
|
|
|
1084
|
-
|
|
1098
|
+
# Use session headers (includes Authorization) for authenticated downloads
|
|
1099
|
+
response = self.session.get(url, stream=True, timeout=600)
|
|
1085
1100
|
if response.status_code != 200:
|
|
1086
1101
|
return False
|
|
1087
1102
|
|
|
@@ -1192,6 +1207,7 @@ class RemoteKaraokeClient:
|
|
|
1192
1207
|
artist: Artist name to search for
|
|
1193
1208
|
title: Song title to search for
|
|
1194
1209
|
auto_download: Automatically select best audio source (skip interactive selection)
|
|
1210
|
+
style_params_path: Path to style_params.json (optional)
|
|
1195
1211
|
... other args same as submit_job()
|
|
1196
1212
|
|
|
1197
1213
|
Returns:
|
|
@@ -1232,6 +1248,40 @@ class RemoteKaraokeClient:
|
|
|
1232
1248
|
if other_stems_models:
|
|
1233
1249
|
request_data['other_stems_models'] = other_stems_models
|
|
1234
1250
|
|
|
1251
|
+
# Prepare style files for upload if provided
|
|
1252
|
+
style_files = []
|
|
1253
|
+
local_style_files: Dict[str, str] = {} # file_type -> local_path
|
|
1254
|
+
|
|
1255
|
+
if style_params_path and os.path.isfile(style_params_path):
|
|
1256
|
+
self.logger.info(f"Parsing style configuration: {style_params_path}")
|
|
1257
|
+
|
|
1258
|
+
# Add the style_params.json itself
|
|
1259
|
+
style_files.append({
|
|
1260
|
+
'filename': Path(style_params_path).name,
|
|
1261
|
+
'content_type': 'application/json',
|
|
1262
|
+
'file_type': 'style_params'
|
|
1263
|
+
})
|
|
1264
|
+
local_style_files['style_params'] = style_params_path
|
|
1265
|
+
|
|
1266
|
+
# Parse style params to find referenced files (backgrounds, fonts)
|
|
1267
|
+
style_assets = self._parse_style_params(style_params_path)
|
|
1268
|
+
|
|
1269
|
+
for asset_key, asset_path in style_assets.items():
|
|
1270
|
+
if os.path.isfile(asset_path):
|
|
1271
|
+
# Use full path for content type detection (not just extension)
|
|
1272
|
+
content_type = self._get_content_type(asset_path)
|
|
1273
|
+
style_files.append({
|
|
1274
|
+
'filename': Path(asset_path).name,
|
|
1275
|
+
'content_type': content_type,
|
|
1276
|
+
'file_type': asset_key # e.g., 'style_intro_background'
|
|
1277
|
+
})
|
|
1278
|
+
local_style_files[asset_key] = asset_path
|
|
1279
|
+
self.logger.info(f" Will upload style asset: {asset_key}")
|
|
1280
|
+
|
|
1281
|
+
if style_files:
|
|
1282
|
+
request_data['style_files'] = style_files
|
|
1283
|
+
self.logger.info(f"Including {len(style_files)} style files in request")
|
|
1284
|
+
|
|
1235
1285
|
response = self._request('POST', '/api/audio-search/search', json=request_data)
|
|
1236
1286
|
|
|
1237
1287
|
if response.status_code == 404:
|
|
@@ -1248,7 +1298,52 @@ class RemoteKaraokeClient:
|
|
|
1248
1298
|
error_detail = response.text
|
|
1249
1299
|
raise RuntimeError(f"Error searching for audio: {error_detail}")
|
|
1250
1300
|
|
|
1251
|
-
|
|
1301
|
+
result = response.json()
|
|
1302
|
+
|
|
1303
|
+
# Upload style files if we have signed URLs
|
|
1304
|
+
style_upload_urls = result.get('style_upload_urls', [])
|
|
1305
|
+
if style_upload_urls and local_style_files:
|
|
1306
|
+
self.logger.info(f"Uploading {len(style_upload_urls)} style files...")
|
|
1307
|
+
|
|
1308
|
+
for url_info in style_upload_urls:
|
|
1309
|
+
file_type = url_info['file_type']
|
|
1310
|
+
upload_url = url_info['upload_url']
|
|
1311
|
+
|
|
1312
|
+
local_path = local_style_files.get(file_type)
|
|
1313
|
+
if not local_path:
|
|
1314
|
+
self.logger.warning(f"No local file for {file_type}, skipping upload")
|
|
1315
|
+
continue
|
|
1316
|
+
|
|
1317
|
+
self.logger.info(f" Uploading {file_type}: {Path(local_path).name}")
|
|
1318
|
+
|
|
1319
|
+
try:
|
|
1320
|
+
with open(local_path, 'rb') as f:
|
|
1321
|
+
file_content = f.read()
|
|
1322
|
+
|
|
1323
|
+
# Use the content type from the original file info, not re-derived
|
|
1324
|
+
# This ensures it matches the signed URL which was generated with
|
|
1325
|
+
# the same content type we specified in the request
|
|
1326
|
+
content_type = self._get_content_type(local_path)
|
|
1327
|
+
|
|
1328
|
+
# Use PUT to upload directly to signed URL
|
|
1329
|
+
upload_response = requests.put(
|
|
1330
|
+
upload_url,
|
|
1331
|
+
data=file_content,
|
|
1332
|
+
headers={'Content-Type': content_type},
|
|
1333
|
+
timeout=60
|
|
1334
|
+
)
|
|
1335
|
+
|
|
1336
|
+
if upload_response.status_code not in (200, 201):
|
|
1337
|
+
self.logger.error(f"Failed to upload {file_type}: {upload_response.status_code}")
|
|
1338
|
+
else:
|
|
1339
|
+
self.logger.info(f" ✓ Uploaded {file_type}")
|
|
1340
|
+
|
|
1341
|
+
except Exception as e:
|
|
1342
|
+
self.logger.error(f"Error uploading {file_type}: {e}")
|
|
1343
|
+
|
|
1344
|
+
self.logger.info("Style file uploads complete")
|
|
1345
|
+
|
|
1346
|
+
return result
|
|
1252
1347
|
|
|
1253
1348
|
def get_audio_search_results(self, job_id: str) -> Dict[str, Any]:
|
|
1254
1349
|
"""Get audio search results for a job awaiting selection."""
|
|
@@ -1398,16 +1493,21 @@ class JobMonitor:
|
|
|
1398
1493
|
base_api_url = f"{self.config.service_url}/api/review/{job_id}"
|
|
1399
1494
|
encoded_api_url = urllib.parse.quote(base_api_url, safe='')
|
|
1400
1495
|
|
|
1401
|
-
# Try to get audio hash from job data
|
|
1496
|
+
# Try to get audio hash and review token from job data
|
|
1497
|
+
audio_hash = ''
|
|
1498
|
+
review_token = ''
|
|
1402
1499
|
try:
|
|
1403
1500
|
job_data = self.client.get_job(job_id)
|
|
1404
1501
|
audio_hash = job_data.get('audio_hash', '')
|
|
1502
|
+
review_token = job_data.get('review_token', '')
|
|
1405
1503
|
except Exception:
|
|
1406
|
-
|
|
1504
|
+
pass
|
|
1407
1505
|
|
|
1408
1506
|
url = f"{self.config.review_ui_url}/?baseApiUrl={encoded_api_url}"
|
|
1409
1507
|
if audio_hash:
|
|
1410
1508
|
url += f"&audioHash={audio_hash}"
|
|
1509
|
+
if review_token:
|
|
1510
|
+
url += f"&reviewToken={review_token}"
|
|
1411
1511
|
|
|
1412
1512
|
self.logger.info(f"Opening lyrics review UI: {url}")
|
|
1413
1513
|
self.open_browser(url)
|
|
@@ -1608,8 +1708,34 @@ class JobMonitor:
|
|
|
1608
1708
|
"quality_str": result.get('quality_str') or result.get('quality', ''),
|
|
1609
1709
|
}
|
|
1610
1710
|
|
|
1711
|
+
def _convert_to_release_objects(self, release_dicts: List[Dict[str, Any]]) -> List[Release]:
    """
    Convert API result dicts to Release objects for categorization.

    Used by handle_audio_selection() to enable categorized display
    for large result sets (10+ results).

    Entries that cannot be converted are skipped, so the returned list may
    be shorter than ``release_dicts`` and positions in it do not necessarily
    line up with positions in the input. Each failure is logged at debug
    level, and a single warning summarises how many entries were dropped so
    the mismatch does not go unnoticed.

    Args:
        release_dicts: List of dicts in Release-compatible format

    Returns:
        List of Release objects (skipping any that fail to convert)
    """
    releases = []
    skipped = 0
    for d in release_dicts:
        try:
            releases.append(Release.from_dict(d))
        except Exception as e:
            # Best-effort: one malformed result must not abort the listing.
            skipped += 1
            self.logger.debug(f"Failed to convert result to Release: {e}")

    if skipped:
        # Surface the drop once, at a level users actually see.
        self.logger.warning(
            f"Skipped {skipped} of {len(release_dicts)} search results "
            "that could not be converted to Release objects"
        )
    return releases
|
|
1731
|
+
|
|
1611
1732
|
def handle_audio_selection(self, job_id: str) -> None:
|
|
1612
|
-
"""Handle audio source selection interaction (Batch 5).
|
|
1733
|
+
"""Handle audio source selection interaction (Batch 5).
|
|
1734
|
+
|
|
1735
|
+
For 10+ results, uses categorized display (grouped by Top Seeded,
|
|
1736
|
+
Album Releases, Hi-Res, etc.) with a 'more' command to show full list.
|
|
1737
|
+
For smaller result sets, uses flat list display.
|
|
1738
|
+
"""
|
|
1613
1739
|
self.logger.info("=" * 60)
|
|
1614
1740
|
self.logger.info("AUDIO SOURCE SELECTION NEEDED")
|
|
1615
1741
|
self.logger.info("=" * 60)
|
|
@@ -1619,6 +1745,7 @@ class JobMonitor:
|
|
|
1619
1745
|
results_data = self.client.get_audio_search_results(job_id)
|
|
1620
1746
|
results = results_data.get('results', [])
|
|
1621
1747
|
artist = results_data.get('artist', 'Unknown')
|
|
1748
|
+
title = results_data.get('title', 'Unknown')
|
|
1622
1749
|
|
|
1623
1750
|
if not results:
|
|
1624
1751
|
self.logger.error("No search results available")
|
|
@@ -1633,23 +1760,71 @@ class JobMonitor:
|
|
|
1633
1760
|
# This gives us the same rich, colorized output as the local CLI
|
|
1634
1761
|
release_dicts = [self._convert_api_result_to_release_dict(r) for r in results]
|
|
1635
1762
|
|
|
1636
|
-
#
|
|
1637
|
-
|
|
1763
|
+
# Convert to Release objects for categorization
|
|
1764
|
+
release_objects = self._convert_to_release_objects(release_dicts)
|
|
1765
|
+
|
|
1766
|
+
# Use categorized display for large result sets (10+)
|
|
1767
|
+
# This groups results into categories: Top Seeded, Album Releases, Hi-Res, etc.
|
|
1768
|
+
use_categorized = len(release_objects) >= 10
|
|
1769
|
+
|
|
1770
|
+
if use_categorized:
|
|
1771
|
+
# Create query for categorization
|
|
1772
|
+
query = TrackQuery(artist=artist, title=title)
|
|
1773
|
+
categorized = categorize_releases(release_objects, query)
|
|
1774
|
+
# print_categorized_releases returns the flattened list of displayed releases
|
|
1775
|
+
display_releases = print_categorized_releases(categorized, target_artist=artist, use_colors=True)
|
|
1776
|
+
showing_categorized = True
|
|
1777
|
+
else:
|
|
1778
|
+
# Small result set - use simple flat list
|
|
1779
|
+
print_releases(release_dicts, target_artist=artist, use_colors=True)
|
|
1780
|
+
display_releases = release_objects
|
|
1781
|
+
showing_categorized = False
|
|
1638
1782
|
|
|
1639
1783
|
selection_index = -1
|
|
1640
1784
|
while selection_index < 0:
|
|
1641
1785
|
try:
|
|
1642
|
-
|
|
1786
|
+
if showing_categorized:
|
|
1787
|
+
prompt = f"\nSelect (1-{len(display_releases)}), 'more' for full list, 0 to cancel: "
|
|
1788
|
+
else:
|
|
1789
|
+
prompt = f"\nSelect a release (1-{len(display_releases)}, 0 to cancel): "
|
|
1790
|
+
|
|
1791
|
+
choice = input(prompt).strip().lower()
|
|
1792
|
+
|
|
1643
1793
|
if choice == "0":
|
|
1644
1794
|
self.logger.info("Selection cancelled by user")
|
|
1645
1795
|
raise KeyboardInterrupt
|
|
1796
|
+
|
|
1797
|
+
# Handle 'more' command to show full flat list
|
|
1798
|
+
if choice in ('more', 'm', 'all', 'a') and showing_categorized:
|
|
1799
|
+
print("\n" + "=" * 60)
|
|
1800
|
+
print("FULL LIST (all results)")
|
|
1801
|
+
print("=" * 60 + "\n")
|
|
1802
|
+
print_releases(release_dicts, target_artist=artist, use_colors=True)
|
|
1803
|
+
display_releases = release_objects
|
|
1804
|
+
showing_categorized = False
|
|
1805
|
+
continue
|
|
1806
|
+
|
|
1646
1807
|
choice_num = int(choice)
|
|
1647
|
-
if 1 <= choice_num <= len(
|
|
1648
|
-
|
|
1808
|
+
if 1 <= choice_num <= len(display_releases):
|
|
1809
|
+
# Map selection back to original results index for API call
|
|
1810
|
+
selected_release = display_releases[choice_num - 1]
|
|
1811
|
+
|
|
1812
|
+
# Find matching index in original results by download_url
|
|
1813
|
+
selection_index = self._find_original_index(
|
|
1814
|
+
selected_release, results, release_objects
|
|
1815
|
+
)
|
|
1816
|
+
|
|
1817
|
+
if selection_index < 0:
|
|
1818
|
+
# Fallback: use display index if mapping fails
|
|
1819
|
+
self.logger.warning("Could not map selection to original index, using display index")
|
|
1820
|
+
selection_index = choice_num - 1
|
|
1649
1821
|
else:
|
|
1650
|
-
print(f"Please enter a number between 0 and {len(
|
|
1822
|
+
print(f"Please enter a number between 0 and {len(display_releases)}")
|
|
1651
1823
|
except ValueError:
|
|
1652
|
-
|
|
1824
|
+
if showing_categorized:
|
|
1825
|
+
print("Please enter a number or 'more'")
|
|
1826
|
+
else:
|
|
1827
|
+
print("Please enter a valid number")
|
|
1653
1828
|
except KeyboardInterrupt:
|
|
1654
1829
|
print()
|
|
1655
1830
|
raise
|
|
@@ -1667,10 +1842,80 @@ class JobMonitor:
|
|
|
1667
1842
|
|
|
1668
1843
|
except Exception as e:
|
|
1669
1844
|
self.logger.error(f"Error handling audio selection: {e}")
|
|
1845
|
+
|
|
1846
|
+
def _find_original_index(
    self,
    selected_release: Release,
    original_results: List[Dict[str, Any]],
    release_objects: List[Release],
) -> int:
    """
    Map a selected Release back to its index in the original API results.

    This is needed because categorized display may reorder results,
    but the API selection endpoint needs the original index.

    Matching strategies are tried in order, first hit wins:
      1. object identity within ``release_objects`` (only when that list
         has the same length as ``original_results``, see below)
      2. ``download_url`` against each result's ``'url'``
      3. ``info_hash`` against each result's ``'source_id'``
      4. ``title`` + ``artist`` + ``source_name`` against each result's
         ``'title'`` / ``'artist'`` / ``'provider'``

    Args:
        selected_release: The Release object user selected
        original_results: Original API results (list of dicts)
        release_objects: Release objects derived from original_results,
            in the same relative order; may be shorter if some results
            could not be converted

    Returns:
        Index in original_results, or -1 if not found
    """
    # First try: match by object identity in release_objects.
    # A position in release_objects equals a position in original_results
    # only if no result was dropped during conversion; when the lengths
    # differ the positions are shifted, so skip this shortcut and rely on
    # the attribute-based matching below.
    if len(release_objects) == len(original_results):
        for i, release in enumerate(release_objects):
            if release is selected_release:
                return i

    # Second try: match by download_url
    selected_url = getattr(selected_release, 'download_url', None)
    if selected_url:
        for i, r in enumerate(original_results):
            if r.get('url') == selected_url:
                return i

    # Third try: match by info_hash (for torrent sources)
    selected_hash = getattr(selected_release, 'info_hash', None)
    if selected_hash:
        for i, r in enumerate(original_results):
            if r.get('source_id') == selected_hash:
                return i

    # Fourth try: match by title + artist + provider
    selected_title = getattr(selected_release, 'title', '')
    selected_artist = getattr(selected_release, 'artist', '')
    selected_source = getattr(selected_release, 'source_name', '')

    for i, r in enumerate(original_results):
        if (r.get('title') == selected_title and
                r.get('artist') == selected_artist and
                r.get('provider') == selected_source):
            return i

    return -1
|
|
1670
1897
|
|
|
1671
1898
|
def _open_instrumental_review_and_wait(self, job_id: str) -> None:
|
|
1672
1899
|
"""Open browser to instrumental review UI and wait for selection."""
|
|
1673
|
-
|
|
1900
|
+
# Get instrumental token from job data
|
|
1901
|
+
instrumental_token = ''
|
|
1902
|
+
try:
|
|
1903
|
+
job_data = self.client.get_job(job_id)
|
|
1904
|
+
instrumental_token = job_data.get('instrumental_token', '')
|
|
1905
|
+
except Exception:
|
|
1906
|
+
pass
|
|
1907
|
+
|
|
1908
|
+
# Build the review URL with API endpoint and token
|
|
1909
|
+
# The instrumental UI is hosted at /instrumental/ on the frontend domain
|
|
1910
|
+
base_api_url = f"{self.config.service_url}/api/jobs/{job_id}"
|
|
1911
|
+
encoded_api_url = urllib.parse.quote(base_api_url, safe='')
|
|
1912
|
+
|
|
1913
|
+
# Use /instrumental/ path on the frontend (same domain as review_ui_url but different path)
|
|
1914
|
+
# review_ui_url is like https://gen.nomadkaraoke.com/lyrics, we want /instrumental/
|
|
1915
|
+
frontend_base = self.config.review_ui_url.rsplit('/', 1)[0] # Remove /lyrics
|
|
1916
|
+
review_url = f"{frontend_base}/instrumental/?baseApiUrl={encoded_api_url}"
|
|
1917
|
+
if instrumental_token:
|
|
1918
|
+
review_url += f"&instrumentalToken={instrumental_token}"
|
|
1674
1919
|
|
|
1675
1920
|
self.logger.info("")
|
|
1676
1921
|
self.logger.info("=" * 60)
|
|
@@ -2149,6 +2394,14 @@ class JobMonitor:
|
|
|
2149
2394
|
self.handle_instrumental_selection(job_id)
|
|
2150
2395
|
self._instrumental_prompted = True
|
|
2151
2396
|
|
|
2397
|
+
elif status == 'instrumental_selected':
|
|
2398
|
+
# Check if this was auto-selected due to existing instrumental
|
|
2399
|
+
selection = job_data.get('state_data', {}).get('instrumental_selection', '')
|
|
2400
|
+
if selection == 'custom' and not self._instrumental_prompted:
|
|
2401
|
+
self.logger.info("")
|
|
2402
|
+
self.logger.info("Using user-provided instrumental (--existing_instrumental)")
|
|
2403
|
+
self._instrumental_prompted = True
|
|
2404
|
+
|
|
2152
2405
|
elif status == 'complete':
|
|
2153
2406
|
self.logger.info("")
|
|
2154
2407
|
self.logger.info("=" * 60)
|
|
@@ -2802,6 +3055,7 @@ def main():
|
|
|
2802
3055
|
artist=artist,
|
|
2803
3056
|
title=title,
|
|
2804
3057
|
auto_download=auto_download,
|
|
3058
|
+
style_params_path=args.style_params_json,
|
|
2805
3059
|
enable_cdg=args.enable_cdg,
|
|
2806
3060
|
enable_txt=args.enable_txt,
|
|
2807
3061
|
brand_prefix=args.brand_prefix,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: karaoke-gen
|
|
3
|
-
Version: 0.75.
|
|
3
|
+
Version: 0.75.53
|
|
4
4
|
Summary: Generate karaoke videos with synchronized lyrics. Handles the entire process from downloading audio and lyrics to creating the final video with title screens.
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -22,7 +22,7 @@ Requires-Dist: dropbox (>=12)
|
|
|
22
22
|
Requires-Dist: fastapi (>=0.104.0)
|
|
23
23
|
Requires-Dist: fetch-lyrics-from-genius (>=0.1)
|
|
24
24
|
Requires-Dist: ffmpeg-python (>=0.2.0,<0.3.0)
|
|
25
|
-
Requires-Dist: flacfetch (>=0.
|
|
25
|
+
Requires-Dist: flacfetch (>=0.9.0)
|
|
26
26
|
Requires-Dist: fonttools (>=4.55)
|
|
27
27
|
Requires-Dist: google-api-python-client
|
|
28
28
|
Requires-Dist: google-auth
|
|
@@ -48,6 +48,7 @@ Requires-Dist: lyrics-converter (>=0.2.1)
|
|
|
48
48
|
Requires-Dist: lyricsgenius (>=3)
|
|
49
49
|
Requires-Dist: matplotlib (>=3)
|
|
50
50
|
Requires-Dist: metaphone (>=0.6)
|
|
51
|
+
Requires-Dist: mutagen (>=1.47)
|
|
51
52
|
Requires-Dist: nest-asyncio (>=1.5)
|
|
52
53
|
Requires-Dist: nltk (>=3.9)
|
|
53
54
|
Requires-Dist: numpy (>=2)
|
|
@@ -94,7 +95,7 @@ Description-Content-Type: text/markdown
|
|
|
94
95
|
# Karaoke Generator 🎶 🎥 🚀
|
|
95
96
|
|
|
96
97
|

|
|
97
|
-

|
|
98
99
|

|
|
99
100
|

|
|
100
101
|
|
|
@@ -147,10 +148,44 @@ pip install karaoke-gen
|
|
|
147
148
|
This installs both `karaoke-gen` (local) and `karaoke-gen-remote` (cloud) CLIs.
|
|
148
149
|
|
|
149
150
|
### Requirements
|
|
150
|
-
- Python 3.10
|
|
151
|
+
- Python 3.10-3.13
|
|
151
152
|
- FFmpeg
|
|
152
153
|
- For local processing: CUDA-capable GPU or Apple Silicon CPU recommended
|
|
153
154
|
|
|
155
|
+
### Transcription Provider Setup
|
|
156
|
+
|
|
157
|
+
**Transcription is required** for creating karaoke videos with synchronized lyrics. The system needs word-level timing data to display lyrics in sync with the music.
|
|
158
|
+
|
|
159
|
+
#### Option 1: AudioShake (Recommended)
|
|
160
|
+
Commercial service with high-quality transcription. Best for production use.
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
export AUDIOSHAKE_API_TOKEN="your_audioshake_token"
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
Get an API key at [https://www.audioshake.ai/](https://www.audioshake.ai/) (available to business customers only at the time of writing).
|
|
167
|
+
|
|
168
|
+
#### Option 2: Whisper via RunPod
|
|
169
|
+
Open-source alternative using OpenAI's Whisper model on RunPod infrastructure.
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
export RUNPOD_API_KEY="your_runpod_key"
|
|
173
|
+
export WHISPER_RUNPOD_ID="your_whisper_endpoint_id"
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
Set up a Whisper endpoint at [https://www.runpod.io/](https://www.runpod.io/)
|
|
177
|
+
|
|
178
|
+
#### Without Transcription (Instrumental Only)
|
|
179
|
+
If you don't need synchronized lyrics, use the `--skip-lyrics` flag:
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
karaoke-gen --skip-lyrics "Artist" "Title"
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
This creates an instrumental-only karaoke video without lyrics overlay.
|
|
186
|
+
|
|
187
|
+
> **Note:** See `lyrics_transcriber_temp/README.md` for detailed transcription provider configuration options.
|
|
188
|
+
|
|
154
189
|
---
|
|
155
190
|
|
|
156
191
|
## 🖥️ Local CLI (`karaoke-gen`)
|
|
@@ -568,6 +603,73 @@ Check backend health status.
|
|
|
568
603
|
|
|
569
604
|
---
|
|
570
605
|
|
|
606
|
+
## 🔧 Troubleshooting
|
|
607
|
+
|
|
608
|
+
### "No suitable files found for processing"
|
|
609
|
+
|
|
610
|
+
This error occurs during the finalisation step when the `(With Vocals).mkv` file is missing. This file is created during lyrics transcription.
|
|
611
|
+
|
|
612
|
+
**Most common cause:** No transcription provider configured.
|
|
613
|
+
|
|
614
|
+
**Quick fix:**
|
|
615
|
+
1. Check if transcription providers are configured:
|
|
616
|
+
```bash
|
|
617
|
+
echo $AUDIOSHAKE_API_TOKEN
|
|
618
|
+
echo $RUNPOD_API_KEY
|
|
619
|
+
```
|
|
620
|
+
|
|
621
|
+
2. If both are empty, set up a provider (see [Transcription Provider Setup](#transcription-provider-setup))
|
|
622
|
+
|
|
623
|
+
3. Or use `--skip-lyrics` for instrumental-only karaoke:
|
|
624
|
+
```bash
|
|
625
|
+
karaoke-gen --skip-lyrics "Artist" "Title"
|
|
626
|
+
```
|
|
627
|
+
|
|
628
|
+
**Other causes:**
|
|
629
|
+
- Invalid API credentials - verify your tokens are correct and active
|
|
630
|
+
- API service unavailable - check service status pages
|
|
631
|
+
- Network connectivity issues - ensure you can reach the API endpoints
|
|
632
|
+
- Transcription timeout - try again or use a different provider
|
|
633
|
+
|
|
634
|
+
### Transcription Fails Silently
|
|
635
|
+
|
|
636
|
+
If karaoke-gen runs without errors but produces no synchronized lyrics:
|
|
637
|
+
|
|
638
|
+
1. **Check logs** - Run with `--log_level debug` for detailed output:
|
|
639
|
+
```bash
|
|
640
|
+
karaoke-gen --log_level debug "Artist" "Title"
|
|
641
|
+
```
|
|
642
|
+
|
|
643
|
+
2. **Verify environment variables** - Ensure API tokens are exported in your shell:
|
|
644
|
+
```bash
|
|
645
|
+
# Check if set
|
|
646
|
+
printenv | grep -E "(AUDIOSHAKE|RUNPOD|WHISPER)"
|
|
647
|
+
|
|
648
|
+
# Set in current session
|
|
649
|
+
export AUDIOSHAKE_API_TOKEN="your_token"
|
|
650
|
+
```
|
|
651
|
+
|
|
652
|
+
3. **Test API connectivity** - Verify you can reach the transcription service
|
|
653
|
+
|
|
654
|
+
### "No lyrics found from any source"
|
|
655
|
+
|
|
656
|
+
This warning means no reference lyrics were fetched from online sources (Genius, Spotify, Musixmatch). The transcription will still work, but auto-correction may be less accurate.
|
|
657
|
+
|
|
658
|
+
**To fix:**
|
|
659
|
+
- Set `GENIUS_API_TOKEN` for Genius lyrics
|
|
660
|
+
- Set `SPOTIFY_COOKIE_SP_DC` for Spotify lyrics
|
|
661
|
+
- Set `RAPIDAPI_KEY` for Musixmatch lyrics
|
|
662
|
+
- Or provide lyrics manually with `--lyrics_file /path/to/lyrics.txt`
|
|
663
|
+
|
|
664
|
+
### Video Quality Issues
|
|
665
|
+
|
|
666
|
+
If the output video has quality problems:
|
|
667
|
+
- Ensure FFmpeg is properly installed: `ffmpeg -version`
|
|
668
|
+
- Check available codecs: `ffmpeg -codecs`
|
|
669
|
+
- For 4K output, ensure sufficient disk space (10GB+ per track)
|
|
670
|
+
|
|
671
|
+
---
|
|
672
|
+
|
|
571
673
|
## 🧪 Development
|
|
572
674
|
|
|
573
675
|
### Running Tests
|