karaoke-gen 0.75.16__py3-none-any.whl → 0.76.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- karaoke_gen/audio_fetcher.py +984 -33
- karaoke_gen/audio_processor.py +4 -0
- karaoke_gen/instrumental_review/static/index.html +37 -14
- karaoke_gen/karaoke_finalise/karaoke_finalise.py +25 -1
- karaoke_gen/karaoke_gen.py +208 -39
- karaoke_gen/lyrics_processor.py +111 -31
- karaoke_gen/utils/__init__.py +26 -0
- karaoke_gen/utils/cli_args.py +15 -6
- karaoke_gen/utils/gen_cli.py +30 -5
- karaoke_gen/utils/remote_cli.py +301 -20
- {karaoke_gen-0.75.16.dist-info → karaoke_gen-0.76.20.dist-info}/METADATA +107 -5
- {karaoke_gen-0.75.16.dist-info → karaoke_gen-0.76.20.dist-info}/RECORD +47 -43
- lyrics_transcriber/core/controller.py +76 -2
- lyrics_transcriber/frontend/index.html +5 -1
- lyrics_transcriber/frontend/package-lock.json +4553 -0
- lyrics_transcriber/frontend/package.json +4 -1
- lyrics_transcriber/frontend/playwright.config.ts +69 -0
- lyrics_transcriber/frontend/public/nomad-karaoke-logo.svg +5 -0
- lyrics_transcriber/frontend/src/App.tsx +94 -63
- lyrics_transcriber/frontend/src/api.ts +25 -10
- lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +55 -21
- lyrics_transcriber/frontend/src/components/AppHeader.tsx +65 -0
- lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +5 -5
- lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +9 -9
- lyrics_transcriber/frontend/src/components/EditModal.tsx +1 -1
- lyrics_transcriber/frontend/src/components/EditWordList.tsx +1 -1
- lyrics_transcriber/frontend/src/components/Header.tsx +34 -48
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +22 -21
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
- lyrics_transcriber/frontend/src/components/WordDivider.tsx +3 -3
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +2 -2
- lyrics_transcriber/frontend/src/components/shared/constants.ts +15 -5
- lyrics_transcriber/frontend/src/main.tsx +1 -7
- lyrics_transcriber/frontend/src/theme.ts +337 -135
- lyrics_transcriber/frontend/vite.config.ts +5 -0
- lyrics_transcriber/frontend/web_assets/assets/{index-COYImAcx.js → index-BECn1o8Q.js} +38 -22
- lyrics_transcriber/frontend/web_assets/assets/{index-COYImAcx.js.map → index-BECn1o8Q.js.map} +1 -1
- lyrics_transcriber/frontend/web_assets/index.html +1 -1
- lyrics_transcriber/frontend/yarn.lock +1005 -1046
- lyrics_transcriber/output/countdown_processor.py +39 -0
- lyrics_transcriber/review/server.py +1 -1
- lyrics_transcriber/transcribers/audioshake.py +96 -7
- lyrics_transcriber/types.py +14 -12
- {karaoke_gen-0.75.16.dist-info → karaoke_gen-0.76.20.dist-info}/WHEEL +0 -0
- {karaoke_gen-0.75.16.dist-info → karaoke_gen-0.76.20.dist-info}/entry_points.txt +0 -0
- {karaoke_gen-0.75.16.dist-info → karaoke_gen-0.76.20.dist-info}/licenses/LICENSE +0 -0
karaoke_gen/utils/remote_cli.py
CHANGED
|
@@ -31,13 +31,16 @@ import webbrowser
|
|
|
31
31
|
from dataclasses import dataclass
|
|
32
32
|
from enum import Enum
|
|
33
33
|
from pathlib import Path
|
|
34
|
-
from typing import Any, Dict, Optional
|
|
34
|
+
from typing import Any, Dict, List, Optional
|
|
35
35
|
|
|
36
36
|
import requests
|
|
37
37
|
|
|
38
38
|
from .cli_args import create_parser, process_style_overrides, is_url, is_file
|
|
39
39
|
# Use flacfetch's shared display functions for consistent formatting
|
|
40
40
|
from flacfetch import print_releases, Release
|
|
41
|
+
from flacfetch.core.categorize import categorize_releases
|
|
42
|
+
from flacfetch.core.models import TrackQuery
|
|
43
|
+
from flacfetch.interface.cli import print_categorized_releases
|
|
41
44
|
|
|
42
45
|
|
|
43
46
|
class JobStatus(str, Enum):
|
|
@@ -137,7 +140,18 @@ class RemoteKaraokeClient:
|
|
|
137
140
|
return None
|
|
138
141
|
|
|
139
142
|
def refresh_auth(self) -> bool:
|
|
140
|
-
"""Refresh authentication token.
|
|
143
|
+
"""Refresh authentication token.
|
|
144
|
+
|
|
145
|
+
Only refreshes if we're using a gcloud-based token. If the user
|
|
146
|
+
provided a static token via KARAOKE_GEN_AUTH_TOKEN, we keep that
|
|
147
|
+
since it doesn't expire like gcloud identity tokens.
|
|
148
|
+
"""
|
|
149
|
+
# Don't refresh if using a static admin token from env
|
|
150
|
+
if os.environ.get('KARAOKE_GEN_AUTH_TOKEN'):
|
|
151
|
+
# Already have a valid static token, no need to refresh
|
|
152
|
+
return True
|
|
153
|
+
|
|
154
|
+
# Try to refresh gcloud identity token
|
|
141
155
|
token = self._get_auth_token_from_gcloud()
|
|
142
156
|
if token:
|
|
143
157
|
self.config.auth_token = token
|
|
@@ -278,6 +292,8 @@ class RemoteKaraokeClient:
|
|
|
278
292
|
# Two-phase workflow (Batch 6)
|
|
279
293
|
prep_only: bool = False,
|
|
280
294
|
keep_brand_code: Optional[str] = None,
|
|
295
|
+
# Theme system
|
|
296
|
+
theme_id: Optional[str] = None,
|
|
281
297
|
) -> Dict[str, Any]:
|
|
282
298
|
"""
|
|
283
299
|
Submit a new karaoke generation job from a YouTube/online URL.
|
|
@@ -308,6 +324,7 @@ class RemoteKaraokeClient:
|
|
|
308
324
|
clean_instrumental_model: Model for clean instrumental separation
|
|
309
325
|
backing_vocals_models: List of models for backing vocals separation
|
|
310
326
|
other_stems_models: List of models for other stems (bass, drums, etc.)
|
|
327
|
+
theme_id: Theme ID from GCS themes (e.g., 'nomad', 'default')
|
|
311
328
|
"""
|
|
312
329
|
self.logger.info(f"Submitting URL-based job: {url}")
|
|
313
330
|
|
|
@@ -353,7 +370,10 @@ class RemoteKaraokeClient:
|
|
|
353
370
|
create_request['prep_only'] = prep_only
|
|
354
371
|
if keep_brand_code:
|
|
355
372
|
create_request['keep_brand_code'] = keep_brand_code
|
|
356
|
-
|
|
373
|
+
# Theme system
|
|
374
|
+
if theme_id:
|
|
375
|
+
create_request['theme_id'] = theme_id
|
|
376
|
+
|
|
357
377
|
self.logger.info(f"Creating URL-based job at {self.config.service_url}/api/jobs/create-from-url")
|
|
358
378
|
|
|
359
379
|
response = self._request('POST', '/api/jobs/create-from-url', json=create_request)
|
|
@@ -382,9 +402,9 @@ class RemoteKaraokeClient:
|
|
|
382
402
|
return result
|
|
383
403
|
|
|
384
404
|
def submit_job(
|
|
385
|
-
self,
|
|
386
|
-
filepath: str,
|
|
387
|
-
artist: str,
|
|
405
|
+
self,
|
|
406
|
+
filepath: str,
|
|
407
|
+
artist: str,
|
|
388
408
|
title: str,
|
|
389
409
|
style_params_path: Optional[str] = None,
|
|
390
410
|
enable_cdg: bool = True,
|
|
@@ -411,6 +431,8 @@ class RemoteKaraokeClient:
|
|
|
411
431
|
# Two-phase workflow (Batch 6)
|
|
412
432
|
prep_only: bool = False,
|
|
413
433
|
keep_brand_code: Optional[str] = None,
|
|
434
|
+
# Theme system
|
|
435
|
+
theme_id: Optional[str] = None,
|
|
414
436
|
) -> Dict[str, Any]:
|
|
415
437
|
"""
|
|
416
438
|
Submit a new karaoke generation job with optional style configuration.
|
|
@@ -442,6 +464,7 @@ class RemoteKaraokeClient:
|
|
|
442
464
|
backing_vocals_models: List of models for backing vocals separation
|
|
443
465
|
other_stems_models: List of models for other stems (bass, drums, etc.)
|
|
444
466
|
existing_instrumental: Path to existing instrumental file to use instead of AI separation
|
|
467
|
+
theme_id: Theme ID from GCS themes (e.g., 'nomad', 'default')
|
|
445
468
|
"""
|
|
446
469
|
file_path = Path(filepath)
|
|
447
470
|
|
|
@@ -560,7 +583,10 @@ class RemoteKaraokeClient:
|
|
|
560
583
|
create_request['prep_only'] = prep_only
|
|
561
584
|
if keep_brand_code:
|
|
562
585
|
create_request['keep_brand_code'] = keep_brand_code
|
|
563
|
-
|
|
586
|
+
# Theme system
|
|
587
|
+
if theme_id:
|
|
588
|
+
create_request['theme_id'] = theme_id
|
|
589
|
+
|
|
564
590
|
response = self._request('POST', '/api/jobs/create-with-upload-urls', json=create_request)
|
|
565
591
|
|
|
566
592
|
if response.status_code != 200:
|
|
@@ -1081,7 +1107,8 @@ class RemoteKaraokeClient:
|
|
|
1081
1107
|
if url.startswith('/'):
|
|
1082
1108
|
url = f"{self.config.service_url}{url}"
|
|
1083
1109
|
|
|
1084
|
-
|
|
1110
|
+
# Use session headers (includes Authorization) for authenticated downloads
|
|
1111
|
+
response = self.session.get(url, stream=True, timeout=600)
|
|
1085
1112
|
if response.status_code != 200:
|
|
1086
1113
|
return False
|
|
1087
1114
|
|
|
@@ -1180,6 +1207,8 @@ class RemoteKaraokeClient:
|
|
|
1180
1207
|
clean_instrumental_model: Optional[str] = None,
|
|
1181
1208
|
backing_vocals_models: Optional[list] = None,
|
|
1182
1209
|
other_stems_models: Optional[list] = None,
|
|
1210
|
+
# Theme system
|
|
1211
|
+
theme_id: Optional[str] = None,
|
|
1183
1212
|
) -> Dict[str, Any]:
|
|
1184
1213
|
"""
|
|
1185
1214
|
Search for audio by artist and title (Batch 5 - Flacfetch integration).
|
|
@@ -1192,6 +1221,7 @@ class RemoteKaraokeClient:
|
|
|
1192
1221
|
artist: Artist name to search for
|
|
1193
1222
|
title: Song title to search for
|
|
1194
1223
|
auto_download: Automatically select best audio source (skip interactive selection)
|
|
1224
|
+
style_params_path: Path to style_params.json (optional)
|
|
1195
1225
|
... other args same as submit_job()
|
|
1196
1226
|
|
|
1197
1227
|
Returns:
|
|
@@ -1231,6 +1261,43 @@ class RemoteKaraokeClient:
|
|
|
1231
1261
|
request_data['backing_vocals_models'] = backing_vocals_models
|
|
1232
1262
|
if other_stems_models:
|
|
1233
1263
|
request_data['other_stems_models'] = other_stems_models
|
|
1264
|
+
# Theme system
|
|
1265
|
+
if theme_id:
|
|
1266
|
+
request_data['theme_id'] = theme_id
|
|
1267
|
+
|
|
1268
|
+
# Prepare style files for upload if provided
|
|
1269
|
+
style_files = []
|
|
1270
|
+
local_style_files: Dict[str, str] = {} # file_type -> local_path
|
|
1271
|
+
|
|
1272
|
+
if style_params_path and os.path.isfile(style_params_path):
|
|
1273
|
+
self.logger.info(f"Parsing style configuration: {style_params_path}")
|
|
1274
|
+
|
|
1275
|
+
# Add the style_params.json itself
|
|
1276
|
+
style_files.append({
|
|
1277
|
+
'filename': Path(style_params_path).name,
|
|
1278
|
+
'content_type': 'application/json',
|
|
1279
|
+
'file_type': 'style_params'
|
|
1280
|
+
})
|
|
1281
|
+
local_style_files['style_params'] = style_params_path
|
|
1282
|
+
|
|
1283
|
+
# Parse style params to find referenced files (backgrounds, fonts)
|
|
1284
|
+
style_assets = self._parse_style_params(style_params_path)
|
|
1285
|
+
|
|
1286
|
+
for asset_key, asset_path in style_assets.items():
|
|
1287
|
+
if os.path.isfile(asset_path):
|
|
1288
|
+
# Use full path for content type detection (not just extension)
|
|
1289
|
+
content_type = self._get_content_type(asset_path)
|
|
1290
|
+
style_files.append({
|
|
1291
|
+
'filename': Path(asset_path).name,
|
|
1292
|
+
'content_type': content_type,
|
|
1293
|
+
'file_type': asset_key # e.g., 'style_intro_background'
|
|
1294
|
+
})
|
|
1295
|
+
local_style_files[asset_key] = asset_path
|
|
1296
|
+
self.logger.info(f" Will upload style asset: {asset_key}")
|
|
1297
|
+
|
|
1298
|
+
if style_files:
|
|
1299
|
+
request_data['style_files'] = style_files
|
|
1300
|
+
self.logger.info(f"Including {len(style_files)} style files in request")
|
|
1234
1301
|
|
|
1235
1302
|
response = self._request('POST', '/api/audio-search/search', json=request_data)
|
|
1236
1303
|
|
|
@@ -1248,7 +1315,52 @@ class RemoteKaraokeClient:
|
|
|
1248
1315
|
error_detail = response.text
|
|
1249
1316
|
raise RuntimeError(f"Error searching for audio: {error_detail}")
|
|
1250
1317
|
|
|
1251
|
-
|
|
1318
|
+
result = response.json()
|
|
1319
|
+
|
|
1320
|
+
# Upload style files if we have signed URLs
|
|
1321
|
+
style_upload_urls = result.get('style_upload_urls', [])
|
|
1322
|
+
if style_upload_urls and local_style_files:
|
|
1323
|
+
self.logger.info(f"Uploading {len(style_upload_urls)} style files...")
|
|
1324
|
+
|
|
1325
|
+
for url_info in style_upload_urls:
|
|
1326
|
+
file_type = url_info['file_type']
|
|
1327
|
+
upload_url = url_info['upload_url']
|
|
1328
|
+
|
|
1329
|
+
local_path = local_style_files.get(file_type)
|
|
1330
|
+
if not local_path:
|
|
1331
|
+
self.logger.warning(f"No local file for {file_type}, skipping upload")
|
|
1332
|
+
continue
|
|
1333
|
+
|
|
1334
|
+
self.logger.info(f" Uploading {file_type}: {Path(local_path).name}")
|
|
1335
|
+
|
|
1336
|
+
try:
|
|
1337
|
+
with open(local_path, 'rb') as f:
|
|
1338
|
+
file_content = f.read()
|
|
1339
|
+
|
|
1340
|
+
# Use the content type from the original file info, not re-derived
|
|
1341
|
+
# This ensures it matches the signed URL which was generated with
|
|
1342
|
+
# the same content type we specified in the request
|
|
1343
|
+
content_type = self._get_content_type(local_path)
|
|
1344
|
+
|
|
1345
|
+
# Use PUT to upload directly to signed URL
|
|
1346
|
+
upload_response = requests.put(
|
|
1347
|
+
upload_url,
|
|
1348
|
+
data=file_content,
|
|
1349
|
+
headers={'Content-Type': content_type},
|
|
1350
|
+
timeout=60
|
|
1351
|
+
)
|
|
1352
|
+
|
|
1353
|
+
if upload_response.status_code not in (200, 201):
|
|
1354
|
+
self.logger.error(f"Failed to upload {file_type}: {upload_response.status_code}")
|
|
1355
|
+
else:
|
|
1356
|
+
self.logger.info(f" ✓ Uploaded {file_type}")
|
|
1357
|
+
|
|
1358
|
+
except Exception as e:
|
|
1359
|
+
self.logger.error(f"Error uploading {file_type}: {e}")
|
|
1360
|
+
|
|
1361
|
+
self.logger.info("Style file uploads complete")
|
|
1362
|
+
|
|
1363
|
+
return result
|
|
1252
1364
|
|
|
1253
1365
|
def get_audio_search_results(self, job_id: str) -> Dict[str, Any]:
|
|
1254
1366
|
"""Get audio search results for a job awaiting selection."""
|
|
@@ -1398,16 +1510,21 @@ class JobMonitor:
|
|
|
1398
1510
|
base_api_url = f"{self.config.service_url}/api/review/{job_id}"
|
|
1399
1511
|
encoded_api_url = urllib.parse.quote(base_api_url, safe='')
|
|
1400
1512
|
|
|
1401
|
-
# Try to get audio hash from job data
|
|
1513
|
+
# Try to get audio hash and review token from job data
|
|
1514
|
+
audio_hash = ''
|
|
1515
|
+
review_token = ''
|
|
1402
1516
|
try:
|
|
1403
1517
|
job_data = self.client.get_job(job_id)
|
|
1404
1518
|
audio_hash = job_data.get('audio_hash', '')
|
|
1519
|
+
review_token = job_data.get('review_token', '')
|
|
1405
1520
|
except Exception:
|
|
1406
|
-
|
|
1521
|
+
pass
|
|
1407
1522
|
|
|
1408
1523
|
url = f"{self.config.review_ui_url}/?baseApiUrl={encoded_api_url}"
|
|
1409
1524
|
if audio_hash:
|
|
1410
1525
|
url += f"&audioHash={audio_hash}"
|
|
1526
|
+
if review_token:
|
|
1527
|
+
url += f"&reviewToken={review_token}"
|
|
1411
1528
|
|
|
1412
1529
|
self.logger.info(f"Opening lyrics review UI: {url}")
|
|
1413
1530
|
self.open_browser(url)
|
|
@@ -1608,8 +1725,34 @@ class JobMonitor:
|
|
|
1608
1725
|
"quality_str": result.get('quality_str') or result.get('quality', ''),
|
|
1609
1726
|
}
|
|
1610
1727
|
|
|
1728
|
+
def _convert_to_release_objects(self, release_dicts: List[Dict[str, Any]]) -> List[Release]:
|
|
1729
|
+
"""
|
|
1730
|
+
Convert API result dicts to Release objects for categorization.
|
|
1731
|
+
|
|
1732
|
+
Used by handle_audio_selection() to enable categorized display
|
|
1733
|
+
for large result sets (10+ results).
|
|
1734
|
+
|
|
1735
|
+
Args:
|
|
1736
|
+
release_dicts: List of dicts in Release-compatible format
|
|
1737
|
+
|
|
1738
|
+
Returns:
|
|
1739
|
+
List of Release objects (skipping any that fail to convert)
|
|
1740
|
+
"""
|
|
1741
|
+
releases = []
|
|
1742
|
+
for d in release_dicts:
|
|
1743
|
+
try:
|
|
1744
|
+
releases.append(Release.from_dict(d))
|
|
1745
|
+
except Exception as e:
|
|
1746
|
+
self.logger.debug(f"Failed to convert result to Release: {e}")
|
|
1747
|
+
return releases
|
|
1748
|
+
|
|
1611
1749
|
def handle_audio_selection(self, job_id: str) -> None:
|
|
1612
|
-
"""Handle audio source selection interaction (Batch 5).
|
|
1750
|
+
"""Handle audio source selection interaction (Batch 5).
|
|
1751
|
+
|
|
1752
|
+
For 10+ results, uses categorized display (grouped by Top Seeded,
|
|
1753
|
+
Album Releases, Hi-Res, etc.) with a 'more' command to show full list.
|
|
1754
|
+
For smaller result sets, uses flat list display.
|
|
1755
|
+
"""
|
|
1613
1756
|
self.logger.info("=" * 60)
|
|
1614
1757
|
self.logger.info("AUDIO SOURCE SELECTION NEEDED")
|
|
1615
1758
|
self.logger.info("=" * 60)
|
|
@@ -1619,6 +1762,7 @@ class JobMonitor:
|
|
|
1619
1762
|
results_data = self.client.get_audio_search_results(job_id)
|
|
1620
1763
|
results = results_data.get('results', [])
|
|
1621
1764
|
artist = results_data.get('artist', 'Unknown')
|
|
1765
|
+
title = results_data.get('title', 'Unknown')
|
|
1622
1766
|
|
|
1623
1767
|
if not results:
|
|
1624
1768
|
self.logger.error("No search results available")
|
|
@@ -1633,23 +1777,71 @@ class JobMonitor:
|
|
|
1633
1777
|
# This gives us the same rich, colorized output as the local CLI
|
|
1634
1778
|
release_dicts = [self._convert_api_result_to_release_dict(r) for r in results]
|
|
1635
1779
|
|
|
1636
|
-
#
|
|
1637
|
-
|
|
1780
|
+
# Convert to Release objects for categorization
|
|
1781
|
+
release_objects = self._convert_to_release_objects(release_dicts)
|
|
1782
|
+
|
|
1783
|
+
# Use categorized display for large result sets (10+)
|
|
1784
|
+
# This groups results into categories: Top Seeded, Album Releases, Hi-Res, etc.
|
|
1785
|
+
use_categorized = len(release_objects) >= 10
|
|
1786
|
+
|
|
1787
|
+
if use_categorized:
|
|
1788
|
+
# Create query for categorization
|
|
1789
|
+
query = TrackQuery(artist=artist, title=title)
|
|
1790
|
+
categorized = categorize_releases(release_objects, query)
|
|
1791
|
+
# print_categorized_releases returns the flattened list of displayed releases
|
|
1792
|
+
display_releases = print_categorized_releases(categorized, target_artist=artist, use_colors=True)
|
|
1793
|
+
showing_categorized = True
|
|
1794
|
+
else:
|
|
1795
|
+
# Small result set - use simple flat list
|
|
1796
|
+
print_releases(release_dicts, target_artist=artist, use_colors=True)
|
|
1797
|
+
display_releases = release_objects
|
|
1798
|
+
showing_categorized = False
|
|
1638
1799
|
|
|
1639
1800
|
selection_index = -1
|
|
1640
1801
|
while selection_index < 0:
|
|
1641
1802
|
try:
|
|
1642
|
-
|
|
1803
|
+
if showing_categorized:
|
|
1804
|
+
prompt = f"\nSelect (1-{len(display_releases)}), 'more' for full list, 0 to cancel: "
|
|
1805
|
+
else:
|
|
1806
|
+
prompt = f"\nSelect a release (1-{len(display_releases)}, 0 to cancel): "
|
|
1807
|
+
|
|
1808
|
+
choice = input(prompt).strip().lower()
|
|
1809
|
+
|
|
1643
1810
|
if choice == "0":
|
|
1644
1811
|
self.logger.info("Selection cancelled by user")
|
|
1645
1812
|
raise KeyboardInterrupt
|
|
1813
|
+
|
|
1814
|
+
# Handle 'more' command to show full flat list
|
|
1815
|
+
if choice in ('more', 'm', 'all', 'a') and showing_categorized:
|
|
1816
|
+
print("\n" + "=" * 60)
|
|
1817
|
+
print("FULL LIST (all results)")
|
|
1818
|
+
print("=" * 60 + "\n")
|
|
1819
|
+
print_releases(release_dicts, target_artist=artist, use_colors=True)
|
|
1820
|
+
display_releases = release_objects
|
|
1821
|
+
showing_categorized = False
|
|
1822
|
+
continue
|
|
1823
|
+
|
|
1646
1824
|
choice_num = int(choice)
|
|
1647
|
-
if 1 <= choice_num <= len(
|
|
1648
|
-
|
|
1825
|
+
if 1 <= choice_num <= len(display_releases):
|
|
1826
|
+
# Map selection back to original results index for API call
|
|
1827
|
+
selected_release = display_releases[choice_num - 1]
|
|
1828
|
+
|
|
1829
|
+
# Find matching index in original results by download_url
|
|
1830
|
+
selection_index = self._find_original_index(
|
|
1831
|
+
selected_release, results, release_objects
|
|
1832
|
+
)
|
|
1833
|
+
|
|
1834
|
+
if selection_index < 0:
|
|
1835
|
+
# Fallback: use display index if mapping fails
|
|
1836
|
+
self.logger.warning("Could not map selection to original index, using display index")
|
|
1837
|
+
selection_index = choice_num - 1
|
|
1649
1838
|
else:
|
|
1650
|
-
print(f"Please enter a number between 0 and {len(
|
|
1839
|
+
print(f"Please enter a number between 0 and {len(display_releases)}")
|
|
1651
1840
|
except ValueError:
|
|
1652
|
-
|
|
1841
|
+
if showing_categorized:
|
|
1842
|
+
print("Please enter a number or 'more'")
|
|
1843
|
+
else:
|
|
1844
|
+
print("Please enter a valid number")
|
|
1653
1845
|
except KeyboardInterrupt:
|
|
1654
1846
|
print()
|
|
1655
1847
|
raise
|
|
@@ -1667,10 +1859,80 @@ class JobMonitor:
|
|
|
1667
1859
|
|
|
1668
1860
|
except Exception as e:
|
|
1669
1861
|
self.logger.error(f"Error handling audio selection: {e}")
|
|
1862
|
+
|
|
1863
|
+
def _find_original_index(
|
|
1864
|
+
self,
|
|
1865
|
+
selected_release: Release,
|
|
1866
|
+
original_results: List[Dict[str, Any]],
|
|
1867
|
+
release_objects: List[Release],
|
|
1868
|
+
) -> int:
|
|
1869
|
+
"""
|
|
1870
|
+
Map a selected Release back to its index in the original API results.
|
|
1871
|
+
|
|
1872
|
+
This is needed because categorized display may reorder results,
|
|
1873
|
+
but the API selection endpoint needs the original index.
|
|
1874
|
+
|
|
1875
|
+
Args:
|
|
1876
|
+
selected_release: The Release object user selected
|
|
1877
|
+
original_results: Original API results (list of dicts)
|
|
1878
|
+
release_objects: Release objects in same order as original_results
|
|
1879
|
+
|
|
1880
|
+
Returns:
|
|
1881
|
+
Index in original_results, or -1 if not found
|
|
1882
|
+
"""
|
|
1883
|
+
# First try: match by object identity in release_objects
|
|
1884
|
+
for i, release in enumerate(release_objects):
|
|
1885
|
+
if release is selected_release:
|
|
1886
|
+
return i
|
|
1887
|
+
|
|
1888
|
+
# Second try: match by download_url
|
|
1889
|
+
selected_url = getattr(selected_release, 'download_url', None)
|
|
1890
|
+
if selected_url:
|
|
1891
|
+
for i, r in enumerate(original_results):
|
|
1892
|
+
if r.get('url') == selected_url:
|
|
1893
|
+
return i
|
|
1894
|
+
|
|
1895
|
+
# Third try: match by info_hash (for torrent sources)
|
|
1896
|
+
selected_hash = getattr(selected_release, 'info_hash', None)
|
|
1897
|
+
if selected_hash:
|
|
1898
|
+
for i, r in enumerate(original_results):
|
|
1899
|
+
if r.get('source_id') == selected_hash:
|
|
1900
|
+
return i
|
|
1901
|
+
|
|
1902
|
+
# Fourth try: match by title + artist + provider
|
|
1903
|
+
selected_title = getattr(selected_release, 'title', '')
|
|
1904
|
+
selected_artist = getattr(selected_release, 'artist', '')
|
|
1905
|
+
selected_source = getattr(selected_release, 'source_name', '')
|
|
1906
|
+
|
|
1907
|
+
for i, r in enumerate(original_results):
|
|
1908
|
+
if (r.get('title') == selected_title and
|
|
1909
|
+
r.get('artist') == selected_artist and
|
|
1910
|
+
r.get('provider') == selected_source):
|
|
1911
|
+
return i
|
|
1912
|
+
|
|
1913
|
+
return -1
|
|
1670
1914
|
|
|
1671
1915
|
def _open_instrumental_review_and_wait(self, job_id: str) -> None:
|
|
1672
1916
|
"""Open browser to instrumental review UI and wait for selection."""
|
|
1673
|
-
|
|
1917
|
+
# Get instrumental token from job data
|
|
1918
|
+
instrumental_token = ''
|
|
1919
|
+
try:
|
|
1920
|
+
job_data = self.client.get_job(job_id)
|
|
1921
|
+
instrumental_token = job_data.get('instrumental_token', '')
|
|
1922
|
+
except Exception:
|
|
1923
|
+
pass
|
|
1924
|
+
|
|
1925
|
+
# Build the review URL with API endpoint and token
|
|
1926
|
+
# The instrumental UI is hosted at /instrumental/ on the frontend domain
|
|
1927
|
+
base_api_url = f"{self.config.service_url}/api/jobs/{job_id}"
|
|
1928
|
+
encoded_api_url = urllib.parse.quote(base_api_url, safe='')
|
|
1929
|
+
|
|
1930
|
+
# Use /instrumental/ path on the frontend (same domain as review_ui_url but different path)
|
|
1931
|
+
# review_ui_url is like https://gen.nomadkaraoke.com/lyrics, we want /instrumental/
|
|
1932
|
+
frontend_base = self.config.review_ui_url.rsplit('/', 1)[0] # Remove /lyrics
|
|
1933
|
+
review_url = f"{frontend_base}/instrumental/?baseApiUrl={encoded_api_url}"
|
|
1934
|
+
if instrumental_token:
|
|
1935
|
+
review_url += f"&instrumentalToken={instrumental_token}"
|
|
1674
1936
|
|
|
1675
1937
|
self.logger.info("")
|
|
1676
1938
|
self.logger.info("=" * 60)
|
|
@@ -2149,6 +2411,14 @@ class JobMonitor:
|
|
|
2149
2411
|
self.handle_instrumental_selection(job_id)
|
|
2150
2412
|
self._instrumental_prompted = True
|
|
2151
2413
|
|
|
2414
|
+
elif status == 'instrumental_selected':
|
|
2415
|
+
# Check if this was auto-selected due to existing instrumental
|
|
2416
|
+
selection = job_data.get('state_data', {}).get('instrumental_selection', '')
|
|
2417
|
+
if selection == 'custom' and not self._instrumental_prompted:
|
|
2418
|
+
self.logger.info("")
|
|
2419
|
+
self.logger.info("Using user-provided instrumental (--existing_instrumental)")
|
|
2420
|
+
self._instrumental_prompted = True
|
|
2421
|
+
|
|
2152
2422
|
elif status == 'complete':
|
|
2153
2423
|
self.logger.info("")
|
|
2154
2424
|
self.logger.info("=" * 60)
|
|
@@ -2776,6 +3046,8 @@ def main():
|
|
|
2776
3046
|
logger.info(f"Searching for: {artist} - {title}")
|
|
2777
3047
|
if getattr(args, 'auto_download', False) or config.non_interactive:
|
|
2778
3048
|
logger.info(f"Auto-download: enabled (will auto-select best source)")
|
|
3049
|
+
if getattr(args, 'theme', None):
|
|
3050
|
+
logger.info(f"Theme: {args.theme}")
|
|
2779
3051
|
if args.style_params_json:
|
|
2780
3052
|
logger.info(f"Style: {args.style_params_json}")
|
|
2781
3053
|
logger.info(f"CDG: {args.enable_cdg}, TXT: {args.enable_txt}")
|
|
@@ -2802,6 +3074,7 @@ def main():
|
|
|
2802
3074
|
artist=artist,
|
|
2803
3075
|
title=title,
|
|
2804
3076
|
auto_download=auto_download,
|
|
3077
|
+
style_params_path=args.style_params_json,
|
|
2805
3078
|
enable_cdg=args.enable_cdg,
|
|
2806
3079
|
enable_txt=args.enable_txt,
|
|
2807
3080
|
brand_prefix=args.brand_prefix,
|
|
@@ -2816,6 +3089,8 @@ def main():
|
|
|
2816
3089
|
clean_instrumental_model=getattr(args, 'clean_instrumental_model', None),
|
|
2817
3090
|
backing_vocals_models=getattr(args, 'backing_vocals_models', None),
|
|
2818
3091
|
other_stems_models=getattr(args, 'other_stems_models', None),
|
|
3092
|
+
# Theme system
|
|
3093
|
+
theme_id=getattr(args, 'theme', None),
|
|
2819
3094
|
)
|
|
2820
3095
|
|
|
2821
3096
|
job_id = result.get('job_id')
|
|
@@ -2852,6 +3127,8 @@ def main():
|
|
|
2852
3127
|
logger.info(f"Title: {title}")
|
|
2853
3128
|
if not artist and not title and is_url_input:
|
|
2854
3129
|
logger.info(f"Artist/Title: (will be auto-detected from URL)")
|
|
3130
|
+
if getattr(args, 'theme', None):
|
|
3131
|
+
logger.info(f"Theme: {args.theme}")
|
|
2855
3132
|
if args.style_params_json:
|
|
2856
3133
|
logger.info(f"Style: {args.style_params_json}")
|
|
2857
3134
|
logger.info(f"CDG: {args.enable_cdg}, TXT: {args.enable_txt}")
|
|
@@ -2952,6 +3229,8 @@ def main():
|
|
|
2952
3229
|
# Two-phase workflow (Batch 6)
|
|
2953
3230
|
prep_only=getattr(args, 'prep_only', False),
|
|
2954
3231
|
keep_brand_code=keep_brand_code_value,
|
|
3232
|
+
# Theme system
|
|
3233
|
+
theme_id=getattr(args, 'theme', None),
|
|
2955
3234
|
)
|
|
2956
3235
|
else:
|
|
2957
3236
|
# File-based job submission
|
|
@@ -2984,6 +3263,8 @@ def main():
|
|
|
2984
3263
|
# Two-phase workflow (Batch 6)
|
|
2985
3264
|
prep_only=getattr(args, 'prep_only', False),
|
|
2986
3265
|
keep_brand_code=keep_brand_code_value,
|
|
3266
|
+
# Theme system
|
|
3267
|
+
theme_id=getattr(args, 'theme', None),
|
|
2987
3268
|
)
|
|
2988
3269
|
job_id = result.get('job_id')
|
|
2989
3270
|
style_assets = result.get('style_assets_uploaded', [])
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: karaoke-gen
|
|
3
|
-
Version: 0.75.16
|
|
3
|
+
Version: 0.76.20
|
|
4
4
|
Summary: Generate karaoke videos with synchronized lyrics. Handles the entire process from downloading audio and lyrics to creating the final video with title screens.
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -22,7 +22,7 @@ Requires-Dist: dropbox (>=12)
|
|
|
22
22
|
Requires-Dist: fastapi (>=0.104.0)
|
|
23
23
|
Requires-Dist: fetch-lyrics-from-genius (>=0.1)
|
|
24
24
|
Requires-Dist: ffmpeg-python (>=0.2.0,<0.3.0)
|
|
25
|
-
Requires-Dist: flacfetch (>=0.
|
|
25
|
+
Requires-Dist: flacfetch (>=0.9.0)
|
|
26
26
|
Requires-Dist: fonttools (>=4.55)
|
|
27
27
|
Requires-Dist: google-api-python-client
|
|
28
28
|
Requires-Dist: google-auth
|
|
@@ -48,6 +48,7 @@ Requires-Dist: lyrics-converter (>=0.2.1)
|
|
|
48
48
|
Requires-Dist: lyricsgenius (>=3)
|
|
49
49
|
Requires-Dist: matplotlib (>=3)
|
|
50
50
|
Requires-Dist: metaphone (>=0.6)
|
|
51
|
+
Requires-Dist: mutagen (>=1.47)
|
|
51
52
|
Requires-Dist: nest-asyncio (>=1.5)
|
|
52
53
|
Requires-Dist: nltk (>=3.9)
|
|
53
54
|
Requires-Dist: numpy (>=2)
|
|
@@ -94,7 +95,7 @@ Description-Content-Type: text/markdown
|
|
|
94
95
|
# Karaoke Generator 🎶 🎥 🚀
|
|
95
96
|
|
|
96
97
|

|
|
97
|
-

|
|
98
99
|

|
|
99
100
|

|
|
100
101
|
|
|
@@ -147,10 +148,44 @@ pip install karaoke-gen
|
|
|
147
148
|
This installs both `karaoke-gen` (local) and `karaoke-gen-remote` (cloud) CLIs.
|
|
148
149
|
|
|
149
150
|
### Requirements
|
|
150
|
-
- Python 3.10
|
|
151
|
+
- Python 3.10-3.13
|
|
151
152
|
- FFmpeg
|
|
152
153
|
- For local processing: CUDA-capable GPU or Apple Silicon CPU recommended
|
|
153
154
|
|
|
155
|
+
### Transcription Provider Setup
|
|
156
|
+
|
|
157
|
+
**Transcription is required** for creating karaoke videos with synchronized lyrics. The system needs word-level timing data to display lyrics in sync with the music.
|
|
158
|
+
|
|
159
|
+
#### Option 1: AudioShake (Recommended)
|
|
160
|
+
Commercial service with high-quality transcription. Best for production use.
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
export AUDIOSHAKE_API_TOKEN="your_audioshake_token"
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
Get an API key at [https://www.audioshake.ai/](https://www.audioshake.ai/) (available to business customers only at the time of writing).
|
|
167
|
+
|
|
168
|
+
#### Option 2: Whisper via RunPod
|
|
169
|
+
Open-source alternative using OpenAI's Whisper model on RunPod infrastructure.
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
export RUNPOD_API_KEY="your_runpod_key"
|
|
173
|
+
export WHISPER_RUNPOD_ID="your_whisper_endpoint_id"
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
Set up a Whisper endpoint at [https://www.runpod.io/](https://www.runpod.io/)
|
|
177
|
+
|
|
178
|
+
#### Without Transcription (Instrumental Only)
|
|
179
|
+
If you don't need synchronized lyrics, use the `--skip-lyrics` flag:
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
karaoke-gen --skip-lyrics "Artist" "Title"
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
This creates an instrumental-only karaoke video without lyrics overlay.
|
|
186
|
+
|
|
187
|
+
> **Note:** See `lyrics_transcriber_temp/README.md` for detailed transcription provider configuration options.
|
|
188
|
+
|
|
154
189
|
---
|
|
155
190
|
|
|
156
191
|
## 🖥️ Local CLI (`karaoke-gen`)
|
|
@@ -273,7 +308,7 @@ karaoke-gen-remote \
|
|
|
273
308
|
|----------|-------------|---------|
|
|
274
309
|
| `KARAOKE_GEN_URL` | Backend service URL | Required |
|
|
275
310
|
| `KARAOKE_GEN_AUTH_TOKEN` | Admin auth token (for protected endpoints) | Optional |
|
|
276
|
-
| `REVIEW_UI_URL` | Lyrics review UI URL | `https://
|
|
311
|
+
| `REVIEW_UI_URL` | Lyrics review UI URL | `https://gen.nomadkaraoke.com/lyrics/` |
|
|
277
312
|
| `POLL_INTERVAL` | Seconds between status polls | `5` |
|
|
278
313
|
|
|
279
314
|
**Note:** The `REVIEW_UI_URL` defaults to the hosted lyrics review UI. For local development, set it to `http://localhost:5173` if you're running the frontend dev server.
|
|
@@ -568,6 +603,73 @@ Check backend health status.
|
|
|
568
603
|
|
|
569
604
|
---
|
|
570
605
|
|
|
606
|
+
## 🔧 Troubleshooting
|
|
607
|
+
|
|
608
|
+
### "No suitable files found for processing"
|
|
609
|
+
|
|
610
|
+
This error occurs during the finalisation step when the `(With Vocals).mkv` file is missing. This file is created during lyrics transcription.
|
|
611
|
+
|
|
612
|
+
**Most common cause:** No transcription provider configured.
|
|
613
|
+
|
|
614
|
+
**Quick fix:**
|
|
615
|
+
1. Check if transcription providers are configured:
|
|
616
|
+
```bash
|
|
617
|
+
echo $AUDIOSHAKE_API_TOKEN
|
|
618
|
+
echo $RUNPOD_API_KEY
|
|
619
|
+
```
|
|
620
|
+
|
|
621
|
+
2. If both are empty, set up a provider (see [Transcription Provider Setup](#transcription-provider-setup))
|
|
622
|
+
|
|
623
|
+
3. Or use `--skip-lyrics` for instrumental-only karaoke:
|
|
624
|
+
```bash
|
|
625
|
+
karaoke-gen --skip-lyrics "Artist" "Title"
|
|
626
|
+
```
|
|
627
|
+
|
|
628
|
+
**Other causes:**
|
|
629
|
+
- Invalid API credentials - verify your tokens are correct and active
|
|
630
|
+
- API service unavailable - check service status pages
|
|
631
|
+
- Network connectivity issues - ensure you can reach the API endpoints
|
|
632
|
+
- Transcription timeout - try again or use a different provider
|
|
633
|
+
|
|
634
|
+
### Transcription Fails Silently
|
|
635
|
+
|
|
636
|
+
If karaoke-gen runs without errors but produces no synchronized lyrics:
|
|
637
|
+
|
|
638
|
+
1. **Check logs** - Run with `--log_level debug` for detailed output:
|
|
639
|
+
```bash
|
|
640
|
+
karaoke-gen --log_level debug "Artist" "Title"
|
|
641
|
+
```
|
|
642
|
+
|
|
643
|
+
2. **Verify environment variables** - Ensure API tokens are exported in your shell:
|
|
644
|
+
```bash
|
|
645
|
+
# Check if set
|
|
646
|
+
printenv | grep -E "(AUDIOSHAKE|RUNPOD|WHISPER)"
|
|
647
|
+
|
|
648
|
+
# Set in current session
|
|
649
|
+
export AUDIOSHAKE_API_TOKEN="your_token"
|
|
650
|
+
```
|
|
651
|
+
|
|
652
|
+
3. **Test API connectivity** - Verify you can reach the transcription service
|
|
653
|
+
|
|
654
|
+
### "No lyrics found from any source"
|
|
655
|
+
|
|
656
|
+
This warning means no reference lyrics were fetched from online sources (Genius, Spotify, Musixmatch). The transcription will still work, but auto-correction may be less accurate.
|
|
657
|
+
|
|
658
|
+
**To fix:**
|
|
659
|
+
- Set `GENIUS_API_TOKEN` for Genius lyrics
|
|
660
|
+
- Set `SPOTIFY_COOKIE_SP_DC` for Spotify lyrics
|
|
661
|
+
- Set `RAPIDAPI_KEY` for Musixmatch lyrics
|
|
662
|
+
- Or provide lyrics manually with `--lyrics_file /path/to/lyrics.txt`
|
|
663
|
+
|
|
664
|
+
### Video Quality Issues
|
|
665
|
+
|
|
666
|
+
If the output video has quality problems:
|
|
667
|
+
- Ensure FFmpeg is properly installed: `ffmpeg -version`
|
|
668
|
+
- Check available codecs: `ffmpeg -codecs`
|
|
669
|
+
- For 4K output, ensure sufficient disk space (10GB+ per track)
|
|
670
|
+
|
|
671
|
+
---
|
|
672
|
+
|
|
571
673
|
## 🧪 Development
|
|
572
674
|
|
|
573
675
|
### Running Tests
|