karaoke-gen 0.71.27__py3-none-any.whl → 0.71.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -41,6 +41,11 @@ from .cli_args import create_parser, process_style_overrides, is_url, is_file
41
41
  class JobStatus(str, Enum):
42
42
  """Job status values (matching backend)."""
43
43
  PENDING = "pending"
44
+ # Audio search states (Batch 5)
45
+ SEARCHING_AUDIO = "searching_audio"
46
+ AWAITING_AUDIO_SELECTION = "awaiting_audio_selection"
47
+ DOWNLOADING_AUDIO = "downloading_audio"
48
+ # Main workflow
44
49
  DOWNLOADING = "downloading"
45
50
  SEPARATING_STAGE1 = "separating_stage1"
46
51
  SEPARATING_STAGE2 = "separating_stage2"
@@ -247,6 +252,124 @@ class RemoteKaraokeClient:
247
252
 
248
253
  return asset_files
249
254
 
255
def submit_job_from_url(
    self,
    url: str,
    artist: Optional[str] = None,
    title: Optional[str] = None,
    enable_cdg: bool = True,
    enable_txt: bool = True,
    brand_prefix: Optional[str] = None,
    discord_webhook_url: Optional[str] = None,
    youtube_description: Optional[str] = None,
    organised_dir_rclone_root: Optional[str] = None,
    enable_youtube_upload: bool = False,
    dropbox_path: Optional[str] = None,
    gdrive_folder_id: Optional[str] = None,
    lyrics_artist: Optional[str] = None,
    lyrics_title: Optional[str] = None,
    subtitle_offset_ms: int = 0,
    clean_instrumental_model: Optional[str] = None,
    backing_vocals_models: Optional[list] = None,
    other_stems_models: Optional[list] = None,
) -> Dict[str, Any]:
    """
    Submit a new karaoke generation job from a YouTube/online URL.

    The request is POSTed as JSON to /api/jobs/create-from-url. Only `url`,
    `enable_cdg` and `enable_txt` are always sent; every other option is
    included only when it has a truthy value, so unset options are left to
    the server's defaults.

    Note: Custom style configuration is not supported for URL-based jobs.
    If you need custom styles, download the audio locally first and use
    the regular file upload flow with submit_job().

    Args:
        url: YouTube or other video URL to download audio from
        artist: Artist name (optional - omitted from the request if not provided)
        title: Song title (optional - omitted from the request if not provided)
        enable_cdg: Generate CDG+MP3 package
        enable_txt: Generate TXT+MP3 package
        brand_prefix: Brand code prefix (e.g., "NOMAD")
        discord_webhook_url: Discord webhook for notifications
        youtube_description: YouTube video description
        organised_dir_rclone_root: Legacy rclone path (deprecated)
        enable_youtube_upload: Enable YouTube upload
        dropbox_path: Dropbox folder path for organized output (native API)
        gdrive_folder_id: Google Drive folder ID for public share (native API)
        lyrics_artist: Override artist name for lyrics search
        lyrics_title: Override title for lyrics search
        subtitle_offset_ms: Subtitle timing offset in milliseconds
            (0 or None means "no offset" and is not sent)
        clean_instrumental_model: Model for clean instrumental separation
        backing_vocals_models: List of models for backing vocals separation
        other_stems_models: List of models for other stems (bass, drums, etc.)

    Returns:
        The decoded JSON response from the server, including 'job_id' and,
        when the server reports them, 'detected_artist' / 'detected_title'.

    Raises:
        RuntimeError: If the server answers with a non-200 status, with a
            body whose 'status' is not 'success', or without a 'job_id'.
    """
    self.logger.info(f"Submitting URL-based job: {url}")

    # Required fields are always present in the payload.
    create_request: Dict[str, Any] = {
        'url': url,
        'enable_cdg': enable_cdg,
        'enable_txt': enable_txt,
    }

    # Optional fields, in the order they are added to the payload.
    optional_fields = {
        'artist': artist,
        'title': title,
        'brand_prefix': brand_prefix,
        'discord_webhook_url': discord_webhook_url,
        'youtube_description': youtube_description,
        'enable_youtube_upload': enable_youtube_upload,
        'dropbox_path': dropbox_path,
        'gdrive_folder_id': gdrive_folder_id,
        'organised_dir_rclone_root': organised_dir_rclone_root,
        'lyrics_artist': lyrics_artist,
        'lyrics_title': lyrics_title,
        # Truthiness (rather than "!= 0") keeps an explicit None out of the payload.
        'subtitle_offset_ms': subtitle_offset_ms,
        'clean_instrumental_model': clean_instrumental_model,
        'backing_vocals_models': backing_vocals_models,
        'other_stems_models': other_stems_models,
    }
    create_request.update(
        {key: value for key, value in optional_fields.items() if value}
    )

    endpoint = '/api/jobs/create-from-url'
    self.logger.info(f"Creating URL-based job at {self.config.service_url}{endpoint}")

    response = self._request('POST', endpoint, json=create_request)

    if response.status_code != 200:
        # Prefer the structured error body; fall back to raw text if it is not JSON.
        try:
            error_detail = response.json()
        except Exception:
            error_detail = response.text
        raise RuntimeError(f"Error creating job from URL: {error_detail}")

    result = response.json()
    if result.get('status') != 'success':
        raise RuntimeError(f"Error creating job from URL: {result}")

    # A "success" body without a job id is unusable by callers; fail with a
    # clear message instead of a bare KeyError.
    job_id = result.get('job_id')
    if not job_id:
        raise RuntimeError(f"Error creating job from URL: response has no job_id: {result}")

    detected_artist = result.get('detected_artist')
    detected_title = result.get('detected_title')

    self.logger.info(f"Job {job_id} created from URL")
    if detected_artist:
        self.logger.info(f"  Artist: {detected_artist}")
    if detected_title:
        self.logger.info(f"  Title: {detected_title}")

    return result
372
+
250
373
  def submit_job(
251
374
  self,
252
375
  filepath: str,
@@ -645,7 +768,18 @@ class RemoteKaraokeClient:
645
768
  error_detail = response.text
646
769
  raise RuntimeError(f"Error getting instrumental options: {error_detail}")
647
770
  return response.json()
648
-
771
+
772
def get_instrumental_analysis(self, job_id: str) -> Dict[str, Any]:
    """
    Fetch the instrumental analysis for a job.

    Issues a GET to /api/jobs/{job_id}/instrumental-analysis and returns the
    decoded JSON body on HTTP 200.

    Raises:
        RuntimeError: On any non-200 response. The message carries the JSON
            error body when it can be decoded, otherwise the raw response text.
    """
    endpoint = f'/api/jobs/{job_id}/instrumental-analysis'
    reply = self._request('GET', endpoint)

    # Happy path first: a 200 is returned to the caller as parsed JSON.
    if reply.status_code == 200:
        return reply.json()

    try:
        detail = reply.json()
    except Exception:
        detail = reply.text
    raise RuntimeError(f"Error getting instrumental analysis: {detail}")
782
+
649
783
  def select_instrumental(self, job_id: str, selection: str) -> Dict[str, Any]:
650
784
  """Submit instrumental selection."""
651
785
  response = self._request(
@@ -757,6 +891,122 @@ class RemoteKaraokeClient:
757
891
  error_detail = response.text
758
892
  raise RuntimeError(f"Error completing review: {error_detail}")
759
893
  return response.json()
894
+
895
def search_audio(
    self,
    artist: str,
    title: str,
    auto_download: bool = False,
    style_params_path: Optional[str] = None,
    enable_cdg: bool = True,
    enable_txt: bool = True,
    brand_prefix: Optional[str] = None,
    discord_webhook_url: Optional[str] = None,
    youtube_description: Optional[str] = None,
    enable_youtube_upload: bool = False,
    dropbox_path: Optional[str] = None,
    gdrive_folder_id: Optional[str] = None,
    lyrics_artist: Optional[str] = None,
    lyrics_title: Optional[str] = None,
    subtitle_offset_ms: int = 0,
    clean_instrumental_model: Optional[str] = None,
    backing_vocals_models: Optional[list] = None,
    other_stems_models: Optional[list] = None,
) -> Dict[str, Any]:
    """
    Search for audio by artist and title (Batch 5 - Flacfetch integration).

    The request is POSTed as JSON to /api/audio-search/search. `artist`,
    `title`, `auto_download`, `enable_cdg` and `enable_txt` are always sent;
    every other option is included only when it has a truthy value.

    Args:
        artist: Artist name to search for
        title: Song title to search for
        auto_download: Automatically select best audio source (skip interactive selection)
        style_params_path: Accepted for signature compatibility only. This
            method does not upload or forward it; a warning is logged when
            it is provided so the caller knows it is being ignored.
        enable_cdg: Generate CDG+MP3 package
        enable_txt: Generate TXT+MP3 package
        brand_prefix: Brand code prefix
        discord_webhook_url: Discord webhook for notifications
        youtube_description: YouTube video description
        enable_youtube_upload: Enable YouTube upload
        dropbox_path: Dropbox folder path for organized output
        gdrive_folder_id: Google Drive folder ID for public share
        lyrics_artist: Override artist name for lyrics search
        lyrics_title: Override title for lyrics search
        subtitle_offset_ms: Subtitle timing offset in milliseconds
            (0 or None means "no offset" and is not sent)
        clean_instrumental_model: Model for clean instrumental separation
        backing_vocals_models: List of models for backing vocals separation
        other_stems_models: List of models for other stems

    Returns:
        Dict with job_id, status, and optionally search results

    Raises:
        ValueError: If the server answers 404 (no audio sources found).
        RuntimeError: If the server answers with any other non-200 status.
    """
    self.logger.info(f"Searching for audio: {artist} - {title}")

    # The parameter exists in the signature but nothing below sends it;
    # say so instead of dropping the caller's style configuration silently.
    if style_params_path:
        self.logger.warning(
            f"style_params_path is not sent by search_audio and will be ignored: {style_params_path}"
        )

    request_data: Dict[str, Any] = {
        'artist': artist,
        'title': title,
        'auto_download': auto_download,
        'enable_cdg': enable_cdg,
        'enable_txt': enable_txt,
    }

    # Optional fields, in the order they are added to the payload.
    optional_fields = {
        'brand_prefix': brand_prefix,
        'discord_webhook_url': discord_webhook_url,
        'youtube_description': youtube_description,
        'enable_youtube_upload': enable_youtube_upload,
        'dropbox_path': dropbox_path,
        'gdrive_folder_id': gdrive_folder_id,
        'lyrics_artist': lyrics_artist,
        'lyrics_title': lyrics_title,
        # Truthiness (rather than "!= 0") keeps an explicit None out of the payload.
        'subtitle_offset_ms': subtitle_offset_ms,
        'clean_instrumental_model': clean_instrumental_model,
        'backing_vocals_models': backing_vocals_models,
        'other_stems_models': other_stems_models,
    }
    request_data.update(
        {key: value for key, value in optional_fields.items() if value}
    )

    response = self._request('POST', '/api/audio-search/search', json=request_data)

    if response.status_code != 200:
        # Prefer the structured error body; fall back to raw text if it is not JSON.
        try:
            error_detail = response.json()
        except Exception:
            error_detail = response.text
        # 404 is reported as ValueError so callers can tell "nothing found"
        # apart from other failures.
        if response.status_code == 404:
            raise ValueError(f"No audio sources found: {error_detail}")
        raise RuntimeError(f"Error searching for audio: {error_detail}")

    return response.json()
984
+
985
def get_audio_search_results(self, job_id: str) -> Dict[str, Any]:
    """
    Fetch the audio search results for a job awaiting selection.

    Issues a GET to /api/audio-search/{job_id}/results and returns the
    decoded JSON body on HTTP 200.

    Raises:
        RuntimeError: On any non-200 response. The message carries the JSON
            error body when it can be decoded, otherwise the raw response text.
    """
    reply = self._request('GET', f'/api/audio-search/{job_id}/results')
    succeeded = reply.status_code == 200

    if not succeeded:
        # Decode the error body if possible, else use the plain text.
        try:
            detail = reply.json()
        except Exception:
            detail = reply.text
        raise RuntimeError(f"Error getting search results: {detail}")

    payload = reply.json()
    return payload
995
+
996
def select_audio_source(self, job_id: str, selection_index: int) -> Dict[str, Any]:
    """
    Submit the chosen audio source for a job.

    POSTs {'selection_index': selection_index} as JSON to
    /api/audio-search/{job_id}/select and returns the decoded JSON body on
    HTTP 200.

    Raises:
        RuntimeError: On any non-200 response. The message carries the JSON
            error body when it can be decoded, otherwise the raw response text.
    """
    endpoint = f'/api/audio-search/{job_id}/select'
    body = {'selection_index': selection_index}
    reply = self._request('POST', endpoint, json=body)

    if reply.status_code == 200:
        return reply.json()

    # Failure path: surface whatever detail the server provided.
    try:
        detail = reply.json()
    except Exception:
        detail = reply.text
    raise RuntimeError(f"Error selecting audio: {detail}")
760
1010
 
761
1011
 
762
1012
  class JobMonitor:
@@ -768,6 +1018,7 @@ class JobMonitor:
768
1018
  self.logger = logger
769
1019
  self._review_opened = False
770
1020
  self._instrumental_prompted = False
1021
+ self._audio_selection_prompted = False # Batch 5: audio source selection
771
1022
  self._last_timeline_index = 0
772
1023
  self._last_log_index = 0
773
1024
  self._show_worker_logs = True # Enable worker log display
@@ -777,6 +1028,11 @@ class JobMonitor:
777
1028
  # Status descriptions for user-friendly logging
778
1029
  STATUS_DESCRIPTIONS = {
779
1030
  'pending': 'Job queued, waiting to start',
1031
+ # Audio search states (Batch 5)
1032
+ 'searching_audio': 'Searching for audio sources',
1033
+ 'awaiting_audio_selection': 'Waiting for audio source selection',
1034
+ 'downloading_audio': 'Downloading selected audio',
1035
+ # Main workflow
780
1036
  'downloading': 'Downloading and preparing input files',
781
1037
  'separating_stage1': 'AI audio separation (stage 1 of 2)',
782
1038
  'separating_stage2': 'AI audio separation (stage 2 of 2)',
@@ -891,39 +1147,97 @@ class JobMonitor:
891
1147
  time.sleep(self.config.poll_interval)
892
1148
 
893
1149
  def handle_instrumental_selection(self, job_id: str) -> None:
894
- """Handle instrumental selection interaction."""
1150
+ """Handle instrumental selection interaction with analysis-based recommendations."""
895
1151
  self.logger.info("=" * 60)
896
1152
  self.logger.info("INSTRUMENTAL SELECTION NEEDED")
897
1153
  self.logger.info("=" * 60)
898
1154
 
899
- # In non-interactive mode, auto-select clean instrumental
900
- if self.config.non_interactive:
901
- self.logger.info("Non-interactive mode: Auto-selecting clean instrumental")
902
- selection = 'clean'
903
- else:
904
- self.logger.info("")
905
- self.logger.info("Choose which instrumental track to use for the final video:")
906
- self.logger.info("")
907
- self.logger.info(" 1) Clean Instrumental (no backing vocals)")
908
- self.logger.info(" Best for songs where you want ONLY the lead vocal removed")
1155
+ # Try to get analysis data for smart recommendations
1156
+ analysis_data = None
1157
+ try:
1158
+ analysis_data = self.client.get_instrumental_analysis(job_id)
1159
+ analysis = analysis_data.get('analysis', {})
1160
+
1161
+ # Display analysis summary
909
1162
  self.logger.info("")
910
- self.logger.info(" 2) Instrumental with Backing Vocals")
911
- self.logger.info(" Best for songs where backing vocals add to the karaoke experience")
1163
+ self.logger.info("=== Backing Vocals Analysis ===")
1164
+ if analysis.get('has_audible_content'):
1165
+ self.logger.info(f" Backing vocals detected: YES")
1166
+ self.logger.info(f" Audible segments: {len(analysis.get('audible_segments', []))}")
1167
+ self.logger.info(f" Audible duration: {analysis.get('total_audible_duration_seconds', 0):.1f}s "
1168
+ f"({analysis.get('audible_percentage', 0):.1f}% of track)")
1169
+ else:
1170
+ self.logger.info(f" Backing vocals detected: NO")
1171
+ self.logger.info(f" Recommendation: {analysis.get('recommended_selection', 'review_needed')}")
912
1172
  self.logger.info("")
1173
+ except Exception as e:
1174
+ self.logger.warning(f"Could not fetch analysis data: {e}")
1175
+ self.logger.info("Falling back to manual selection...")
1176
+
1177
+ # In non-interactive mode, use analysis recommendation or default to clean
1178
+ if self.config.non_interactive:
1179
+ if analysis_data and analysis_data.get('analysis', {}).get('recommended_selection') == 'clean':
1180
+ self.logger.info("Non-interactive mode: Auto-selecting clean instrumental (recommended)")
1181
+ selection = 'clean'
1182
+ else:
1183
+ self.logger.info("Non-interactive mode: Auto-selecting clean instrumental (default)")
1184
+ selection = 'clean'
1185
+ else:
1186
+ # Check if we should recommend clean based on analysis
1187
+ recommend_clean = (
1188
+ analysis_data and
1189
+ not analysis_data.get('analysis', {}).get('has_audible_content', True)
1190
+ )
913
1191
 
914
- selection = ""
915
- while not selection:
1192
+ if recommend_clean:
1193
+ self.logger.info("No backing vocals detected - recommending clean instrumental.")
1194
+ self.logger.info("")
1195
+ self.logger.info("Options:")
1196
+ self.logger.info(" 1) Accept recommendation (clean instrumental)")
1197
+ self.logger.info(" 2) Open browser to review and select")
1198
+ self.logger.info("")
1199
+
916
1200
  try:
917
1201
  choice = input("Enter your choice (1 or 2): ").strip()
918
1202
  if choice == '1':
919
1203
  selection = 'clean'
920
- elif choice == '2':
921
- selection = 'with_backing'
922
1204
  else:
923
- self.logger.error("Invalid choice. Please enter 1 or 2.")
1205
+ self._open_instrumental_review_and_wait(job_id)
1206
+ return # Selection will be submitted via browser
924
1207
  except KeyboardInterrupt:
925
1208
  print()
926
1209
  raise
1210
+ else:
1211
+ # Backing vocals detected or analysis unavailable - offer browser review
1212
+ self.logger.info("Choose how to select your instrumental:")
1213
+ self.logger.info("")
1214
+ self.logger.info(" 1) Clean Instrumental (no backing vocals)")
1215
+ self.logger.info(" Best for songs where you want ONLY the lead vocal removed")
1216
+ self.logger.info("")
1217
+ self.logger.info(" 2) Instrumental with Backing Vocals")
1218
+ self.logger.info(" Best for songs where backing vocals add to the karaoke experience")
1219
+ self.logger.info("")
1220
+ self.logger.info(" 3) Open Browser for Advanced Review")
1221
+ self.logger.info(" Listen to audio, view waveform, and optionally mute sections")
1222
+ self.logger.info(" to create a custom instrumental")
1223
+ self.logger.info("")
1224
+
1225
+ selection = ""
1226
+ while not selection:
1227
+ try:
1228
+ choice = input("Enter your choice (1, 2, or 3): ").strip()
1229
+ if choice == '1':
1230
+ selection = 'clean'
1231
+ elif choice == '2':
1232
+ selection = 'with_backing'
1233
+ elif choice == '3':
1234
+ self._open_instrumental_review_and_wait(job_id)
1235
+ return # Selection will be submitted via browser
1236
+ else:
1237
+ self.logger.error("Invalid choice. Please enter 1, 2, or 3.")
1238
+ except KeyboardInterrupt:
1239
+ print()
1240
+ raise
927
1241
 
928
1242
  self.logger.info(f"Submitting selection: {selection}")
929
1243
 
@@ -936,6 +1250,126 @@ class JobMonitor:
936
1250
  except Exception as e:
937
1251
  self.logger.error(f"Error submitting selection: {e}")
938
1252
 
1253
def handle_audio_selection(self, job_id: str) -> None:
    """
    Handle audio source selection interaction (Batch 5).

    Fetches the search results for the job, picks one (the first result in
    non-interactive mode, otherwise whichever the user chooses at the
    prompt) and submits the choice through the client.

    The number shown next to each entry is its 1-based position in the
    results list, because that is how the user's answer is mapped back to a
    result and what is submitted as the selection index.

    Errors from the client or from prompting are logged, not raised;
    KeyboardInterrupt at the prompt is re-raised to the caller.

    Args:
        job_id: ID of the job that is awaiting audio selection
    """
    self.logger.info("=" * 60)
    self.logger.info("AUDIO SOURCE SELECTION NEEDED")
    self.logger.info("=" * 60)

    try:
        # Get search results
        results_data = self.client.get_audio_search_results(job_id)
        results = results_data.get('results', [])

        if not results:
            self.logger.error("No search results available")
            return

        # In non-interactive mode, auto-select first result
        if self.config.non_interactive:
            self.logger.info("Non-interactive mode: Auto-selecting first result")
            selection_index = 0
        else:
            self.logger.info("")
            self.logger.info("Choose which audio source to download:")
            self.logger.info("")

            for position, entry in enumerate(results):
                provider = entry.get('provider', 'Unknown')
                artist = entry.get('artist', 'Unknown')
                title = entry.get('title', 'Unknown')
                quality = entry.get('quality', '')
                duration = entry.get('duration')

                # Format duration if available. Truncate to whole seconds
                # first: the ":02d" format code raises ValueError on a float,
                # which would otherwise abort the whole selection.
                duration_str = ""
                if duration:
                    try:
                        minutes, seconds = divmod(int(duration), 60)
                    except (TypeError, ValueError):
                        # Unparseable duration: list the entry without it.
                        pass
                    else:
                        duration_str = f" [{minutes}:{seconds:02d}]"

                quality_str = f" ({quality})" if quality else ""

                self.logger.info(
                    f"  {position + 1}) [{provider}] {artist} - {title}{quality_str}{duration_str}"
                )

            self.logger.info("")

            selection_index = -1
            while selection_index < 0:
                try:
                    choice = input(f"Enter your choice (1-{len(results)}): ").strip()
                    choice_num = int(choice)
                    if 1 <= choice_num <= len(results):
                        selection_index = choice_num - 1
                    else:
                        self.logger.error(f"Please enter a number between 1 and {len(results)}")
                except ValueError:
                    self.logger.error("Please enter a valid number")
                except KeyboardInterrupt:
                    print()
                    raise

        selected = results[selection_index]
        self.logger.info(
            f"Selected: [{selected.get('provider')}] {selected.get('artist')} - {selected.get('title')}"
        )
        self.logger.info("")

        # Submit selection
        submit_result = self.client.select_audio_source(job_id, selection_index)
        if submit_result.get('status') == 'success':
            self.logger.info("Selection submitted successfully")
        else:
            self.logger.error(f"Error submitting selection: {submit_result}")

    except Exception as e:
        self.logger.error(f"Error handling audio selection: {e}")
1326
+
1327
+ def _open_instrumental_review_and_wait(self, job_id: str) -> None:
1328
+ """Open browser to instrumental review UI and wait for selection."""
1329
+ review_url = f"{self.config.review_ui_url}/jobs/{job_id}/instrumental-review"
1330
+
1331
+ self.logger.info("")
1332
+ self.logger.info("=" * 60)
1333
+ self.logger.info("OPENING BROWSER FOR INSTRUMENTAL REVIEW")
1334
+ self.logger.info("=" * 60)
1335
+ self.logger.info(f"Review URL: {review_url}")
1336
+ self.logger.info("")
1337
+ self.logger.info("In the browser you can:")
1338
+ self.logger.info(" - View the backing vocals waveform")
1339
+ self.logger.info(" - Listen to clean instrumental, backing vocals, or combined")
1340
+ self.logger.info(" - Select regions to mute and create a custom instrumental")
1341
+ self.logger.info(" - Submit your final selection")
1342
+ self.logger.info("")
1343
+ self.logger.info("Waiting for selection to be submitted...")
1344
+ self.logger.info("(Press Ctrl+C to cancel)")
1345
+ self.logger.info("")
1346
+
1347
+ # Open browser
1348
+ webbrowser.open(review_url)
1349
+
1350
+ # Poll until job status changes from awaiting_instrumental_selection
1351
+ while True:
1352
+ try:
1353
+ job_data = self.client.get_job(job_id)
1354
+ current_status = job_data.get('status')
1355
+
1356
+ if current_status != 'awaiting_instrumental_selection':
1357
+ selection = job_data.get('state_data', {}).get('instrumental_selection', 'unknown')
1358
+ self.logger.info(f"Selection received: {selection}")
1359
+ self.logger.info(f"Job status: {current_status}")
1360
+ return
1361
+
1362
+ time.sleep(self.config.poll_interval)
1363
+
1364
+ except KeyboardInterrupt:
1365
+ print()
1366
+ self.logger.info("Cancelled. You can resume this job later with --resume")
1367
+ raise
1368
+ except Exception as e:
1369
+ self.logger.warning(f"Error checking status: {e}")
1370
+ time.sleep(self.config.poll_interval)
1371
+
1372
+
939
1373
  def download_outputs(self, job_id: str, job_data: Dict[str, Any]) -> None:
940
1374
  """
941
1375
  Download all output files for a completed job.
@@ -1343,7 +1777,14 @@ class JobMonitor:
1343
1777
  self._polls_without_updates = 0
1344
1778
 
1345
1779
  # Handle human interaction points
1346
- if status in ['awaiting_review', 'in_review']:
1780
+ if status == 'awaiting_audio_selection':
1781
+ if not self._audio_selection_prompted:
1782
+ self.logger.info("")
1783
+ self.handle_audio_selection(job_id)
1784
+ self._audio_selection_prompted = True
1785
+ self._last_timeline_index = 0 # Reset to catch any events
1786
+
1787
+ elif status in ['awaiting_review', 'in_review']:
1347
1788
  if not self._review_opened:
1348
1789
  self.logger.info("")
1349
1790
  self.handle_review(job_id)
@@ -1781,8 +2222,7 @@ def main():
1781
2222
  ignored_features.append("--lyrics-only")
1782
2223
  if args.background_video:
1783
2224
  ignored_features.append("--background_video")
1784
- if getattr(args, 'auto_download', False):
1785
- ignored_features.append("--auto-download (audio search not yet supported)")
2225
+ # --auto-download is now supported (Batch 5)
1786
2226
  # These are now supported but server-side handling may be partial
1787
2227
  if args.organised_dir:
1788
2228
  ignored_features.append("--organised_dir (local-only)")
@@ -1803,6 +2243,8 @@ def main():
1803
2243
 
1804
2244
  # Handle new job submission - parse input arguments same as gen_cli
1805
2245
  input_media, artist, title, filename_pattern = None, None, None, None
2246
+ use_audio_search = False # Batch 5: audio search mode
2247
+ is_url_input = False
1806
2248
 
1807
2249
  if not args.args:
1808
2250
  parser.print_help()
@@ -1810,52 +2252,137 @@ def main():
1810
2252
 
1811
2253
  # Allow 3 forms of positional arguments:
1812
2254
  # 1. URL or Media File only
1813
- # 2. Artist and Title only
1814
- # 3. URL, Artist, and Title
2255
+ # 2. Artist and Title only (audio search mode - Batch 5)
2256
+ # 3. URL/File, Artist, and Title
1815
2257
  if args.args and (is_url(args.args[0]) or is_file(args.args[0])):
1816
2258
  input_media = args.args[0]
2259
+ is_url_input = is_url(args.args[0])
1817
2260
  if len(args.args) > 2:
1818
2261
  artist = args.args[1]
1819
2262
  title = args.args[2]
1820
2263
  elif len(args.args) > 1:
1821
2264
  artist = args.args[1]
1822
2265
  else:
1823
- logger.error("Input media provided without Artist and Title")
1824
- return 1
2266
+ # For URLs, artist/title can be auto-detected
2267
+ if is_url_input:
2268
+ logger.info("URL provided without Artist and Title - will be auto-detected from video metadata")
2269
+ else:
2270
+ logger.error("Input media provided without Artist and Title")
2271
+ return 1
1825
2272
  elif os.path.isdir(args.args[0]):
1826
2273
  logger.error("Folder processing is not yet supported in remote mode")
1827
2274
  return 1
1828
2275
  elif len(args.args) > 1:
2276
+ # Audio search mode: artist + title without file (Batch 5)
1829
2277
  artist = args.args[0]
1830
2278
  title = args.args[1]
1831
- logger.error("Audio search (artist+title) is not yet supported in remote mode.")
1832
- logger.error("Please provide a local audio file path instead.")
1833
- logger.error("")
1834
- logger.error("For local flacfetch search, use karaoke-gen instead:")
1835
- logger.error(f" karaoke-gen \"{artist}\" \"{title}\"")
1836
- return 1
2279
+ use_audio_search = True
1837
2280
  else:
1838
2281
  parser.print_help()
1839
2282
  return 1
1840
2283
 
1841
- # For now, remote mode only supports file uploads
1842
- if not input_media or not os.path.isfile(input_media):
1843
- logger.error("Remote mode currently only supports local file uploads")
1844
- logger.error("Please provide a path to an audio file (mp3, wav, flac, m4a, ogg, aac)")
1845
- return 1
1846
-
1847
2284
  # Validate artist and title are provided
1848
2285
  if not artist or not title:
1849
2286
  logger.error("Artist and Title are required")
1850
2287
  parser.print_help()
1851
2288
  return 1
1852
2289
 
2290
+ # For file/URL input modes, validate input exists
2291
+ if not use_audio_search:
2292
+ if not input_media:
2293
+ logger.error("No input media or URL provided")
2294
+ return 1
2295
+
2296
+ # For file input (not URL), validate file exists
2297
+ if not is_url_input and not os.path.isfile(input_media):
2298
+ logger.error(f"File not found: {input_media}")
2299
+ logger.error("Please provide a valid path to an audio file (mp3, wav, flac, m4a, ogg, aac)")
2300
+ return 1
2301
+
2302
+ # Handle audio search mode (Batch 5)
2303
+ if use_audio_search:
2304
+ logger.info("=" * 60)
2305
+ logger.info("Karaoke Generator (Remote) - Audio Search Mode")
2306
+ logger.info("=" * 60)
2307
+ logger.info(f"Searching for: {artist} - {title}")
2308
+ if getattr(args, 'auto_download', False) or config.non_interactive:
2309
+ logger.info(f"Auto-download: enabled (will auto-select best source)")
2310
+ if args.style_params_json:
2311
+ logger.info(f"Style: {args.style_params_json}")
2312
+ logger.info(f"CDG: {args.enable_cdg}, TXT: {args.enable_txt}")
2313
+ if args.brand_prefix:
2314
+ logger.info(f"Brand: {args.brand_prefix}")
2315
+ logger.info(f"Service URL: {config.service_url}")
2316
+ logger.info("")
2317
+
2318
+ # Read youtube description from file if provided
2319
+ youtube_description = None
2320
+ if args.youtube_description_file and os.path.isfile(args.youtube_description_file):
2321
+ try:
2322
+ with open(args.youtube_description_file, 'r') as f:
2323
+ youtube_description = f.read()
2324
+ logger.info(f"Loaded YouTube description from: {args.youtube_description_file}")
2325
+ except Exception as e:
2326
+ logger.warning(f"Failed to read YouTube description file: {e}")
2327
+
2328
+ try:
2329
+ # Determine auto_download mode
2330
+ auto_download = getattr(args, 'auto_download', False) or config.non_interactive
2331
+
2332
+ result = client.search_audio(
2333
+ artist=artist,
2334
+ title=title,
2335
+ auto_download=auto_download,
2336
+ enable_cdg=args.enable_cdg,
2337
+ enable_txt=args.enable_txt,
2338
+ brand_prefix=args.brand_prefix,
2339
+ discord_webhook_url=args.discord_webhook_url,
2340
+ youtube_description=youtube_description,
2341
+ enable_youtube_upload=getattr(args, 'enable_youtube_upload', False),
2342
+ dropbox_path=getattr(args, 'dropbox_path', None),
2343
+ gdrive_folder_id=getattr(args, 'gdrive_folder_id', None),
2344
+ lyrics_artist=getattr(args, 'lyrics_artist', None),
2345
+ lyrics_title=getattr(args, 'lyrics_title', None),
2346
+ subtitle_offset_ms=getattr(args, 'subtitle_offset_ms', 0) or 0,
2347
+ clean_instrumental_model=getattr(args, 'clean_instrumental_model', None),
2348
+ backing_vocals_models=getattr(args, 'backing_vocals_models', None),
2349
+ other_stems_models=getattr(args, 'other_stems_models', None),
2350
+ )
2351
+
2352
+ job_id = result.get('job_id')
2353
+ results_count = result.get('results_count', 0)
2354
+ server_version = result.get('server_version', 'unknown')
2355
+
2356
+ logger.info(f"Job created: {job_id}")
2357
+ logger.info(f"Server version: {server_version}")
2358
+ logger.info(f"Audio sources found: {results_count}")
2359
+ logger.info("")
2360
+
2361
+ # Monitor job
2362
+ return monitor.monitor(job_id)
2363
+
2364
+ except ValueError as e:
2365
+ logger.error(str(e))
2366
+ return 1
2367
+ except Exception as e:
2368
+ logger.error(f"Error: {e}")
2369
+ logger.exception("Full error details:")
2370
+ return 1
2371
+
2372
+ # File upload mode (original flow)
1853
2373
  logger.info("=" * 60)
1854
2374
  logger.info("Karaoke Generator (Remote) - Job Submission")
1855
2375
  logger.info("=" * 60)
1856
- logger.info(f"File: {input_media}")
1857
- logger.info(f"Artist: {artist}")
1858
- logger.info(f"Title: {title}")
2376
+ if is_url_input:
2377
+ logger.info(f"URL: {input_media}")
2378
+ else:
2379
+ logger.info(f"File: {input_media}")
2380
+ if artist:
2381
+ logger.info(f"Artist: {artist}")
2382
+ if title:
2383
+ logger.info(f"Title: {title}")
2384
+ if not artist and not title and is_url_input:
2385
+ logger.info(f"Artist/Title: (will be auto-detected from URL)")
1859
2386
  if args.style_params_json:
1860
2387
  logger.info(f"Style: {args.style_params_json}")
1861
2388
  logger.info(f"CDG: {args.enable_cdg}, TXT: {args.enable_txt}")
@@ -1908,34 +2435,67 @@ def main():
1908
2435
  logger.warning(f"Failed to read YouTube description file: {e}")
1909
2436
 
1910
2437
  try:
1911
- # Submit job with all options
1912
- result = client.submit_job(
1913
- filepath=input_media,
1914
- artist=artist,
1915
- title=title,
1916
- style_params_path=args.style_params_json,
1917
- enable_cdg=args.enable_cdg,
1918
- enable_txt=args.enable_txt,
1919
- brand_prefix=args.brand_prefix,
1920
- discord_webhook_url=args.discord_webhook_url,
1921
- youtube_description=youtube_description,
1922
- organised_dir_rclone_root=args.organised_dir_rclone_root,
1923
- enable_youtube_upload=getattr(args, 'enable_youtube_upload', False),
1924
- # Native API distribution (preferred for remote CLI)
1925
- dropbox_path=getattr(args, 'dropbox_path', None),
1926
- gdrive_folder_id=getattr(args, 'gdrive_folder_id', None),
1927
- # Lyrics configuration
1928
- lyrics_artist=getattr(args, 'lyrics_artist', None),
1929
- lyrics_title=getattr(args, 'lyrics_title', None),
1930
- lyrics_file=getattr(args, 'lyrics_file', None),
1931
- subtitle_offset_ms=getattr(args, 'subtitle_offset_ms', 0) or 0,
1932
- # Audio separation model configuration
1933
- clean_instrumental_model=getattr(args, 'clean_instrumental_model', None),
1934
- backing_vocals_models=getattr(args, 'backing_vocals_models', None),
1935
- other_stems_models=getattr(args, 'other_stems_models', None),
1936
- # Existing instrumental (Batch 3)
1937
- existing_instrumental=getattr(args, 'existing_instrumental', None),
1938
- )
2438
+ # Submit job - different endpoint for URL vs file
2439
+ if is_url_input:
2440
+ # URL-based job submission
2441
+ # Note: style_params_path is not supported for URL-based jobs
2442
+ # If custom styles are needed, download the audio locally first
2443
+ if args.style_params_json:
2444
+ logger.warning("Custom styles (--style_params_json) are not supported for URL-based jobs. "
2445
+ "Download the audio locally first and use file upload for custom styles.")
2446
+
2447
+ result = client.submit_job_from_url(
2448
+ url=input_media,
2449
+ artist=artist,
2450
+ title=title,
2451
+ enable_cdg=args.enable_cdg,
2452
+ enable_txt=args.enable_txt,
2453
+ brand_prefix=args.brand_prefix,
2454
+ discord_webhook_url=args.discord_webhook_url,
2455
+ youtube_description=youtube_description,
2456
+ organised_dir_rclone_root=args.organised_dir_rclone_root,
2457
+ enable_youtube_upload=getattr(args, 'enable_youtube_upload', False),
2458
+ # Native API distribution (preferred for remote CLI)
2459
+ dropbox_path=getattr(args, 'dropbox_path', None),
2460
+ gdrive_folder_id=getattr(args, 'gdrive_folder_id', None),
2461
+ # Lyrics configuration
2462
+ lyrics_artist=getattr(args, 'lyrics_artist', None),
2463
+ lyrics_title=getattr(args, 'lyrics_title', None),
2464
+ subtitle_offset_ms=getattr(args, 'subtitle_offset_ms', 0) or 0,
2465
+ # Audio separation model configuration
2466
+ clean_instrumental_model=getattr(args, 'clean_instrumental_model', None),
2467
+ backing_vocals_models=getattr(args, 'backing_vocals_models', None),
2468
+ other_stems_models=getattr(args, 'other_stems_models', None),
2469
+ )
2470
+ else:
2471
+ # File-based job submission
2472
+ result = client.submit_job(
2473
+ filepath=input_media,
2474
+ artist=artist,
2475
+ title=title,
2476
+ style_params_path=args.style_params_json,
2477
+ enable_cdg=args.enable_cdg,
2478
+ enable_txt=args.enable_txt,
2479
+ brand_prefix=args.brand_prefix,
2480
+ discord_webhook_url=args.discord_webhook_url,
2481
+ youtube_description=youtube_description,
2482
+ organised_dir_rclone_root=args.organised_dir_rclone_root,
2483
+ enable_youtube_upload=getattr(args, 'enable_youtube_upload', False),
2484
+ # Native API distribution (preferred for remote CLI)
2485
+ dropbox_path=getattr(args, 'dropbox_path', None),
2486
+ gdrive_folder_id=getattr(args, 'gdrive_folder_id', None),
2487
+ # Lyrics configuration
2488
+ lyrics_artist=getattr(args, 'lyrics_artist', None),
2489
+ lyrics_title=getattr(args, 'lyrics_title', None),
2490
+ lyrics_file=getattr(args, 'lyrics_file', None),
2491
+ subtitle_offset_ms=getattr(args, 'subtitle_offset_ms', 0) or 0,
2492
+ # Audio separation model configuration
2493
+ clean_instrumental_model=getattr(args, 'clean_instrumental_model', None),
2494
+ backing_vocals_models=getattr(args, 'backing_vocals_models', None),
2495
+ other_stems_models=getattr(args, 'other_stems_models', None),
2496
+ # Existing instrumental (Batch 3)
2497
+ existing_instrumental=getattr(args, 'existing_instrumental', None),
2498
+ )
1939
2499
  job_id = result.get('job_id')
1940
2500
  style_assets = result.get('style_assets_uploaded', [])
1941
2501
  server_version = result.get('server_version', 'unknown')