karaoke-gen 0.71.23__py3-none-any.whl → 0.71.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -41,6 +41,11 @@ from .cli_args import create_parser, process_style_overrides, is_url, is_file
41
41
  class JobStatus(str, Enum):
42
42
  """Job status values (matching backend)."""
43
43
  PENDING = "pending"
44
+ # Audio search states (Batch 5)
45
+ SEARCHING_AUDIO = "searching_audio"
46
+ AWAITING_AUDIO_SELECTION = "awaiting_audio_selection"
47
+ DOWNLOADING_AUDIO = "downloading_audio"
48
+ # Main workflow
44
49
  DOWNLOADING = "downloading"
45
50
  SEPARATING_STAGE1 = "separating_stage1"
46
51
  SEPARATING_STAGE2 = "separating_stage2"
@@ -143,6 +148,70 @@ class RemoteKaraokeClient:
143
148
  response = self.session.request(method, url, **kwargs)
144
149
  return response
145
150
 
151
def _upload_file_to_signed_url(self, signed_url: str, file_path: str, content_type: str) -> bool:
    """
    PUT a local file to a pre-signed storage URL.

    The request is issued with the module-level ``requests.put`` instead of
    ``self.session`` because signed URLs should not carry this client's
    auth headers.

    Args:
        signed_url: The signed URL handed out by the backend
        file_path: Local path of the file to send
        content_type: MIME type sent as the Content-Type header

    Returns:
        True when the server answers HTTP 200 or 201. False for any other
        status code, or when opening/sending the file raises; both failure
        cases are logged through ``self.logger.error``.
    """
    upload_headers = {'Content-Type': content_type}
    try:
        with open(file_path, 'rb') as source:
            response = requests.put(
                signed_url,
                data=source,
                headers=upload_headers,
                timeout=600,  # 10 minutes for large files
            )

        # Guard clause: anything but 200/201 is reported as a failed upload.
        if response.status_code not in (200, 201):
            self.logger.error(f"Failed to upload to signed URL: HTTP {response.status_code} - {response.text}")
            return False
        return True
    except Exception as e:
        # Best-effort contract: callers get a boolean, never an exception.
        self.logger.error(f"Error uploading to signed URL: {e}")
        return False
182
+
183
def _get_content_type(self, file_path: str) -> str:
    """
    Map a file's extension to a MIME content type.

    The extension is taken from ``Path(file_path).suffix`` and compared
    case-insensitively against a fixed table.

    Args:
        file_path: Path (or bare filename) whose suffix is inspected

    Returns:
        The MIME type for a known extension, otherwise
        'application/octet-stream'.
    """
    audio_types = {
        '.mp3': 'audio/mpeg',
        '.wav': 'audio/wav',
        '.flac': 'audio/flac',
        '.m4a': 'audio/mp4',
        '.ogg': 'audio/ogg',
        '.aac': 'audio/aac',
    }
    image_types = {
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.gif': 'image/gif',
        '.webp': 'image/webp',
    }
    font_types = {
        '.ttf': 'font/ttf',
        '.otf': 'font/otf',
        '.woff': 'font/woff',
        '.woff2': 'font/woff2',
    }
    other_types = {
        '.json': 'application/json',
        '.txt': 'text/plain',
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.rtf': 'application/rtf',
    }
    known_types = {**audio_types, **image_types, **font_types, **other_types}

    suffix = Path(file_path).suffix.lower()
    if suffix in known_types:
        return known_types[suffix]
    return 'application/octet-stream'
214
+
146
215
  def _parse_style_params(self, style_params_path: str) -> Dict[str, str]:
147
216
  """
148
217
  Parse style_params.json and extract file paths that need to be uploaded.
@@ -183,6 +252,124 @@ class RemoteKaraokeClient:
183
252
 
184
253
  return asset_files
185
254
 
255
def submit_job_from_url(
    self,
    url: str,
    artist: Optional[str] = None,
    title: Optional[str] = None,
    enable_cdg: bool = True,
    enable_txt: bool = True,
    brand_prefix: Optional[str] = None,
    discord_webhook_url: Optional[str] = None,
    youtube_description: Optional[str] = None,
    organised_dir_rclone_root: Optional[str] = None,
    enable_youtube_upload: bool = False,
    dropbox_path: Optional[str] = None,
    gdrive_folder_id: Optional[str] = None,
    lyrics_artist: Optional[str] = None,
    lyrics_title: Optional[str] = None,
    subtitle_offset_ms: int = 0,
    clean_instrumental_model: Optional[str] = None,
    backing_vocals_models: Optional[list] = None,
    other_stems_models: Optional[list] = None,
) -> Dict[str, Any]:
    """
    Create a karaoke generation job from a YouTube/online URL.

    POSTs a JSON payload to ``/api/jobs/create-from-url``. ``url``,
    ``enable_cdg`` and ``enable_txt`` are always sent; every other argument
    is sent only when it is truthy (``subtitle_offset_ms`` only when it
    differs from 0).

    Note: Custom style configuration is not supported for URL-based jobs.
    If you need custom styles, download the audio locally first and use
    the regular file upload flow with submit_job().

    Args:
        url: YouTube or other video URL to download audio from
        artist: Artist name (optional - auto-detected if not provided)
        title: Song title (optional - auto-detected if not provided)
        enable_cdg: Generate CDG+MP3 package
        enable_txt: Generate TXT+MP3 package
        brand_prefix: Brand code prefix (e.g., "NOMAD")
        discord_webhook_url: Discord webhook for notifications
        youtube_description: YouTube video description
        organised_dir_rclone_root: Legacy rclone path (deprecated)
        enable_youtube_upload: Enable YouTube upload
        dropbox_path: Dropbox folder path for organized output (native API)
        gdrive_folder_id: Google Drive folder ID for public share (native API)
        lyrics_artist: Override artist name for lyrics search
        lyrics_title: Override title for lyrics search
        subtitle_offset_ms: Subtitle timing offset in milliseconds
        clean_instrumental_model: Model for clean instrumental separation
        backing_vocals_models: List of models for backing vocals separation
        other_stems_models: List of models for other stems (bass, drums, etc.)

    Returns:
        The decoded JSON body of the backend response.

    Raises:
        RuntimeError: If the response status is not 200, or the body's
            'status' field is not 'success'.
        KeyError: If a successful body has no 'job_id'.
    """
    self.logger.info(f"Submitting URL-based job: {url}")

    # Fields that are always part of the request.
    payload: Dict[str, Any] = {
        'url': url,
        'enable_cdg': enable_cdg,
        'enable_txt': enable_txt,
    }

    # (key, value, include?) in the order the keys are added to the payload.
    optional_fields = [
        ('artist', artist, artist),
        ('title', title, title),
        ('brand_prefix', brand_prefix, brand_prefix),
        ('discord_webhook_url', discord_webhook_url, discord_webhook_url),
        ('youtube_description', youtube_description, youtube_description),
        ('enable_youtube_upload', enable_youtube_upload, enable_youtube_upload),
        ('dropbox_path', dropbox_path, dropbox_path),
        ('gdrive_folder_id', gdrive_folder_id, gdrive_folder_id),
        ('organised_dir_rclone_root', organised_dir_rclone_root, organised_dir_rclone_root),
        ('lyrics_artist', lyrics_artist, lyrics_artist),
        ('lyrics_title', lyrics_title, lyrics_title),
        # The offset is compared against 0 rather than tested for truthiness.
        ('subtitle_offset_ms', subtitle_offset_ms, subtitle_offset_ms != 0),
        ('clean_instrumental_model', clean_instrumental_model, clean_instrumental_model),
        ('backing_vocals_models', backing_vocals_models, backing_vocals_models),
        ('other_stems_models', other_stems_models, other_stems_models),
    ]
    for field_name, field_value, include in optional_fields:
        if include:
            payload[field_name] = field_value

    self.logger.info(f"Creating URL-based job at {self.config.service_url}/api/jobs/create-from-url")

    response = self._request('POST', '/api/jobs/create-from-url', json=payload)

    if response.status_code != 200:
        # Prefer the structured error body; fall back to the raw text.
        try:
            failure = response.json()
        except Exception:
            failure = response.text
        raise RuntimeError(f"Error creating job from URL: {failure}")

    result = response.json()
    if result.get('status') != 'success':
        raise RuntimeError(f"Error creating job from URL: {result}")

    job_id = result['job_id']
    detected_artist = result.get('detected_artist')
    detected_title = result.get('detected_title')

    self.logger.info(f"Job {job_id} created from URL")
    if detected_artist:
        self.logger.info(f" Artist: {detected_artist}")
    if detected_title:
        self.logger.info(f" Title: {detected_title}")

    return result
372
+
186
373
  def submit_job(
187
374
  self,
188
375
  filepath: str,
@@ -204,10 +391,21 @@ class RemoteKaraokeClient:
204
391
  lyrics_title: Optional[str] = None,
205
392
  lyrics_file: Optional[str] = None,
206
393
  subtitle_offset_ms: int = 0,
394
+ # Audio separation model configuration
395
+ clean_instrumental_model: Optional[str] = None,
396
+ backing_vocals_models: Optional[list] = None,
397
+ other_stems_models: Optional[list] = None,
398
+ # Existing instrumental (Batch 3)
399
+ existing_instrumental: Optional[str] = None,
207
400
  ) -> Dict[str, Any]:
208
401
  """
209
402
  Submit a new karaoke generation job with optional style configuration.
210
403
 
404
+ Uses signed URL upload flow to bypass Cloud Run's 32MB request body limit:
405
+ 1. Create job and get signed upload URLs from backend
406
+ 2. Upload files directly to GCS using signed URLs
407
+ 3. Notify backend that uploads are complete to start processing
408
+
211
409
  Args:
212
410
  filepath: Path to audio file
213
411
  artist: Artist name
@@ -226,6 +424,10 @@ class RemoteKaraokeClient:
226
424
  lyrics_title: Override title for lyrics search
227
425
  lyrics_file: Path to user-provided lyrics file
228
426
  subtitle_offset_ms: Subtitle timing offset in milliseconds
427
+ clean_instrumental_model: Model for clean instrumental separation
428
+ backing_vocals_models: List of models for backing vocals separation
429
+ other_stems_models: List of models for other stems (bass, drums, etc.)
430
+ existing_instrumental: Path to existing instrumental file to use instead of AI separation
229
431
  """
230
432
  file_path = Path(filepath)
231
433
 
@@ -239,110 +441,168 @@ class RemoteKaraokeClient:
239
441
  f"Allowed: {', '.join(self.ALLOWED_AUDIO_EXTENSIONS)}"
240
442
  )
241
443
 
242
- self.logger.info(f"Uploading audio file: {filepath}")
444
+ # Step 1: Build list of files to upload
445
+ files_info = []
446
+ local_files = {} # file_type -> local_path
447
+
448
+ # Main audio file
449
+ audio_content_type = self._get_content_type(filepath)
450
+ files_info.append({
451
+ 'filename': file_path.name,
452
+ 'content_type': audio_content_type,
453
+ 'file_type': 'audio'
454
+ })
455
+ local_files['audio'] = filepath
456
+ self.logger.info(f"Will upload audio: {filepath}")
457
+
458
+ # Parse style params and find referenced files
459
+ style_assets = {}
460
+ if style_params_path and os.path.isfile(style_params_path):
461
+ self.logger.info(f"Parsing style configuration: {style_params_path}")
462
+ style_assets = self._parse_style_params(style_params_path)
463
+
464
+ # Add style_params.json
465
+ files_info.append({
466
+ 'filename': Path(style_params_path).name,
467
+ 'content_type': 'application/json',
468
+ 'file_type': 'style_params'
469
+ })
470
+ local_files['style_params'] = style_params_path
471
+ self.logger.info(f" Will upload style_params.json")
472
+
473
+ # Add each style asset file
474
+ for asset_key, asset_path in style_assets.items():
475
+ if os.path.isfile(asset_path):
476
+ content_type = self._get_content_type(asset_path)
477
+ files_info.append({
478
+ 'filename': Path(asset_path).name,
479
+ 'content_type': content_type,
480
+ 'file_type': asset_key # e.g., 'style_intro_background'
481
+ })
482
+ local_files[asset_key] = asset_path
483
+ self.logger.info(f" Will upload {asset_key}: {asset_path}")
484
+
485
+ # Add lyrics file if provided
486
+ if lyrics_file and os.path.isfile(lyrics_file):
487
+ content_type = self._get_content_type(lyrics_file)
488
+ files_info.append({
489
+ 'filename': Path(lyrics_file).name,
490
+ 'content_type': content_type,
491
+ 'file_type': 'lyrics_file'
492
+ })
493
+ local_files['lyrics_file'] = lyrics_file
494
+ self.logger.info(f"Will upload lyrics file: {lyrics_file}")
495
+
496
+ # Add existing instrumental file if provided (Batch 3)
497
+ if existing_instrumental and os.path.isfile(existing_instrumental):
498
+ content_type = self._get_content_type(existing_instrumental)
499
+ files_info.append({
500
+ 'filename': Path(existing_instrumental).name,
501
+ 'content_type': content_type,
502
+ 'file_type': 'existing_instrumental'
503
+ })
504
+ local_files['existing_instrumental'] = existing_instrumental
505
+ self.logger.info(f"Will upload existing instrumental: {existing_instrumental}")
506
+
507
+ # Step 2: Create job and get signed upload URLs
508
+ self.logger.info(f"Creating job at {self.config.service_url}/api/jobs/create-with-upload-urls")
509
+
510
+ create_request = {
511
+ 'artist': artist,
512
+ 'title': title,
513
+ 'files': files_info,
514
+ 'enable_cdg': enable_cdg,
515
+ 'enable_txt': enable_txt,
516
+ }
517
+
518
+ if brand_prefix:
519
+ create_request['brand_prefix'] = brand_prefix
520
+ if discord_webhook_url:
521
+ create_request['discord_webhook_url'] = discord_webhook_url
522
+ if youtube_description:
523
+ create_request['youtube_description'] = youtube_description
524
+ if enable_youtube_upload:
525
+ create_request['enable_youtube_upload'] = enable_youtube_upload
526
+ if dropbox_path:
527
+ create_request['dropbox_path'] = dropbox_path
528
+ if gdrive_folder_id:
529
+ create_request['gdrive_folder_id'] = gdrive_folder_id
530
+ if organised_dir_rclone_root:
531
+ create_request['organised_dir_rclone_root'] = organised_dir_rclone_root
532
+ if lyrics_artist:
533
+ create_request['lyrics_artist'] = lyrics_artist
534
+ if lyrics_title:
535
+ create_request['lyrics_title'] = lyrics_title
536
+ if subtitle_offset_ms != 0:
537
+ create_request['subtitle_offset_ms'] = subtitle_offset_ms
538
+ if clean_instrumental_model:
539
+ create_request['clean_instrumental_model'] = clean_instrumental_model
540
+ if backing_vocals_models:
541
+ create_request['backing_vocals_models'] = backing_vocals_models
542
+ if other_stems_models:
543
+ create_request['other_stems_models'] = other_stems_models
544
+
545
+ response = self._request('POST', '/api/jobs/create-with-upload-urls', json=create_request)
243
546
 
244
- # Prepare files dict for multipart upload
245
- files_to_upload = {}
246
- files_to_close = []
547
+ if response.status_code != 200:
548
+ try:
549
+ error_detail = response.json()
550
+ except Exception:
551
+ error_detail = response.text
552
+ raise RuntimeError(f"Error creating job: {error_detail}")
247
553
 
248
- try:
249
- # Main audio file
250
- audio_file = open(filepath, 'rb')
251
- files_to_close.append(audio_file)
252
- files_to_upload['file'] = (file_path.name, audio_file)
253
-
254
- # Parse style params and find referenced files
255
- style_assets = {}
256
- if style_params_path and os.path.isfile(style_params_path):
257
- self.logger.info(f"Parsing style configuration: {style_params_path}")
258
- style_assets = self._parse_style_params(style_params_path)
259
-
260
- # Upload style_params.json
261
- style_file = open(style_params_path, 'rb')
262
- files_to_close.append(style_file)
263
- files_to_upload['style_params'] = (Path(style_params_path).name, style_file, 'application/json')
264
- self.logger.info(f" Will upload style_params.json")
265
-
266
- # Upload each style asset file
267
- for asset_key, asset_path in style_assets.items():
268
- if os.path.isfile(asset_path):
269
- asset_file = open(asset_path, 'rb')
270
- files_to_close.append(asset_file)
271
- # Determine content type
272
- ext = Path(asset_path).suffix.lower()
273
- if ext in self.ALLOWED_IMAGE_EXTENSIONS:
274
- content_type = f'image/{ext[1:]}'
275
- elif ext in self.ALLOWED_FONT_EXTENSIONS:
276
- content_type = 'font/ttf'
277
- else:
278
- content_type = 'application/octet-stream'
279
- files_to_upload[asset_key] = (Path(asset_path).name, asset_file, content_type)
280
- self.logger.info(f" Will upload {asset_key}: {asset_path}")
281
-
282
- # Upload lyrics file if provided
283
- if lyrics_file and os.path.isfile(lyrics_file):
284
- self.logger.info(f"Uploading lyrics file: {lyrics_file}")
285
- lyrics_file_handle = open(lyrics_file, 'rb')
286
- files_to_close.append(lyrics_file_handle)
287
- files_to_upload['lyrics_file'] = (Path(lyrics_file).name, lyrics_file_handle, 'text/plain')
288
-
289
- # Prepare form data
290
- data = {
291
- 'artist': artist,
292
- 'title': title,
293
- 'enable_cdg': str(enable_cdg).lower(),
294
- 'enable_txt': str(enable_txt).lower(),
295
- }
296
-
297
- if brand_prefix:
298
- data['brand_prefix'] = brand_prefix
299
- if discord_webhook_url:
300
- data['discord_webhook_url'] = discord_webhook_url
301
- if youtube_description:
302
- data['youtube_description'] = youtube_description
303
- if enable_youtube_upload:
304
- data['enable_youtube_upload'] = str(enable_youtube_upload).lower()
305
-
306
- # Native API distribution (preferred for remote CLI)
307
- if dropbox_path:
308
- data['dropbox_path'] = dropbox_path
309
- if gdrive_folder_id:
310
- data['gdrive_folder_id'] = gdrive_folder_id
311
-
312
- # Legacy rclone distribution (deprecated)
313
- if organised_dir_rclone_root:
314
- data['organised_dir_rclone_root'] = organised_dir_rclone_root
554
+ create_result = response.json()
555
+ if create_result.get('status') != 'success':
556
+ raise RuntimeError(f"Error creating job: {create_result}")
557
+
558
+ job_id = create_result['job_id']
559
+ upload_urls = create_result['upload_urls']
560
+
561
+ self.logger.info(f"Job {job_id} created. Uploading {len(upload_urls)} files directly to storage...")
562
+
563
+ # Step 3: Upload each file directly to GCS using signed URLs
564
+ uploaded_files = []
565
+ for url_info in upload_urls:
566
+ file_type = url_info['file_type']
567
+ signed_url = url_info['upload_url']
568
+ content_type = url_info['content_type']
569
+ local_path = local_files.get(file_type)
315
570
 
316
- # Lyrics configuration
317
- if lyrics_artist:
318
- data['lyrics_artist'] = lyrics_artist
319
- if lyrics_title:
320
- data['lyrics_title'] = lyrics_title
321
- if subtitle_offset_ms != 0:
322
- data['subtitle_offset_ms'] = str(subtitle_offset_ms)
571
+ if not local_path:
572
+ self.logger.warning(f"No local file found for file_type: {file_type}")
573
+ continue
323
574
 
324
- self.logger.info(f"Submitting job to {self.config.service_url}/api/jobs/upload")
575
+ # Calculate file size for logging
576
+ file_size = os.path.getsize(local_path)
577
+ file_size_mb = file_size / (1024 * 1024)
578
+ self.logger.info(f" Uploading {file_type} ({file_size_mb:.1f} MB)...")
325
579
 
326
- response = self._request('POST', '/api/jobs/upload', files=files_to_upload, data=data)
580
+ success = self._upload_file_to_signed_url(signed_url, local_path, content_type)
581
+ if not success:
582
+ raise RuntimeError(f"Failed to upload {file_type} to storage")
327
583
 
328
- finally:
329
- # Close all opened files
330
- for f in files_to_close:
331
- try:
332
- f.close()
333
- except:
334
- pass
584
+ uploaded_files.append(file_type)
585
+ self.logger.info(f" ✓ Uploaded {file_type}")
586
+
587
+ # Step 4: Notify backend that uploads are complete
588
+ self.logger.info(f"Notifying backend that uploads are complete...")
589
+
590
+ complete_request = {
591
+ 'uploaded_files': uploaded_files
592
+ }
593
+
594
+ response = self._request('POST', f'/api/jobs/{job_id}/uploads-complete', json=complete_request)
335
595
 
336
596
  if response.status_code != 200:
337
597
  try:
338
598
  error_detail = response.json()
339
599
  except Exception:
340
600
  error_detail = response.text
341
- raise RuntimeError(f"Error submitting job: {error_detail}")
601
+ raise RuntimeError(f"Error completing uploads: {error_detail}")
342
602
 
343
603
  result = response.json()
344
604
  if result.get('status') != 'success':
345
- raise RuntimeError(f"Error submitting job: {result}")
605
+ raise RuntimeError(f"Error completing uploads: {result}")
346
606
 
347
607
  # Log distribution services info if available
348
608
  if 'distribution_services' in result:
@@ -508,7 +768,18 @@ class RemoteKaraokeClient:
508
768
  error_detail = response.text
509
769
  raise RuntimeError(f"Error getting instrumental options: {error_detail}")
510
770
  return response.json()
511
-
771
+
772
def get_instrumental_analysis(self, job_id: str) -> Dict[str, Any]:
    """
    Fetch instrumental analysis data (including backing vocals detection).

    Args:
        job_id: Identifier of the job to query

    Returns:
        The decoded JSON body of the backend response.

    Raises:
        RuntimeError: If the response status is not 200.
    """
    response = self._request('GET', f'/api/jobs/{job_id}/instrumental-analysis')
    if response.status_code == 200:
        return response.json()

    # Prefer the structured error body; fall back to the raw text.
    try:
        failure = response.json()
    except Exception:
        failure = response.text
    raise RuntimeError(f"Error getting instrumental analysis: {failure}")
782
+
512
783
  def select_instrumental(self, job_id: str, selection: str) -> Dict[str, Any]:
513
784
  """Submit instrumental selection."""
514
785
  response = self._request(
@@ -620,6 +891,122 @@ class RemoteKaraokeClient:
620
891
  error_detail = response.text
621
892
  raise RuntimeError(f"Error completing review: {error_detail}")
622
893
  return response.json()
894
+
895
def search_audio(
    self,
    artist: str,
    title: str,
    auto_download: bool = False,
    style_params_path: Optional[str] = None,
    enable_cdg: bool = True,
    enable_txt: bool = True,
    brand_prefix: Optional[str] = None,
    discord_webhook_url: Optional[str] = None,
    youtube_description: Optional[str] = None,
    enable_youtube_upload: bool = False,
    dropbox_path: Optional[str] = None,
    gdrive_folder_id: Optional[str] = None,
    lyrics_artist: Optional[str] = None,
    lyrics_title: Optional[str] = None,
    subtitle_offset_ms: int = 0,
    clean_instrumental_model: Optional[str] = None,
    backing_vocals_models: Optional[list] = None,
    other_stems_models: Optional[list] = None,
) -> Dict[str, Any]:
    """
    Search for audio by artist and title (Batch 5 - Flacfetch integration).

    POSTs a JSON payload to ``/api/audio-search/search``. ``artist``,
    ``title``, ``auto_download``, ``enable_cdg`` and ``enable_txt`` are
    always sent; the remaining options are sent only when truthy
    (``subtitle_offset_ms`` only when it differs from 0).

    Args:
        artist: Artist name to search for
        title: Song title to search for
        auto_download: Automatically select best audio source (skip interactive selection)
        style_params_path: Accepted for signature parity; this method does
            not read it or include it in the request.
        ... other args same as submit_job()

    Returns:
        Dict with job_id, status, and optionally search results

    Raises:
        ValueError: If the backend answers HTTP 404 (no audio sources found).
        RuntimeError: For any other non-200 response.
    """
    self.logger.info(f"Searching for audio: {artist} - {title}")

    # Fields that are always part of the request.
    payload: Dict[str, Any] = {
        'artist': artist,
        'title': title,
        'auto_download': auto_download,
        'enable_cdg': enable_cdg,
        'enable_txt': enable_txt,
    }

    # (key, value, include?) in the order the keys are added to the payload.
    optional_fields = [
        ('brand_prefix', brand_prefix, brand_prefix),
        ('discord_webhook_url', discord_webhook_url, discord_webhook_url),
        ('youtube_description', youtube_description, youtube_description),
        ('enable_youtube_upload', enable_youtube_upload, enable_youtube_upload),
        ('dropbox_path', dropbox_path, dropbox_path),
        ('gdrive_folder_id', gdrive_folder_id, gdrive_folder_id),
        ('lyrics_artist', lyrics_artist, lyrics_artist),
        ('lyrics_title', lyrics_title, lyrics_title),
        # The offset is compared against 0 rather than tested for truthiness.
        ('subtitle_offset_ms', subtitle_offset_ms, subtitle_offset_ms != 0),
        ('clean_instrumental_model', clean_instrumental_model, clean_instrumental_model),
        ('backing_vocals_models', backing_vocals_models, backing_vocals_models),
        ('other_stems_models', other_stems_models, other_stems_models),
    ]
    for field_name, field_value, include in optional_fields:
        if include:
            payload[field_name] = field_value

    response = self._request('POST', '/api/audio-search/search', json=payload)

    status = response.status_code
    if status == 200:
        return response.json()

    # Prefer the structured error body; fall back to the raw text.
    try:
        failure = response.json()
    except Exception:
        failure = response.text

    # 404 is reported with a distinct exception type from other failures.
    if status == 404:
        raise ValueError(f"No audio sources found: {failure}")
    raise RuntimeError(f"Error searching for audio: {failure}")
984
+
985
def get_audio_search_results(self, job_id: str) -> Dict[str, Any]:
    """
    Fetch the audio search results for a job awaiting selection.

    Args:
        job_id: Identifier of the job to query

    Returns:
        The decoded JSON body of the backend response.

    Raises:
        RuntimeError: If the response status is not 200.
    """
    response = self._request('GET', f'/api/audio-search/{job_id}/results')
    if response.status_code == 200:
        return response.json()

    # Prefer the structured error body; fall back to the raw text.
    try:
        failure = response.json()
    except Exception:
        failure = response.text
    raise RuntimeError(f"Error getting search results: {failure}")
995
+
996
def select_audio_source(self, job_id: str, selection_index: int) -> Dict[str, Any]:
    """
    Submit the chosen audio source for a job and start processing.

    Args:
        job_id: Identifier of the job awaiting audio selection
        selection_index: Index of the chosen search result, sent to the
            backend as 'selection_index'

    Returns:
        The decoded JSON body of the backend response.

    Raises:
        RuntimeError: If the response status is not 200.
    """
    choice = {'selection_index': selection_index}
    response = self._request('POST', f'/api/audio-search/{job_id}/select', json=choice)
    if response.status_code == 200:
        return response.json()

    # Prefer the structured error body; fall back to the raw text.
    try:
        failure = response.json()
    except Exception:
        failure = response.text
    raise RuntimeError(f"Error selecting audio: {failure}")
623
1010
 
624
1011
 
625
1012
  class JobMonitor:
@@ -631,6 +1018,7 @@ class JobMonitor:
631
1018
  self.logger = logger
632
1019
  self._review_opened = False
633
1020
  self._instrumental_prompted = False
1021
+ self._audio_selection_prompted = False # Batch 5: audio source selection
634
1022
  self._last_timeline_index = 0
635
1023
  self._last_log_index = 0
636
1024
  self._show_worker_logs = True # Enable worker log display
@@ -640,6 +1028,11 @@ class JobMonitor:
640
1028
  # Status descriptions for user-friendly logging
641
1029
  STATUS_DESCRIPTIONS = {
642
1030
  'pending': 'Job queued, waiting to start',
1031
+ # Audio search states (Batch 5)
1032
+ 'searching_audio': 'Searching for audio sources',
1033
+ 'awaiting_audio_selection': 'Waiting for audio source selection',
1034
+ 'downloading_audio': 'Downloading selected audio',
1035
+ # Main workflow
643
1036
  'downloading': 'Downloading and preparing input files',
644
1037
  'separating_stage1': 'AI audio separation (stage 1 of 2)',
645
1038
  'separating_stage2': 'AI audio separation (stage 2 of 2)',
@@ -754,39 +1147,97 @@ class JobMonitor:
754
1147
  time.sleep(self.config.poll_interval)
755
1148
 
756
1149
  def handle_instrumental_selection(self, job_id: str) -> None:
757
- """Handle instrumental selection interaction."""
1150
+ """Handle instrumental selection interaction with analysis-based recommendations."""
758
1151
  self.logger.info("=" * 60)
759
1152
  self.logger.info("INSTRUMENTAL SELECTION NEEDED")
760
1153
  self.logger.info("=" * 60)
761
1154
 
762
- # In non-interactive mode, auto-select clean instrumental
763
- if self.config.non_interactive:
764
- self.logger.info("Non-interactive mode: Auto-selecting clean instrumental")
765
- selection = 'clean'
766
- else:
767
- self.logger.info("")
768
- self.logger.info("Choose which instrumental track to use for the final video:")
769
- self.logger.info("")
770
- self.logger.info(" 1) Clean Instrumental (no backing vocals)")
771
- self.logger.info(" Best for songs where you want ONLY the lead vocal removed")
1155
+ # Try to get analysis data for smart recommendations
1156
+ analysis_data = None
1157
+ try:
1158
+ analysis_data = self.client.get_instrumental_analysis(job_id)
1159
+ analysis = analysis_data.get('analysis', {})
1160
+
1161
+ # Display analysis summary
772
1162
  self.logger.info("")
773
- self.logger.info(" 2) Instrumental with Backing Vocals")
774
- self.logger.info(" Best for songs where backing vocals add to the karaoke experience")
1163
+ self.logger.info("=== Backing Vocals Analysis ===")
1164
+ if analysis.get('has_audible_content'):
1165
+ self.logger.info(f" Backing vocals detected: YES")
1166
+ self.logger.info(f" Audible segments: {len(analysis.get('audible_segments', []))}")
1167
+ self.logger.info(f" Audible duration: {analysis.get('total_audible_duration_seconds', 0):.1f}s "
1168
+ f"({analysis.get('audible_percentage', 0):.1f}% of track)")
1169
+ else:
1170
+ self.logger.info(f" Backing vocals detected: NO")
1171
+ self.logger.info(f" Recommendation: {analysis.get('recommended_selection', 'review_needed')}")
775
1172
  self.logger.info("")
1173
+ except Exception as e:
1174
+ self.logger.warning(f"Could not fetch analysis data: {e}")
1175
+ self.logger.info("Falling back to manual selection...")
1176
+
1177
+ # In non-interactive mode, use analysis recommendation or default to clean
1178
+ if self.config.non_interactive:
1179
+ if analysis_data and analysis_data.get('analysis', {}).get('recommended_selection') == 'clean':
1180
+ self.logger.info("Non-interactive mode: Auto-selecting clean instrumental (recommended)")
1181
+ selection = 'clean'
1182
+ else:
1183
+ self.logger.info("Non-interactive mode: Auto-selecting clean instrumental (default)")
1184
+ selection = 'clean'
1185
+ else:
1186
+ # Check if we should recommend clean based on analysis
1187
+ recommend_clean = (
1188
+ analysis_data and
1189
+ not analysis_data.get('analysis', {}).get('has_audible_content', True)
1190
+ )
776
1191
 
777
- selection = ""
778
- while not selection:
1192
+ if recommend_clean:
1193
+ self.logger.info("No backing vocals detected - recommending clean instrumental.")
1194
+ self.logger.info("")
1195
+ self.logger.info("Options:")
1196
+ self.logger.info(" 1) Accept recommendation (clean instrumental)")
1197
+ self.logger.info(" 2) Open browser to review and select")
1198
+ self.logger.info("")
1199
+
779
1200
  try:
780
1201
  choice = input("Enter your choice (1 or 2): ").strip()
781
1202
  if choice == '1':
782
1203
  selection = 'clean'
783
- elif choice == '2':
784
- selection = 'with_backing'
785
1204
  else:
786
- self.logger.error("Invalid choice. Please enter 1 or 2.")
1205
+ self._open_instrumental_review_and_wait(job_id)
1206
+ return # Selection will be submitted via browser
787
1207
  except KeyboardInterrupt:
788
1208
  print()
789
1209
  raise
1210
+ else:
1211
+ # Backing vocals detected or analysis unavailable - offer browser review
1212
+ self.logger.info("Choose how to select your instrumental:")
1213
+ self.logger.info("")
1214
+ self.logger.info(" 1) Clean Instrumental (no backing vocals)")
1215
+ self.logger.info(" Best for songs where you want ONLY the lead vocal removed")
1216
+ self.logger.info("")
1217
+ self.logger.info(" 2) Instrumental with Backing Vocals")
1218
+ self.logger.info(" Best for songs where backing vocals add to the karaoke experience")
1219
+ self.logger.info("")
1220
+ self.logger.info(" 3) Open Browser for Advanced Review")
1221
+ self.logger.info(" Listen to audio, view waveform, and optionally mute sections")
1222
+ self.logger.info(" to create a custom instrumental")
1223
+ self.logger.info("")
1224
+
1225
+ selection = ""
1226
+ while not selection:
1227
+ try:
1228
+ choice = input("Enter your choice (1, 2, or 3): ").strip()
1229
+ if choice == '1':
1230
+ selection = 'clean'
1231
+ elif choice == '2':
1232
+ selection = 'with_backing'
1233
+ elif choice == '3':
1234
+ self._open_instrumental_review_and_wait(job_id)
1235
+ return # Selection will be submitted via browser
1236
+ else:
1237
+ self.logger.error("Invalid choice. Please enter 1, 2, or 3.")
1238
+ except KeyboardInterrupt:
1239
+ print()
1240
+ raise
790
1241
 
791
1242
  self.logger.info(f"Submitting selection: {selection}")
792
1243
 
@@ -799,6 +1250,126 @@ class JobMonitor:
799
1250
  except Exception as e:
800
1251
  self.logger.error(f"Error submitting selection: {e}")
801
1252
 
1253
def handle_audio_selection(self, job_id: str) -> None:
    """Let the user pick an audio search result for a job and submit it (Batch 5).

    Fetches the search results for ``job_id`` through
    ``self.client.get_audio_search_results``. When
    ``self.config.non_interactive`` is set, the first result is chosen
    automatically; otherwise a numbered menu is logged and the user is
    prompted until a valid number is entered. The chosen 0-based list
    position is then submitted with ``self.client.select_audio_source``.

    Menu numbers are the 1-based positions in the returned list, because
    that position is exactly what the prompt validates and what is
    submitted. (Numbering from each result's optional ``index`` field could
    show duplicate or mismatching numbers.)

    Args:
        job_id: Identifier of the job that is waiting for an audio source.

    Raises:
        KeyboardInterrupt: Re-raised when the user presses Ctrl+C at the
            prompt. Any other exception is logged instead of propagated.
    """
    self.logger.info("=" * 60)
    self.logger.info("AUDIO SOURCE SELECTION NEEDED")
    self.logger.info("=" * 60)

    try:
        # Get search results
        results_data = self.client.get_audio_search_results(job_id)
        results = results_data.get('results', [])

        if not results:
            self.logger.error("No search results available")
            return

        # In non-interactive mode, auto-select first result
        if self.config.non_interactive:
            self.logger.info("Non-interactive mode: Auto-selecting first result")
            selection_index = 0
        else:
            self.logger.info("")
            self.logger.info("Choose which audio source to download:")
            self.logger.info("")

            for position, result in enumerate(results, start=1):
                provider = result.get('provider', 'Unknown')
                artist = result.get('artist', 'Unknown')
                title = result.get('title', 'Unknown')
                quality = result.get('quality', '')
                duration = result.get('duration')

                # Format duration if available (presumably seconds - confirm
                # against the backend). Coerce to int first: the "02d" format
                # rejects floats, and a formatting error here would abort the
                # whole selection via the outer handler.
                duration_str = ""
                if duration:
                    try:
                        minutes, seconds = divmod(int(duration), 60)
                    except (TypeError, ValueError):
                        pass  # Unusable duration value: just omit it
                    else:
                        duration_str = f" [{minutes}:{seconds:02d}]"

                quality_str = f" ({quality})" if quality else ""

                self.logger.info(f" {position}) [{provider}] {artist} - {title}{quality_str}{duration_str}")

            self.logger.info("")

            # Keep prompting until a number inside the menu range is entered
            selection_index = -1
            while selection_index < 0:
                try:
                    choice = input(f"Enter your choice (1-{len(results)}): ").strip()
                    choice_num = int(choice)
                    if 1 <= choice_num <= len(results):
                        selection_index = choice_num - 1
                    else:
                        self.logger.error(f"Please enter a number between 1 and {len(results)}")
                except ValueError:
                    self.logger.error("Please enter a valid number")
                except KeyboardInterrupt:
                    print()
                    raise

        selected = results[selection_index]
        self.logger.info(f"Selected: [{selected.get('provider')}] {selected.get('artist')} - {selected.get('title')}")
        self.logger.info("")

        # Submit selection
        result = self.client.select_audio_source(job_id, selection_index)
        if result.get('status') == 'success':
            self.logger.info("Selection submitted successfully")
        else:
            self.logger.error(f"Error submitting selection: {result}")

    except Exception as e:
        # KeyboardInterrupt is not an Exception subclass, so Ctrl+C still
        # propagates to the caller; everything else is only logged.
        self.logger.error(f"Error handling audio selection: {e}")
1326
+
1327
def _open_instrumental_review_and_wait(self, job_id: str) -> None:
    """Open the instrumental review UI in a browser and wait for a selection.

    Builds the review URL from ``self.config.review_ui_url`` and ``job_id``,
    logs instructions, opens the URL with ``webbrowser.open`` and then polls
    ``self.client.get_job`` every ``self.config.poll_interval`` seconds until
    the job status is no longer ``awaiting_instrumental_selection``.

    Args:
        job_id: Identifier of the job whose instrumental is being reviewed.

    Raises:
        KeyboardInterrupt: Re-raised after logging a resume hint when the
            user presses Ctrl+C while waiting. Errors raised while polling
            are logged as warnings and the poll is retried.
    """
    review_url = f"{self.config.review_ui_url}/jobs/{job_id}/instrumental-review"

    self.logger.info("")
    self.logger.info("=" * 60)
    self.logger.info("OPENING BROWSER FOR INSTRUMENTAL REVIEW")
    self.logger.info("=" * 60)
    self.logger.info(f"Review URL: {review_url}")
    self.logger.info("")
    self.logger.info("In the browser you can:")
    self.logger.info(" - View the backing vocals waveform")
    self.logger.info(" - Listen to clean instrumental, backing vocals, or combined")
    self.logger.info(" - Select regions to mute and create a custom instrumental")
    self.logger.info(" - Submit your final selection")
    self.logger.info("")
    self.logger.info("Waiting for selection to be submitted...")
    self.logger.info("(Press Ctrl+C to cancel)")
    self.logger.info("")

    # Open browser
    webbrowser.open(review_url)

    # Poll until job status changes from awaiting_instrumental_selection.
    # The KeyboardInterrupt handler wraps the whole loop so that Ctrl+C is
    # reported the same way whether it arrives during a request or a sleep.
    try:
        while True:
            try:
                job_data = self.client.get_job(job_id)
                current_status = job_data.get('status')

                if current_status != 'awaiting_instrumental_selection':
                    # 'state_data' may be present but null; a default passed
                    # to .get() would not cover that case, and the resulting
                    # AttributeError would be swallowed below and make this
                    # loop spin forever.
                    state_data = job_data.get('state_data') or {}
                    selection = state_data.get('instrumental_selection', 'unknown')
                    self.logger.info(f"Selection received: {selection}")
                    self.logger.info(f"Job status: {current_status}")
                    return
            except Exception as e:
                # Treat polling failures as transient: warn and retry
                self.logger.warning(f"Error checking status: {e}")

            time.sleep(self.config.poll_interval)
    except KeyboardInterrupt:
        print()
        self.logger.info("Cancelled. You can resume this job later with --resume")
        raise
1371
+
1372
+
802
1373
  def download_outputs(self, job_id: str, job_data: Dict[str, Any]) -> None:
803
1374
  """
804
1375
  Download all output files for a completed job.
@@ -1206,7 +1777,14 @@ class JobMonitor:
1206
1777
  self._polls_without_updates = 0
1207
1778
 
1208
1779
  # Handle human interaction points
1209
- if status in ['awaiting_review', 'in_review']:
1780
+ if status == 'awaiting_audio_selection':
1781
+ if not self._audio_selection_prompted:
1782
+ self.logger.info("")
1783
+ self.handle_audio_selection(job_id)
1784
+ self._audio_selection_prompted = True
1785
+ self._last_timeline_index = 0 # Reset to catch any events
1786
+
1787
+ elif status in ['awaiting_review', 'in_review']:
1210
1788
  if not self._review_opened:
1211
1789
  self.logger.info("")
1212
1790
  self.handle_review(job_id)
@@ -1642,12 +2220,9 @@ def main():
1642
2220
  ignored_features.append("--skip-transcription")
1643
2221
  if args.lyrics_only:
1644
2222
  ignored_features.append("--lyrics-only")
1645
- if args.existing_instrumental:
1646
- ignored_features.append("--existing_instrumental")
1647
2223
  if args.background_video:
1648
2224
  ignored_features.append("--background_video")
1649
- if getattr(args, 'auto_download', False):
1650
- ignored_features.append("--auto-download (audio search not yet supported)")
2225
+ # --auto-download is now supported (Batch 5)
1651
2226
  # These are now supported but server-side handling may be partial
1652
2227
  if args.organised_dir:
1653
2228
  ignored_features.append("--organised_dir (local-only)")
@@ -1668,6 +2243,8 @@ def main():
1668
2243
 
1669
2244
  # Handle new job submission - parse input arguments same as gen_cli
1670
2245
  input_media, artist, title, filename_pattern = None, None, None, None
2246
+ use_audio_search = False # Batch 5: audio search mode
2247
+ is_url_input = False
1671
2248
 
1672
2249
  if not args.args:
1673
2250
  parser.print_help()
@@ -1675,52 +2252,137 @@ def main():
1675
2252
 
1676
2253
  # Allow 3 forms of positional arguments:
1677
2254
  # 1. URL or Media File only
1678
- # 2. Artist and Title only
1679
- # 3. URL, Artist, and Title
2255
+ # 2. Artist and Title only (audio search mode - Batch 5)
2256
+ # 3. URL/File, Artist, and Title
1680
2257
  if args.args and (is_url(args.args[0]) or is_file(args.args[0])):
1681
2258
  input_media = args.args[0]
2259
+ is_url_input = is_url(args.args[0])
1682
2260
  if len(args.args) > 2:
1683
2261
  artist = args.args[1]
1684
2262
  title = args.args[2]
1685
2263
  elif len(args.args) > 1:
1686
2264
  artist = args.args[1]
1687
2265
  else:
1688
- logger.error("Input media provided without Artist and Title")
1689
- return 1
2266
+ # For URLs, artist/title can be auto-detected
2267
+ if is_url_input:
2268
+ logger.info("URL provided without Artist and Title - will be auto-detected from video metadata")
2269
+ else:
2270
+ logger.error("Input media provided without Artist and Title")
2271
+ return 1
1690
2272
  elif os.path.isdir(args.args[0]):
1691
2273
  logger.error("Folder processing is not yet supported in remote mode")
1692
2274
  return 1
1693
2275
  elif len(args.args) > 1:
2276
+ # Audio search mode: artist + title without file (Batch 5)
1694
2277
  artist = args.args[0]
1695
2278
  title = args.args[1]
1696
- logger.error("Audio search (artist+title) is not yet supported in remote mode.")
1697
- logger.error("Please provide a local audio file path instead.")
1698
- logger.error("")
1699
- logger.error("For local flacfetch search, use karaoke-gen instead:")
1700
- logger.error(f" karaoke-gen \"{artist}\" \"{title}\"")
1701
- return 1
2279
+ use_audio_search = True
1702
2280
  else:
1703
2281
  parser.print_help()
1704
2282
  return 1
1705
2283
 
1706
- # For now, remote mode only supports file uploads
1707
- if not input_media or not os.path.isfile(input_media):
1708
- logger.error("Remote mode currently only supports local file uploads")
1709
- logger.error("Please provide a path to an audio file (mp3, wav, flac, m4a, ogg, aac)")
1710
- return 1
1711
-
1712
2284
  # Validate artist and title are provided
1713
2285
  if not artist or not title:
1714
2286
  logger.error("Artist and Title are required")
1715
2287
  parser.print_help()
1716
2288
  return 1
1717
2289
 
2290
+ # For file/URL input modes, validate input exists
2291
+ if not use_audio_search:
2292
+ if not input_media:
2293
+ logger.error("No input media or URL provided")
2294
+ return 1
2295
+
2296
+ # For file input (not URL), validate file exists
2297
+ if not is_url_input and not os.path.isfile(input_media):
2298
+ logger.error(f"File not found: {input_media}")
2299
+ logger.error("Please provide a valid path to an audio file (mp3, wav, flac, m4a, ogg, aac)")
2300
+ return 1
2301
+
2302
+ # Handle audio search mode (Batch 5)
2303
+ if use_audio_search:
2304
+ logger.info("=" * 60)
2305
+ logger.info("Karaoke Generator (Remote) - Audio Search Mode")
2306
+ logger.info("=" * 60)
2307
+ logger.info(f"Searching for: {artist} - {title}")
2308
+ if getattr(args, 'auto_download', False) or config.non_interactive:
2309
+ logger.info(f"Auto-download: enabled (will auto-select best source)")
2310
+ if args.style_params_json:
2311
+ logger.info(f"Style: {args.style_params_json}")
2312
+ logger.info(f"CDG: {args.enable_cdg}, TXT: {args.enable_txt}")
2313
+ if args.brand_prefix:
2314
+ logger.info(f"Brand: {args.brand_prefix}")
2315
+ logger.info(f"Service URL: {config.service_url}")
2316
+ logger.info("")
2317
+
2318
+ # Read youtube description from file if provided
2319
+ youtube_description = None
2320
+ if args.youtube_description_file and os.path.isfile(args.youtube_description_file):
2321
+ try:
2322
+ with open(args.youtube_description_file, 'r') as f:
2323
+ youtube_description = f.read()
2324
+ logger.info(f"Loaded YouTube description from: {args.youtube_description_file}")
2325
+ except Exception as e:
2326
+ logger.warning(f"Failed to read YouTube description file: {e}")
2327
+
2328
+ try:
2329
+ # Determine auto_download mode
2330
+ auto_download = getattr(args, 'auto_download', False) or config.non_interactive
2331
+
2332
+ result = client.search_audio(
2333
+ artist=artist,
2334
+ title=title,
2335
+ auto_download=auto_download,
2336
+ enable_cdg=args.enable_cdg,
2337
+ enable_txt=args.enable_txt,
2338
+ brand_prefix=args.brand_prefix,
2339
+ discord_webhook_url=args.discord_webhook_url,
2340
+ youtube_description=youtube_description,
2341
+ enable_youtube_upload=getattr(args, 'enable_youtube_upload', False),
2342
+ dropbox_path=getattr(args, 'dropbox_path', None),
2343
+ gdrive_folder_id=getattr(args, 'gdrive_folder_id', None),
2344
+ lyrics_artist=getattr(args, 'lyrics_artist', None),
2345
+ lyrics_title=getattr(args, 'lyrics_title', None),
2346
+ subtitle_offset_ms=getattr(args, 'subtitle_offset_ms', 0) or 0,
2347
+ clean_instrumental_model=getattr(args, 'clean_instrumental_model', None),
2348
+ backing_vocals_models=getattr(args, 'backing_vocals_models', None),
2349
+ other_stems_models=getattr(args, 'other_stems_models', None),
2350
+ )
2351
+
2352
+ job_id = result.get('job_id')
2353
+ results_count = result.get('results_count', 0)
2354
+ server_version = result.get('server_version', 'unknown')
2355
+
2356
+ logger.info(f"Job created: {job_id}")
2357
+ logger.info(f"Server version: {server_version}")
2358
+ logger.info(f"Audio sources found: {results_count}")
2359
+ logger.info("")
2360
+
2361
+ # Monitor job
2362
+ return monitor.monitor(job_id)
2363
+
2364
+ except ValueError as e:
2365
+ logger.error(str(e))
2366
+ return 1
2367
+ except Exception as e:
2368
+ logger.error(f"Error: {e}")
2369
+ logger.exception("Full error details:")
2370
+ return 1
2371
+
2372
+ # File upload mode (original flow)
1718
2373
  logger.info("=" * 60)
1719
2374
  logger.info("Karaoke Generator (Remote) - Job Submission")
1720
2375
  logger.info("=" * 60)
1721
- logger.info(f"File: {input_media}")
1722
- logger.info(f"Artist: {artist}")
1723
- logger.info(f"Title: {title}")
2376
+ if is_url_input:
2377
+ logger.info(f"URL: {input_media}")
2378
+ else:
2379
+ logger.info(f"File: {input_media}")
2380
+ if artist:
2381
+ logger.info(f"Artist: {artist}")
2382
+ if title:
2383
+ logger.info(f"Title: {title}")
2384
+ if not artist and not title and is_url_input:
2385
+ logger.info(f"Artist/Title: (will be auto-detected from URL)")
1724
2386
  if args.style_params_json:
1725
2387
  logger.info(f"Style: {args.style_params_json}")
1726
2388
  logger.info(f"CDG: {args.enable_cdg}, TXT: {args.enable_txt}")
@@ -1747,6 +2409,15 @@ def main():
1747
2409
  logger.info(f"Lyrics File: {args.lyrics_file}")
1748
2410
  if getattr(args, 'subtitle_offset_ms', 0):
1749
2411
  logger.info(f"Subtitle Offset: {args.subtitle_offset_ms}ms")
2412
+ # Audio model configuration
2413
+ if getattr(args, 'clean_instrumental_model', None):
2414
+ logger.info(f"Clean Instrumental Model: {args.clean_instrumental_model}")
2415
+ if getattr(args, 'backing_vocals_models', None):
2416
+ logger.info(f"Backing Vocals Models: {args.backing_vocals_models}")
2417
+ if getattr(args, 'other_stems_models', None):
2418
+ logger.info(f"Other Stems Models: {args.other_stems_models}")
2419
+ if getattr(args, 'existing_instrumental', None):
2420
+ logger.info(f"Existing Instrumental: {args.existing_instrumental}")
1750
2421
  logger.info(f"Service URL: {config.service_url}")
1751
2422
  logger.info(f"Review UI: {config.review_ui_url}")
1752
2423
  if config.non_interactive:
@@ -1764,28 +2435,67 @@ def main():
1764
2435
  logger.warning(f"Failed to read YouTube description file: {e}")
1765
2436
 
1766
2437
  try:
1767
- # Submit job with all options
1768
- result = client.submit_job(
1769
- filepath=input_media,
1770
- artist=artist,
1771
- title=title,
1772
- style_params_path=args.style_params_json,
1773
- enable_cdg=args.enable_cdg,
1774
- enable_txt=args.enable_txt,
1775
- brand_prefix=args.brand_prefix,
1776
- discord_webhook_url=args.discord_webhook_url,
1777
- youtube_description=youtube_description,
1778
- organised_dir_rclone_root=args.organised_dir_rclone_root,
1779
- enable_youtube_upload=getattr(args, 'enable_youtube_upload', False),
1780
- # Native API distribution (preferred for remote CLI)
1781
- dropbox_path=getattr(args, 'dropbox_path', None),
1782
- gdrive_folder_id=getattr(args, 'gdrive_folder_id', None),
1783
- # Lyrics configuration
1784
- lyrics_artist=getattr(args, 'lyrics_artist', None),
1785
- lyrics_title=getattr(args, 'lyrics_title', None),
1786
- lyrics_file=getattr(args, 'lyrics_file', None),
1787
- subtitle_offset_ms=getattr(args, 'subtitle_offset_ms', 0) or 0,
1788
- )
2438
+ # Submit job - different endpoint for URL vs file
2439
+ if is_url_input:
2440
+ # URL-based job submission
2441
+ # Note: style_params_path is not supported for URL-based jobs
2442
+ # If custom styles are needed, download the audio locally first
2443
+ if args.style_params_json:
2444
+ logger.warning("Custom styles (--style_params_json) are not supported for URL-based jobs. "
2445
+ "Download the audio locally first and use file upload for custom styles.")
2446
+
2447
+ result = client.submit_job_from_url(
2448
+ url=input_media,
2449
+ artist=artist,
2450
+ title=title,
2451
+ enable_cdg=args.enable_cdg,
2452
+ enable_txt=args.enable_txt,
2453
+ brand_prefix=args.brand_prefix,
2454
+ discord_webhook_url=args.discord_webhook_url,
2455
+ youtube_description=youtube_description,
2456
+ organised_dir_rclone_root=args.organised_dir_rclone_root,
2457
+ enable_youtube_upload=getattr(args, 'enable_youtube_upload', False),
2458
+ # Native API distribution (preferred for remote CLI)
2459
+ dropbox_path=getattr(args, 'dropbox_path', None),
2460
+ gdrive_folder_id=getattr(args, 'gdrive_folder_id', None),
2461
+ # Lyrics configuration
2462
+ lyrics_artist=getattr(args, 'lyrics_artist', None),
2463
+ lyrics_title=getattr(args, 'lyrics_title', None),
2464
+ subtitle_offset_ms=getattr(args, 'subtitle_offset_ms', 0) or 0,
2465
+ # Audio separation model configuration
2466
+ clean_instrumental_model=getattr(args, 'clean_instrumental_model', None),
2467
+ backing_vocals_models=getattr(args, 'backing_vocals_models', None),
2468
+ other_stems_models=getattr(args, 'other_stems_models', None),
2469
+ )
2470
+ else:
2471
+ # File-based job submission
2472
+ result = client.submit_job(
2473
+ filepath=input_media,
2474
+ artist=artist,
2475
+ title=title,
2476
+ style_params_path=args.style_params_json,
2477
+ enable_cdg=args.enable_cdg,
2478
+ enable_txt=args.enable_txt,
2479
+ brand_prefix=args.brand_prefix,
2480
+ discord_webhook_url=args.discord_webhook_url,
2481
+ youtube_description=youtube_description,
2482
+ organised_dir_rclone_root=args.organised_dir_rclone_root,
2483
+ enable_youtube_upload=getattr(args, 'enable_youtube_upload', False),
2484
+ # Native API distribution (preferred for remote CLI)
2485
+ dropbox_path=getattr(args, 'dropbox_path', None),
2486
+ gdrive_folder_id=getattr(args, 'gdrive_folder_id', None),
2487
+ # Lyrics configuration
2488
+ lyrics_artist=getattr(args, 'lyrics_artist', None),
2489
+ lyrics_title=getattr(args, 'lyrics_title', None),
2490
+ lyrics_file=getattr(args, 'lyrics_file', None),
2491
+ subtitle_offset_ms=getattr(args, 'subtitle_offset_ms', 0) or 0,
2492
+ # Audio separation model configuration
2493
+ clean_instrumental_model=getattr(args, 'clean_instrumental_model', None),
2494
+ backing_vocals_models=getattr(args, 'backing_vocals_models', None),
2495
+ other_stems_models=getattr(args, 'other_stems_models', None),
2496
+ # Existing instrumental (Batch 3)
2497
+ existing_instrumental=getattr(args, 'existing_instrumental', None),
2498
+ )
1789
2499
  job_id = result.get('job_id')
1790
2500
  style_assets = result.get('style_assets_uploaded', [])
1791
2501
  server_version = result.get('server_version', 'unknown')