karaoke-gen 0.71.23__py3-none-any.whl → 0.71.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -143,6 +143,70 @@ class RemoteKaraokeClient:
143
143
  response = self.session.request(method, url, **kwargs)
144
144
  return response
145
145
 
146
+ def _upload_file_to_signed_url(self, signed_url: str, file_path: str, content_type: str) -> bool:
147
+ """
148
+ Upload a file directly to GCS using a signed URL.
149
+
150
+ Args:
151
+ signed_url: The signed URL from the backend
152
+ file_path: Local path to the file to upload
153
+ content_type: MIME type for the Content-Type header
154
+
155
+ Returns:
156
+ True if upload succeeded, False otherwise
157
+ """
158
+ try:
159
+ with open(file_path, 'rb') as f:
160
+ # Use a fresh requests session (not self.session) because
161
+ # signed URLs should not have our auth headers
162
+ response = requests.put(
163
+ signed_url,
164
+ data=f,
165
+ headers={'Content-Type': content_type},
166
+ timeout=600 # 10 minutes for large files
167
+ )
168
+
169
+ if response.status_code in (200, 201):
170
+ return True
171
+ else:
172
+ self.logger.error(f"Failed to upload to signed URL: HTTP {response.status_code} - {response.text}")
173
+ return False
174
+ except Exception as e:
175
+ self.logger.error(f"Error uploading to signed URL: {e}")
176
+ return False
177
+
178
+ def _get_content_type(self, file_path: str) -> str:
179
+ """Get the MIME content type for a file based on its extension."""
180
+ ext = Path(file_path).suffix.lower()
181
+
182
+ content_types = {
183
+ # Audio
184
+ '.mp3': 'audio/mpeg',
185
+ '.wav': 'audio/wav',
186
+ '.flac': 'audio/flac',
187
+ '.m4a': 'audio/mp4',
188
+ '.ogg': 'audio/ogg',
189
+ '.aac': 'audio/aac',
190
+ # Images
191
+ '.png': 'image/png',
192
+ '.jpg': 'image/jpeg',
193
+ '.jpeg': 'image/jpeg',
194
+ '.gif': 'image/gif',
195
+ '.webp': 'image/webp',
196
+ # Fonts
197
+ '.ttf': 'font/ttf',
198
+ '.otf': 'font/otf',
199
+ '.woff': 'font/woff',
200
+ '.woff2': 'font/woff2',
201
+ # Other
202
+ '.json': 'application/json',
203
+ '.txt': 'text/plain',
204
+ '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
205
+ '.rtf': 'application/rtf',
206
+ }
207
+
208
+ return content_types.get(ext, 'application/octet-stream')
209
+
146
210
  def _parse_style_params(self, style_params_path: str) -> Dict[str, str]:
147
211
  """
148
212
  Parse style_params.json and extract file paths that need to be uploaded.
@@ -204,10 +268,21 @@ class RemoteKaraokeClient:
204
268
  lyrics_title: Optional[str] = None,
205
269
  lyrics_file: Optional[str] = None,
206
270
  subtitle_offset_ms: int = 0,
271
+ # Audio separation model configuration
272
+ clean_instrumental_model: Optional[str] = None,
273
+ backing_vocals_models: Optional[list] = None,
274
+ other_stems_models: Optional[list] = None,
275
+ # Existing instrumental (Batch 3)
276
+ existing_instrumental: Optional[str] = None,
207
277
  ) -> Dict[str, Any]:
208
278
  """
209
279
  Submit a new karaoke generation job with optional style configuration.
210
280
 
281
+ Uses signed URL upload flow to bypass Cloud Run's 32MB request body limit:
282
+ 1. Create job and get signed upload URLs from backend
283
+ 2. Upload files directly to GCS using signed URLs
284
+ 3. Notify backend that uploads are complete to start processing
285
+
211
286
  Args:
212
287
  filepath: Path to audio file
213
288
  artist: Artist name
@@ -226,6 +301,10 @@ class RemoteKaraokeClient:
226
301
  lyrics_title: Override title for lyrics search
227
302
  lyrics_file: Path to user-provided lyrics file
228
303
  subtitle_offset_ms: Subtitle timing offset in milliseconds
304
+ clean_instrumental_model: Model for clean instrumental separation
305
+ backing_vocals_models: List of models for backing vocals separation
306
+ other_stems_models: List of models for other stems (bass, drums, etc.)
307
+ existing_instrumental: Path to existing instrumental file to use instead of AI separation
229
308
  """
230
309
  file_path = Path(filepath)
231
310
 
@@ -239,110 +318,168 @@ class RemoteKaraokeClient:
239
318
  f"Allowed: {', '.join(self.ALLOWED_AUDIO_EXTENSIONS)}"
240
319
  )
241
320
 
242
- self.logger.info(f"Uploading audio file: {filepath}")
321
+ # Step 1: Build list of files to upload
322
+ files_info = []
323
+ local_files = {} # file_type -> local_path
324
+
325
+ # Main audio file
326
+ audio_content_type = self._get_content_type(filepath)
327
+ files_info.append({
328
+ 'filename': file_path.name,
329
+ 'content_type': audio_content_type,
330
+ 'file_type': 'audio'
331
+ })
332
+ local_files['audio'] = filepath
333
+ self.logger.info(f"Will upload audio: {filepath}")
334
+
335
+ # Parse style params and find referenced files
336
+ style_assets = {}
337
+ if style_params_path and os.path.isfile(style_params_path):
338
+ self.logger.info(f"Parsing style configuration: {style_params_path}")
339
+ style_assets = self._parse_style_params(style_params_path)
340
+
341
+ # Add style_params.json
342
+ files_info.append({
343
+ 'filename': Path(style_params_path).name,
344
+ 'content_type': 'application/json',
345
+ 'file_type': 'style_params'
346
+ })
347
+ local_files['style_params'] = style_params_path
348
+ self.logger.info(f" Will upload style_params.json")
349
+
350
+ # Add each style asset file
351
+ for asset_key, asset_path in style_assets.items():
352
+ if os.path.isfile(asset_path):
353
+ content_type = self._get_content_type(asset_path)
354
+ files_info.append({
355
+ 'filename': Path(asset_path).name,
356
+ 'content_type': content_type,
357
+ 'file_type': asset_key # e.g., 'style_intro_background'
358
+ })
359
+ local_files[asset_key] = asset_path
360
+ self.logger.info(f" Will upload {asset_key}: {asset_path}")
361
+
362
+ # Add lyrics file if provided
363
+ if lyrics_file and os.path.isfile(lyrics_file):
364
+ content_type = self._get_content_type(lyrics_file)
365
+ files_info.append({
366
+ 'filename': Path(lyrics_file).name,
367
+ 'content_type': content_type,
368
+ 'file_type': 'lyrics_file'
369
+ })
370
+ local_files['lyrics_file'] = lyrics_file
371
+ self.logger.info(f"Will upload lyrics file: {lyrics_file}")
372
+
373
+ # Add existing instrumental file if provided (Batch 3)
374
+ if existing_instrumental and os.path.isfile(existing_instrumental):
375
+ content_type = self._get_content_type(existing_instrumental)
376
+ files_info.append({
377
+ 'filename': Path(existing_instrumental).name,
378
+ 'content_type': content_type,
379
+ 'file_type': 'existing_instrumental'
380
+ })
381
+ local_files['existing_instrumental'] = existing_instrumental
382
+ self.logger.info(f"Will upload existing instrumental: {existing_instrumental}")
383
+
384
+ # Step 2: Create job and get signed upload URLs
385
+ self.logger.info(f"Creating job at {self.config.service_url}/api/jobs/create-with-upload-urls")
386
+
387
+ create_request = {
388
+ 'artist': artist,
389
+ 'title': title,
390
+ 'files': files_info,
391
+ 'enable_cdg': enable_cdg,
392
+ 'enable_txt': enable_txt,
393
+ }
243
394
 
244
- # Prepare files dict for multipart upload
245
- files_to_upload = {}
246
- files_to_close = []
395
+ if brand_prefix:
396
+ create_request['brand_prefix'] = brand_prefix
397
+ if discord_webhook_url:
398
+ create_request['discord_webhook_url'] = discord_webhook_url
399
+ if youtube_description:
400
+ create_request['youtube_description'] = youtube_description
401
+ if enable_youtube_upload:
402
+ create_request['enable_youtube_upload'] = enable_youtube_upload
403
+ if dropbox_path:
404
+ create_request['dropbox_path'] = dropbox_path
405
+ if gdrive_folder_id:
406
+ create_request['gdrive_folder_id'] = gdrive_folder_id
407
+ if organised_dir_rclone_root:
408
+ create_request['organised_dir_rclone_root'] = organised_dir_rclone_root
409
+ if lyrics_artist:
410
+ create_request['lyrics_artist'] = lyrics_artist
411
+ if lyrics_title:
412
+ create_request['lyrics_title'] = lyrics_title
413
+ if subtitle_offset_ms != 0:
414
+ create_request['subtitle_offset_ms'] = subtitle_offset_ms
415
+ if clean_instrumental_model:
416
+ create_request['clean_instrumental_model'] = clean_instrumental_model
417
+ if backing_vocals_models:
418
+ create_request['backing_vocals_models'] = backing_vocals_models
419
+ if other_stems_models:
420
+ create_request['other_stems_models'] = other_stems_models
421
+
422
+ response = self._request('POST', '/api/jobs/create-with-upload-urls', json=create_request)
247
423
 
248
- try:
249
- # Main audio file
250
- audio_file = open(filepath, 'rb')
251
- files_to_close.append(audio_file)
252
- files_to_upload['file'] = (file_path.name, audio_file)
253
-
254
- # Parse style params and find referenced files
255
- style_assets = {}
256
- if style_params_path and os.path.isfile(style_params_path):
257
- self.logger.info(f"Parsing style configuration: {style_params_path}")
258
- style_assets = self._parse_style_params(style_params_path)
259
-
260
- # Upload style_params.json
261
- style_file = open(style_params_path, 'rb')
262
- files_to_close.append(style_file)
263
- files_to_upload['style_params'] = (Path(style_params_path).name, style_file, 'application/json')
264
- self.logger.info(f" Will upload style_params.json")
265
-
266
- # Upload each style asset file
267
- for asset_key, asset_path in style_assets.items():
268
- if os.path.isfile(asset_path):
269
- asset_file = open(asset_path, 'rb')
270
- files_to_close.append(asset_file)
271
- # Determine content type
272
- ext = Path(asset_path).suffix.lower()
273
- if ext in self.ALLOWED_IMAGE_EXTENSIONS:
274
- content_type = f'image/{ext[1:]}'
275
- elif ext in self.ALLOWED_FONT_EXTENSIONS:
276
- content_type = 'font/ttf'
277
- else:
278
- content_type = 'application/octet-stream'
279
- files_to_upload[asset_key] = (Path(asset_path).name, asset_file, content_type)
280
- self.logger.info(f" Will upload {asset_key}: {asset_path}")
281
-
282
- # Upload lyrics file if provided
283
- if lyrics_file and os.path.isfile(lyrics_file):
284
- self.logger.info(f"Uploading lyrics file: {lyrics_file}")
285
- lyrics_file_handle = open(lyrics_file, 'rb')
286
- files_to_close.append(lyrics_file_handle)
287
- files_to_upload['lyrics_file'] = (Path(lyrics_file).name, lyrics_file_handle, 'text/plain')
288
-
289
- # Prepare form data
290
- data = {
291
- 'artist': artist,
292
- 'title': title,
293
- 'enable_cdg': str(enable_cdg).lower(),
294
- 'enable_txt': str(enable_txt).lower(),
295
- }
296
-
297
- if brand_prefix:
298
- data['brand_prefix'] = brand_prefix
299
- if discord_webhook_url:
300
- data['discord_webhook_url'] = discord_webhook_url
301
- if youtube_description:
302
- data['youtube_description'] = youtube_description
303
- if enable_youtube_upload:
304
- data['enable_youtube_upload'] = str(enable_youtube_upload).lower()
305
-
306
- # Native API distribution (preferred for remote CLI)
307
- if dropbox_path:
308
- data['dropbox_path'] = dropbox_path
309
- if gdrive_folder_id:
310
- data['gdrive_folder_id'] = gdrive_folder_id
311
-
312
- # Legacy rclone distribution (deprecated)
313
- if organised_dir_rclone_root:
314
- data['organised_dir_rclone_root'] = organised_dir_rclone_root
424
+ if response.status_code != 200:
425
+ try:
426
+ error_detail = response.json()
427
+ except Exception:
428
+ error_detail = response.text
429
+ raise RuntimeError(f"Error creating job: {error_detail}")
430
+
431
+ create_result = response.json()
432
+ if create_result.get('status') != 'success':
433
+ raise RuntimeError(f"Error creating job: {create_result}")
434
+
435
+ job_id = create_result['job_id']
436
+ upload_urls = create_result['upload_urls']
437
+
438
+ self.logger.info(f"Job {job_id} created. Uploading {len(upload_urls)} files directly to storage...")
439
+
440
+ # Step 3: Upload each file directly to GCS using signed URLs
441
+ uploaded_files = []
442
+ for url_info in upload_urls:
443
+ file_type = url_info['file_type']
444
+ signed_url = url_info['upload_url']
445
+ content_type = url_info['content_type']
446
+ local_path = local_files.get(file_type)
315
447
 
316
- # Lyrics configuration
317
- if lyrics_artist:
318
- data['lyrics_artist'] = lyrics_artist
319
- if lyrics_title:
320
- data['lyrics_title'] = lyrics_title
321
- if subtitle_offset_ms != 0:
322
- data['subtitle_offset_ms'] = str(subtitle_offset_ms)
448
+ if not local_path:
449
+ self.logger.warning(f"No local file found for file_type: {file_type}")
450
+ continue
323
451
 
324
- self.logger.info(f"Submitting job to {self.config.service_url}/api/jobs/upload")
452
+ # Calculate file size for logging
453
+ file_size = os.path.getsize(local_path)
454
+ file_size_mb = file_size / (1024 * 1024)
455
+ self.logger.info(f" Uploading {file_type} ({file_size_mb:.1f} MB)...")
325
456
 
326
- response = self._request('POST', '/api/jobs/upload', files=files_to_upload, data=data)
457
+ success = self._upload_file_to_signed_url(signed_url, local_path, content_type)
458
+ if not success:
459
+ raise RuntimeError(f"Failed to upload {file_type} to storage")
327
460
 
328
- finally:
329
- # Close all opened files
330
- for f in files_to_close:
331
- try:
332
- f.close()
333
- except:
334
- pass
461
+ uploaded_files.append(file_type)
462
+ self.logger.info(f" ✓ Uploaded {file_type}")
463
+
464
+ # Step 4: Notify backend that uploads are complete
465
+ self.logger.info(f"Notifying backend that uploads are complete...")
466
+
467
+ complete_request = {
468
+ 'uploaded_files': uploaded_files
469
+ }
470
+
471
+ response = self._request('POST', f'/api/jobs/{job_id}/uploads-complete', json=complete_request)
335
472
 
336
473
  if response.status_code != 200:
337
474
  try:
338
475
  error_detail = response.json()
339
476
  except Exception:
340
477
  error_detail = response.text
341
- raise RuntimeError(f"Error submitting job: {error_detail}")
478
+ raise RuntimeError(f"Error completing uploads: {error_detail}")
342
479
 
343
480
  result = response.json()
344
481
  if result.get('status') != 'success':
345
- raise RuntimeError(f"Error submitting job: {result}")
482
+ raise RuntimeError(f"Error completing uploads: {result}")
346
483
 
347
484
  # Log distribution services info if available
348
485
  if 'distribution_services' in result:
@@ -1642,8 +1779,6 @@ def main():
1642
1779
  ignored_features.append("--skip-transcription")
1643
1780
  if args.lyrics_only:
1644
1781
  ignored_features.append("--lyrics-only")
1645
- if args.existing_instrumental:
1646
- ignored_features.append("--existing_instrumental")
1647
1782
  if args.background_video:
1648
1783
  ignored_features.append("--background_video")
1649
1784
  if getattr(args, 'auto_download', False):
@@ -1747,6 +1882,15 @@ def main():
1747
1882
  logger.info(f"Lyrics File: {args.lyrics_file}")
1748
1883
  if getattr(args, 'subtitle_offset_ms', 0):
1749
1884
  logger.info(f"Subtitle Offset: {args.subtitle_offset_ms}ms")
1885
+ # Audio model configuration
1886
+ if getattr(args, 'clean_instrumental_model', None):
1887
+ logger.info(f"Clean Instrumental Model: {args.clean_instrumental_model}")
1888
+ if getattr(args, 'backing_vocals_models', None):
1889
+ logger.info(f"Backing Vocals Models: {args.backing_vocals_models}")
1890
+ if getattr(args, 'other_stems_models', None):
1891
+ logger.info(f"Other Stems Models: {args.other_stems_models}")
1892
+ if getattr(args, 'existing_instrumental', None):
1893
+ logger.info(f"Existing Instrumental: {args.existing_instrumental}")
1750
1894
  logger.info(f"Service URL: {config.service_url}")
1751
1895
  logger.info(f"Review UI: {config.review_ui_url}")
1752
1896
  if config.non_interactive:
@@ -1785,6 +1929,12 @@ def main():
1785
1929
  lyrics_title=getattr(args, 'lyrics_title', None),
1786
1930
  lyrics_file=getattr(args, 'lyrics_file', None),
1787
1931
  subtitle_offset_ms=getattr(args, 'subtitle_offset_ms', 0) or 0,
1932
+ # Audio separation model configuration
1933
+ clean_instrumental_model=getattr(args, 'clean_instrumental_model', None),
1934
+ backing_vocals_models=getattr(args, 'backing_vocals_models', None),
1935
+ other_stems_models=getattr(args, 'other_stems_models', None),
1936
+ # Existing instrumental (Batch 3)
1937
+ existing_instrumental=getattr(args, 'existing_instrumental', None),
1788
1938
  )
1789
1939
  job_id = result.get('job_id')
1790
1940
  style_assets = result.get('style_assets_uploaded', [])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: karaoke-gen
3
- Version: 0.71.23
3
+ Version: 0.71.27
4
4
  Summary: Generate karaoke videos with synchronized lyrics. Handles the entire process from downloading audio and lyrics to creating the final video with title screens.
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -30,7 +30,7 @@ karaoke_gen/utils/__init__.py,sha256=FpOHyeBRB06f3zMoLBUJHTDZACrabg-DoyBTxNKYyNY
30
30
  karaoke_gen/utils/bulk_cli.py,sha256=bBRHfhvi-wkoNjAoq5rzVkaOwOraoiUhXNQY5rBsX18,19167
31
31
  karaoke_gen/utils/cli_args.py,sha256=jfU6QXfzDlqZiLs58EC5eQs970tkI3-zSN2dKqW3u00,17618
32
32
  karaoke_gen/utils/gen_cli.py,sha256=8TQsu8Ubd-aKeQMHBH4-j65seGnR9IBjmi-OQwR0GAA,25775
33
- karaoke_gen/utils/remote_cli.py,sha256=DUzSTkG3R5FrZMQvi_e01NPGXvdKOGkpnEE1sJMeaEg,76648
33
+ karaoke_gen/utils/remote_cli.py,sha256=IwJRZNwVF_qmAOYoV9PVQlnTd39WtEycRicmdCa5Wxg,83306
34
34
  karaoke_gen/video_background_processor.py,sha256=p3sryMxmkori4Uy2MYgmlk5_QQ7Uh9IoVJLAdkdLIUI,15124
35
35
  karaoke_gen/video_generator.py,sha256=B7BQBrjkyvk3L3sctnPXnvr1rzkw0NYx5UCAl0ZiVx0,18464
36
36
  lyrics_transcriber/__init__.py,sha256=g9ZbJg9U1qo7XzrC25J3bTKcNzzwUJWDVdi_7-hjcM4,412
@@ -268,8 +268,8 @@ lyrics_transcriber/transcribers/whisper.py,sha256=YcCB1ic9H6zL1GS0jD0emu8-qlcH0Q
268
268
  lyrics_transcriber/types.py,sha256=Y7WUx8PAOBYWCIZgw4ndeHfPH8Gg--O3OYYQgMpJ2iI,27728
269
269
  lyrics_transcriber/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
270
270
  lyrics_transcriber/utils/word_utils.py,sha256=-cMGpj9UV4F6IsoDKAV2i1aiqSO8eI91HMAm_igtVMk,958
271
- karaoke_gen-0.71.23.dist-info/METADATA,sha256=c269LSFEjwPZaZLJUT5GNOr655lF_dZuNtCHBnNlDWA,16948
272
- karaoke_gen-0.71.23.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
273
- karaoke_gen-0.71.23.dist-info/entry_points.txt,sha256=xIyLe7K84ZyjO8L0_AmNectz93QjGSs5AkApMtlAd4g,160
274
- karaoke_gen-0.71.23.dist-info/licenses/LICENSE,sha256=81R_4XwMZDODHD7JcZeUR8IiCU8AD7Ajl6bmwR9tYDk,1074
275
- karaoke_gen-0.71.23.dist-info/RECORD,,
271
+ karaoke_gen-0.71.27.dist-info/METADATA,sha256=xuqeNzosJwGo41JXO1BSjpcouoJjYHAyJwtlGXML_YU,16948
272
+ karaoke_gen-0.71.27.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
273
+ karaoke_gen-0.71.27.dist-info/entry_points.txt,sha256=xIyLe7K84ZyjO8L0_AmNectz93QjGSs5AkApMtlAd4g,160
274
+ karaoke_gen-0.71.27.dist-info/licenses/LICENSE,sha256=81R_4XwMZDODHD7JcZeUR8IiCU8AD7Ajl6bmwR9tYDk,1074
275
+ karaoke_gen-0.71.27.dist-info/RECORD,,