karaoke-gen 0.90.1__py3-none-any.whl → 0.96.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. backend/.coveragerc +20 -0
  2. backend/.gitignore +37 -0
  3. backend/Dockerfile +43 -0
  4. backend/Dockerfile.base +74 -0
  5. backend/README.md +242 -0
  6. backend/__init__.py +0 -0
  7. backend/api/__init__.py +0 -0
  8. backend/api/dependencies.py +457 -0
  9. backend/api/routes/__init__.py +0 -0
  10. backend/api/routes/admin.py +742 -0
  11. backend/api/routes/audio_search.py +903 -0
  12. backend/api/routes/auth.py +348 -0
  13. backend/api/routes/file_upload.py +2076 -0
  14. backend/api/routes/health.py +344 -0
  15. backend/api/routes/internal.py +435 -0
  16. backend/api/routes/jobs.py +1610 -0
  17. backend/api/routes/review.py +652 -0
  18. backend/api/routes/themes.py +162 -0
  19. backend/api/routes/users.py +1014 -0
  20. backend/config.py +172 -0
  21. backend/main.py +133 -0
  22. backend/middleware/__init__.py +5 -0
  23. backend/middleware/audit_logging.py +124 -0
  24. backend/models/__init__.py +0 -0
  25. backend/models/job.py +519 -0
  26. backend/models/requests.py +123 -0
  27. backend/models/theme.py +153 -0
  28. backend/models/user.py +254 -0
  29. backend/models/worker_log.py +164 -0
  30. backend/pyproject.toml +29 -0
  31. backend/quick-check.sh +93 -0
  32. backend/requirements.txt +29 -0
  33. backend/run_tests.sh +60 -0
  34. backend/services/__init__.py +0 -0
  35. backend/services/audio_analysis_service.py +243 -0
  36. backend/services/audio_editing_service.py +278 -0
  37. backend/services/audio_search_service.py +702 -0
  38. backend/services/auth_service.py +630 -0
  39. backend/services/credential_manager.py +792 -0
  40. backend/services/discord_service.py +172 -0
  41. backend/services/dropbox_service.py +301 -0
  42. backend/services/email_service.py +1093 -0
  43. backend/services/encoding_interface.py +454 -0
  44. backend/services/encoding_service.py +405 -0
  45. backend/services/firestore_service.py +512 -0
  46. backend/services/flacfetch_client.py +573 -0
  47. backend/services/gce_encoding/README.md +72 -0
  48. backend/services/gce_encoding/__init__.py +22 -0
  49. backend/services/gce_encoding/main.py +589 -0
  50. backend/services/gce_encoding/requirements.txt +16 -0
  51. backend/services/gdrive_service.py +356 -0
  52. backend/services/job_logging.py +258 -0
  53. backend/services/job_manager.py +842 -0
  54. backend/services/job_notification_service.py +271 -0
  55. backend/services/local_encoding_service.py +590 -0
  56. backend/services/local_preview_encoding_service.py +407 -0
  57. backend/services/lyrics_cache_service.py +216 -0
  58. backend/services/metrics.py +413 -0
  59. backend/services/packaging_service.py +287 -0
  60. backend/services/rclone_service.py +106 -0
  61. backend/services/storage_service.py +209 -0
  62. backend/services/stripe_service.py +275 -0
  63. backend/services/structured_logging.py +254 -0
  64. backend/services/template_service.py +330 -0
  65. backend/services/theme_service.py +469 -0
  66. backend/services/tracing.py +543 -0
  67. backend/services/user_service.py +721 -0
  68. backend/services/worker_service.py +558 -0
  69. backend/services/youtube_service.py +112 -0
  70. backend/services/youtube_upload_service.py +445 -0
  71. backend/tests/__init__.py +4 -0
  72. backend/tests/conftest.py +224 -0
  73. backend/tests/emulator/__init__.py +7 -0
  74. backend/tests/emulator/conftest.py +88 -0
  75. backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
  76. backend/tests/emulator/test_emulator_integration.py +356 -0
  77. backend/tests/emulator/test_style_loading_direct.py +436 -0
  78. backend/tests/emulator/test_worker_logs_direct.py +229 -0
  79. backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
  80. backend/tests/requirements-test.txt +10 -0
  81. backend/tests/requirements.txt +6 -0
  82. backend/tests/test_admin_email_endpoints.py +411 -0
  83. backend/tests/test_api_integration.py +460 -0
  84. backend/tests/test_api_routes.py +93 -0
  85. backend/tests/test_audio_analysis_service.py +294 -0
  86. backend/tests/test_audio_editing_service.py +386 -0
  87. backend/tests/test_audio_search.py +1398 -0
  88. backend/tests/test_audio_services.py +378 -0
  89. backend/tests/test_auth_firestore.py +231 -0
  90. backend/tests/test_config_extended.py +68 -0
  91. backend/tests/test_credential_manager.py +377 -0
  92. backend/tests/test_dependencies.py +54 -0
  93. backend/tests/test_discord_service.py +244 -0
  94. backend/tests/test_distribution_services.py +820 -0
  95. backend/tests/test_dropbox_service.py +472 -0
  96. backend/tests/test_email_service.py +492 -0
  97. backend/tests/test_emulator_integration.py +322 -0
  98. backend/tests/test_encoding_interface.py +412 -0
  99. backend/tests/test_file_upload.py +1739 -0
  100. backend/tests/test_flacfetch_client.py +632 -0
  101. backend/tests/test_gdrive_service.py +524 -0
  102. backend/tests/test_instrumental_api.py +431 -0
  103. backend/tests/test_internal_api.py +343 -0
  104. backend/tests/test_job_creation_regression.py +583 -0
  105. backend/tests/test_job_manager.py +339 -0
  106. backend/tests/test_job_manager_notifications.py +329 -0
  107. backend/tests/test_job_notification_service.py +443 -0
  108. backend/tests/test_jobs_api.py +273 -0
  109. backend/tests/test_local_encoding_service.py +423 -0
  110. backend/tests/test_local_preview_encoding_service.py +567 -0
  111. backend/tests/test_main.py +87 -0
  112. backend/tests/test_models.py +918 -0
  113. backend/tests/test_packaging_service.py +382 -0
  114. backend/tests/test_requests.py +201 -0
  115. backend/tests/test_routes_jobs.py +282 -0
  116. backend/tests/test_routes_review.py +337 -0
  117. backend/tests/test_services.py +556 -0
  118. backend/tests/test_services_extended.py +112 -0
  119. backend/tests/test_storage_service.py +448 -0
  120. backend/tests/test_style_upload.py +261 -0
  121. backend/tests/test_template_service.py +295 -0
  122. backend/tests/test_theme_service.py +516 -0
  123. backend/tests/test_unicode_sanitization.py +522 -0
  124. backend/tests/test_upload_api.py +256 -0
  125. backend/tests/test_validate.py +156 -0
  126. backend/tests/test_video_worker_orchestrator.py +847 -0
  127. backend/tests/test_worker_log_subcollection.py +509 -0
  128. backend/tests/test_worker_logging.py +365 -0
  129. backend/tests/test_workers.py +1116 -0
  130. backend/tests/test_workers_extended.py +178 -0
  131. backend/tests/test_youtube_service.py +247 -0
  132. backend/tests/test_youtube_upload_service.py +568 -0
  133. backend/validate.py +173 -0
  134. backend/version.py +27 -0
  135. backend/workers/README.md +597 -0
  136. backend/workers/__init__.py +11 -0
  137. backend/workers/audio_worker.py +618 -0
  138. backend/workers/lyrics_worker.py +683 -0
  139. backend/workers/render_video_worker.py +483 -0
  140. backend/workers/screens_worker.py +525 -0
  141. backend/workers/style_helper.py +198 -0
  142. backend/workers/video_worker.py +1277 -0
  143. backend/workers/video_worker_orchestrator.py +701 -0
  144. backend/workers/worker_logging.py +278 -0
  145. karaoke_gen/instrumental_review/static/index.html +7 -4
  146. karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
  147. karaoke_gen/utils/__init__.py +163 -8
  148. karaoke_gen/video_background_processor.py +9 -4
  149. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/METADATA +1 -1
  150. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/RECORD +186 -41
  151. lyrics_transcriber/correction/agentic/providers/config.py +9 -5
  152. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +1 -51
  153. lyrics_transcriber/correction/corrector.py +192 -130
  154. lyrics_transcriber/correction/operations.py +24 -9
  155. lyrics_transcriber/frontend/package-lock.json +2 -2
  156. lyrics_transcriber/frontend/package.json +1 -1
  157. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
  158. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
  159. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
  160. lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
  161. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
  162. lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
  163. lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
  164. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
  165. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
  166. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
  167. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
  168. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  169. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
  170. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
  171. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
  172. lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
  173. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
  174. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
  175. lyrics_transcriber/frontend/src/theme.ts +42 -15
  176. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  177. lyrics_transcriber/frontend/vite.config.js +5 -0
  178. lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
  179. lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
  180. lyrics_transcriber/frontend/web_assets/index.html +6 -2
  181. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
  182. lyrics_transcriber/output/generator.py +17 -3
  183. lyrics_transcriber/output/video.py +60 -95
  184. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
  185. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/WHEEL +0 -0
  186. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/entry_points.txt +0 -0
  187. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,618 @@
1
+ """
2
+ Audio separation worker.
3
+
4
+ Handles the audio processing track of parallel processing:
5
+ 1. Stage 1: Clean instrumental separation (Modal API, 3-5 min)
6
+ 2. Stage 2: Backing vocals separation (Modal API, 2-3 min)
7
+ 3. Post-processing: Combine instrumentals, normalize
8
+
9
+ Re-uses karaoke_gen.audio_processor.AudioProcessor for remote GPU separation.
10
+
11
+ Observability:
12
+ - All operations wrapped in tracing spans for Cloud Trace visibility
13
+ - Logs include [job:ID] prefix for easy filtering in Cloud Logging
14
+ - Worker start/end timing logged with WORKER_START/WORKER_END markers
15
+ """
16
+ import logging
17
+ import os
18
+ import shutil
19
+ import tempfile
20
+ import time
21
+ from typing import Optional, Dict, Any
22
+ from pathlib import Path
23
+
24
+ from backend.models.job import JobStatus
25
+ from backend.services.job_manager import JobManager
26
+ from backend.services.storage_service import StorageService
27
+ from backend.config import get_settings
28
+ from backend.workers.worker_logging import create_job_logger, setup_job_logging, job_logging_context
29
+ from backend.services.tracing import job_span, add_span_event, add_span_attribute
30
+ from backend.services.metrics import metrics
31
+
32
+ # Import from karaoke_gen package
33
+ from karaoke_gen.audio_processor import AudioProcessor
34
+
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
async def _trigger_lyrics_worker_after_url_download(job_id: str) -> None:
    """
    Start the lyrics worker once the audio for a URL job is available.

    URL jobs are triggered sequentially instead of fully in parallel:
    1. The audio worker downloads the audio and uploads it to GCS
    2. The audio worker calls this function to trigger the lyrics worker
    3. Audio separation and lyrics transcription then proceed in parallel

    Triggering only after the upload prevents the race condition where the
    lyrics worker times out waiting for audio.

    Args:
        job_id: ID of the job whose lyrics worker should be triggered

    Any exception raised while triggering is logged and swallowed; nothing
    is returned or re-raised to the caller.
    """
    # Imported inside the function rather than at module level
    from backend.services.worker_service import get_worker_service

    try:
        await get_worker_service().trigger_lyrics_worker(job_id)
        logger.info(f"Job {job_id}: Triggered lyrics worker after URL download")
    except Exception as exc:
        # Best effort: audio processing can still continue without the trigger.
        # The job will eventually timeout if the lyrics worker never runs.
        logger.error(f"Job {job_id}: Failed to trigger lyrics worker: {exc}")
60
+
61
+
62
# Default model names - used by create_audio_processor when the caller does not
# supply a model, and stored in state_data under 'model_names' by the worker
DEFAULT_CLEAN_MODEL = "model_bs_roformer_ep_317_sdr_12.9755.ckpt"
DEFAULT_BACKING_MODELS = ["mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt"]
DEFAULT_OTHER_MODELS = ["htdemucs_6s.yaml"]


# Loggers to capture for audio worker: passed to setup_job_logging() at worker
# start and detached from again when process_audio_separation() finishes
AUDIO_WORKER_LOGGERS = [
    "karaoke_gen.audio_processor",
]
72
+
73
+
74
async def download_from_url(url: str, temp_dir: str, artist: str, title: str, job_manager: JobManager = None, job_id: str = None) -> Optional[str]:
    """
    Fetch audio from a URL (YouTube, etc.) and convert it to WAV.

    The download is delegated to karaoke_gen's FileHandler, which according
    to its description provides:
    - Anti-detection options (user agent, headers, delays)
    - Cookie support for authenticated downloads
    - Retry logic

    When artist and/or title are missing, the URL metadata is queried to
    fill them in; if that also fails, "Unknown" is used.

    Args:
        url: URL to download from
        temp_dir: Directory the downloaded and converted files are written to
        artist: Artist name for the filename (may be None for auto-detection)
        title: Song title for the filename (may be None for auto-detection)
        job_manager: Optional JobManager used to persist detected metadata
        job_id: Optional ID of the job to update with detected metadata

    Returns:
        Path to the converted WAV file, or None if any step failed
    """
    try:
        from karaoke_gen.file_handler import FileHandler
        from karaoke_gen.utils import sanitize_filename

        file_handler = FileHandler(
            logger=logger,
            ffmpeg_base_command="ffmpeg -hide_banner -loglevel error -nostats -y",
            create_track_subfolders=False,
            dry_run=False
        )

        # Only query the URL metadata when at least one of the names is missing
        if not (artist and title):
            logger.info(f"Extracting metadata from URL: {url}")
            metadata = file_handler.extract_metadata_from_url(url)

            if not metadata:
                logger.warning("Could not extract metadata from URL, using defaults")
                artist = artist or "Unknown"
                title = title or "Unknown"
            else:
                if not artist:
                    artist = metadata.get('artist', 'Unknown')
                    logger.info(f"Auto-detected artist: {artist}")
                if not title:
                    title = metadata.get('title', 'Unknown')
                    logger.info(f"Auto-detected title: {title}")

                # Persist whatever is now known, but only when a job can be addressed
                if job_manager and job_id:
                    update_data = {
                        field: value
                        for field, value in (('artist', artist), ('title', title))
                        if value
                    }
                    if update_data:
                        job_manager.update_job(job_id, update_data)
                        logger.info(f"Updated job {job_id} with detected metadata")

        # Output path without extension, shared by the download and the WAV conversion
        safe_artist = sanitize_filename(artist) if artist else "Unknown"
        safe_title = sanitize_filename(title) if title else "Unknown"
        output_filename_no_extension = os.path.join(temp_dir, f"{safe_artist} - {safe_title}")

        # YouTube cookies (if configured) help bypass
        # "Sign in to confirm you're not a bot" errors
        cookies_str = os.environ.get("YOUTUBE_COOKIES")
        if not cookies_str:
            logger.info("No YOUTUBE_COOKIES env var set - attempting download without cookies")
        else:
            logger.info("Using YouTube cookies for download authentication")

        logger.info(f"Downloading from URL: {url}")
        downloaded_file = file_handler.download_video(
            url=url,
            output_filename_no_extension=output_filename_no_extension,
            cookies_str=cookies_str
        )

        if not (downloaded_file and os.path.exists(downloaded_file)):
            logger.error("Download failed - no file returned")
            return None

        logger.info(f"Downloaded video: {downloaded_file}")

        # Convert to WAV for processing
        wav_file = file_handler.convert_to_wav(
            input_filename=downloaded_file,
            output_filename_no_extension=output_filename_no_extension
        )

        if not (wav_file and os.path.exists(wav_file)):
            logger.error("WAV conversion failed")
            return None

        logger.info(f"Converted to WAV: {wav_file}")
        return wav_file

    except ImportError as import_error:
        logger.error(f"Import error: {import_error}. Check karaoke_gen installation.")
        return None
    except Exception as download_error:
        logger.error(f"Failed to download from URL {url}: {download_error}", exc_info=True)
        return None
183
+
184
+
185
def create_audio_processor(
    temp_dir: str,
    clean_instrumental_model: Optional[str] = None,
    backing_vocals_models: Optional[list] = None,
    other_stems_models: Optional[list] = None
) -> AudioProcessor:
    """
    Build a karaoke_gen AudioProcessor set up for remote API processing.

    The processor is configured for Cloud Run:
    - Uses remote Modal API (via AUDIO_SEPARATOR_API_URL env var)
    - No local models needed (model_file_dir=None)
    - FLAC output format for quality
    - Model names taken from the arguments, falling back to the module defaults

    Side effect: the "karaoke_gen.audio_processor" logger is set to INFO.

    Args:
        temp_dir: Temporary directory for processing (not read in this function body)
        clean_instrumental_model: Model for clean instrumental separation
            (optional, DEFAULT_CLEAN_MODEL is used when falsy)
        backing_vocals_models: List of models for backing vocals separation
            (optional, DEFAULT_BACKING_MODELS is used when falsy)
        other_stems_models: List of models for other stems separation
            (optional, DEFAULT_OTHER_MODELS is used when falsy)

    Returns:
        Configured AudioProcessor instance
    """
    processor_logger = logging.getLogger("karaoke_gen.audio_processor")
    processor_logger.setLevel(logging.INFO)

    processor_kwargs = {
        "logger": processor_logger,
        "log_level": logging.INFO,
        "log_formatter": None,  # Not needed for our use case
        "model_file_dir": None,  # No local models, using remote API
        "lossless_output_format": "FLAC",
        # Caller-supplied models win; module constants fill the gaps
        "clean_instrumental_model": clean_instrumental_model or DEFAULT_CLEAN_MODEL,
        "backing_vocals_models": backing_vocals_models or DEFAULT_BACKING_MODELS,
        # For 6-stem separation (bass, drums, etc.)
        "other_stems_models": other_stems_models or DEFAULT_OTHER_MODELS,
        # FFmpeg command for combining audio files (must be a string, not a list)
        "ffmpeg_base_command": "ffmpeg -hide_banner -loglevel error -nostats -y",
    }
    return AudioProcessor(**processor_kwargs)
232
+
233
+
234
async def process_audio_separation(job_id: str) -> bool:
    """
    Process audio separation for a job using karaoke_gen.AudioProcessor.

    This is the main entry point for the audio worker.

    Workflow:
    1. Download audio from GCS (or from the job URL)
    2. Stage 1: Separate with clean instrumental + other stems models (Modal API)
    3. Stage 2: Separate vocals for backing vocals (Modal API)
    4. Post-process: Combine instrumentals, normalize audio
    5. Upload all stems to GCS
    6. Mark audio processing complete via JobManager.mark_audio_complete

    Cleanup (detaching the per-job log handler and removing the temp
    directory) runs on every exit path, including the early return taken
    when the job cannot be found.

    Args:
        job_id: Job ID to process

    Returns:
        True if successful, False if the job was not found or processing
        failed (in the latter case the job is marked as failed)
    """
    start_time = time.time()
    job_manager = JobManager()
    storage = StorageService()
    # NOTE(review): `settings` is not read below; the call is kept in case
    # get_settings() has required side effects - confirm before removing.
    settings = get_settings()

    # Create job logger for remote debugging FIRST
    job_log = create_job_logger(job_id, "audio")

    # Log with structured markers for easy Cloud Logging queries
    logger.info(f"[job:{job_id}] WORKER_START worker=audio")
    job_log.info("=== AUDIO WORKER STARTED ===")
    job_log.info(f"Job ID: {job_id}")

    # Set up log capture for AudioProcessor
    log_handler = setup_job_logging(job_id, "audio", *AUDIO_WORKER_LOGGERS)
    job_log.info(f"Log handler attached for {len(AUDIO_WORKER_LOGGERS)} loggers")

    # Stays None until the directory exists so the finally block knows
    # whether there is anything to delete
    temp_dir = None

    # Outer try/finally: from here on the log handler is attached, so every
    # exit path (including "job not found") must go through the cleanup below.
    try:
        job = job_manager.get_job(job_id)
        if not job:
            logger.error(f"[job:{job_id}] Job not found in Firestore")
            job_log.error(f"Job {job_id} not found in Firestore!")
            return False

        # Create temporary working directory
        temp_dir = tempfile.mkdtemp(prefix=f"karaoke_{job_id}_")
        job_log.info(f"Created temp directory: {temp_dir}")

        try:
            # Wrap entire worker in a tracing span
            with job_span("audio-worker", job_id, {"artist": job.artist, "title": job.title}) as root_span:
                # Use job_logging_context for proper log isolation when multiple jobs run concurrently
                # This ensures logs from third-party libraries (karaoke_gen.audio_processor) are
                # only captured by this job's handler, not handlers from other concurrent jobs
                with job_logging_context(job_id):
                    job_log.info(f"Starting audio separation for {job.artist} - {job.title}")
                    logger.info(f"[job:{job_id}] Starting audio separation for {job.artist} - {job.title}")

                    # Ensure AUDIO_SEPARATOR_API_URL is set
                    api_url = os.environ.get("AUDIO_SEPARATOR_API_URL")
                    if not api_url:
                        raise Exception("AUDIO_SEPARATOR_API_URL environment variable not set. "
                                        "Cannot perform audio separation without remote API access.")
                    job_log.info(f"Audio separator API: {api_url}")
                    add_span_attribute("audio_separator_api", api_url)

                    # Download audio file from GCS or URL
                    with job_span("download-audio", job_id) as download_span:
                        job_log.info("Downloading audio file...")
                        audio_path = await download_audio(job_id, temp_dir, storage, job, job_manager_instance=job_manager)
                        if not audio_path:
                            raise Exception("Failed to download audio file")
                        job_log.info(f"Audio downloaded: {os.path.basename(audio_path)}")
                        download_span.set_attribute("audio_file", os.path.basename(audio_path))
                        download_span.set_attribute("source", "url" if job.url else "gcs")

                    # Update progress using state_data (don't change status during parallel processing)
                    # The status is managed at a higher level - workers just track their progress
                    job_manager.update_state_data(job_id, 'audio_progress', {
                        'stage': 'separating_stage1',
                        'progress': 10,
                        'message': 'Starting audio separation (Stage 1: Clean instrumental)'
                    })

                    # Create AudioProcessor instance (reuses karaoke_gen code)
                    # Use model configuration from job if provided, otherwise use defaults
                    job_log.info("Creating AudioProcessor instance...")
                    if job.clean_instrumental_model:
                        job_log.info(f"  Using clean instrumental model: {job.clean_instrumental_model}")
                        add_span_attribute("clean_model", job.clean_instrumental_model)
                    if job.backing_vocals_models:
                        job_log.info(f"  Using backing vocals models: {job.backing_vocals_models}")
                    if job.other_stems_models:
                        job_log.info(f"  Using other stems models: {job.other_stems_models}")

                    audio_processor = create_audio_processor(
                        temp_dir,
                        clean_instrumental_model=job.clean_instrumental_model,
                        backing_vocals_models=job.backing_vocals_models,
                        other_stems_models=job.other_stems_models
                    )

                    # Store effective model names in state_data for video_worker to use in file naming
                    # This ensures output filenames match local CLI behavior (e.g., "Instrumental model_bs_roformer_ep_317_sdr_12.9755.ckpt")
                    effective_model_names = {
                        'clean_instrumental_model': job.clean_instrumental_model or DEFAULT_CLEAN_MODEL,
                        'backing_vocals_models': job.backing_vocals_models or DEFAULT_BACKING_MODELS,
                        'other_stems_models': job.other_stems_models or DEFAULT_OTHER_MODELS,
                    }
                    job_manager.update_state_data(job_id, 'model_names', effective_model_names)
                    job_log.info(f"Stored effective model names: clean={effective_model_names['clean_instrumental_model']}")

                    # Format artist-title for file naming (matches CLI behavior)
                    # Sanitize to handle Unicode characters (curly quotes, em dashes, etc.)
                    # that cause HTTP header encoding issues with the remote API
                    from karaoke_gen.utils import sanitize_filename
                    safe_artist = sanitize_filename(job.artist) if job.artist else "Unknown"
                    safe_title = sanitize_filename(job.title) if job.title else "Unknown"
                    artist_title = f"{safe_artist} - {safe_title}"

                    # Run audio separation (calls Modal API internally)
                    # This returns a dict with paths to all separated stems
                    with job_span("modal-separation", job_id) as sep_span:
                        sep_start = time.time()
                        job_log.info("Starting audio separation (this may take 5-10 minutes)...")
                        job_log.info("  Stage 1: Clean instrumental separation (MDX models)")
                        job_log.info("  Stage 2: Backing vocals separation (Demucs model)")
                        add_span_event("separation_started")
                        logger.info(f"[job:{job_id}] Calling Modal API for audio separation")

                        with metrics.time_external_api("modal", job_id):
                            separation_result = audio_processor.process_audio_separation(
                                audio_file=audio_path,
                                artist_title=artist_title,
                                track_output_dir=temp_dir
                            )

                        sep_duration = time.time() - sep_start
                        sep_span.set_attribute("duration_seconds", sep_duration)
                        sep_span.set_attribute("stem_count", len(separation_result))
                        add_span_event("separation_completed", {"duration_seconds": sep_duration})

                    job_log.info("Audio separation complete!")
                    job_log.info(f"  Generated {len(separation_result)} stem files")
                    logger.info(f"[job:{job_id}] Audio separation complete, organizing results")

                    # Update progress using state_data (don't change status during parallel processing)
                    job_manager.update_state_data(job_id, 'audio_progress', {
                        'stage': 'audio_complete',
                        'progress': 45,
                        'message': 'Audio separation complete, uploading stems'
                    })

                    # Upload all stems to GCS
                    with job_span("upload-stems", job_id) as upload_span:
                        await upload_separation_results(job_id, separation_result, storage, job_manager)
                        upload_span.set_attribute("stem_count", len(separation_result))

                    logger.info(f"[job:{job_id}] All stems uploaded successfully")

                    # Mark audio processing complete
                    # This will check if lyrics are also complete and transition to next stage if so
                    job_manager.mark_audio_complete(job_id)

                    duration = time.time() - start_time
                    root_span.set_attribute("duration_seconds", duration)
                    logger.info(f"[job:{job_id}] WORKER_END worker=audio status=success duration={duration:.1f}s")
                    return True

        except Exception as e:
            duration = time.time() - start_time
            # exc_info keeps the traceback alongside the structured marker line
            logger.error(
                f"[job:{job_id}] WORKER_END worker=audio status=error duration={duration:.1f}s error={e}",
                exc_info=True,
            )
            job_manager.mark_job_failed(
                job_id=job_id,
                error_message=f"Audio separation failed: {str(e)}",
                error_details={"stage": "audio_separation", "error": str(e)}
            )
            return False

    finally:
        # Remove log handler to avoid duplicate logging on future runs
        for logger_name in AUDIO_WORKER_LOGGERS:
            try:
                logging.getLogger(logger_name).removeHandler(log_handler)
            except Exception:
                # Best effort: a failed detach must not change the worker's result
                logger.debug(
                    f"[job:{job_id}] Could not detach log handler from {logger_name}",
                    exc_info=True,
                )

        # Cleanup temporary directory; ignore_errors so a failed delete cannot
        # replace the return value that was already decided above
        if temp_dir and os.path.exists(temp_dir):
            shutil.rmtree(temp_dir, ignore_errors=True)
            logger.debug(f"[job:{job_id}] Cleaned up temp directory: {temp_dir}")
425
+
426
+
427
async def download_audio(
    job_id: str,
    temp_dir: str,
    storage: StorageService,
    job,
    job_manager_instance: JobManager = None
) -> Optional[str]:
    """
    Place the job's input audio in the local temp directory.

    Input sources are tried in this order:
    1. Uploaded file: downloaded from GCS using job.input_media_gcs_path
    2. Previously stored input: downloaded using job.file_urls['input']
    3. Fresh URL (YouTube, etc.): downloaded via download_from_url, then
       uploaded to GCS, recorded on the job, and followed by a lyrics
       worker trigger

    Args:
        job_id: Job ID
        temp_dir: Temporary directory to save to
        storage: StorageService instance
        job: Job object with URL or GCS path
        job_manager_instance: Optional JobManager instance for updating job
            metadata; a new JobManager is created for source 3 when omitted

    Returns:
        Path to the local audio file, or None if no source was available
        or any step raised
    """
    try:
        from karaoke_gen.utils import sanitize_filename

        # Source 1: file was uploaded to GCS
        if job.input_media_gcs_path:
            logger.info(f"Job {job_id}: Downloading uploaded file from GCS: {job.input_media_gcs_path}")
            # Sanitize filename to handle Unicode chars that cause HTTP header encoding issues
            target_name = sanitize_filename(job.filename) if job.filename else "input.flac"
            destination = os.path.join(temp_dir, target_name)
            storage.download_file(job.input_media_gcs_path, destination)
            logger.info(f"Job {job_id}: Downloaded uploaded file to {destination}")
            return destination

        # Source 2: input already downloaded earlier and recorded in file_urls
        stored_input = job.file_urls.get('input') if job.file_urls else None
        if stored_input:
            destination = os.path.join(temp_dir, "input.flac")
            storage.download_file(stored_input, destination)
            logger.info(f"Job {job_id}: Downloaded audio from GCS: {stored_input}")
            return destination

        # Nothing left to try without a URL
        if not job.url:
            logger.error(f"Job {job_id}: No input source found (no GCS path, file_urls, or URL)")
            return None

        # Source 3: fresh URL that needs downloading
        logger.info(f"Job {job_id}: Downloading from URL: {job.url}")

        # Use provided job_manager or create new one
        manager = job_manager_instance or JobManager()

        destination = await download_from_url(
            job.url,
            temp_dir,
            job.artist,
            job.title,
            job_manager=manager,
            job_id=job_id
        )

        if not (destination and os.path.exists(destination)):
            logger.error(f"Job {job_id}: Failed to download from URL: {job.url}")
            return None

        # Upload to GCS and update job
        gcs_path = f"jobs/{job_id}/input/{os.path.basename(destination)}"
        uploaded_url = storage.upload_file(destination, gcs_path)

        # Update job with GCS path for lyrics worker
        manager.update_job(job_id, {'input_media_gcs_path': gcs_path})
        manager.update_file_url(job_id, 'input', 'audio', uploaded_url)

        logger.info(f"Job {job_id}: Downloaded and uploaded audio to GCS: {gcs_path}")

        # For URL jobs, trigger lyrics worker now that audio is available
        # This is the sequential trigger pattern - audio first, then lyrics
        await _trigger_lyrics_worker_after_url_download(job_id)

        return destination

    except Exception as download_error:
        logger.error(f"Job {job_id}: Failed to download audio: {download_error}", exc_info=True)
        return None
515
+
516
+
517
async def upload_separation_results(
    job_id: str,
    separation_result: Dict[str, Any],
    storage: StorageService,
    job_manager: JobManager
) -> None:
    """
    Upload all audio separation results to GCS.

    The separation_result dict from AudioProcessor.process_audio_separation() contains:
    - clean_instrumental: Dict with 'vocals' and 'instrumental' paths
    - other_stems: Dict with stem paths (bass, drums, guitar, piano, other)
    - backing_vocals: Dict with model-keyed paths to lead_vocals and backing_vocals
    - combined_instrumentals: Dict with model-keyed paths to instrumental+BV files

    Every stem is stored at ``jobs/{job_id}/stems/{key}.flac`` and its URL is
    recorded under the 'stems' category via ``job_manager.update_file_url``.
    Entries that are missing, ``None``, or whose local file does not exist are
    skipped. For the model-keyed sections only the first model is processed.

    After the uploads, the instrumental variants that were actually uploaded
    are stored in the job's state data under 'instrumental_options' so a later
    step can choose between them.

    Args:
        job_id: Job ID
        separation_result: Result dict from AudioProcessor
        storage: StorageService instance
        job_manager: JobManager instance
    """
    logger.info(f"Job {job_id}: Uploading separation results to GCS")

    def upload_stem(local_path: Any, stem_key: str, label: str) -> bool:
        """Upload one stem if its local file exists; return True when uploaded."""
        if not local_path:
            return False
        if not isinstance(local_path, (str, os.PathLike)):
            logger.warning(f"Job {job_id}: Unexpected path type for {label}: {type(local_path)}")
            return False
        if not os.path.exists(local_path):
            logger.warning(f"Job {job_id}: Skipping {label}, file not found: {local_path}")
            return False

        gcs_path = f"jobs/{job_id}/stems/{stem_key}.flac"
        url = storage.upload_file(local_path, gcs_path)
        job_manager.update_file_url(job_id, 'stems', stem_key, url)
        logger.info(f"Job {job_id}: Uploaded {label}")
        return True

    # Upload clean instrumental + vocals (Stage 1, clean model).
    # `or {}` (rather than a .get default) also covers a key that is present
    # but set to None.
    clean = separation_result.get("clean_instrumental") or {}
    clean_uploaded = upload_stem(clean.get("instrumental"), 'instrumental_clean', "clean instrumental")
    upload_stem(clean.get("vocals"), 'vocals_clean', "clean vocals")

    # Upload other stems (Stage 1, htdemucs 6-stem)
    for stem_name, stem_value in (separation_result.get("other_stems") or {}).items():
        # Handle both string paths and nested dicts
        if isinstance(stem_value, str):
            stem_path = stem_value
        elif isinstance(stem_value, dict):
            # Some models return nested dicts like {"path": "/path/to/file"}
            stem_path = stem_value.get("path") or stem_value.get("file")
            logger.debug(f"Job {job_id}: other_stems[{stem_name}] is dict: {stem_value}")
        else:
            logger.warning(f"Job {job_id}: Unexpected type for other_stems[{stem_name}]: {type(stem_value)}")
            continue

        upload_stem(stem_path, stem_name.lower(), f"{stem_name} stem")

    # Upload backing vocals separation (Stage 2).
    # backing_vocals is a dict keyed by model name; only the first model is
    # processed (we typically only use one backing vocals model).
    backing_by_model = separation_result.get("backing_vocals") or {}
    bv_stems = next(iter(backing_by_model.values()), None)
    if isinstance(bv_stems, dict):
        upload_stem(bv_stems.get("lead_vocals"), 'lead_vocals', "lead vocals")
        upload_stem(bv_stems.get("backing_vocals"), 'backing_vocals', "backing vocals")
    elif bv_stems is not None:
        logger.warning(f"Job {job_id}: Unexpected type for backing_vocals entry: {type(bv_stems)}")

    # Upload combined instrumentals (instrumental + backing vocals).
    # combined_instrumentals is a dict keyed by model name; only the first
    # model is processed.
    combined_by_model = separation_result.get("combined_instrumentals") or {}
    combined_path = next(iter(combined_by_model.values()), None)
    with_backing_uploaded = upload_stem(
        combined_path,
        'instrumental_with_backing',
        "instrumental with backing vocals",
    )

    # Store instrumental options in state_data for later selection.
    # Only list variants whose upload succeeded, so the stored GCS paths
    # always refer to objects that exist.
    instrumental_options = {}
    if clean_uploaded:
        instrumental_options["clean"] = f"jobs/{job_id}/stems/instrumental_clean.flac"
    if with_backing_uploaded:
        instrumental_options["with_backing"] = f"jobs/{job_id}/stems/instrumental_with_backing.flac"

    if instrumental_options:
        job_manager.update_state_data(job_id, 'instrumental_options', instrumental_options)
        logger.info(f"Job {job_id}: Stored instrumental options: {list(instrumental_options.keys())}")
618
+