karaoke-gen 0.90.1__py3-none-any.whl → 0.99.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. backend/.coveragerc +20 -0
  2. backend/.gitignore +37 -0
  3. backend/Dockerfile +43 -0
  4. backend/Dockerfile.base +74 -0
  5. backend/README.md +242 -0
  6. backend/__init__.py +0 -0
  7. backend/api/__init__.py +0 -0
  8. backend/api/dependencies.py +457 -0
  9. backend/api/routes/__init__.py +0 -0
  10. backend/api/routes/admin.py +835 -0
  11. backend/api/routes/audio_search.py +913 -0
  12. backend/api/routes/auth.py +348 -0
  13. backend/api/routes/file_upload.py +2112 -0
  14. backend/api/routes/health.py +409 -0
  15. backend/api/routes/internal.py +435 -0
  16. backend/api/routes/jobs.py +1629 -0
  17. backend/api/routes/review.py +652 -0
  18. backend/api/routes/themes.py +162 -0
  19. backend/api/routes/users.py +1513 -0
  20. backend/config.py +172 -0
  21. backend/main.py +157 -0
  22. backend/middleware/__init__.py +5 -0
  23. backend/middleware/audit_logging.py +124 -0
  24. backend/models/__init__.py +0 -0
  25. backend/models/job.py +519 -0
  26. backend/models/requests.py +123 -0
  27. backend/models/theme.py +153 -0
  28. backend/models/user.py +254 -0
  29. backend/models/worker_log.py +164 -0
  30. backend/pyproject.toml +29 -0
  31. backend/quick-check.sh +93 -0
  32. backend/requirements.txt +29 -0
  33. backend/run_tests.sh +60 -0
  34. backend/services/__init__.py +0 -0
  35. backend/services/audio_analysis_service.py +243 -0
  36. backend/services/audio_editing_service.py +278 -0
  37. backend/services/audio_search_service.py +702 -0
  38. backend/services/auth_service.py +630 -0
  39. backend/services/credential_manager.py +792 -0
  40. backend/services/discord_service.py +172 -0
  41. backend/services/dropbox_service.py +301 -0
  42. backend/services/email_service.py +1093 -0
  43. backend/services/encoding_interface.py +454 -0
  44. backend/services/encoding_service.py +502 -0
  45. backend/services/firestore_service.py +512 -0
  46. backend/services/flacfetch_client.py +573 -0
  47. backend/services/gce_encoding/README.md +72 -0
  48. backend/services/gce_encoding/__init__.py +22 -0
  49. backend/services/gce_encoding/main.py +589 -0
  50. backend/services/gce_encoding/requirements.txt +16 -0
  51. backend/services/gdrive_service.py +356 -0
  52. backend/services/job_logging.py +258 -0
  53. backend/services/job_manager.py +853 -0
  54. backend/services/job_notification_service.py +271 -0
  55. backend/services/langfuse_preloader.py +98 -0
  56. backend/services/local_encoding_service.py +590 -0
  57. backend/services/local_preview_encoding_service.py +407 -0
  58. backend/services/lyrics_cache_service.py +216 -0
  59. backend/services/metrics.py +413 -0
  60. backend/services/nltk_preloader.py +122 -0
  61. backend/services/packaging_service.py +287 -0
  62. backend/services/rclone_service.py +106 -0
  63. backend/services/spacy_preloader.py +65 -0
  64. backend/services/storage_service.py +209 -0
  65. backend/services/stripe_service.py +371 -0
  66. backend/services/structured_logging.py +254 -0
  67. backend/services/template_service.py +330 -0
  68. backend/services/theme_service.py +469 -0
  69. backend/services/tracing.py +543 -0
  70. backend/services/user_service.py +721 -0
  71. backend/services/worker_service.py +558 -0
  72. backend/services/youtube_service.py +112 -0
  73. backend/services/youtube_upload_service.py +445 -0
  74. backend/tests/__init__.py +4 -0
  75. backend/tests/conftest.py +224 -0
  76. backend/tests/emulator/__init__.py +7 -0
  77. backend/tests/emulator/conftest.py +109 -0
  78. backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
  79. backend/tests/emulator/test_emulator_integration.py +356 -0
  80. backend/tests/emulator/test_style_loading_direct.py +436 -0
  81. backend/tests/emulator/test_worker_logs_direct.py +229 -0
  82. backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
  83. backend/tests/requirements-test.txt +10 -0
  84. backend/tests/requirements.txt +6 -0
  85. backend/tests/test_admin_email_endpoints.py +411 -0
  86. backend/tests/test_api_integration.py +460 -0
  87. backend/tests/test_api_routes.py +93 -0
  88. backend/tests/test_audio_analysis_service.py +294 -0
  89. backend/tests/test_audio_editing_service.py +386 -0
  90. backend/tests/test_audio_search.py +1398 -0
  91. backend/tests/test_audio_services.py +378 -0
  92. backend/tests/test_auth_firestore.py +231 -0
  93. backend/tests/test_config_extended.py +68 -0
  94. backend/tests/test_credential_manager.py +377 -0
  95. backend/tests/test_dependencies.py +54 -0
  96. backend/tests/test_discord_service.py +244 -0
  97. backend/tests/test_distribution_services.py +820 -0
  98. backend/tests/test_dropbox_service.py +472 -0
  99. backend/tests/test_email_service.py +492 -0
  100. backend/tests/test_emulator_integration.py +322 -0
  101. backend/tests/test_encoding_interface.py +412 -0
  102. backend/tests/test_file_upload.py +1739 -0
  103. backend/tests/test_flacfetch_client.py +632 -0
  104. backend/tests/test_gdrive_service.py +524 -0
  105. backend/tests/test_instrumental_api.py +431 -0
  106. backend/tests/test_internal_api.py +343 -0
  107. backend/tests/test_job_creation_regression.py +583 -0
  108. backend/tests/test_job_manager.py +356 -0
  109. backend/tests/test_job_manager_notifications.py +329 -0
  110. backend/tests/test_job_notification_service.py +443 -0
  111. backend/tests/test_jobs_api.py +283 -0
  112. backend/tests/test_local_encoding_service.py +423 -0
  113. backend/tests/test_local_preview_encoding_service.py +567 -0
  114. backend/tests/test_main.py +87 -0
  115. backend/tests/test_models.py +918 -0
  116. backend/tests/test_packaging_service.py +382 -0
  117. backend/tests/test_requests.py +201 -0
  118. backend/tests/test_routes_jobs.py +282 -0
  119. backend/tests/test_routes_review.py +337 -0
  120. backend/tests/test_services.py +556 -0
  121. backend/tests/test_services_extended.py +112 -0
  122. backend/tests/test_spacy_preloader.py +119 -0
  123. backend/tests/test_storage_service.py +448 -0
  124. backend/tests/test_style_upload.py +261 -0
  125. backend/tests/test_template_service.py +295 -0
  126. backend/tests/test_theme_service.py +516 -0
  127. backend/tests/test_unicode_sanitization.py +522 -0
  128. backend/tests/test_upload_api.py +256 -0
  129. backend/tests/test_validate.py +156 -0
  130. backend/tests/test_video_worker_orchestrator.py +847 -0
  131. backend/tests/test_worker_log_subcollection.py +509 -0
  132. backend/tests/test_worker_logging.py +365 -0
  133. backend/tests/test_workers.py +1116 -0
  134. backend/tests/test_workers_extended.py +178 -0
  135. backend/tests/test_youtube_service.py +247 -0
  136. backend/tests/test_youtube_upload_service.py +568 -0
  137. backend/utils/test_data.py +27 -0
  138. backend/validate.py +173 -0
  139. backend/version.py +27 -0
  140. backend/workers/README.md +597 -0
  141. backend/workers/__init__.py +11 -0
  142. backend/workers/audio_worker.py +618 -0
  143. backend/workers/lyrics_worker.py +683 -0
  144. backend/workers/render_video_worker.py +483 -0
  145. backend/workers/screens_worker.py +535 -0
  146. backend/workers/style_helper.py +198 -0
  147. backend/workers/video_worker.py +1277 -0
  148. backend/workers/video_worker_orchestrator.py +701 -0
  149. backend/workers/worker_logging.py +278 -0
  150. karaoke_gen/instrumental_review/static/index.html +7 -4
  151. karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
  152. karaoke_gen/utils/__init__.py +163 -8
  153. karaoke_gen/video_background_processor.py +9 -4
  154. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/METADATA +1 -1
  155. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/RECORD +196 -46
  156. lyrics_transcriber/correction/agentic/agent.py +17 -6
  157. lyrics_transcriber/correction/agentic/providers/config.py +9 -5
  158. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -93
  159. lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
  160. lyrics_transcriber/correction/anchor_sequence.py +151 -37
  161. lyrics_transcriber/correction/corrector.py +192 -130
  162. lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
  163. lyrics_transcriber/correction/operations.py +24 -9
  164. lyrics_transcriber/correction/phrase_analyzer.py +18 -0
  165. lyrics_transcriber/frontend/package-lock.json +2 -2
  166. lyrics_transcriber/frontend/package.json +1 -1
  167. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
  168. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
  169. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
  170. lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
  171. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
  172. lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
  173. lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
  174. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
  175. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
  176. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
  177. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
  178. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  179. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
  180. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
  181. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
  182. lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
  183. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
  184. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
  185. lyrics_transcriber/frontend/src/theme.ts +42 -15
  186. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  187. lyrics_transcriber/frontend/vite.config.js +5 -0
  188. lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
  189. lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
  190. lyrics_transcriber/frontend/web_assets/index.html +6 -2
  191. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
  192. lyrics_transcriber/output/generator.py +17 -3
  193. lyrics_transcriber/output/video.py +60 -95
  194. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
  195. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/WHEEL +0 -0
  196. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/entry_points.txt +0 -0
  197. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1629 @@
1
+ """
2
+ Job management routes.
3
+
4
+ Handles job lifecycle endpoints including:
5
+ - Job creation and submission
6
+ - Status polling
7
+ - Human-in-the-loop interactions (lyrics review, instrumental selection)
8
+ - Job deletion and cancellation
9
+ """
10
+ import asyncio
11
+ import logging
12
+ import httpx
13
+ from typing import List, Optional, Dict, Any, Tuple
14
+ from fastapi import APIRouter, HTTPException, BackgroundTasks, Depends
15
+
16
+ from backend.models.job import Job, JobCreate, JobResponse, JobStatus
17
+ from backend.models.requests import (
18
+ URLSubmissionRequest,
19
+ CorrectionsSubmission,
20
+ InstrumentalSelection,
21
+ StartReviewRequest,
22
+ CancelJobRequest,
23
+ CreateCustomInstrumentalRequest,
24
+ )
25
+ from backend.services.job_manager import JobManager
26
+ from backend.services.worker_service import get_worker_service
27
+ from backend.services.storage_service import StorageService
28
+ from backend.services.theme_service import get_theme_service
29
+ from backend.config import get_settings
30
+ from backend.api.dependencies import require_admin, require_auth, require_instrumental_auth
31
+ from backend.services.auth_service import UserType, AuthResult
32
+ from backend.services.metrics import metrics
33
+ from backend.utils.test_data import is_test_email
34
+
35
+
36
# Module-wide logger and the router every job endpoint below attaches to.
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/jobs", tags=["jobs"])

# Shared service handles, created once when this module is imported.
job_manager = JobManager()
worker_service = get_worker_service()
settings = get_settings()
43
+
44
+
45
async def _trigger_workers_parallel(job_id: str) -> None:
    """
    Kick off the audio worker and the lyrics worker for a job concurrently.

    FastAPI's BackgroundTasks runs async tasks sequentially, so both
    trigger coroutines are handed to a single asyncio.gather call and
    awaited together instead of being queued as two separate tasks.
    """
    pending_triggers = (
        worker_service.trigger_audio_worker(job_id),
        worker_service.trigger_lyrics_worker(job_id),
    )
    await asyncio.gather(*pending_triggers)
56
+
57
+
58
@router.post("", response_model=JobResponse)
async def create_job(
    request: URLSubmissionRequest,
    background_tasks: BackgroundTasks,
    auth_result: AuthResult = Depends(require_auth)
) -> JobResponse:
    """
    Create a new karaoke generation job from a URL.

    This triggers the complete workflow:
    1. Job created in PENDING state
    2. Audio and lyrics workers triggered in parallel
    3. Both workers update job state as they progress
    4. When both complete, job transitions to AWAITING_REVIEW

    Args:
        request: Submission payload (URL, artist/title, output preferences).
        background_tasks: FastAPI task queue used to trigger the workers
            after the response is sent.
        auth_result: Authenticated caller; its user_email becomes the job
            owner unless an admin submits on behalf of another user.

    Returns:
        JobResponse carrying the new job_id.

    Raises:
        HTTPException: 422 when no default theme is configured, 500 when
            the caller has no user_email or on any unexpected failure.
    """
    try:
        # Determine job owner email:
        # All authentication methods must provide a user_email for job ownership
        if auth_result.user_email:
            # Use authenticated user's email (standard case)
            user_email = auth_result.user_email
        else:
            # This should never happen - all auth methods now require user_email
            logger.error("Authentication succeeded but no user_email provided")
            raise HTTPException(
                status_code=500,
                detail="Authentication error: no user identity available"
            )

        # Admins can optionally create jobs on behalf of other users
        if request.user_email and auth_result.is_admin and request.user_email != auth_result.user_email:
            user_email = request.user_email
            logger.info(f"Admin {auth_result.user_email} creating job on behalf of {user_email}")

        # Apply YouTube upload default from settings.
        # Use explicit value if provided, otherwise fall back to server default.
        # (Local name deliberately differs from the module-level `settings`.)
        current_settings = get_settings()
        if request.enable_youtube_upload is not None:
            effective_enable_youtube_upload = request.enable_youtube_upload
        else:
            effective_enable_youtube_upload = current_settings.default_enable_youtube_upload

        # Apply default theme - all jobs require a theme
        theme_service = get_theme_service()
        effective_theme_id = theme_service.get_default_theme_id()
        if not effective_theme_id:
            raise HTTPException(
                status_code=422,
                detail="No default theme configured. Please contact support or specify a theme_id."
            )
        logger.info(f"Applying default theme: {effective_theme_id}")

        # Create job with all preferences
        job_create = JobCreate(
            url=str(request.url),
            artist=request.artist,
            title=request.title,
            theme_id=effective_theme_id,  # Required - all jobs must have a theme
            enable_cdg=request.enable_cdg,
            enable_txt=request.enable_txt,
            enable_youtube_upload=effective_enable_youtube_upload,
            youtube_description=request.youtube_description,
            webhook_url=request.webhook_url,
            user_email=user_email
        )
        job = job_manager.create_job(job_create)

        # Record job creation metric
        metrics.record_job_created(job.job_id, source="url")

        # Trigger both workers in parallel using asyncio.gather
        # (FastAPI's BackgroundTasks runs async tasks sequentially)
        background_tasks.add_task(_trigger_workers_parallel, job.job_id)

        logger.info(f"Job {job.job_id} created, workers triggered")

        return JobResponse(
            status="success",
            job_id=job.job_id,
            message="Job created successfully. Processing started."
        )
    except HTTPException:
        # Deliberate HTTP errors raised above (e.g. the 422 for a missing
        # default theme) must reach the client with their own status code
        # instead of being re-wrapped as a generic 500 below.
        raise
    except Exception as e:
        logger.error(f"Error creating job: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
139
+
140
+
141
+ # Worker triggering is now handled by WorkerService
142
+ # See backend/services/worker_service.py
143
+
144
+
145
@router.get("/{job_id}", response_model=Job)
async def get_job(
    job_id: str,
    auth_result: AuthResult = Depends(require_auth)
) -> Job:
    """Return a single job's status and details, with download URLs once complete."""
    found = job_manager.get_job(job_id)
    if not found:
        raise HTTPException(status_code=404, detail="Job not found")

    # Owners see their own jobs; admins see every job.
    allowed = _check_job_ownership(found, auth_result)
    if not allowed:
        raise HTTPException(status_code=403, detail="You don't have permission to access this job")

    # Download URLs are only attached for finished jobs.
    if found.status == JobStatus.COMPLETE:
        found.download_urls = job_manager.get_output_urls(job_id)

    return found
164
+
165
+
166
def _check_job_ownership(job: Job, auth_result: AuthResult) -> bool:
    """
    Decide whether the authenticated caller may access the given job.

    Returns:
        True when the caller is an admin, when the job has no owner email
        (legacy job), or when the caller's email matches the job's owner
        email case-insensitively; False otherwise.
    """
    # Admins bypass every ownership rule.
    if auth_result.is_admin:
        return True

    owner_email = job.user_email
    if not owner_email:
        # Legacy jobs without user_email - allowed for backward compatibility.
        # TODO: Consider restricting this in the future
        return True

    caller_email = auth_result.user_email
    if not caller_email:
        # Auth without an email (e.g. token auth) must not reach owned jobs.
        return False

    return caller_email.lower() == owner_email.lower()
189
+
190
+
191
@router.get("", response_model=List[Job])
async def list_jobs(
    status: Optional[JobStatus] = None,
    environment: Optional[str] = None,
    client_id: Optional[str] = None,
    created_after: Optional[str] = None,
    created_before: Optional[str] = None,
    exclude_test: bool = True,
    limit: int = 100,
    auth_result: AuthResult = Depends(require_auth)
) -> List[Job]:
    """
    List jobs, optionally narrowed by filters.

    Non-admin callers are restricted to jobs carrying their own email;
    admins are not restricted by owner.

    Args:
        status: Filter by job status (pending, complete, failed, etc.)
        environment: Filter by request_metadata.environment (test/production/development)
        client_id: Filter by request_metadata.client_id (customer identifier)
        created_after: Only jobs created after this ISO datetime (e.g., 2024-01-01T00:00:00Z)
        created_before: Only jobs created before this ISO datetime
        exclude_test: If True (default), drop jobs from test users (applied for admins only)
        limit: Maximum number of jobs requested from the job manager (default 100)

    Returns:
        List of jobs matching filters, ordered by created_at descending

    Raises:
        HTTPException: 400 for an unparseable datetime filter, 500 on
            unexpected failures.
    """
    from datetime import datetime

    def _parse_iso(raw: Optional[str], field_name: str) -> Optional[datetime]:
        # An absent/empty filter means "no bound"; a trailing 'Z' is
        # rewritten to an explicit UTC offset before parsing.
        if not raw:
            return None
        try:
            return datetime.fromisoformat(raw.replace('Z', '+00:00'))
        except ValueError as e:
            raise HTTPException(status_code=400, detail=f"Invalid {field_name} format: {raw}") from e

    try:
        created_after_dt = _parse_iso(created_after, "created_after")
        created_before_dt = _parse_iso(created_before, "created_before")

        # Admins see all jobs, regular users only see their own.
        user_email_filter = None
        if not auth_result.is_admin:
            if not auth_result.user_email:
                # Token-based auth without user email - show no jobs for security
                logger.warning("Non-admin auth without user_email, returning empty job list")
                return []
            user_email_filter = auth_result.user_email
            logger.debug(f"Filtering jobs for user: {user_email_filter}")

        query = dict(
            status=status,
            environment=environment,
            client_id=client_id,
            created_after=created_after_dt,
            created_before=created_before_dt,
            user_email=user_email_filter,
            limit=limit,
        )
        jobs = job_manager.list_jobs(**query)

        # Test-user jobs are only stripped from admin listings.
        if exclude_test and auth_result.is_admin:
            jobs = [entry for entry in jobs if not is_test_email(entry.user_email or "")]

        logger.debug(f"Listed {len(jobs)} jobs for user={auth_result.user_email}, admin={auth_result.is_admin}")
        return jobs
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error listing jobs: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
271
+
272
+
273
@router.delete("/{job_id}")
async def delete_job(
    job_id: str,
    delete_files: bool = True,
    auth_result: AuthResult = Depends(require_auth)
) -> dict:
    """Remove one job, and its output files too unless delete_files is False."""
    try:
        target = job_manager.get_job(job_id)
        if not target:
            raise HTTPException(status_code=404, detail="Job not found")

        # Only the owner (or an admin) may delete a job.
        may_delete = _check_job_ownership(target, auth_result)
        if not may_delete:
            raise HTTPException(status_code=403, detail="You don't have permission to delete this job")

        job_manager.delete_job(job_id, delete_files=delete_files)

        response = {"status": "success", "message": f"Job {job_id} deleted"}
        return response
    except HTTPException:
        # 404/403 raised above go out unchanged.
        raise
    except Exception as e:
        logger.error(f"Error deleting job {job_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
297
+
298
+
299
@router.delete("")
async def bulk_delete_jobs(
    environment: Optional[str] = None,
    client_id: Optional[str] = None,
    status: Optional[JobStatus] = None,
    created_before: Optional[str] = None,
    delete_files: bool = True,
    confirm: bool = False,
    _auth_result: AuthResult = Depends(require_admin)
) -> dict:
    """
    Delete multiple jobs matching filter criteria (admin only).

    CAUTION: This is a destructive operation. Requires confirm=true.
    Without confirm, nothing is deleted and a preview is returned instead.

    Use cases:
    - Delete all test jobs: ?environment=test&confirm=true
    - Delete jobs from a specific client: ?client_id=test-runner&confirm=true
    - Delete old failed jobs: ?status=failed&created_before=2024-01-01T00:00:00Z&confirm=true

    Args:
        environment: Delete jobs with this environment (test/production/development)
        client_id: Delete jobs from this client
        status: Delete jobs with this status
        created_before: Delete jobs created before this ISO datetime
        delete_files: Also delete GCS files (default True)
        confirm: Must be True to execute deletion (safety check)

    Returns:
        With confirm=False: a preview dict with the number of matching jobs
        (the preview listing is requested with limit=1000) and up to 10
        sample jobs. With confirm=True: statistics about the deletion.

    Raises:
        HTTPException: 400 when no filter is supplied or created_before is
            not a valid ISO datetime; 500 on unexpected failures during
            deletion.
    """
    from datetime import datetime

    # Require at least one filter to prevent accidental deletion of all jobs
    if not any([environment, client_id, status, created_before]):
        raise HTTPException(
            status_code=400,
            detail="At least one filter (environment, client_id, status, created_before) is required"
        )

    # Parse the datetime filter once, up front, so an invalid value yields
    # a 400 on both the preview and the execute path (previously the
    # execute path re-wrapped this 400 as a 500).
    created_before_dt = None
    if created_before:
        try:
            created_before_dt = datetime.fromisoformat(created_before.replace('Z', '+00:00'))
        except ValueError as e:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid created_before format: {created_before}"
            ) from e

    # Require explicit confirmation
    if not confirm:
        # Return preview of what would be deleted
        jobs = job_manager.list_jobs(
            status=status,
            environment=environment,
            client_id=client_id,
            created_before=created_before_dt,
            limit=1000
        )

        return {
            "status": "preview",
            "message": "Add &confirm=true to execute deletion",
            "jobs_to_delete": len(jobs),
            "sample_jobs": [
                {
                    "job_id": j.job_id,
                    "artist": j.artist,
                    "title": j.title,
                    "status": j.status,
                    "environment": j.request_metadata.get('environment'),
                    "client_id": j.request_metadata.get('client_id'),
                    "created_at": j.created_at.isoformat() if j.created_at else None
                }
                for j in jobs[:10]  # Show first 10 as sample
            ]
        }

    try:
        result = job_manager.delete_jobs_by_filter(
            environment=environment,
            client_id=client_id,
            status=status,
            created_before=created_before_dt,
            delete_files=delete_files
        )

        return {
            "status": "success",
            "message": f"Deleted {result['jobs_deleted']} jobs",
            "jobs_deleted": result['jobs_deleted'],
            "files_deleted": result.get('files_deleted', 0)
        }

    except HTTPException:
        # Keep any deliberate HTTP error intact rather than masking it as 500.
        raise
    except Exception as e:
        logger.error(f"Error bulk deleting jobs: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
402
+
403
+
404
+ # ============================================================================
405
+ # Human-in-the-Loop Interaction Endpoints
406
+ # ============================================================================
407
+
408
@router.get("/{job_id}/review-data")
async def get_review_data(
    job_id: str,
    auth_result: AuthResult = Depends(require_auth)
) -> Dict[str, Any]:
    """
    Return what the lyrics review interface needs to load a job.

    The response carries signed URLs for the corrections JSON and the
    review audio, plus the job's status, artist and title.
    """
    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    # Only the owner (or an admin) may load review data.
    if not _check_job_ownership(job, auth_result):
        raise HTTPException(status_code=403, detail="You don't have permission to access this job")

    reviewable_states = (JobStatus.AWAITING_REVIEW, JobStatus.IN_REVIEW)
    if job.status not in reviewable_states:
        raise HTTPException(
            status_code=400,
            detail=f"Job not ready for review (current status: {job.status})"
        )

    lyrics_files = job.file_urls.get('lyrics', {})
    corrections_url = lyrics_files.get('corrections')

    # Audio source, in order of preference:
    # 1. Explicit lyrics audio (if worker uploaded it)
    # 2. Lead vocals stem (best for reviewing lyrics sync)
    # 3. Input media (original audio)
    audio_url = lyrics_files.get('audio')
    if not audio_url:
        audio_url = job.file_urls.get('stems', {}).get('lead_vocals')
    if not audio_url:
        audio_url = job.input_media_gcs_path

    if not corrections_url:
        raise HTTPException(
            status_code=500,
            detail="Corrections data not available"
        )

    if not audio_url:
        raise HTTPException(
            status_code=500,
            detail="Audio not available for review"
        )

    # Generate signed URLs for direct access
    from backend.services.storage_service import StorageService
    storage = StorageService()

    signed_corrections = storage.generate_signed_url(corrections_url, expiration_minutes=120)
    signed_audio = storage.generate_signed_url(audio_url, expiration_minutes=120)

    return {
        "corrections_url": signed_corrections,
        "audio_url": signed_audio,
        "status": job.status,
        "artist": job.artist,
        "title": job.title
    }
469
+
470
+
471
@router.post("/{job_id}/start-review")
async def start_review(
    job_id: str,
    request: StartReviewRequest,
    auth_result: AuthResult = Depends(require_auth)
) -> dict:
    """
    Move a job to IN_REVIEW when the user opens the review interface.

    Recording the transition makes it visible that someone is actively
    working on the review.
    """
    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    # Only the owner (or an admin) may start the review.
    permitted = _check_job_ownership(job, auth_result)
    if not permitted:
        raise HTTPException(status_code=403, detail="You don't have permission to access this job")

    transitioned = job_manager.transition_to_state(
        job_id=job_id,
        new_status=JobStatus.IN_REVIEW,
        message="User started reviewing lyrics"
    )
    if transitioned:
        return {"status": "success", "job_status": "in_review"}

    raise HTTPException(status_code=400, detail="Cannot start review")
500
+
501
+
502
@router.post("/{job_id}/corrections")
async def submit_corrections(
    job_id: str,
    submission: CorrectionsSubmission,
    background_tasks: BackgroundTasks,
    auth_result: AuthResult = Depends(require_auth)
) -> dict:
    """
    Persist corrected lyrics while a human review is in progress.

    Saving here does NOT finish the review; the client must call
    POST /{job_id}/complete-review to finish and trigger video rendering.
    The endpoint may be called repeatedly to save progress.
    """
    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    # Only the owner (or an admin) may modify the job.
    if not _check_job_ownership(job, auth_result):
        raise HTTPException(status_code=403, detail="You don't have permission to modify this job")

    review_states = (JobStatus.AWAITING_REVIEW, JobStatus.IN_REVIEW)
    if job.status not in review_states:
        raise HTTPException(
            status_code=400,
            detail=f"Job not in review state (current status: {job.status})"
        )

    try:
        # Keep the corrections (and optional notes) in the job's state_data.
        job_manager.update_state_data(job_id, 'corrected_lyrics', submission.corrections)
        notes = submission.user_notes
        if notes:
            job_manager.update_state_data(job_id, 'review_notes', notes)

        # First save on a job still awaiting review moves it to IN_REVIEW.
        if job.status == JobStatus.AWAITING_REVIEW:
            job_manager.transition_to_state(
                job_id=job_id,
                new_status=JobStatus.IN_REVIEW,
                message="User is reviewing lyrics"
            )

        # Save updated corrections to GCS for the render worker
        from backend.services.storage_service import StorageService
        storage = StorageService()

        updated_path = f"jobs/{job_id}/lyrics/corrections_updated.json"
        storage.upload_json(updated_path, submission.corrections)
        job_manager.update_file_url(job_id, 'lyrics', 'corrections_updated', updated_path)

        logger.info(f"Job {job_id}: Corrections saved (review in progress)")

        payload = {
            "status": "success",
            "job_status": "in_review",
            "message": "Corrections saved. Call /complete-review when done."
        }
        return payload

    except Exception as e:
        logger.error(f"Error saving corrections for job {job_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
564
+
565
+
566
@router.post("/{job_id}/complete-review")
async def complete_review(
    job_id: str,
    background_tasks: BackgroundTasks,
    auth_result: AuthResult = Depends(require_auth)
) -> dict:
    """
    Finish the human lyrics review and start video rendering.

    This is the FIRST critical human-in-the-loop completion point.
    After this:
    1. Job transitions to REVIEW_COMPLETE
    2. Render video worker is triggered
    3. Worker uses OutputGenerator to create with_vocals.mkv
    4. Job transitions to AWAITING_INSTRUMENTAL_SELECTION
    """
    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    # Only the owner (or an admin) may complete the review.
    is_permitted = _check_job_ownership(job, auth_result)
    if not is_permitted:
        raise HTTPException(status_code=403, detail="You don't have permission to modify this job")

    review_states = (JobStatus.AWAITING_REVIEW, JobStatus.IN_REVIEW)
    if job.status not in review_states:
        raise HTTPException(
            status_code=400,
            detail=f"Job not in review state (current status: {job.status})"
        )

    try:
        # Record the state change first, then queue the render worker.
        job_manager.transition_to_state(
            job_id=job_id,
            new_status=JobStatus.REVIEW_COMPLETE,
            progress=70,
            message="Review complete, rendering video with corrected lyrics"
        )

        background_tasks.add_task(worker_service.trigger_render_video_worker, job_id)

        logger.info(f"Job {job_id}: Review complete, triggering render video worker")

        outcome = {
            "status": "success",
            "job_status": "review_complete",
            "message": "Review complete. Video rendering started."
        }
        return outcome

    except Exception as e:
        logger.error(f"Error completing review for job {job_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
619
+
620
+
621
@router.get("/{job_id}/instrumental-options")
async def get_instrumental_options(
    job_id: str,
    auth_info: Tuple[str, str] = Depends(require_instrumental_auth)
) -> Dict[str, Any]:
    """
    Return the instrumental choices offered to the user for selection.

    The response lists two options, in this order, each with a signed URL
    (requested with expiration_minutes=120):
    1. Clean instrumental (no backing vocals)
    2. Instrumental with backing vocals

    The job's status, artist and title are included alongside the options.

    Accepts either full auth token or job-specific instrumental_token.

    Raises:
        HTTPException: 404 if the job does not exist, 400 if the job is not
            awaiting instrumental selection, 500 if either instrumental path
            is missing from the job's file_urls.
    """
    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    if job.status != JobStatus.AWAITING_INSTRUMENTAL_SELECTION:
        raise HTTPException(
            status_code=400,
            detail=f"Job not ready for instrumental selection (current status: {job.status})"
        )

    # (option id, display label, stored stem path) for each selectable option
    stem_paths = job.file_urls.get('stems', {})
    choices = (
        ("clean", "Clean Instrumental (no backing vocals)", stem_paths.get('instrumental_clean')),
        ("with_backing", "Instrumental with Backing Vocals", stem_paths.get('instrumental_with_backing')),
    )

    # Both stems must be present before any option is offered
    if not all(path for _, _, path in choices):
        raise HTTPException(
            status_code=500,
            detail="Instrumental options not available"
        )

    # Generate signed URLs
    from backend.services.storage_service import StorageService
    storage = StorageService()

    options = [
        {
            "id": option_id,
            "label": label,
            "audio_url": storage.generate_signed_url(path, expiration_minutes=120),
            "duration_seconds": None  # TODO: Extract from audio file
        }
        for option_id, label, path in choices
    ]

    return {
        "options": options,
        "status": job.status,
        "artist": job.artist,
        "title": job.title
    }
679
+
680
+
681
@router.get("/{job_id}/instrumental-analysis")
async def get_instrumental_analysis(
    job_id: str,
    auth_info: Tuple[str, str] = Depends(require_instrumental_auth)
) -> Dict[str, Any]:
    """
    Return backing-vocals analysis plus playback URLs for instrumental selection.

    The response contains:
    - "analysis": values read from state_data['backing_vocals_analysis'],
      with defaults substituted for any missing field
    - "audio_urls": signed URLs for each available stem (None when absent)
    - "waveform_url": signed URL of the waveform image (None when absent)
    - "has_custom_instrumental": whether a custom instrumental path is stored

    Accepts either full auth token or job-specific instrumental_token.

    Raises:
        HTTPException: 404 if the job does not exist, 400 if the job is not
            awaiting instrumental selection.
    """
    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    if job.status != JobStatus.AWAITING_INSTRUMENTAL_SELECTION:
        raise HTTPException(
            status_code=400,
            detail=f"Job not ready for instrumental analysis (current status: {job.status})"
        )

    storage = StorageService()

    def signed_or_none(path):
        # Sign only paths that are actually stored; missing ones stay None
        if path:
            return storage.generate_signed_url(path, expiration_minutes=120)
        return None

    # Analysis results live in state_data (populated by render_video_worker)
    stored_analysis = job.state_data.get('backing_vocals_analysis', {})
    analysis_defaults = {
        "has_audible_content": False,
        "total_duration_seconds": 0,
        "audible_segments": [],
        "recommended_selection": 'review_needed',
        "total_audible_duration_seconds": 0,
        "audible_percentage": 0,
        "silence_threshold_db": -40.0,
    }
    analysis = {
        field: stored_analysis.get(field, fallback)
        for field, fallback in analysis_defaults.items()
    }

    # Stored paths for the stems and the waveform image
    stem_paths = job.file_urls.get('stems', {})
    analysis_paths = job.file_urls.get('analysis', {})
    custom_path = stem_paths.get('instrumental_custom')

    # Audio URLs for playback
    audio_urls = {
        "clean_instrumental": signed_or_none(stem_paths.get('instrumental_clean')),
        "backing_vocals": signed_or_none(stem_paths.get('backing_vocals')),
        "with_backing": signed_or_none(stem_paths.get('instrumental_with_backing')),
        "custom_instrumental": signed_or_none(custom_path),
    }

    return {
        "job_id": job_id,
        "artist": job.artist,
        "title": job.title,
        "status": job.status,
        "analysis": analysis,
        "audio_urls": audio_urls,
        # Waveform image URL
        "waveform_url": signed_or_none(analysis_paths.get('backing_vocals_waveform')),
        # Whether a custom instrumental has been created
        "has_custom_instrumental": custom_path is not None,
    }
758
+
759
+
760
@router.get("/{job_id}/audio-stream/{stem_type}")
async def stream_audio(
    job_id: str,
    stem_type: str,
    auth_info: Tuple[str, str] = Depends(require_instrumental_auth)
):
    """
    Stream an audio file for playback in the browser.

    Supported stem_type values:
    - clean_instrumental: Clean instrumental (no backing vocals)
    - backing_vocals: Backing vocals only
    - with_backing: Instrumental with backing vocals
    - custom_instrumental: Custom instrumental (if created)

    The file is downloaded from storage to a temporary file, streamed back in
    8 KiB chunks, and the temporary file is removed once streaming finishes
    or if the download fails.

    Accepts either full auth token or job-specific instrumental_token.

    Raises:
        HTTPException: 404 if the job or the requested stem does not exist,
            400 for an unknown stem_type, 500 if the download fails.
    """
    from fastapi.responses import StreamingResponse
    import os
    import tempfile

    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    # Map stem_type to file_urls key
    stem_map = {
        'clean_instrumental': ('stems', 'instrumental_clean'),
        'backing_vocals': ('stems', 'backing_vocals'),
        'with_backing': ('stems', 'instrumental_with_backing'),
        'custom_instrumental': ('stems', 'instrumental_custom'),
    }

    if stem_type not in stem_map:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid stem_type. Must be one of: {list(stem_map.keys())}"
        )

    category, key = stem_map[stem_type]
    gcs_path = job.file_urls.get(category, {}).get(key)

    if not gcs_path:
        raise HTTPException(
            status_code=404,
            detail=f"Audio file not available: {stem_type}"
        )

    # Determine content type. splitext only looks at the final path component,
    # so a path without an extension yields '' instead of a string containing
    # '/' (which would be an invalid temp-file suffix).
    ext = os.path.splitext(gcs_path)[1].lstrip('.').lower()
    content_types = {
        'flac': 'audio/flac',
        'mp3': 'audio/mpeg',
        'wav': 'audio/wav',
        'm4a': 'audio/mp4',
    }
    content_type = content_types.get(ext, 'audio/flac')

    tmp_path = None
    try:
        storage = StorageService()
        with tempfile.NamedTemporaryFile(delete=False, suffix=f'.{ext}' if ext else '') as tmp:
            tmp_path = tmp.name

        storage.download_file(gcs_path, tmp_path)

        def file_iterator():
            try:
                with open(tmp_path, 'rb') as f:
                    while chunk := f.read(8192):
                        yield chunk
            finally:
                # Remove the temp file once the response body has been sent
                os.unlink(tmp_path)

        filename = gcs_path.split('/')[-1]

        # NOTE(review): 'Accept-Ranges: bytes' is advertised, but this handler
        # never reads a Range header and always sends the whole file.
        return StreamingResponse(
            file_iterator(),
            media_type=content_type,
            headers={
                'Content-Disposition': f'inline; filename="{filename}"',
                'Accept-Ranges': 'bytes',
            }
        )
    except Exception as e:
        # The iterator's cleanup only runs once streaming has started, so a
        # failure before that point must remove the temp file here.
        if tmp_path is not None and os.path.exists(tmp_path):
            try:
                os.unlink(tmp_path)
            except OSError:
                logger.warning(f"Could not remove temporary file {tmp_path}")
        logger.exception(f"Error streaming audio {gcs_path}: {e}")
        raise HTTPException(status_code=500, detail=f"Error streaming audio: {e}") from e
849
+
850
+
851
@router.post("/{job_id}/create-custom-instrumental")
async def create_custom_instrumental(
    job_id: str,
    request: CreateCustomInstrumentalRequest,
    auth_info: Tuple[str, str] = Depends(require_instrumental_auth)
) -> Dict[str, Any]:
    """
    Build a custom instrumental by muting regions of the backing vocals.

    Steps performed:
    1. Convert the requested time ranges into MuteRegion objects
    2. Ask AudioEditingService to create the custom instrumental from the
       clean instrumental and backing vocals stems
    3. Record the output path in file_urls and the mute regions in state_data

    After calling this endpoint, the user can select "custom" in the
    select-instrumental endpoint.

    Accepts either full auth token or job-specific instrumental_token.

    Raises:
        HTTPException: 404 if the job does not exist, 400 if the job is not
            awaiting instrumental selection, 500 if the required stems are
            missing or the creation fails.
    """
    from backend.services.audio_editing_service import AudioEditingService
    from karaoke_gen.instrumental_review import MuteRegion

    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    if job.status != JobStatus.AWAITING_INSTRUMENTAL_SELECTION:
        raise HTTPException(
            status_code=400,
            detail=f"Job not ready for custom instrumental creation (current status: {job.status})"
        )

    # Both source stems are required to build the custom mix
    stem_paths = job.file_urls.get('stems', {})
    clean_path = stem_paths.get('instrumental_clean')
    backing_path = stem_paths.get('backing_vocals')
    if not (clean_path and backing_path):
        raise HTTPException(
            status_code=500,
            detail="Required audio files not available"
        )

    try:
        # Convert request mute regions to model objects
        mute_regions = []
        for region in request.mute_regions:
            mute_regions.append(
                MuteRegion(
                    start_seconds=region.start_seconds,
                    end_seconds=region.end_seconds
                )
            )

        # Where the resulting audio is stored
        output_path = f"jobs/{job_id}/stems/instrumental_custom.flac"

        # Create custom instrumental
        result = AudioEditingService().create_custom_instrumental(
            gcs_clean_instrumental_path=clean_path,
            gcs_backing_vocals_path=backing_path,
            mute_regions=mute_regions,
            gcs_output_path=output_path,
            job_id=job_id,
        )

        # Update job file_urls with custom instrumental
        job_manager.update_file_url(job_id, 'stems', 'instrumental_custom', output_path)

        # Store mute regions in state_data for reference
        stored_regions = [
            {"start_seconds": region.start_seconds, "end_seconds": region.end_seconds}
            for region in mute_regions
        ]
        job_manager.update_state_data(job_id, 'custom_instrumental_mute_regions', stored_regions)

        logger.info(f"Job {job_id}: Custom instrumental created with {len(mute_regions)} mute regions")

        # Generate signed URL for the new file
        signed_url = StorageService().generate_signed_url(output_path, expiration_minutes=120)

        return {
            "status": "success",
            "message": "Custom instrumental created successfully",
            "custom_instrumental_url": signed_url,
            "mute_regions_applied": len(result.mute_regions_applied),
            "total_muted_duration_seconds": result.total_muted_duration_seconds,
            "output_duration_seconds": result.output_duration_seconds,
        }

    except Exception as e:
        logger.exception(f"Error creating custom instrumental for job {job_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
944
+
945
+
946
@router.get("/{job_id}/waveform-data")
async def get_waveform_data(
    job_id: str,
    num_points: int = 500,
    auth_info: Tuple[str, str] = Depends(require_instrumental_auth)
) -> Dict[str, Any]:
    """
    Get waveform amplitude data for client-side rendering.

    This endpoint returns raw amplitude data that the frontend can use
    to render a waveform using Canvas or SVG, enabling interactive
    features like click-to-seek.

    Args:
        job_id: ID of the job whose backing vocals are analysed.
        num_points: Number of data points to return (default 500).
            Must be at least 1.

    Returns:
        {
            "amplitudes": [0.1, 0.2, ...],  # Normalized 0-1 values
            "duration_seconds": 180.5,
            "num_points": 500
        }

    Accepts either full auth token or job-specific instrumental_token.

    Raises:
        HTTPException: 400 if num_points is not positive or the job is not
            awaiting instrumental selection, 404 if the job or its backing
            vocals file does not exist, 500 if the analysis fails.
    """
    from backend.services.audio_analysis_service import AudioAnalysisService

    # num_points is a caller-supplied query parameter; reject meaningless
    # values up front instead of letting the analysis service fail on them.
    if num_points < 1:
        raise HTTPException(status_code=400, detail="num_points must be a positive integer")

    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    if job.status != JobStatus.AWAITING_INSTRUMENTAL_SELECTION:
        raise HTTPException(
            status_code=400,
            detail=f"Job not ready for waveform data (current status: {job.status})"
        )

    backing_vocals_path = job.file_urls.get('stems', {}).get('backing_vocals')
    if not backing_vocals_path:
        raise HTTPException(status_code=404, detail="Backing vocals file not found")

    try:
        analysis_service = AudioAnalysisService()
        amplitudes, duration = analysis_service.get_waveform_data(
            gcs_audio_path=backing_vocals_path,
            job_id=job_id,
            num_points=num_points,
        )

        return {
            "amplitudes": amplitudes,
            "duration_seconds": duration,
            # Report the number of points actually produced by the service
            "num_points": len(amplitudes),
        }

    except Exception as e:
        logger.exception(f"Error getting waveform data for job {job_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
1004
+
1005
+
1006
@router.post("/{job_id}/select-instrumental")
async def select_instrumental(
    job_id: str,
    selection: InstrumentalSelection,
    background_tasks: BackgroundTasks,
    auth_info: Tuple[str, str] = Depends(require_instrumental_auth)
) -> dict:
    """
    Submit instrumental selection.

    This is the SECOND critical human-in-the-loop interaction point.
    After selection, the job proceeds to video generation.

    Accepts either full auth token or job-specific instrumental_token.

    Args:
        job_id: ID of the job awaiting instrumental selection.
        selection: Request body carrying the chosen instrumental.
        background_tasks: FastAPI background task queue used to trigger the
            video worker after the response is sent.

    Returns:
        A dict with "status", "job_status", "selection" and "message" keys.

    Raises:
        HTTPException: 404 if the job does not exist, 400 if the job is not
            awaiting instrumental selection, 500 if the status transition
            fails or an unexpected error occurs.
    """
    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    if job.status != JobStatus.AWAITING_INSTRUMENTAL_SELECTION:
        raise HTTPException(
            status_code=400,
            detail=f"Job not ready for instrumental selection (current status: {job.status})"
        )

    try:
        # Store selection in state_data
        job_manager.update_state_data(job_id, 'instrumental_selection', selection.selection)

        # Transition to INSTRUMENTAL_SELECTED. The result is checked (as
        # retry_job does) so the video worker is never started for a job
        # whose status was not actually updated.
        transitioned = job_manager.transition_to_state(
            job_id=job_id,
            new_status=JobStatus.INSTRUMENTAL_SELECTED,
            progress=65,
            message=f"Instrumental selected: {selection.selection}"
        )
        if not transitioned:
            raise HTTPException(
                status_code=500,
                detail="Failed to transition job status to instrumental selected"
            )

        # Trigger video generation worker
        background_tasks.add_task(worker_service.trigger_video_worker, job_id)

        logger.info(f"Job {job_id}: Instrumental selected ({selection.selection}), triggering video generation")

        return {
            "status": "success",
            "job_status": "instrumental_selected",
            "selection": selection.selection,
            "message": "Selection accepted, starting video generation"
        }

    except HTTPException:
        # Deliberate HTTP errors raised above must not be re-wrapped below.
        raise
    except Exception as e:
        logger.error(f"Error selecting instrumental for job {job_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e
1058
+
1059
+
1060
@router.get("/{job_id}/download-urls")
async def get_download_urls(
    job_id: str,
    auth_result: AuthResult = Depends(require_auth)
) -> dict:
    """
    List download URLs for every output file of a completed job.

    Each URL points at this API's streaming download endpoint
    (/api/jobs/{job_id}/download/{category}/{file_key}) rather than at
    storage directly. Categories holding a dict of files map to a dict of
    URLs; categories holding a single path map to a single URL whose
    file_key is the category name.

    Raises:
        HTTPException: 404 if the job does not exist, 403 if the caller does
            not own it, 400 if the job is not complete.
    """
    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    # Check ownership
    if not _check_job_ownership(job, auth_result):
        raise HTTPException(status_code=403, detail="You don't have permission to access this job")

    if job.status != JobStatus.COMPLETE:
        raise HTTPException(
            status_code=400,
            detail=f"Job not complete (current status: {job.status})"
        )

    # All URLs are routed through the streaming download endpoint
    download_base = f"/api/jobs/{job_id}/download"
    urls_by_category = {}

    for category, entry in (job.file_urls or {}).items():
        if isinstance(entry, dict):
            # Multi-file category: one URL per file that has a stored path
            urls_by_category[category] = {
                file_key: f"{download_base}/{category}/{file_key}"
                for file_key, gcs_path in entry.items()
                if gcs_path
            }
        elif isinstance(entry, str) and entry:
            # For single-file categories, use the category name as the file_key
            urls_by_category[category] = f"{download_base}/{category}/{category}"

    return {
        "job_id": job_id,
        "artist": job.artist,
        "title": job.title,
        "download_urls": urls_by_category
    }
1107
+
1108
+
1109
# Human-readable filename suffix (including extension) for each downloadable
# file key, matching the Dropbox naming from karaoke_finalise.py. The suffix
# is appended to "Artist - Title" when building a download filename.
DOWNLOAD_FILENAME_SUFFIXES = dict(
    lossless_4k_mp4=" (Final Karaoke Lossless 4k).mp4",
    lossless_4k_mkv=" (Final Karaoke Lossless 4k).mkv",
    lossy_4k_mp4=" (Final Karaoke Lossy 4k).mp4",
    lossy_720p_mp4=" (Final Karaoke Lossy 720p).mp4",
    cdg_zip=" (Final Karaoke CDG).zip",
    txt_zip=" (Final Karaoke TXT).zip",
    with_vocals=" (With Vocals).mkv",
)
1119
+
1120
+
1121
@router.get("/{job_id}/download/{category}/{file_key}")
async def download_file(
    job_id: str,
    category: str,
    file_key: str,
    auth_result: AuthResult = Depends(require_auth)
):
    """
    Stream download a specific file from a completed job.

    This endpoint proxies the file from GCS through the backend, so the
    client needs no storage credentials of its own; the caller must still
    be authenticated with this API and own the job.

    The file is downloaded from storage to a temporary file, streamed back in
    8 KiB chunks, and the temporary file is removed once streaming finishes
    or if the download fails.

    Args:
        job_id: ID of the completed job.
        category: Key in the job's file_urls (e.g. a dict of files, or a
            single path for single-file categories).
        file_key: Key of the file within the category; for single-file
            categories it must equal the category name.

    Raises:
        HTTPException: 404 if the job, category or file does not exist,
            403 if the caller does not own the job, 400 if the job is not
            complete, 500 if the download fails.
    """
    from fastapi.responses import StreamingResponse
    import os
    import tempfile

    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    # Check ownership
    if not _check_job_ownership(job, auth_result):
        raise HTTPException(status_code=403, detail="You don't have permission to access this job")

    if job.status != JobStatus.COMPLETE:
        raise HTTPException(
            status_code=400,
            detail=f"Job not complete (current status: {job.status})"
        )

    file_urls = job.file_urls or {}
    category_files = file_urls.get(category)

    if not category_files:
        raise HTTPException(status_code=404, detail=f"Category '{category}' not found")

    if isinstance(category_files, dict):
        gcs_path = category_files.get(file_key)
    else:
        # Single-file category: the only valid file_key is the category name
        gcs_path = category_files if file_key == category else None

    if not gcs_path:
        raise HTTPException(status_code=404, detail=f"File '{file_key}' not found in '{category}'")

    # Determine content type based on file extension. splitext only looks at
    # the final path component, so a path without an extension yields ''
    # instead of a string containing '/' (an invalid temp-file suffix).
    ext = os.path.splitext(gcs_path)[1].lstrip('.').lower()
    content_types = {
        'mp4': 'video/mp4',
        'mkv': 'video/x-matroska',
        'mov': 'video/quicktime',
        'flac': 'audio/flac',
        'mp3': 'audio/mpeg',
        'wav': 'audio/wav',
        'ass': 'text/plain',
        'lrc': 'text/plain',
        'txt': 'text/plain',
        'json': 'application/json',
        'zip': 'application/zip',
        'png': 'image/png',
        'jpg': 'image/jpeg',
        'jpeg': 'image/jpeg',
    }
    content_type = content_types.get(ext, 'application/octet-stream')

    # Build proper filename: "Artist - Title (Final Karaoke Lossy 4k).mp4"
    # Use sanitize_filename to handle Unicode characters (curly quotes, em dashes, etc.)
    # that cause HTTP header encoding issues (Content-Disposition uses latin-1)
    from karaoke_gen.utils import sanitize_filename
    artist_clean = sanitize_filename(job.artist) if job.artist else None
    title_clean = sanitize_filename(job.title) if job.title else None
    base_name = f"{artist_clean} - {title_clean}" if artist_clean and title_clean else None

    if base_name and file_key in DOWNLOAD_FILENAME_SUFFIXES:
        filename = f"{base_name}{DOWNLOAD_FILENAME_SUFFIXES[file_key]}"
    else:
        filename = gcs_path.split('/')[-1]  # Fallback to original

    tmp_path = None
    try:
        # Download to temp file and stream
        storage = StorageService()
        with tempfile.NamedTemporaryFile(delete=False, suffix=f'.{ext}' if ext else '') as tmp:
            tmp_path = tmp.name

        storage.download_file(gcs_path, tmp_path)

        def file_iterator():
            try:
                with open(tmp_path, 'rb') as f:
                    while chunk := f.read(8192):
                        yield chunk
            finally:
                # Remove the temp file once the response body has been sent
                os.unlink(tmp_path)

        return StreamingResponse(
            file_iterator(),
            media_type=content_type,
            headers={
                'Content-Disposition': f'attachment; filename="{filename}"'
            }
        )
    except Exception as e:
        # The iterator's cleanup only runs once streaming has started, so a
        # failure before that point must remove the temp file here.
        if tmp_path is not None and os.path.exists(tmp_path):
            try:
                os.unlink(tmp_path)
            except OSError:
                logger.warning(f"Could not remove temporary file {tmp_path}")
        logger.error(f"Error downloading {gcs_path}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Error downloading file: {e}") from e
1225
+
1226
+
1227
@router.post("/{job_id}/cancel")
async def cancel_job(
    job_id: str,
    request: CancelJobRequest,
    auth_result: AuthResult = Depends(require_auth)
) -> dict:
    """
    Cancel a job on behalf of its owner.

    The cancellation itself (and whether the job is still cancellable) is
    delegated to job_manager.cancel_job, which receives the reason supplied
    in the request body.

    Raises:
        HTTPException: 404 if the job does not exist, 403 if the caller does
            not own it, 400 if the job manager reports it cannot be cancelled.
    """
    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    # Only the job's owner may cancel it
    owns_job = _check_job_ownership(job, auth_result)
    if not owns_job:
        raise HTTPException(status_code=403, detail="You don't have permission to cancel this job")

    cancelled = job_manager.cancel_job(job_id, reason=request.reason)
    if cancelled:
        return {
            "status": "success",
            "job_status": "cancelled",
            "message": "Job cancelled successfully"
        }

    raise HTTPException(status_code=400, detail="Cannot cancel job")
1256
+
1257
+
1258
def _retry_file_url(file_urls: dict, category: str, key: str):
    """Return file_urls[category][key], or None when the category is missing or not a dict."""
    files = file_urls.get(category)
    return files.get(key) if isinstance(files, dict) else None


def _reset_job_for_retry(
    job_id: str,
    *,
    new_status,
    progress: int,
    message: str,
    progress_key: Optional[str] = None,
    clear_state_data: bool = False,
) -> None:
    """Clear a job's error state and move it to new_status, raising HTTP 500 if the transition fails."""
    updates = {
        'error_message': None,
        'error_details': None,
    }
    if clear_state_data:
        updates['state_data'] = {}  # Clear parallel worker progress
    job_manager.update_job(job_id, updates)

    # Reset the worker's progress marker so the re-run starts from a clean slate
    if progress_key is not None:
        job_manager.update_state_data(job_id, progress_key, {'stage': 'pending'})

    if not job_manager.transition_to_state(
        job_id=job_id,
        new_status=new_status,
        progress=progress,
        message=message
    ):
        raise HTTPException(
            status_code=500,
            detail="Failed to transition job status for retry"
        )


@router.post("/{job_id}/retry")
async def retry_job(
    job_id: str,
    background_tasks: BackgroundTasks,
    auth_result: AuthResult = Depends(require_auth)
) -> dict:
    """
    Retry a failed or cancelled job from the last successful checkpoint.

    The resume point is chosen from the files/state that already exist,
    checked in this order:
    1. Rendered with-vocals video and an instrumental selection:
       re-run the video worker (status INSTRUMENTAL_SELECTED)
    2. Lyrics corrections and title screen: re-run the render video worker
       (status REVIEW_COMPLETE)
    3. Clean instrumental stem and lyrics corrections: re-run the screens
       worker (status LYRICS_COMPLETE)
    4. Input audio (uploaded or from URL): restart from the beginning
       (status DOWNLOADING), clearing state_data
    5. Otherwise the job cannot be retried and must be resubmitted

    Returns:
        A dict with "status", "job_status", "message" and "retry_stage" keys
        (plus "job_id" when restarting from the beginning).

    Raises:
        HTTPException: 404 if the job does not exist, 403 if the caller does
            not own it, 400 if the job is neither failed nor cancelled or has
            no input audio, 500 if the status transition fails or an
            unexpected error occurs.
    """
    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    # Check ownership
    if not _check_job_ownership(job, auth_result):
        raise HTTPException(status_code=403, detail="You don't have permission to retry this job")

    if job.status not in [JobStatus.FAILED, JobStatus.CANCELLED]:
        raise HTTPException(
            status_code=400,
            detail=f"Only failed or cancelled jobs can be retried (current status: {job.status})"
        )

    try:
        # Determine retry point based on what's already complete
        error_details = job.error_details or {}
        error_stage = error_details.get('stage', 'unknown')
        original_status = job.status

        logger.info(f"Job {job_id}: Retrying from {original_status} state (error stage: '{error_stage}')")

        # Check what state exists to determine retry point
        file_urls = job.file_urls or {}
        state_data = job.state_data or {}

        # If we have a video with vocals and instrumental selection, retry video generation
        if (_retry_file_url(file_urls, 'videos', 'with_vocals') and
                state_data.get('instrumental_selection')):

            logger.info(f"Job {job_id}: Has rendered video and instrumental selection, retrying video generation")

            # Reset to INSTRUMENTAL_SELECTED and trigger video worker
            _reset_job_for_retry(
                job_id,
                new_status=JobStatus.INSTRUMENTAL_SELECTED,
                progress=65,
                message="Retrying video generation from failed state",
                progress_key='video_progress',
            )
            background_tasks.add_task(worker_service.trigger_video_worker, job_id)

            return {
                "status": "success",
                "job_status": "instrumental_selected",
                "message": "Job retry started from video generation stage",
                "retry_stage": "video_generation"
            }

        # If we have corrections and screens but no video, retry render
        elif (_retry_file_url(file_urls, 'lyrics', 'corrections') and
                _retry_file_url(file_urls, 'screens', 'title')):

            logger.info(f"Job {job_id}: Has corrections and screens, retrying from render stage")

            # Reset to REVIEW_COMPLETE and trigger render worker
            _reset_job_for_retry(
                job_id,
                new_status=JobStatus.REVIEW_COMPLETE,
                progress=70,
                message="Retrying video render from failed state",
                progress_key='render_progress',
            )
            background_tasks.add_task(worker_service.trigger_render_video_worker, job_id)

            return {
                "status": "success",
                "job_status": "review_complete",
                "message": "Job retry started from render stage",
                "retry_stage": "render_video"
            }

        # If we have stems and corrections, retry from screens generation
        elif (_retry_file_url(file_urls, 'stems', 'instrumental_clean') and
                _retry_file_url(file_urls, 'lyrics', 'corrections')):

            logger.info(f"Job {job_id}: Has stems and corrections, retrying from screens stage")

            # Reset to a state before screens and trigger screens worker
            _reset_job_for_retry(
                job_id,
                new_status=JobStatus.LYRICS_COMPLETE,
                progress=45,
                message="Retrying from screens generation",
                progress_key='screens_progress',
            )
            background_tasks.add_task(worker_service.trigger_screens_worker, job_id)

            return {
                "status": "success",
                "job_status": "lyrics_complete",
                "message": "Job retry started from screens generation",
                "retry_stage": "screens_generation"
            }

        # If we have input audio (uploaded or from URL), restart from beginning
        elif job.input_media_gcs_path or job.url:
            logger.info(f"Job {job_id}: Has input audio, restarting from beginning")

            # Clear error state and any partial progress, then reset to DOWNLOADING
            _reset_job_for_retry(
                job_id,
                new_status=JobStatus.DOWNLOADING,
                progress=5,
                message=f"Restarting job from {original_status} state",
                clear_state_data=True,
            )

            # Trigger both the audio worker and the lyrics worker
            background_tasks.add_task(worker_service.trigger_audio_worker, job_id)
            background_tasks.add_task(worker_service.trigger_lyrics_worker, job_id)

            return {
                "status": "success",
                "job_id": job_id,
                "job_status": "downloading",
                "message": f"Job restarted from {original_status} state",
                "retry_stage": "from_beginning"
            }

        else:
            # No input audio available - job needs to be resubmitted
            raise HTTPException(
                status_code=400,
                detail="Cannot retry: no input audio available. Job must be resubmitted."
            )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error retrying job {job_id}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e
1454
+
1455
+
1456
@router.get("/{job_id}/logs")
async def get_worker_logs(
    job_id: str,
    since_index: int = 0,
    worker: Optional[str] = None,
    auth_result: AuthResult = Depends(require_auth)
) -> Dict[str, Any]:
    """
    Return worker logs for a job, for debugging.

    Pass the previous response's `next_index` as `since_index` to poll for
    new entries only.

    Logs are stored in a subcollection (jobs/{job_id}/logs) to avoid
    the 1MB document size limit. Older jobs may have logs in an embedded
    array (worker_logs field) - this endpoint handles both transparently.

    Args:
        job_id: Job ID
        since_index: Return only logs after this index (for pagination/polling)
        worker: Filter by worker name (audio, lyrics, screens, video, render, distribution)

    Returns:
        {
            "logs": [{"timestamp": "...", "level": "INFO", "worker": "audio", "message": "..."}],
            "next_index": 42,  # Use this for next poll
            "total_logs": 42
        }

    Raises:
        HTTPException: 404 if the job does not exist, 403 if the caller does
            not own it.
    """
    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    # Check ownership - users can only see logs for their own jobs
    if not _check_job_ownership(job, auth_result):
        raise HTTPException(status_code=403, detail="You don't have permission to access logs for this job")

    entries = job_manager.get_worker_logs(job_id, since_index=since_index, worker=worker)
    total_count = job_manager.get_worker_logs_count(job_id)

    # NOTE(review): next_index assumes the returned entries are contiguous
    # from since_index; confirm this still holds when `worker` filters them.
    next_index = since_index + len(entries)

    return {
        "logs": entries,
        "next_index": next_index,
        "total_logs": total_count
    }
1501
+
1502
+
1503
def _extract_youtube_video_id(youtube_url: str) -> Optional[str]:
    """
    Pull the video ID out of a YouTube URL.

    Recognised formats are https://youtu.be/VIDEO_ID and
    https://www.youtube.com/watch?v=VIDEO_ID. The ID ends at the first
    '&', '?', '#' or whitespace character, so trailing query parameters
    (e.g. '?si=...', '&t=...') or a fragment are never returned as part
    of the ID.

    Args:
        youtube_url: URL to inspect

    Returns:
        The video ID, or None when the URL matches neither format.
    """
    import re

    match = re.search(r'(?:youtu\.be/|youtube\.com/watch\?v=)([^&?#\s]+)', youtube_url)
    return match.group(1) if match else None


@router.post("/{job_id}/cleanup-distribution")
async def cleanup_distribution(
    job_id: str,
    delete_job: bool = True,
    auth_result: AuthResult = Depends(require_admin)
) -> dict:
    """
    Clean up all distributed content for a job (YouTube, Dropbox, Google Drive).

    This admin-only endpoint is designed for E2E test cleanup. It:
    1. Deletes YouTube video (if uploaded)
    2. Deletes Dropbox folder (if uploaded)
    3. Deletes Google Drive files (if uploaded)
    4. Optionally deletes the job itself

    Each service is cleaned up independently: an exception in one step is
    logged and recorded in that service's result entry, and the remaining
    steps still run.

    Args:
        job_id: Job ID to clean up
        delete_job: If True, also delete the job after cleaning up distribution
        auth_result: Admin authentication result (enforced by require_admin)

    Returns:
        Cleanup results for each service. Keys: "job_id", "youtube",
        "dropbox", "gdrive" (each a dict with a "status" of "skipped",
        "success", "failed", "partial" or "error"), "job_deleted", and
        "job_delete_error" when deleting the job raised.

    Raises:
        HTTPException: 404 if the job does not exist.
    """
    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    state_data = job.state_data or {}
    # Every service starts as "skipped"; a step overwrites its entry only if it runs
    results = {
        "job_id": job_id,
        "youtube": {"status": "skipped", "reason": "no youtube_url in state_data"},
        "dropbox": {"status": "skipped", "reason": "no brand_code or dropbox_path"},
        "gdrive": {"status": "skipped", "reason": "no gdrive_files in state_data"},
        "job_deleted": False
    }

    # Clean up YouTube
    youtube_url = state_data.get('youtube_url')
    if youtube_url:
        try:
            video_id = _extract_youtube_video_id(youtube_url)
            if video_id is not None:
                # Import and use karaoke_finalise for YouTube deletion
                from karaoke_gen.karaoke_finalise.karaoke_finalise import KaraokeFinalise
                from backend.services.youtube_service import get_youtube_service

                youtube_service = get_youtube_service()
                if youtube_service.is_configured:
                    # Create minimal KaraokeFinalise instance for deletion
                    finalise = KaraokeFinalise(
                        dry_run=False,
                        non_interactive=True,
                        enable_youtube=True,
                        user_youtube_credentials=youtube_service.get_credentials_dict()
                    )
                    success = finalise.delete_youtube_video(video_id)
                    results["youtube"] = {
                        "status": "success" if success else "failed",
                        "video_id": video_id
                    }
                else:
                    results["youtube"] = {"status": "failed", "reason": "YouTube credentials not configured"}
            else:
                results["youtube"] = {"status": "failed", "reason": f"Could not extract video ID from {youtube_url}"}
        except Exception as e:
            logger.error(f"Error cleaning up YouTube for job {job_id}: {e}", exc_info=True)
            results["youtube"] = {"status": "error", "error": str(e)}

    # Clean up Dropbox
    brand_code = state_data.get('brand_code')
    dropbox_path = getattr(job, 'dropbox_path', None)
    if brand_code and dropbox_path:
        try:
            from backend.services.dropbox_service import get_dropbox_service
            dropbox = get_dropbox_service()
            if dropbox.is_configured:
                # Folder to delete: "<dropbox_path>/<brand_code> - <artist> - <title>"
                base_name = f"{job.artist} - {job.title}"
                folder_name = f"{brand_code} - {base_name}"
                full_path = f"{dropbox_path}/{folder_name}"
                success = dropbox.delete_folder(full_path)
                results["dropbox"] = {
                    "status": "success" if success else "failed",
                    "path": full_path
                }
            else:
                results["dropbox"] = {"status": "failed", "reason": "Dropbox credentials not configured"}
        except Exception as e:
            logger.error(f"Error cleaning up Dropbox for job {job_id}: {e}", exc_info=True)
            results["dropbox"] = {"status": "error", "error": str(e)}

    # Clean up Google Drive
    gdrive_files = state_data.get('gdrive_files')
    if gdrive_files:
        try:
            from backend.services.gdrive_service import get_gdrive_service
            gdrive = get_gdrive_service()
            if gdrive.is_configured:
                # gdrive_files is a dict like {"mp4": "file_id", "mp4_720p": "file_id", "cdg": "file_id"}
                file_ids = list(gdrive_files.values()) if isinstance(gdrive_files, dict) else []
                if file_ids:
                    delete_results = gdrive.delete_files(file_ids)
                    all_success = all(delete_results.values())
                    results["gdrive"] = {
                        "status": "success" if all_success else "partial",
                        "files": delete_results
                    }
                else:
                    # Truthy but not a dict: there are no file IDs to delete, so do
                    # not report success (all() over an empty result would be True).
                    results["gdrive"] = {
                        "status": "failed",
                        "reason": "gdrive_files in state_data is not a dict of file IDs"
                    }
            else:
                results["gdrive"] = {"status": "failed", "reason": "Google Drive credentials not configured"}
        except Exception as e:
            logger.error(f"Error cleaning up Google Drive for job {job_id}: {e}", exc_info=True)
            results["gdrive"] = {"status": "error", "error": str(e)}

    # Delete the job if requested (runs regardless of the per-service outcomes above)
    if delete_job:
        try:
            job_manager.delete_job(job_id, delete_files=True)
            results["job_deleted"] = True
            logger.info(f"Deleted job {job_id} after distribution cleanup")
        except Exception as e:
            logger.error(f"Error deleting job {job_id}: {e}", exc_info=True)
            results["job_deleted"] = False
            results["job_delete_error"] = str(e)

    return results
1629
+