karaoke-gen 0.90.1__py3-none-any.whl → 0.96.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. backend/.coveragerc +20 -0
  2. backend/.gitignore +37 -0
  3. backend/Dockerfile +43 -0
  4. backend/Dockerfile.base +74 -0
  5. backend/README.md +242 -0
  6. backend/__init__.py +0 -0
  7. backend/api/__init__.py +0 -0
  8. backend/api/dependencies.py +457 -0
  9. backend/api/routes/__init__.py +0 -0
  10. backend/api/routes/admin.py +742 -0
  11. backend/api/routes/audio_search.py +903 -0
  12. backend/api/routes/auth.py +348 -0
  13. backend/api/routes/file_upload.py +2076 -0
  14. backend/api/routes/health.py +344 -0
  15. backend/api/routes/internal.py +435 -0
  16. backend/api/routes/jobs.py +1610 -0
  17. backend/api/routes/review.py +652 -0
  18. backend/api/routes/themes.py +162 -0
  19. backend/api/routes/users.py +1014 -0
  20. backend/config.py +172 -0
  21. backend/main.py +133 -0
  22. backend/middleware/__init__.py +5 -0
  23. backend/middleware/audit_logging.py +124 -0
  24. backend/models/__init__.py +0 -0
  25. backend/models/job.py +519 -0
  26. backend/models/requests.py +123 -0
  27. backend/models/theme.py +153 -0
  28. backend/models/user.py +254 -0
  29. backend/models/worker_log.py +164 -0
  30. backend/pyproject.toml +29 -0
  31. backend/quick-check.sh +93 -0
  32. backend/requirements.txt +29 -0
  33. backend/run_tests.sh +60 -0
  34. backend/services/__init__.py +0 -0
  35. backend/services/audio_analysis_service.py +243 -0
  36. backend/services/audio_editing_service.py +278 -0
  37. backend/services/audio_search_service.py +702 -0
  38. backend/services/auth_service.py +630 -0
  39. backend/services/credential_manager.py +792 -0
  40. backend/services/discord_service.py +172 -0
  41. backend/services/dropbox_service.py +301 -0
  42. backend/services/email_service.py +1093 -0
  43. backend/services/encoding_interface.py +454 -0
  44. backend/services/encoding_service.py +405 -0
  45. backend/services/firestore_service.py +512 -0
  46. backend/services/flacfetch_client.py +573 -0
  47. backend/services/gce_encoding/README.md +72 -0
  48. backend/services/gce_encoding/__init__.py +22 -0
  49. backend/services/gce_encoding/main.py +589 -0
  50. backend/services/gce_encoding/requirements.txt +16 -0
  51. backend/services/gdrive_service.py +356 -0
  52. backend/services/job_logging.py +258 -0
  53. backend/services/job_manager.py +842 -0
  54. backend/services/job_notification_service.py +271 -0
  55. backend/services/local_encoding_service.py +590 -0
  56. backend/services/local_preview_encoding_service.py +407 -0
  57. backend/services/lyrics_cache_service.py +216 -0
  58. backend/services/metrics.py +413 -0
  59. backend/services/packaging_service.py +287 -0
  60. backend/services/rclone_service.py +106 -0
  61. backend/services/storage_service.py +209 -0
  62. backend/services/stripe_service.py +275 -0
  63. backend/services/structured_logging.py +254 -0
  64. backend/services/template_service.py +330 -0
  65. backend/services/theme_service.py +469 -0
  66. backend/services/tracing.py +543 -0
  67. backend/services/user_service.py +721 -0
  68. backend/services/worker_service.py +558 -0
  69. backend/services/youtube_service.py +112 -0
  70. backend/services/youtube_upload_service.py +445 -0
  71. backend/tests/__init__.py +4 -0
  72. backend/tests/conftest.py +224 -0
  73. backend/tests/emulator/__init__.py +7 -0
  74. backend/tests/emulator/conftest.py +88 -0
  75. backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
  76. backend/tests/emulator/test_emulator_integration.py +356 -0
  77. backend/tests/emulator/test_style_loading_direct.py +436 -0
  78. backend/tests/emulator/test_worker_logs_direct.py +229 -0
  79. backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
  80. backend/tests/requirements-test.txt +10 -0
  81. backend/tests/requirements.txt +6 -0
  82. backend/tests/test_admin_email_endpoints.py +411 -0
  83. backend/tests/test_api_integration.py +460 -0
  84. backend/tests/test_api_routes.py +93 -0
  85. backend/tests/test_audio_analysis_service.py +294 -0
  86. backend/tests/test_audio_editing_service.py +386 -0
  87. backend/tests/test_audio_search.py +1398 -0
  88. backend/tests/test_audio_services.py +378 -0
  89. backend/tests/test_auth_firestore.py +231 -0
  90. backend/tests/test_config_extended.py +68 -0
  91. backend/tests/test_credential_manager.py +377 -0
  92. backend/tests/test_dependencies.py +54 -0
  93. backend/tests/test_discord_service.py +244 -0
  94. backend/tests/test_distribution_services.py +820 -0
  95. backend/tests/test_dropbox_service.py +472 -0
  96. backend/tests/test_email_service.py +492 -0
  97. backend/tests/test_emulator_integration.py +322 -0
  98. backend/tests/test_encoding_interface.py +412 -0
  99. backend/tests/test_file_upload.py +1739 -0
  100. backend/tests/test_flacfetch_client.py +632 -0
  101. backend/tests/test_gdrive_service.py +524 -0
  102. backend/tests/test_instrumental_api.py +431 -0
  103. backend/tests/test_internal_api.py +343 -0
  104. backend/tests/test_job_creation_regression.py +583 -0
  105. backend/tests/test_job_manager.py +339 -0
  106. backend/tests/test_job_manager_notifications.py +329 -0
  107. backend/tests/test_job_notification_service.py +443 -0
  108. backend/tests/test_jobs_api.py +273 -0
  109. backend/tests/test_local_encoding_service.py +423 -0
  110. backend/tests/test_local_preview_encoding_service.py +567 -0
  111. backend/tests/test_main.py +87 -0
  112. backend/tests/test_models.py +918 -0
  113. backend/tests/test_packaging_service.py +382 -0
  114. backend/tests/test_requests.py +201 -0
  115. backend/tests/test_routes_jobs.py +282 -0
  116. backend/tests/test_routes_review.py +337 -0
  117. backend/tests/test_services.py +556 -0
  118. backend/tests/test_services_extended.py +112 -0
  119. backend/tests/test_storage_service.py +448 -0
  120. backend/tests/test_style_upload.py +261 -0
  121. backend/tests/test_template_service.py +295 -0
  122. backend/tests/test_theme_service.py +516 -0
  123. backend/tests/test_unicode_sanitization.py +522 -0
  124. backend/tests/test_upload_api.py +256 -0
  125. backend/tests/test_validate.py +156 -0
  126. backend/tests/test_video_worker_orchestrator.py +847 -0
  127. backend/tests/test_worker_log_subcollection.py +509 -0
  128. backend/tests/test_worker_logging.py +365 -0
  129. backend/tests/test_workers.py +1116 -0
  130. backend/tests/test_workers_extended.py +178 -0
  131. backend/tests/test_youtube_service.py +247 -0
  132. backend/tests/test_youtube_upload_service.py +568 -0
  133. backend/validate.py +173 -0
  134. backend/version.py +27 -0
  135. backend/workers/README.md +597 -0
  136. backend/workers/__init__.py +11 -0
  137. backend/workers/audio_worker.py +618 -0
  138. backend/workers/lyrics_worker.py +683 -0
  139. backend/workers/render_video_worker.py +483 -0
  140. backend/workers/screens_worker.py +525 -0
  141. backend/workers/style_helper.py +198 -0
  142. backend/workers/video_worker.py +1277 -0
  143. backend/workers/video_worker_orchestrator.py +701 -0
  144. backend/workers/worker_logging.py +278 -0
  145. karaoke_gen/instrumental_review/static/index.html +7 -4
  146. karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
  147. karaoke_gen/utils/__init__.py +163 -8
  148. karaoke_gen/video_background_processor.py +9 -4
  149. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/METADATA +1 -1
  150. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/RECORD +186 -41
  151. lyrics_transcriber/correction/agentic/providers/config.py +9 -5
  152. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +1 -51
  153. lyrics_transcriber/correction/corrector.py +192 -130
  154. lyrics_transcriber/correction/operations.py +24 -9
  155. lyrics_transcriber/frontend/package-lock.json +2 -2
  156. lyrics_transcriber/frontend/package.json +1 -1
  157. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
  158. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
  159. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
  160. lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
  161. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
  162. lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
  163. lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
  164. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
  165. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
  166. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
  167. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
  168. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  169. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
  170. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
  171. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
  172. lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
  173. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
  174. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
  175. lyrics_transcriber/frontend/src/theme.ts +42 -15
  176. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  177. lyrics_transcriber/frontend/vite.config.js +5 -0
  178. lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
  179. lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
  180. lyrics_transcriber/frontend/web_assets/index.html +6 -2
  181. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
  182. lyrics_transcriber/output/generator.py +17 -3
  183. lyrics_transcriber/output/video.py +60 -95
  184. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
  185. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/WHEEL +0 -0
  186. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/entry_points.txt +0 -0
  187. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/licenses/LICENSE +0 -0
backend/services/job_manager.py (new file)
@@ -0,0 +1,842 @@
+ """
+ Job management and queue operations.
+
+ This module handles the complete job lifecycle including:
+ - Job creation and initialization
+ - State transitions and validation
+ - Worker coordination (parallel audio + lyrics processing)
+ - Progress tracking and timeline events
+ - Error handling and retries
+ """
+ import logging
+ import uuid
+ from datetime import datetime
+ from typing import Optional, Dict, Any, List
+
+ from backend.config import settings
+ from backend.models.job import Job, JobStatus, JobCreate, STATE_TRANSITIONS
+ from backend.models.worker_log import WorkerLogEntry
+ from backend.services.firestore_service import FirestoreService
+ from backend.services.storage_service import StorageService
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class JobManager:
+     """Manager for job lifecycle and state."""
+
+     def __init__(self):
+         """Initialize job manager with required services."""
+         self.firestore = FirestoreService()
+         self.storage = StorageService()
+
+     def create_job(self, job_create: JobCreate) -> Job:
+         """
+         Create a new job with initial state PENDING.
+
+         Jobs start in PENDING state and transition to DOWNLOADING
+         when a worker picks them up.
+         """
+         job_id = str(uuid.uuid4())[:8]
+
+         now = datetime.utcnow()
+         job = Job(
+             job_id=job_id,
+             status=JobStatus.PENDING,  # New state machine starts with PENDING
+             progress=0,
+             created_at=now,
+             updated_at=now,
+             url=str(job_create.url) if job_create.url else None,
+             artist=job_create.artist,
+             title=job_create.title,
+             # User preferences
+             enable_cdg=job_create.enable_cdg,
+             enable_txt=job_create.enable_txt,
+             enable_youtube_upload=job_create.enable_youtube_upload,
+             youtube_description=job_create.youtube_description,
+             youtube_description_template=job_create.youtube_description_template,  # video_worker reads this
+             webhook_url=job_create.webhook_url,
+             user_email=job_create.user_email,
+             # Distribution settings
+             brand_prefix=job_create.brand_prefix,
+             discord_webhook_url=job_create.discord_webhook_url,
+             dropbox_path=job_create.dropbox_path,
+             gdrive_folder_id=job_create.gdrive_folder_id,
+             # Theme configuration
+             theme_id=job_create.theme_id,
+             color_overrides=job_create.color_overrides,
+             style_params_gcs_path=job_create.style_params_gcs_path,
+             style_assets=job_create.style_assets,
+             # Two-phase workflow (Batch 6)
+             prep_only=job_create.prep_only,
+             finalise_only=job_create.finalise_only,
+             keep_brand_code=job_create.keep_brand_code,
+             # Request metadata (for tracking and filtering)
+             request_metadata=job_create.request_metadata,
+         )
+
+         self.firestore.create_job(job)
+         logger.info(f"Created new job {job_id} with status PENDING")
+
+         return job
+
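For orientation, here is a minimal sketch of how a caller might drive create_job. The field values are hypothetical, the import paths follow the module layout in the file list above, and it assumes the remaining JobCreate fields have defaults:

    from backend.models.job import JobCreate
    from backend.services.job_manager import JobManager

    manager = JobManager()
    job = manager.create_job(JobCreate(
        url="https://www.youtube.com/watch?v=example",  # hypothetical source URL
        artist="Example Artist",
        title="Example Song",
        user_email="user@example.com",
    ))
    # job_id is an 8-character UUID prefix; status stays PENDING
    # until a worker picks the job up.
    print(job.job_id, job.status)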
+     def get_job(self, job_id: str) -> Optional[Job]:
+         """Get a job by ID."""
+         return self.firestore.get_job(job_id)
+
+     def update_job_status(
+         self,
+         job_id: str,
+         status: JobStatus,
+         progress: Optional[int] = None,
+         message: Optional[str] = None,
+         **kwargs
+     ) -> None:
+         """Update job status with timeline tracking."""
+         self.firestore.update_job_status(
+             job_id=job_id,
+             status=status,
+             progress=progress,
+             message=message,
+             **kwargs
+         )
+
+     def update_job(self, job_id: str, updates: Dict[str, Any]) -> None:
+         """Update job with arbitrary fields."""
+         self.firestore.update_job(job_id, updates)
+
+     def list_jobs(
+         self,
+         status: Optional[JobStatus] = None,
+         environment: Optional[str] = None,
+         client_id: Optional[str] = None,
+         created_after: Optional[datetime] = None,
+         created_before: Optional[datetime] = None,
+         user_email: Optional[str] = None,
+         limit: int = 100
+     ) -> List[Job]:
+         """
+         List jobs with optional filtering.
+
+         Args:
+             status: Filter by job status
+             environment: Filter by request_metadata.environment (test/production/development)
+             client_id: Filter by request_metadata.client_id (customer identifier)
+             created_after: Filter jobs created after this datetime
+             created_before: Filter jobs created before this datetime
+             user_email: Filter by user_email (job owner)
+             limit: Maximum number of jobs to return
+
+         Returns:
+             List of Job objects matching filters
+         """
+         return self.firestore.list_jobs(
+             status=status,
+             environment=environment,
+             client_id=client_id,
+             created_after=created_after,
+             created_before=created_before,
+             user_email=user_email,
+             limit=limit
+         )
+
+     def delete_jobs_by_filter(
+         self,
+         environment: Optional[str] = None,
+         client_id: Optional[str] = None,
+         status: Optional[JobStatus] = None,
+         created_before: Optional[datetime] = None,
+         delete_files: bool = True
+     ) -> Dict[str, Any]:
+         """
+         Delete multiple jobs matching filter criteria.
+
+         CAUTION: This is a destructive operation. Use carefully.
+
+         Args:
+             environment: Delete jobs with this environment (e.g., "test")
+             client_id: Delete jobs from this client
+             status: Delete jobs with this status
+             created_before: Delete jobs created before this datetime
+             delete_files: Also delete GCS files (default True)
+
+         Returns:
+             Dict with deletion statistics
+         """
+         # First get matching jobs to delete their files
+         if delete_files:
+             jobs = self.firestore.list_jobs(
+                 status=status,
+                 environment=environment,
+                 client_id=client_id,
+                 created_before=created_before,
+                 limit=10000  # High limit for deletion
+             )
+
+             files_deleted = 0
+             for job in jobs:
+                 # Delete files from various locations
+                 try:
+                     # Delete uploads folder
+                     self.storage.delete_folder(f"uploads/{job.job_id}/")
+                     # Delete jobs folder
+                     self.storage.delete_folder(f"jobs/{job.job_id}/")
+                     files_deleted += 1
+                 except Exception as e:
+                     logger.warning(f"Error deleting files for job {job.job_id}: {e}")
+
+         # Delete the jobs from Firestore
+         deleted_count = self.firestore.delete_jobs_by_filter(
+             environment=environment,
+             client_id=client_id,
+             status=status,
+             created_before=created_before
+         )
+
+         return {
+             'jobs_deleted': deleted_count,
+             'files_deleted': files_deleted if delete_files else 0
+         }
+
+     def mark_job_error(self, job_id: str, error_message: str) -> None:
+         """Mark a job as errored."""
+         self.firestore.update_job_status(
+             job_id=job_id,
+             status=JobStatus.ERROR,
+             progress=0,
+             message=error_message,
+             error_message=error_message
+         )
+         logger.error(f"Job {job_id} marked as error: {error_message}")
+
+     def get_output_urls(self, job_id: str) -> Dict[str, str]:
+         """Generate signed URLs for job output files."""
+         job = self.get_job(job_id)
+         if not job or not job.output_files:
+             return {}
+
+         urls = {}
+         for file_type, gcs_path in job.output_files.items():
+             try:
+                 urls[file_type] = self.storage.generate_signed_url(gcs_path, expiration_minutes=120)
+             except Exception as e:
+                 logger.error(f"Error generating URL for {file_type}: {e}")
+
+         return urls
+
+     def delete_job(self, job_id: str, delete_files: bool = True) -> None:
+         """Delete a job, its files, and its logs subcollection."""
+         if delete_files:
+             job = self.get_job(job_id)
+             if job and job.output_files:
+                 for gcs_path in job.output_files.values():
+                     try:
+                         self.storage.delete_file(gcs_path)
+                     except Exception as e:
+                         logger.error(f"Error deleting file {gcs_path}: {e}")
+
+         # Delete logs subcollection first (must be done before deleting parent doc)
+         try:
+             deleted_logs = self.firestore.delete_logs_subcollection(job_id)
+             if deleted_logs > 0:
+                 logger.info(f"Deleted {deleted_logs} log entries for job {job_id}")
+         except Exception as e:
+             logger.warning(f"Error deleting logs subcollection for job {job_id}: {e}")
+
+         self.firestore.delete_job(job_id)
+         logger.info(f"Deleted job {job_id}")
+
+     def validate_state_transition(self, job_id: str, new_status: JobStatus) -> bool:
+         """
+         Validate that a state transition is legal.
+
+         Returns:
+             True if transition is valid, False otherwise
+         """
+         job = self.get_job(job_id)
+         if not job:
+             logger.error(f"Job {job_id} not found")
+             return False
+
+         current_status = job.status
+         valid_transitions = STATE_TRANSITIONS.get(current_status, [])
+
+         if new_status not in valid_transitions:
+             logger.error(
+                 f"Invalid state transition for job {job_id}: "
+                 f"{current_status} -> {new_status}. "
+                 f"Valid transitions: {valid_transitions}"
+             )
+             return False
+
+         return True
+
+     def transition_to_state(
+         self,
+         job_id: str,
+         new_status: JobStatus,
+         progress: Optional[int] = None,
+         message: Optional[str] = None,
+         state_data_updates: Optional[Dict[str, Any]] = None
+     ) -> bool:
+         """
+         Transition job to new state with validation.
+
+         Args:
+             job_id: Job ID
+             new_status: Target state
+             progress: Progress percentage (0-100)
+             message: Timeline message
+             state_data_updates: Updates to state_data field
+
+         Returns:
+             True if transition succeeded, False otherwise
+         """
+         if not self.validate_state_transition(job_id, new_status):
+             return False
+
+         updates = {
+             'status': new_status,
+             'updated_at': datetime.utcnow()
+         }
+
+         if progress is not None:
+             updates['progress'] = progress
+
+         # Generate review token when entering AWAITING_REVIEW state
+         if new_status == JobStatus.AWAITING_REVIEW:
+             from backend.api.dependencies import generate_review_token, get_review_token_expiry
+             review_token = generate_review_token()
+             review_token_expires = get_review_token_expiry(hours=48)  # 48 hour expiry
+             updates['review_token'] = review_token
+             updates['review_token_expires_at'] = review_token_expires
+             logger.info(f"Generated review token for job {job_id}, expires in 48 hours")
+
+         # Generate instrumental token when entering AWAITING_INSTRUMENTAL_SELECTION state
+         if new_status == JobStatus.AWAITING_INSTRUMENTAL_SELECTION:
+             from backend.api.dependencies import generate_review_token, get_review_token_expiry
+             instrumental_token = generate_review_token()  # Reuse same token generator
+             instrumental_token_expires = get_review_token_expiry(hours=48)  # 48 hour expiry
+             updates['instrumental_token'] = instrumental_token
+             updates['instrumental_token_expires_at'] = instrumental_token_expires
+             logger.info(f"Generated instrumental token for job {job_id}, expires in 48 hours")
+
+         # If we have state_data_updates, merge them with existing state_data
+         merged_state_data = None
+         if state_data_updates:
+             job = self.get_job(job_id)
+             if job:
+                 merged_state_data = {**job.state_data, **state_data_updates}
+
+         # Update job status (includes timeline event), passing state_data if present
+         if merged_state_data is not None:
+             self.update_job_status(
+                 job_id=job_id,
+                 status=new_status,
+                 progress=progress,
+                 message=message,
+                 state_data=merged_state_data
+             )
+         else:
+             self.update_job_status(
+                 job_id=job_id,
+                 status=new_status,
+                 progress=progress,
+                 message=message
+             )
+
+         # Apply review token update separately if generated
+         if new_status == JobStatus.AWAITING_REVIEW and 'review_token' in updates:
+             self.firestore.update_job(job_id, {
+                 'review_token': updates['review_token'],
+                 'review_token_expires_at': updates['review_token_expires_at']
+             })
+
+         # Apply instrumental token update separately if generated
+         if new_status == JobStatus.AWAITING_INSTRUMENTAL_SELECTION and 'instrumental_token' in updates:
+             self.firestore.update_job(job_id, {
+                 'instrumental_token': updates['instrumental_token'],
+                 'instrumental_token_expires_at': updates['instrumental_token_expires_at']
+             })
+
+         logger.info(f"Job {job_id} transitioned to {new_status}")
+
+         # Trigger notifications asynchronously (fire-and-forget)
+         self._trigger_state_notifications(job_id, new_status)
+
+         return True
+
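As a sketch of the flow above: a worker requests a transition, and the call is refused unless STATE_TRANSITIONS (defined in backend/models/job.py, which is not shown in this hunk) lists the target state for the job's current status. The job ID and progress values here are hypothetical:

    from backend.models.job import JobStatus
    from backend.services.job_manager import JobManager

    manager = JobManager()

    # Succeeds only if STATE_TRANSITIONS[current_status] includes AWAITING_REVIEW;
    # entering AWAITING_REVIEW also mints a review token with a 48-hour expiry.
    ok = manager.transition_to_state(
        job_id="abc12345",
        new_status=JobStatus.AWAITING_REVIEW,
        progress=60,
        message="Lyrics ready for review",
        state_data_updates={"lyrics_ready": True},  # merged into state_data
    )
    if not ok:
        # Invalid transitions are logged and refused rather than raised.
        ...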
+     def _trigger_state_notifications(self, job_id: str, new_status: JobStatus) -> None:
+         """
+         Trigger email notifications based on state transitions.
+
+         This is fire-and-forget - notification failures don't affect job processing.
+
+         Args:
+             job_id: Job ID
+             new_status: New job status
+         """
+         import asyncio
+
+         try:
+             # Get the job to access user info
+             job = self.get_job(job_id)
+             if not job or not job.user_email:
+                 logger.debug(f"No user email for job {job_id}, skipping notifications")
+                 return
+
+             # Job completion notification
+             if new_status == JobStatus.COMPLETE:
+                 self._schedule_completion_email(job)
+
+             # Idle reminder scheduling for blocking states
+             elif new_status in [JobStatus.AWAITING_REVIEW, JobStatus.AWAITING_INSTRUMENTAL_SELECTION]:
+                 self._schedule_idle_reminder(job, new_status)
+
+         except Exception as e:
+             # Never let notification failures affect job processing
+             logger.error(f"Error triggering notifications for job {job_id}: {e}")
+
+     def _schedule_completion_email(self, job: Job) -> None:
+         """
+         Schedule sending a job completion email.
+
+         Uses asyncio to fire-and-forget the email sending.
+         """
+         import asyncio
+         import threading
+
+         try:
+             from backend.services.job_notification_service import get_job_notification_service
+
+             notification_service = get_job_notification_service()
+
+             # Get youtube, dropbox URLs, and brand_code from state_data (may be None)
+             state_data = job.state_data or {}
+             youtube_url = state_data.get('youtube_url')
+             dropbox_url = state_data.get('dropbox_link')
+             brand_code = state_data.get('brand_code')
+
+             # Create async task (fire-and-forget)
+             async def send_email():
+                 await notification_service.send_job_completion_email(
+                     job_id=job.job_id,
+                     user_email=job.user_email,
+                     user_name=None,  # Could fetch from user service if needed
+                     artist=job.artist,
+                     title=job.title,
+                     youtube_url=youtube_url,
+                     dropbox_url=dropbox_url,
+                     brand_code=brand_code,
+                 )
+
+             # Try to get existing event loop, create new one if none exists
+             try:
+                 loop = asyncio.get_running_loop()
+                 # If we're in an async context, create a task
+                 loop.create_task(send_email())
+             except RuntimeError:
+                 # No event loop - we're likely in a sync context
+                 # Use daemon thread to avoid blocking job completion
+                 def run_in_thread():
+                     asyncio.run(send_email())
+                 thread = threading.Thread(target=run_in_thread, daemon=True)
+                 thread.start()
+
+             logger.info(f"Scheduled completion email for job {job.job_id}")
+
+         except Exception as e:
+             logger.error(f"Failed to schedule completion email for job {job.job_id}: {e}")
+
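The try/except dance in _schedule_completion_email (and in _schedule_idle_reminder below) is a general pattern for firing a coroutine from code that may or may not already be running inside an event loop. A distilled sketch of just that pattern:

    import asyncio
    import threading
    from typing import Awaitable, Callable

    def fire_and_forget(coro_factory: Callable[[], Awaitable[None]]) -> None:
        """Run an async callable from sync or async context without blocking."""
        try:
            # Inside a running loop (e.g. a request handler): schedule a task.
            asyncio.get_running_loop().create_task(coro_factory())
        except RuntimeError:
            # No running loop (plain sync context): run it on a daemon thread
            # so the caller never blocks and process shutdown is not held up.
            threading.Thread(
                target=lambda: asyncio.run(coro_factory()),
                daemon=True,
            ).start()

One caveat with this pattern: a task created with create_task can be garbage-collected if no reference to it is kept, so long-lived callers often stash the task in a set until it completes.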
+     def _schedule_idle_reminder(self, job: Job, new_status: JobStatus) -> None:
+         """
+         Schedule an idle reminder for a blocking state.
+
+         Records the timestamp when the blocking state was entered and
+         schedules a Cloud Tasks task for 5 minutes later.
+         """
+         import asyncio
+         import threading
+
+         try:
+             # Record when we entered blocking state (for idle detection)
+             blocking_entered_at = datetime.utcnow().isoformat()
+
+             action_type = "lyrics" if new_status == JobStatus.AWAITING_REVIEW else "instrumental"
+
+             # Update state_data with blocking state info (handle None state_data)
+             existing_state_data = job.state_data or {}
+             state_data_update = {
+                 'blocking_state_entered_at': blocking_entered_at,
+                 'blocking_action_type': action_type,
+                 'reminder_sent': False,  # Will be set to True after reminder is sent
+             }
+
+             self.firestore.update_job(job.job_id, {
+                 'state_data': {**existing_state_data, **state_data_update}
+             })
+
+             # Schedule the idle reminder check via worker service (5 min delay)
+             from backend.services.worker_service import get_worker_service
+
+             async def schedule_reminder():
+                 worker_service = get_worker_service()
+                 await worker_service.schedule_idle_reminder(job.job_id)
+
+             # Try to get existing event loop, create new one if none exists
+             try:
+                 loop = asyncio.get_running_loop()
+                 loop.create_task(schedule_reminder())
+             except RuntimeError:
+                 # No event loop - we're in a sync context
+                 # Use daemon thread to avoid blocking job processing
+                 def run_in_thread():
+                     asyncio.run(schedule_reminder())
+                 thread = threading.Thread(target=run_in_thread, daemon=True)
+                 thread.start()
+
+             logger.info(f"Scheduled idle reminder for job {job.job_id} ({action_type})")
+
+         except Exception as e:
+             logger.error(f"Failed to schedule idle reminder for job {job.job_id}: {e}")
+
+     def update_state_data(self, job_id: str, key: str, value: Any) -> None:
+         """
+         Update a specific key in the job's state_data field.
+
+         This is used by workers to store stage-specific metadata.
+         """
+         job = self.get_job(job_id)
+         if not job:
+             logger.error(f"Job {job_id} not found")
+             return
+
+         state_data = job.state_data.copy()
+         state_data[key] = value
+
+         self.update_job(job_id, {'state_data': state_data})
+         logger.debug(f"Job {job_id} state_data updated: {key} = {value}")
+
+     def fail_job(self, job_id: str, error_message: str, error_details: Optional[Dict[str, Any]] = None) -> bool:
+         """
+         Mark a job as failed with error information.
+
+         Args:
+             job_id: Job ID
+             error_message: Human-readable error message
+             error_details: Optional structured error details
+
+         Returns:
+             True if successful
+         """
+         try:
+             # Update error fields
+             self.update_job(job_id, {
+                 'error_message': error_message,
+                 'error_details': error_details or {}
+             })
+
+             # Use update_job_status which handles timeline
+             self.update_job_status(
+                 job_id=job_id,
+                 status=JobStatus.FAILED,
+                 message=error_message
+             )
+
+             logger.error(f"Job {job_id} failed: {error_message}")
+             return True
+         except Exception as e:
+             logger.error(f"Error marking job {job_id} as failed: {e}")
+             return False
+
+     def update_file_url(self, job_id: str, category: str, file_type: str, url: str) -> None:
+         """
+         Update a file URL in the job's file_urls structure.
+
+         Args:
+             job_id: Job ID
+             category: Category (e.g., "stems", "lyrics", "finals")
+             file_type: File type within category (e.g., "clean", "lrc", "lossless_4k_mp4")
+             url: GCS URL
+         """
+         job = self.get_job(job_id)
+         if not job:
+             logger.error(f"Job {job_id} not found")
+             return
+
+         file_urls = job.file_urls.copy()
+         if category not in file_urls:
+             file_urls[category] = {}
+
+         file_urls[category][file_type] = url
+
+         self.update_job(job_id, {'file_urls': file_urls})
+         logger.debug(f"Job {job_id} file URL updated: {category}.{file_type}")
+
+     def check_parallel_processing_complete(self, job_id: str) -> bool:
+         """
+         Check if both parallel tracks (audio + lyrics) are complete.
+
+         This is called after audio_complete or lyrics_complete to determine
+         if we can proceed to screen generation.
+
+         Returns:
+             True if both tracks complete, False otherwise
+         """
+         job = self.get_job(job_id)
+         if not job:
+             return False
+
+         audio_complete = job.state_data.get('audio_complete', False)
+         lyrics_complete = job.state_data.get('lyrics_complete', False)
+
+         return audio_complete and lyrics_complete
+
+     def mark_audio_complete(self, job_id: str) -> None:
+         """
+         Mark audio processing as complete and check if can proceed.
+
+         If lyrics are also complete, automatically triggers screens worker.
+         """
+         self.update_state_data(job_id, 'audio_complete', True)
+
+         if self.check_parallel_processing_complete(job_id):
+             logger.info(f"Job {job_id}: Both audio and lyrics complete, triggering screens worker")
+             # Transition happens in screens worker
+             # We just trigger it here
+             self._trigger_screens_worker(job_id)
+
+     def mark_lyrics_complete(self, job_id: str) -> None:
+         """
+         Mark lyrics processing as complete and check if can proceed.
+
+         If audio is also complete, automatically triggers screens worker.
+         """
+         self.update_state_data(job_id, 'lyrics_complete', True)
+
+         if self.check_parallel_processing_complete(job_id):
+             logger.info(f"Job {job_id}: Both audio and lyrics complete, triggering screens worker")
+             # Transition happens in screens worker
+             # We just trigger it here
+             self._trigger_screens_worker(job_id)
+
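Together, mark_audio_complete and mark_lyrics_complete form a simple barrier: whichever of the two parallel workers finishes last fires the screens stage. A sketch of how the workers would report in (hypothetical job ID; the actual worker code lives under backend/workers/, not in this hunk):

    manager = JobManager()

    # Audio worker, on success:
    manager.mark_audio_complete("abc12345")   # sets state_data['audio_complete']

    # Lyrics worker, on success, possibly on another instance:
    manager.mark_lyrics_complete("abc12345")  # sets state_data['lyrics_complete']
    # Whichever call observes both flags True fires _trigger_screens_worker().

Note that the completion check is a plain read-after-write against Firestore rather than a transaction, so two workers finishing at almost the same instant could in principle both observe completion and trigger the screens worker twice.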
+     def _trigger_screens_worker(self, job_id: str) -> None:
+         """
+         Trigger screens generation worker.
+
+         Uses WorkerService to make HTTP call to internal API.
+         This must be async, so we use asyncio to create a task.
+         """
+         import asyncio
+         from backend.services.worker_service import get_worker_service
+
+         logger.info(f"Job {job_id}: Triggering screens worker")
+
+         # Create async task to trigger worker
+         # This allows us to call async code from sync context
+         async def _trigger():
+             worker_service = get_worker_service()
+             await worker_service.trigger_screens_worker(job_id)
+
+         # Create task in event loop
+         try:
+             loop = asyncio.get_event_loop()
+             if loop.is_running():
+                 # If loop is already running, create task
+                 asyncio.create_task(_trigger())
+             else:
+                 # If no loop, run directly
+                 asyncio.run(_trigger())
+         except RuntimeError:
+             # Fallback: just log
+             logger.warning(f"Job {job_id}: Could not trigger screens worker (no event loop)")
+             # TODO: In production, use message queue instead
+
+     def cancel_job(self, job_id: str, reason: Optional[str] = None) -> bool:
+         """
+         Cancel a job.
+
+         Only jobs in non-terminal states can be cancelled.
+         """
+         job = self.get_job(job_id)
+         if not job:
+             return False
+
+         terminal_states = [JobStatus.COMPLETE, JobStatus.FAILED, JobStatus.CANCELLED]
+         if job.status in terminal_states:
+             logger.warning(f"Cannot cancel job {job_id} in terminal state {job.status}")
+             return False
+
+         message = f"Job cancelled{f': {reason}' if reason else ''}"
+
+         self.update_job_status(
+             job_id=job_id,
+             status=JobStatus.CANCELLED,
+             message=message
+         )
+
+         logger.info(f"Job {job_id} cancelled")
+         return True
+
+     def mark_job_failed(
+         self,
+         job_id: str,
+         error_message: str,
+         error_details: Optional[Dict[str, Any]] = None
+     ) -> None:
+         """
+         Mark a job as failed with error details.
+
+         This replaces mark_job_error() with better error tracking.
+         """
+         updates = {
+             'error_message': error_message
+         }
+
+         if error_details:
+             updates['error_details'] = error_details
+
+         self.firestore.update_job_status(
+             job_id=job_id,
+             status=JobStatus.FAILED,
+             progress=0,
+             message=f"Failed: {error_message}",
+             **updates
+         )
+         logger.error(f"Job {job_id} failed: {error_message}")
+
+     def increment_retry_count(self, job_id: str) -> int:
+         """
+         Increment retry count for a job.
+
+         Returns:
+             New retry count
+         """
+         job = self.get_job(job_id)
+         if not job:
+             return 0
+
+         new_count = job.retry_count + 1
+         self.update_job(job_id, {'retry_count': new_count})
+         return new_count
+
+     def append_worker_log(
+         self,
+         job_id: str,
+         worker: str,
+         level: str,
+         message: str,
+         max_logs: int = 500,
+         metadata: Optional[Dict[str, Any]] = None
+     ) -> None:
+         """
+         Append a log entry to the job's logs.
+
+         By default (USE_LOG_SUBCOLLECTION=true), logs are stored in a Firestore
+         subcollection (jobs/{job_id}/logs) to avoid the 1MB document size limit.
+
+         For backward compatibility (USE_LOG_SUBCOLLECTION=false), logs are stored
+         in the embedded worker_logs array using atomic ArrayUnion.
+
+         Args:
+             job_id: Job ID
+             worker: Worker name (audio, lyrics, screens, video, render, distribution)
+             level: Log level (DEBUG, INFO, WARNING, ERROR)
+             message: Log message
+             max_logs: Not used (kept for API compatibility)
+             metadata: Optional additional metadata dict
+         """
+         if settings.use_log_subcollection:
+             # New subcollection approach - avoids 1MB limit
+             log_entry = WorkerLogEntry.create(
+                 job_id=job_id,
+                 worker=worker,
+                 level=level,
+                 message=message,
+                 metadata=metadata
+             )
+             self.firestore.append_log_to_subcollection(job_id, log_entry)
+         else:
+             # Legacy embedded array approach
+             log_entry = {
+                 'timestamp': datetime.utcnow().isoformat() + 'Z',
+                 'level': level,
+                 'worker': worker,
+                 'message': message[:1000]  # Truncate long messages
+             }
+             self.firestore.append_worker_log(job_id, log_entry)
+
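A short sketch of both logging paths, assuming settings.use_log_subcollection toggles the behaviour exactly as documented above (job ID hypothetical):

    manager = JobManager()

    # With USE_LOG_SUBCOLLECTION=true this lands in jobs/abc12345/logs;
    # otherwise it is appended to the embedded worker_logs array, where
    # the metadata dict is dropped and messages are truncated to 1000 chars.
    manager.append_worker_log(
        job_id="abc12345",
        worker="audio",
        level="INFO",
        message="Stem separation finished",
        metadata={"stem_count": 4},
    )

    # Polling clients page with since_index to fetch only new entries:
    new_logs = manager.get_worker_logs("abc12345", since_index=120, worker="audio")
    total = manager.get_worker_logs_count("abc12345")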
+     def get_worker_logs(
+         self,
+         job_id: str,
+         since_index: int = 0,
+         worker: Optional[str] = None
+     ) -> List[Dict[str, Any]]:
+         """
+         Get worker logs for a job, optionally filtered by worker and index.
+
+         By default (USE_LOG_SUBCOLLECTION=true), logs are read from the
+         subcollection. Falls back to embedded array for older jobs.
+
+         Args:
+             job_id: Job ID
+             since_index: Return only logs after this index (for pagination)
+             worker: Filter by worker name (optional)
+
+         Returns:
+             List of log entries as dicts (in legacy format for API compatibility)
+         """
+         if settings.use_log_subcollection:
+             # Try subcollection first
+             subcollection_logs = self.firestore.get_logs_from_subcollection(
+                 job_id=job_id,
+                 offset=since_index,
+                 worker=worker,
+                 limit=500
+             )
+             if subcollection_logs:
+                 # Convert to legacy format for API compatibility
+                 return [log.to_legacy_dict() for log in subcollection_logs]
+             # Fall through to check embedded array for older jobs
+
+         # Embedded array approach (legacy jobs or fallback)
+         job = self.get_job(job_id)
+         if not job or not job.worker_logs:
+             return []
+
+         logs = [log.dict() if hasattr(log, 'dict') else log for log in job.worker_logs]
+
+         # Filter by index
+         if since_index > 0:
+             logs = logs[since_index:]
+
+         # Filter by worker
+         if worker:
+             logs = [log for log in logs if log.get('worker') == worker]
+
+         return logs
+
+     def get_worker_logs_count(self, job_id: str) -> int:
+         """
+         Get the total count of worker logs for a job.
+
+         Args:
+             job_id: Job ID
+
+         Returns:
+             Total count of logs
+         """
+         if settings.use_log_subcollection:
+             # Try subcollection first
+             count = self.firestore.get_logs_count_from_subcollection(job_id)
+             if count > 0:
+                 return count
+             # Fall through to check embedded array
+
+         # Embedded array (legacy jobs or fallback)
+         job = self.get_job(job_id)
+         if not job or not job.worker_logs:
+             return 0
+         return len(job.worker_logs)
+