karaoke-gen 0.90.1__py3-none-any.whl → 0.99.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. backend/.coveragerc +20 -0
  2. backend/.gitignore +37 -0
  3. backend/Dockerfile +43 -0
  4. backend/Dockerfile.base +74 -0
  5. backend/README.md +242 -0
  6. backend/__init__.py +0 -0
  7. backend/api/__init__.py +0 -0
  8. backend/api/dependencies.py +457 -0
  9. backend/api/routes/__init__.py +0 -0
  10. backend/api/routes/admin.py +835 -0
  11. backend/api/routes/audio_search.py +913 -0
  12. backend/api/routes/auth.py +348 -0
  13. backend/api/routes/file_upload.py +2112 -0
  14. backend/api/routes/health.py +409 -0
  15. backend/api/routes/internal.py +435 -0
  16. backend/api/routes/jobs.py +1629 -0
  17. backend/api/routes/review.py +652 -0
  18. backend/api/routes/themes.py +162 -0
  19. backend/api/routes/users.py +1513 -0
  20. backend/config.py +172 -0
  21. backend/main.py +157 -0
  22. backend/middleware/__init__.py +5 -0
  23. backend/middleware/audit_logging.py +124 -0
  24. backend/models/__init__.py +0 -0
  25. backend/models/job.py +519 -0
  26. backend/models/requests.py +123 -0
  27. backend/models/theme.py +153 -0
  28. backend/models/user.py +254 -0
  29. backend/models/worker_log.py +164 -0
  30. backend/pyproject.toml +29 -0
  31. backend/quick-check.sh +93 -0
  32. backend/requirements.txt +29 -0
  33. backend/run_tests.sh +60 -0
  34. backend/services/__init__.py +0 -0
  35. backend/services/audio_analysis_service.py +243 -0
  36. backend/services/audio_editing_service.py +278 -0
  37. backend/services/audio_search_service.py +702 -0
  38. backend/services/auth_service.py +630 -0
  39. backend/services/credential_manager.py +792 -0
  40. backend/services/discord_service.py +172 -0
  41. backend/services/dropbox_service.py +301 -0
  42. backend/services/email_service.py +1093 -0
  43. backend/services/encoding_interface.py +454 -0
  44. backend/services/encoding_service.py +502 -0
  45. backend/services/firestore_service.py +512 -0
  46. backend/services/flacfetch_client.py +573 -0
  47. backend/services/gce_encoding/README.md +72 -0
  48. backend/services/gce_encoding/__init__.py +22 -0
  49. backend/services/gce_encoding/main.py +589 -0
  50. backend/services/gce_encoding/requirements.txt +16 -0
  51. backend/services/gdrive_service.py +356 -0
  52. backend/services/job_logging.py +258 -0
  53. backend/services/job_manager.py +853 -0
  54. backend/services/job_notification_service.py +271 -0
  55. backend/services/langfuse_preloader.py +98 -0
  56. backend/services/local_encoding_service.py +590 -0
  57. backend/services/local_preview_encoding_service.py +407 -0
  58. backend/services/lyrics_cache_service.py +216 -0
  59. backend/services/metrics.py +413 -0
  60. backend/services/nltk_preloader.py +122 -0
  61. backend/services/packaging_service.py +287 -0
  62. backend/services/rclone_service.py +106 -0
  63. backend/services/spacy_preloader.py +65 -0
  64. backend/services/storage_service.py +209 -0
  65. backend/services/stripe_service.py +371 -0
  66. backend/services/structured_logging.py +254 -0
  67. backend/services/template_service.py +330 -0
  68. backend/services/theme_service.py +469 -0
  69. backend/services/tracing.py +543 -0
  70. backend/services/user_service.py +721 -0
  71. backend/services/worker_service.py +558 -0
  72. backend/services/youtube_service.py +112 -0
  73. backend/services/youtube_upload_service.py +445 -0
  74. backend/tests/__init__.py +4 -0
  75. backend/tests/conftest.py +224 -0
  76. backend/tests/emulator/__init__.py +7 -0
  77. backend/tests/emulator/conftest.py +109 -0
  78. backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
  79. backend/tests/emulator/test_emulator_integration.py +356 -0
  80. backend/tests/emulator/test_style_loading_direct.py +436 -0
  81. backend/tests/emulator/test_worker_logs_direct.py +229 -0
  82. backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
  83. backend/tests/requirements-test.txt +10 -0
  84. backend/tests/requirements.txt +6 -0
  85. backend/tests/test_admin_email_endpoints.py +411 -0
  86. backend/tests/test_api_integration.py +460 -0
  87. backend/tests/test_api_routes.py +93 -0
  88. backend/tests/test_audio_analysis_service.py +294 -0
  89. backend/tests/test_audio_editing_service.py +386 -0
  90. backend/tests/test_audio_search.py +1398 -0
  91. backend/tests/test_audio_services.py +378 -0
  92. backend/tests/test_auth_firestore.py +231 -0
  93. backend/tests/test_config_extended.py +68 -0
  94. backend/tests/test_credential_manager.py +377 -0
  95. backend/tests/test_dependencies.py +54 -0
  96. backend/tests/test_discord_service.py +244 -0
  97. backend/tests/test_distribution_services.py +820 -0
  98. backend/tests/test_dropbox_service.py +472 -0
  99. backend/tests/test_email_service.py +492 -0
  100. backend/tests/test_emulator_integration.py +322 -0
  101. backend/tests/test_encoding_interface.py +412 -0
  102. backend/tests/test_file_upload.py +1739 -0
  103. backend/tests/test_flacfetch_client.py +632 -0
  104. backend/tests/test_gdrive_service.py +524 -0
  105. backend/tests/test_instrumental_api.py +431 -0
  106. backend/tests/test_internal_api.py +343 -0
  107. backend/tests/test_job_creation_regression.py +583 -0
  108. backend/tests/test_job_manager.py +356 -0
  109. backend/tests/test_job_manager_notifications.py +329 -0
  110. backend/tests/test_job_notification_service.py +443 -0
  111. backend/tests/test_jobs_api.py +283 -0
  112. backend/tests/test_local_encoding_service.py +423 -0
  113. backend/tests/test_local_preview_encoding_service.py +567 -0
  114. backend/tests/test_main.py +87 -0
  115. backend/tests/test_models.py +918 -0
  116. backend/tests/test_packaging_service.py +382 -0
  117. backend/tests/test_requests.py +201 -0
  118. backend/tests/test_routes_jobs.py +282 -0
  119. backend/tests/test_routes_review.py +337 -0
  120. backend/tests/test_services.py +556 -0
  121. backend/tests/test_services_extended.py +112 -0
  122. backend/tests/test_spacy_preloader.py +119 -0
  123. backend/tests/test_storage_service.py +448 -0
  124. backend/tests/test_style_upload.py +261 -0
  125. backend/tests/test_template_service.py +295 -0
  126. backend/tests/test_theme_service.py +516 -0
  127. backend/tests/test_unicode_sanitization.py +522 -0
  128. backend/tests/test_upload_api.py +256 -0
  129. backend/tests/test_validate.py +156 -0
  130. backend/tests/test_video_worker_orchestrator.py +847 -0
  131. backend/tests/test_worker_log_subcollection.py +509 -0
  132. backend/tests/test_worker_logging.py +365 -0
  133. backend/tests/test_workers.py +1116 -0
  134. backend/tests/test_workers_extended.py +178 -0
  135. backend/tests/test_youtube_service.py +247 -0
  136. backend/tests/test_youtube_upload_service.py +568 -0
  137. backend/utils/test_data.py +27 -0
  138. backend/validate.py +173 -0
  139. backend/version.py +27 -0
  140. backend/workers/README.md +597 -0
  141. backend/workers/__init__.py +11 -0
  142. backend/workers/audio_worker.py +618 -0
  143. backend/workers/lyrics_worker.py +683 -0
  144. backend/workers/render_video_worker.py +483 -0
  145. backend/workers/screens_worker.py +535 -0
  146. backend/workers/style_helper.py +198 -0
  147. backend/workers/video_worker.py +1277 -0
  148. backend/workers/video_worker_orchestrator.py +701 -0
  149. backend/workers/worker_logging.py +278 -0
  150. karaoke_gen/instrumental_review/static/index.html +7 -4
  151. karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
  152. karaoke_gen/utils/__init__.py +163 -8
  153. karaoke_gen/video_background_processor.py +9 -4
  154. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/METADATA +1 -1
  155. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/RECORD +196 -46
  156. lyrics_transcriber/correction/agentic/agent.py +17 -6
  157. lyrics_transcriber/correction/agentic/providers/config.py +9 -5
  158. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -93
  159. lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
  160. lyrics_transcriber/correction/anchor_sequence.py +151 -37
  161. lyrics_transcriber/correction/corrector.py +192 -130
  162. lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
  163. lyrics_transcriber/correction/operations.py +24 -9
  164. lyrics_transcriber/correction/phrase_analyzer.py +18 -0
  165. lyrics_transcriber/frontend/package-lock.json +2 -2
  166. lyrics_transcriber/frontend/package.json +1 -1
  167. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
  168. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
  169. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
  170. lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
  171. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
  172. lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
  173. lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
  174. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
  175. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
  176. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
  177. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
  178. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  179. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
  180. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
  181. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
  182. lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
  183. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
  184. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
  185. lyrics_transcriber/frontend/src/theme.ts +42 -15
  186. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  187. lyrics_transcriber/frontend/vite.config.js +5 -0
  188. lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
  189. lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
  190. lyrics_transcriber/frontend/web_assets/index.html +6 -2
  191. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
  192. lyrics_transcriber/output/generator.py +17 -3
  193. lyrics_transcriber/output/video.py +60 -95
  194. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
  195. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/WHEEL +0 -0
  196. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/entry_points.txt +0 -0
  197. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/licenses/LICENSE +0 -0
backend/services/job_manager.py
@@ -0,0 +1,853 @@
+ """
+ Job management and queue operations.
+
+ This module handles the complete job lifecycle including:
+ - Job creation and initialization
+ - State transitions and validation
+ - Worker coordination (parallel audio + lyrics processing)
+ - Progress tracking and timeline events
+ - Error handling and retries
+ """
+ import logging
+ import uuid
+ from datetime import datetime
+ from typing import Optional, Dict, Any, List
+
+ from backend.config import settings
+ from backend.models.job import Job, JobStatus, JobCreate, STATE_TRANSITIONS
+ from backend.models.worker_log import WorkerLogEntry
+ from backend.services.firestore_service import FirestoreService
+ from backend.services.storage_service import StorageService
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class JobManager:
+     """Manager for job lifecycle and state."""
+
+     def __init__(self):
+         """Initialize job manager with required services."""
+         self.firestore = FirestoreService()
+         self.storage = StorageService()
+
+     def create_job(self, job_create: JobCreate) -> Job:
+         """
+         Create a new job with initial state PENDING.
+
+         Jobs start in PENDING state and transition to DOWNLOADING
+         when a worker picks them up.
+
+         Raises:
+             ValueError: If theme_id is not provided (all jobs require a theme)
+         """
+         # Enforce theme requirement - all jobs must have a theme
+         # This prevents unstyled videos from ever being generated
+         if not job_create.theme_id:
+             raise ValueError(
+                 "theme_id is required for all jobs. "
+                 "Use get_theme_service().get_default_theme_id() to get the default theme."
+             )
+
+         job_id = str(uuid.uuid4())[:8]
+
+         now = datetime.utcnow()
+         job = Job(
+             job_id=job_id,
+             status=JobStatus.PENDING,  # New state machine starts with PENDING
+             progress=0,
+             created_at=now,
+             updated_at=now,
+             url=str(job_create.url) if job_create.url else None,
+             artist=job_create.artist,
+             title=job_create.title,
+             # User preferences
+             enable_cdg=job_create.enable_cdg,
+             enable_txt=job_create.enable_txt,
+             enable_youtube_upload=job_create.enable_youtube_upload,
+             youtube_description=job_create.youtube_description,
+             youtube_description_template=job_create.youtube_description_template,  # video_worker reads this
+             webhook_url=job_create.webhook_url,
+             user_email=job_create.user_email,
+             # Distribution settings
+             brand_prefix=job_create.brand_prefix,
+             discord_webhook_url=job_create.discord_webhook_url,
+             dropbox_path=job_create.dropbox_path,
+             gdrive_folder_id=job_create.gdrive_folder_id,
+             # Theme configuration
+             theme_id=job_create.theme_id,
+             color_overrides=job_create.color_overrides,
+             style_params_gcs_path=job_create.style_params_gcs_path,
+             style_assets=job_create.style_assets,
+             # Two-phase workflow (Batch 6)
+             prep_only=job_create.prep_only,
+             finalise_only=job_create.finalise_only,
+             keep_brand_code=job_create.keep_brand_code,
+             # Request metadata (for tracking and filtering)
+             request_metadata=job_create.request_metadata,
+         )
+
+         self.firestore.create_job(job)
+         logger.info(f"Created new job {job_id} with status PENDING")
+
+         return job
+
+     def get_job(self, job_id: str) -> Optional[Job]:
+         """Get a job by ID."""
+         return self.firestore.get_job(job_id)
+
+     def update_job_status(
+         self,
+         job_id: str,
+         status: JobStatus,
+         progress: Optional[int] = None,
+         message: Optional[str] = None,
+         **kwargs
+     ) -> None:
+         """Update job status with timeline tracking."""
+         self.firestore.update_job_status(
+             job_id=job_id,
+             status=status,
+             progress=progress,
+             message=message,
+             **kwargs
+         )
+
+     def update_job(self, job_id: str, updates: Dict[str, Any]) -> None:
+         """Update job with arbitrary fields."""
+         self.firestore.update_job(job_id, updates)
+
+     def list_jobs(
+         self,
+         status: Optional[JobStatus] = None,
+         environment: Optional[str] = None,
+         client_id: Optional[str] = None,
+         created_after: Optional[datetime] = None,
+         created_before: Optional[datetime] = None,
+         user_email: Optional[str] = None,
+         limit: int = 100
+     ) -> List[Job]:
+         """
+         List jobs with optional filtering.
+
+         Args:
+             status: Filter by job status
+             environment: Filter by request_metadata.environment (test/production/development)
+             client_id: Filter by request_metadata.client_id (customer identifier)
+             created_after: Filter jobs created after this datetime
+             created_before: Filter jobs created before this datetime
+             user_email: Filter by user_email (job owner)
+             limit: Maximum number of jobs to return
+
+         Returns:
+             List of Job objects matching filters
+         """
+         return self.firestore.list_jobs(
+             status=status,
+             environment=environment,
+             client_id=client_id,
+             created_after=created_after,
+             created_before=created_before,
+             user_email=user_email,
+             limit=limit
+         )
+
+     def delete_jobs_by_filter(
+         self,
+         environment: Optional[str] = None,
+         client_id: Optional[str] = None,
+         status: Optional[JobStatus] = None,
+         created_before: Optional[datetime] = None,
+         delete_files: bool = True
+     ) -> Dict[str, Any]:
+         """
+         Delete multiple jobs matching filter criteria.
+
+         CAUTION: This is a destructive operation. Use carefully.
+
+         Args:
+             environment: Delete jobs with this environment (e.g., "test")
+             client_id: Delete jobs from this client
+             status: Delete jobs with this status
+             created_before: Delete jobs created before this datetime
+             delete_files: Also delete GCS files (default True)
+
+         Returns:
+             Dict with deletion statistics
+         """
+         # First get matching jobs to delete their files
+         if delete_files:
+             jobs = self.firestore.list_jobs(
+                 status=status,
+                 environment=environment,
+                 client_id=client_id,
+                 created_before=created_before,
+                 limit=10000  # High limit for deletion
+             )
+
+             files_deleted = 0
+             for job in jobs:
+                 # Delete files from various locations
+                 try:
+                     # Delete uploads folder
+                     self.storage.delete_folder(f"uploads/{job.job_id}/")
+                     # Delete jobs folder
+                     self.storage.delete_folder(f"jobs/{job.job_id}/")
+                     files_deleted += 1
+                 except Exception as e:
+                     logger.warning(f"Error deleting files for job {job.job_id}: {e}")
+
+         # Delete the jobs from Firestore
+         deleted_count = self.firestore.delete_jobs_by_filter(
+             environment=environment,
+             client_id=client_id,
+             status=status,
+             created_before=created_before
+         )
+
+         return {
+             'jobs_deleted': deleted_count,
+             'files_deleted': files_deleted if delete_files else 0
+         }
+
+     def mark_job_error(self, job_id: str, error_message: str) -> None:
+         """Mark a job as errored."""
+         self.firestore.update_job_status(
+             job_id=job_id,
+             status=JobStatus.ERROR,
+             progress=0,
+             message=error_message,
+             error_message=error_message
+         )
+         logger.error(f"Job {job_id} marked as error: {error_message}")
+
+     def get_output_urls(self, job_id: str) -> Dict[str, str]:
+         """Generate signed URLs for job output files."""
+         job = self.get_job(job_id)
+         if not job or not job.output_files:
+             return {}
+
+         urls = {}
+         for file_type, gcs_path in job.output_files.items():
+             try:
+                 urls[file_type] = self.storage.generate_signed_url(gcs_path, expiration_minutes=120)
+             except Exception as e:
+                 logger.error(f"Error generating URL for {file_type}: {e}")
+
+         return urls
+
+     def delete_job(self, job_id: str, delete_files: bool = True) -> None:
+         """Delete a job, its files, and its logs subcollection."""
+         if delete_files:
+             job = self.get_job(job_id)
+             if job and job.output_files:
+                 for gcs_path in job.output_files.values():
+                     try:
+                         self.storage.delete_file(gcs_path)
+                     except Exception as e:
+                         logger.error(f"Error deleting file {gcs_path}: {e}")
+
+         # Delete logs subcollection first (must be done before deleting parent doc)
+         try:
+             deleted_logs = self.firestore.delete_logs_subcollection(job_id)
+             if deleted_logs > 0:
+                 logger.info(f"Deleted {deleted_logs} log entries for job {job_id}")
+         except Exception as e:
+             logger.warning(f"Error deleting logs subcollection for job {job_id}: {e}")
+
+         self.firestore.delete_job(job_id)
+         logger.info(f"Deleted job {job_id}")
+
+     def validate_state_transition(self, job_id: str, new_status: JobStatus) -> bool:
+         """
+         Validate that a state transition is legal.
+
+         Returns:
+             True if transition is valid, False otherwise
+         """
+         job = self.get_job(job_id)
+         if not job:
+             logger.error(f"Job {job_id} not found")
+             return False
+
+         current_status = job.status
+         valid_transitions = STATE_TRANSITIONS.get(current_status, [])
+
+         if new_status not in valid_transitions:
+             logger.error(
+                 f"Invalid state transition for job {job_id}: "
+                 f"{current_status} -> {new_status}. "
+                 f"Valid transitions: {valid_transitions}"
+             )
+             return False
+
+         return True
+
+     def transition_to_state(
+         self,
+         job_id: str,
+         new_status: JobStatus,
+         progress: Optional[int] = None,
+         message: Optional[str] = None,
+         state_data_updates: Optional[Dict[str, Any]] = None
+     ) -> bool:
+         """
+         Transition job to new state with validation.
+
+         Args:
+             job_id: Job ID
+             new_status: Target state
+             progress: Progress percentage (0-100)
+             message: Timeline message
+             state_data_updates: Updates to state_data field
+
+         Returns:
+             True if transition succeeded, False otherwise
+         """
+         if not self.validate_state_transition(job_id, new_status):
+             return False
+
+         updates = {
+             'status': new_status,
+             'updated_at': datetime.utcnow()
+         }
+
+         if progress is not None:
+             updates['progress'] = progress
+
+         # Generate review token when entering AWAITING_REVIEW state
+         if new_status == JobStatus.AWAITING_REVIEW:
+             from backend.api.dependencies import generate_review_token, get_review_token_expiry
+             review_token = generate_review_token()
+             review_token_expires = get_review_token_expiry(hours=48)  # 48 hour expiry
+             updates['review_token'] = review_token
+             updates['review_token_expires_at'] = review_token_expires
+             logger.info(f"Generated review token for job {job_id}, expires in 48 hours")
+
+         # Generate instrumental token when entering AWAITING_INSTRUMENTAL_SELECTION state
+         if new_status == JobStatus.AWAITING_INSTRUMENTAL_SELECTION:
+             from backend.api.dependencies import generate_review_token, get_review_token_expiry
+             instrumental_token = generate_review_token()  # Reuse same token generator
+             instrumental_token_expires = get_review_token_expiry(hours=48)  # 48 hour expiry
+             updates['instrumental_token'] = instrumental_token
+             updates['instrumental_token_expires_at'] = instrumental_token_expires
+             logger.info(f"Generated instrumental token for job {job_id}, expires in 48 hours")
+
+         # If we have state_data_updates, merge them with existing state_data
+         merged_state_data = None
+         if state_data_updates:
+             job = self.get_job(job_id)
+             if job:
+                 merged_state_data = {**job.state_data, **state_data_updates}
+
+         # Update job status (includes timeline event), passing state_data if present
+         if merged_state_data is not None:
+             self.update_job_status(
+                 job_id=job_id,
+                 status=new_status,
+                 progress=progress,
+                 message=message,
+                 state_data=merged_state_data
+             )
+         else:
+             self.update_job_status(
+                 job_id=job_id,
+                 status=new_status,
+                 progress=progress,
+                 message=message
+             )
+
+         # Apply review token update separately if generated
+         if new_status == JobStatus.AWAITING_REVIEW and 'review_token' in updates:
+             self.firestore.update_job(job_id, {
+                 'review_token': updates['review_token'],
+                 'review_token_expires_at': updates['review_token_expires_at']
+             })
+
+         # Apply instrumental token update separately if generated
+         if new_status == JobStatus.AWAITING_INSTRUMENTAL_SELECTION and 'instrumental_token' in updates:
+             self.firestore.update_job(job_id, {
+                 'instrumental_token': updates['instrumental_token'],
+                 'instrumental_token_expires_at': updates['instrumental_token_expires_at']
+             })
+
+         logger.info(f"Job {job_id} transitioned to {new_status}")
+
+         # Trigger notifications asynchronously (fire-and-forget)
+         self._trigger_state_notifications(job_id, new_status)
+
+         return True
+
+     def _trigger_state_notifications(self, job_id: str, new_status: JobStatus) -> None:
+         """
+         Trigger email notifications based on state transitions.
+
+         This is fire-and-forget - notification failures don't affect job processing.
+
+         Args:
+             job_id: Job ID
+             new_status: New job status
+         """
+         import asyncio
+
+         try:
+             # Get the job to access user info
+             job = self.get_job(job_id)
+             if not job or not job.user_email:
+                 logger.debug(f"No user email for job {job_id}, skipping notifications")
+                 return
+
+             # Job completion notification
+             if new_status == JobStatus.COMPLETE:
+                 self._schedule_completion_email(job)
+
+             # Idle reminder scheduling for blocking states
+             elif new_status in [JobStatus.AWAITING_REVIEW, JobStatus.AWAITING_INSTRUMENTAL_SELECTION]:
+                 self._schedule_idle_reminder(job, new_status)
+
+         except Exception as e:
+             # Never let notification failures affect job processing
+             logger.error(f"Error triggering notifications for job {job_id}: {e}")
+
+     def _schedule_completion_email(self, job: Job) -> None:
+         """
+         Schedule sending a job completion email.
+
+         Uses asyncio to fire-and-forget the email sending.
+         """
+         import asyncio
+         import threading
+
+         try:
+             from backend.services.job_notification_service import get_job_notification_service
+
+             notification_service = get_job_notification_service()
+
+             # Get youtube, dropbox URLs, and brand_code from state_data (may be None)
+             state_data = job.state_data or {}
+             youtube_url = state_data.get('youtube_url')
+             dropbox_url = state_data.get('dropbox_link')
+             brand_code = state_data.get('brand_code')
+
+             # Create async task (fire-and-forget)
+             async def send_email():
+                 await notification_service.send_job_completion_email(
+                     job_id=job.job_id,
+                     user_email=job.user_email,
+                     user_name=None,  # Could fetch from user service if needed
+                     artist=job.artist,
+                     title=job.title,
+                     youtube_url=youtube_url,
+                     dropbox_url=dropbox_url,
+                     brand_code=brand_code,
+                 )
+
+             # Try to get existing event loop, create new one if none exists
+             try:
+                 loop = asyncio.get_running_loop()
+                 # If we're in an async context, create a task
+                 loop.create_task(send_email())
+             except RuntimeError:
+                 # No event loop - we're likely in a sync context
+                 # Use daemon thread to avoid blocking job completion
+                 def run_in_thread():
+                     asyncio.run(send_email())
+                 thread = threading.Thread(target=run_in_thread, daemon=True)
+                 thread.start()
+
+             logger.info(f"Scheduled completion email for job {job.job_id}")
+
+         except Exception as e:
+             logger.error(f"Failed to schedule completion email for job {job.job_id}: {e}")
+
+     def _schedule_idle_reminder(self, job: Job, new_status: JobStatus) -> None:
+         """
+         Schedule an idle reminder for a blocking state.
+
+         Records the timestamp when the blocking state was entered and
+         schedules a Cloud Tasks task for 5 minutes later.
+         """
+         import asyncio
+         import threading
+
+         try:
+             # Record when we entered blocking state (for idle detection)
+             blocking_entered_at = datetime.utcnow().isoformat()
+
+             action_type = "lyrics" if new_status == JobStatus.AWAITING_REVIEW else "instrumental"
+
+             # Update state_data with blocking state info (handle None state_data)
+             existing_state_data = job.state_data or {}
+             state_data_update = {
+                 'blocking_state_entered_at': blocking_entered_at,
+                 'blocking_action_type': action_type,
+                 'reminder_sent': False,  # Will be set to True after reminder is sent
+             }
+
+             self.firestore.update_job(job.job_id, {
+                 'state_data': {**existing_state_data, **state_data_update}
+             })
+
+             # Schedule the idle reminder check via worker service (5 min delay)
+             from backend.services.worker_service import get_worker_service
+
+             async def schedule_reminder():
+                 worker_service = get_worker_service()
+                 await worker_service.schedule_idle_reminder(job.job_id)
+
+             # Try to get existing event loop, create new one if none exists
+             try:
+                 loop = asyncio.get_running_loop()
+                 loop.create_task(schedule_reminder())
+             except RuntimeError:
+                 # No event loop - we're in a sync context
+                 # Use daemon thread to avoid blocking job processing
+                 def run_in_thread():
+                     asyncio.run(schedule_reminder())
+                 thread = threading.Thread(target=run_in_thread, daemon=True)
+                 thread.start()
+
+             logger.info(f"Scheduled idle reminder for job {job.job_id} ({action_type})")
+
+         except Exception as e:
+             logger.error(f"Failed to schedule idle reminder for job {job.job_id}: {e}")
+
+     def update_state_data(self, job_id: str, key: str, value: Any) -> None:
+         """
+         Update a specific key in the job's state_data field.
+
+         This is used by workers to store stage-specific metadata.
+         """
+         job = self.get_job(job_id)
+         if not job:
+             logger.error(f"Job {job_id} not found")
+             return
+
+         state_data = job.state_data.copy()
+         state_data[key] = value
+
+         self.update_job(job_id, {'state_data': state_data})
+         logger.debug(f"Job {job_id} state_data updated: {key} = {value}")
+
+     def fail_job(self, job_id: str, error_message: str, error_details: Optional[Dict[str, Any]] = None) -> bool:
+         """
+         Mark a job as failed with error information.
+
+         Args:
+             job_id: Job ID
+             error_message: Human-readable error message
+             error_details: Optional structured error details
+
+         Returns:
+             True if successful
+         """
+         try:
+             # Update error fields
+             self.update_job(job_id, {
+                 'error_message': error_message,
+                 'error_details': error_details or {}
+             })
+
+             # Use update_job_status which handles timeline
+             self.update_job_status(
+                 job_id=job_id,
+                 status=JobStatus.FAILED,
+                 message=error_message
+             )
+
+             logger.error(f"Job {job_id} failed: {error_message}")
+             return True
+         except Exception as e:
+             logger.error(f"Error marking job {job_id} as failed: {e}")
+             return False
+
+     def update_file_url(self, job_id: str, category: str, file_type: str, url: str) -> None:
+         """
+         Update a file URL in the job's file_urls structure.
+
+         Args:
+             job_id: Job ID
+             category: Category (e.g., "stems", "lyrics", "finals")
+             file_type: File type within category (e.g., "clean", "lrc", "lossless_4k_mp4")
+             url: GCS URL
+         """
+         job = self.get_job(job_id)
+         if not job:
+             logger.error(f"Job {job_id} not found")
+             return
+
+         file_urls = job.file_urls.copy()
+         if category not in file_urls:
+             file_urls[category] = {}
+
+         file_urls[category][file_type] = url
+
+         self.update_job(job_id, {'file_urls': file_urls})
+         logger.debug(f"Job {job_id} file URL updated: {category}.{file_type}")
+
+     def check_parallel_processing_complete(self, job_id: str) -> bool:
+         """
+         Check if both parallel tracks (audio + lyrics) are complete.
+
+         This is called after audio_complete or lyrics_complete to determine
+         if we can proceed to screen generation.
+
+         Returns:
+             True if both tracks complete, False otherwise
+         """
+         job = self.get_job(job_id)
+         if not job:
+             return False
+
+         audio_complete = job.state_data.get('audio_complete', False)
+         lyrics_complete = job.state_data.get('lyrics_complete', False)
+
+         return audio_complete and lyrics_complete
+
+     def mark_audio_complete(self, job_id: str) -> None:
+         """
+         Mark audio processing as complete and check if can proceed.
+
+         If lyrics are also complete, automatically triggers screens worker.
+         """
+         self.update_state_data(job_id, 'audio_complete', True)
+
+         if self.check_parallel_processing_complete(job_id):
+             logger.info(f"Job {job_id}: Both audio and lyrics complete, triggering screens worker")
+             # Transition happens in screens worker
+             # We just trigger it here
+             self._trigger_screens_worker(job_id)
+
+     def mark_lyrics_complete(self, job_id: str) -> None:
+         """
+         Mark lyrics processing as complete and check if can proceed.
+
+         If audio is also complete, automatically triggers screens worker.
+         """
+         self.update_state_data(job_id, 'lyrics_complete', True)
+
+         if self.check_parallel_processing_complete(job_id):
+             logger.info(f"Job {job_id}: Both audio and lyrics complete, triggering screens worker")
+             # Transition happens in screens worker
+             # We just trigger it here
+             self._trigger_screens_worker(job_id)
+
+     def _trigger_screens_worker(self, job_id: str) -> None:
+         """
+         Trigger screens generation worker.
+
+         Uses WorkerService to make HTTP call to internal API.
+         This must be async, so we use asyncio to create a task.
+         """
+         import asyncio
+         from backend.services.worker_service import get_worker_service
+
+         logger.info(f"Job {job_id}: Triggering screens worker")
+
+         # Create async task to trigger worker
+         # This allows us to call async code from sync context
+         async def _trigger():
+             worker_service = get_worker_service()
+             await worker_service.trigger_screens_worker(job_id)
+
+         # Create task in event loop
+         try:
+             loop = asyncio.get_event_loop()
+             if loop.is_running():
+                 # If loop is already running, create task
+                 asyncio.create_task(_trigger())
+             else:
+                 # If no loop, run directly
+                 asyncio.run(_trigger())
+         except RuntimeError:
+             # Fallback: just log
+             logger.warning(f"Job {job_id}: Could not trigger screens worker (no event loop)")
+             # TODO: In production, use message queue instead
+
+     def cancel_job(self, job_id: str, reason: Optional[str] = None) -> bool:
+         """
+         Cancel a job.
+
+         Only jobs in non-terminal states can be cancelled.
+         """
+         job = self.get_job(job_id)
+         if not job:
+             return False
+
+         terminal_states = [JobStatus.COMPLETE, JobStatus.FAILED, JobStatus.CANCELLED]
+         if job.status in terminal_states:
+             logger.warning(f"Cannot cancel job {job_id} in terminal state {job.status}")
+             return False
+
+         message = f"Job cancelled{f': {reason}' if reason else ''}"
+
+         self.update_job_status(
+             job_id=job_id,
+             status=JobStatus.CANCELLED,
+             message=message
+         )
+
+         logger.info(f"Job {job_id} cancelled")
+         return True
+
+     def mark_job_failed(
+         self,
+         job_id: str,
+         error_message: str,
+         error_details: Optional[Dict[str, Any]] = None
+     ) -> None:
+         """
+         Mark a job as failed with error details.
+
+         This replaces mark_job_error() with better error tracking.
+         """
+         updates = {
+             'error_message': error_message
+         }
+
+         if error_details:
+             updates['error_details'] = error_details
+
+         self.firestore.update_job_status(
+             job_id=job_id,
+             status=JobStatus.FAILED,
+             progress=0,
+             message=f"Failed: {error_message}",
+             **updates
+         )
+         logger.error(f"Job {job_id} failed: {error_message}")
+
+     def increment_retry_count(self, job_id: str) -> int:
+         """
+         Increment retry count for a job.
+
+         Returns:
+             New retry count
+         """
+         job = self.get_job(job_id)
+         if not job:
+             return 0
+
+         new_count = job.retry_count + 1
+         self.update_job(job_id, {'retry_count': new_count})
+         return new_count
+
+     def append_worker_log(
+         self,
+         job_id: str,
+         worker: str,
+         level: str,
+         message: str,
+         max_logs: int = 500,
+         metadata: Optional[Dict[str, Any]] = None
+     ) -> None:
+         """
+         Append a log entry to the job's logs.
+
+         By default (USE_LOG_SUBCOLLECTION=true), logs are stored in a Firestore
+         subcollection (jobs/{job_id}/logs) to avoid the 1MB document size limit.
+
+         For backward compatibility (USE_LOG_SUBCOLLECTION=false), logs are stored
+         in the embedded worker_logs array using atomic ArrayUnion.
+
+         Args:
+             job_id: Job ID
+             worker: Worker name (audio, lyrics, screens, video, render, distribution)
+             level: Log level (DEBUG, INFO, WARNING, ERROR)
+             message: Log message
+             max_logs: Not used (kept for API compatibility)
+             metadata: Optional additional metadata dict
+         """
+         if settings.use_log_subcollection:
+             # New subcollection approach - avoids 1MB limit
+             log_entry = WorkerLogEntry.create(
+                 job_id=job_id,
+                 worker=worker,
+                 level=level,
+                 message=message,
+                 metadata=metadata
+             )
+             self.firestore.append_log_to_subcollection(job_id, log_entry)
+         else:
+             # Legacy embedded array approach
+             log_entry = {
+                 'timestamp': datetime.utcnow().isoformat() + 'Z',
+                 'level': level,
+                 'worker': worker,
+                 'message': message[:1000]  # Truncate long messages
+             }
+             self.firestore.append_worker_log(job_id, log_entry)
+
+     def get_worker_logs(
+         self,
+         job_id: str,
+         since_index: int = 0,
+         worker: Optional[str] = None
+     ) -> List[Dict[str, Any]]:
+         """
+         Get worker logs for a job, optionally filtered by worker and index.
+
+         By default (USE_LOG_SUBCOLLECTION=true), logs are read from the
+         subcollection. Falls back to embedded array for older jobs.
+
+         Args:
+             job_id: Job ID
+             since_index: Return only logs after this index (for pagination)
+             worker: Filter by worker name (optional)
+
+         Returns:
+             List of log entries as dicts (in legacy format for API compatibility)
+         """
+         if settings.use_log_subcollection:
+             # Try subcollection first
+             subcollection_logs = self.firestore.get_logs_from_subcollection(
+                 job_id=job_id,
+                 offset=since_index,
+                 worker=worker,
+                 limit=500
+             )
+             if subcollection_logs:
+                 # Convert to legacy format for API compatibility
+                 return [log.to_legacy_dict() for log in subcollection_logs]
+             # Fall through to check embedded array for older jobs
+
+         # Embedded array approach (legacy jobs or fallback)
+         job = self.get_job(job_id)
+         if not job or not job.worker_logs:
+             return []
+
+         logs = [log.dict() if hasattr(log, 'dict') else log for log in job.worker_logs]
+
+         # Filter by index
+         if since_index > 0:
+             logs = logs[since_index:]
+
+         # Filter by worker
+         if worker:
+             logs = [log for log in logs if log.get('worker') == worker]
+
+         return logs
+
+     def get_worker_logs_count(self, job_id: str) -> int:
+         """
+         Get the total count of worker logs for a job.
+
+         Args:
+             job_id: Job ID
+
+         Returns:
+             Total count of logs
+         """
+         if settings.use_log_subcollection:
+             # Try subcollection first
+             count = self.firestore.get_logs_count_from_subcollection(job_id)
+             if count > 0:
+                 return count
+             # Fall through to check embedded array
+
+         # Embedded array (legacy jobs or fallback)
+         job = self.get_job(job_id)
+         if not job or not job.worker_logs:
+             return 0
+         return len(job.worker_logs)
+
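
Two implementation patterns in the new job_manager.py are worth calling out for reviewers. First, state transitions are validated against a STATE_TRANSITIONS table rather than scattered ad-hoc checks, so illegal moves are rejected in one place. Second, notifications and worker triggers are scheduled fire-and-forget: if an event loop is already running, the coroutine is attached as a task; otherwise it runs on a daemon thread so job processing is never blocked. The following is a minimal standalone sketch of both patterns, not the package's actual code; the statuses, transition table, and send_email coroutine are simplified stand-ins.

import asyncio
import threading
import time
from enum import Enum


class JobStatus(str, Enum):
    PENDING = "pending"
    AWAITING_REVIEW = "awaiting_review"
    COMPLETE = "complete"


# Illustrative table: each status maps to the statuses it may legally move to;
# terminal states map to an empty list. The real package defines more states.
STATE_TRANSITIONS = {
    JobStatus.PENDING: [JobStatus.AWAITING_REVIEW],
    JobStatus.AWAITING_REVIEW: [JobStatus.COMPLETE],
    JobStatus.COMPLETE: [],
}


def is_valid_transition(current: JobStatus, new: JobStatus) -> bool:
    # A transition is legal only if the table lists it for the current state.
    return new in STATE_TRANSITIONS.get(current, [])


async def send_email(job_id: str) -> None:
    # Hypothetical stand-in for the real notification coroutine.
    print(f"notification sent for job {job_id}")


def fire_and_forget(coro) -> None:
    # Mirrors the pattern in _schedule_completion_email(): piggyback on a
    # running event loop when one exists, otherwise run the coroutine on a
    # daemon thread so the synchronous caller is never blocked.
    try:
        loop = asyncio.get_running_loop()
        loop.create_task(coro)
    except RuntimeError:
        threading.Thread(target=lambda: asyncio.run(coro), daemon=True).start()


if __name__ == "__main__":
    assert is_valid_transition(JobStatus.AWAITING_REVIEW, JobStatus.COMPLETE)
    assert not is_valid_transition(JobStatus.COMPLETE, JobStatus.PENDING)
    fire_and_forget(send_email("abc12345"))
    time.sleep(0.1)  # let the daemon thread finish before the interpreter exits

Running the sketch sends the sample notification only when the table permits the transition. The daemon-thread fallback is best-effort by design; the TODO in _trigger_screens_worker notes that a message queue would make such triggers durable in production.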