karaoke-gen 0.86.7__py3-none-any.whl → 0.96.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. backend/.coveragerc +20 -0
  2. backend/.gitignore +37 -0
  3. backend/Dockerfile +43 -0
  4. backend/Dockerfile.base +74 -0
  5. backend/README.md +242 -0
  6. backend/__init__.py +0 -0
  7. backend/api/__init__.py +0 -0
  8. backend/api/dependencies.py +457 -0
  9. backend/api/routes/__init__.py +0 -0
  10. backend/api/routes/admin.py +742 -0
  11. backend/api/routes/audio_search.py +903 -0
  12. backend/api/routes/auth.py +348 -0
  13. backend/api/routes/file_upload.py +2076 -0
  14. backend/api/routes/health.py +344 -0
  15. backend/api/routes/internal.py +435 -0
  16. backend/api/routes/jobs.py +1610 -0
  17. backend/api/routes/review.py +652 -0
  18. backend/api/routes/themes.py +162 -0
  19. backend/api/routes/users.py +1014 -0
  20. backend/config.py +172 -0
  21. backend/main.py +133 -0
  22. backend/middleware/__init__.py +5 -0
  23. backend/middleware/audit_logging.py +124 -0
  24. backend/models/__init__.py +0 -0
  25. backend/models/job.py +519 -0
  26. backend/models/requests.py +123 -0
  27. backend/models/theme.py +153 -0
  28. backend/models/user.py +254 -0
  29. backend/models/worker_log.py +164 -0
  30. backend/pyproject.toml +29 -0
  31. backend/quick-check.sh +93 -0
  32. backend/requirements.txt +29 -0
  33. backend/run_tests.sh +60 -0
  34. backend/services/__init__.py +0 -0
  35. backend/services/audio_analysis_service.py +243 -0
  36. backend/services/audio_editing_service.py +278 -0
  37. backend/services/audio_search_service.py +702 -0
  38. backend/services/auth_service.py +630 -0
  39. backend/services/credential_manager.py +792 -0
  40. backend/services/discord_service.py +172 -0
  41. backend/services/dropbox_service.py +301 -0
  42. backend/services/email_service.py +1093 -0
  43. backend/services/encoding_interface.py +454 -0
  44. backend/services/encoding_service.py +405 -0
  45. backend/services/firestore_service.py +512 -0
  46. backend/services/flacfetch_client.py +573 -0
  47. backend/services/gce_encoding/README.md +72 -0
  48. backend/services/gce_encoding/__init__.py +22 -0
  49. backend/services/gce_encoding/main.py +589 -0
  50. backend/services/gce_encoding/requirements.txt +16 -0
  51. backend/services/gdrive_service.py +356 -0
  52. backend/services/job_logging.py +258 -0
  53. backend/services/job_manager.py +842 -0
  54. backend/services/job_notification_service.py +271 -0
  55. backend/services/local_encoding_service.py +590 -0
  56. backend/services/local_preview_encoding_service.py +407 -0
  57. backend/services/lyrics_cache_service.py +216 -0
  58. backend/services/metrics.py +413 -0
  59. backend/services/packaging_service.py +287 -0
  60. backend/services/rclone_service.py +106 -0
  61. backend/services/storage_service.py +209 -0
  62. backend/services/stripe_service.py +275 -0
  63. backend/services/structured_logging.py +254 -0
  64. backend/services/template_service.py +330 -0
  65. backend/services/theme_service.py +469 -0
  66. backend/services/tracing.py +543 -0
  67. backend/services/user_service.py +721 -0
  68. backend/services/worker_service.py +558 -0
  69. backend/services/youtube_service.py +112 -0
  70. backend/services/youtube_upload_service.py +445 -0
  71. backend/tests/__init__.py +4 -0
  72. backend/tests/conftest.py +224 -0
  73. backend/tests/emulator/__init__.py +7 -0
  74. backend/tests/emulator/conftest.py +88 -0
  75. backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
  76. backend/tests/emulator/test_emulator_integration.py +356 -0
  77. backend/tests/emulator/test_style_loading_direct.py +436 -0
  78. backend/tests/emulator/test_worker_logs_direct.py +229 -0
  79. backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
  80. backend/tests/requirements-test.txt +10 -0
  81. backend/tests/requirements.txt +6 -0
  82. backend/tests/test_admin_email_endpoints.py +411 -0
  83. backend/tests/test_api_integration.py +460 -0
  84. backend/tests/test_api_routes.py +93 -0
  85. backend/tests/test_audio_analysis_service.py +294 -0
  86. backend/tests/test_audio_editing_service.py +386 -0
  87. backend/tests/test_audio_search.py +1398 -0
  88. backend/tests/test_audio_services.py +378 -0
  89. backend/tests/test_auth_firestore.py +231 -0
  90. backend/tests/test_config_extended.py +68 -0
  91. backend/tests/test_credential_manager.py +377 -0
  92. backend/tests/test_dependencies.py +54 -0
  93. backend/tests/test_discord_service.py +244 -0
  94. backend/tests/test_distribution_services.py +820 -0
  95. backend/tests/test_dropbox_service.py +472 -0
  96. backend/tests/test_email_service.py +492 -0
  97. backend/tests/test_emulator_integration.py +322 -0
  98. backend/tests/test_encoding_interface.py +412 -0
  99. backend/tests/test_file_upload.py +1739 -0
  100. backend/tests/test_flacfetch_client.py +632 -0
  101. backend/tests/test_gdrive_service.py +524 -0
  102. backend/tests/test_instrumental_api.py +431 -0
  103. backend/tests/test_internal_api.py +343 -0
  104. backend/tests/test_job_creation_regression.py +583 -0
  105. backend/tests/test_job_manager.py +339 -0
  106. backend/tests/test_job_manager_notifications.py +329 -0
  107. backend/tests/test_job_notification_service.py +443 -0
  108. backend/tests/test_jobs_api.py +273 -0
  109. backend/tests/test_local_encoding_service.py +423 -0
  110. backend/tests/test_local_preview_encoding_service.py +567 -0
  111. backend/tests/test_main.py +87 -0
  112. backend/tests/test_models.py +918 -0
  113. backend/tests/test_packaging_service.py +382 -0
  114. backend/tests/test_requests.py +201 -0
  115. backend/tests/test_routes_jobs.py +282 -0
  116. backend/tests/test_routes_review.py +337 -0
  117. backend/tests/test_services.py +556 -0
  118. backend/tests/test_services_extended.py +112 -0
  119. backend/tests/test_storage_service.py +448 -0
  120. backend/tests/test_style_upload.py +261 -0
  121. backend/tests/test_template_service.py +295 -0
  122. backend/tests/test_theme_service.py +516 -0
  123. backend/tests/test_unicode_sanitization.py +522 -0
  124. backend/tests/test_upload_api.py +256 -0
  125. backend/tests/test_validate.py +156 -0
  126. backend/tests/test_video_worker_orchestrator.py +847 -0
  127. backend/tests/test_worker_log_subcollection.py +509 -0
  128. backend/tests/test_worker_logging.py +365 -0
  129. backend/tests/test_workers.py +1116 -0
  130. backend/tests/test_workers_extended.py +178 -0
  131. backend/tests/test_youtube_service.py +247 -0
  132. backend/tests/test_youtube_upload_service.py +568 -0
  133. backend/validate.py +173 -0
  134. backend/version.py +27 -0
  135. backend/workers/README.md +597 -0
  136. backend/workers/__init__.py +11 -0
  137. backend/workers/audio_worker.py +618 -0
  138. backend/workers/lyrics_worker.py +683 -0
  139. backend/workers/render_video_worker.py +483 -0
  140. backend/workers/screens_worker.py +525 -0
  141. backend/workers/style_helper.py +198 -0
  142. backend/workers/video_worker.py +1277 -0
  143. backend/workers/video_worker_orchestrator.py +701 -0
  144. backend/workers/worker_logging.py +278 -0
  145. karaoke_gen/instrumental_review/static/index.html +7 -4
  146. karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
  147. karaoke_gen/style_loader.py +3 -1
  148. karaoke_gen/utils/__init__.py +163 -8
  149. karaoke_gen/video_background_processor.py +9 -4
  150. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/METADATA +2 -1
  151. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/RECORD +187 -42
  152. lyrics_transcriber/correction/agentic/providers/config.py +9 -5
  153. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +1 -51
  154. lyrics_transcriber/correction/corrector.py +192 -130
  155. lyrics_transcriber/correction/operations.py +24 -9
  156. lyrics_transcriber/frontend/package-lock.json +2 -2
  157. lyrics_transcriber/frontend/package.json +1 -1
  158. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
  159. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
  160. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
  161. lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
  162. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
  163. lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
  164. lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
  165. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
  166. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
  167. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
  168. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
  169. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  170. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
  171. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
  172. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
  173. lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
  174. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
  175. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
  176. lyrics_transcriber/frontend/src/theme.ts +42 -15
  177. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  178. lyrics_transcriber/frontend/vite.config.js +5 -0
  179. lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
  180. lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
  181. lyrics_transcriber/frontend/web_assets/index.html +6 -2
  182. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
  183. lyrics_transcriber/output/generator.py +17 -3
  184. lyrics_transcriber/output/video.py +60 -95
  185. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
  186. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/WHEEL +0 -0
  187. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/entry_points.txt +0 -0
  188. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,597 @@
1
+ # Karaoke Generation Workers
2
+
3
+ This directory contains background workers that handle long-running karaoke generation tasks.
4
+
5
+ ## Overview
6
+
7
+ Workers are asynchronous Python functions that process different stages of the karaoke generation workflow. They are triggered via internal HTTP API endpoints and run as FastAPI background tasks.
8
+
9
+ ## Architecture
10
+
11
+ ```
12
+ Job Submission
13
+
14
+ ├─→ Audio Worker (parallel)
15
+ │ ├─→ Stage 1: Clean instrumental (3-5 min)
16
+ │ ├─→ Stage 2: Backing vocals (2-3 min)
17
+ │ └─→ mark_audio_complete()
18
+
19
+ └─→ Lyrics Worker (parallel)
20
+ ├─→ Fetch reference lyrics
21
+ ├─→ Transcribe with AudioShake (1-2 min)
22
+ ├─→ Auto-correct
23
+ └─→ mark_lyrics_complete()
24
+
25
+ ↓ (when both complete)
26
+
27
+ Screens Worker (auto-triggered)
28
+ ├─→ Generate title screen
29
+ ├─→ Generate end screen
30
+ └─→ AWAITING_INSTRUMENTAL_SELECTION
31
+
32
+ ↓ (user selects)
33
+
34
+ Video Worker (user-triggered)
35
+ ├─→ Remux with instrumental
36
+ ├─→ Encode multiple formats
37
+ ├─→ Package (CDG, TXT)
38
+ └─→ COMPLETE
39
+ ```
40
+
41
+ ## Workers
42
+
43
+ ### 1. Audio Worker (`audio_worker.py`)
44
+
45
+ **Purpose:** Separates audio into stems using the GPU-accelerated Modal API
46
+
47
+ **Stages:**
48
+ 1. **Stage 1:** Clean instrumental separation
49
+ - Model: `model_bs_roformer_ep_317_sdr_12.9755.ckpt`
50
+ - Also: 6-stem separation (bass, drums, guitar, piano, other, vocals)
51
+ - Time: 3-5 minutes
52
+
53
+ 2. **Stage 2:** Backing vocals separation
54
+ - Model: `mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt`
55
+ - Separates lead vocals from backing vocals
56
+ - Time: 2-3 minutes
57
+
58
+ 3. **Post-Processing:**
59
+ - Combine instrumentals
60
+ - Normalize audio levels
61
+ - Upload all stems to GCS
62
+
63
+ **Triggers:** Automatically on job creation
64
+ **Next Stage:** Coordinates with lyrics worker
65
+ **State Transitions:** `PENDING` → `SEPARATING_STAGE1` → `SEPARATING_STAGE2` → `AUDIO_COMPLETE`
66
+
67
+ **Integration:** Uses `karaoke_gen.AudioProcessor` with remote Modal API
68
+
69
+ ---
70
+
71
+ ### 2. Lyrics Worker (`lyrics_worker.py`)
72
+
73
+ **Purpose:** Fetches, transcribes, and corrects lyrics
74
+
75
+ **Stages:**
76
+ 1. **Fetch Reference Lyrics:**
77
+ - Sources: Genius, Spotify, Musixmatch (via RapidAPI)
78
+ - Fallback order: Genius → Spotify → Musixmatch
79
+ - Time: <1 minute
80
+
81
+ 2. **Transcribe Audio:**
82
+ - Service: AudioShake API
83
+ - Output: Word-level timestamps with confidence scores
84
+ - Time: 1-2 minutes
85
+
86
+ 3. **Automatic Correction:**
87
+ - Algorithm: `LyricsTranscriber` (ExtendAnchorHandler, SyllablesMatchHandler)
88
+ - Matches transcription to reference lyrics
89
+ - Fixes common errors (homophones, etc.)
90
+ - Generates corrections JSON
91
+ - Time: 30 seconds
92
+
93
+ 4. **Upload for Review:**
94
+ - corrections.json → For review interface
95
+ - audio.flac → For playback
96
+ - reference.txt → For comparison
97
+
98
+ **Triggers:** Automatically on job creation
99
+ **Next Stage:** `AWAITING_REVIEW` (human interaction required)
100
+ **State Transitions:** `PENDING` → `TRANSCRIBING` → `CORRECTING` → `LYRICS_COMPLETE` → `AWAITING_REVIEW`
101
+
102
+ **Integration:** Uses `karaoke_gen.LyricsProcessor` and `LyricsTranscriber`
103
+
104
+ ---
105
+
106
+ ### 3. Screens Worker (`screens_worker.py`)
107
+
108
+ **Purpose:** Generates title and end screen videos
109
+
110
+ **Stages:**
111
+ 1. **Generate Title Screen:**
112
+ - Artist and song title
113
+ - Style parameters (font, colors, background)
114
+ - Output: `.mov` file
115
+ - Time: 10-15 seconds
116
+
117
+ 2. **Generate End Screen:**
118
+ - "Thank you for singing!" message
119
+ - Matching style
120
+ - Output: `.mov` file
121
+ - Time: 10-15 seconds
122
+
123
+ 3. **Upload to GCS:**
124
+ - Both screens uploaded
125
+ - URLs stored in job metadata
126
+
127
+ 4. **Countdown Padding (if needed):**
128
+ - Detects if countdown was added to vocals
129
+ - Pads instrumentals to match
130
+ - Re-uploads padded versions
131
+
132
+ **Triggers:** Automatically when audio + lyrics both complete
133
+ **Next Stage:** `AWAITING_INSTRUMENTAL_SELECTION` (human interaction required)
134
+ **State Transitions:** `AUDIO_COMPLETE + LYRICS_COMPLETE` → `GENERATING_SCREENS` → `AWAITING_INSTRUMENTAL_SELECTION`
135
+
136
+ **Integration:** Uses `karaoke_gen.VideoGenerator`
137
+
138
+ ---
139
+
140
+ ### 4. Video Worker (`video_worker.py`) - TODO
141
+
142
+ **Purpose:** Generates final karaoke videos in multiple formats
143
+
144
+ **Stages:**
145
+ 1. **Remux:**
146
+ - Combine lyrics video with selected instrumental
147
+ - Add title and end screens
148
+ - Create base karaoke video
149
+
150
+ 2. **Encode Multiple Formats:**
151
+ - Lossless 4K MP4 (PCM audio)
152
+ - Lossless 4K MKV (FLAC audio)
153
+ - Lossy 4K MP4 (AAC audio)
154
+ - Lossy 720p MP4 (AAC audio)
155
+ - Time: 15-20 minutes
156
+
157
+ 3. **Package:**
158
+ - CDG+MP3 ZIP (karaoke machines)
159
+ - TXT+MP3 ZIP (simple apps)
160
+
161
+ 4. **Upload:**
162
+ - All formats to GCS
163
+ - Optional: YouTube upload
164
+ - Optional: Dropbox sync
165
+
166
+ **Triggers:** User selects instrumental
167
+ **Next Stage:** `COMPLETE` or `UPLOADING`
168
+ **State Transitions:** `INSTRUMENTAL_SELECTED` → `GENERATING_VIDEO` → `ENCODING` → `PACKAGING` → `COMPLETE`
169
+
170
+ **Integration:** Uses `karaoke_gen.KaraokeFinalise` (Cloud Build recommended for encoding)
171
+
172
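+ **Status:** Originally documented as not yet implemented; `video_worker.py` now ships in this directory, so verify the stages above against the code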
173
+
174
+ ---
175
+
176
+ ## Worker Pattern
177
+
178
+ ### SOLID Principles
179
+
180
+ Each worker follows SOLID principles:
181
+
182
+ **Single Responsibility:** Each worker handles exactly one stage
183
+ - Audio: Only audio separation
184
+ - Lyrics: Only transcription/correction
185
+ - Screens: Only screen generation
186
+ - Video: Only final video generation
187
+
188
+ **Open/Closed:** Extensible without modification
189
+ - New workers can be added easily
190
+ - Style parameters allow customization
191
+ - No modification of existing workers needed
192
+
193
+ **Liskov Substitution:** Interface consistency
194
+ - All workers follow same pattern
195
+ - Can swap implementations
196
+ - `karaoke_gen` classes are abstractions
197
+
198
+ **Interface Segregation:** Focused interfaces
199
+ - No bloated base class
200
+ - Each worker has minimal dependencies
201
+ - Clear, focused API
202
+
203
+ **Dependency Inversion:** Depends on abstractions
204
+ - Workers use `karaoke_gen` classes (abstractions)
205
+ - Not coupled to specific implementations
206
+ - Easy to test and mock
207
+
208
+ ### Common Pattern
209
+
210
+ All workers follow this structure:
211
+
212
+ ```python
213
+ async def process_stage(job_id: str) -> bool:
214
+ """Main entry point for worker."""
215
+ job_manager = JobManager()
216
+ storage = StorageService()
217
+ settings = get_settings()
218
+
219
+ # Get job
220
+ job = job_manager.get_job(job_id)
221
+
222
+ # Create temp directory
223
+ temp_dir = tempfile.mkdtemp(prefix=f"karaoke_{stage}_{job_id}_")
224
+
225
+ try:
226
+ # Transition to processing state
227
+ job_manager.transition_to_state(
228
+ job_id=job_id,
229
+ new_status=JobStatus.PROCESSING_STATE,
230
+ progress=X,
231
+ message="Processing..."
232
+ )
233
+
234
+ # Download inputs from GCS
235
+ inputs = await download_inputs(...)
236
+
237
+ # Process with karaoke_gen
238
+ outputs = await process_with_karaoke_gen(inputs)
239
+
240
+ # Upload outputs to GCS
241
+ await upload_outputs(outputs)
242
+
243
+ # Transition to complete state
244
+ job_manager.transition_to_state(
245
+ job_id=job_id,
246
+ new_status=JobStatus.COMPLETE_STATE,
247
+ progress=Y,
248
+ message="Complete"
249
+ )
250
+
251
+ # Trigger next stage if applicable
252
+ job_manager.mark_stage_complete(job_id)
253
+
254
+ return True
255
+
256
+ except Exception as e:
257
+ # Structured error handling
258
+ job_manager.mark_job_failed(
259
+ job_id=job_id,
260
+ error_message=str(e),
261
+ error_details={"stage": "stage_name", "error": str(e)}
262
+ )
263
+ return False
264
+
265
+ finally:
266
+ # Always cleanup
267
+ shutil.rmtree(temp_dir)
268
+ ```
269
+
270
+ ### Error Handling
271
+
272
+ All workers implement comprehensive error handling:
273
+
274
+ 1. **Try/Except/Finally:** Standard pattern
275
+ 2. **Structured Errors:** `error_details` dict with context
276
+ 3. **State Updates:** Job marked as `FAILED` with error message
277
+ 4. **Cleanup:** Temp directories always removed
278
+ 5. **Logging:** All errors logged with stack traces
279
+ 6. **Retry:** Failed jobs can be retried from last checkpoint
280
+
281
+ ### File Management
282
+
283
+ Workers use temporary directories for isolation:
284
+
285
+ ```python
286
+ temp_dir = tempfile.mkdtemp(prefix=f"karaoke_{worker}_{job_id}_")
287
+ try:
288
+ # Processing...
289
+ finally:
290
+ shutil.rmtree(temp_dir) # Always cleanup
291
+ ```
292
+
293
+ Benefits:
294
+ - **Isolation:** Each job has separate workspace
295
+ - **No Leaks:** Automatic cleanup
296
+ - **Concurrency:** Multiple jobs don't interfere
297
+ - **Security:** Temp files are private
298
+
299
+ ### GCS Integration
300
+
301
+ Workers upload/download files from Google Cloud Storage:
302
+
303
+ ```python
304
+ # Download
305
+ local_path = os.path.join(temp_dir, "input.flac")
306
+ storage.download_file(gcs_url, local_path)
307
+
308
+ # Upload
309
+ gcs_path = f"jobs/{job_id}/category/file.ext"
310
+ url = storage.upload_file(local_path, gcs_path)
311
+ job_manager.update_file_url(job_id, 'category', 'file_type', url)
312
+ ```
313
+
314
+ File organization:
315
+ ```
316
+ gs://bucket/jobs/{job_id}/
317
+ ├── input.flac # Original audio
318
+ ├── stems/
319
+ │ ├── instrumental_clean.flac
320
+ │ ├── instrumental_with_backing.flac
321
+ │ ├── vocals.flac
322
+ │ ├── backing_vocals.flac
323
+ │ └── lead_vocals.flac
324
+ ├── lyrics/
325
+ │ ├── corrections.json
326
+ │ ├── audio.flac
327
+ │ └── reference.txt
328
+ ├── screens/
329
+ │ ├── title.mov
330
+ │ └── end.mov
331
+ ├── videos/
332
+ │ └── with_vocals.mkv
333
+ └── finals/
334
+ ├── lossless_4k_mp4.mp4
335
+ ├── lossless_4k_mkv.mkv
336
+ ├── lossy_4k_mp4.mp4
337
+ └── lossy_720p_mp4.mp4
338
+ ```
339
+
340
+ ---
341
+
342
+ ## Worker Coordination
343
+
344
+ ### Parallel Processing
345
+
346
+ Audio and lyrics workers run in parallel:
347
+
348
+ ```python
349
+ # Job submission triggers both
350
+ background_tasks.add_task(worker_service.trigger_audio_worker, job_id)
351
+ background_tasks.add_task(worker_service.trigger_lyrics_worker, job_id)
352
+ ```
353
+
354
+ Coordination via job state:
355
+
356
+ ```text
357
+ # Audio worker completes
358
+ job_manager.mark_audio_complete(job_id)
359
+ → Sets audio_complete flag
360
+ → Checks if lyrics also complete
361
+ → If both: triggers screens worker
362
+
363
+ # Lyrics worker completes
364
+ job_manager.mark_lyrics_complete(job_id)
365
+ → Sets lyrics_complete flag
366
+ → Checks if audio also complete
367
+ → If both: triggers screens worker
368
+ ```
369
+
370
+ Benefits:
371
+ - **No race conditions:** Firestore handles atomicity
372
+ - **Independent processing:** Workers don't block each other
373
+ - **Automatic progression:** No manual coordination needed
374
+ - **Clear state tracking:** Easy to debug
375
+
376
+ ### Sequential Processing
377
+
378
+ Some stages must be sequential:
379
+
380
+ ```
381
+ Screens Worker
382
+ ↓ (auto-triggered when audio + lyrics complete)
383
+ AWAITING_INSTRUMENTAL_SELECTION
384
+ ↓ (user selects)
385
+ Video Worker
386
+ ↓ (generates all formats)
387
+ COMPLETE
388
+ ```
389
+
390
+ Coordination via state transitions:
391
+
392
+ ```python
393
+ # Screens worker transitions to AWAITING_INSTRUMENTAL_SELECTION
394
+ # User submits selection
395
+ # API endpoint triggers video worker
396
+ background_tasks.add_task(worker_service.trigger_video_worker, job_id)
397
+ ```
398
+
399
+ ---
400
+
401
+ ## Triggering Workers
402
+
403
+ Workers are triggered via `WorkerService` (see `backend/services/worker_service.py`):
404
+
405
+ ```python
406
+ from backend.services.worker_service import get_worker_service
407
+
408
+ worker_service = get_worker_service()
409
+
410
+ # Trigger specific worker
411
+ await worker_service.trigger_audio_worker(job_id)
412
+ await worker_service.trigger_lyrics_worker(job_id)
413
+ await worker_service.trigger_screens_worker(job_id)
414
+ await worker_service.trigger_video_worker(job_id)
415
+ ```
416
+
417
+ Internal API endpoints (see `backend/api/routes/internal.py`):
418
+
419
+ ```
420
+ POST /api/internal/workers/audio
421
+ POST /api/internal/workers/lyrics
422
+ POST /api/internal/workers/screens
423
+ POST /api/internal/workers/video
424
+ ```
425
+
426
+ Request format:
427
+ ```json
428
+ {
429
+ "job_id": "abc123"
430
+ }
431
+ ```
432
+
433
+ Response format:
434
+ ```json
435
+ {
436
+ "status": "started",
437
+ "job_id": "abc123",
438
+ "message": "Worker started"
439
+ }
440
+ ```
441
+
442
+ ---
443
+
444
+ ## Testing Workers
445
+
446
+ ### Manual Testing
447
+
448
+ 1. **Submit a job:**
449
+ ```bash
450
+ curl -X POST http://localhost:8080/api/jobs \
451
+ -H "Content-Type: application/json" \
452
+ -d '{
453
+ "url": "https://youtube.com/watch?v=...",
454
+ "artist": "ABBA",
455
+ "title": "Waterloo"
456
+ }'
457
+ ```
458
+
459
+ 2. **Check status:**
460
+ ```bash
461
+ curl http://localhost:8080/api/jobs/{job_id}
462
+ ```
463
+
464
+ 3. **Monitor logs:**
465
+ ```bash
466
+ # Watch worker progress
467
+ tail -f logs/backend.log | grep "Job {job_id}"
468
+ ```
469
+
470
+ ### Automated Testing
471
+
472
+ TODO: Create unit tests for each worker
473
+
474
+ - Test audio separation with mock Modal API
475
+ - Test lyrics transcription with mock AudioShake API
476
+ - Test screen generation with mock VideoGenerator
477
+ - Test error handling
478
+ - Test state transitions
479
+
480
+ ---
481
+
482
+ ## Environment Variables
483
+
484
+ Workers require these environment variables:
485
+
486
+ ### Required
487
+ - `AUDIO_SEPARATOR_API_URL` - Modal audio separation API
488
+ - `AUDIOSHAKE_API_TOKEN` - AudioShake transcription API
489
+
490
+ ### Optional
491
+ - `GENIUS_API_TOKEN` - Genius lyrics API
492
+ - `SPOTIFY_COOKIE_SP_DC` - Spotify lyrics
493
+ - `RAPIDAPI_KEY` - Musixmatch via RapidAPI
494
+
495
+ ### GCP
496
+ - `GOOGLE_CLOUD_PROJECT` - GCP project ID
497
+ - `GCS_UPLOAD_BUCKET` - Upload bucket
498
+ - `GCS_TEMP_BUCKET` - Temp files bucket
499
+ - `GCS_OUTPUT_BUCKET` - Final outputs bucket
500
+
501
+ All credentials can be stored in Google Secret Manager (production) or environment variables (development).
502
+
503
+ ---
504
+
505
+ ## Performance
506
+
507
+ ### Processing Times
508
+
509
+ | Worker | Stage | Time | Can Parallelize |
510
+ |--------|-------|------|----------------|
511
+ | Audio | Stage 1 | 3-5 min | Yes (with lyrics) |
512
+ | Audio | Stage 2 | 2-3 min | Yes (with lyrics) |
513
+ | Lyrics | Fetch | <1 min | Yes (with audio) |
514
+ | Lyrics | Transcribe | 1-2 min | Yes (with audio) |
515
+ | Lyrics | Correct | 30 sec | Yes (with audio) |
516
+ | Screens | Generate | 30 sec | No |
517
+ | Video | Encode | 15-20 min | No |
518
+
519
+ **Total Time (ideal, excluding human review):** ~20-30 minutes
520
+ - Parallel phase: 5-10 minutes (audio + lyrics)
521
+ - Human review: 5-15 minutes (variable)
522
+ - Screens: 30 seconds
523
+ - Instrumental selection: 30 seconds (user)
524
+ - Video encoding: 15-20 minutes
525
+
526
+ ### Resource Usage
527
+
528
+ Per job:
529
+ - **CPU:** Low (workers are I/O bound; encoding is offloaded to Cloud Build)
530
+ - **Memory:** ~500MB (temporary files)
531
+ - **Disk:** ~2GB temporary (cleaned up)
532
+ - **Network:** ~500MB download, ~1GB upload
533
+
534
+ ### Scaling
535
+
536
+ Workers scale horizontally:
537
+ - Each Cloud Run instance can handle multiple concurrent workers
538
+ - Workers are stateless (state in Firestore)
539
+ - Files in GCS (not local disk)
540
+ - Can run 10+ jobs concurrently per instance
541
+
542
+ ---
543
+
544
+ ## Future Improvements
545
+
546
+ 1. **Cloud Tasks:** Replace HTTP triggers with Cloud Tasks for better reliability
547
+ 2. **Retries:** Automatic retry with exponential backoff
548
+ 3. **Checkpoints:** Resume from last successful stage
549
+ 4. **Progress:** Real-time progress updates (percentage)
550
+ 5. **Notifications:** Email/SMS when review needed
551
+ 6. **Caching:** Cache model files, reference lyrics
552
+ 7. **Metrics:** Worker duration, success rate, error rate
553
+ 8. **Monitoring:** Dashboards for worker health
554
+
555
+ ---
556
+
557
+ ## Troubleshooting
558
+
559
+ ### Worker Not Starting
560
+
561
+ **Symptom:** Job stuck in `PENDING`
562
+ **Cause:** Worker trigger failed
563
+ **Solution:** Check the logs for HTTP errors and verify that the internal API is accessible
564
+
565
+ ### Worker Failing
566
+
567
+ **Symptom:** Job transitions to `FAILED`
568
+ **Cause:** Exception in worker
569
+ **Solution:** Check `error_details` in job, review logs
570
+
571
+ ### Slow Processing
572
+
573
+ **Symptom:** Worker takes longer than expected
574
+ **Cause:** API slowness, large file
575
+ **Solution:** Check external API status, optimize file sizes
576
+
577
+ ### Files Not Uploading
578
+
579
+ **Symptom:** Job completes but no files in GCS
580
+ **Cause:** GCS permissions, network issue
581
+ **Solution:** Verify that the service account has the Storage Admin role
582
+
583
+ ### Coordination Issues
584
+
585
+ **Symptom:** Screens worker not triggered
586
+ **Cause:** One parallel worker failed
587
+ **Solution:** Check that both the audio and lyrics workers completed successfully
588
+
589
+ ---
590
+
591
+ ## Related Documentation
592
+
593
+ - [API Manual Testing](../docs/01-reference/API-MANUAL-TESTING.md)
594
+ - [CLI Workflow](../docs/01-reference/KARAOKE-GEN-CLI-WORKFLOW.md)
595
+ - [Testing Backend](../docs/03-deployment/TESTING-BACKEND.md)
596
+ - [Infrastructure as Code](../docs/03-deployment/INFRASTRUCTURE-AS-CODE.md)
597
+
@@ -0,0 +1,11 @@
1
+ """
2
+ Background workers for long-running karaoke generation tasks.
3
+
4
+ Workers are triggered asynchronously to handle processing stages that
5
+ take several minutes or longer. Each worker:
6
+ - Updates job state and progress
7
+ - Stores intermediate files in GCS
8
+ - Handles errors gracefully
9
+ - Coordinates with other workers via job state
10
+ """
11
+