karaoke-gen 0.86.7__py3-none-any.whl → 0.96.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. backend/.coveragerc +20 -0
  2. backend/.gitignore +37 -0
  3. backend/Dockerfile +43 -0
  4. backend/Dockerfile.base +74 -0
  5. backend/README.md +242 -0
  6. backend/__init__.py +0 -0
  7. backend/api/__init__.py +0 -0
  8. backend/api/dependencies.py +457 -0
  9. backend/api/routes/__init__.py +0 -0
  10. backend/api/routes/admin.py +742 -0
  11. backend/api/routes/audio_search.py +903 -0
  12. backend/api/routes/auth.py +348 -0
  13. backend/api/routes/file_upload.py +2076 -0
  14. backend/api/routes/health.py +344 -0
  15. backend/api/routes/internal.py +435 -0
  16. backend/api/routes/jobs.py +1610 -0
  17. backend/api/routes/review.py +652 -0
  18. backend/api/routes/themes.py +162 -0
  19. backend/api/routes/users.py +1014 -0
  20. backend/config.py +172 -0
  21. backend/main.py +133 -0
  22. backend/middleware/__init__.py +5 -0
  23. backend/middleware/audit_logging.py +124 -0
  24. backend/models/__init__.py +0 -0
  25. backend/models/job.py +519 -0
  26. backend/models/requests.py +123 -0
  27. backend/models/theme.py +153 -0
  28. backend/models/user.py +254 -0
  29. backend/models/worker_log.py +164 -0
  30. backend/pyproject.toml +29 -0
  31. backend/quick-check.sh +93 -0
  32. backend/requirements.txt +29 -0
  33. backend/run_tests.sh +60 -0
  34. backend/services/__init__.py +0 -0
  35. backend/services/audio_analysis_service.py +243 -0
  36. backend/services/audio_editing_service.py +278 -0
  37. backend/services/audio_search_service.py +702 -0
  38. backend/services/auth_service.py +630 -0
  39. backend/services/credential_manager.py +792 -0
  40. backend/services/discord_service.py +172 -0
  41. backend/services/dropbox_service.py +301 -0
  42. backend/services/email_service.py +1093 -0
  43. backend/services/encoding_interface.py +454 -0
  44. backend/services/encoding_service.py +405 -0
  45. backend/services/firestore_service.py +512 -0
  46. backend/services/flacfetch_client.py +573 -0
  47. backend/services/gce_encoding/README.md +72 -0
  48. backend/services/gce_encoding/__init__.py +22 -0
  49. backend/services/gce_encoding/main.py +589 -0
  50. backend/services/gce_encoding/requirements.txt +16 -0
  51. backend/services/gdrive_service.py +356 -0
  52. backend/services/job_logging.py +258 -0
  53. backend/services/job_manager.py +842 -0
  54. backend/services/job_notification_service.py +271 -0
  55. backend/services/local_encoding_service.py +590 -0
  56. backend/services/local_preview_encoding_service.py +407 -0
  57. backend/services/lyrics_cache_service.py +216 -0
  58. backend/services/metrics.py +413 -0
  59. backend/services/packaging_service.py +287 -0
  60. backend/services/rclone_service.py +106 -0
  61. backend/services/storage_service.py +209 -0
  62. backend/services/stripe_service.py +275 -0
  63. backend/services/structured_logging.py +254 -0
  64. backend/services/template_service.py +330 -0
  65. backend/services/theme_service.py +469 -0
  66. backend/services/tracing.py +543 -0
  67. backend/services/user_service.py +721 -0
  68. backend/services/worker_service.py +558 -0
  69. backend/services/youtube_service.py +112 -0
  70. backend/services/youtube_upload_service.py +445 -0
  71. backend/tests/__init__.py +4 -0
  72. backend/tests/conftest.py +224 -0
  73. backend/tests/emulator/__init__.py +7 -0
  74. backend/tests/emulator/conftest.py +88 -0
  75. backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
  76. backend/tests/emulator/test_emulator_integration.py +356 -0
  77. backend/tests/emulator/test_style_loading_direct.py +436 -0
  78. backend/tests/emulator/test_worker_logs_direct.py +229 -0
  79. backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
  80. backend/tests/requirements-test.txt +10 -0
  81. backend/tests/requirements.txt +6 -0
  82. backend/tests/test_admin_email_endpoints.py +411 -0
  83. backend/tests/test_api_integration.py +460 -0
  84. backend/tests/test_api_routes.py +93 -0
  85. backend/tests/test_audio_analysis_service.py +294 -0
  86. backend/tests/test_audio_editing_service.py +386 -0
  87. backend/tests/test_audio_search.py +1398 -0
  88. backend/tests/test_audio_services.py +378 -0
  89. backend/tests/test_auth_firestore.py +231 -0
  90. backend/tests/test_config_extended.py +68 -0
  91. backend/tests/test_credential_manager.py +377 -0
  92. backend/tests/test_dependencies.py +54 -0
  93. backend/tests/test_discord_service.py +244 -0
  94. backend/tests/test_distribution_services.py +820 -0
  95. backend/tests/test_dropbox_service.py +472 -0
  96. backend/tests/test_email_service.py +492 -0
  97. backend/tests/test_emulator_integration.py +322 -0
  98. backend/tests/test_encoding_interface.py +412 -0
  99. backend/tests/test_file_upload.py +1739 -0
  100. backend/tests/test_flacfetch_client.py +632 -0
  101. backend/tests/test_gdrive_service.py +524 -0
  102. backend/tests/test_instrumental_api.py +431 -0
  103. backend/tests/test_internal_api.py +343 -0
  104. backend/tests/test_job_creation_regression.py +583 -0
  105. backend/tests/test_job_manager.py +339 -0
  106. backend/tests/test_job_manager_notifications.py +329 -0
  107. backend/tests/test_job_notification_service.py +443 -0
  108. backend/tests/test_jobs_api.py +273 -0
  109. backend/tests/test_local_encoding_service.py +423 -0
  110. backend/tests/test_local_preview_encoding_service.py +567 -0
  111. backend/tests/test_main.py +87 -0
  112. backend/tests/test_models.py +918 -0
  113. backend/tests/test_packaging_service.py +382 -0
  114. backend/tests/test_requests.py +201 -0
  115. backend/tests/test_routes_jobs.py +282 -0
  116. backend/tests/test_routes_review.py +337 -0
  117. backend/tests/test_services.py +556 -0
  118. backend/tests/test_services_extended.py +112 -0
  119. backend/tests/test_storage_service.py +448 -0
  120. backend/tests/test_style_upload.py +261 -0
  121. backend/tests/test_template_service.py +295 -0
  122. backend/tests/test_theme_service.py +516 -0
  123. backend/tests/test_unicode_sanitization.py +522 -0
  124. backend/tests/test_upload_api.py +256 -0
  125. backend/tests/test_validate.py +156 -0
  126. backend/tests/test_video_worker_orchestrator.py +847 -0
  127. backend/tests/test_worker_log_subcollection.py +509 -0
  128. backend/tests/test_worker_logging.py +365 -0
  129. backend/tests/test_workers.py +1116 -0
  130. backend/tests/test_workers_extended.py +178 -0
  131. backend/tests/test_youtube_service.py +247 -0
  132. backend/tests/test_youtube_upload_service.py +568 -0
  133. backend/validate.py +173 -0
  134. backend/version.py +27 -0
  135. backend/workers/README.md +597 -0
  136. backend/workers/__init__.py +11 -0
  137. backend/workers/audio_worker.py +618 -0
  138. backend/workers/lyrics_worker.py +683 -0
  139. backend/workers/render_video_worker.py +483 -0
  140. backend/workers/screens_worker.py +525 -0
  141. backend/workers/style_helper.py +198 -0
  142. backend/workers/video_worker.py +1277 -0
  143. backend/workers/video_worker_orchestrator.py +701 -0
  144. backend/workers/worker_logging.py +278 -0
  145. karaoke_gen/instrumental_review/static/index.html +7 -4
  146. karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
  147. karaoke_gen/style_loader.py +3 -1
  148. karaoke_gen/utils/__init__.py +163 -8
  149. karaoke_gen/video_background_processor.py +9 -4
  150. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/METADATA +2 -1
  151. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/RECORD +187 -42
  152. lyrics_transcriber/correction/agentic/providers/config.py +9 -5
  153. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +1 -51
  154. lyrics_transcriber/correction/corrector.py +192 -130
  155. lyrics_transcriber/correction/operations.py +24 -9
  156. lyrics_transcriber/frontend/package-lock.json +2 -2
  157. lyrics_transcriber/frontend/package.json +1 -1
  158. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
  159. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
  160. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
  161. lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
  162. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
  163. lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
  164. lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
  165. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
  166. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
  167. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
  168. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
  169. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  170. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
  171. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
  172. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
  173. lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
  174. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
  175. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
  176. lyrics_transcriber/frontend/src/theme.ts +42 -15
  177. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  178. lyrics_transcriber/frontend/vite.config.js +5 -0
  179. lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
  180. lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
  181. lyrics_transcriber/frontend/web_assets/index.html +6 -2
  182. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
  183. lyrics_transcriber/output/generator.py +17 -3
  184. lyrics_transcriber/output/video.py +60 -95
  185. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
  186. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/WHEEL +0 -0
  187. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/entry_points.txt +0 -0
  188. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,652 @@
1
+ """
2
+ Review API routes - Compatible with LyricsTranscriber frontend.
3
+
4
+ These endpoints match the API that the LyricsTranscriber review frontend expects,
5
+ allowing us to use the existing React review UI with our cloud backend.
6
+
7
+ Usage:
8
+ Frontend URL: http://localhost:5173/?baseApiUrl=http://localhost:8000/api/review/{job_id}
9
+
10
+ The baseApiUrl includes the job_id, and all endpoints are relative to that.
11
+ """
12
+ import asyncio
13
+ import logging
14
+ import hashlib
15
+ import json
16
+ import os
17
+ import tempfile
18
+ from pathlib import Path
19
+ from typing import Dict, Any, Set, Tuple
20
+
21
+ from fastapi import APIRouter, HTTPException, Request, Depends
22
+ from fastapi.responses import FileResponse, StreamingResponse
23
+ from starlette.background import BackgroundTask
24
+
25
+ from backend.models.job import JobStatus
26
+ from backend.services.job_manager import JobManager
27
+ from backend.services.storage_service import StorageService
28
+ from backend.services.job_logging import job_log_context, JobLogger
29
+ from backend.services.tracing import create_span, add_span_attribute, add_span_event
30
+ from backend.services.encoding_service import get_encoding_service
31
+ from backend.api.dependencies import require_auth, require_review_auth
32
+ from backend.services.auth_service import UserType
33
+ from backend.config import get_settings
34
+
35
+ # LyricsTranscriber imports for preview generation
36
+ from lyrics_transcriber.types import CorrectionResult
37
+ from lyrics_transcriber.core.config import OutputConfig
38
+ from lyrics_transcriber.correction.operations import CorrectionOperations
39
+
40
+ # Import from the unified style loader
41
+ from karaoke_gen.style_loader import load_styles_from_gcs
42
+
43
+
44
# Module-wide logger and the router that groups every review endpoint under /review.
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/review", tags=["review"])

# Per-job context recorded when correction data is served (audio hash and
# audio GCS path). Held in process memory only; in production this would be
# handled differently (e.g., session tokens).
_job_contexts: Dict[str, Dict[str, Any]] = {}

# job_id -> {preview_hash: GCS path} for preview videos generated by this process.
_preview_videos: Dict[str, Dict[str, str]] = {}

# Strong references to in-flight background tasks so they are not garbage
# collected before they complete.
_background_tasks: Set[asyncio.Task] = set()
56
+
57
+
58
+ def _get_audio_hash(job_id: str) -> str:
59
+ """Generate a consistent audio hash for a job."""
60
+ return hashlib.md5(job_id.encode()).hexdigest()
61
+
62
+
63
@router.get("/{job_id}/ping")
async def ping(job_id: str):
    """Liveness probe called by the review frontend; always reports ``ok``.

    ``job_id`` is part of the route but is not inspected.
    """
    health = {"status": "ok"}
    return health
67
+
68
+
69
@router.get("/{job_id}/correction-data")
async def get_correction_data(
    job_id: str,
    auth_info: Tuple[str, str] = Depends(require_review_auth)
):
    """
    Get correction data for the review interface.

    Returns the stored corrections JSON, augmented with ``audio_hash``,
    ``artist`` and ``title`` under ``metadata``, which the frontend needs to
    render the lyrics review UI. As a side effect the job context is recorded
    in ``_job_contexts`` and a job in AWAITING_REVIEW is moved to IN_REVIEW.

    Raises:
        HTTPException: 404 if the job or its corrections file is missing,
            400 if the job is not in a review state, 500 if loading the
            corrections or updating the job fails.
    """
    job_manager = JobManager()
    storage = StorageService()

    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    if job.status not in [JobStatus.AWAITING_REVIEW, JobStatus.IN_REVIEW]:
        raise HTTPException(
            status_code=400,
            detail=f"Job not ready for review (current status: {job.status})"
        )

    # Prefer the path recorded on the job; otherwise fall back to the
    # conventional location and verify that it actually exists.
    corrections_gcs = job.file_urls.get('lyrics', {}).get('corrections')
    if not corrections_gcs:
        corrections_gcs = f"jobs/{job_id}/lyrics/corrections.json"
        if not storage.file_exists(corrections_gcs):
            raise HTTPException(
                status_code=404,
                detail="Corrections data not found. Lyrics processing may not be complete."
            )

    # Download and return corrections data
    try:
        corrections_data = storage.download_json(corrections_gcs)

        # Add audio hash for the frontend. A stored `"metadata": null` is
        # treated the same as a missing key so the assignments below cannot
        # fail on None.
        audio_hash = _get_audio_hash(job_id)
        metadata = corrections_data.get('metadata') or {}
        corrections_data['metadata'] = metadata
        metadata['audio_hash'] = audio_hash
        metadata['artist'] = job.artist
        metadata['title'] = job.title

        # Store context for audio serving
        _job_contexts[job_id] = {
            'audio_hash': audio_hash,
            'audio_gcs_path': job.input_media_gcs_path
        }

        # Transition to IN_REVIEW if not already
        if job.status == JobStatus.AWAITING_REVIEW:
            job_manager.transition_to_state(
                job_id=job_id,
                new_status=JobStatus.IN_REVIEW,
                message="User opened review interface"
            )

        logger.info(f"Job {job_id}: Serving correction data for review")
        return corrections_data

    except Exception as e:
        logger.error(f"Job {job_id}: Error loading corrections: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Error loading corrections: {str(e)}")
136
+
137
+
138
@router.get("/{job_id}/audio/{audio_hash}")
async def get_audio_with_hash(
    job_id: str,
    audio_hash: str,
    auth_info: Tuple[str, str] = Depends(require_review_auth),
):
    """Serve the job's audio for playback via the hash-suffixed URL.

    ``audio_hash`` is accepted as part of the route but is not used to
    select the file; the audio is resolved from ``job_id`` alone.
    """
    response = await _stream_audio(job_id)
    return response
146
+
147
+
148
@router.get("/{job_id}/audio/")
@router.get("/{job_id}/audio")
async def get_audio_no_hash(
    job_id: str,
    auth_info: Tuple[str, str] = Depends(require_review_auth),
):
    """Serve the job's audio for playback via the URL without a hash segment.

    Registered both with and without a trailing slash.
    """
    response = await _stream_audio(job_id)
    return response
156
+
157
+
158
async def _stream_audio(job_id: str):
    """
    Stream the audio file for playback in the review interface.

    The job's input media is downloaded from storage into a temporary file,
    which is returned as a ``FileResponse`` and deleted by a background task
    once the response has been sent. If anything fails before the response
    is handed back, the temporary file is removed here so it does not leak.

    Raises:
        HTTPException: 404 if the job or its audio path is missing,
            500 if the download or response construction fails.
    """
    job_manager = JobManager()
    storage = StorageService()

    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    audio_gcs_path = job.input_media_gcs_path
    if not audio_gcs_path:
        raise HTTPException(status_code=404, detail="Audio file not found")

    # Download to temp file and stream
    tmp_path = None
    try:
        # delete=False: the file has to outlive this `with` block so it can be
        # filled by the download and then served.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".flac") as tmp:
            tmp_path = tmp.name

        storage.download_file(audio_gcs_path, tmp_path)

        # Determine content type; anything that is not FLAC or WAV
        # (including .mp3) is served as audio/mpeg.
        media_type = "audio/mpeg"
        for suffix, candidate in ((".flac", "audio/flac"), (".wav", "audio/wav")):
            if audio_gcs_path.endswith(suffix):
                media_type = candidate
                break

        logger.info(f"Job {job_id}: Streaming audio for review")

        return FileResponse(
            tmp_path,
            media_type=media_type,
            filename=os.path.basename(audio_gcs_path),
            background=BackgroundTask(os.unlink, tmp_path),
        )

    except Exception as e:
        # The background cleanup only runs for a response that was returned,
        # so remove the temp file ourselves on the failure path.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError as cleanup_error:
                logger.warning(
                    f"Job {job_id}: Could not remove temp audio file {tmp_path}: {cleanup_error}"
                )
        logger.error(f"Job {job_id}: Error streaming audio: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Error streaming audio: {str(e)}")
203
+
204
+
205
@router.post("/{job_id}/complete")
async def complete_review(
    job_id: str,
    updated_data: Dict[str, Any],
    auth_info: Tuple[str, str] = Depends(require_review_auth)
):
    """
    Complete the review and save corrected lyrics.

    This endpoint receives the updated correction data from the frontend,
    saves it to storage, moves the job to REVIEW_COMPLETE and then triggers
    the render video worker as a background task. The trigger is not
    awaited; if it later fails, the failure is logged by the task's
    done-callback.

    Raises:
        HTTPException: 404 if the job does not exist, 400 if it is not in a
            review state, 500 if saving or the state transition fails.
    """
    job_manager = JobManager()
    storage = StorageService()

    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    if job.status not in [JobStatus.AWAITING_REVIEW, JobStatus.IN_REVIEW]:
        raise HTTPException(
            status_code=400,
            detail=f"Job not in review state (current status: {job.status})"
        )

    try:
        # Save updated corrections to GCS
        corrections_gcs_path = f"jobs/{job_id}/lyrics/corrections_updated.json"
        storage.upload_json(corrections_gcs_path, updated_data)
        job_manager.update_file_url(job_id, 'lyrics', 'corrections_updated', corrections_gcs_path)

        logger.info(f"Job {job_id}: Saved updated corrections")

        # Transition to REVIEW_COMPLETE
        job_manager.transition_to_state(
            job_id=job_id,
            new_status=JobStatus.REVIEW_COMPLETE,
            progress=70,
            message="Review complete, rendering video with corrected lyrics"
        )

        # Trigger render video worker
        from backend.services.worker_service import get_worker_service
        worker_service = get_worker_service()

        def _on_trigger_done(finished: asyncio.Task) -> None:
            """Release the task reference and surface any trigger failure in the logs."""
            _background_tasks.discard(finished)
            if finished.cancelled():
                return
            error = finished.exception()
            if error is not None:
                logger.error(
                    f"Job {job_id}: Render video worker trigger failed: {error}",
                    exc_info=error,
                )

        # Run in background, keep reference to prevent garbage collection
        task = asyncio.create_task(worker_service.trigger_render_video_worker(job_id))
        _background_tasks.add(task)
        task.add_done_callback(_on_trigger_done)

        logger.info(f"Job {job_id}: Review complete, triggered render video worker")

        return {"status": "success"}

    except Exception as e:
        logger.error(f"Job {job_id}: Error completing review: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Error completing review: {str(e)}")
262
+
263
+
264
@router.post("/{job_id}/handlers")
async def update_handlers(
    job_id: str,
    enabled_handlers: list,
    auth_info: Tuple[str, str] = Depends(require_review_auth),
):
    """
    Accept a request to change the enabled correction handlers.

    Placeholder endpoint: the request is logged and acknowledged, but
    ``enabled_handlers`` is not applied. Real support would require
    re-running correction.
    """
    logger.info(f"Job {job_id}: Handler update requested (not implemented)")
    acknowledgement = {
        "status": "success",
        "message": "Handler updates not yet implemented",
    }
    return acknowledgement
278
+
279
+
280
@router.post("/{job_id}/add-lyrics")
async def add_lyrics(
    job_id: str,
    data: Dict[str, str],
    auth_info: Tuple[str, str] = Depends(require_review_auth)
):
    """
    Add custom lyrics source and rerun correction.

    Uses the LyricsTranscriber's CorrectionOperations to add a new lyrics source
    and regenerate corrections with the new source included. The regenerated
    corrections overwrite ``jobs/{job_id}/lyrics/corrections.json`` and are
    also returned to the caller.

    Args:
        job_id: Job whose corrections should be regenerated.
        data: Request body; reads the ``source`` (name of the new source) and
            ``lyrics`` (lyrics text) keys. Both are stripped of surrounding
            whitespace and must be non-empty.

    Raises:
        HTTPException: 404 if the job does not exist; 400 if the job is not in
            a review state, if ``source``/``lyrics`` are blank, or if
            CorrectionOperations rejects the request with ``ValueError``;
            500 on any other failure.
    """
    job_manager = JobManager()
    storage = StorageService()

    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    # Job must be in review state to add lyrics
    if job.status not in [JobStatus.AWAITING_REVIEW, JobStatus.IN_REVIEW]:
        raise HTTPException(
            status_code=400,
            detail=f"Job not in review state (current status: {job.status})"
        )

    # `or ""` also covers a key that is present but null.
    source = (data.get("source") or "").strip()
    lyrics_text = (data.get("lyrics") or "").strip()

    # Reject blank input up front instead of downloading the corrections and
    # running the expensive correction pass on it.
    if not source or not lyrics_text:
        raise HTTPException(
            status_code=400,
            detail="Source name and lyrics text are required"
        )

    logger.info(f"Job {job_id}: Adding lyrics source '{source}' with {len(lyrics_text)} characters")

    # Use tracing and job_log_context for full observability
    with create_span("add-lyrics", {"job_id": job_id, "source": source, "lyrics_length": len(lyrics_text)}) as span:
        with job_log_context(job_id, worker="add-lyrics"):
            try:
                # Create temp directory for this operation
                with tempfile.TemporaryDirectory() as temp_dir:
                    # Download current corrections.json
                    with create_span("download-corrections") as download_span:
                        corrections_gcs = f"jobs/{job_id}/lyrics/corrections.json"
                        corrections_path = os.path.join(temp_dir, "corrections.json")
                        storage.download_file(corrections_gcs, corrections_path)
                        download_span.set_attribute("gcs_path", corrections_gcs)

                        with open(corrections_path, 'r', encoding='utf-8') as f:
                            original_data = json.load(f)

                    # Load as CorrectionResult
                    correction_result = CorrectionResult.from_dict(original_data)
                    add_span_event("corrections_loaded", {
                        "segments": len(correction_result.corrected_segments) if correction_result.corrected_segments else 0,
                        "reference_sources": len(correction_result.reference_lyrics) if correction_result.reference_lyrics else 0,
                    })

                    # Set up cache directory
                    cache_dir = os.path.join(temp_dir, "cache")
                    os.makedirs(cache_dir, exist_ok=True)

                    # Add lyrics source using CorrectionOperations (this is the heavy operation)
                    with create_span("correction-operations-add-lyrics") as correction_span:
                        correction_span.set_attribute("source_name", source)
                        updated_result = CorrectionOperations.add_lyrics_source(
                            correction_result=correction_result,
                            source=source,
                            lyrics_text=lyrics_text,
                            cache_dir=cache_dir,
                            logger=logger
                        )
                        add_span_event("correction_complete", {
                            "new_segments": len(updated_result.corrected_segments) if updated_result.corrected_segments else 0,
                        })

                    # Add audio hash for the frontend
                    audio_hash = _get_audio_hash(job_id)
                    if not updated_result.metadata:
                        updated_result.metadata = {}
                    updated_result.metadata['audio_hash'] = audio_hash
                    updated_result.metadata['artist'] = job.artist
                    updated_result.metadata['title'] = job.title

                    # Upload updated corrections back to GCS
                    with create_span("upload-corrections") as upload_span:
                        updated_data = updated_result.to_dict()
                        storage.upload_json(corrections_gcs, updated_data)
                        upload_span.set_attribute("gcs_path", corrections_gcs)

                    logger.info(f"Job {job_id}: Successfully added lyrics source '{source}'")
                    span.set_attribute("success", True)

                    return {"status": "success", "data": updated_data}

            except ValueError as e:
                # ValueError from CorrectionOperations (e.g., duplicate source name)
                logger.warning(f"Job {job_id}: Invalid add lyrics request: {e}")
                span.set_attribute("error", str(e))
                raise HTTPException(status_code=400, detail=str(e))
            except Exception as e:
                logger.error(f"Job {job_id}: Failed to add lyrics: {e}", exc_info=True)
                span.set_attribute("error", str(e))
                raise HTTPException(status_code=500, detail=f"Failed to add lyrics: {str(e)}")
380
+
381
+
382
def _first_style_asset_uri(style_assets, keys, bucket_name):
    """Return the gs:// URI for the first of ``keys`` present in ``style_assets``, else None."""
    for key in keys:
        if key in style_assets:
            return f"gs://{bucket_name}/{style_assets[key]}"
    return None


@router.post("/{job_id}/preview-video")
async def generate_preview_video(
    job_id: str,
    updated_data: Dict[str, Any],
    auth_info: Tuple[str, str] = Depends(require_review_auth)
):
    """
    Generate a preview video with the current corrections.

    Uses the LyricsTranscriber's CorrectionOperations to generate a 360p preview
    video with the user's current corrections applied.

    When the encoding service reports preview encoding as enabled, only the
    ASS subtitles are generated here and video encoding is offloaded to the
    GCE worker for faster generation (15-20s vs 60+s). If that path raises,
    the preview is rendered locally instead.

    Returns:
        ``{"status": "success", "preview_hash": ...}``; the hash identifies
        the preview stored at ``jobs/{job_id}/previews/{preview_hash}.mp4``.

    Raises:
        HTTPException: 404 if the job or its input audio path is missing,
            400 if the job is not in a review state, 500 if generation fails.
    """
    job_manager = JobManager()
    storage = StorageService()
    settings = get_settings()
    encoding_service = get_encoding_service()

    job = job_manager.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")

    # Job must be in review state to generate preview
    if job.status not in [JobStatus.AWAITING_REVIEW, JobStatus.IN_REVIEW]:
        raise HTTPException(
            status_code=400,
            detail=f"Job not in review state (current status: {job.status})"
        )

    # Without input audio there is nothing to render; report it the same way
    # the audio streaming endpoint does rather than failing mid-download.
    if not job.input_media_gcs_path:
        raise HTTPException(status_code=404, detail="Audio file not found")

    # Check if GCE preview encoding is enabled
    use_gce_preview = encoding_service.is_preview_enabled
    logger.info(f"Job {job_id}: Generating preview video (GCE preview: {use_gce_preview})")

    # Use tracing and job_log_context for full observability
    with create_span("generate-preview-video", {"job_id": job_id, "use_gce": use_gce_preview}) as span:
        with job_log_context(job_id, worker="preview"):
            try:
                # Create temp directory for this preview operation
                with tempfile.TemporaryDirectory() as temp_dir:
                    # 1. Download original corrections.json (has full structure)
                    with create_span("download-corrections-and-audio") as download_span:
                        corrections_gcs = f"jobs/{job_id}/lyrics/corrections.json"
                        corrections_path = os.path.join(temp_dir, "corrections.json")
                        storage.download_file(corrections_gcs, corrections_path)

                        with open(corrections_path, 'r', encoding='utf-8') as f:
                            original_data = json.load(f)

                        # 2. Download input audio
                        audio_path = os.path.join(temp_dir, "audio.flac")
                        storage.download_file(job.input_media_gcs_path, audio_path)
                        download_span.set_attribute("audio_gcs_path", job.input_media_gcs_path)

                    # 3. Load original as CorrectionResult
                    correction_result = CorrectionResult.from_dict(original_data)
                    add_span_event("corrections_loaded")

                    # 4. Get or create styles file for preview using unified style loader
                    with create_span("load-styles") as styles_span:
                        styles_path, _ = load_styles_from_gcs(
                            style_params_gcs_path=job.style_params_gcs_path,
                            style_assets=job.style_assets,
                            temp_dir=temp_dir,
                            download_func=storage.download_file,
                            logger=logger,
                        )
                        styles_span.set_attribute("styles_path", styles_path)

                    # 5. Set up output config for preview
                    output_dir = os.path.join(temp_dir, "output")
                    cache_dir = os.path.join(temp_dir, "cache")
                    os.makedirs(output_dir, exist_ok=True)
                    os.makedirs(cache_dir, exist_ok=True)

                    output_config = OutputConfig(
                        output_styles_json=styles_path,
                        output_dir=output_dir,
                        cache_dir=cache_dir,
                        video_resolution="360p",
                    )

                    # Arguments shared by the ASS-only (GCE) and full (local) calls.
                    preview_kwargs = dict(
                        correction_result=correction_result,
                        updated_data=updated_data,
                        output_config=output_config,
                        audio_filepath=audio_path,
                        artist=job.artist,
                        title=job.title,
                        logger=logger,
                    )

                    # 6. Generate preview (ASS-only if using GCE, or full video if local)
                    preview_gcs_path = None

                    if use_gce_preview:
                        # GCE path: Generate ASS only, then offload encoding to GCE
                        try:
                            with create_span("generate-ass-subtitles") as ass_span:
                                result = CorrectionOperations.generate_preview_video(
                                    **preview_kwargs,
                                    ass_only=True,  # Only generate ASS, skip video encoding
                                )
                                preview_hash = result["preview_hash"]
                                ass_path = result["ass_path"]
                                ass_span.set_attribute("ass_path", ass_path)
                                add_span_event("ass_generated")

                            # Upload ASS to GCS
                            with create_span("upload-ass-to-gcs") as upload_ass_span:
                                ass_gcs_path = f"jobs/{job_id}/previews/{preview_hash}.ass"
                                storage.upload_file(ass_path, ass_gcs_path)
                                upload_ass_span.set_attribute("ass_gcs_path", ass_gcs_path)

                            # Call GCE encoding service
                            with create_span("gce-preview-encoding") as gce_span:
                                bucket_name = settings.gcs_bucket_name
                                preview_gcs_path = f"jobs/{job_id}/previews/{preview_hash}.mp4"

                                # Get background image and font from style assets if available
                                style_assets = job.style_assets or {}

                                background_image_gcs_path = _first_style_asset_uri(
                                    style_assets,
                                    ["karaoke_background", "style_karaoke_background"],
                                    bucket_name,
                                )
                                if background_image_gcs_path is not None:
                                    gce_span.set_attribute("background_image", background_image_gcs_path)

                                font_gcs_path = _first_style_asset_uri(
                                    style_assets,
                                    ["font", "style_font"],
                                    bucket_name,
                                )
                                if font_gcs_path is not None:
                                    gce_span.set_attribute("font", font_gcs_path)

                                gce_result = await encoding_service.encode_preview_video(
                                    job_id=f"preview_{job_id}_{preview_hash}",
                                    ass_gcs_path=f"gs://{bucket_name}/{ass_gcs_path}",
                                    audio_gcs_path=f"gs://{bucket_name}/{job.input_media_gcs_path}",
                                    output_gcs_path=f"gs://{bucket_name}/{preview_gcs_path}",
                                    background_color="black",
                                    background_image_gcs_path=background_image_gcs_path,
                                    font_gcs_path=font_gcs_path,
                                )
                                gce_span.set_attribute("gce_status", gce_result.get("status"))
                                add_span_event("gce_encoding_complete")

                            logger.info(f"Job {job_id}: Preview generated via GCE: {preview_hash}")

                        except Exception as gce_error:
                            # Fall back to local encoding if GCE fails
                            logger.warning(
                                f"Job {job_id}: GCE preview encoding failed, falling back to local: {gce_error}"
                            )
                            span.set_attribute("gce_fallback", True)
                            use_gce_preview = False  # Fall through to local encoding below

                    if not use_gce_preview:
                        # Local path: Generate full preview video locally
                        with create_span("render-preview-video-local") as render_span:
                            render_span.set_attribute("resolution", "360p")
                            result = CorrectionOperations.generate_preview_video(
                                **preview_kwargs,
                                ass_only=False,  # Generate full video locally
                            )
                            preview_hash = result["preview_hash"]
                            video_path = result["video_path"]
                            add_span_event("render_complete")

                        # Upload preview video to GCS
                        with create_span("upload-preview-video") as upload_span:
                            preview_gcs_path = f"jobs/{job_id}/previews/{preview_hash}.mp4"
                            storage.upload_file(video_path, preview_gcs_path)
                            upload_span.set_attribute("gcs_path", preview_gcs_path)

                    # Store the GCS path for serving
                    _preview_videos.setdefault(job_id, {})[preview_hash] = preview_gcs_path

                    logger.info(f"Job {job_id}: Preview video generated: {preview_hash}")
                    span.set_attribute("preview_hash", preview_hash)
                    span.set_attribute("success", True)

                    return {"status": "success", "preview_hash": preview_hash}

            except Exception as e:
                logger.error(f"Job {job_id}: Failed to generate preview video: {e}", exc_info=True)
                span.set_attribute("error", str(e))
                raise HTTPException(
                    status_code=500,
                    detail=f"Failed to generate preview video: {e}"
                ) from e
579
+
580
+
581
@router.get("/{job_id}/preview-video/{preview_hash}")
async def get_preview_video(
    job_id: str,
    preview_hash: str,
    auth_info: Tuple[str, str] = Depends(require_review_auth)
):
    """
    Stream the generated preview video.

    The GCS path is taken from the in-memory ``_preview_videos`` map when this
    process generated the preview; otherwise the conventional
    ``jobs/{job_id}/previews/{preview_hash}.mp4`` path is checked. The video
    is downloaded to a temporary file that a background task deletes after
    the response is sent; on failure the file is removed here instead.

    Raises:
        HTTPException: 404 if no preview exists for the hash, 500 if the
            download or response construction fails.
    """
    storage = StorageService()

    # Check in-memory cache first
    preview_gcs_path = None
    if job_id in _preview_videos and preview_hash in _preview_videos[job_id]:
        preview_gcs_path = _preview_videos[job_id][preview_hash]
    else:
        # Try standard path
        preview_gcs_path = f"jobs/{job_id}/previews/{preview_hash}.mp4"
        if not storage.file_exists(preview_gcs_path):
            raise HTTPException(status_code=404, detail="Preview video not found")

    tmp_path = None
    try:
        # Download to temp file and stream. delete=False because the file has
        # to outlive this `with` block to be filled and then served.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
            tmp_path = tmp.name

        storage.download_file(preview_gcs_path, tmp_path)

        logger.info(f"Job {job_id}: Streaming preview video {preview_hash}")

        return FileResponse(
            tmp_path,
            media_type="video/mp4",
            filename=f"preview_{preview_hash}.mp4",
            headers={
                "Accept-Ranges": "bytes",
                "Content-Disposition": "inline",
                "Cache-Control": "no-cache",
            },
            background=BackgroundTask(os.unlink, tmp_path),
        )

    except Exception as e:
        # The background cleanup only runs for a response that was returned,
        # so remove the temp file ourselves on the failure path.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError as cleanup_error:
                logger.warning(
                    f"Job {job_id}: Could not remove temp preview file {tmp_path}: {cleanup_error}"
                )
        logger.error(f"Job {job_id}: Error streaming preview video: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Error streaming preview video: {str(e)}")
624
+
625
+
626
@router.post("/{job_id}/v1/annotations")
async def submit_annotation(
    job_id: str,
    annotation: Dict[str, Any],
    auth_info: Tuple[str, str] = Depends(require_review_auth),
):
    """
    Accept a correction annotation intended as ML training data.

    Placeholder endpoint: the submission is noted in the log and acknowledged
    with a stub ID, but the ``annotation`` payload is not stored. Full
    support would require a database.
    """
    logger.info(f"Job {job_id}: Annotation submitted (logged but not stored)")
    acknowledgement = {"status": "success", "annotation_id": "stub"}
    return acknowledgement
640
+
641
+
642
@router.get("/{job_id}/v1/annotations/stats")
async def get_annotation_stats(
    job_id: str,
    auth_info: Tuple[str, str] = Depends(require_review_auth),
):
    """Return annotation statistics.

    Placeholder endpoint: always reports zero annotations, regardless of
    ``job_id``.
    """
    stats = {}
    stats["total_annotations"] = 0
    stats["by_type"] = {}
    stats["message"] = "Annotation stats not yet implemented"
    return stats