karaoke-gen 0.90.1__py3-none-any.whl → 0.96.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. backend/.coveragerc +20 -0
  2. backend/.gitignore +37 -0
  3. backend/Dockerfile +43 -0
  4. backend/Dockerfile.base +74 -0
  5. backend/README.md +242 -0
  6. backend/__init__.py +0 -0
  7. backend/api/__init__.py +0 -0
  8. backend/api/dependencies.py +457 -0
  9. backend/api/routes/__init__.py +0 -0
  10. backend/api/routes/admin.py +742 -0
  11. backend/api/routes/audio_search.py +903 -0
  12. backend/api/routes/auth.py +348 -0
  13. backend/api/routes/file_upload.py +2076 -0
  14. backend/api/routes/health.py +344 -0
  15. backend/api/routes/internal.py +435 -0
  16. backend/api/routes/jobs.py +1610 -0
  17. backend/api/routes/review.py +652 -0
  18. backend/api/routes/themes.py +162 -0
  19. backend/api/routes/users.py +1014 -0
  20. backend/config.py +172 -0
  21. backend/main.py +133 -0
  22. backend/middleware/__init__.py +5 -0
  23. backend/middleware/audit_logging.py +124 -0
  24. backend/models/__init__.py +0 -0
  25. backend/models/job.py +519 -0
  26. backend/models/requests.py +123 -0
  27. backend/models/theme.py +153 -0
  28. backend/models/user.py +254 -0
  29. backend/models/worker_log.py +164 -0
  30. backend/pyproject.toml +29 -0
  31. backend/quick-check.sh +93 -0
  32. backend/requirements.txt +29 -0
  33. backend/run_tests.sh +60 -0
  34. backend/services/__init__.py +0 -0
  35. backend/services/audio_analysis_service.py +243 -0
  36. backend/services/audio_editing_service.py +278 -0
  37. backend/services/audio_search_service.py +702 -0
  38. backend/services/auth_service.py +630 -0
  39. backend/services/credential_manager.py +792 -0
  40. backend/services/discord_service.py +172 -0
  41. backend/services/dropbox_service.py +301 -0
  42. backend/services/email_service.py +1093 -0
  43. backend/services/encoding_interface.py +454 -0
  44. backend/services/encoding_service.py +405 -0
  45. backend/services/firestore_service.py +512 -0
  46. backend/services/flacfetch_client.py +573 -0
  47. backend/services/gce_encoding/README.md +72 -0
  48. backend/services/gce_encoding/__init__.py +22 -0
  49. backend/services/gce_encoding/main.py +589 -0
  50. backend/services/gce_encoding/requirements.txt +16 -0
  51. backend/services/gdrive_service.py +356 -0
  52. backend/services/job_logging.py +258 -0
  53. backend/services/job_manager.py +842 -0
  54. backend/services/job_notification_service.py +271 -0
  55. backend/services/local_encoding_service.py +590 -0
  56. backend/services/local_preview_encoding_service.py +407 -0
  57. backend/services/lyrics_cache_service.py +216 -0
  58. backend/services/metrics.py +413 -0
  59. backend/services/packaging_service.py +287 -0
  60. backend/services/rclone_service.py +106 -0
  61. backend/services/storage_service.py +209 -0
  62. backend/services/stripe_service.py +275 -0
  63. backend/services/structured_logging.py +254 -0
  64. backend/services/template_service.py +330 -0
  65. backend/services/theme_service.py +469 -0
  66. backend/services/tracing.py +543 -0
  67. backend/services/user_service.py +721 -0
  68. backend/services/worker_service.py +558 -0
  69. backend/services/youtube_service.py +112 -0
  70. backend/services/youtube_upload_service.py +445 -0
  71. backend/tests/__init__.py +4 -0
  72. backend/tests/conftest.py +224 -0
  73. backend/tests/emulator/__init__.py +7 -0
  74. backend/tests/emulator/conftest.py +88 -0
  75. backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
  76. backend/tests/emulator/test_emulator_integration.py +356 -0
  77. backend/tests/emulator/test_style_loading_direct.py +436 -0
  78. backend/tests/emulator/test_worker_logs_direct.py +229 -0
  79. backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
  80. backend/tests/requirements-test.txt +10 -0
  81. backend/tests/requirements.txt +6 -0
  82. backend/tests/test_admin_email_endpoints.py +411 -0
  83. backend/tests/test_api_integration.py +460 -0
  84. backend/tests/test_api_routes.py +93 -0
  85. backend/tests/test_audio_analysis_service.py +294 -0
  86. backend/tests/test_audio_editing_service.py +386 -0
  87. backend/tests/test_audio_search.py +1398 -0
  88. backend/tests/test_audio_services.py +378 -0
  89. backend/tests/test_auth_firestore.py +231 -0
  90. backend/tests/test_config_extended.py +68 -0
  91. backend/tests/test_credential_manager.py +377 -0
  92. backend/tests/test_dependencies.py +54 -0
  93. backend/tests/test_discord_service.py +244 -0
  94. backend/tests/test_distribution_services.py +820 -0
  95. backend/tests/test_dropbox_service.py +472 -0
  96. backend/tests/test_email_service.py +492 -0
  97. backend/tests/test_emulator_integration.py +322 -0
  98. backend/tests/test_encoding_interface.py +412 -0
  99. backend/tests/test_file_upload.py +1739 -0
  100. backend/tests/test_flacfetch_client.py +632 -0
  101. backend/tests/test_gdrive_service.py +524 -0
  102. backend/tests/test_instrumental_api.py +431 -0
  103. backend/tests/test_internal_api.py +343 -0
  104. backend/tests/test_job_creation_regression.py +583 -0
  105. backend/tests/test_job_manager.py +339 -0
  106. backend/tests/test_job_manager_notifications.py +329 -0
  107. backend/tests/test_job_notification_service.py +443 -0
  108. backend/tests/test_jobs_api.py +273 -0
  109. backend/tests/test_local_encoding_service.py +423 -0
  110. backend/tests/test_local_preview_encoding_service.py +567 -0
  111. backend/tests/test_main.py +87 -0
  112. backend/tests/test_models.py +918 -0
  113. backend/tests/test_packaging_service.py +382 -0
  114. backend/tests/test_requests.py +201 -0
  115. backend/tests/test_routes_jobs.py +282 -0
  116. backend/tests/test_routes_review.py +337 -0
  117. backend/tests/test_services.py +556 -0
  118. backend/tests/test_services_extended.py +112 -0
  119. backend/tests/test_storage_service.py +448 -0
  120. backend/tests/test_style_upload.py +261 -0
  121. backend/tests/test_template_service.py +295 -0
  122. backend/tests/test_theme_service.py +516 -0
  123. backend/tests/test_unicode_sanitization.py +522 -0
  124. backend/tests/test_upload_api.py +256 -0
  125. backend/tests/test_validate.py +156 -0
  126. backend/tests/test_video_worker_orchestrator.py +847 -0
  127. backend/tests/test_worker_log_subcollection.py +509 -0
  128. backend/tests/test_worker_logging.py +365 -0
  129. backend/tests/test_workers.py +1116 -0
  130. backend/tests/test_workers_extended.py +178 -0
  131. backend/tests/test_youtube_service.py +247 -0
  132. backend/tests/test_youtube_upload_service.py +568 -0
  133. backend/validate.py +173 -0
  134. backend/version.py +27 -0
  135. backend/workers/README.md +597 -0
  136. backend/workers/__init__.py +11 -0
  137. backend/workers/audio_worker.py +618 -0
  138. backend/workers/lyrics_worker.py +683 -0
  139. backend/workers/render_video_worker.py +483 -0
  140. backend/workers/screens_worker.py +525 -0
  141. backend/workers/style_helper.py +198 -0
  142. backend/workers/video_worker.py +1277 -0
  143. backend/workers/video_worker_orchestrator.py +701 -0
  144. backend/workers/worker_logging.py +278 -0
  145. karaoke_gen/instrumental_review/static/index.html +7 -4
  146. karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
  147. karaoke_gen/utils/__init__.py +163 -8
  148. karaoke_gen/video_background_processor.py +9 -4
  149. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/METADATA +1 -1
  150. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/RECORD +186 -41
  151. lyrics_transcriber/correction/agentic/providers/config.py +9 -5
  152. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +1 -51
  153. lyrics_transcriber/correction/corrector.py +192 -130
  154. lyrics_transcriber/correction/operations.py +24 -9
  155. lyrics_transcriber/frontend/package-lock.json +2 -2
  156. lyrics_transcriber/frontend/package.json +1 -1
  157. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
  158. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
  159. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
  160. lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
  161. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
  162. lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
  163. lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
  164. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
  165. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
  166. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
  167. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
  168. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  169. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
  170. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
  171. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
  172. lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
  173. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
  174. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
  175. lyrics_transcriber/frontend/src/theme.ts +42 -15
  176. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  177. lyrics_transcriber/frontend/vite.config.js +5 -0
  178. lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
  179. lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
  180. lyrics_transcriber/frontend/web_assets/index.html +6 -2
  181. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
  182. lyrics_transcriber/output/generator.py +17 -3
  183. lyrics_transcriber/output/video.py +60 -95
  184. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
  185. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/WHEEL +0 -0
  186. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/entry_points.txt +0 -0
  187. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,701 @@
1
+ """
2
+ Video Worker Orchestrator.
3
+
4
+ Coordinates the video generation pipeline stages in a unified way,
5
+ regardless of whether encoding happens locally or on GCE.
6
+
7
+ This resolves the code path divergence where GCE encoding bypassed
8
+ features like YouTube upload, Discord notifications, and CDG/TXT packaging.
9
+
10
+ Pipeline stages:
11
+ 1. Setup - Download files, prepare directories (done by the caller before run(); run() itself starts at Packaging)
12
+ 2. Packaging - CDG/TXT generation (if enabled)
13
+ 3. Encoding - GCE or Local via EncodingBackend interface
14
+ 4. Organization - Brand code, folder structure
15
+ 5. Distribution - YouTube, Dropbox, Google Drive uploads
16
+ 6. Notification - Discord notifications
17
+ """
18
+
19
+ import logging
20
+ import os
21
+ import time
22
+ from dataclasses import dataclass, field
23
+ from typing import Optional, Dict, Any, List
24
+ from pathlib import Path
25
+
26
+ from backend.models.job import JobStatus
27
+ from backend.services.job_manager import JobManager
28
+ from backend.services.storage_service import StorageService
29
+ from backend.services.tracing import job_span, add_span_event
30
+ from karaoke_gen.utils import sanitize_filename
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
@dataclass
class OrchestratorConfig:
    """
    Per-job settings consumed by VideoWorkerOrchestrator.

    Only the job identity (job_id, artist, title) and the three mandatory
    input paths are required; every other field has a default. The field
    order defines the positional constructor signature, so it must stay
    stable.

    Field groups:
        Identity:      job_id, artist, title
        Inputs:        title/karaoke video paths, instrumental audio path,
                       plus optional end video, LRC file and title JPG
                       (all expected to live in the job's temp directory)
        Output:        output_dir
        Feature flags: enable_cdg, enable_txt, enable_youtube_upload
        Services:      brand prefix, Discord webhook, YouTube credentials
                       and description template, CDG styles
        Distribution:  dropbox_path, gdrive_folder_id
        Re-processing: keep_brand_code (reuse an existing brand code)
        Encoding:      encoding_backend, one of "auto", "local", "gce"
        Misc:          dry_run, non_interactive
    """

    # -- Identity -----------------------------------------------------------
    job_id: str
    artist: str
    title: str

    # -- Input files (paths inside the temp dir) ----------------------------
    title_video_path: str
    karaoke_video_path: str
    instrumental_audio_path: str
    end_video_path: Optional[str] = None
    lrc_file_path: Optional[str] = None
    title_jpg_path: Optional[str] = None

    # -- Where generated files are written ----------------------------------
    output_dir: str = ""

    # -- Optional pipeline features -----------------------------------------
    enable_cdg: bool = False
    enable_txt: bool = False
    enable_youtube_upload: bool = False

    # -- Settings handed to the individual services -------------------------
    brand_prefix: Optional[str] = None
    discord_webhook_url: Optional[str] = None
    youtube_credentials: Optional[Dict[str, Any]] = None
    youtube_description_template: Optional[str] = None
    cdg_styles: Optional[Dict[str, Any]] = None

    # -- Dropbox / Google Drive targets -------------------------------------
    dropbox_path: Optional[str] = None
    gdrive_folder_id: Optional[str] = None

    # -- Existing brand code to keep when a job is re-processed -------------
    keep_brand_code: Optional[str] = None

    # -- Preferred encoding backend: "auto", "local" or "gce" ---------------
    encoding_backend: str = "auto"

    # -- Miscellaneous switches ---------------------------------------------
    dry_run: bool = False
    non_interactive: bool = True
78
+
79
+
80
@dataclass
class OrchestratorResult:
    """
    Outcome of a VideoWorkerOrchestrator run.

    Only ``success`` is required. All other fields start out empty and are
    filled in by the pipeline stages as they complete.

    Field groups:
        Status:        success, error_message
        Video outputs: final_video (lossless 4K MP4), final_video_mkv
                       (lossless 4K MKV), final_video_lossy (lossy 4K MP4),
                       final_video_720p (lossy 720p MP4)
        Packages:      final_karaoke_cdg_zip, final_karaoke_txt_zip
        Organization:  brand_code
        Distribution:  youtube_url, dropbox_link, gdrive_files
        Timing:        encoding_time_seconds, total_time_seconds
    """

    # -- Status -------------------------------------------------------------
    success: bool
    error_message: Optional[str] = None

    # -- Encoded videos -----------------------------------------------------
    final_video: Optional[str] = None          # lossless 4K MP4
    final_video_mkv: Optional[str] = None      # lossless 4K MKV
    final_video_lossy: Optional[str] = None    # lossy 4K MP4
    final_video_720p: Optional[str] = None     # lossy 720p MP4

    # -- Karaoke packages ---------------------------------------------------
    final_karaoke_cdg_zip: Optional[str] = None
    final_karaoke_txt_zip: Optional[str] = None

    # -- Organization -------------------------------------------------------
    brand_code: Optional[str] = None

    # -- Distribution results -----------------------------------------------
    youtube_url: Optional[str] = None
    dropbox_link: Optional[str] = None
    # default_factory gives every result its own dict rather than a shared one
    gdrive_files: Optional[Dict[str, str]] = field(default_factory=dict)

    # -- Timing (seconds) ---------------------------------------------------
    encoding_time_seconds: Optional[float] = None
    total_time_seconds: Optional[float] = None
105
+
106
+
107
+ class VideoWorkerOrchestrator:
108
+ """
109
+ Orchestrates the video generation pipeline.
110
+
111
+ This class coordinates all stages of video generation in a unified way,
112
+ ensuring that features like YouTube upload and Discord notifications
113
+ work regardless of whether GCE or local encoding is used.
114
+ """
115
+
116
+ def __init__(
117
+ self,
118
+ config: OrchestratorConfig,
119
+ job_manager: Optional[JobManager] = None,
120
+ storage: Optional[StorageService] = None,
121
+ job_logger: Optional[logging.Logger] = None,
122
+ ):
123
+ """
124
+ Initialize the orchestrator.
125
+
126
+ Args:
127
+ config: Orchestrator configuration
128
+ job_manager: Job manager for state updates (optional)
129
+ storage: Storage service for file downloads (optional)
130
+ job_logger: Job-specific logger (optional)
131
+ """
132
+ self.config = config
133
+ self.job_manager = job_manager
134
+ self.storage = storage
135
+ self.job_log = job_logger or logging.getLogger(__name__)
136
+
137
+ # Services (lazy-loaded)
138
+ self._encoding_backend = None
139
+ self._packaging_service = None
140
+ self._youtube_service = None
141
+ self._discord_service = None
142
+
143
+ # State
144
+ self.result = OrchestratorResult(success=False)
145
+
146
+ def _get_encoding_backend(self):
147
+ """Lazy-load the encoding backend."""
148
+ if self._encoding_backend is None:
149
+ from backend.services.encoding_interface import get_encoding_backend
150
+ self._encoding_backend = get_encoding_backend(
151
+ backend_type=self.config.encoding_backend,
152
+ dry_run=self.config.dry_run,
153
+ logger=self.job_log,
154
+ )
155
+ return self._encoding_backend
156
+
157
+ def _get_packaging_service(self):
158
+ """Lazy-load the packaging service."""
159
+ if self._packaging_service is None:
160
+ from backend.services.packaging_service import PackagingService
161
+ self._packaging_service = PackagingService(
162
+ cdg_styles=self.config.cdg_styles,
163
+ dry_run=self.config.dry_run,
164
+ non_interactive=self.config.non_interactive,
165
+ logger=self.job_log,
166
+ )
167
+ return self._packaging_service
168
+
169
+ def _get_youtube_service(self):
170
+ """Lazy-load the YouTube upload service."""
171
+ if self._youtube_service is None:
172
+ from backend.services.youtube_upload_service import YouTubeUploadService
173
+ self._youtube_service = YouTubeUploadService(
174
+ credentials=self.config.youtube_credentials,
175
+ non_interactive=self.config.non_interactive,
176
+ server_side_mode=True,
177
+ dry_run=self.config.dry_run,
178
+ logger=self.job_log,
179
+ )
180
+ return self._youtube_service
181
+
182
+ def _get_discord_service(self):
183
+ """Lazy-load the Discord notification service."""
184
+ if self._discord_service is None:
185
+ from backend.services.discord_service import DiscordNotificationService
186
+ self._discord_service = DiscordNotificationService(
187
+ webhook_url=self.config.discord_webhook_url,
188
+ dry_run=self.config.dry_run,
189
+ logger=self.job_log,
190
+ )
191
+ return self._discord_service
192
+
193
+ def _update_progress(self, status: JobStatus, progress: int, message: str):
194
+ """Update job progress if job_manager is available."""
195
+ if self.job_manager:
196
+ self.job_manager.transition_to_state(
197
+ job_id=self.config.job_id,
198
+ new_status=status,
199
+ progress=progress,
200
+ message=message
201
+ )
202
+
203
+ async def run(self) -> OrchestratorResult:
204
+ """
205
+ Run the full video generation pipeline.
206
+
207
+ Returns:
208
+ OrchestratorResult with generated files and metadata
209
+ """
210
+ start_time = time.time()
211
+
212
+ try:
213
+ with job_span("orchestrator", self.config.job_id) as span:
214
+ self.job_log.info(f"Starting orchestrated video generation for {self.config.artist} - {self.config.title}")
215
+
216
+ # Stage 1: Packaging (CDG/TXT) - runs BEFORE encoding
217
+ # This depends on LRC file, not on encoded videos
218
+ if self.config.enable_cdg or self.config.enable_txt:
219
+ await self._run_packaging()
220
+
221
+ # Stage 2: Encoding
222
+ await self._run_encoding()
223
+
224
+ # Stage 3: Organization (brand code)
225
+ await self._run_organization()
226
+
227
+ # Stage 4: Distribution (YouTube, Dropbox, GDrive)
228
+ await self._run_distribution()
229
+
230
+ # Stage 5: Notifications (Discord)
231
+ await self._run_notifications()
232
+
233
+ self.result.success = True
234
+ self.result.total_time_seconds = time.time() - start_time
235
+
236
+ self.job_log.info(f"Orchestrated video generation complete in {self.result.total_time_seconds:.1f}s")
237
+
238
+ except Exception as e:
239
+ self.result.success = False
240
+ self.result.error_message = str(e)
241
+ self.result.total_time_seconds = time.time() - start_time
242
+ self.job_log.error(f"Orchestrated video generation failed: {e}")
243
+ logger.error(f"[job:{self.config.job_id}] Orchestration failed: {e}")
244
+
245
+ return self.result
246
+
247
+ async def _run_packaging(self):
248
+ """Run the packaging stage (CDG/TXT generation)."""
249
+ self.job_log.info("Starting packaging stage (CDG/TXT)")
250
+
251
+ if not self.config.lrc_file_path or not os.path.isfile(self.config.lrc_file_path):
252
+ self.job_log.warning("No LRC file available, skipping CDG/TXT packaging")
253
+ return
254
+
255
+ base_name = f"{self.config.artist} - {self.config.title}"
256
+ packaging_service = self._get_packaging_service()
257
+
258
+ # Generate CDG package
259
+ if self.config.enable_cdg:
260
+ self.job_log.info("Generating CDG package")
261
+ try:
262
+ cdg_zip_path = os.path.join(
263
+ self.config.output_dir,
264
+ f"{base_name} (Final Karaoke CDG).zip"
265
+ )
266
+ mp3_path = os.path.join(
267
+ self.config.output_dir,
268
+ f"{base_name} (Karaoke).mp3"
269
+ )
270
+ cdg_path = os.path.join(
271
+ self.config.output_dir,
272
+ f"{base_name} (Karaoke).cdg"
273
+ )
274
+
275
+ zip_file, mp3_file, cdg_file = packaging_service.create_cdg_package(
276
+ lrc_file=self.config.lrc_file_path,
277
+ audio_file=self.config.instrumental_audio_path,
278
+ output_zip_path=cdg_zip_path,
279
+ artist=self.config.artist,
280
+ title=self.config.title,
281
+ output_mp3_path=mp3_path,
282
+ output_cdg_path=cdg_path,
283
+ )
284
+
285
+ self.result.final_karaoke_cdg_zip = zip_file
286
+ self.job_log.info(f"CDG package created: {zip_file}")
287
+
288
+ except Exception as e:
289
+ self.job_log.error(f"CDG generation failed: {e}")
290
+ # Don't fail the pipeline, CDG is optional
291
+
292
+ # Generate TXT package
293
+ if self.config.enable_txt:
294
+ self.job_log.info("Generating TXT package")
295
+ try:
296
+ # TXT package needs MP3 file (from CDG generation or create it)
297
+ mp3_path = os.path.join(
298
+ self.config.output_dir,
299
+ f"{base_name} (Karaoke).mp3"
300
+ )
301
+
302
+ if not os.path.isfile(mp3_path):
303
+ self.job_log.info("MP3 not found, CDG must be enabled first for TXT")
304
+ return
305
+
306
+ txt_zip_path = os.path.join(
307
+ self.config.output_dir,
308
+ f"{base_name} (Final Karaoke TXT).zip"
309
+ )
310
+
311
+ zip_file, txt_file = packaging_service.create_txt_package(
312
+ lrc_file=self.config.lrc_file_path,
313
+ mp3_file=mp3_path,
314
+ output_zip_path=txt_zip_path,
315
+ )
316
+
317
+ self.result.final_karaoke_txt_zip = zip_file
318
+ self.job_log.info(f"TXT package created: {zip_file}")
319
+
320
+ except Exception as e:
321
+ self.job_log.error(f"TXT generation failed: {e}")
322
+ # Don't fail the pipeline, TXT is optional
323
+
324
+ async def _run_encoding(self):
325
+ """Run the encoding stage."""
326
+ self.job_log.info("Starting encoding stage")
327
+ self._update_progress(JobStatus.ENCODING, 75, "Encoding videos")
328
+
329
+ encoding_backend = self._get_encoding_backend()
330
+ self.job_log.info(f"Using encoding backend: {encoding_backend.name}")
331
+
332
+ # Build encoding input
333
+ from backend.services.encoding_interface import EncodingInput
334
+ from backend.config import settings
335
+
336
+ # For GCE encoding, we need to provide GCS paths
337
+ gcs_bucket = settings.gcs_bucket_name
338
+ input_gcs_path = f"gs://{gcs_bucket}/jobs/{self.config.job_id}/"
339
+ output_gcs_path = f"gs://{gcs_bucket}/jobs/{self.config.job_id}/finals/"
340
+
341
+ encoding_input = EncodingInput(
342
+ title_video_path=self.config.title_video_path,
343
+ karaoke_video_path=self.config.karaoke_video_path,
344
+ instrumental_audio_path=self.config.instrumental_audio_path,
345
+ end_video_path=self.config.end_video_path,
346
+ artist=self.config.artist,
347
+ title=self.config.title,
348
+ brand_code=self.config.keep_brand_code,
349
+ output_dir=self.config.output_dir,
350
+ options={
351
+ "job_id": self.config.job_id,
352
+ "input_gcs_path": input_gcs_path,
353
+ "output_gcs_path": output_gcs_path,
354
+ },
355
+ )
356
+
357
+ # Run encoding
358
+ with job_span("encoding", self.config.job_id) as span:
359
+ add_span_event("encoding_started", {"backend": encoding_backend.name})
360
+
361
+ output = await encoding_backend.encode(encoding_input)
362
+
363
+ add_span_event("encoding_completed", {
364
+ "success": output.success,
365
+ "duration": output.encoding_time_seconds or 0
366
+ })
367
+
368
+ if not output.success:
369
+ raise Exception(f"Encoding failed: {output.error_message}")
370
+
371
+ # Store results - for GCE backend, these are GCS blob paths that need to be downloaded
372
+ self.result.final_video = output.lossless_4k_mp4_path
373
+ self.result.final_video_mkv = output.lossless_mkv_path
374
+ self.result.final_video_lossy = output.lossy_4k_mp4_path
375
+ self.result.final_video_720p = output.lossy_720p_mp4_path
376
+ self.result.encoding_time_seconds = output.encoding_time_seconds
377
+
378
+ # For GCE encoding, download the encoded files from GCS to local directory
379
+ # This is required for YouTube upload and other local file operations
380
+ if encoding_backend.name == "gce" and self.storage:
381
+ await self._download_gce_encoded_files(output)
382
+
383
+ self.job_log.info(f"Encoding complete ({encoding_backend.name}) in {output.encoding_time_seconds:.1f}s")
384
+
385
+ async def _download_gce_encoded_files(self, output):
386
+ """
387
+ Download GCE-encoded files from GCS to the local output directory.
388
+
389
+ GCE encoding stores files in GCS and returns blob paths like:
390
+ 'jobs/{job_id}/finals/Artist - Title (Final Karaoke Lossless 4k).mp4'
391
+
392
+ This method downloads those files locally so that subsequent stages
393
+ (YouTube upload, etc.) can access them as local files.
394
+
395
+ Args:
396
+ output: EncodingOutput from the GCE backend with GCS blob paths
397
+ """
398
+ self.job_log.info("Downloading GCE-encoded files from GCS")
399
+
400
+ # Map of result attributes to download
401
+ file_mappings = [
402
+ ('lossless_4k_mp4_path', 'final_video'),
403
+ ('lossless_mkv_path', 'final_video_mkv'),
404
+ ('lossy_4k_mp4_path', 'final_video_lossy'),
405
+ ('lossy_720p_mp4_path', 'final_video_720p'),
406
+ ]
407
+
408
+ downloaded_count = 0
409
+ for output_attr, result_attr in file_mappings:
410
+ gcs_path = getattr(output, output_attr, None)
411
+ if not gcs_path:
412
+ continue
413
+
414
+ # Extract filename from GCS path
415
+ filename = os.path.basename(gcs_path)
416
+ local_path = os.path.join(self.config.output_dir, filename)
417
+
418
+ try:
419
+ self.job_log.info(f"Downloading {filename} from GCS")
420
+ self.storage.download_file(gcs_path, local_path)
421
+
422
+ # Update the result to point to local file
423
+ setattr(self.result, result_attr, local_path)
424
+ downloaded_count += 1
425
+ self.job_log.info(f"Downloaded {filename} to {local_path}")
426
+
427
+ except Exception as e:
428
+ self.job_log.error(f"Failed to download {filename}: {e}")
429
+ # Clear the result attribute so downstream doesn't try to use invalid GCS path
430
+ setattr(self.result, result_attr, None)
431
+ # Don't fail - some formats might not be generated
432
+
433
+ self.job_log.info(f"Downloaded {downloaded_count} encoded files from GCS")
434
+
435
+ async def _run_organization(self):
436
+ """Run the organization stage (brand code generation)."""
437
+ self.job_log.info("Starting organization stage")
438
+
439
+ # Use existing brand code if provided
440
+ if self.config.keep_brand_code:
441
+ self.result.brand_code = self.config.keep_brand_code
442
+ self.job_log.info(f"Using preserved brand code: {self.result.brand_code}")
443
+ return
444
+
445
+ # Generate brand code from Dropbox if configured
446
+ if self.config.dropbox_path and self.config.brand_prefix:
447
+ try:
448
+ from backend.services.dropbox_service import get_dropbox_service
449
+
450
+ dropbox = get_dropbox_service()
451
+ if dropbox.is_configured:
452
+ brand_code = dropbox.get_next_brand_code(
453
+ self.config.dropbox_path,
454
+ self.config.brand_prefix
455
+ )
456
+ self.result.brand_code = brand_code
457
+ self.job_log.info(f"Generated brand code: {brand_code}")
458
+ else:
459
+ self.job_log.warning("Dropbox not configured, skipping brand code generation")
460
+
461
+ except Exception as e:
462
+ self.job_log.error(f"Brand code generation failed: {e}")
463
+ # Don't fail - brand code is optional
464
+
465
+ async def _run_distribution(self):
466
+ """Run the distribution stage (YouTube, Dropbox, GDrive uploads)."""
467
+ self.job_log.info("Starting distribution stage")
468
+ self._update_progress(JobStatus.PACKAGING, 90, "Uploading files")
469
+
470
+ # YouTube upload
471
+ if self.config.enable_youtube_upload and self.config.youtube_credentials:
472
+ await self._upload_to_youtube()
473
+
474
+ # Dropbox upload
475
+ if self.config.dropbox_path and self.config.brand_prefix:
476
+ await self._upload_to_dropbox()
477
+
478
+ # Google Drive upload
479
+ if self.config.gdrive_folder_id:
480
+ await self._upload_to_gdrive()
481
+
482
+ async def _upload_to_youtube(self):
483
+ """Upload video to YouTube."""
484
+ self.job_log.info("Uploading to YouTube")
485
+
486
+ # Find the best video file to upload (prefer MKV for FLAC audio, then lossless MP4)
487
+ video_to_upload = None
488
+ if self.result.final_video_mkv and os.path.isfile(self.result.final_video_mkv):
489
+ video_to_upload = self.result.final_video_mkv
490
+ elif self.result.final_video and os.path.isfile(self.result.final_video):
491
+ video_to_upload = self.result.final_video
492
+ elif self.result.final_video_lossy and os.path.isfile(self.result.final_video_lossy):
493
+ video_to_upload = self.result.final_video_lossy
494
+
495
+ if not video_to_upload:
496
+ self.job_log.warning("No video file available for YouTube upload")
497
+ return
498
+
499
+ try:
500
+ youtube_service = self._get_youtube_service()
501
+
502
+ # Build video title
503
+ title = f"{self.config.artist} - {self.config.title} (Karaoke)"
504
+
505
+ # Build description
506
+ description = self.config.youtube_description_template or ""
507
+ if self.result.brand_code:
508
+ description = f"{description}\n\nBrand Code: {self.result.brand_code}".strip()
509
+
510
+ # Upload
511
+ video_id, video_url = youtube_service.upload_video(
512
+ video_path=video_to_upload,
513
+ title=title,
514
+ description=description,
515
+ thumbnail_path=self.config.title_jpg_path,
516
+ tags=["karaoke", self.config.artist, self.config.title],
517
+ replace_existing=True, # Server-side always replaces
518
+ )
519
+
520
+ if video_url:
521
+ self.result.youtube_url = video_url
522
+ self.job_log.info(f"Uploaded to YouTube: {video_url}")
523
+ else:
524
+ self.job_log.warning("YouTube upload did not return a URL")
525
+
526
+ except Exception as e:
527
+ self.job_log.error(f"YouTube upload failed: {e}")
528
+ # Don't fail the pipeline - YouTube is optional
529
+
530
+ async def _upload_to_dropbox(self):
531
+ """Upload files to Dropbox."""
532
+ self.job_log.info("Uploading to Dropbox")
533
+
534
+ try:
535
+ from backend.services.dropbox_service import get_dropbox_service
536
+
537
+ dropbox = get_dropbox_service()
538
+ if not dropbox.is_configured:
539
+ self.job_log.warning("Dropbox not configured, skipping upload")
540
+ return
541
+
542
+ # Sanitize artist/title to handle Unicode characters (curly quotes, em dashes, etc.)
543
+ safe_artist = sanitize_filename(self.config.artist) if self.config.artist else "Unknown"
544
+ safe_title = sanitize_filename(self.config.title) if self.config.title else "Unknown"
545
+ base_name = f"{safe_artist} - {safe_title}"
546
+ folder_name = f"{self.result.brand_code or 'TRACK-0000'} - {base_name}"
547
+ remote_folder = f"{self.config.dropbox_path}/{folder_name}"
548
+
549
+ # Upload entire output directory
550
+ dropbox.upload_folder(self.config.output_dir, remote_folder)
551
+
552
+ # Create sharing link
553
+ try:
554
+ sharing_link = dropbox.create_shared_link(remote_folder)
555
+ self.result.dropbox_link = sharing_link
556
+ self.job_log.info(f"Dropbox sharing link: {sharing_link}")
557
+ except Exception as e:
558
+ self.job_log.warning(f"Failed to create Dropbox sharing link: {e}")
559
+
560
+ self.job_log.info("Dropbox upload complete")
561
+
562
+ except Exception as e:
563
+ self.job_log.error(f"Dropbox upload failed: {e}")
564
+ # Don't fail the pipeline - Dropbox is optional
565
+
566
async def _upload_to_gdrive(self):
    """Upload the final deliverables to the Google Drive public share.

    This stage is best-effort: any exception raised here (including a failed
    import of the Drive service) is logged and swallowed so the pipeline
    continues.

    Reads ``artist``, ``title``, ``brand_prefix`` and ``gdrive_folder_id``
    from ``self.config`` and ``brand_code`` plus the three final output paths
    from ``self.result``. On success the value returned by
    ``upload_to_public_share`` is stored in ``self.result.gdrive_files``.
    """
    self.job_log.info("Uploading to Google Drive")

    try:
        from backend.services.gdrive_service import get_gdrive_service

        gdrive = get_gdrive_service()
        if not gdrive.is_configured:
            self.job_log.warning("Google Drive not configured, skipping upload")
            return

        # Sanitize artist/title to handle Unicode characters (curly quotes,
        # em dashes, etc.). This is the same rule the Dropbox upload and
        # create_orchestrator_config_from_job use, so the Drive names agree
        # with the staged file names and a missing value becomes "Unknown"
        # instead of the literal text "None".
        safe_artist = sanitize_filename(self.config.artist) if self.config.artist else "Unknown"
        safe_title = sanitize_filename(self.config.title) if self.config.title else "Unknown"
        base_name = f"{safe_artist} - {safe_title}"
        brand_code = self.result.brand_code or f"{self.config.brand_prefix or 'TRACK'}-0000"

        # Map result files to the keys passed to upload_to_public_share.
        output_files = {
            'final_karaoke_lossy_mp4': self.result.final_video_lossy,
            'final_karaoke_lossy_720p_mp4': self.result.final_video_720p,
            'final_karaoke_cdg_zip': self.result.final_karaoke_cdg_zip,
        }

        uploaded = gdrive.upload_to_public_share(
            root_folder_id=self.config.gdrive_folder_id,
            brand_code=brand_code,
            base_name=base_name,
            output_files=output_files,
        )

        self.result.gdrive_files = uploaded
        self.job_log.info(f"Google Drive upload complete: {len(uploaded)} files")

    except Exception as e:
        self.job_log.error(f"Google Drive upload failed: {e}")
        # Don't fail the pipeline - GDrive is optional
601
+
602
async def _run_notifications(self):
    """Run the notifications stage, which posts the YouTube URL to Discord.

    The stage is skipped when no Discord webhook URL is configured or when
    the result carries no YouTube URL. A failure while posting is logged and
    swallowed; it never propagates to the caller.
    """
    log = self.job_log
    log.info("Starting notifications stage")

    # Guard 1: nothing to post to.
    webhook_configured = bool(self.config.discord_webhook_url)
    if not webhook_configured:
        log.debug("No Discord webhook configured, skipping notification")
        return

    # Guard 2: nothing to post about.
    video_url = self.result.youtube_url
    if not video_url:
        log.info("No YouTube URL available, skipping Discord notification")
        return

    try:
        notifier = self._get_discord_service()
        notifier.post_video_notification(video_url)
        log.info("Discord notification sent")
    except Exception as e:
        # Don't fail the pipeline - notifications are optional
        log.error(f"Discord notification failed: {e}")
621
+
622
+
623
def create_orchestrator_config_from_job(
    job,
    temp_dir: str,
    youtube_credentials: Optional[Dict[str, Any]] = None,
    cdg_styles: Optional[Dict[str, Any]] = None,
) -> OrchestratorConfig:
    """
    Build the OrchestratorConfig for a job whose files are staged in temp_dir.

    Bridges the existing job structure and the orchestrator configuration:
    every input path is derived from the sanitized "<artist> - <title>" base
    name inside temp_dir, and optional job attributes fall back to a default
    when the job object does not define them.

    Args:
        job: Job object from Firestore
        temp_dir: Temporary directory with downloaded files; also used as
            the output directory
        youtube_credentials: Pre-loaded YouTube credentials
        cdg_styles: CDG style configuration

    Returns:
        OrchestratorConfig for the orchestrator
    """
    def staged(tail: str) -> str:
        # Every staged asset is named "<base_name> <tail>" directly in temp_dir.
        return os.path.join(temp_dir, f"{base_name} {tail}")

    def optional(attr: str, fallback=None):
        # Job attributes that may be absent on the job object.
        return getattr(job, attr, fallback)

    # Sanitize artist/title to handle Unicode characters (curly quotes, em dashes, etc.)
    artist_part = "Unknown" if not job.artist else sanitize_filename(job.artist)
    title_part = "Unknown" if not job.title else sanitize_filename(job.title)
    base_name = f"{artist_part} - {title_part}"

    # Determine instrumental file path: a user-supplied instrumental keeps its
    # own (lower-cased) extension, otherwise pick the Clean or Backing FLAC.
    selection = job.state_data.get('instrumental_selection', 'clean')
    user_instrumental = optional('existing_instrumental_gcs_path')

    if not user_instrumental:
        variant = "Backing" if selection != 'clean' else "Clean"
        instrumental_path = staged(f"(Instrumental {variant}).flac")
    else:
        user_ext = Path(user_instrumental).suffix.lower()
        instrumental_path = staged(f"(Instrumental User){user_ext}")

    return OrchestratorConfig(
        job_id=job.job_id,
        artist=job.artist,
        title=job.title,

        # Input files
        title_video_path=staged("(Title).mov"),
        karaoke_video_path=staged("(With Vocals).mov"),
        instrumental_audio_path=instrumental_path,
        end_video_path=staged("(End).mov"),
        lrc_file_path=staged("(Karaoke).lrc"),
        title_jpg_path=staged("(Title).jpg"),

        # Output directory
        output_dir=temp_dir,

        # Feature flags
        enable_cdg=optional('enable_cdg', False),
        enable_txt=optional('enable_txt', False),
        enable_youtube_upload=optional('enable_youtube_upload', False),

        # Service configurations
        brand_prefix=optional('brand_prefix'),
        discord_webhook_url=optional('discord_webhook_url'),
        youtube_credentials=youtube_credentials,
        youtube_description_template=optional('youtube_description_template'),
        cdg_styles=cdg_styles,

        # Dropbox/GDrive
        dropbox_path=optional('dropbox_path'),
        gdrive_folder_id=optional('gdrive_folder_id'),

        # Keep existing brand code
        keep_brand_code=optional('keep_brand_code'),

        # Encoding backend - auto selects GCE if available
        encoding_backend="auto",

        # Server-side defaults
        dry_run=False,
        non_interactive=True,
    )