karaoke-gen 0.90.1__py3-none-any.whl → 0.99.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- backend/.coveragerc +20 -0
- backend/.gitignore +37 -0
- backend/Dockerfile +43 -0
- backend/Dockerfile.base +74 -0
- backend/README.md +242 -0
- backend/__init__.py +0 -0
- backend/api/__init__.py +0 -0
- backend/api/dependencies.py +457 -0
- backend/api/routes/__init__.py +0 -0
- backend/api/routes/admin.py +835 -0
- backend/api/routes/audio_search.py +913 -0
- backend/api/routes/auth.py +348 -0
- backend/api/routes/file_upload.py +2112 -0
- backend/api/routes/health.py +409 -0
- backend/api/routes/internal.py +435 -0
- backend/api/routes/jobs.py +1629 -0
- backend/api/routes/review.py +652 -0
- backend/api/routes/themes.py +162 -0
- backend/api/routes/users.py +1513 -0
- backend/config.py +172 -0
- backend/main.py +157 -0
- backend/middleware/__init__.py +5 -0
- backend/middleware/audit_logging.py +124 -0
- backend/models/__init__.py +0 -0
- backend/models/job.py +519 -0
- backend/models/requests.py +123 -0
- backend/models/theme.py +153 -0
- backend/models/user.py +254 -0
- backend/models/worker_log.py +164 -0
- backend/pyproject.toml +29 -0
- backend/quick-check.sh +93 -0
- backend/requirements.txt +29 -0
- backend/run_tests.sh +60 -0
- backend/services/__init__.py +0 -0
- backend/services/audio_analysis_service.py +243 -0
- backend/services/audio_editing_service.py +278 -0
- backend/services/audio_search_service.py +702 -0
- backend/services/auth_service.py +630 -0
- backend/services/credential_manager.py +792 -0
- backend/services/discord_service.py +172 -0
- backend/services/dropbox_service.py +301 -0
- backend/services/email_service.py +1093 -0
- backend/services/encoding_interface.py +454 -0
- backend/services/encoding_service.py +502 -0
- backend/services/firestore_service.py +512 -0
- backend/services/flacfetch_client.py +573 -0
- backend/services/gce_encoding/README.md +72 -0
- backend/services/gce_encoding/__init__.py +22 -0
- backend/services/gce_encoding/main.py +589 -0
- backend/services/gce_encoding/requirements.txt +16 -0
- backend/services/gdrive_service.py +356 -0
- backend/services/job_logging.py +258 -0
- backend/services/job_manager.py +853 -0
- backend/services/job_notification_service.py +271 -0
- backend/services/langfuse_preloader.py +98 -0
- backend/services/local_encoding_service.py +590 -0
- backend/services/local_preview_encoding_service.py +407 -0
- backend/services/lyrics_cache_service.py +216 -0
- backend/services/metrics.py +413 -0
- backend/services/nltk_preloader.py +122 -0
- backend/services/packaging_service.py +287 -0
- backend/services/rclone_service.py +106 -0
- backend/services/spacy_preloader.py +65 -0
- backend/services/storage_service.py +209 -0
- backend/services/stripe_service.py +371 -0
- backend/services/structured_logging.py +254 -0
- backend/services/template_service.py +330 -0
- backend/services/theme_service.py +469 -0
- backend/services/tracing.py +543 -0
- backend/services/user_service.py +721 -0
- backend/services/worker_service.py +558 -0
- backend/services/youtube_service.py +112 -0
- backend/services/youtube_upload_service.py +445 -0
- backend/tests/__init__.py +4 -0
- backend/tests/conftest.py +224 -0
- backend/tests/emulator/__init__.py +7 -0
- backend/tests/emulator/conftest.py +109 -0
- backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
- backend/tests/emulator/test_emulator_integration.py +356 -0
- backend/tests/emulator/test_style_loading_direct.py +436 -0
- backend/tests/emulator/test_worker_logs_direct.py +229 -0
- backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
- backend/tests/requirements-test.txt +10 -0
- backend/tests/requirements.txt +6 -0
- backend/tests/test_admin_email_endpoints.py +411 -0
- backend/tests/test_api_integration.py +460 -0
- backend/tests/test_api_routes.py +93 -0
- backend/tests/test_audio_analysis_service.py +294 -0
- backend/tests/test_audio_editing_service.py +386 -0
- backend/tests/test_audio_search.py +1398 -0
- backend/tests/test_audio_services.py +378 -0
- backend/tests/test_auth_firestore.py +231 -0
- backend/tests/test_config_extended.py +68 -0
- backend/tests/test_credential_manager.py +377 -0
- backend/tests/test_dependencies.py +54 -0
- backend/tests/test_discord_service.py +244 -0
- backend/tests/test_distribution_services.py +820 -0
- backend/tests/test_dropbox_service.py +472 -0
- backend/tests/test_email_service.py +492 -0
- backend/tests/test_emulator_integration.py +322 -0
- backend/tests/test_encoding_interface.py +412 -0
- backend/tests/test_file_upload.py +1739 -0
- backend/tests/test_flacfetch_client.py +632 -0
- backend/tests/test_gdrive_service.py +524 -0
- backend/tests/test_instrumental_api.py +431 -0
- backend/tests/test_internal_api.py +343 -0
- backend/tests/test_job_creation_regression.py +583 -0
- backend/tests/test_job_manager.py +356 -0
- backend/tests/test_job_manager_notifications.py +329 -0
- backend/tests/test_job_notification_service.py +443 -0
- backend/tests/test_jobs_api.py +283 -0
- backend/tests/test_local_encoding_service.py +423 -0
- backend/tests/test_local_preview_encoding_service.py +567 -0
- backend/tests/test_main.py +87 -0
- backend/tests/test_models.py +918 -0
- backend/tests/test_packaging_service.py +382 -0
- backend/tests/test_requests.py +201 -0
- backend/tests/test_routes_jobs.py +282 -0
- backend/tests/test_routes_review.py +337 -0
- backend/tests/test_services.py +556 -0
- backend/tests/test_services_extended.py +112 -0
- backend/tests/test_spacy_preloader.py +119 -0
- backend/tests/test_storage_service.py +448 -0
- backend/tests/test_style_upload.py +261 -0
- backend/tests/test_template_service.py +295 -0
- backend/tests/test_theme_service.py +516 -0
- backend/tests/test_unicode_sanitization.py +522 -0
- backend/tests/test_upload_api.py +256 -0
- backend/tests/test_validate.py +156 -0
- backend/tests/test_video_worker_orchestrator.py +847 -0
- backend/tests/test_worker_log_subcollection.py +509 -0
- backend/tests/test_worker_logging.py +365 -0
- backend/tests/test_workers.py +1116 -0
- backend/tests/test_workers_extended.py +178 -0
- backend/tests/test_youtube_service.py +247 -0
- backend/tests/test_youtube_upload_service.py +568 -0
- backend/utils/test_data.py +27 -0
- backend/validate.py +173 -0
- backend/version.py +27 -0
- backend/workers/README.md +597 -0
- backend/workers/__init__.py +11 -0
- backend/workers/audio_worker.py +618 -0
- backend/workers/lyrics_worker.py +683 -0
- backend/workers/render_video_worker.py +483 -0
- backend/workers/screens_worker.py +535 -0
- backend/workers/style_helper.py +198 -0
- backend/workers/video_worker.py +1277 -0
- backend/workers/video_worker_orchestrator.py +701 -0
- backend/workers/worker_logging.py +278 -0
- karaoke_gen/instrumental_review/static/index.html +7 -4
- karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
- karaoke_gen/utils/__init__.py +163 -8
- karaoke_gen/video_background_processor.py +9 -4
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/METADATA +1 -1
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/RECORD +196 -46
- lyrics_transcriber/correction/agentic/agent.py +17 -6
- lyrics_transcriber/correction/agentic/providers/config.py +9 -5
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -93
- lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
- lyrics_transcriber/correction/anchor_sequence.py +151 -37
- lyrics_transcriber/correction/corrector.py +192 -130
- lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
- lyrics_transcriber/correction/operations.py +24 -9
- lyrics_transcriber/correction/phrase_analyzer.py +18 -0
- lyrics_transcriber/frontend/package-lock.json +2 -2
- lyrics_transcriber/frontend/package.json +1 -1
- lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
- lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
- lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
- lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
- lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
- lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
- lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
- lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
- lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
- lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
- lyrics_transcriber/frontend/src/theme.ts +42 -15
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/frontend/vite.config.js +5 -0
- lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
- lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
- lyrics_transcriber/frontend/web_assets/index.html +6 -2
- lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
- lyrics_transcriber/output/generator.py +17 -3
- lyrics_transcriber/output/video.py +60 -95
- lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/WHEEL +0 -0
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/entry_points.txt +0 -0
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,558 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Worker coordination service.
|
|
3
|
+
|
|
4
|
+
Handles triggering and coordinating background workers.
|
|
5
|
+
Supports two modes:
|
|
6
|
+
- Cloud Tasks (production): Guaranteed delivery, automatic retries, horizontal scaling
|
|
7
|
+
- Direct HTTP (development): Faster iteration, simpler debugging
|
|
8
|
+
|
|
9
|
+
SOLID Principles:
|
|
10
|
+
- Single Responsibility: Only handles worker coordination
|
|
11
|
+
- Dependency Inversion: Depends on HTTP abstraction, not implementation
|
|
12
|
+
- Open/Closed: Can add new workers without modifying existing code
|
|
13
|
+
|
|
14
|
+
Observability:
|
|
15
|
+
- Propagates trace context through Cloud Tasks for distributed tracing
|
|
16
|
+
- All worker invocations create spans linked to original request trace
|
|
17
|
+
"""
|
|
18
|
+
import logging
|
|
19
|
+
import os
|
|
20
|
+
import json
|
|
21
|
+
from typing import Optional
|
|
22
|
+
import httpx
|
|
23
|
+
from google.protobuf import duration_pb2
|
|
24
|
+
|
|
25
|
+
from backend.config import get_settings
|
|
26
|
+
from backend.services.tracing import inject_trace_context
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Cloud Tasks queue name for each worker type.
# The queues themselves are created by Pulumi in infrastructure/__main__.py.
WORKER_QUEUES = {
    "audio": "audio-worker-queue",
    "lyrics": "lyrics-worker-queue",
    "screens": "screens-worker-queue",
    "render-video": "render-worker-queue",
    "video": "video-worker-queue",
    # Used for delayed idle reminder checks
    "idle-reminder": "idle-reminder-queue",
}

# Dispatch deadline per worker type, in seconds: how long Cloud Tasks waits
# for the HTTP handler to respond before treating the attempt as failed.
# Cloud Tasks caps this at 1800s (30 min). Each value must be at least the
# worker's own timeout plus a buffer for startup/upload.
WORKER_DISPATCH_DEADLINES = {
    # Modal separation can take 15-20 min
    "audio": 30 * 60,
    # TRANSCRIPTION_TIMEOUT_SECONDS is 1200s (20 min)
    "lyrics": 25 * 60,
    # Screen generation is fast
    "screens": 10 * 60,
    # Video encoding can be slow
    "render-video": 30 * 60,
    "video": 30 * 60,
    # Quick check and potential email send
    "idle-reminder": 1 * 60,
}

# Default delay before an idle reminder check fires (5 minutes, in seconds)
IDLE_REMINDER_DELAY_SECONDS = 300
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class WorkerService:
|
|
61
|
+
"""
|
|
62
|
+
Service for coordinating background workers.
|
|
63
|
+
|
|
64
|
+
Supports two execution modes controlled by ENABLE_CLOUD_TASKS env var:
|
|
65
|
+
|
|
66
|
+
1. Cloud Tasks mode (production, ENABLE_CLOUD_TASKS=true):
|
|
67
|
+
- Tasks enqueued to Cloud Tasks for guaranteed delivery
|
|
68
|
+
- Automatic retries on failure
|
|
69
|
+
- Each task runs in dedicated Cloud Run instance
|
|
70
|
+
- Rate limiting to protect external APIs (Modal, AudioShake)
|
|
71
|
+
|
|
72
|
+
2. Direct HTTP mode (development, ENABLE_CLOUD_TASKS=false):
|
|
73
|
+
- Direct HTTP POST to internal worker endpoints
|
|
74
|
+
- Worker runs as FastAPI BackgroundTask
|
|
75
|
+
- Faster iteration for local development
|
|
76
|
+
- No retry guarantees
|
|
77
|
+
"""
|
|
78
|
+
|
|
79
|
+
def __init__(self):
    """
    Load settings and cache the values needed to dispatch workers.

    Resolves the internal base URL, the admin token and the dispatch mode
    once at construction time, then logs which mode is active.
    """
    self.settings = get_settings()

    # The Cloud Tasks client is built on first access of ``tasks_client``.
    self._tasks_client = None

    self._base_url = self._get_base_url()
    self._admin_token = self._get_admin_token()
    self._use_cloud_tasks = self._should_use_cloud_tasks()

    logger.info(
        "WorkerService initialized with Cloud Tasks mode"
        if self._use_cloud_tasks
        else "WorkerService initialized with direct HTTP mode"
    )
|
|
91
|
+
|
|
92
|
+
def _should_use_cloud_tasks(self) -> bool:
|
|
93
|
+
"""
|
|
94
|
+
Check if Cloud Tasks should be used for worker coordination.
|
|
95
|
+
|
|
96
|
+
Controlled by ENABLE_CLOUD_TASKS setting (from environment variable).
|
|
97
|
+
Default is false (direct HTTP mode) for backward compatibility.
|
|
98
|
+
|
|
99
|
+
Returns:
|
|
100
|
+
True if Cloud Tasks should be used, False for direct HTTP
|
|
101
|
+
"""
|
|
102
|
+
return self.settings.enable_cloud_tasks
|
|
103
|
+
|
|
104
|
+
@property
def tasks_client(self):
    """
    Cloud Tasks client, created on first access.

    The ``google.cloud.tasks_v2`` import is deferred until the client is
    actually needed, and only happens when Cloud Tasks mode is enabled.

    Returns:
        The cached client, or None when Cloud Tasks mode is disabled and
        no client has been set.

    Raises:
        ImportError: if google-cloud-tasks is not installed (logged first).
    """
    # Nothing to build: already cached, or Cloud Tasks mode is off.
    if self._tasks_client is not None or not self._use_cloud_tasks:
        return self._tasks_client

    try:
        from google.cloud import tasks_v2
        self._tasks_client = tasks_v2.CloudTasksClient()
    except ImportError:
        logger.error(
            "google-cloud-tasks not installed. "
            "Install with: pip install google-cloud-tasks"
        )
        raise
    return self._tasks_client
|
|
122
|
+
|
|
123
|
+
def _get_admin_token(self) -> Optional[str]:
|
|
124
|
+
"""
|
|
125
|
+
Get admin token for internal API authentication.
|
|
126
|
+
|
|
127
|
+
Returns the first admin token from ADMIN_TOKENS env var.
|
|
128
|
+
"""
|
|
129
|
+
admin_tokens_str = self.settings.admin_tokens or ""
|
|
130
|
+
tokens = [t.strip() for t in admin_tokens_str.split(",") if t.strip()]
|
|
131
|
+
if tokens:
|
|
132
|
+
return tokens[0]
|
|
133
|
+
return None
|
|
134
|
+
|
|
135
|
+
def _get_base_url(self) -> str:
|
|
136
|
+
"""
|
|
137
|
+
Get base URL for internal API calls.
|
|
138
|
+
|
|
139
|
+
Priority:
|
|
140
|
+
1. TEST_SERVER_URL env var (for tests)
|
|
141
|
+
2. CLOUD_RUN_SERVICE_URL env var (for Cloud Tasks to call back)
|
|
142
|
+
3. localhost with PORT env var (for development)
|
|
143
|
+
|
|
144
|
+
Returns:
|
|
145
|
+
Base URL for API calls
|
|
146
|
+
"""
|
|
147
|
+
# Check for test environment override
|
|
148
|
+
test_url = os.getenv("TEST_SERVER_URL")
|
|
149
|
+
if test_url:
|
|
150
|
+
return test_url
|
|
151
|
+
|
|
152
|
+
# Production: Cloud Run service URL
|
|
153
|
+
# This must be the publicly accessible URL for Cloud Tasks to call
|
|
154
|
+
service_url = os.getenv("CLOUD_RUN_SERVICE_URL")
|
|
155
|
+
if service_url:
|
|
156
|
+
return service_url
|
|
157
|
+
|
|
158
|
+
# Development mode: use localhost with PORT env var
|
|
159
|
+
port = os.getenv("PORT", "8000")
|
|
160
|
+
return f"http://localhost:{port}"
|
|
161
|
+
|
|
162
|
+
async def trigger_worker(
    self,
    worker_type: str,
    job_id: str,
    timeout_seconds: int = 30
) -> bool:
    """
    Start a background worker for a job using the configured dispatch mode.

    Cloud Tasks mode: the work is handed to ``_enqueue_cloud_task``, which
    creates a task that POSTs to the internal worker endpoint.

    Direct HTTP mode: the work is handed to ``_trigger_http``, which POSTs
    to the internal worker endpoint itself.

    Args:
        worker_type: Worker type ("audio", "lyrics", "screens", "render-video", "video")
        job_id: Job ID to process
        timeout_seconds: Request timeout; only used in direct HTTP mode

    Returns:
        True if the worker was triggered (or the task enqueued), False otherwise
    """
    if not self._use_cloud_tasks:
        return await self._trigger_http(worker_type, job_id, timeout_seconds)
    return await self._enqueue_cloud_task(worker_type, job_id)
|
|
194
|
+
|
|
195
|
+
async def _enqueue_cloud_task(self, worker_type: str, job_id: str) -> bool:
    """
    Enqueue a Cloud Task that POSTs the job to the internal worker endpoint.

    The task targets ``{base_url}/api/internal/workers/{worker_type}`` with
    the JSON body ``{"job_id": job_id}``. Retry and rate-limit behaviour is
    not set here; it comes from the queue's own configuration.

    Authentication:
    - ``oidc_token`` lets Cloud Tasks invoke the Cloud Run service.
    - ``X-Admin-Token`` carries the application-level admin token.

    Observability:
    - Injects trace context headers so worker spans link to parent trace

    Args:
        worker_type: Worker type; must be a key of WORKER_QUEUES
        job_id: Job ID to process

    Returns:
        True if the task was created. False for an unknown worker type, a
        missing project setting, or any exception (logged, not raised).
    """
    try:
        from google.cloud import tasks_v2

        queue_name = WORKER_QUEUES.get(worker_type)
        if not queue_name:
            logger.error(f"Unknown worker type: {worker_type}")
            return False

        project = self.settings.google_cloud_project
        if not project:
            logger.error("GOOGLE_CLOUD_PROJECT not set, cannot enqueue Cloud Task")
            return False

        location = self.settings.gcp_region  # Must match queue location

        # Build queue path
        parent = self.tasks_client.queue_path(project, location, queue_name)

        # Build base headers
        headers = {
            "Content-Type": "application/json",
        }

        # Add admin auth via custom header
        # NOTE: We use X-Admin-Token instead of Authorization because Cloud Tasks
        # OIDC token overwrites the Authorization header when oidc_token is specified.
        # The OIDC token handles Cloud Run authentication (allows Cloud Tasks to invoke
        # the service), while X-Admin-Token handles application-level authentication.
        if self._admin_token:
            headers["X-Admin-Token"] = self._admin_token
            # Never log any part of the token: even a prefix or the length
            # is secret material that ends up in aggregated log storage.
            logger.debug(
                f"[job:{job_id}] Using admin token for Cloud Task auth via X-Admin-Token"
            )

        # Inject trace context for distributed tracing
        # This allows worker spans to link back to the original request trace
        headers = inject_trace_context(headers)

        # How long Cloud Tasks waits for the handler to respond; worker types
        # missing from WORKER_DISPATCH_DEADLINES fall back to 600s.
        dispatch_deadline_seconds = WORKER_DISPATCH_DEADLINES.get(worker_type, 600)

        # Build task payload
        task = {
            "http_request": {
                "http_method": tasks_v2.HttpMethod.POST,
                "url": f"{self._base_url}/api/internal/workers/{worker_type}",
                "headers": headers,
                "body": json.dumps({"job_id": job_id}).encode(),
                # Use OIDC token for Cloud Run authentication
                # This allows Cloud Tasks to invoke the Cloud Run service
                "oidc_token": {
                    "service_account_email": f"karaoke-backend@{project}.iam.gserviceaccount.com",
                },
            },
            # Cloud Tasks defaults to a 10 min deadline and caps it at
            # 1800s (30 min); long-running workers need the explicit value.
            "dispatch_deadline": duration_pb2.Duration(seconds=dispatch_deadline_seconds),
        }

        # Create task
        # Note: We don't set a task name, allowing Cloud Tasks to generate one
        # This prevents duplicate task errors on retries
        # NOTE(review): create_task is a synchronous client call inside an
        # async method, so it is not awaited here.
        response = self.tasks_client.create_task(parent=parent, task=task)
        logger.info(
            f"[job:{job_id}] Created Cloud Task for {worker_type} worker: {response.name} "
            f"(dispatch_deadline={dispatch_deadline_seconds}s)"
        )
        return True

    except Exception as e:
        logger.error(
            f"[job:{job_id}] Failed to enqueue Cloud Task for {worker_type}: {e}",
            exc_info=True
        )
        return False
|
|
293
|
+
|
|
294
|
+
async def _trigger_http(
    self,
    worker_type: str,
    job_id: str,
    timeout_seconds: int = 30
) -> bool:
    """
    Trigger a worker with a direct HTTP POST (development mode).

    POSTs ``{"job_id": job_id}`` to
    ``{base_url}/api/internal/workers/{worker_type}``. When an admin token
    is configured it is sent as a Bearer ``Authorization`` header.

    Observability:
    - Injects trace context headers so worker spans link to parent trace

    Args:
        worker_type: Worker type
        job_id: Job ID to process
        timeout_seconds: Timeout passed to the HTTP client

    Returns:
        True only when the endpoint answers HTTP 200. Any other status, a
        timeout, or any other exception is logged and reported as False.
    """
    try:
        # Start from the auth header (if any), then add trace context
        auth_headers = {}
        if self._admin_token:
            auth_headers["Authorization"] = f"Bearer {self._admin_token}"
        request_headers = inject_trace_context(auth_headers)

        async with httpx.AsyncClient(timeout=timeout_seconds) as client:
            endpoint = f"{self._base_url}/api/internal/workers/{worker_type}"
            response = await client.post(
                endpoint,
                json={"job_id": job_id},
                headers=request_headers
            )

            if response.status_code != 200:
                logger.error(
                    f"[job:{job_id}] Failed to trigger {worker_type} worker: "
                    f"HTTP {response.status_code} - {response.text}"
                )
                return False

            logger.info(f"[job:{job_id}] Successfully triggered {worker_type} worker")
            return True

    except httpx.TimeoutException:
        logger.error(f"[job:{job_id}] Timeout triggering {worker_type} worker")
        return False

    except Exception as e:
        logger.error(
            f"[job:{job_id}] Error triggering {worker_type} worker: {e}",
            exc_info=True
        )
        return False
|
|
355
|
+
|
|
356
|
+
# Convenience methods for specific workers
|
|
357
|
+
# These provide a cleaner API and better IDE autocomplete
|
|
358
|
+
|
|
359
|
+
async def trigger_audio_worker(self, job_id: str) -> bool:
    """
    Trigger the audio separation worker for a job.

    Thin wrapper around ``trigger_worker`` with the "audio" worker type.
    """
    triggered = await self.trigger_worker("audio", job_id)
    return triggered
|
|
362
|
+
|
|
363
|
+
async def trigger_lyrics_worker(self, job_id: str) -> bool:
    """
    Trigger the lyrics transcription worker for a job.

    Thin wrapper around ``trigger_worker`` with the "lyrics" worker type.
    """
    triggered = await self.trigger_worker("lyrics", job_id)
    return triggered
|
|
366
|
+
|
|
367
|
+
async def trigger_screens_worker(self, job_id: str) -> bool:
    """
    Trigger the screen generation worker for a job.

    Thin wrapper around ``trigger_worker`` with the "screens" worker type.
    """
    triggered = await self.trigger_worker("screens", job_id)
    return triggered
|
|
370
|
+
|
|
371
|
+
async def trigger_video_worker(self, job_id: str) -> bool:
    """
    Trigger the video generation worker for a job.

    When Cloud Tasks mode is on and the ``use_cloud_run_jobs_for_video``
    setting is enabled, the work is started as a Cloud Run Job (for
    encodes that may exceed 30 min). In every other case it goes through
    ``trigger_worker`` like the other workers.
    """
    run_as_cloud_run_job = (
        self._use_cloud_tasks and self.settings.use_cloud_run_jobs_for_video
    )
    if not run_as_cloud_run_job:
        return await self.trigger_worker("video", job_id)
    return await self._trigger_cloud_run_job(job_id)
|
|
382
|
+
|
|
383
|
+
async def _trigger_cloud_run_job(self, job_id: str) -> bool:
    """
    Trigger video encoding as a Cloud Run Job.

    Runs the ``video-encoding-job`` job in the configured project and
    region, overriding the container args so the execution processes this
    specific ``job_id``. Intended for encodes too long for the Cloud Tasks
    dispatch deadline.

    Args:
        job_id: Job ID to process

    Returns:
        True if the run request was accepted. False when the project
        setting is missing or on any exception (logged, not raised).
    """
    try:
        from google.cloud import run_v2

        project = self.settings.google_cloud_project
        if not project:
            logger.error("GOOGLE_CLOUD_PROJECT not set, cannot trigger Cloud Run Job")
            return False

        location = self.settings.gcp_region
        job_name = f"projects/{project}/locations/{location}/jobs/video-encoding-job"

        # Create Cloud Run Jobs client
        client = run_v2.JobsClient()

        # Run the job with overrides for the specific job_id
        request = run_v2.RunJobRequest(
            name=job_name,
            overrides=run_v2.RunJobRequest.Overrides(
                container_overrides=[
                    run_v2.RunJobRequest.Overrides.ContainerOverride(
                        args=["python", "-m", "backend.workers.video_worker", "--job-id", job_id],
                    )
                ]
            )
        )

        # Start the job; the returned operation is not waited on here.
        operation = client.run_job(request=request)
        # Use the same "[job:<id>]" prefix as every other job-scoped log
        # line in this service so per-job log filtering also finds these.
        logger.info(
            f"[job:{job_id}] Started Cloud Run Job for video encoding: {operation.metadata}"
        )
        return True

    except Exception as e:
        logger.error(
            f"[job:{job_id}] Failed to trigger Cloud Run Job for video worker: {e}",
            exc_info=True
        )
        return False
|
|
433
|
+
|
|
434
|
+
async def trigger_render_video_worker(self, job_id: str) -> bool:
    """
    Trigger the render video worker (post-review) for a job.

    Thin wrapper around ``trigger_worker`` with the "render-video" worker type.
    """
    triggered = await self.trigger_worker("render-video", job_id)
    return triggered
|
|
437
|
+
|
|
438
|
+
async def schedule_idle_reminder(
    self,
    job_id: str,
    delay_seconds: int = IDLE_REMINDER_DELAY_SECONDS
) -> bool:
    """
    Schedule a delayed idle reminder check for a job.

    Creates a Cloud Task on the idle-reminder queue whose schedule time is
    now + ``delay_seconds``. When it fires it POSTs to
    ``/api/internal/jobs/{job_id}/check-idle-reminder``; deciding whether a
    reminder is actually sent is up to that endpoint.

    In direct HTTP mode nothing is scheduled: the request is logged and
    reported as successful.

    Args:
        job_id: Job ID to check
        delay_seconds: Delay before the check runs (default: 5 minutes)

    Returns:
        True if the task was scheduled (or skipped in direct HTTP mode),
        False on missing configuration or any exception (logged, not raised)
    """
    if not self._use_cloud_tasks:
        # No delayed execution support without Cloud Tasks: log and skip
        logger.info(
            f"[job:{job_id}] Idle reminder not scheduled (Cloud Tasks disabled). "
            f"Would have fired in {delay_seconds}s."
        )
        return True

    try:
        from google.cloud import tasks_v2
        from google.protobuf import timestamp_pb2
        import time

        reminder_queue = WORKER_QUEUES.get("idle-reminder")
        if not reminder_queue:
            logger.error("Idle reminder queue not configured")
            return False

        project = self.settings.google_cloud_project
        if not project:
            logger.error("GOOGLE_CLOUD_PROJECT not set")
            return False

        region = self.settings.gcp_region
        queue_path = self.tasks_client.queue_path(project, region, reminder_queue)

        # Admin token (when configured) plus trace context
        request_headers = {"Content-Type": "application/json"}
        if self._admin_token:
            request_headers["X-Admin-Token"] = self._admin_token
        request_headers = inject_trace_context(request_headers)

        # Absolute time at which Cloud Tasks should deliver the request
        fire_at = timestamp_pb2.Timestamp()
        fire_at.FromSeconds(int(time.time()) + delay_seconds)

        deadline_seconds = WORKER_DISPATCH_DEADLINES.get("idle-reminder", 60)

        http_request = {
            "http_method": tasks_v2.HttpMethod.POST,
            "url": f"{self._base_url}/api/internal/jobs/{job_id}/check-idle-reminder",
            "headers": request_headers,
            "body": json.dumps({"job_id": job_id}).encode(),
            "oidc_token": {
                "service_account_email": f"karaoke-backend@{project}.iam.gserviceaccount.com",
            },
        }
        reminder_task = {
            "http_request": http_request,
            "schedule_time": fire_at,
            "dispatch_deadline": duration_pb2.Duration(seconds=deadline_seconds),
        }

        created = self.tasks_client.create_task(parent=queue_path, task=reminder_task)
        logger.info(
            f"[job:{job_id}] Scheduled idle reminder check in {delay_seconds}s: {created.name}"
        )
        return True

    except Exception as e:
        logger.error(
            f"[job:{job_id}] Failed to schedule idle reminder: {e}",
            exc_info=True
        )
        return False
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
# Process-wide WorkerService singleton; stays None until first requested.
_worker_service: Optional[WorkerService] = None


def get_worker_service() -> WorkerService:
    """
    Return the shared WorkerService, building it on first access.

    The instance is cached at module level so that every caller reuses
    the same service (and therefore the same HTTP client pool).

    Returns:
        The module-wide WorkerService instance
    """
    global _worker_service
    # Fast path: the singleton already exists.
    if _worker_service is not None:
        return _worker_service

    _worker_service = WorkerService()
    return _worker_service
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def reset_worker_service() -> None:
    """
    Drop the cached global worker service instance.

    After this call the module-level singleton is None again, so the next
    call to get_worker_service() constructs a fresh WorkerService.
    Intended for tests that need clean state between test cases.
    """
    global _worker_service
    _worker_service = None
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""
|
|
2
|
+
YouTube credential service for server-side video uploads.
|
|
3
|
+
|
|
4
|
+
This service manages YouTube OAuth credentials stored in Secret Manager
|
|
5
|
+
for non-interactive uploads from backend workers.
|
|
6
|
+
"""
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
from typing import Optional, Dict, Any
|
|
10
|
+
|
|
11
|
+
from backend.config import get_settings
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class YouTubeService:
    """Service for managing YouTube OAuth credentials.

    Credentials are fetched from Secret Manager through the settings object
    and cached on the instance. The outcome of the first load attempt
    (success or failure) is remembered, so the secret is fetched at most
    once per instance.
    """

    # Secret Manager secret name for YouTube credentials
    YOUTUBE_CREDENTIALS_SECRET = "youtube-oauth-credentials"

    # Fields that must be present and non-empty for the credentials to be usable
    _REQUIRED_FIELDS = ('refresh_token', 'token_uri', 'client_id', 'client_secret')

    def __init__(self):
        self.settings = get_settings()
        self._credentials: Optional[Dict[str, Any]] = None
        self._loaded = False

    def _fetch_validated_credentials(self) -> Optional[Dict[str, Any]]:
        """Fetch, parse and validate the secret; return the dict or None.

        May raise json.JSONDecodeError (malformed JSON) or whatever the
        settings object's get_secret() raises; the caller handles those.
        """
        # Get credentials from Secret Manager
        creds_json = self.settings.get_secret(self.YOUTUBE_CREDENTIALS_SECRET)
        if not creds_json:
            logger.warning("YouTube credentials not found in Secret Manager")
            return None

        parsed = json.loads(creds_json)

        # Valid JSON is not necessarily an object (it may be a list, string,
        # number...). Reject anything that is not a dict before calling .get().
        if not isinstance(parsed, dict):
            logger.error(
                "YouTube credentials secret must be a JSON object, got %s",
                type(parsed).__name__,
            )
            return None

        missing = [f for f in self._REQUIRED_FIELDS if not parsed.get(f)]
        if missing:
            logger.error(f"YouTube credentials missing required fields: {missing}")
            return None

        return parsed

    def load_credentials(self) -> bool:
        """
        Load YouTube OAuth credentials from Secret Manager.

        The secret should contain a JSON object with:
        - token: Current access token (may be expired)
        - refresh_token: Refresh token for getting new access tokens
        - token_uri: Token endpoint URL
        - client_id: OAuth client ID
        - client_secret: OAuth client secret
        - scopes: List of OAuth scopes

        Only refresh_token, token_uri, client_id and client_secret are
        validated as required. The result of the first attempt is cached:
        later calls return the cached outcome without contacting
        Secret Manager again.

        Returns:
            True if credentials were loaded successfully, False otherwise
        """
        if self._loaded:
            return self._credentials is not None

        # Validate into a local first so that a failure part-way through can
        # never leave a half-validated value visible in self._credentials.
        credentials: Optional[Dict[str, Any]] = None
        try:
            credentials = self._fetch_validated_credentials()
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse YouTube credentials JSON: {e}")
        except Exception as e:
            logger.error(f"Failed to load YouTube credentials: {e}")

        self._credentials = credentials
        self._loaded = True

        if credentials is None:
            return False

        logger.info("YouTube credentials loaded successfully from Secret Manager")
        return True

    def get_credentials_dict(self) -> Optional[Dict[str, Any]]:
        """
        Get the YouTube credentials as a dictionary.

        This format is compatible with KaraokeFinalise's user_youtube_credentials
        parameter, which creates a google.oauth2.credentials.Credentials object.

        Triggers a load from Secret Manager if none has been attempted yet.

        Returns:
            Dictionary with credential data, or None if not available
        """
        if not self._loaded:
            self.load_credentials()

        return self._credentials

    @property
    def is_configured(self) -> bool:
        """Check if YouTube credentials are configured and ready to use."""
        if not self._loaded:
            self.load_credentials()
        return self._credentials is not None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# Module-level YouTubeService singleton; created lazily on first request.
_youtube_service: Optional[YouTubeService] = None


def get_youtube_service() -> YouTubeService:
    """Return the shared YouTubeService, constructing it on first use."""
    global _youtube_service
    # Fast path: reuse the instance built by an earlier call.
    if _youtube_service is not None:
        return _youtube_service

    _youtube_service = YouTubeService()
    return _youtube_service
|