karaoke-gen 0.86.7__py3-none-any.whl → 0.96.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- backend/.coveragerc +20 -0
- backend/.gitignore +37 -0
- backend/Dockerfile +43 -0
- backend/Dockerfile.base +74 -0
- backend/README.md +242 -0
- backend/__init__.py +0 -0
- backend/api/__init__.py +0 -0
- backend/api/dependencies.py +457 -0
- backend/api/routes/__init__.py +0 -0
- backend/api/routes/admin.py +742 -0
- backend/api/routes/audio_search.py +903 -0
- backend/api/routes/auth.py +348 -0
- backend/api/routes/file_upload.py +2076 -0
- backend/api/routes/health.py +344 -0
- backend/api/routes/internal.py +435 -0
- backend/api/routes/jobs.py +1610 -0
- backend/api/routes/review.py +652 -0
- backend/api/routes/themes.py +162 -0
- backend/api/routes/users.py +1014 -0
- backend/config.py +172 -0
- backend/main.py +133 -0
- backend/middleware/__init__.py +5 -0
- backend/middleware/audit_logging.py +124 -0
- backend/models/__init__.py +0 -0
- backend/models/job.py +519 -0
- backend/models/requests.py +123 -0
- backend/models/theme.py +153 -0
- backend/models/user.py +254 -0
- backend/models/worker_log.py +164 -0
- backend/pyproject.toml +29 -0
- backend/quick-check.sh +93 -0
- backend/requirements.txt +29 -0
- backend/run_tests.sh +60 -0
- backend/services/__init__.py +0 -0
- backend/services/audio_analysis_service.py +243 -0
- backend/services/audio_editing_service.py +278 -0
- backend/services/audio_search_service.py +702 -0
- backend/services/auth_service.py +630 -0
- backend/services/credential_manager.py +792 -0
- backend/services/discord_service.py +172 -0
- backend/services/dropbox_service.py +301 -0
- backend/services/email_service.py +1093 -0
- backend/services/encoding_interface.py +454 -0
- backend/services/encoding_service.py +405 -0
- backend/services/firestore_service.py +512 -0
- backend/services/flacfetch_client.py +573 -0
- backend/services/gce_encoding/README.md +72 -0
- backend/services/gce_encoding/__init__.py +22 -0
- backend/services/gce_encoding/main.py +589 -0
- backend/services/gce_encoding/requirements.txt +16 -0
- backend/services/gdrive_service.py +356 -0
- backend/services/job_logging.py +258 -0
- backend/services/job_manager.py +842 -0
- backend/services/job_notification_service.py +271 -0
- backend/services/local_encoding_service.py +590 -0
- backend/services/local_preview_encoding_service.py +407 -0
- backend/services/lyrics_cache_service.py +216 -0
- backend/services/metrics.py +413 -0
- backend/services/packaging_service.py +287 -0
- backend/services/rclone_service.py +106 -0
- backend/services/storage_service.py +209 -0
- backend/services/stripe_service.py +275 -0
- backend/services/structured_logging.py +254 -0
- backend/services/template_service.py +330 -0
- backend/services/theme_service.py +469 -0
- backend/services/tracing.py +543 -0
- backend/services/user_service.py +721 -0
- backend/services/worker_service.py +558 -0
- backend/services/youtube_service.py +112 -0
- backend/services/youtube_upload_service.py +445 -0
- backend/tests/__init__.py +4 -0
- backend/tests/conftest.py +224 -0
- backend/tests/emulator/__init__.py +7 -0
- backend/tests/emulator/conftest.py +88 -0
- backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
- backend/tests/emulator/test_emulator_integration.py +356 -0
- backend/tests/emulator/test_style_loading_direct.py +436 -0
- backend/tests/emulator/test_worker_logs_direct.py +229 -0
- backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
- backend/tests/requirements-test.txt +10 -0
- backend/tests/requirements.txt +6 -0
- backend/tests/test_admin_email_endpoints.py +411 -0
- backend/tests/test_api_integration.py +460 -0
- backend/tests/test_api_routes.py +93 -0
- backend/tests/test_audio_analysis_service.py +294 -0
- backend/tests/test_audio_editing_service.py +386 -0
- backend/tests/test_audio_search.py +1398 -0
- backend/tests/test_audio_services.py +378 -0
- backend/tests/test_auth_firestore.py +231 -0
- backend/tests/test_config_extended.py +68 -0
- backend/tests/test_credential_manager.py +377 -0
- backend/tests/test_dependencies.py +54 -0
- backend/tests/test_discord_service.py +244 -0
- backend/tests/test_distribution_services.py +820 -0
- backend/tests/test_dropbox_service.py +472 -0
- backend/tests/test_email_service.py +492 -0
- backend/tests/test_emulator_integration.py +322 -0
- backend/tests/test_encoding_interface.py +412 -0
- backend/tests/test_file_upload.py +1739 -0
- backend/tests/test_flacfetch_client.py +632 -0
- backend/tests/test_gdrive_service.py +524 -0
- backend/tests/test_instrumental_api.py +431 -0
- backend/tests/test_internal_api.py +343 -0
- backend/tests/test_job_creation_regression.py +583 -0
- backend/tests/test_job_manager.py +339 -0
- backend/tests/test_job_manager_notifications.py +329 -0
- backend/tests/test_job_notification_service.py +443 -0
- backend/tests/test_jobs_api.py +273 -0
- backend/tests/test_local_encoding_service.py +423 -0
- backend/tests/test_local_preview_encoding_service.py +567 -0
- backend/tests/test_main.py +87 -0
- backend/tests/test_models.py +918 -0
- backend/tests/test_packaging_service.py +382 -0
- backend/tests/test_requests.py +201 -0
- backend/tests/test_routes_jobs.py +282 -0
- backend/tests/test_routes_review.py +337 -0
- backend/tests/test_services.py +556 -0
- backend/tests/test_services_extended.py +112 -0
- backend/tests/test_storage_service.py +448 -0
- backend/tests/test_style_upload.py +261 -0
- backend/tests/test_template_service.py +295 -0
- backend/tests/test_theme_service.py +516 -0
- backend/tests/test_unicode_sanitization.py +522 -0
- backend/tests/test_upload_api.py +256 -0
- backend/tests/test_validate.py +156 -0
- backend/tests/test_video_worker_orchestrator.py +847 -0
- backend/tests/test_worker_log_subcollection.py +509 -0
- backend/tests/test_worker_logging.py +365 -0
- backend/tests/test_workers.py +1116 -0
- backend/tests/test_workers_extended.py +178 -0
- backend/tests/test_youtube_service.py +247 -0
- backend/tests/test_youtube_upload_service.py +568 -0
- backend/validate.py +173 -0
- backend/version.py +27 -0
- backend/workers/README.md +597 -0
- backend/workers/__init__.py +11 -0
- backend/workers/audio_worker.py +618 -0
- backend/workers/lyrics_worker.py +683 -0
- backend/workers/render_video_worker.py +483 -0
- backend/workers/screens_worker.py +525 -0
- backend/workers/style_helper.py +198 -0
- backend/workers/video_worker.py +1277 -0
- backend/workers/video_worker_orchestrator.py +701 -0
- backend/workers/worker_logging.py +278 -0
- karaoke_gen/instrumental_review/static/index.html +7 -4
- karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
- karaoke_gen/style_loader.py +3 -1
- karaoke_gen/utils/__init__.py +163 -8
- karaoke_gen/video_background_processor.py +9 -4
- {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/METADATA +2 -1
- {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/RECORD +187 -42
- lyrics_transcriber/correction/agentic/providers/config.py +9 -5
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +1 -51
- lyrics_transcriber/correction/corrector.py +192 -130
- lyrics_transcriber/correction/operations.py +24 -9
- lyrics_transcriber/frontend/package-lock.json +2 -2
- lyrics_transcriber/frontend/package.json +1 -1
- lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
- lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
- lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
- lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
- lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
- lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
- lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
- lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
- lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
- lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
- lyrics_transcriber/frontend/src/theme.ts +42 -15
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/frontend/vite.config.js +5 -0
- lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
- lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
- lyrics_transcriber/frontend/web_assets/index.html +6 -2
- lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
- lyrics_transcriber/output/generator.py +17 -3
- lyrics_transcriber/output/video.py +60 -95
- lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
- {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/WHEEL +0 -0
- {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/entry_points.txt +0 -0
- {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/licenses/LICENSE +0 -0
backend/workers/audio_worker.py (new file)
@@ -0,0 +1,618 @@
+"""
+Audio separation worker.
+
+Handles the audio processing track of parallel processing:
+1. Stage 1: Clean instrumental separation (Modal API, 3-5 min)
+2. Stage 2: Backing vocals separation (Modal API, 2-3 min)
+3. Post-processing: Combine instrumentals, normalize
+
+Re-uses karaoke_gen.audio_processor.AudioProcessor for remote GPU separation.
+
+Observability:
+- All operations wrapped in tracing spans for Cloud Trace visibility
+- Logs include [job:ID] prefix for easy filtering in Cloud Logging
+- Worker start/end timing logged with WORKER_START/WORKER_END markers
+"""
+import logging
+import os
+import shutil
+import tempfile
+import time
+from typing import Optional, Dict, Any
+from pathlib import Path
+
+from backend.models.job import JobStatus
+from backend.services.job_manager import JobManager
+from backend.services.storage_service import StorageService
+from backend.config import get_settings
+from backend.workers.worker_logging import create_job_logger, setup_job_logging, job_logging_context
+from backend.services.tracing import job_span, add_span_event, add_span_attribute
+from backend.services.metrics import metrics
+
+# Import from karaoke_gen package
+from karaoke_gen.audio_processor import AudioProcessor
+
+
+logger = logging.getLogger(__name__)
+
+
+async def _trigger_lyrics_worker_after_url_download(job_id: str) -> None:
+    """
+    Trigger lyrics worker after URL audio download completes.
+
+    For URL jobs, we use sequential triggering:
+    1. Audio worker downloads and uploads audio to GCS
+    2. Audio worker triggers lyrics worker (this function)
+    3. Both workers then proceed in parallel (audio separation + lyrics transcription)
+
+    This prevents the race condition where lyrics worker times out waiting for audio.
+    """
+    from backend.services.worker_service import get_worker_service
+
+    try:
+        worker_service = get_worker_service()
+        await worker_service.trigger_lyrics_worker(job_id)
+        logger.info(f"Job {job_id}: Triggered lyrics worker after URL download")
+    except Exception as e:
+        # Log but don't fail - audio processing can still continue
+        # The job will eventually timeout if lyrics worker doesn't run
+        logger.error(f"Job {job_id}: Failed to trigger lyrics worker: {e}")
+
+
+# Default model names - used by create_audio_processor and stored in state_data
+DEFAULT_CLEAN_MODEL = "model_bs_roformer_ep_317_sdr_12.9755.ckpt"
+DEFAULT_BACKING_MODELS = ["mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt"]
+DEFAULT_OTHER_MODELS = ["htdemucs_6s.yaml"]
+
+
+# Loggers to capture for audio worker
+AUDIO_WORKER_LOGGERS = [
+    "karaoke_gen.audio_processor",
+]
+
+
+async def download_from_url(url: str, temp_dir: str, artist: str, title: str, job_manager: JobManager = None, job_id: str = None) -> Optional[str]:
+    """
+    Download audio from a URL (YouTube, etc.) using karaoke_gen.FileHandler.
+
+    Uses the battle-tested FileHandler from karaoke_gen which includes:
+    - Anti-detection options (user agent, headers, delays)
+    - Cookie support for authenticated downloads
+    - Retry logic
+
+    If artist and/or title are not provided, attempts to extract them from
+    the URL metadata.
+
+    Args:
+        url: URL to download from
+        temp_dir: Temporary directory to save to
+        artist: Artist name for filename (can be None for auto-detection)
+        title: Song title for filename (can be None for auto-detection)
+        job_manager: Optional JobManager to update job with detected metadata
+        job_id: Optional job ID to update
+
+    Returns:
+        Path to downloaded audio file, or None if failed
+    """
+    try:
+        from karaoke_gen.file_handler import FileHandler
+        from karaoke_gen.utils import sanitize_filename
+
+        # Create FileHandler instance
+        file_handler = FileHandler(
+            logger=logger,
+            ffmpeg_base_command="ffmpeg -hide_banner -loglevel error -nostats -y",
+            create_track_subfolders=False,
+            dry_run=False
+        )
+
+        # Try to extract metadata if artist or title not provided
+        if not artist or not title:
+            logger.info(f"Extracting metadata from URL: {url}")
+            metadata = file_handler.extract_metadata_from_url(url)
+
+            if metadata:
+                if not artist:
+                    artist = metadata.get('artist', 'Unknown')
+                    logger.info(f"Auto-detected artist: {artist}")
+                if not title:
+                    title = metadata.get('title', 'Unknown')
+                    logger.info(f"Auto-detected title: {title}")
+
+                # Update job with detected metadata if job_manager provided
+                if job_manager and job_id:
+                    update_data = {}
+                    if artist:
+                        update_data['artist'] = artist
+                    if title:
+                        update_data['title'] = title
+                    if update_data:
+                        job_manager.update_job(job_id, update_data)
+                        logger.info(f"Updated job {job_id} with detected metadata")
+            else:
+                logger.warning("Could not extract metadata from URL, using defaults")
+                artist = artist or "Unknown"
+                title = title or "Unknown"
+
+        # Create output filename (without extension)
+        safe_artist = sanitize_filename(artist) if artist else "Unknown"
+        safe_title = sanitize_filename(title) if title else "Unknown"
+        output_filename_no_extension = os.path.join(temp_dir, f"{safe_artist} - {safe_title}")
+
+        # Get YouTube cookies from environment variable if available
+        # This helps bypass "Sign in to confirm you're not a bot" errors
+        cookies_str = os.environ.get("YOUTUBE_COOKIES")
+        if cookies_str:
+            logger.info("Using YouTube cookies for download authentication")
+        else:
+            logger.info("No YOUTUBE_COOKIES env var set - attempting download without cookies")
+
+        # Download using FileHandler (includes anti-detection features)
+        logger.info(f"Downloading from URL: {url}")
+        downloaded_file = file_handler.download_video(
+            url=url,
+            output_filename_no_extension=output_filename_no_extension,
+            cookies_str=cookies_str
+        )
+
+        if downloaded_file and os.path.exists(downloaded_file):
+            logger.info(f"Downloaded video: {downloaded_file}")
+
+            # Convert to WAV for processing
+            wav_file = file_handler.convert_to_wav(
+                input_filename=downloaded_file,
+                output_filename_no_extension=output_filename_no_extension
+            )
+
+            if wav_file and os.path.exists(wav_file):
+                logger.info(f"Converted to WAV: {wav_file}")
+                return wav_file
+            else:
+                logger.error("WAV conversion failed")
+                return None
+        else:
+            logger.error("Download failed - no file returned")
+            return None
+
+    except ImportError as e:
+        logger.error(f"Import error: {e}. Check karaoke_gen installation.")
+        return None
+    except Exception as e:
+        logger.error(f"Failed to download from URL {url}: {e}", exc_info=True)
+        return None
+
+
+def create_audio_processor(
+    temp_dir: str,
+    clean_instrumental_model: Optional[str] = None,
+    backing_vocals_models: Optional[list] = None,
+    other_stems_models: Optional[list] = None
+) -> AudioProcessor:
+    """
+    Create an AudioProcessor instance configured for remote API processing.
+
+    This reuses the karaoke_gen AudioProcessor with settings optimized for Cloud Run:
+    - Uses remote Modal API (via AUDIO_SEPARATOR_API_URL env var)
+    - No local models needed (model_file_dir=None)
+    - FLAC output format for quality
+    - Model configurations from job or CLI defaults
+
+    Args:
+        temp_dir: Temporary directory for processing
+        clean_instrumental_model: Model for clean instrumental separation (optional, uses default if not provided)
+        backing_vocals_models: List of models for backing vocals separation (optional, uses default if not provided)
+        other_stems_models: List of models for other stems separation (optional, uses default if not provided)
+
+    Returns:
+        Configured AudioProcessor instance
+    """
+    # Configure logger for AudioProcessor
+    audio_logger = logging.getLogger("karaoke_gen.audio_processor")
+    audio_logger.setLevel(logging.INFO)
+
+    # Model configurations - use provided values or defaults from module constants
+    effective_clean_model = clean_instrumental_model or DEFAULT_CLEAN_MODEL
+    effective_backing_models = backing_vocals_models or DEFAULT_BACKING_MODELS
+    effective_other_models = other_stems_models or DEFAULT_OTHER_MODELS  # For 6-stem separation (bass, drums, etc.)
+
+    # FFmpeg command for combining audio files (must be a string, not a list)
+    ffmpeg_base_command = "ffmpeg -hide_banner -loglevel error -nostats -y"
+
+    return AudioProcessor(
+        logger=audio_logger,
+        log_level=logging.INFO,
+        log_formatter=None,  # Not needed for our use case
+        model_file_dir=None,  # No local models, using remote API
+        lossless_output_format="FLAC",
+        clean_instrumental_model=effective_clean_model,
+        backing_vocals_models=effective_backing_models,
+        other_stems_models=effective_other_models,
+        ffmpeg_base_command=ffmpeg_base_command
+    )
+
+
+async def process_audio_separation(job_id: str) -> bool:
+    """
+    Process audio separation for a job using karaoke_gen.AudioProcessor.
+
+    This is the main entry point for the audio worker.
+    Called asynchronously from the job submission endpoint.
+
+    Workflow:
+    1. Download audio from GCS
+    2. Stage 1: Separate with clean instrumental + other stems models (Modal API)
+    3. Stage 2: Separate vocals for backing vocals (Modal API)
+    4. Post-process: Combine instrumentals, normalize audio
+    5. Upload all stems to GCS
+    6. Mark job as AUDIO_COMPLETE
+
+    Args:
+        job_id: Job ID to process
+
+    Returns:
+        True if successful, False otherwise
+    """
+    start_time = time.time()
+    job_manager = JobManager()
+    storage = StorageService()
+    settings = get_settings()
+
+    # Create job logger for remote debugging FIRST
+    job_log = create_job_logger(job_id, "audio")
+
+    # Log with structured markers for easy Cloud Logging queries
+    logger.info(f"[job:{job_id}] WORKER_START worker=audio")
+    job_log.info("=== AUDIO WORKER STARTED ===")
+    job_log.info(f"Job ID: {job_id}")
+
+    # Set up log capture for AudioProcessor
+    log_handler = setup_job_logging(job_id, "audio", *AUDIO_WORKER_LOGGERS)
+    job_log.info(f"Log handler attached for {len(AUDIO_WORKER_LOGGERS)} loggers")
+
+    job = job_manager.get_job(job_id)
+    if not job:
+        logger.error(f"[job:{job_id}] Job not found in Firestore")
+        job_log.error(f"Job {job_id} not found in Firestore!")
+        return False
+
+    # Create temporary working directory
+    temp_dir = tempfile.mkdtemp(prefix=f"karaoke_{job_id}_")
+    job_log.info(f"Created temp directory: {temp_dir}")
+
+    try:
+        # Wrap entire worker in a tracing span
+        with job_span("audio-worker", job_id, {"artist": job.artist, "title": job.title}) as root_span:
+            # Use job_logging_context for proper log isolation when multiple jobs run concurrently
+            # This ensures logs from third-party libraries (karaoke_gen.audio_processor) are
+            # only captured by this job's handler, not handlers from other concurrent jobs
+            with job_logging_context(job_id):
+                job_log.info(f"Starting audio separation for {job.artist} - {job.title}")
+                logger.info(f"[job:{job_id}] Starting audio separation for {job.artist} - {job.title}")
+
+                # Ensure AUDIO_SEPARATOR_API_URL is set
+                api_url = os.environ.get("AUDIO_SEPARATOR_API_URL")
+                if not api_url:
+                    raise Exception("AUDIO_SEPARATOR_API_URL environment variable not set. "
+                                    "Cannot perform audio separation without remote API access.")
+                job_log.info(f"Audio separator API: {api_url}")
+                add_span_attribute("audio_separator_api", api_url)
+
+                # Download audio file from GCS or URL
+                with job_span("download-audio", job_id) as download_span:
+                    job_log.info("Downloading audio file...")
+                    audio_path = await download_audio(job_id, temp_dir, storage, job, job_manager_instance=job_manager)
+                    if not audio_path:
+                        raise Exception("Failed to download audio file")
+                    job_log.info(f"Audio downloaded: {os.path.basename(audio_path)}")
+                    download_span.set_attribute("audio_file", os.path.basename(audio_path))
+                    download_span.set_attribute("source", "url" if job.url else "gcs")
+
+                # Update progress using state_data (don't change status during parallel processing)
+                # The status is managed at a higher level - workers just track their progress
+                job_manager.update_state_data(job_id, 'audio_progress', {
+                    'stage': 'separating_stage1',
+                    'progress': 10,
+                    'message': 'Starting audio separation (Stage 1: Clean instrumental)'
+                })
+
+                # Create AudioProcessor instance (reuses karaoke_gen code)
+                # Use model configuration from job if provided, otherwise use defaults
+                job_log.info("Creating AudioProcessor instance...")
+                if job.clean_instrumental_model:
+                    job_log.info(f" Using clean instrumental model: {job.clean_instrumental_model}")
+                    add_span_attribute("clean_model", job.clean_instrumental_model)
+                if job.backing_vocals_models:
+                    job_log.info(f" Using backing vocals models: {job.backing_vocals_models}")
+                if job.other_stems_models:
+                    job_log.info(f" Using other stems models: {job.other_stems_models}")
+
+                audio_processor = create_audio_processor(
+                    temp_dir,
+                    clean_instrumental_model=job.clean_instrumental_model,
+                    backing_vocals_models=job.backing_vocals_models,
+                    other_stems_models=job.other_stems_models
+                )
+
+                # Store effective model names in state_data for video_worker to use in file naming
+                # This ensures output filenames match local CLI behavior (e.g., "Instrumental model_bs_roformer_ep_317_sdr_12.9755.ckpt")
+                effective_model_names = {
+                    'clean_instrumental_model': job.clean_instrumental_model or DEFAULT_CLEAN_MODEL,
+                    'backing_vocals_models': job.backing_vocals_models or DEFAULT_BACKING_MODELS,
+                    'other_stems_models': job.other_stems_models or DEFAULT_OTHER_MODELS,
+                }
+                job_manager.update_state_data(job_id, 'model_names', effective_model_names)
+                job_log.info(f"Stored effective model names: clean={effective_model_names['clean_instrumental_model']}")
+
+                # Format artist-title for file naming (matches CLI behavior)
+                # Sanitize to handle Unicode characters (curly quotes, em dashes, etc.)
+                # that cause HTTP header encoding issues with the remote API
+                from karaoke_gen.utils import sanitize_filename
+                safe_artist = sanitize_filename(job.artist) if job.artist else "Unknown"
+                safe_title = sanitize_filename(job.title) if job.title else "Unknown"
+                artist_title = f"{safe_artist} - {safe_title}"
+
+                # Run audio separation (calls Modal API internally)
+                # This returns a dict with paths to all separated stems
+                with job_span("modal-separation", job_id) as sep_span:
+                    sep_start = time.time()
+                    job_log.info("Starting audio separation (this may take 5-10 minutes)...")
+                    job_log.info(" Stage 1: Clean instrumental separation (MDX models)")
+                    job_log.info(" Stage 2: Backing vocals separation (Demucs model)")
+                    add_span_event("separation_started")
+                    logger.info(f"[job:{job_id}] Calling Modal API for audio separation")
+
+                    with metrics.time_external_api("modal", job_id):
+                        separation_result = audio_processor.process_audio_separation(
+                            audio_file=audio_path,
+                            artist_title=artist_title,
+                            track_output_dir=temp_dir
+                        )
+
+                    sep_duration = time.time() - sep_start
+                    sep_span.set_attribute("duration_seconds", sep_duration)
+                    sep_span.set_attribute("stem_count", len(separation_result))
+                    add_span_event("separation_completed", {"duration_seconds": sep_duration})
+
+                job_log.info("Audio separation complete!")
+                job_log.info(f" Generated {len(separation_result)} stem files")
+                logger.info(f"[job:{job_id}] Audio separation complete, organizing results")
+
+                # Update progress using state_data (don't change status during parallel processing)
+                job_manager.update_state_data(job_id, 'audio_progress', {
+                    'stage': 'audio_complete',
+                    'progress': 45,
+                    'message': 'Audio separation complete, uploading stems'
+                })
+
+                # Upload all stems to GCS
+                with job_span("upload-stems", job_id) as upload_span:
+                    await upload_separation_results(job_id, separation_result, storage, job_manager)
+                    upload_span.set_attribute("stem_count", len(separation_result))
+
+                logger.info(f"[job:{job_id}] All stems uploaded successfully")
+
+                # Mark audio processing complete
+                # This will check if lyrics are also complete and transition to next stage if so
+                job_manager.mark_audio_complete(job_id)
+
+                duration = time.time() - start_time
+                root_span.set_attribute("duration_seconds", duration)
+                logger.info(f"[job:{job_id}] WORKER_END worker=audio status=success duration={duration:.1f}s")
+                return True
+
+    except Exception as e:
+        duration = time.time() - start_time
+        logger.error(f"[job:{job_id}] WORKER_END worker=audio status=error duration={duration:.1f}s error={e}")
+        job_manager.mark_job_failed(
+            job_id=job_id,
+            error_message=f"Audio separation failed: {str(e)}",
+            error_details={"stage": "audio_separation", "error": str(e)}
+        )
+        return False
+
+    finally:
+        # Remove log handler to avoid duplicate logging on future runs
+        for logger_name in AUDIO_WORKER_LOGGERS:
+            try:
+                logging.getLogger(logger_name).removeHandler(log_handler)
+            except Exception:
+                pass
+
+        # Cleanup temporary directory
+        if os.path.exists(temp_dir):
+            shutil.rmtree(temp_dir)
+            logger.debug(f"[job:{job_id}] Cleaned up temp directory: {temp_dir}")
+
+
+async def download_audio(
+    job_id: str,
+    temp_dir: str,
+    storage: StorageService,
+    job,
+    job_manager_instance: JobManager = None
+) -> Optional[str]:
+    """
+    Download or fetch audio file to local temp directory.
+
+    Handles two cases:
+    1. Uploaded file: Download from GCS using input_media_gcs_path
+    2. URL (YouTube, etc.): Download using yt-dlp or other tools
+
+    Args:
+        job_id: Job ID
+        temp_dir: Temporary directory to save to
+        storage: StorageService instance
+        job: Job object with URL or GCS path
+        job_manager_instance: Optional JobManager instance for updating job metadata
+
+    Returns:
+        Path to downloaded audio file, or None if failed
+    """
+    try:
+        from karaoke_gen.utils import sanitize_filename
+
+        # Case 1: File was uploaded to GCS
+        if job.input_media_gcs_path:
+            logger.info(f"Job {job_id}: Downloading uploaded file from GCS: {job.input_media_gcs_path}")
+            # Sanitize filename to handle Unicode chars that cause HTTP header encoding issues
+            safe_filename = sanitize_filename(job.filename) if job.filename else "input.flac"
+            local_path = os.path.join(temp_dir, safe_filename)
+            storage.download_file(job.input_media_gcs_path, local_path)
+            logger.info(f"Job {job_id}: Downloaded uploaded file to {local_path}")
+            return local_path

+        # Case 2: URL download (from file_urls if already downloaded, or from job.url)
+        if job.file_urls and job.file_urls.get('input'):
+            # Already downloaded and stored in GCS
+            input_url = job.file_urls.get('input')
+            local_path = os.path.join(temp_dir, "input.flac")
+            storage.download_file(input_url, local_path)
+            logger.info(f"Job {job_id}: Downloaded audio from GCS: {input_url}")
+            return local_path
+
+        # Case 3: Fresh URL that needs downloading
+        if job.url:
+            logger.info(f"Job {job_id}: Downloading from URL: {job.url}")
+
+            # Use provided job_manager or create new one
+            jm = job_manager_instance or JobManager()
+
+            local_path = await download_from_url(
+                job.url,
+                temp_dir,
+                job.artist,
+                job.title,
+                job_manager=jm,
+                job_id=job_id
+            )
+
+            if local_path and os.path.exists(local_path):
+                # Upload to GCS and update job
+                gcs_path = f"jobs/{job_id}/input/{os.path.basename(local_path)}"
+                url = storage.upload_file(local_path, gcs_path)
+
+                # Update job with GCS path for lyrics worker
+                jm.update_job(job_id, {'input_media_gcs_path': gcs_path})
+                jm.update_file_url(job_id, 'input', 'audio', url)
+
+                logger.info(f"Job {job_id}: Downloaded and uploaded audio to GCS: {gcs_path}")
+
+                # For URL jobs, trigger lyrics worker now that audio is available
+                # This is the sequential trigger pattern - audio first, then lyrics
+                await _trigger_lyrics_worker_after_url_download(job_id)
+
+                return local_path
+            else:
+                logger.error(f"Job {job_id}: Failed to download from URL: {job.url}")
+                return None
+
+        logger.error(f"Job {job_id}: No input source found (no GCS path, file_urls, or URL)")
+        return None
+
+    except Exception as e:
+        logger.error(f"Job {job_id}: Failed to download audio: {e}", exc_info=True)
+        return None
+
+
+async def upload_separation_results(
+    job_id: str,
+    separation_result: Dict[str, Any],
+    storage: StorageService,
+    job_manager: JobManager
+) -> None:
+    """
+    Upload all audio separation results to GCS.
+
+    The separation_result dict from AudioProcessor.process_audio_separation() contains:
+    - clean_instrumental: Dict with 'vocals' and 'instrumental' paths
+    - other_stems: Dict with stem paths (bass, drums, guitar, piano, other)
+    - backing_vocals: Dict with model-keyed paths to lead_vocals and backing_vocals
+    - combined_instrumentals: Dict with model-keyed paths to instrumental+BV files
+
+    Args:
+        job_id: Job ID
+        separation_result: Result dict from AudioProcessor
+        storage: StorageService instance
+        job_manager: JobManager instance
+    """
+    logger.info(f"Job {job_id}: Uploading separation results to GCS")
+
+    # Upload clean instrumental + vocals (Stage 1, clean model)
+    if separation_result.get("clean_instrumental"):
+        clean = separation_result["clean_instrumental"]
+
+        if clean.get("instrumental") and os.path.exists(clean["instrumental"]):
+            gcs_path = f"jobs/{job_id}/stems/instrumental_clean.flac"
+            url = storage.upload_file(clean["instrumental"], gcs_path)
+            job_manager.update_file_url(job_id, 'stems', 'instrumental_clean', url)
+            logger.info(f"Job {job_id}: Uploaded clean instrumental")
+
+        if clean.get("vocals") and os.path.exists(clean["vocals"]):
+            gcs_path = f"jobs/{job_id}/stems/vocals_clean.flac"
+            url = storage.upload_file(clean["vocals"], gcs_path)
+            job_manager.update_file_url(job_id, 'stems', 'vocals_clean', url)
+            logger.info(f"Job {job_id}: Uploaded clean vocals")
+
+    # Upload other stems (Stage 1, htdemucs 6-stem)
+    if separation_result.get("other_stems"):
+        for stem_name, stem_value in separation_result["other_stems"].items():
+            # Handle both string paths and nested dicts
+            if isinstance(stem_value, str):
+                stem_path = stem_value
+            elif isinstance(stem_value, dict):
+                # Some models return nested dicts like {"path": "/path/to/file"}
+                stem_path = stem_value.get("path") or stem_value.get("file")
+                logger.debug(f"Job {job_id}: other_stems[{stem_name}] is dict: {stem_value}")
+            else:
+                logger.warning(f"Job {job_id}: Unexpected type for other_stems[{stem_name}]: {type(stem_value)}")
+                continue
+
+            if stem_path and isinstance(stem_path, str) and os.path.exists(stem_path):
+                gcs_path = f"jobs/{job_id}/stems/{stem_name.lower()}.flac"
+                url = storage.upload_file(stem_path, gcs_path)
+                job_manager.update_file_url(job_id, 'stems', stem_name.lower(), url)
+                logger.info(f"Job {job_id}: Uploaded {stem_name} stem")
+
+    # Upload backing vocals separation (Stage 2)
+    if separation_result.get("backing_vocals"):
+        # backing_vocals is a dict keyed by model name
+        for model_name, bv_stems in separation_result["backing_vocals"].items():
+            if bv_stems.get("lead_vocals") and os.path.exists(bv_stems["lead_vocals"]):
+                gcs_path = f"jobs/{job_id}/stems/lead_vocals.flac"
+                url = storage.upload_file(bv_stems["lead_vocals"], gcs_path)
+                job_manager.update_file_url(job_id, 'stems', 'lead_vocals', url)
+                logger.info(f"Job {job_id}: Uploaded lead vocals")
+
+            if bv_stems.get("backing_vocals") and os.path.exists(bv_stems["backing_vocals"]):
+                gcs_path = f"jobs/{job_id}/stems/backing_vocals.flac"
+                url = storage.upload_file(bv_stems["backing_vocals"], gcs_path)
+                job_manager.update_file_url(job_id, 'stems', 'backing_vocals', url)
+                logger.info(f"Job {job_id}: Uploaded backing vocals")
+
+            # Only process first model (we typically only use one backing vocals model)
+            break
+
+    # Upload combined instrumentals (instrumental + backing vocals)
+    if separation_result.get("combined_instrumentals"):
+        # combined_instrumentals is a dict keyed by model name
+        for model_name, combined_path in separation_result["combined_instrumentals"].items():
+            if combined_path and os.path.exists(combined_path):
+                gcs_path = f"jobs/{job_id}/stems/instrumental_with_backing.flac"
+                url = storage.upload_file(combined_path, gcs_path)
+                job_manager.update_file_url(job_id, 'stems', 'instrumental_with_backing', url)
+                logger.info(f"Job {job_id}: Uploaded instrumental with backing vocals")
+
+            # Only process first model
+            break
+
+    # Store instrumental options in state_data for later selection
+    instrumental_options = {}
+    if separation_result.get("clean_instrumental", {}).get("instrumental"):
+        instrumental_options["clean"] = f"jobs/{job_id}/stems/instrumental_clean.flac"
+    if separation_result.get("combined_instrumentals"):
+        instrumental_options["with_backing"] = f"jobs/{job_id}/stems/instrumental_with_backing.flac"
+
+    if instrumental_options:
+        job_manager.update_state_data(job_id, 'instrumental_options', instrumental_options)
+        logger.info(f"Job {job_id}: Stored instrumental options: {list(instrumental_options.keys())}")
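The new worker module's entry point is the `process_audio_separation(job_id)` coroutine shown in the diff above, which returns `True` on success. As a minimal sketch only of how it could be driven out-of-band (for example from a local test harness), assuming the `backend` package is importable and the Modal separation API is configured; the job ID and API URL values below are illustrative placeholders, not part of the package:

```python
import asyncio
import os

from backend.workers.audio_worker import process_audio_separation


async def main() -> None:
    # The worker refuses to run without the remote separation API URL;
    # this placeholder value is an assumption for the sketch.
    os.environ.setdefault("AUDIO_SEPARATOR_API_URL", "https://example.invalid/separate")

    # Hypothetical job ID -- in the real service the ID comes from the job
    # submission endpoint, which creates the Firestore job document first.
    ok = await process_audio_separation("example-job-id")
    print("audio worker finished:", "success" if ok else "failure")


if __name__ == "__main__":
    asyncio.run(main())
```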