karaoke-gen 0.90.1__py3-none-any.whl → 0.99.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- backend/.coveragerc +20 -0
- backend/.gitignore +37 -0
- backend/Dockerfile +43 -0
- backend/Dockerfile.base +74 -0
- backend/README.md +242 -0
- backend/__init__.py +0 -0
- backend/api/__init__.py +0 -0
- backend/api/dependencies.py +457 -0
- backend/api/routes/__init__.py +0 -0
- backend/api/routes/admin.py +835 -0
- backend/api/routes/audio_search.py +913 -0
- backend/api/routes/auth.py +348 -0
- backend/api/routes/file_upload.py +2112 -0
- backend/api/routes/health.py +409 -0
- backend/api/routes/internal.py +435 -0
- backend/api/routes/jobs.py +1629 -0
- backend/api/routes/review.py +652 -0
- backend/api/routes/themes.py +162 -0
- backend/api/routes/users.py +1513 -0
- backend/config.py +172 -0
- backend/main.py +157 -0
- backend/middleware/__init__.py +5 -0
- backend/middleware/audit_logging.py +124 -0
- backend/models/__init__.py +0 -0
- backend/models/job.py +519 -0
- backend/models/requests.py +123 -0
- backend/models/theme.py +153 -0
- backend/models/user.py +254 -0
- backend/models/worker_log.py +164 -0
- backend/pyproject.toml +29 -0
- backend/quick-check.sh +93 -0
- backend/requirements.txt +29 -0
- backend/run_tests.sh +60 -0
- backend/services/__init__.py +0 -0
- backend/services/audio_analysis_service.py +243 -0
- backend/services/audio_editing_service.py +278 -0
- backend/services/audio_search_service.py +702 -0
- backend/services/auth_service.py +630 -0
- backend/services/credential_manager.py +792 -0
- backend/services/discord_service.py +172 -0
- backend/services/dropbox_service.py +301 -0
- backend/services/email_service.py +1093 -0
- backend/services/encoding_interface.py +454 -0
- backend/services/encoding_service.py +502 -0
- backend/services/firestore_service.py +512 -0
- backend/services/flacfetch_client.py +573 -0
- backend/services/gce_encoding/README.md +72 -0
- backend/services/gce_encoding/__init__.py +22 -0
- backend/services/gce_encoding/main.py +589 -0
- backend/services/gce_encoding/requirements.txt +16 -0
- backend/services/gdrive_service.py +356 -0
- backend/services/job_logging.py +258 -0
- backend/services/job_manager.py +853 -0
- backend/services/job_notification_service.py +271 -0
- backend/services/langfuse_preloader.py +98 -0
- backend/services/local_encoding_service.py +590 -0
- backend/services/local_preview_encoding_service.py +407 -0
- backend/services/lyrics_cache_service.py +216 -0
- backend/services/metrics.py +413 -0
- backend/services/nltk_preloader.py +122 -0
- backend/services/packaging_service.py +287 -0
- backend/services/rclone_service.py +106 -0
- backend/services/spacy_preloader.py +65 -0
- backend/services/storage_service.py +209 -0
- backend/services/stripe_service.py +371 -0
- backend/services/structured_logging.py +254 -0
- backend/services/template_service.py +330 -0
- backend/services/theme_service.py +469 -0
- backend/services/tracing.py +543 -0
- backend/services/user_service.py +721 -0
- backend/services/worker_service.py +558 -0
- backend/services/youtube_service.py +112 -0
- backend/services/youtube_upload_service.py +445 -0
- backend/tests/__init__.py +4 -0
- backend/tests/conftest.py +224 -0
- backend/tests/emulator/__init__.py +7 -0
- backend/tests/emulator/conftest.py +109 -0
- backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
- backend/tests/emulator/test_emulator_integration.py +356 -0
- backend/tests/emulator/test_style_loading_direct.py +436 -0
- backend/tests/emulator/test_worker_logs_direct.py +229 -0
- backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
- backend/tests/requirements-test.txt +10 -0
- backend/tests/requirements.txt +6 -0
- backend/tests/test_admin_email_endpoints.py +411 -0
- backend/tests/test_api_integration.py +460 -0
- backend/tests/test_api_routes.py +93 -0
- backend/tests/test_audio_analysis_service.py +294 -0
- backend/tests/test_audio_editing_service.py +386 -0
- backend/tests/test_audio_search.py +1398 -0
- backend/tests/test_audio_services.py +378 -0
- backend/tests/test_auth_firestore.py +231 -0
- backend/tests/test_config_extended.py +68 -0
- backend/tests/test_credential_manager.py +377 -0
- backend/tests/test_dependencies.py +54 -0
- backend/tests/test_discord_service.py +244 -0
- backend/tests/test_distribution_services.py +820 -0
- backend/tests/test_dropbox_service.py +472 -0
- backend/tests/test_email_service.py +492 -0
- backend/tests/test_emulator_integration.py +322 -0
- backend/tests/test_encoding_interface.py +412 -0
- backend/tests/test_file_upload.py +1739 -0
- backend/tests/test_flacfetch_client.py +632 -0
- backend/tests/test_gdrive_service.py +524 -0
- backend/tests/test_instrumental_api.py +431 -0
- backend/tests/test_internal_api.py +343 -0
- backend/tests/test_job_creation_regression.py +583 -0
- backend/tests/test_job_manager.py +356 -0
- backend/tests/test_job_manager_notifications.py +329 -0
- backend/tests/test_job_notification_service.py +443 -0
- backend/tests/test_jobs_api.py +283 -0
- backend/tests/test_local_encoding_service.py +423 -0
- backend/tests/test_local_preview_encoding_service.py +567 -0
- backend/tests/test_main.py +87 -0
- backend/tests/test_models.py +918 -0
- backend/tests/test_packaging_service.py +382 -0
- backend/tests/test_requests.py +201 -0
- backend/tests/test_routes_jobs.py +282 -0
- backend/tests/test_routes_review.py +337 -0
- backend/tests/test_services.py +556 -0
- backend/tests/test_services_extended.py +112 -0
- backend/tests/test_spacy_preloader.py +119 -0
- backend/tests/test_storage_service.py +448 -0
- backend/tests/test_style_upload.py +261 -0
- backend/tests/test_template_service.py +295 -0
- backend/tests/test_theme_service.py +516 -0
- backend/tests/test_unicode_sanitization.py +522 -0
- backend/tests/test_upload_api.py +256 -0
- backend/tests/test_validate.py +156 -0
- backend/tests/test_video_worker_orchestrator.py +847 -0
- backend/tests/test_worker_log_subcollection.py +509 -0
- backend/tests/test_worker_logging.py +365 -0
- backend/tests/test_workers.py +1116 -0
- backend/tests/test_workers_extended.py +178 -0
- backend/tests/test_youtube_service.py +247 -0
- backend/tests/test_youtube_upload_service.py +568 -0
- backend/utils/test_data.py +27 -0
- backend/validate.py +173 -0
- backend/version.py +27 -0
- backend/workers/README.md +597 -0
- backend/workers/__init__.py +11 -0
- backend/workers/audio_worker.py +618 -0
- backend/workers/lyrics_worker.py +683 -0
- backend/workers/render_video_worker.py +483 -0
- backend/workers/screens_worker.py +535 -0
- backend/workers/style_helper.py +198 -0
- backend/workers/video_worker.py +1277 -0
- backend/workers/video_worker_orchestrator.py +701 -0
- backend/workers/worker_logging.py +278 -0
- karaoke_gen/instrumental_review/static/index.html +7 -4
- karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
- karaoke_gen/utils/__init__.py +163 -8
- karaoke_gen/video_background_processor.py +9 -4
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/METADATA +1 -1
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/RECORD +196 -46
- lyrics_transcriber/correction/agentic/agent.py +17 -6
- lyrics_transcriber/correction/agentic/providers/config.py +9 -5
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -93
- lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
- lyrics_transcriber/correction/anchor_sequence.py +151 -37
- lyrics_transcriber/correction/corrector.py +192 -130
- lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
- lyrics_transcriber/correction/operations.py +24 -9
- lyrics_transcriber/correction/phrase_analyzer.py +18 -0
- lyrics_transcriber/frontend/package-lock.json +2 -2
- lyrics_transcriber/frontend/package.json +1 -1
- lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
- lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
- lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
- lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
- lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
- lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
- lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
- lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
- lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
- lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
- lyrics_transcriber/frontend/src/theme.ts +42 -15
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/frontend/vite.config.js +5 -0
- lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
- lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
- lyrics_transcriber/frontend/web_assets/index.html +6 -2
- lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
- lyrics_transcriber/output/generator.py +17 -3
- lyrics_transcriber/output/video.py +60 -95
- lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/WHEEL +0 -0
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/entry_points.txt +0 -0
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,436 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Direct tests for render_video_worker style loading.
|
|
3
|
+
|
|
4
|
+
Tests the fix for styles being loaded from job.style_params_gcs_path
|
|
5
|
+
instead of the incorrect job.state_data['styles_gcs_path'].
|
|
6
|
+
|
|
7
|
+
Run with:
|
|
8
|
+
./scripts/start-emulators.sh
|
|
9
|
+
pytest backend/tests/emulator/test_style_loading_direct.py -v
|
|
10
|
+
"""
|
|
11
|
+
import pytest
|
|
12
|
+
import json
|
|
13
|
+
import os
|
|
14
|
+
import tempfile
|
|
15
|
+
import requests
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from typing import Dict, Optional
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def emulators_running() -> bool:
|
|
21
|
+
"""Check if GCP emulators are running."""
|
|
22
|
+
try:
|
|
23
|
+
requests.get("http://127.0.0.1:8080", timeout=1)
|
|
24
|
+
requests.get("http://127.0.0.1:4443", timeout=1)
|
|
25
|
+
return True
|
|
26
|
+
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
|
|
27
|
+
return False
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Skip all tests in this module if emulators aren't running
|
|
31
|
+
pytestmark = pytest.mark.skipif(
|
|
32
|
+
not emulators_running(),
|
|
33
|
+
reason="GCP emulators not running. Start with: scripts/start-emulators.sh"
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
# Set up environment for emulators
|
|
37
|
+
os.environ["FIRESTORE_EMULATOR_HOST"] = "127.0.0.1:8080"
|
|
38
|
+
os.environ["STORAGE_EMULATOR_HOST"] = "http://127.0.0.1:4443"
|
|
39
|
+
os.environ["GOOGLE_CLOUD_PROJECT"] = "test-project"
|
|
40
|
+
os.environ["GCS_BUCKET_NAME"] = "test-bucket"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
|
|
44
|
+
class MockJob:
|
|
45
|
+
"""Mock job object for testing style loading."""
|
|
46
|
+
job_id: str
|
|
47
|
+
style_params_gcs_path: Optional[str] = None
|
|
48
|
+
style_assets: Dict[str, str] = None
|
|
49
|
+
state_data: Dict = None
|
|
50
|
+
|
|
51
|
+
def __post_init__(self):
|
|
52
|
+
if self.style_assets is None:
|
|
53
|
+
self.style_assets = {}
|
|
54
|
+
if self.state_data is None:
|
|
55
|
+
self.state_data = {}
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class MockStorageService:
|
|
59
|
+
"""Mock storage service that uses GCS emulator."""
|
|
60
|
+
|
|
61
|
+
def __init__(self):
|
|
62
|
+
from google.cloud import storage
|
|
63
|
+
self.client = storage.Client()
|
|
64
|
+
self.bucket_name = "test-bucket"
|
|
65
|
+
self._ensure_bucket_exists()
|
|
66
|
+
|
|
67
|
+
def _ensure_bucket_exists(self):
|
|
68
|
+
"""Create bucket in emulator if it doesn't exist."""
|
|
69
|
+
try:
|
|
70
|
+
self.client.create_bucket(self.bucket_name)
|
|
71
|
+
except Exception:
|
|
72
|
+
pass # Bucket already exists
|
|
73
|
+
|
|
74
|
+
def upload_string(self, content: str, gcs_path: str, content_type: str = "application/json"):
|
|
75
|
+
"""Upload string content to GCS."""
|
|
76
|
+
bucket = self.client.bucket(self.bucket_name)
|
|
77
|
+
blob = bucket.blob(gcs_path)
|
|
78
|
+
blob.upload_from_string(content, content_type=content_type)
|
|
79
|
+
|
|
80
|
+
def upload_bytes(self, content: bytes, gcs_path: str, content_type: str = "application/octet-stream"):
|
|
81
|
+
"""Upload bytes to GCS."""
|
|
82
|
+
bucket = self.client.bucket(self.bucket_name)
|
|
83
|
+
blob = bucket.blob(gcs_path)
|
|
84
|
+
blob.upload_from_string(content, content_type=content_type)
|
|
85
|
+
|
|
86
|
+
def download_file(self, gcs_path: str, local_path: str):
|
|
87
|
+
"""Download file from GCS to local path."""
|
|
88
|
+
bucket = self.client.bucket(self.bucket_name)
|
|
89
|
+
blob = bucket.blob(gcs_path)
|
|
90
|
+
blob.download_to_filename(local_path)
|
|
91
|
+
|
|
92
|
+
def file_exists(self, gcs_path: str) -> bool:
|
|
93
|
+
"""Check if file exists in GCS."""
|
|
94
|
+
bucket = self.client.bucket(self.bucket_name)
|
|
95
|
+
blob = bucket.blob(gcs_path)
|
|
96
|
+
return blob.exists()
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class TestStyleLoadingFix:
|
|
100
|
+
"""
|
|
101
|
+
Tests for the render_video_worker style loading fix.
|
|
102
|
+
|
|
103
|
+
The bug was: render_video_worker looked for styles at
|
|
104
|
+
job.state_data.get('styles_gcs_path') but styles are actually stored at
|
|
105
|
+
job.style_params_gcs_path and job.style_assets.
|
|
106
|
+
"""
|
|
107
|
+
|
|
108
|
+
@pytest.fixture(autouse=True)
|
|
109
|
+
def setup(self):
|
|
110
|
+
"""Set up storage service and temp directory for each test."""
|
|
111
|
+
self.storage = MockStorageService()
|
|
112
|
+
self.temp_dir = tempfile.mkdtemp()
|
|
113
|
+
yield
|
|
114
|
+
# Cleanup
|
|
115
|
+
import shutil
|
|
116
|
+
shutil.rmtree(self.temp_dir, ignore_errors=True)
|
|
117
|
+
|
|
118
|
+
def _import_get_or_create_styles(self):
|
|
119
|
+
"""
|
|
120
|
+
Import _get_or_create_styles without importing the full worker module.
|
|
121
|
+
We copy the function here to avoid import issues with lyrics_transcriber.
|
|
122
|
+
"""
|
|
123
|
+
# We'll test using a copy of the function logic
|
|
124
|
+
# This avoids importing the full worker which has lyrics_transcriber dependency
|
|
125
|
+
pass
|
|
126
|
+
|
|
127
|
+
def test_style_loading_from_correct_location(self):
|
|
128
|
+
"""
|
|
129
|
+
Test that styles are loaded from job.style_params_gcs_path,
|
|
130
|
+
NOT from job.state_data['styles_gcs_path'].
|
|
131
|
+
"""
|
|
132
|
+
import time
|
|
133
|
+
job_id = f"test-style-{int(time.time() * 1000)}"
|
|
134
|
+
|
|
135
|
+
# Create style JSON with placeholder paths
|
|
136
|
+
style_json = {
|
|
137
|
+
"intro": {
|
|
138
|
+
"background_image": "/original/path/intro_bg.png",
|
|
139
|
+
"font": "/original/path/font.ttf"
|
|
140
|
+
},
|
|
141
|
+
"karaoke": {
|
|
142
|
+
"background_image": "/original/path/karaoke_bg.png",
|
|
143
|
+
"font_path": "/original/path/font.ttf",
|
|
144
|
+
"font_size": 100
|
|
145
|
+
},
|
|
146
|
+
"end": {
|
|
147
|
+
"background_image": "/original/path/end_bg.png",
|
|
148
|
+
"font": "/original/path/font.ttf"
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
# Upload style JSON to GCS (the CORRECT location)
|
|
153
|
+
style_gcs_path = f"uploads/{job_id}/style/style_params.json"
|
|
154
|
+
self.storage.upload_string(json.dumps(style_json), style_gcs_path)
|
|
155
|
+
|
|
156
|
+
# Upload mock assets
|
|
157
|
+
assets = {
|
|
158
|
+
"intro_background": f"uploads/{job_id}/style/intro_background.png",
|
|
159
|
+
"karaoke_background": f"uploads/{job_id}/style/karaoke_background.png",
|
|
160
|
+
"end_background": f"uploads/{job_id}/style/end_background.png",
|
|
161
|
+
"font": f"uploads/{job_id}/style/font.ttf",
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
for asset_key, gcs_path in assets.items():
|
|
165
|
+
# Upload fake file content
|
|
166
|
+
self.storage.upload_bytes(b"fake image/font data", gcs_path)
|
|
167
|
+
|
|
168
|
+
# Create mock job with styles in the CORRECT location
|
|
169
|
+
job = MockJob(
|
|
170
|
+
job_id=job_id,
|
|
171
|
+
style_params_gcs_path=style_gcs_path, # CORRECT
|
|
172
|
+
style_assets=assets, # CORRECT
|
|
173
|
+
state_data={} # state_data is EMPTY (no styles_gcs_path)
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
# Verify files exist in GCS
|
|
177
|
+
assert self.storage.file_exists(style_gcs_path), "Style JSON should exist in GCS"
|
|
178
|
+
for gcs_path in assets.values():
|
|
179
|
+
assert self.storage.file_exists(gcs_path), f"Asset {gcs_path} should exist in GCS"
|
|
180
|
+
|
|
181
|
+
# Now test the style loading logic (inline version of _get_or_create_styles)
|
|
182
|
+
style_dir = os.path.join(self.temp_dir, "style")
|
|
183
|
+
os.makedirs(style_dir, exist_ok=True)
|
|
184
|
+
styles_path = os.path.join(style_dir, "styles.json")
|
|
185
|
+
|
|
186
|
+
# This is the FIX: check job.style_params_gcs_path, not state_data
|
|
187
|
+
if job.style_params_gcs_path:
|
|
188
|
+
# Download style JSON
|
|
189
|
+
self.storage.download_file(job.style_params_gcs_path, styles_path)
|
|
190
|
+
|
|
191
|
+
# Load and update paths
|
|
192
|
+
with open(styles_path, 'r') as f:
|
|
193
|
+
style_data = json.load(f)
|
|
194
|
+
|
|
195
|
+
# Download assets and update paths
|
|
196
|
+
local_assets = {}
|
|
197
|
+
for asset_key, gcs_path in job.style_assets.items():
|
|
198
|
+
ext = os.path.splitext(gcs_path)[1] or '.png'
|
|
199
|
+
local_path = os.path.join(style_dir, f"{asset_key}{ext}")
|
|
200
|
+
self.storage.download_file(gcs_path, local_path)
|
|
201
|
+
local_assets[asset_key] = local_path
|
|
202
|
+
|
|
203
|
+
# Update style_data with local paths
|
|
204
|
+
asset_mapping = {
|
|
205
|
+
'intro_background': ('intro', 'background_image'),
|
|
206
|
+
'karaoke_background': ('karaoke', 'background_image'),
|
|
207
|
+
'end_background': ('end', 'background_image'),
|
|
208
|
+
'font': [('intro', 'font'), ('karaoke', 'font_path'), ('end', 'font')],
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
for asset_key, local_path in local_assets.items():
|
|
212
|
+
if asset_key in asset_mapping:
|
|
213
|
+
mappings = asset_mapping[asset_key]
|
|
214
|
+
if isinstance(mappings[0], str):
|
|
215
|
+
mappings = [mappings]
|
|
216
|
+
for section, field in mappings:
|
|
217
|
+
if section in style_data:
|
|
218
|
+
style_data[section][field] = local_path
|
|
219
|
+
|
|
220
|
+
# Save updated styles
|
|
221
|
+
with open(styles_path, 'w') as f:
|
|
222
|
+
json.dump(style_data, f, indent=2)
|
|
223
|
+
|
|
224
|
+
# Verify the result
|
|
225
|
+
with open(styles_path, 'r') as f:
|
|
226
|
+
result = json.load(f)
|
|
227
|
+
|
|
228
|
+
# Check that paths were updated to local paths
|
|
229
|
+
assert style_dir in result['karaoke']['background_image'], \
|
|
230
|
+
f"karaoke.background_image should be local path, got: {result['karaoke']['background_image']}"
|
|
231
|
+
assert style_dir in result['karaoke']['font_path'], \
|
|
232
|
+
f"karaoke.font_path should be local path, got: {result['karaoke']['font_path']}"
|
|
233
|
+
assert style_dir in result['intro']['background_image'], \
|
|
234
|
+
f"intro.background_image should be local path"
|
|
235
|
+
|
|
236
|
+
# Verify files actually exist locally
|
|
237
|
+
assert os.path.exists(result['karaoke']['background_image']), \
|
|
238
|
+
"Downloaded karaoke background should exist"
|
|
239
|
+
assert os.path.exists(result['karaoke']['font_path']), \
|
|
240
|
+
"Downloaded font should exist"
|
|
241
|
+
|
|
242
|
+
print(f"\n✅ Style loading from job.style_params_gcs_path works!")
|
|
243
|
+
print(f" karaoke.background_image: {result['karaoke']['background_image']}")
|
|
244
|
+
print(f" karaoke.font_path: {result['karaoke']['font_path']}")
|
|
245
|
+
|
|
246
|
+
def test_old_bug_state_data_lookup_fails(self):
|
|
247
|
+
"""
|
|
248
|
+
Demonstrate that the OLD bug would fail to find styles.
|
|
249
|
+
|
|
250
|
+
If we look at job.state_data['styles_gcs_path'] (the bug),
|
|
251
|
+
we won't find anything because styles are stored at
|
|
252
|
+
job.style_params_gcs_path.
|
|
253
|
+
"""
|
|
254
|
+
import time
|
|
255
|
+
job_id = f"test-bug-{int(time.time() * 1000)}"
|
|
256
|
+
|
|
257
|
+
# Upload style to CORRECT location
|
|
258
|
+
style_gcs_path = f"uploads/{job_id}/style/style_params.json"
|
|
259
|
+
self.storage.upload_string('{"karaoke": {"font_size": 100}}', style_gcs_path)
|
|
260
|
+
|
|
261
|
+
# Create job with styles in CORRECT location
|
|
262
|
+
job = MockJob(
|
|
263
|
+
job_id=job_id,
|
|
264
|
+
style_params_gcs_path=style_gcs_path, # CORRECT location
|
|
265
|
+
style_assets={},
|
|
266
|
+
state_data={} # No styles_gcs_path here!
|
|
267
|
+
)
|
|
268
|
+
|
|
269
|
+
# THE BUG: looking in state_data instead of style_params_gcs_path
|
|
270
|
+
wrong_path = job.state_data.get('styles_gcs_path')
|
|
271
|
+
correct_path = job.style_params_gcs_path
|
|
272
|
+
|
|
273
|
+
assert wrong_path is None, "state_data['styles_gcs_path'] should be None (the bug location)"
|
|
274
|
+
assert correct_path is not None, "style_params_gcs_path should have the correct path"
|
|
275
|
+
assert self.storage.file_exists(correct_path), "Style should exist at correct path"
|
|
276
|
+
|
|
277
|
+
print(f"\n✅ Demonstrated the bug: state_data lookup returns None")
|
|
278
|
+
print(f" state_data['styles_gcs_path'] = {wrong_path}")
|
|
279
|
+
print(f" job.style_params_gcs_path = {correct_path}")
|
|
280
|
+
|
|
281
|
+
def test_default_styles_when_no_custom_styles(self):
|
|
282
|
+
"""Test that default styles are used when no custom styles provided."""
|
|
283
|
+
import time
|
|
284
|
+
job_id = f"test-default-{int(time.time() * 1000)}"
|
|
285
|
+
|
|
286
|
+
# Job with NO custom styles
|
|
287
|
+
job = MockJob(
|
|
288
|
+
job_id=job_id,
|
|
289
|
+
style_params_gcs_path=None,
|
|
290
|
+
style_assets={},
|
|
291
|
+
state_data={}
|
|
292
|
+
)
|
|
293
|
+
|
|
294
|
+
# Simulate the logic
|
|
295
|
+
style_dir = os.path.join(self.temp_dir, "style")
|
|
296
|
+
os.makedirs(style_dir, exist_ok=True)
|
|
297
|
+
styles_path = os.path.join(style_dir, "styles.json")
|
|
298
|
+
|
|
299
|
+
if job.style_params_gcs_path:
|
|
300
|
+
# Would load custom styles
|
|
301
|
+
pass
|
|
302
|
+
else:
|
|
303
|
+
# Use default styles
|
|
304
|
+
default_styles = {
|
|
305
|
+
"karaoke": {
|
|
306
|
+
"background_color": "#000000",
|
|
307
|
+
"font": "Arial",
|
|
308
|
+
"font_path": "",
|
|
309
|
+
"font_size": 100
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
with open(styles_path, 'w') as f:
|
|
313
|
+
json.dump(default_styles, f, indent=2)
|
|
314
|
+
|
|
315
|
+
# Verify defaults were used
|
|
316
|
+
with open(styles_path, 'r') as f:
|
|
317
|
+
result = json.load(f)
|
|
318
|
+
|
|
319
|
+
assert result['karaoke']['background_color'] == "#000000"
|
|
320
|
+
assert result['karaoke']['font'] == "Arial"
|
|
321
|
+
|
|
322
|
+
print(f"\n✅ Default styles used when no custom styles provided")
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
class TestParallelWorkerExecution:
|
|
326
|
+
"""
|
|
327
|
+
Tests for the parallel worker execution fix.
|
|
328
|
+
|
|
329
|
+
The bug was: FastAPI's BackgroundTasks runs async tasks sequentially,
|
|
330
|
+
causing audio worker to complete before lyrics worker starts.
|
|
331
|
+
|
|
332
|
+
The fix uses asyncio.gather() to run both workers in parallel.
|
|
333
|
+
"""
|
|
334
|
+
|
|
335
|
+
def test_asyncio_gather_runs_parallel(self):
|
|
336
|
+
"""Test that asyncio.gather actually runs tasks in parallel."""
|
|
337
|
+
import asyncio
|
|
338
|
+
import time
|
|
339
|
+
|
|
340
|
+
start_times = {}
|
|
341
|
+
end_times = {}
|
|
342
|
+
|
|
343
|
+
async def task1():
|
|
344
|
+
start_times['task1'] = time.time()
|
|
345
|
+
await asyncio.sleep(0.1)
|
|
346
|
+
end_times['task1'] = time.time()
|
|
347
|
+
return "task1 done"
|
|
348
|
+
|
|
349
|
+
async def task2():
|
|
350
|
+
start_times['task2'] = time.time()
|
|
351
|
+
await asyncio.sleep(0.1)
|
|
352
|
+
end_times['task2'] = time.time()
|
|
353
|
+
return "task2 done"
|
|
354
|
+
|
|
355
|
+
async def run_parallel():
|
|
356
|
+
return await asyncio.gather(task1(), task2())
|
|
357
|
+
|
|
358
|
+
overall_start = time.time()
|
|
359
|
+
results = asyncio.run(run_parallel())
|
|
360
|
+
overall_duration = time.time() - overall_start
|
|
361
|
+
|
|
362
|
+
# Both tasks should have started at nearly the same time
|
|
363
|
+
start_diff = abs(start_times['task1'] - start_times['task2'])
|
|
364
|
+
|
|
365
|
+
# If running in parallel:
|
|
366
|
+
# - Both start at ~same time (diff < 0.05s)
|
|
367
|
+
# - Total duration is ~0.1s (not 0.2s for sequential)
|
|
368
|
+
|
|
369
|
+
assert start_diff < 0.05, \
|
|
370
|
+
f"Tasks should start together, diff was {start_diff:.3f}s"
|
|
371
|
+
assert overall_duration < 0.15, \
|
|
372
|
+
f"Parallel execution should take ~0.1s, took {overall_duration:.3f}s"
|
|
373
|
+
|
|
374
|
+
print(f"\n✅ asyncio.gather runs tasks in parallel!")
|
|
375
|
+
print(f" Task start time difference: {start_diff:.3f}s")
|
|
376
|
+
print(f" Total duration: {overall_duration:.3f}s (sequential would be ~0.2s)")
|
|
377
|
+
|
|
378
|
+
def test_sequential_background_tasks_is_slow(self):
|
|
379
|
+
"""
|
|
380
|
+
Demonstrate that sequential execution (the bug) is slower.
|
|
381
|
+
"""
|
|
382
|
+
import asyncio
|
|
383
|
+
import time
|
|
384
|
+
|
|
385
|
+
execution_order = []
|
|
386
|
+
|
|
387
|
+
async def task1():
|
|
388
|
+
execution_order.append(('task1', 'start'))
|
|
389
|
+
await asyncio.sleep(0.05)
|
|
390
|
+
execution_order.append(('task1', 'end'))
|
|
391
|
+
|
|
392
|
+
async def task2():
|
|
393
|
+
execution_order.append(('task2', 'start'))
|
|
394
|
+
await asyncio.sleep(0.05)
|
|
395
|
+
execution_order.append(('task2', 'end'))
|
|
396
|
+
|
|
397
|
+
# Sequential (the bug)
|
|
398
|
+
async def run_sequential():
|
|
399
|
+
await task1()
|
|
400
|
+
await task2()
|
|
401
|
+
|
|
402
|
+
execution_order.clear()
|
|
403
|
+
start = time.time()
|
|
404
|
+
asyncio.run(run_sequential())
|
|
405
|
+
sequential_duration = time.time() - start
|
|
406
|
+
sequential_order = execution_order.copy()
|
|
407
|
+
|
|
408
|
+
# Parallel (the fix)
|
|
409
|
+
async def run_parallel():
|
|
410
|
+
await asyncio.gather(task1(), task2())
|
|
411
|
+
|
|
412
|
+
execution_order.clear()
|
|
413
|
+
start = time.time()
|
|
414
|
+
asyncio.run(run_parallel())
|
|
415
|
+
parallel_duration = time.time() - start
|
|
416
|
+
parallel_order = execution_order.copy()
|
|
417
|
+
|
|
418
|
+
# Sequential: task1 starts, task1 ends, task2 starts, task2 ends
|
|
419
|
+
assert sequential_order[0] == ('task1', 'start')
|
|
420
|
+
assert sequential_order[1] == ('task1', 'end')
|
|
421
|
+
assert sequential_order[2] == ('task2', 'start')
|
|
422
|
+
|
|
423
|
+
# Parallel: task1 starts, task2 starts (interleaved)
|
|
424
|
+
assert parallel_order[0][1] == 'start'
|
|
425
|
+
assert parallel_order[1][1] == 'start'
|
|
426
|
+
|
|
427
|
+
# Parallel should be ~2x faster
|
|
428
|
+
assert parallel_duration < sequential_duration * 0.8, \
|
|
429
|
+
f"Parallel ({parallel_duration:.3f}s) should be faster than sequential ({sequential_duration:.3f}s)"
|
|
430
|
+
|
|
431
|
+
print(f"\n✅ Demonstrated sequential vs parallel execution")
|
|
432
|
+
print(f" Sequential: {sequential_duration:.3f}s - {sequential_order}")
|
|
433
|
+
print(f" Parallel: {parallel_duration:.3f}s - {parallel_order}")
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
print("✅ Style loading and parallel execution tests ready")
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Direct tests for worker logging Firestore operations.
|
|
3
|
+
|
|
4
|
+
These tests bypass the full app/worker imports and test the Firestore
|
|
5
|
+
operations directly, avoiding dependency issues.
|
|
6
|
+
|
|
7
|
+
Run with: ./scripts/run-emulator-tests.sh
|
|
8
|
+
"""
|
|
9
|
+
import pytest
|
|
10
|
+
import time
|
|
11
|
+
import requests
|
|
12
|
+
import os
|
|
13
|
+
import threading
|
|
14
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def emulators_running() -> bool:
|
|
19
|
+
"""Check if GCP emulators are running."""
|
|
20
|
+
try:
|
|
21
|
+
requests.get("http://127.0.0.1:8080", timeout=1)
|
|
22
|
+
return True
|
|
23
|
+
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
|
|
24
|
+
return False
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Skip all tests in this module if emulators aren't running
|
|
28
|
+
pytestmark = pytest.mark.skipif(
|
|
29
|
+
not emulators_running(),
|
|
30
|
+
reason="GCP emulators not running. Start with: scripts/start-emulators.sh"
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
# Set up environment for emulator
|
|
34
|
+
os.environ["FIRESTORE_EMULATOR_HOST"] = "127.0.0.1:8080"
|
|
35
|
+
os.environ["GOOGLE_CLOUD_PROJECT"] = "test-project"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class TestWorkerLogsFirestoreDirect:
    """Direct Firestore tests for worker logs - no app imports needed.

    Talks straight to the Firestore emulator (see the module-level
    FIRESTORE_EMULATOR_HOST setup) and compares two strategies for appending
    worker log entries to a job document's ``worker_logs`` array:

    * read-modify-write - fetch the array, append locally, write it back
      (racy under concurrency; kept here to document the old behaviour);
    * ``firestore.ArrayUnion`` - a single atomic server-side append.
    """

    @pytest.fixture(autouse=True)
    def setup_firestore(self):
        """Set up Firestore client for each test."""
        # Imported inside the fixture - presumably so the module can be
        # collected without google-cloud-firestore installed; TODO confirm.
        from google.cloud import firestore
        self.db = firestore.Client(project="test-project")
        self.collection = "test-worker-logs"
        # No teardown after the yield: documents created by a test are left
        # behind in the emulator.
        yield

    def _create_test_job(self) -> str:
        """Create a test job document and return its generated job id."""
        # Millisecond timestamp keeps ids unique enough for these tests; two
        # calls within the same millisecond would collide on the same doc.
        job_id = f"test-{int(time.time() * 1000)}"
        doc_ref = self.db.collection(self.collection).document(job_id)
        doc_ref.set({
            "job_id": job_id,
            "status": "pending",
            # TODO(review): datetime.utcnow() is deprecated since Python 3.12;
            # consider datetime.now(timezone.utc) module-wide.
            "created_at": datetime.utcnow(),
            "worker_logs": []
        })
        return job_id

    def _append_log_read_modify_write(self, job_id: str, worker: str, message: str) -> None:
        """
        OLD METHOD: Read-modify-write (has race condition).
        This is what we were doing before.

        Two concurrent callers can both read the same array, each append
        locally, and the later update() then overwrites the earlier caller's
        entry (classic lost update).
        """
        doc_ref = self.db.collection(self.collection).document(job_id)
        doc = doc_ref.get()
        if not doc.exists:
            # Silently drop the log entry if the job document is gone.
            return

        data = doc.to_dict()
        logs = data.get("worker_logs", [])
        logs.append({
            "timestamp": datetime.utcnow().isoformat() + "Z",
            "level": "INFO",
            "worker": worker,
            "message": message
        })
        # Unconditional write-back: clobbers any entries appended by other
        # writers between the get() above and this update().
        doc_ref.update({"worker_logs": logs})

    def _append_log_array_union(self, job_id: str, worker: str, message: str) -> None:
        """
        NEW METHOD: ArrayUnion (atomic, no race condition).
        This is what we're doing now.

        NOTE(review): per the Firestore docs, ArrayUnion only adds elements
        not already present, so two byte-identical log maps would be
        de-duplicated; the per-call timestamp makes that unlikely here.
        """
        from google.cloud import firestore
        doc_ref = self.db.collection(self.collection).document(job_id)
        doc_ref.update({
            "worker_logs": firestore.ArrayUnion([{
                "timestamp": datetime.utcnow().isoformat() + "Z",
                "level": "INFO",
                "worker": worker,
                "message": message
            }])
        })

    def _get_logs(self, job_id: str) -> list:
        """Get logs from job document; empty list if the document is missing."""
        doc_ref = self.db.collection(self.collection).document(job_id)
        doc = doc_ref.get()
        if not doc.exists:
            return []
        return doc.to_dict().get("worker_logs", [])

    def test_array_union_single_write(self):
        """Test ArrayUnion works for single write."""
        job_id = self._create_test_job()

        self._append_log_array_union(job_id, "test", "Single log message")

        # Brief pause before reading back - presumably to let the emulator
        # settle, though the client update call is synchronous.
        time.sleep(0.1)
        logs = self._get_logs(job_id)

        assert len(logs) == 1
        assert logs[0]["message"] == "Single log message"
        assert logs[0]["worker"] == "test"

    def test_array_union_sequential_writes(self):
        """Test ArrayUnion preserves all sequential writes."""
        job_id = self._create_test_job()

        # Distinct messages per iteration, so ArrayUnion's set semantics
        # cannot collapse any of the ten entries.
        for i in range(10):
            self._append_log_array_union(job_id, "test", f"Log {i}")

        time.sleep(0.2)
        logs = self._get_logs(job_id)

        assert len(logs) == 10, f"Expected 10 logs, got {len(logs)}"

    def test_read_modify_write_race_condition(self):
        """
        Demonstrate the race condition with read-modify-write.
        This test shows WHY we needed ArrayUnion.

        Deliberately asserts nothing about the count: the race is
        probabilistic, and this test only documents/observes the problem.
        """
        job_id = self._create_test_job()
        num_writes = 20

        def write_log(index):
            # Closure over job_id; each worker thread appends one entry.
            self._append_log_read_modify_write(job_id, "worker", f"RMW Log {index}")

        # Write concurrently - should lose some logs due to race condition
        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(write_log, i) for i in range(num_writes)]
            for future in as_completed(futures):
                try:
                    future.result()
                except Exception:
                    pass  # Ignore errors for this test

        time.sleep(0.5)
        logs = self._get_logs(job_id)

        # With read-modify-write, we likely lose some logs
        # This is expected - the test documents the problem
        rmw_count = len([l for l in logs if "RMW Log" in l.get("message", "")])
        print(f"\nRead-modify-write: {rmw_count}/{num_writes} logs preserved")

        # We expect to lose some logs (this is the bug we're fixing)
        # If all 20 are there, the race condition didn't trigger (which is fine)
        # The important thing is that ArrayUnion test below ALWAYS preserves all

    def test_array_union_no_race_condition(self):
        """
        Verify ArrayUnion preserves ALL concurrent writes.
        This is the critical test.
        """
        job_id = self._create_test_job()
        num_writes = 20

        def write_log(index):
            self._append_log_array_union(job_id, "worker", f"ArrayUnion Log {index}")

        # Write concurrently
        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(write_log, i) for i in range(num_writes)]
            for future in as_completed(futures):
                future.result()  # Raise any exceptions

        time.sleep(0.5)
        logs = self._get_logs(job_id)

        # With ArrayUnion, ALL logs should be preserved
        au_count = len([l for l in logs if "ArrayUnion Log" in l.get("message", "")])
        print(f"\nArrayUnion: {au_count}/{num_writes} logs preserved")

        # Unlike the read-modify-write test above, this one hard-asserts.
        assert au_count == num_writes, \
            f"ArrayUnion should preserve all {num_writes} logs, got {au_count}"

    def test_concurrent_workers_array_union(self):
        """
        Test simulating audio and lyrics workers writing concurrently.

        Two threads interleave ArrayUnion appends under distinct worker
        names; every entry from both must survive.
        """
        job_id = self._create_test_job()

        def audio_worker():
            for i in range(10):
                self._append_log_array_union(job_id, "audio", f"Audio log {i}")
                # Small stagger so the two workers genuinely interleave.
                time.sleep(0.01)

        def lyrics_worker():
            for i in range(10):
                self._append_log_array_union(job_id, "lyrics", f"Lyrics log {i}")
                time.sleep(0.01)

        # Start both workers
        audio_thread = threading.Thread(target=audio_worker)
        lyrics_thread = threading.Thread(target=lyrics_worker)

        audio_thread.start()
        lyrics_thread.start()

        audio_thread.join()
        lyrics_thread.join()

        time.sleep(0.5)
        logs = self._get_logs(job_id)

        # Partition the combined array by the "worker" field written above.
        audio_logs = [l for l in logs if l.get("worker") == "audio"]
        lyrics_logs = [l for l in logs if l.get("worker") == "lyrics"]

        print(f"\nConcurrent workers: {len(audio_logs)} audio + {len(lyrics_logs)} lyrics")

        assert len(audio_logs) == 10, f"Expected 10 audio logs, got {len(audio_logs)}"
        assert len(lyrics_logs) == 10, f"Expected 10 lyrics logs, got {len(lyrics_logs)}"

        print("✅ All logs from both workers preserved!")
+
|
|
229
|
+
print("✅ Direct Firestore worker logs tests ready")
|