karaoke-gen 0.90.1__py3-none-any.whl → 0.99.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. backend/.coveragerc +20 -0
  2. backend/.gitignore +37 -0
  3. backend/Dockerfile +43 -0
  4. backend/Dockerfile.base +74 -0
  5. backend/README.md +242 -0
  6. backend/__init__.py +0 -0
  7. backend/api/__init__.py +0 -0
  8. backend/api/dependencies.py +457 -0
  9. backend/api/routes/__init__.py +0 -0
  10. backend/api/routes/admin.py +835 -0
  11. backend/api/routes/audio_search.py +913 -0
  12. backend/api/routes/auth.py +348 -0
  13. backend/api/routes/file_upload.py +2112 -0
  14. backend/api/routes/health.py +409 -0
  15. backend/api/routes/internal.py +435 -0
  16. backend/api/routes/jobs.py +1629 -0
  17. backend/api/routes/review.py +652 -0
  18. backend/api/routes/themes.py +162 -0
  19. backend/api/routes/users.py +1513 -0
  20. backend/config.py +172 -0
  21. backend/main.py +157 -0
  22. backend/middleware/__init__.py +5 -0
  23. backend/middleware/audit_logging.py +124 -0
  24. backend/models/__init__.py +0 -0
  25. backend/models/job.py +519 -0
  26. backend/models/requests.py +123 -0
  27. backend/models/theme.py +153 -0
  28. backend/models/user.py +254 -0
  29. backend/models/worker_log.py +164 -0
  30. backend/pyproject.toml +29 -0
  31. backend/quick-check.sh +93 -0
  32. backend/requirements.txt +29 -0
  33. backend/run_tests.sh +60 -0
  34. backend/services/__init__.py +0 -0
  35. backend/services/audio_analysis_service.py +243 -0
  36. backend/services/audio_editing_service.py +278 -0
  37. backend/services/audio_search_service.py +702 -0
  38. backend/services/auth_service.py +630 -0
  39. backend/services/credential_manager.py +792 -0
  40. backend/services/discord_service.py +172 -0
  41. backend/services/dropbox_service.py +301 -0
  42. backend/services/email_service.py +1093 -0
  43. backend/services/encoding_interface.py +454 -0
  44. backend/services/encoding_service.py +502 -0
  45. backend/services/firestore_service.py +512 -0
  46. backend/services/flacfetch_client.py +573 -0
  47. backend/services/gce_encoding/README.md +72 -0
  48. backend/services/gce_encoding/__init__.py +22 -0
  49. backend/services/gce_encoding/main.py +589 -0
  50. backend/services/gce_encoding/requirements.txt +16 -0
  51. backend/services/gdrive_service.py +356 -0
  52. backend/services/job_logging.py +258 -0
  53. backend/services/job_manager.py +853 -0
  54. backend/services/job_notification_service.py +271 -0
  55. backend/services/langfuse_preloader.py +98 -0
  56. backend/services/local_encoding_service.py +590 -0
  57. backend/services/local_preview_encoding_service.py +407 -0
  58. backend/services/lyrics_cache_service.py +216 -0
  59. backend/services/metrics.py +413 -0
  60. backend/services/nltk_preloader.py +122 -0
  61. backend/services/packaging_service.py +287 -0
  62. backend/services/rclone_service.py +106 -0
  63. backend/services/spacy_preloader.py +65 -0
  64. backend/services/storage_service.py +209 -0
  65. backend/services/stripe_service.py +371 -0
  66. backend/services/structured_logging.py +254 -0
  67. backend/services/template_service.py +330 -0
  68. backend/services/theme_service.py +469 -0
  69. backend/services/tracing.py +543 -0
  70. backend/services/user_service.py +721 -0
  71. backend/services/worker_service.py +558 -0
  72. backend/services/youtube_service.py +112 -0
  73. backend/services/youtube_upload_service.py +445 -0
  74. backend/tests/__init__.py +4 -0
  75. backend/tests/conftest.py +224 -0
  76. backend/tests/emulator/__init__.py +7 -0
  77. backend/tests/emulator/conftest.py +109 -0
  78. backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
  79. backend/tests/emulator/test_emulator_integration.py +356 -0
  80. backend/tests/emulator/test_style_loading_direct.py +436 -0
  81. backend/tests/emulator/test_worker_logs_direct.py +229 -0
  82. backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
  83. backend/tests/requirements-test.txt +10 -0
  84. backend/tests/requirements.txt +6 -0
  85. backend/tests/test_admin_email_endpoints.py +411 -0
  86. backend/tests/test_api_integration.py +460 -0
  87. backend/tests/test_api_routes.py +93 -0
  88. backend/tests/test_audio_analysis_service.py +294 -0
  89. backend/tests/test_audio_editing_service.py +386 -0
  90. backend/tests/test_audio_search.py +1398 -0
  91. backend/tests/test_audio_services.py +378 -0
  92. backend/tests/test_auth_firestore.py +231 -0
  93. backend/tests/test_config_extended.py +68 -0
  94. backend/tests/test_credential_manager.py +377 -0
  95. backend/tests/test_dependencies.py +54 -0
  96. backend/tests/test_discord_service.py +244 -0
  97. backend/tests/test_distribution_services.py +820 -0
  98. backend/tests/test_dropbox_service.py +472 -0
  99. backend/tests/test_email_service.py +492 -0
  100. backend/tests/test_emulator_integration.py +322 -0
  101. backend/tests/test_encoding_interface.py +412 -0
  102. backend/tests/test_file_upload.py +1739 -0
  103. backend/tests/test_flacfetch_client.py +632 -0
  104. backend/tests/test_gdrive_service.py +524 -0
  105. backend/tests/test_instrumental_api.py +431 -0
  106. backend/tests/test_internal_api.py +343 -0
  107. backend/tests/test_job_creation_regression.py +583 -0
  108. backend/tests/test_job_manager.py +356 -0
  109. backend/tests/test_job_manager_notifications.py +329 -0
  110. backend/tests/test_job_notification_service.py +443 -0
  111. backend/tests/test_jobs_api.py +283 -0
  112. backend/tests/test_local_encoding_service.py +423 -0
  113. backend/tests/test_local_preview_encoding_service.py +567 -0
  114. backend/tests/test_main.py +87 -0
  115. backend/tests/test_models.py +918 -0
  116. backend/tests/test_packaging_service.py +382 -0
  117. backend/tests/test_requests.py +201 -0
  118. backend/tests/test_routes_jobs.py +282 -0
  119. backend/tests/test_routes_review.py +337 -0
  120. backend/tests/test_services.py +556 -0
  121. backend/tests/test_services_extended.py +112 -0
  122. backend/tests/test_spacy_preloader.py +119 -0
  123. backend/tests/test_storage_service.py +448 -0
  124. backend/tests/test_style_upload.py +261 -0
  125. backend/tests/test_template_service.py +295 -0
  126. backend/tests/test_theme_service.py +516 -0
  127. backend/tests/test_unicode_sanitization.py +522 -0
  128. backend/tests/test_upload_api.py +256 -0
  129. backend/tests/test_validate.py +156 -0
  130. backend/tests/test_video_worker_orchestrator.py +847 -0
  131. backend/tests/test_worker_log_subcollection.py +509 -0
  132. backend/tests/test_worker_logging.py +365 -0
  133. backend/tests/test_workers.py +1116 -0
  134. backend/tests/test_workers_extended.py +178 -0
  135. backend/tests/test_youtube_service.py +247 -0
  136. backend/tests/test_youtube_upload_service.py +568 -0
  137. backend/utils/test_data.py +27 -0
  138. backend/validate.py +173 -0
  139. backend/version.py +27 -0
  140. backend/workers/README.md +597 -0
  141. backend/workers/__init__.py +11 -0
  142. backend/workers/audio_worker.py +618 -0
  143. backend/workers/lyrics_worker.py +683 -0
  144. backend/workers/render_video_worker.py +483 -0
  145. backend/workers/screens_worker.py +535 -0
  146. backend/workers/style_helper.py +198 -0
  147. backend/workers/video_worker.py +1277 -0
  148. backend/workers/video_worker_orchestrator.py +701 -0
  149. backend/workers/worker_logging.py +278 -0
  150. karaoke_gen/instrumental_review/static/index.html +7 -4
  151. karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
  152. karaoke_gen/utils/__init__.py +163 -8
  153. karaoke_gen/video_background_processor.py +9 -4
  154. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/METADATA +1 -1
  155. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/RECORD +196 -46
  156. lyrics_transcriber/correction/agentic/agent.py +17 -6
  157. lyrics_transcriber/correction/agentic/providers/config.py +9 -5
  158. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -93
  159. lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
  160. lyrics_transcriber/correction/anchor_sequence.py +151 -37
  161. lyrics_transcriber/correction/corrector.py +192 -130
  162. lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
  163. lyrics_transcriber/correction/operations.py +24 -9
  164. lyrics_transcriber/correction/phrase_analyzer.py +18 -0
  165. lyrics_transcriber/frontend/package-lock.json +2 -2
  166. lyrics_transcriber/frontend/package.json +1 -1
  167. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
  168. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
  169. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
  170. lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
  171. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
  172. lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
  173. lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
  174. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
  175. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
  176. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
  177. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
  178. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  179. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
  180. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
  181. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
  182. lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
  183. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
  184. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
  185. lyrics_transcriber/frontend/src/theme.ts +42 -15
  186. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  187. lyrics_transcriber/frontend/vite.config.js +5 -0
  188. lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
  189. lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
  190. lyrics_transcriber/frontend/web_assets/index.html +6 -2
  191. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
  192. lyrics_transcriber/output/generator.py +17 -3
  193. lyrics_transcriber/output/video.py +60 -95
  194. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
  195. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/WHEEL +0 -0
  196. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/entry_points.txt +0 -0
  197. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,436 @@
1
+ """
2
+ Direct tests for render_video_worker style loading.
3
+
4
+ Tests the fix for styles being loaded from job.style_params_gcs_path
5
+ instead of the incorrect job.state_data['styles_gcs_path'].
6
+
7
+ Run with:
8
+ ./scripts/start-emulators.sh
9
+ pytest backend/tests/emulator/test_style_loading_direct.py -v
10
+ """
11
+ import pytest
12
+ import json
13
+ import os
14
+ import tempfile
15
+ import requests
16
+ from dataclasses import dataclass
17
+ from typing import Dict, Optional
18
+
19
+
20
def emulators_running() -> bool:
    """Report whether the local GCP emulators answer HTTP requests.

    Probes 127.0.0.1:8080 and then 127.0.0.1:4443, each with a one-second
    timeout. Returns False as soon as a probe fails with a connection error
    or a timeout, and True when both probes complete.
    """
    probe_urls = ("http://127.0.0.1:8080", "http://127.0.0.1:4443")
    for url in probe_urls:
        try:
            requests.get(url, timeout=1)
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            return False
    return True
28
+
29
+
30
# Every test in this module needs live emulators; skip the whole module otherwise.
pytestmark = pytest.mark.skipif(
    not emulators_running(),
    reason="GCP emulators not running. Start with: scripts/start-emulators.sh",
)

# Emulator endpoints and project/bucket names, exported as environment
# variables in one step.
os.environ.update(
    {
        "FIRESTORE_EMULATOR_HOST": "127.0.0.1:8080",
        "STORAGE_EMULATOR_HOST": "http://127.0.0.1:4443",
        "GOOGLE_CLOUD_PROJECT": "test-project",
        "GCS_BUCKET_NAME": "test-bucket",
    }
)
41
+
42
+
43
@dataclass
class MockJob:
    """Mock job object for testing style loading.

    ``style_assets`` and ``state_data`` may be omitted or passed as ``None``;
    in both cases each instance ends up with its own empty dict.
    """

    job_id: str
    style_params_gcs_path: Optional[str] = None
    # Declared Optional because None is the default. __post_init__ swaps it
    # for a fresh dict, so instances never share one mutable default.
    style_assets: Optional[Dict[str, str]] = None
    state_data: Optional[Dict] = None

    def __post_init__(self):
        """Replace ``None`` placeholders with fresh per-instance dicts."""
        if self.style_assets is None:
            self.style_assets = {}
        if self.state_data is None:
            self.state_data = {}
56
+
57
+
58
class MockStorageService:
    """Small storage wrapper for tests that talks to the GCS emulator.

    All operations target the fixed bucket "test-bucket".
    """

    def __init__(self):
        # The storage client is imported here, at construction time, rather
        # than at module import time.
        from google.cloud import storage

        self.bucket_name = "test-bucket"
        self.client = storage.Client()
        self._ensure_bucket_exists()

    def _ensure_bucket_exists(self):
        """Try to create the bucket in the emulator; ignore any failure."""
        try:
            self.client.create_bucket(self.bucket_name)
        except Exception:
            # Best effort: a failure here is taken to mean the bucket
            # already exists.
            pass

    def _blob(self, gcs_path: str):
        """Return the blob handle for ``gcs_path`` inside the test bucket."""
        return self.client.bucket(self.bucket_name).blob(gcs_path)

    def upload_string(self, content: str, gcs_path: str, content_type: str = "application/json"):
        """Store text ``content`` at ``gcs_path`` with the given content type."""
        self._blob(gcs_path).upload_from_string(content, content_type=content_type)

    def upload_bytes(self, content: bytes, gcs_path: str, content_type: str = "application/octet-stream"):
        """Store raw ``content`` bytes at ``gcs_path`` with the given content type."""
        self._blob(gcs_path).upload_from_string(content, content_type=content_type)

    def download_file(self, gcs_path: str, local_path: str):
        """Copy the object at ``gcs_path`` to the local file ``local_path``."""
        self._blob(gcs_path).download_to_filename(local_path)

    def file_exists(self, gcs_path: str) -> bool:
        """Return whether an object exists at ``gcs_path``."""
        return self._blob(gcs_path).exists()
97
+
98
+
99
class TestStyleLoadingFix:
    """
    Regression tests for where render_video_worker finds a job's styles.

    The bug: the worker read job.state_data.get('styles_gcs_path'), whereas
    styles are actually stored on job.style_params_gcs_path and
    job.style_assets.
    """

    @pytest.fixture(autouse=True)
    def setup(self):
        """Provide a storage service and scratch directory; remove the directory afterwards."""
        import shutil

        self.storage = MockStorageService()
        self.temp_dir = tempfile.mkdtemp()
        yield
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def _import_get_or_create_styles(self):
        """
        Placeholder that intentionally does nothing.

        The tests replay the style-loading logic inline instead of importing
        _get_or_create_styles, which avoids importing the full worker module
        and its lyrics_transcriber dependency.
        """
        return None

    def test_style_loading_from_correct_location(self):
        """
        Styles must be loaded from job.style_params_gcs_path,
        NOT from job.state_data['styles_gcs_path'].
        """
        import time

        job_id = f"test-style-{int(time.time() * 1000)}"
        upload_prefix = f"uploads/{job_id}/style"

        # Style JSON whose asset paths are placeholders, to be rewritten to
        # local paths by the loading logic below.
        placeholder_font = "/original/path/font.ttf"
        style_json = {
            "intro": {
                "background_image": "/original/path/intro_bg.png",
                "font": placeholder_font,
            },
            "karaoke": {
                "background_image": "/original/path/karaoke_bg.png",
                "font_path": placeholder_font,
                "font_size": 100,
            },
            "end": {
                "background_image": "/original/path/end_bg.png",
                "font": placeholder_font,
            },
        }

        # The style JSON goes to the CORRECT location in GCS.
        style_gcs_path = f"{upload_prefix}/style_params.json"
        self.storage.upload_string(json.dumps(style_json), style_gcs_path)

        # Three backgrounds plus one font, all uploaded with fake content.
        assets = {
            f"{section}_background": f"{upload_prefix}/{section}_background.png"
            for section in ("intro", "karaoke", "end")
        }
        assets["font"] = f"{upload_prefix}/font.ttf"
        for remote_path in assets.values():
            self.storage.upload_bytes(b"fake image/font data", remote_path)

        # The job points at the CORRECT fields; state_data stays empty.
        job = MockJob(
            job_id=job_id,
            style_params_gcs_path=style_gcs_path,
            style_assets=assets,
            state_data={},
        )

        # Sanity check: everything uploaded is visible in GCS.
        assert self.storage.file_exists(style_gcs_path), "Style JSON should exist in GCS"
        for remote_path in assets.values():
            assert self.storage.file_exists(remote_path), f"Asset {remote_path} should exist in GCS"

        # Inline replay of the _get_or_create_styles logic.
        style_dir = os.path.join(self.temp_dir, "style")
        os.makedirs(style_dir, exist_ok=True)
        styles_path = os.path.join(style_dir, "styles.json")

        # The FIX: consult job.style_params_gcs_path, not state_data.
        if job.style_params_gcs_path:
            self.storage.download_file(job.style_params_gcs_path, styles_path)
            with open(styles_path, 'r') as handle:
                style_data = json.load(handle)

            # Fetch every asset next to the style JSON.
            downloaded = {}
            for name, remote_path in job.style_assets.items():
                suffix = os.path.splitext(remote_path)[1] or '.png'
                target = os.path.join(style_dir, f"{name}{suffix}")
                self.storage.download_file(remote_path, target)
                downloaded[name] = target

            # Which (section, key) pairs each asset feeds; the font is shared
            # by all three sections.
            targets_by_asset = {
                'intro_background': [('intro', 'background_image')],
                'karaoke_background': [('karaoke', 'background_image')],
                'end_background': [('end', 'background_image')],
                'font': [('intro', 'font'), ('karaoke', 'font_path'), ('end', 'font')],
            }
            for name, target in downloaded.items():
                for section, key in targets_by_asset.get(name, ()):
                    if section in style_data:
                        style_data[section][key] = target

            # Persist the rewritten style file.
            with open(styles_path, 'w') as handle:
                json.dump(style_data, handle, indent=2)

        # Read back what was written and verify it.
        with open(styles_path, 'r') as handle:
            result = json.load(handle)

        # Paths must now point into the local style directory.
        assert style_dir in result['karaoke']['background_image'], \
            f"karaoke.background_image should be local path, got: {result['karaoke']['background_image']}"
        assert style_dir in result['karaoke']['font_path'], \
            f"karaoke.font_path should be local path, got: {result['karaoke']['font_path']}"
        assert style_dir in result['intro']['background_image'], \
            "intro.background_image should be local path"

        # The referenced files must really be on disk.
        assert os.path.exists(result['karaoke']['background_image']), \
            "Downloaded karaoke background should exist"
        assert os.path.exists(result['karaoke']['font_path']), \
            "Downloaded font should exist"

        print("\n✅ Style loading from job.style_params_gcs_path works!")
        print(f" karaoke.background_image: {result['karaoke']['background_image']}")
        print(f" karaoke.font_path: {result['karaoke']['font_path']}")

    def test_old_bug_state_data_lookup_fails(self):
        """
        Show that the OLD lookup would not find any styles.

        Reading job.state_data['styles_gcs_path'] (the bug) yields nothing,
        because styles are stored at job.style_params_gcs_path.
        """
        import time

        job_id = f"test-bug-{int(time.time() * 1000)}"

        # Style file uploaded to the CORRECT location.
        style_gcs_path = f"uploads/{job_id}/style/style_params.json"
        self.storage.upload_string('{"karaoke": {"font_size": 100}}', style_gcs_path)

        # The job references the CORRECT location; state_data has no styles_gcs_path.
        job = MockJob(
            job_id=job_id,
            style_params_gcs_path=style_gcs_path,
            style_assets={},
            state_data={},
        )

        # THE BUG looked in state_data instead of style_params_gcs_path.
        correct_path = job.style_params_gcs_path
        wrong_path = job.state_data.get('styles_gcs_path')

        assert wrong_path is None, "state_data['styles_gcs_path'] should be None (the bug location)"
        assert correct_path is not None, "style_params_gcs_path should have the correct path"
        assert self.storage.file_exists(correct_path), "Style should exist at correct path"

        print("\n✅ Demonstrated the bug: state_data lookup returns None")
        print(f" state_data['styles_gcs_path'] = {wrong_path}")
        print(f" job.style_params_gcs_path = {correct_path}")

    def test_default_styles_when_no_custom_styles(self):
        """Default styles are written when the job carries no custom styles."""
        import time

        job_id = f"test-default-{int(time.time() * 1000)}"

        # A job with NO custom styles at all.
        job = MockJob(
            job_id=job_id,
            style_params_gcs_path=None,
            style_assets={},
            state_data={},
        )

        # Replay of the fallback branch of the loading logic.
        style_dir = os.path.join(self.temp_dir, "style")
        os.makedirs(style_dir, exist_ok=True)
        styles_path = os.path.join(style_dir, "styles.json")

        if not job.style_params_gcs_path:
            # No custom styles: write the defaults.
            default_styles = {
                "karaoke": {
                    "background_color": "#000000",
                    "font": "Arial",
                    "font_path": "",
                    "font_size": 100,
                }
            }
            with open(styles_path, 'w') as handle:
                json.dump(default_styles, handle, indent=2)

        # Read the file back and confirm the defaults landed.
        with open(styles_path, 'r') as handle:
            result = json.load(handle)

        assert result['karaoke']['background_color'] == "#000000"
        assert result['karaoke']['font'] == "Arial"

        print("\n✅ Default styles used when no custom styles provided")
323
+
324
+
325
class TestParallelWorkerExecution:
    """
    Tests for the parallel worker execution fix.

    The bug was: FastAPI's BackgroundTasks runs async tasks sequentially,
    causing audio worker to complete before lyrics worker starts.

    The fix uses asyncio.gather() to run both workers in parallel.
    """

    def test_asyncio_gather_runs_parallel(self):
        """Test that asyncio.gather overlaps two sleeping coroutines."""
        import asyncio
        import time

        start_times = {}

        async def sleeper(name):
            # perf_counter is meant for measuring short durations; unlike
            # time.time() it is not affected by wall-clock adjustments.
            start_times[name] = time.perf_counter()
            await asyncio.sleep(0.1)
            return f"{name} done"

        async def run_parallel():
            return await asyncio.gather(sleeper("task1"), sleeper("task2"))

        overall_start = time.perf_counter()
        results = asyncio.run(run_parallel())
        overall_duration = time.perf_counter() - overall_start

        # gather() returns results in argument order, whatever the finish order.
        assert results == ["task1 done", "task2 done"]

        # If running in parallel:
        # - Both start at ~same time (diff < 0.05s)
        # - Total duration is ~0.1s (not 0.2s for sequential)
        start_diff = abs(start_times['task1'] - start_times['task2'])
        assert start_diff < 0.05, \
            f"Tasks should start together, diff was {start_diff:.3f}s"
        assert overall_duration < 0.15, \
            f"Parallel execution should take ~0.1s, took {overall_duration:.3f}s"

        print("\n✅ asyncio.gather runs tasks in parallel!")
        print(f" Task start time difference: {start_diff:.3f}s")
        print(f" Total duration: {overall_duration:.3f}s (sequential would be ~0.2s)")

    def test_sequential_background_tasks_is_slow(self):
        """
        Demonstrate that sequential execution (the bug) is slower than
        running the same coroutines through asyncio.gather (the fix).
        """
        import asyncio
        import time

        execution_order = []

        async def traced(name):
            execution_order.append((name, 'start'))
            await asyncio.sleep(0.05)
            execution_order.append((name, 'end'))

        # Sequential (the bug)
        async def run_sequential():
            await traced('task1')
            await traced('task2')

        # Parallel (the fix)
        async def run_parallel():
            await asyncio.gather(traced('task1'), traced('task2'))

        def timed_run(runner):
            """Run ``runner`` on a fresh event loop; return (duration, events)."""
            execution_order.clear()
            started = time.perf_counter()
            asyncio.run(runner())
            return time.perf_counter() - started, list(execution_order)

        sequential_duration, sequential_order = timed_run(run_sequential)
        parallel_duration, parallel_order = timed_run(run_parallel)

        # Sequential: task1 starts, task1 ends, task2 starts, task2 ends
        assert sequential_order == [
            ('task1', 'start'),
            ('task1', 'end'),
            ('task2', 'start'),
            ('task2', 'end'),
        ]

        # Parallel: both tasks start before either one ends (interleaved)
        assert [phase for _, phase in parallel_order[:2]] == ['start', 'start']

        # Parallel should be ~2x faster
        assert parallel_duration < sequential_duration * 0.8, \
            f"Parallel ({parallel_duration:.3f}s) should be faster than sequential ({sequential_duration:.3f}s)"

        print("\n✅ Demonstrated sequential vs parallel execution")
        print(f" Sequential: {sequential_duration:.3f}s - {sequential_order}")
        print(f" Parallel: {parallel_duration:.3f}s - {parallel_order}")
434
+
435
+
436
+ print("✅ Style loading and parallel execution tests ready")
@@ -0,0 +1,229 @@
1
+ """
2
+ Direct tests for worker logging Firestore operations.
3
+
4
+ These tests bypass the full app/worker imports and test the Firestore
5
+ operations directly, avoiding dependency issues.
6
+
7
+ Run with: ./scripts/run-emulator-tests.sh
8
+ """
9
+ import pytest
10
+ import time
11
+ import requests
12
+ import os
13
+ import threading
14
+ from concurrent.futures import ThreadPoolExecutor, as_completed
15
+ from datetime import datetime
16
+
17
+
18
def emulators_running() -> bool:
    """Report whether the emulator on 127.0.0.1:8080 answers HTTP requests.

    Sends a single GET with a one-second timeout. A connection error or a
    timeout yields False; any completed request yields True.
    """
    try:
        requests.get("http://127.0.0.1:8080", timeout=1)
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        return False
    else:
        return True
25
+
26
+
27
# Every test in this module needs a live emulator; skip the whole module otherwise.
pytestmark = pytest.mark.skipif(
    not emulators_running(),
    reason="GCP emulators not running. Start with: scripts/start-emulators.sh",
)

# Firestore emulator endpoint and project name, exported as environment
# variables in one step.
os.environ.update(
    {
        "FIRESTORE_EMULATOR_HOST": "127.0.0.1:8080",
        "GOOGLE_CLOUD_PROJECT": "test-project",
    }
)
36
+
37
+
38
class TestWorkerLogsFirestoreDirect:
    """Direct Firestore tests for worker logs - no app imports needed.

    Compares two ways of appending to a job document's ``worker_logs`` array:
    read-modify-write and ``firestore.ArrayUnion``.
    """

    @pytest.fixture(autouse=True)
    def setup_firestore(self):
        """Set up the Firestore client and test collection name for each test."""
        from google.cloud import firestore

        self.db = firestore.Client(project="test-project")
        self.collection = "test-worker-logs"
        yield

    @staticmethod
    def _utc_now():
        """Return the current time as a timezone-aware UTC datetime.

        Used instead of ``datetime.utcnow()``, which is deprecated since
        Python 3.12 and returns a naive value.
        """
        from datetime import timezone

        return datetime.now(timezone.utc)

    @classmethod
    def _make_log_entry(cls, worker: str, message: str) -> dict:
        """Build one INFO-level log record stamped with the current UTC time."""
        # Drop tzinfo before formatting so the text keeps the original
        # "<ISO-8601>Z" shape instead of gaining a "+00:00" suffix.
        stamp = cls._utc_now().replace(tzinfo=None).isoformat() + "Z"
        return {
            "timestamp": stamp,
            "level": "INFO",
            "worker": worker,
            "message": message,
        }

    def _job_ref(self, job_id: str):
        """Return the document reference for ``job_id`` in the test collection."""
        return self.db.collection(self.collection).document(job_id)

    def _create_test_job(self):
        """Create a pending test job document with no logs and return its id."""
        job_id = f"test-{int(time.time() * 1000)}"
        self._job_ref(job_id).set({
            "job_id": job_id,
            "status": "pending",
            "created_at": self._utc_now(),
            "worker_logs": [],
        })
        return job_id

    def _append_log_read_modify_write(self, job_id: str, worker: str, message: str):
        """
        OLD METHOD: Read-modify-write (has race condition).
        This is what we were doing before.

        Reads the whole log array, appends locally and writes it back, so a
        concurrent writer working from the same snapshot can be overwritten.
        Does nothing when the job document does not exist.
        """
        doc_ref = self._job_ref(job_id)
        snapshot = doc_ref.get()
        if not snapshot.exists:
            return

        logs = snapshot.to_dict().get("worker_logs", [])
        logs.append(self._make_log_entry(worker, message))
        doc_ref.update({"worker_logs": logs})

    def _append_log_array_union(self, job_id: str, worker: str, message: str):
        """
        NEW METHOD: ArrayUnion (atomic, no race condition).
        This is what we're doing now.
        """
        from google.cloud import firestore

        self._job_ref(job_id).update({
            "worker_logs": firestore.ArrayUnion([self._make_log_entry(worker, message)])
        })

    def _get_logs(self, job_id: str):
        """Return the job's ``worker_logs`` list, or [] if the document is missing."""
        snapshot = self._job_ref(job_id).get()
        if not snapshot.exists:
            return []
        return snapshot.to_dict().get("worker_logs", [])

    def test_array_union_single_write(self):
        """Test ArrayUnion works for single write."""
        job_id = self._create_test_job()

        self._append_log_array_union(job_id, "test", "Single log message")

        time.sleep(0.1)
        logs = self._get_logs(job_id)

        assert len(logs) == 1
        assert logs[0]["message"] == "Single log message"
        assert logs[0]["worker"] == "test"

    def test_array_union_sequential_writes(self):
        """Test ArrayUnion preserves all sequential writes."""
        job_id = self._create_test_job()

        for i in range(10):
            self._append_log_array_union(job_id, "test", f"Log {i}")

        time.sleep(0.2)
        logs = self._get_logs(job_id)

        assert len(logs) == 10, f"Expected 10 logs, got {len(logs)}"

    def test_read_modify_write_race_condition(self):
        """
        Demonstrate the race condition with read-modify-write.
        This test shows WHY we needed ArrayUnion.
        """
        job_id = self._create_test_job()
        num_writes = 20

        def write_log(index):
            self._append_log_read_modify_write(job_id, "worker", f"RMW Log {index}")

        # Write concurrently - should lose some logs due to race condition
        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(write_log, i) for i in range(num_writes)]
            for future in as_completed(futures):
                try:
                    future.result()
                except Exception:
                    pass  # Deliberate: errors are ignored for this demonstration

        time.sleep(0.5)
        logs = self._get_logs(job_id)

        # With read-modify-write, we likely lose some logs.
        # This is expected - the test documents the problem.
        rmw_count = sum(1 for entry in logs if "RMW Log" in entry.get("message", ""))
        print(f"\nRead-modify-write: {rmw_count}/{num_writes} logs preserved")

        # No assertion on purpose: if all 20 are present the race simply did
        # not trigger. The ArrayUnion tests are the ones that must always
        # preserve every entry.

    def test_array_union_no_race_condition(self):
        """
        Verify ArrayUnion preserves ALL concurrent writes.
        This is the critical test.
        """
        job_id = self._create_test_job()
        num_writes = 20

        def write_log(index):
            self._append_log_array_union(job_id, "worker", f"ArrayUnion Log {index}")

        # Write concurrently
        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(write_log, i) for i in range(num_writes)]
            for future in as_completed(futures):
                future.result()  # Re-raise any exception from the worker thread

        time.sleep(0.5)
        logs = self._get_logs(job_id)

        # With ArrayUnion, ALL logs should be preserved
        au_count = sum(1 for entry in logs if "ArrayUnion Log" in entry.get("message", ""))
        print(f"\nArrayUnion: {au_count}/{num_writes} logs preserved")

        assert au_count == num_writes, \
            f"ArrayUnion should preserve all {num_writes} logs, got {au_count}"

    def test_concurrent_workers_array_union(self):
        """
        Test simulating audio and lyrics workers writing concurrently.
        """
        job_id = self._create_test_job()

        def audio_worker():
            for i in range(10):
                self._append_log_array_union(job_id, "audio", f"Audio log {i}")
                time.sleep(0.01)

        def lyrics_worker():
            for i in range(10):
                self._append_log_array_union(job_id, "lyrics", f"Lyrics log {i}")
                time.sleep(0.01)

        # Start both workers, then wait for both to finish
        audio_thread = threading.Thread(target=audio_worker)
        lyrics_thread = threading.Thread(target=lyrics_worker)

        audio_thread.start()
        lyrics_thread.start()

        audio_thread.join()
        lyrics_thread.join()

        time.sleep(0.5)
        logs = self._get_logs(job_id)

        audio_logs = [entry for entry in logs if entry.get("worker") == "audio"]
        lyrics_logs = [entry for entry in logs if entry.get("worker") == "lyrics"]

        print(f"\nConcurrent workers: {len(audio_logs)} audio + {len(lyrics_logs)} lyrics")

        assert len(audio_logs) == 10, f"Expected 10 audio logs, got {len(audio_logs)}"
        assert len(lyrics_logs) == 10, f"Expected 10 lyrics logs, got {len(lyrics_logs)}"

        print("✅ All logs from both workers preserved!")
227
+
228
+
229
+ print("✅ Direct Firestore worker logs tests ready")