karaoke-gen 0.90.1__py3-none-any.whl → 0.99.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. backend/.coveragerc +20 -0
  2. backend/.gitignore +37 -0
  3. backend/Dockerfile +43 -0
  4. backend/Dockerfile.base +74 -0
  5. backend/README.md +242 -0
  6. backend/__init__.py +0 -0
  7. backend/api/__init__.py +0 -0
  8. backend/api/dependencies.py +457 -0
  9. backend/api/routes/__init__.py +0 -0
  10. backend/api/routes/admin.py +835 -0
  11. backend/api/routes/audio_search.py +913 -0
  12. backend/api/routes/auth.py +348 -0
  13. backend/api/routes/file_upload.py +2112 -0
  14. backend/api/routes/health.py +409 -0
  15. backend/api/routes/internal.py +435 -0
  16. backend/api/routes/jobs.py +1629 -0
  17. backend/api/routes/review.py +652 -0
  18. backend/api/routes/themes.py +162 -0
  19. backend/api/routes/users.py +1513 -0
  20. backend/config.py +172 -0
  21. backend/main.py +157 -0
  22. backend/middleware/__init__.py +5 -0
  23. backend/middleware/audit_logging.py +124 -0
  24. backend/models/__init__.py +0 -0
  25. backend/models/job.py +519 -0
  26. backend/models/requests.py +123 -0
  27. backend/models/theme.py +153 -0
  28. backend/models/user.py +254 -0
  29. backend/models/worker_log.py +164 -0
  30. backend/pyproject.toml +29 -0
  31. backend/quick-check.sh +93 -0
  32. backend/requirements.txt +29 -0
  33. backend/run_tests.sh +60 -0
  34. backend/services/__init__.py +0 -0
  35. backend/services/audio_analysis_service.py +243 -0
  36. backend/services/audio_editing_service.py +278 -0
  37. backend/services/audio_search_service.py +702 -0
  38. backend/services/auth_service.py +630 -0
  39. backend/services/credential_manager.py +792 -0
  40. backend/services/discord_service.py +172 -0
  41. backend/services/dropbox_service.py +301 -0
  42. backend/services/email_service.py +1093 -0
  43. backend/services/encoding_interface.py +454 -0
  44. backend/services/encoding_service.py +502 -0
  45. backend/services/firestore_service.py +512 -0
  46. backend/services/flacfetch_client.py +573 -0
  47. backend/services/gce_encoding/README.md +72 -0
  48. backend/services/gce_encoding/__init__.py +22 -0
  49. backend/services/gce_encoding/main.py +589 -0
  50. backend/services/gce_encoding/requirements.txt +16 -0
  51. backend/services/gdrive_service.py +356 -0
  52. backend/services/job_logging.py +258 -0
  53. backend/services/job_manager.py +853 -0
  54. backend/services/job_notification_service.py +271 -0
  55. backend/services/langfuse_preloader.py +98 -0
  56. backend/services/local_encoding_service.py +590 -0
  57. backend/services/local_preview_encoding_service.py +407 -0
  58. backend/services/lyrics_cache_service.py +216 -0
  59. backend/services/metrics.py +413 -0
  60. backend/services/nltk_preloader.py +122 -0
  61. backend/services/packaging_service.py +287 -0
  62. backend/services/rclone_service.py +106 -0
  63. backend/services/spacy_preloader.py +65 -0
  64. backend/services/storage_service.py +209 -0
  65. backend/services/stripe_service.py +371 -0
  66. backend/services/structured_logging.py +254 -0
  67. backend/services/template_service.py +330 -0
  68. backend/services/theme_service.py +469 -0
  69. backend/services/tracing.py +543 -0
  70. backend/services/user_service.py +721 -0
  71. backend/services/worker_service.py +558 -0
  72. backend/services/youtube_service.py +112 -0
  73. backend/services/youtube_upload_service.py +445 -0
  74. backend/tests/__init__.py +4 -0
  75. backend/tests/conftest.py +224 -0
  76. backend/tests/emulator/__init__.py +7 -0
  77. backend/tests/emulator/conftest.py +109 -0
  78. backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
  79. backend/tests/emulator/test_emulator_integration.py +356 -0
  80. backend/tests/emulator/test_style_loading_direct.py +436 -0
  81. backend/tests/emulator/test_worker_logs_direct.py +229 -0
  82. backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
  83. backend/tests/requirements-test.txt +10 -0
  84. backend/tests/requirements.txt +6 -0
  85. backend/tests/test_admin_email_endpoints.py +411 -0
  86. backend/tests/test_api_integration.py +460 -0
  87. backend/tests/test_api_routes.py +93 -0
  88. backend/tests/test_audio_analysis_service.py +294 -0
  89. backend/tests/test_audio_editing_service.py +386 -0
  90. backend/tests/test_audio_search.py +1398 -0
  91. backend/tests/test_audio_services.py +378 -0
  92. backend/tests/test_auth_firestore.py +231 -0
  93. backend/tests/test_config_extended.py +68 -0
  94. backend/tests/test_credential_manager.py +377 -0
  95. backend/tests/test_dependencies.py +54 -0
  96. backend/tests/test_discord_service.py +244 -0
  97. backend/tests/test_distribution_services.py +820 -0
  98. backend/tests/test_dropbox_service.py +472 -0
  99. backend/tests/test_email_service.py +492 -0
  100. backend/tests/test_emulator_integration.py +322 -0
  101. backend/tests/test_encoding_interface.py +412 -0
  102. backend/tests/test_file_upload.py +1739 -0
  103. backend/tests/test_flacfetch_client.py +632 -0
  104. backend/tests/test_gdrive_service.py +524 -0
  105. backend/tests/test_instrumental_api.py +431 -0
  106. backend/tests/test_internal_api.py +343 -0
  107. backend/tests/test_job_creation_regression.py +583 -0
  108. backend/tests/test_job_manager.py +356 -0
  109. backend/tests/test_job_manager_notifications.py +329 -0
  110. backend/tests/test_job_notification_service.py +443 -0
  111. backend/tests/test_jobs_api.py +283 -0
  112. backend/tests/test_local_encoding_service.py +423 -0
  113. backend/tests/test_local_preview_encoding_service.py +567 -0
  114. backend/tests/test_main.py +87 -0
  115. backend/tests/test_models.py +918 -0
  116. backend/tests/test_packaging_service.py +382 -0
  117. backend/tests/test_requests.py +201 -0
  118. backend/tests/test_routes_jobs.py +282 -0
  119. backend/tests/test_routes_review.py +337 -0
  120. backend/tests/test_services.py +556 -0
  121. backend/tests/test_services_extended.py +112 -0
  122. backend/tests/test_spacy_preloader.py +119 -0
  123. backend/tests/test_storage_service.py +448 -0
  124. backend/tests/test_style_upload.py +261 -0
  125. backend/tests/test_template_service.py +295 -0
  126. backend/tests/test_theme_service.py +516 -0
  127. backend/tests/test_unicode_sanitization.py +522 -0
  128. backend/tests/test_upload_api.py +256 -0
  129. backend/tests/test_validate.py +156 -0
  130. backend/tests/test_video_worker_orchestrator.py +847 -0
  131. backend/tests/test_worker_log_subcollection.py +509 -0
  132. backend/tests/test_worker_logging.py +365 -0
  133. backend/tests/test_workers.py +1116 -0
  134. backend/tests/test_workers_extended.py +178 -0
  135. backend/tests/test_youtube_service.py +247 -0
  136. backend/tests/test_youtube_upload_service.py +568 -0
  137. backend/utils/test_data.py +27 -0
  138. backend/validate.py +173 -0
  139. backend/version.py +27 -0
  140. backend/workers/README.md +597 -0
  141. backend/workers/__init__.py +11 -0
  142. backend/workers/audio_worker.py +618 -0
  143. backend/workers/lyrics_worker.py +683 -0
  144. backend/workers/render_video_worker.py +483 -0
  145. backend/workers/screens_worker.py +535 -0
  146. backend/workers/style_helper.py +198 -0
  147. backend/workers/video_worker.py +1277 -0
  148. backend/workers/video_worker_orchestrator.py +701 -0
  149. backend/workers/worker_logging.py +278 -0
  150. karaoke_gen/instrumental_review/static/index.html +7 -4
  151. karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
  152. karaoke_gen/utils/__init__.py +163 -8
  153. karaoke_gen/video_background_processor.py +9 -4
  154. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/METADATA +1 -1
  155. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/RECORD +196 -46
  156. lyrics_transcriber/correction/agentic/agent.py +17 -6
  157. lyrics_transcriber/correction/agentic/providers/config.py +9 -5
  158. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -93
  159. lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
  160. lyrics_transcriber/correction/anchor_sequence.py +151 -37
  161. lyrics_transcriber/correction/corrector.py +192 -130
  162. lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
  163. lyrics_transcriber/correction/operations.py +24 -9
  164. lyrics_transcriber/correction/phrase_analyzer.py +18 -0
  165. lyrics_transcriber/frontend/package-lock.json +2 -2
  166. lyrics_transcriber/frontend/package.json +1 -1
  167. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
  168. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
  169. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
  170. lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
  171. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
  172. lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
  173. lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
  174. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
  175. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
  176. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
  177. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
  178. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  179. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
  180. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
  181. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
  182. lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
  183. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
  184. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
  185. lyrics_transcriber/frontend/src/theme.ts +42 -15
  186. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  187. lyrics_transcriber/frontend/vite.config.js +5 -0
  188. lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
  189. lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
  190. lyrics_transcriber/frontend/web_assets/index.html +6 -2
  191. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
  192. lyrics_transcriber/output/generator.py +17 -3
  193. lyrics_transcriber/output/video.py +60 -95
  194. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
  195. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/WHEEL +0 -0
  196. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/entry_points.txt +0 -0
  197. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,583 @@
1
+ """
2
+ Regression tests for job creation issues discovered 2024-12-31.
3
+
4
+ These tests ensure:
5
+ 1. user_email is properly extracted from AuthResult and set on jobs
6
+ 2. URL jobs trigger workers sequentially (audio first, then lyrics)
7
+ 3. Audio search downloads work without relying on in-memory cache
8
+ 4. Transcription has proper timeout handling
9
+
10
+ See docs/archive/2024-12-31-job-failure-investigation.md for details.
11
+ """
12
+ import asyncio
13
+ import pytest
14
+ from unittest.mock import Mock, AsyncMock, patch, MagicMock
15
+ from datetime import datetime, timezone
16
+
17
+ from backend.services.auth_service import AuthResult, UserType
18
+
19
+
20
class TestUserEmailExtraction:
    """
    Regression guard for Issue 1: ``user_email`` was not being set on jobs.

    Root cause recorded by the original authors: the job creation endpoints
    used require_auth but never copied ``auth_result.user_email`` onto the job.

    The tests here check that the authenticated user's email is carried over
    to the ``JobCreate`` payload handed to the job manager.
    """

    @pytest.fixture
    def mock_auth_result_with_email(self):
        """Provide an AuthResult for a logged-in user that has an email."""
        auth_fields = dict(
            is_valid=True,
            user_type=UserType.UNLIMITED,  # Session-based users typically have UNLIMITED
            remaining_uses=-1,
            message="Valid session",
            user_email="testuser@example.com",
            is_admin=False,
        )
        return AuthResult(**auth_fields)

    @pytest.fixture
    def mock_auth_result_admin_token(self):
        """Provide an AuthResult for an admin token that carries no email."""
        auth_fields = dict(
            is_valid=True,
            user_type=UserType.ADMIN,
            remaining_uses=-1,
            message="Valid admin token",
            user_email=None,
            is_admin=True,
        )
        return AuthResult(**auth_fields)

    def test_auth_result_has_user_email_field(self):
        """AuthResult exposes the ``user_email`` it was constructed with."""
        auth = AuthResult(
            is_valid=True,
            user_type=UserType.UNLIMITED,
            remaining_uses=-1,
            message="test",
            user_email="user@example.com",
        )

        assert hasattr(auth, 'user_email')
        assert auth.user_email == "user@example.com"

    def test_auth_result_user_email_can_be_none(self):
        """AuthResult accepts ``user_email=None`` (the token-based auth case)."""
        auth = AuthResult(
            is_valid=True,
            user_type=UserType.ADMIN,
            remaining_uses=-1,
            message="test",
            user_email=None,
        )

        assert auth.user_email is None

    def test_upload_endpoint_extracts_user_email_from_auth(self):
        """
        The /jobs/upload handler must reference the authenticated user's email.

        This is a source-text check on ``upload_and_create_job`` only; it does
        not execute the endpoint.
        """
        from backend.api.routes import file_upload
        import inspect

        endpoint_source = inspect.getsource(file_upload.upload_and_create_job)

        assert 'auth_result.user_email' in endpoint_source, \
            "upload_and_create_job must extract user_email from auth_result"
        assert 'effective_user_email' in endpoint_source, \
            "upload_and_create_job must use effective_user_email pattern"

        # Either spelling of the keyword argument is accepted.
        accepted_patterns = ('user_email=effective_user_email', 'user_email=')
        passes_email = any(pattern in endpoint_source for pattern in accepted_patterns)
        assert passes_email, \
            "upload_and_create_job must pass user_email to JobCreate"

    def test_all_job_creation_endpoints_extract_user_email(self):
        """
        Every job creation handler must reference ``auth_result.user_email``.

        Source-text check across the upload, create-from-url and audio-search
        handlers (regression guard for Issue 1).
        """
        from backend.api.routes import file_upload, audio_search
        import inspect

        targets = (
            (file_upload, 'upload_and_create_job'),
            (file_upload, 'create_job_from_url'),
            (audio_search, 'search_audio'),
        )

        for route_module, endpoint_name in targets:
            endpoint_source = inspect.getsource(getattr(route_module, endpoint_name))

            assert 'auth_result.user_email' in endpoint_source, \
                f"{endpoint_name} must extract user_email from auth_result"

    @pytest.mark.asyncio
    async def test_create_from_url_endpoint_sets_user_email(self, mock_auth_result_with_email):
        """
        The /jobs/create-from-url handler must put the authenticated user's
        email on the JobCreate it passes to ``job_manager.create_job``.
        """
        from backend.api.routes.file_upload import create_job_from_url, CreateJobFromUrlRequest

        # Minimal stand-in for the incoming HTTP request.
        fake_request = Mock(
            headers={},
            client=Mock(host="127.0.0.1"),
            url=Mock(path="/api/jobs/create-from-url"),
        )
        fake_background_tasks = Mock()
        payload = CreateJobFromUrlRequest(
            url="https://www.youtube.com/watch?v=dQw4w9WgXcQ",
            artist="Rick Astley",
            title="Never Gonna Give You Up",
        )

        with patch('backend.api.routes.file_upload.job_manager') as job_manager_mock, \
             patch('backend.api.routes.file_upload.worker_service'), \
             patch('backend.api.routes.file_upload.get_credential_manager') as cred_factory_mock:

            job_manager_mock.create_job.return_value = Mock(job_id="test-job-456")
            cred_factory_mock.return_value.check_youtube_credentials.return_value = Mock(
                status=Mock(value="valid")
            )

            await create_job_from_url(
                request=fake_request,
                background_tasks=fake_background_tasks,
                body=payload,
                auth_result=mock_auth_result_with_email,
            )

            job_manager_mock.create_job.assert_called_once()
            # First positional argument of create_job is the JobCreate payload.
            submitted = job_manager_mock.create_job.call_args.args[0]
            assert submitted.user_email == "testuser@example.com", \
                "create-from-url must set user_email from AuthResult"

    @pytest.mark.asyncio
    async def test_audio_search_endpoint_sets_user_email(self, mock_auth_result_with_email):
        """
        The /audio-search/search handler must put the authenticated user's
        email on the JobCreate it passes to ``job_manager.create_job``.
        """
        from backend.api.routes.audio_search import search_audio, AudioSearchRequest

        # Minimal stand-in for the incoming HTTP request.
        fake_request = Mock(
            headers={},
            client=Mock(host="127.0.0.1"),
            url=Mock(path="/api/audio-search/search"),
        )
        fake_background_tasks = Mock()
        payload = AudioSearchRequest(
            artist="Test Artist",
            title="Test Song",
        )

        with patch('backend.api.routes.audio_search.job_manager') as job_manager_mock, \
             patch('backend.api.routes.audio_search.get_audio_search_service') as search_factory_mock, \
             patch('backend.api.routes.audio_search.get_credential_manager') as cred_factory_mock:

            fake_job = Mock(job_id="test-job-789", state_data={})
            job_manager_mock.create_job.return_value = fake_job
            job_manager_mock.get_job.return_value = fake_job
            cred_factory_mock.return_value.check_youtube_credentials.return_value = Mock(
                status=Mock(value="valid")
            )
            # The search service reports no hits.
            search_factory_mock.return_value.search.return_value = []

            await search_audio(
                request=fake_request,
                background_tasks=fake_background_tasks,
                body=payload,
                auth_result=mock_auth_result_with_email,
            )

            job_manager_mock.create_job.assert_called_once()
            # First positional argument of create_job is the JobCreate payload.
            submitted = job_manager_mock.create_job.call_args.args[0]
            assert submitted.user_email == "testuser@example.com", \
                "audio-search must set user_email from AuthResult"

    def test_effective_user_email_prefers_auth_over_form(self):
        """
        With both an authenticated email and a form email present, the
        ``auth or form`` expression yields the authenticated one.

        This exercises the expression locally; it does not call an endpoint.
        """
        auth_email, form_email = "authenticated@example.com", "form@example.com"

        # This is the logic used in the endpoints
        chosen = auth_email or form_email

        assert chosen == auth_email, \
            "Authenticated user's email should take precedence over form parameter"

    def test_effective_user_email_falls_back_to_form(self):
        """
        With no authenticated email (e.g. admin token), the ``auth or form``
        expression falls back to the form value.
        """
        auth_email, form_email = None, "form@example.com"

        chosen = auth_email or form_email

        assert chosen == form_email, \
            "Form parameter should be used when auth has no email"
228
+
229
+
230
class TestUrlJobWorkerSequencing:
    """
    Regression guard for Issue 2: YouTube URL download race condition.

    Root cause recorded by the original authors: both audio and lyrics workers
    were triggered in parallel, and for URL jobs the lyrics worker would time
    out waiting for the audio to download.

    These tests verify that URL jobs trigger the workers sequentially.
    """

    @pytest.mark.asyncio
    async def test_url_job_triggers_only_audio_worker_initially(self):
        """
        ``_trigger_audio_worker_only`` must start the audio worker and must
        not start the lyrics worker.
        """
        from backend.api.routes.file_upload import _trigger_audio_worker_only

        with patch('backend.api.routes.file_upload.worker_service') as mock_ws:
            mock_ws.trigger_audio_worker = AsyncMock()
            mock_ws.trigger_lyrics_worker = AsyncMock()

            await _trigger_audio_worker_only("test-job-id")

            mock_ws.trigger_audio_worker.assert_called_once_with("test-job-id")
            mock_ws.trigger_lyrics_worker.assert_not_called()

    @pytest.mark.asyncio
    async def test_parallel_worker_triggers_both_workers(self):
        """
        ``_trigger_workers_parallel`` must start both workers (the path used
        for uploaded files).
        """
        from backend.api.routes.file_upload import _trigger_workers_parallel

        with patch('backend.api.routes.file_upload.worker_service') as mock_ws:
            mock_ws.trigger_audio_worker = AsyncMock()
            mock_ws.trigger_lyrics_worker = AsyncMock()

            await _trigger_workers_parallel("test-job-id")

            mock_ws.trigger_audio_worker.assert_called_once_with("test-job-id")
            mock_ws.trigger_lyrics_worker.assert_called_once_with("test-job-id")

    @pytest.mark.asyncio
    async def test_audio_worker_triggers_lyrics_after_url_download(self):
        """
        ``_trigger_lyrics_worker_after_url_download`` must call the worker
        service's lyrics trigger with the job id.
        """
        from backend.workers.audio_worker import _trigger_lyrics_worker_after_url_download

        # Mock at the source module where it's imported from.
        # NOTE(review): presumably the helper resolves get_worker_service at
        # call time; otherwise this patch target would have no effect - confirm.
        with patch('backend.services.worker_service.get_worker_service') as mock_get_ws:
            mock_ws = Mock()
            mock_ws.trigger_lyrics_worker = AsyncMock()
            mock_get_ws.return_value = mock_ws

            await _trigger_lyrics_worker_after_url_download("test-job-id")

            mock_ws.trigger_lyrics_worker.assert_called_once_with("test-job-id")

    @pytest.mark.asyncio
    async def test_audio_worker_lyrics_trigger_handles_errors_gracefully(self):
        """
        A failure while triggering the lyrics worker must not propagate out of
        ``_trigger_lyrics_worker_after_url_download``.
        """
        from backend.workers.audio_worker import _trigger_lyrics_worker_after_url_download

        # Mock at the source module where it's imported from
        with patch('backend.services.worker_service.get_worker_service') as mock_get_ws:
            mock_ws = Mock()
            mock_ws.trigger_lyrics_worker = AsyncMock(side_effect=Exception("Network error"))
            mock_get_ws.return_value = mock_ws

            # Should not raise exception
            await _trigger_lyrics_worker_after_url_download("test-job-id")

            # Without this check the test would also pass if the failing
            # trigger were never reached, proving nothing about error handling.
            mock_ws.trigger_lyrics_worker.assert_called_once_with("test-job-id")
305
+
306
+
307
class TestAudioSearchCacheIndependence:
    """
    Regression guard for Issue 3: audio search cache not persisting across
    Cloud Run instances.

    Root cause recorded by the original authors: AudioSearchService used an
    in-memory cache that doesn't persist across horizontally scaled instances.

    Most checks here are source-text inspections of the audio_search route;
    they confirm the expected identifiers appear, not that the code runs.
    """

    def test_search_results_stored_in_job_state_data(self):
        """
        The Job model must accept and expose search results placed under
        ``state_data['audio_search_results']``.
        """
        from backend.models.job import Job, JobStatus

        youtube_hit = {
            'provider': 'YouTube',
            'title': 'Test Song',
            'artist': 'Test Artist',
            'url': 'https://youtube.com/watch?v=abc123',
            'source_id': 'abc123',
        }
        job = Job(
            job_id="test-123",
            artist="Test Artist",
            title="Test Song",
            status=JobStatus.PENDING,
            created_at=datetime.now(timezone.utc),
            updated_at=datetime.now(timezone.utc),
            state_data={'audio_search_results': [youtube_hit]},
        )

        assert 'audio_search_results' in job.state_data
        stored_results = job.state_data['audio_search_results']
        assert len(stored_results) == 1
        assert stored_results[0]['url'] == 'https://youtube.com/watch?v=abc123'

    def test_audio_search_endpoint_stores_results_in_state_data(self):
        """
        The source of ``search_audio`` must mention both the
        'audio_search_results' key and ``state_data``.

        Source-text check only; the original authors note that the actual
        integration is tested elsewhere.
        """
        # Verify the import and module structure exists
        from backend.api.routes import audio_search
        import inspect

        endpoint_source = inspect.getsource(audio_search.search_audio)

        # The code uses job_manager.update_job with state_data dict
        assert 'audio_search_results' in endpoint_source, \
            "search_audio must store results under 'audio_search_results' key"
        assert 'state_data' in endpoint_source, \
            "search_audio must use state_data for persistence"

    def test_download_code_has_youtube_direct_download_branch(self):
        """
        The source of ``_download_and_start_processing`` must contain the
        YouTube source check, ``download_url`` and ``download_from_url``.

        Source-text check that the direct-download path exists.
        """
        from backend.api.routes import audio_search
        import inspect

        download_source = inspect.getsource(audio_search._download_and_start_processing)

        assert "source_name == 'YouTube'" in download_source, \
            "_download_and_start_processing must check for YouTube source"
        assert 'download_url' in download_source, \
            "_download_and_start_processing must use download_url from state_data"
        assert 'download_from_url' in download_source, \
            "_download_and_start_processing must call download_from_url for YouTube"

    def test_youtube_download_branch_avoids_cache_dependency(self):
        """
        The source of ``_download_and_start_processing`` must reference the
        selected result's URL and gate the YouTube branch on ``download_url``.

        Source-text check; it does not exercise the in-memory cache.
        """
        from backend.api.routes import audio_search
        import inspect

        download_source = inspect.getsource(audio_search._download_and_start_processing)

        # The fix specifically uses selected.get('url') which comes from state_data
        # rather than audio_search_service cache
        url_markers = ("selected.get('url')", "download_url")
        assert any(marker in download_source for marker in url_markers), \
            "YouTube download must use URL from selected result (state_data)"

        # The branch is: elif source_name == 'YouTube' and download_url:
        branch_markers = ("YouTube", "download_url")
        assert all(marker in download_source for marker in branch_markers), \
            "YouTube branch must check for download_url availability"
409
+
410
+
411
class TestTranscriptionTimeout:
    """
    Regression guard for Issue 4: jobs stuck in downloading state.

    Root cause recorded by the original authors: the lyrics worker's
    AudioShake transcription could hang forever without a proper timeout,
    leaving jobs stuck.

    Two tests pin the timeout constant; the rest are local sanity checks of
    the timeout pattern and do not call into lyrics_worker.
    """

    def test_transcription_timeout_constant_exists(self):
        """The lyrics_worker timeout constant exists and is within 5-25 minutes."""
        from backend.workers.lyrics_worker import TRANSCRIPTION_TIMEOUT_SECONDS as timeout_seconds

        assert timeout_seconds > 0
        assert timeout_seconds >= 300, "Timeout should be at least 5 minutes for long songs"
        assert timeout_seconds <= 1500, "Timeout shouldn't be more than 25 minutes"

    def test_transcription_timeout_value(self):
        """
        The timeout is pinned to exactly 20 minutes (1200 seconds).

        Budget breakdown given by the original authors:
        - Cloud Run cold start / worker initialization (1-5 min)
        - AudioShake transcription (1-2 min)
        - spaCy model loading for correction (2-3 min on cold start)
        - Agentic AI correction (1-3 min)
        """
        from backend.workers.lyrics_worker import TRANSCRIPTION_TIMEOUT_SECONDS as timeout_seconds

        assert timeout_seconds == 1200, "Transcription timeout should be 20 minutes (1200 seconds)"

    @pytest.mark.asyncio
    async def test_asyncio_wait_for_raises_timeout_error(self):
        """
        Sanity check: ``asyncio.wait_for`` raises TimeoutError when the
        awaited coroutine outlives the timeout.
        """
        async def outlives_timeout():
            await asyncio.sleep(10)
            return "completed"

        with pytest.raises(asyncio.TimeoutError):
            await asyncio.wait_for(outlives_timeout(), timeout=0.1)

    @pytest.mark.asyncio
    async def test_lyrics_worker_timeout_converts_to_descriptive_error(self):
        """
        A timeout is turned into a descriptive message containing the
        configured number of seconds.

        The conversion is simulated locally; lyrics_worker supplies only the
        timeout constant.
        """
        from backend.workers.lyrics_worker import TRANSCRIPTION_TIMEOUT_SECONDS

        message = None
        try:
            # Simulate the timeout handling pattern from lyrics_worker
            raise asyncio.TimeoutError()
        except asyncio.TimeoutError:
            message = f"Transcription timed out after {TRANSCRIPTION_TIMEOUT_SECONDS} seconds"

        assert message is not None
        assert "timed out" in message.lower()
        assert "1200" in message

    def test_lyrics_worker_exception_marks_job_failed(self):
        """
        After an error the job status ends up as FAILED with a timeout message.

        The except-block behaviour is simulated locally; only JobStatus is
        taken from the backend.
        """
        from backend.models.job import JobStatus

        status = JobStatus.DOWNLOADING
        failure_seen = True

        if failure_seen:
            # This is what happens in the except block
            status = JobStatus.FAILED
            message = "Lyrics transcription failed: Transcription timed out after 1200 seconds"

        assert status == JobStatus.FAILED
        assert "timed out" in message.lower()
495
+
496
+
497
class TestJobOwnershipFiltering:
    """
    Additional checks for job ownership and filtering.

    These verify that the Job and JobCreate models carry ``user_email`` and
    that a list of jobs can be filtered on it.
    """

    def test_job_model_has_user_email_field(self):
        """Job exposes the ``user_email`` it was constructed with."""
        from backend.models.job import Job, JobStatus

        owned_job = Job(
            job_id="test-123",
            artist="Test",
            title="Test",
            status=JobStatus.PENDING,
            created_at=datetime.now(timezone.utc),
            updated_at=datetime.now(timezone.utc),
            user_email="user@example.com",
        )

        assert hasattr(owned_job, 'user_email')
        assert owned_job.user_email == "user@example.com"

    def test_job_create_model_has_user_email_field(self):
        """JobCreate exposes the ``user_email`` it was constructed with."""
        from backend.models.job import JobCreate

        request_model = JobCreate(
            artist="Test",
            title="Test",
            user_email="user@example.com",
        )

        assert hasattr(request_model, 'user_email')
        assert request_model.user_email == "user@example.com"

    def test_job_create_user_email_is_optional(self):
        """JobCreate built without ``user_email`` reports it as None."""
        from backend.models.job import JobCreate

        request_model = JobCreate(
            artist="Test",
            title="Test",
        )

        assert request_model.user_email is None

    def test_jobs_can_be_filtered_by_user_email(self):
        """Filtering a list of jobs on ``user_email`` returns only that user's jobs."""
        from backend.models.job import Job, JobStatus

        # (job_id, title, owner) for each job; two of the three belong to user1.
        job_specs = (
            ("job-1", "Test 1", "user1@example.com"),
            ("job-2", "Test 2", "user2@example.com"),
            ("job-3", "Test 3", "user1@example.com"),
        )
        jobs = [
            Job(
                job_id=job_id,
                artist="Test",
                title=title,
                status=JobStatus.COMPLETE,
                created_at=datetime.now(timezone.utc),
                updated_at=datetime.now(timezone.utc),
                user_email=owner,
            )
            for job_id, title, owner in job_specs
        ]

        user1_jobs = list(filter(lambda job: job.user_email == "user1@example.com", jobs))

        assert len(user1_jobs) == 2
        for job in user1_jobs:
            assert job.user_email == "user1@example.com"