karaoke-gen 0.86.7__py3-none-any.whl → 0.96.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. backend/.coveragerc +20 -0
  2. backend/.gitignore +37 -0
  3. backend/Dockerfile +43 -0
  4. backend/Dockerfile.base +74 -0
  5. backend/README.md +242 -0
  6. backend/__init__.py +0 -0
  7. backend/api/__init__.py +0 -0
  8. backend/api/dependencies.py +457 -0
  9. backend/api/routes/__init__.py +0 -0
  10. backend/api/routes/admin.py +742 -0
  11. backend/api/routes/audio_search.py +903 -0
  12. backend/api/routes/auth.py +348 -0
  13. backend/api/routes/file_upload.py +2076 -0
  14. backend/api/routes/health.py +344 -0
  15. backend/api/routes/internal.py +435 -0
  16. backend/api/routes/jobs.py +1610 -0
  17. backend/api/routes/review.py +652 -0
  18. backend/api/routes/themes.py +162 -0
  19. backend/api/routes/users.py +1014 -0
  20. backend/config.py +172 -0
  21. backend/main.py +133 -0
  22. backend/middleware/__init__.py +5 -0
  23. backend/middleware/audit_logging.py +124 -0
  24. backend/models/__init__.py +0 -0
  25. backend/models/job.py +519 -0
  26. backend/models/requests.py +123 -0
  27. backend/models/theme.py +153 -0
  28. backend/models/user.py +254 -0
  29. backend/models/worker_log.py +164 -0
  30. backend/pyproject.toml +29 -0
  31. backend/quick-check.sh +93 -0
  32. backend/requirements.txt +29 -0
  33. backend/run_tests.sh +60 -0
  34. backend/services/__init__.py +0 -0
  35. backend/services/audio_analysis_service.py +243 -0
  36. backend/services/audio_editing_service.py +278 -0
  37. backend/services/audio_search_service.py +702 -0
  38. backend/services/auth_service.py +630 -0
  39. backend/services/credential_manager.py +792 -0
  40. backend/services/discord_service.py +172 -0
  41. backend/services/dropbox_service.py +301 -0
  42. backend/services/email_service.py +1093 -0
  43. backend/services/encoding_interface.py +454 -0
  44. backend/services/encoding_service.py +405 -0
  45. backend/services/firestore_service.py +512 -0
  46. backend/services/flacfetch_client.py +573 -0
  47. backend/services/gce_encoding/README.md +72 -0
  48. backend/services/gce_encoding/__init__.py +22 -0
  49. backend/services/gce_encoding/main.py +589 -0
  50. backend/services/gce_encoding/requirements.txt +16 -0
  51. backend/services/gdrive_service.py +356 -0
  52. backend/services/job_logging.py +258 -0
  53. backend/services/job_manager.py +842 -0
  54. backend/services/job_notification_service.py +271 -0
  55. backend/services/local_encoding_service.py +590 -0
  56. backend/services/local_preview_encoding_service.py +407 -0
  57. backend/services/lyrics_cache_service.py +216 -0
  58. backend/services/metrics.py +413 -0
  59. backend/services/packaging_service.py +287 -0
  60. backend/services/rclone_service.py +106 -0
  61. backend/services/storage_service.py +209 -0
  62. backend/services/stripe_service.py +275 -0
  63. backend/services/structured_logging.py +254 -0
  64. backend/services/template_service.py +330 -0
  65. backend/services/theme_service.py +469 -0
  66. backend/services/tracing.py +543 -0
  67. backend/services/user_service.py +721 -0
  68. backend/services/worker_service.py +558 -0
  69. backend/services/youtube_service.py +112 -0
  70. backend/services/youtube_upload_service.py +445 -0
  71. backend/tests/__init__.py +4 -0
  72. backend/tests/conftest.py +224 -0
  73. backend/tests/emulator/__init__.py +7 -0
  74. backend/tests/emulator/conftest.py +88 -0
  75. backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
  76. backend/tests/emulator/test_emulator_integration.py +356 -0
  77. backend/tests/emulator/test_style_loading_direct.py +436 -0
  78. backend/tests/emulator/test_worker_logs_direct.py +229 -0
  79. backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
  80. backend/tests/requirements-test.txt +10 -0
  81. backend/tests/requirements.txt +6 -0
  82. backend/tests/test_admin_email_endpoints.py +411 -0
  83. backend/tests/test_api_integration.py +460 -0
  84. backend/tests/test_api_routes.py +93 -0
  85. backend/tests/test_audio_analysis_service.py +294 -0
  86. backend/tests/test_audio_editing_service.py +386 -0
  87. backend/tests/test_audio_search.py +1398 -0
  88. backend/tests/test_audio_services.py +378 -0
  89. backend/tests/test_auth_firestore.py +231 -0
  90. backend/tests/test_config_extended.py +68 -0
  91. backend/tests/test_credential_manager.py +377 -0
  92. backend/tests/test_dependencies.py +54 -0
  93. backend/tests/test_discord_service.py +244 -0
  94. backend/tests/test_distribution_services.py +820 -0
  95. backend/tests/test_dropbox_service.py +472 -0
  96. backend/tests/test_email_service.py +492 -0
  97. backend/tests/test_emulator_integration.py +322 -0
  98. backend/tests/test_encoding_interface.py +412 -0
  99. backend/tests/test_file_upload.py +1739 -0
  100. backend/tests/test_flacfetch_client.py +632 -0
  101. backend/tests/test_gdrive_service.py +524 -0
  102. backend/tests/test_instrumental_api.py +431 -0
  103. backend/tests/test_internal_api.py +343 -0
  104. backend/tests/test_job_creation_regression.py +583 -0
  105. backend/tests/test_job_manager.py +339 -0
  106. backend/tests/test_job_manager_notifications.py +329 -0
  107. backend/tests/test_job_notification_service.py +443 -0
  108. backend/tests/test_jobs_api.py +273 -0
  109. backend/tests/test_local_encoding_service.py +423 -0
  110. backend/tests/test_local_preview_encoding_service.py +567 -0
  111. backend/tests/test_main.py +87 -0
  112. backend/tests/test_models.py +918 -0
  113. backend/tests/test_packaging_service.py +382 -0
  114. backend/tests/test_requests.py +201 -0
  115. backend/tests/test_routes_jobs.py +282 -0
  116. backend/tests/test_routes_review.py +337 -0
  117. backend/tests/test_services.py +556 -0
  118. backend/tests/test_services_extended.py +112 -0
  119. backend/tests/test_storage_service.py +448 -0
  120. backend/tests/test_style_upload.py +261 -0
  121. backend/tests/test_template_service.py +295 -0
  122. backend/tests/test_theme_service.py +516 -0
  123. backend/tests/test_unicode_sanitization.py +522 -0
  124. backend/tests/test_upload_api.py +256 -0
  125. backend/tests/test_validate.py +156 -0
  126. backend/tests/test_video_worker_orchestrator.py +847 -0
  127. backend/tests/test_worker_log_subcollection.py +509 -0
  128. backend/tests/test_worker_logging.py +365 -0
  129. backend/tests/test_workers.py +1116 -0
  130. backend/tests/test_workers_extended.py +178 -0
  131. backend/tests/test_youtube_service.py +247 -0
  132. backend/tests/test_youtube_upload_service.py +568 -0
  133. backend/validate.py +173 -0
  134. backend/version.py +27 -0
  135. backend/workers/README.md +597 -0
  136. backend/workers/__init__.py +11 -0
  137. backend/workers/audio_worker.py +618 -0
  138. backend/workers/lyrics_worker.py +683 -0
  139. backend/workers/render_video_worker.py +483 -0
  140. backend/workers/screens_worker.py +525 -0
  141. backend/workers/style_helper.py +198 -0
  142. backend/workers/video_worker.py +1277 -0
  143. backend/workers/video_worker_orchestrator.py +701 -0
  144. backend/workers/worker_logging.py +278 -0
  145. karaoke_gen/instrumental_review/static/index.html +7 -4
  146. karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
  147. karaoke_gen/style_loader.py +3 -1
  148. karaoke_gen/utils/__init__.py +163 -8
  149. karaoke_gen/video_background_processor.py +9 -4
  150. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/METADATA +2 -1
  151. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/RECORD +187 -42
  152. lyrics_transcriber/correction/agentic/providers/config.py +9 -5
  153. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +1 -51
  154. lyrics_transcriber/correction/corrector.py +192 -130
  155. lyrics_transcriber/correction/operations.py +24 -9
  156. lyrics_transcriber/frontend/package-lock.json +2 -2
  157. lyrics_transcriber/frontend/package.json +1 -1
  158. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
  159. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
  160. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
  161. lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
  162. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
  163. lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
  164. lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
  165. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
  166. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
  167. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
  168. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
  169. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  170. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
  171. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
  172. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
  173. lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
  174. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
  175. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
  176. lyrics_transcriber/frontend/src/theme.ts +42 -15
  177. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  178. lyrics_transcriber/frontend/vite.config.js +5 -0
  179. lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
  180. lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
  181. lyrics_transcriber/frontend/web_assets/index.html +6 -2
  182. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
  183. lyrics_transcriber/output/generator.py +17 -3
  184. lyrics_transcriber/output/video.py +60 -95
  185. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
  186. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/WHEEL +0 -0
  187. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/entry_points.txt +0 -0
  188. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,443 @@
1
+ """
2
+ Integration tests for worker log subcollection storage.
3
+
4
+ Tests the subcollection approach using real Firestore emulator to verify:
5
+ - Logs are stored in subcollection (jobs/{job_id}/logs)
6
+ - TTL expiry field is set correctly
7
+ - Large log volumes work without 1MB limit issues
8
+ - Concurrent writes are handled correctly
9
+ - Logs can be queried efficiently
10
+
11
+ Run with: ./scripts/run-emulator-tests.sh
12
+ """
13
+ import pytest
14
+ import time
15
+ import requests
16
+ import os
17
+ import threading
18
+ from concurrent.futures import ThreadPoolExecutor, as_completed
19
+ from datetime import datetime, timezone, timedelta
20
+
21
+
22
def emulators_running() -> bool:
    """Check if GCP emulators are running.

    Sends a GET request to http://127.0.0.1:8080 (the same host:port this
    module assigns to FIRESTORE_EMULATOR_HOST) with a one-second timeout.
    Only reachability is tested: the response status code is not inspected.

    Returns:
        True if the request completed, False if requests reported any error.
    """
    try:
        requests.get("http://127.0.0.1:8080", timeout=1)
    except requests.exceptions.RequestException:
        # This probe runs at import time to build the module-level skip
        # marker, so an unexpected requests error must turn into a skip
        # rather than abort test collection. RequestException is the base
        # class of ConnectionError and Timeout, which were caught before.
        return False
    return True
29
+
30
+
31
# Module-wide marker: every test below is skipped when the emulator probe fails.
pytestmark = pytest.mark.skipif(
    not emulators_running(),
    reason="GCP emulators not running. Start with: scripts/start-emulators.sh",
)

# Emulator settings are written to the process environment at import time.
os.environ.update(
    FIRESTORE_EMULATOR_HOST="127.0.0.1:8080",
    GOOGLE_CLOUD_PROJECT="test-project",
)
40
+
41
+
42
class TestWorkerLogsSubcollectionDirect:
    """Direct Firestore tests for worker logs subcollection.

    Every test creates its own job document in the ``test-subcollection-jobs``
    collection and stores each log entry as a separate document in that
    job's ``logs`` subcollection.
    """

    @pytest.fixture(autouse=True)
    def setup_firestore(self):
        """Set up Firestore client for each test."""
        from google.cloud import firestore
        self.db = firestore.Client(project="test-project")
        self.collection = "test-subcollection-jobs"
        yield

    def _create_test_job(self):
        """Create a test job document and return its ID.

        The ID is a millisecond timestamp plus a random suffix, so two jobs
        created within the same millisecond still get distinct documents.
        """
        import uuid
        job_id = f"test-sub-{int(time.time() * 1000)}-{uuid.uuid4().hex[:8]}"
        doc_ref = self.db.collection(self.collection).document(job_id)
        doc_ref.set({
            "job_id": job_id,
            "status": "pending",
            "created_at": datetime.now(timezone.utc),
            "worker_logs": []  # Legacy field - kept empty for new jobs
        })
        return job_id

    def _append_log_to_subcollection(self, job_id: str, worker: str, message: str, ttl_days: int = 30):
        """Add log to subcollection at jobs/{job_id}/logs.

        Args:
            job_id: ID of the parent job document.
            worker: Worker name stored in the entry's "worker" field.
            message: Log message text.
            ttl_days: Days added to the write time to compute "ttl_expiry".

        Returns:
            The generated log document ID (a UUID4 string).
        """
        import uuid
        log_id = str(uuid.uuid4())
        now = datetime.now(timezone.utc)

        logs_ref = self.db.collection(self.collection).document(job_id).collection("logs")
        logs_ref.document(log_id).set({
            "id": log_id,
            "job_id": job_id,
            "timestamp": now,
            "level": "INFO",
            "worker": worker,
            "message": message,
            "ttl_expiry": now + timedelta(days=ttl_days)
        })
        return log_id

    def _get_logs_from_subcollection(self, job_id: str, worker: str = None, limit: int = 500):
        """Get logs from subcollection, ordered by the "timestamp" field.

        Args:
            job_id: ID of the parent job document.
            worker: If truthy, only entries whose "worker" equals this value.
            limit: Maximum number of entries to return.

        Returns:
            A list of log entries as dicts.
        """
        logs_ref = self.db.collection(self.collection).document(job_id).collection("logs")
        query = logs_ref.order_by("timestamp")

        if worker:
            # NOTE(review): outside the emulator, an equality filter combined
            # with ordering on another field presumably needs a composite
            # index - confirm before reusing this query in production code.
            from google.cloud.firestore_v1 import FieldFilter
            query = query.where(filter=FieldFilter("worker", "==", worker))

        query = query.limit(limit)

        return [doc.to_dict() for doc in query.stream()]

    def _count_logs_in_subcollection(self, job_id: str) -> int:
        """Count logs in subcollection using an aggregation query.

        Returns 0 when the aggregation query yields no result rows.
        """
        logs_ref = self.db.collection(self.collection).document(job_id).collection("logs")
        result = logs_ref.count().get()
        if result:
            return result[0][0].value
        return 0

    def _delete_logs_subcollection(self, job_id: str, batch_size: int = 100) -> int:
        """Delete all logs in subcollection.

        Documents are fetched and deleted ``batch_size`` at a time until the
        subcollection is empty.

        Returns:
            The number of log documents deleted.
        """
        logs_ref = self.db.collection(self.collection).document(job_id).collection("logs")
        deleted_count = 0

        while True:
            docs = list(logs_ref.limit(batch_size).stream())
            if not docs:
                break

            batch = self.db.batch()
            for doc in docs:
                batch.delete(doc.reference)
            batch.commit()
            deleted_count += len(docs)

        return deleted_count

    def test_subcollection_single_write(self):
        """Test writing single log to subcollection."""
        job_id = self._create_test_job()

        log_id = self._append_log_to_subcollection(job_id, "test", "Single log message")

        time.sleep(0.1)
        logs = self._get_logs_from_subcollection(job_id)

        assert len(logs) == 1
        assert logs[0]["message"] == "Single log message"
        assert logs[0]["worker"] == "test"
        assert logs[0]["id"] == log_id
        print("Single write to subcollection works")

    def test_subcollection_ttl_field_is_set(self):
        """Test TTL expiry field is set correctly."""
        job_id = self._create_test_job()

        self._append_log_to_subcollection(job_id, "test", "Log with TTL", ttl_days=7)

        time.sleep(0.1)
        logs = self._get_logs_from_subcollection(job_id)

        assert len(logs) == 1
        ttl_expiry = logs[0]["ttl_expiry"]
        assert ttl_expiry is not None
        # Fail loudly on an unexpected type instead of skipping the check.
        assert isinstance(ttl_expiry, datetime), (
            f"ttl_expiry should be a datetime, got {type(ttl_expiry).__name__}"
        )
        if ttl_expiry.tzinfo is None:
            # The value was written as an aware UTC datetime; treat a naive
            # read-back as UTC so the subtraction below is valid.
            ttl_expiry = ttl_expiry.replace(tzinfo=timezone.utc)

        # TTL should be ~7 days from now
        expected_ttl = datetime.now(timezone.utc) + timedelta(days=7)
        diff = abs((ttl_expiry - expected_ttl).total_seconds())
        assert diff < 60, f"TTL expiry should be ~7 days from now, diff was {diff}s"
        print("TTL field is set correctly")

    def test_subcollection_sequential_writes(self):
        """Test multiple sequential writes to subcollection."""
        job_id = self._create_test_job()

        for i in range(20):
            self._append_log_to_subcollection(job_id, "test", f"Log {i}")

        time.sleep(0.2)
        logs = self._get_logs_from_subcollection(job_id)

        assert len(logs) == 20, f"Expected 20 logs, got {len(logs)}"
        print(f"Sequential writes: {len(logs)} logs created")

    def test_subcollection_concurrent_writes(self):
        """Test concurrent writes to subcollection - no race conditions."""
        job_id = self._create_test_job()
        num_writes = 50

        def write_log(index):
            self._append_log_to_subcollection(job_id, "worker", f"Concurrent Log {index}")

        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [executor.submit(write_log, i) for i in range(num_writes)]
            for future in as_completed(futures):
                future.result()  # re-raises any exception from the writer

        time.sleep(0.5)
        logs = self._get_logs_from_subcollection(job_id)

        assert len(logs) == num_writes, f"Expected {num_writes} logs, got {len(logs)}"
        print(f"Concurrent writes: All {num_writes} logs preserved")

    def test_subcollection_large_volume(self):
        """Test a large volume of logs stored as individual documents."""
        job_id = self._create_test_job()
        # Each entry is its own document, so the total volume is not bounded
        # by the size of the parent job document.
        num_logs = 500

        # Write logs in batches
        batch_size = 50
        for batch_start in range(0, num_logs, batch_size):
            batch_end = min(batch_start + batch_size, num_logs)
            with ThreadPoolExecutor(max_workers=10) as executor:
                futures = [
                    executor.submit(
                        self._append_log_to_subcollection,
                        job_id,
                        "stress",
                        f"Large volume test log {i} - " + "x" * 500  # ~500 bytes per log
                    )
                    for i in range(batch_start, batch_end)
                ]
                for future in as_completed(futures):
                    future.result()

        time.sleep(1)
        count = self._count_logs_in_subcollection(job_id)

        assert count == num_logs, f"Expected {num_logs} logs, got {count}"
        # 500 logs x ~500 bytes is roughly 244KB, so the message reports the
        # payload size rather than claiming it would exceed 1MB.
        print(f"Large volume: {count} logs created (~{count * 500 / 1024:.1f}KB of message payload stored outside the job document)")

    def test_subcollection_filter_by_worker(self):
        """Test filtering logs by worker type."""
        job_id = self._create_test_job()

        # Add logs from different workers
        for i in range(10):
            self._append_log_to_subcollection(job_id, "audio", f"Audio log {i}")
            self._append_log_to_subcollection(job_id, "lyrics", f"Lyrics log {i}")
            self._append_log_to_subcollection(job_id, "video", f"Video log {i}")

        time.sleep(0.3)

        # Query by worker
        audio_logs = self._get_logs_from_subcollection(job_id, worker="audio")
        lyrics_logs = self._get_logs_from_subcollection(job_id, worker="lyrics")
        video_logs = self._get_logs_from_subcollection(job_id, worker="video")
        all_logs = self._get_logs_from_subcollection(job_id)

        assert len(audio_logs) == 10, f"Expected 10 audio logs, got {len(audio_logs)}"
        assert len(lyrics_logs) == 10, f"Expected 10 lyrics logs, got {len(lyrics_logs)}"
        assert len(video_logs) == 10, f"Expected 10 video logs, got {len(video_logs)}"
        assert len(all_logs) == 30, f"Expected 30 total logs, got {len(all_logs)}"
        # The filter must return only entries of the requested worker.
        assert all(log["worker"] == "audio" for log in audio_logs)
        print("Worker filtering works correctly")

    def test_subcollection_ordered_by_timestamp(self):
        """Test logs are returned in timestamp order."""
        job_id = self._create_test_job()

        # Add logs with slight delays to ensure different timestamps
        for i in range(5):
            self._append_log_to_subcollection(job_id, "test", f"Log {i}")
            time.sleep(0.02)

        logs = self._get_logs_from_subcollection(job_id)

        # Without this check an empty result would pass the ordering loop.
        assert len(logs) == 5, f"Expected 5 logs, got {len(logs)}"

        # Verify order (timestamps should be ascending)
        for i in range(len(logs) - 1):
            assert logs[i]["timestamp"] <= logs[i + 1]["timestamp"], \
                f"Logs not in order at index {i}"
        print("Logs are ordered by timestamp")

    def test_subcollection_delete_all_logs(self):
        """Test deleting all logs in subcollection."""
        job_id = self._create_test_job()

        # Add logs
        for i in range(25):
            self._append_log_to_subcollection(job_id, "test", f"Log {i}")

        time.sleep(0.2)
        count_before = self._count_logs_in_subcollection(job_id)
        assert count_before == 25

        # Delete all logs
        deleted = self._delete_logs_subcollection(job_id)

        time.sleep(0.2)
        count_after = self._count_logs_in_subcollection(job_id)

        assert deleted == 25, f"Expected to delete 25 logs, deleted {deleted}"
        assert count_after == 0, f"Expected 0 logs after delete, got {count_after}"
        print(f"Deleted {deleted} logs from subcollection")

    def test_subcollection_concurrent_workers_interleaved(self):
        """Test simulating audio, lyrics and video workers writing concurrently."""
        job_id = self._create_test_job()
        logs_per_worker = 20

        def run_worker(worker, label):
            # One simulated worker: write its entries with a short pause
            # between writes so the three threads interleave.
            for i in range(logs_per_worker):
                self._append_log_to_subcollection(job_id, worker, f"{label} step {i}")
                time.sleep(0.005)

        # Start all workers
        threads = [
            threading.Thread(target=run_worker, args=("audio", "Audio processing")),
            threading.Thread(target=run_worker, args=("lyrics", "Lyrics processing")),
            threading.Thread(target=run_worker, args=("video", "Video encoding")),
        ]

        for t in threads:
            t.start()
        for t in threads:
            t.join()

        time.sleep(0.5)

        audio_logs = self._get_logs_from_subcollection(job_id, worker="audio")
        lyrics_logs = self._get_logs_from_subcollection(job_id, worker="lyrics")
        video_logs = self._get_logs_from_subcollection(job_id, worker="video")
        total = self._count_logs_in_subcollection(job_id)

        assert len(audio_logs) == logs_per_worker
        assert len(lyrics_logs) == logs_per_worker
        assert len(video_logs) == logs_per_worker
        assert total == logs_per_worker * 3

        print(f"Concurrent workers: {len(audio_logs)} audio + {len(lyrics_logs)} lyrics + {len(video_logs)} video = {total}")

    def test_subcollection_job_deletion_cleans_up_logs(self):
        """Test that deleting logs before the job document leaves no orphans."""
        job_id = self._create_test_job()

        # Add logs
        for i in range(10):
            self._append_log_to_subcollection(job_id, "test", f"Log {i}")

        time.sleep(0.2)
        assert self._count_logs_in_subcollection(job_id) == 10

        # Delete logs first (must happen before parent doc deletion in real code)
        deleted = self._delete_logs_subcollection(job_id)
        assert deleted == 10

        # Now delete job document
        self.db.collection(self.collection).document(job_id).delete()

        # Verify subcollection is empty (Firestore doesn't cascade delete subcollections)
        time.sleep(0.2)
        assert self._count_logs_in_subcollection(job_id) == 0
        print("Job deletion with log cleanup works correctly")
349
+
350
+
351
class TestSubcollectionVsEmbeddedArray:
    """Compare subcollection approach with embedded array.

    One job receives its logs through ArrayUnion on the document's
    ``worker_logs`` field; the other receives them as separate documents in
    its ``logs`` subcollection.
    """

    @pytest.fixture(autouse=True)
    def setup_firestore(self):
        """Set up Firestore client for each test."""
        from google.cloud import firestore
        self.db = firestore.Client(project="test-project")
        self.collection = "test-comparison-jobs"
        yield

    def _create_test_job(self, use_array: bool):
        """Create test job and return its ID.

        ``use_array`` only selects the ID suffix ("arr" or "sub"); the stored
        document is the same in both cases.
        """
        job_id = f"test-cmp-{int(time.time() * 1000)}-{'arr' if use_array else 'sub'}"
        doc_ref = self.db.collection(self.collection).document(job_id)
        doc_ref.set({
            "job_id": job_id,
            "status": "pending",
            "created_at": datetime.now(timezone.utc),
            "worker_logs": []
        })
        return job_id

    def _append_log_array(self, job_id: str, message: str):
        """Add log to embedded array on the job document."""
        from google.cloud import firestore
        # isoformat() on an aware UTC datetime already ends in "+00:00".
        # Appending "Z" to that gave an invalid "...+00:00Z" string, so the
        # offset is swapped for the "Z" designator instead.
        timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
        doc_ref = self.db.collection(self.collection).document(job_id)
        doc_ref.update({
            "worker_logs": firestore.ArrayUnion([{
                "timestamp": timestamp,
                "level": "INFO",
                "worker": "test",
                "message": message
            }])
        })

    def _append_log_subcollection(self, job_id: str, message: str):
        """Add log to subcollection as a new document with a UUID4 ID."""
        import uuid
        now = datetime.now(timezone.utc)
        logs_ref = self.db.collection(self.collection).document(job_id).collection("logs")
        logs_ref.document(str(uuid.uuid4())).set({
            "timestamp": now,
            "level": "INFO",
            "worker": "test",
            "message": message,
            "ttl_expiry": now + timedelta(days=30)
        })

    def test_document_size_comparison(self):
        """Compare where the logs end up for the two approaches."""
        array_job_id = self._create_test_job(use_array=True)
        sub_job_id = self._create_test_job(use_array=False)

        num_logs = 100
        large_message = "x" * 5000  # 5KB per log

        # Add logs to both
        print("\nAdding logs to both approaches...")
        for i in range(num_logs):
            self._append_log_array(array_job_id, f"Array log {i}: {large_message}")
            self._append_log_subcollection(sub_job_id, f"Sub log {i}: {large_message}")

        time.sleep(0.5)

        # Read both job documents back and inspect their embedded arrays
        array_doc = self.db.collection(self.collection).document(array_job_id).get()
        sub_doc = self.db.collection(self.collection).document(sub_job_id).get()

        array_data = array_doc.to_dict()
        sub_data = sub_doc.to_dict()

        array_logs = array_data.get("worker_logs", [])
        sub_logs = sub_data.get("worker_logs", [])

        print(f"\nEmbedded array: {len(array_logs)} logs in document")
        print(f"Subcollection: {len(sub_logs)} logs in document (0 expected)")

        # Embedded array should have all logs in document
        assert len(array_logs) == num_logs

        # Subcollection should have empty array in main document
        assert len(sub_logs) == 0

        # Subcollection logs are in subcollection
        sub_count = self.db.collection(self.collection).document(sub_job_id).collection("logs").count().get()[0][0].value
        assert sub_count == num_logs

        print(f"Subcollection logs stored separately: {sub_count}")
        print("Subcollection approach keeps main document small")
441
+
442
+
443
+ print("Worker logs subcollection integration tests ready")
@@ -0,0 +1,10 @@
1
+ # Testing dependencies
2
+ pytest>=7.4.0
3
+ pytest-asyncio>=0.21.0
4
+ pytest-cov>=4.1.0
5
+ pytest-mock>=3.11.1
6
+ httpx>=0.25.0 # For async testing
7
+
8
+ # Parent requirements (for import)
9
+ -r ../requirements.txt
10
+
@@ -0,0 +1,6 @@
1
+ pytest>=8.0.0
2
+ pytest-asyncio>=0.23.0
3
+ pytest-timeout>=2.2.0
4
+ pytest-cov>=4.1.0
5
+ requests>=2.31.0
6
+