karaoke-gen 0.90.1__py3-none-any.whl → 0.96.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- backend/.coveragerc +20 -0
- backend/.gitignore +37 -0
- backend/Dockerfile +43 -0
- backend/Dockerfile.base +74 -0
- backend/README.md +242 -0
- backend/__init__.py +0 -0
- backend/api/__init__.py +0 -0
- backend/api/dependencies.py +457 -0
- backend/api/routes/__init__.py +0 -0
- backend/api/routes/admin.py +742 -0
- backend/api/routes/audio_search.py +903 -0
- backend/api/routes/auth.py +348 -0
- backend/api/routes/file_upload.py +2076 -0
- backend/api/routes/health.py +344 -0
- backend/api/routes/internal.py +435 -0
- backend/api/routes/jobs.py +1610 -0
- backend/api/routes/review.py +652 -0
- backend/api/routes/themes.py +162 -0
- backend/api/routes/users.py +1014 -0
- backend/config.py +172 -0
- backend/main.py +133 -0
- backend/middleware/__init__.py +5 -0
- backend/middleware/audit_logging.py +124 -0
- backend/models/__init__.py +0 -0
- backend/models/job.py +519 -0
- backend/models/requests.py +123 -0
- backend/models/theme.py +153 -0
- backend/models/user.py +254 -0
- backend/models/worker_log.py +164 -0
- backend/pyproject.toml +29 -0
- backend/quick-check.sh +93 -0
- backend/requirements.txt +29 -0
- backend/run_tests.sh +60 -0
- backend/services/__init__.py +0 -0
- backend/services/audio_analysis_service.py +243 -0
- backend/services/audio_editing_service.py +278 -0
- backend/services/audio_search_service.py +702 -0
- backend/services/auth_service.py +630 -0
- backend/services/credential_manager.py +792 -0
- backend/services/discord_service.py +172 -0
- backend/services/dropbox_service.py +301 -0
- backend/services/email_service.py +1093 -0
- backend/services/encoding_interface.py +454 -0
- backend/services/encoding_service.py +405 -0
- backend/services/firestore_service.py +512 -0
- backend/services/flacfetch_client.py +573 -0
- backend/services/gce_encoding/README.md +72 -0
- backend/services/gce_encoding/__init__.py +22 -0
- backend/services/gce_encoding/main.py +589 -0
- backend/services/gce_encoding/requirements.txt +16 -0
- backend/services/gdrive_service.py +356 -0
- backend/services/job_logging.py +258 -0
- backend/services/job_manager.py +842 -0
- backend/services/job_notification_service.py +271 -0
- backend/services/local_encoding_service.py +590 -0
- backend/services/local_preview_encoding_service.py +407 -0
- backend/services/lyrics_cache_service.py +216 -0
- backend/services/metrics.py +413 -0
- backend/services/packaging_service.py +287 -0
- backend/services/rclone_service.py +106 -0
- backend/services/storage_service.py +209 -0
- backend/services/stripe_service.py +275 -0
- backend/services/structured_logging.py +254 -0
- backend/services/template_service.py +330 -0
- backend/services/theme_service.py +469 -0
- backend/services/tracing.py +543 -0
- backend/services/user_service.py +721 -0
- backend/services/worker_service.py +558 -0
- backend/services/youtube_service.py +112 -0
- backend/services/youtube_upload_service.py +445 -0
- backend/tests/__init__.py +4 -0
- backend/tests/conftest.py +224 -0
- backend/tests/emulator/__init__.py +7 -0
- backend/tests/emulator/conftest.py +88 -0
- backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
- backend/tests/emulator/test_emulator_integration.py +356 -0
- backend/tests/emulator/test_style_loading_direct.py +436 -0
- backend/tests/emulator/test_worker_logs_direct.py +229 -0
- backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
- backend/tests/requirements-test.txt +10 -0
- backend/tests/requirements.txt +6 -0
- backend/tests/test_admin_email_endpoints.py +411 -0
- backend/tests/test_api_integration.py +460 -0
- backend/tests/test_api_routes.py +93 -0
- backend/tests/test_audio_analysis_service.py +294 -0
- backend/tests/test_audio_editing_service.py +386 -0
- backend/tests/test_audio_search.py +1398 -0
- backend/tests/test_audio_services.py +378 -0
- backend/tests/test_auth_firestore.py +231 -0
- backend/tests/test_config_extended.py +68 -0
- backend/tests/test_credential_manager.py +377 -0
- backend/tests/test_dependencies.py +54 -0
- backend/tests/test_discord_service.py +244 -0
- backend/tests/test_distribution_services.py +820 -0
- backend/tests/test_dropbox_service.py +472 -0
- backend/tests/test_email_service.py +492 -0
- backend/tests/test_emulator_integration.py +322 -0
- backend/tests/test_encoding_interface.py +412 -0
- backend/tests/test_file_upload.py +1739 -0
- backend/tests/test_flacfetch_client.py +632 -0
- backend/tests/test_gdrive_service.py +524 -0
- backend/tests/test_instrumental_api.py +431 -0
- backend/tests/test_internal_api.py +343 -0
- backend/tests/test_job_creation_regression.py +583 -0
- backend/tests/test_job_manager.py +339 -0
- backend/tests/test_job_manager_notifications.py +329 -0
- backend/tests/test_job_notification_service.py +443 -0
- backend/tests/test_jobs_api.py +273 -0
- backend/tests/test_local_encoding_service.py +423 -0
- backend/tests/test_local_preview_encoding_service.py +567 -0
- backend/tests/test_main.py +87 -0
- backend/tests/test_models.py +918 -0
- backend/tests/test_packaging_service.py +382 -0
- backend/tests/test_requests.py +201 -0
- backend/tests/test_routes_jobs.py +282 -0
- backend/tests/test_routes_review.py +337 -0
- backend/tests/test_services.py +556 -0
- backend/tests/test_services_extended.py +112 -0
- backend/tests/test_storage_service.py +448 -0
- backend/tests/test_style_upload.py +261 -0
- backend/tests/test_template_service.py +295 -0
- backend/tests/test_theme_service.py +516 -0
- backend/tests/test_unicode_sanitization.py +522 -0
- backend/tests/test_upload_api.py +256 -0
- backend/tests/test_validate.py +156 -0
- backend/tests/test_video_worker_orchestrator.py +847 -0
- backend/tests/test_worker_log_subcollection.py +509 -0
- backend/tests/test_worker_logging.py +365 -0
- backend/tests/test_workers.py +1116 -0
- backend/tests/test_workers_extended.py +178 -0
- backend/tests/test_youtube_service.py +247 -0
- backend/tests/test_youtube_upload_service.py +568 -0
- backend/validate.py +173 -0
- backend/version.py +27 -0
- backend/workers/README.md +597 -0
- backend/workers/__init__.py +11 -0
- backend/workers/audio_worker.py +618 -0
- backend/workers/lyrics_worker.py +683 -0
- backend/workers/render_video_worker.py +483 -0
- backend/workers/screens_worker.py +525 -0
- backend/workers/style_helper.py +198 -0
- backend/workers/video_worker.py +1277 -0
- backend/workers/video_worker_orchestrator.py +701 -0
- backend/workers/worker_logging.py +278 -0
- karaoke_gen/instrumental_review/static/index.html +7 -4
- karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
- karaoke_gen/utils/__init__.py +163 -8
- karaoke_gen/video_background_processor.py +9 -4
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/METADATA +1 -1
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/RECORD +186 -41
- lyrics_transcriber/correction/agentic/providers/config.py +9 -5
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +1 -51
- lyrics_transcriber/correction/corrector.py +192 -130
- lyrics_transcriber/correction/operations.py +24 -9
- lyrics_transcriber/frontend/package-lock.json +2 -2
- lyrics_transcriber/frontend/package.json +1 -1
- lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
- lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
- lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
- lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
- lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
- lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
- lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
- lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
- lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
- lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
- lyrics_transcriber/frontend/src/theme.ts +42 -15
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/frontend/vite.config.js +5 -0
- lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
- lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
- lyrics_transcriber/frontend/web_assets/index.html +6 -2
- lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
- lyrics_transcriber/output/generator.py +17 -3
- lyrics_transcriber/output/video.py +60 -95
- lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/WHEEL +0 -0
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/entry_points.txt +0 -0
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.96.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,443 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Integration tests for worker log subcollection storage.
|
|
3
|
+
|
|
4
|
+
Tests the subcollection approach using real Firestore emulator to verify:
|
|
5
|
+
- Logs are stored in subcollection (jobs/{job_id}/logs)
|
|
6
|
+
- TTL expiry field is set correctly
|
|
7
|
+
- Large log volumes work without 1MB limit issues
|
|
8
|
+
- Concurrent writes are handled correctly
|
|
9
|
+
- Logs can be queried efficiently
|
|
10
|
+
|
|
11
|
+
Run with: ./scripts/run-emulator-tests.sh
|
|
12
|
+
"""
|
|
13
|
+
import pytest
|
|
14
|
+
import time
|
|
15
|
+
import requests
|
|
16
|
+
import os
|
|
17
|
+
import threading
|
|
18
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
19
|
+
from datetime import datetime, timezone, timedelta
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def emulators_running() -> bool:
    """Return True when the local GCP emulator endpoint answers on port 8080."""
    try:
        # Any response at all (even an error page) means the emulator is up;
        # only a refused connection or a timeout counts as "not running".
        requests.get("http://127.0.0.1:8080", timeout=1)
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        return False
    return True
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Skip every test in this module unless the GCP emulators are reachable.
pytestmark = pytest.mark.skipif(
    not emulators_running(),
    reason="GCP emulators not running. Start with: scripts/start-emulators.sh"
)

# Point the Firestore client at the local emulator rather than production.
os.environ["FIRESTORE_EMULATOR_HOST"] = "127.0.0.1:8080"
os.environ["GOOGLE_CLOUD_PROJECT"] = "test-project"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class TestWorkerLogsSubcollectionDirect:
    """Direct Firestore tests for worker logs subcollection."""

    @pytest.fixture(autouse=True)
    def setup_firestore(self):
        """Set up Firestore client for each test."""
        from google.cloud import firestore
        self.db = firestore.Client(project="test-project")
        self.collection = "test-subcollection-jobs"
        yield

    def _create_test_job(self):
        """Create a test job document and return its generated id."""
        job_id = f"test-sub-{int(time.time() * 1000)}"
        self.db.collection(self.collection).document(job_id).set({
            "job_id": job_id,
            "status": "pending",
            "created_at": datetime.now(timezone.utc),
            "worker_logs": []  # Legacy field - kept empty for new jobs
        })
        return job_id

    def _append_log_to_subcollection(self, job_id: str, worker: str, message: str, ttl_days: int = 30):
        """Add a log entry to the subcollection at jobs/{job_id}/logs."""
        import uuid
        log_id = str(uuid.uuid4())
        created = datetime.now(timezone.utc)
        entry = {
            "id": log_id,
            "job_id": job_id,
            "timestamp": created,
            "level": "INFO",
            "worker": worker,
            "message": message,
            "ttl_expiry": created + timedelta(days=ttl_days),
        }
        self.db.collection(self.collection).document(job_id).collection("logs").document(log_id).set(entry)
        return log_id

    def _get_logs_from_subcollection(self, job_id: str, worker: str = None, limit: int = 500):
        """Fetch logs ordered by timestamp, optionally filtered to one worker."""
        query = (
            self.db.collection(self.collection)
            .document(job_id)
            .collection("logs")
            .order_by("timestamp")
        )
        if worker:
            from google.cloud.firestore_v1 import FieldFilter
            query = query.where(filter=FieldFilter("worker", "==", worker))
        return [snapshot.to_dict() for snapshot in query.limit(limit).stream()]

    def _count_logs_in_subcollection(self, job_id: str) -> int:
        """Count log documents via a Firestore aggregation query."""
        logs_ref = self.db.collection(self.collection).document(job_id).collection("logs")
        result = logs_ref.count().get()
        if not result or len(result) == 0:
            return 0
        return result[0][0].value

    def _delete_logs_subcollection(self, job_id: str, batch_size: int = 100) -> int:
        """Delete every log document under the job in batches; return the count."""
        logs_ref = self.db.collection(self.collection).document(job_id).collection("logs")
        total_deleted = 0
        while True:
            page = list(logs_ref.limit(batch_size).stream())
            if not page:
                return total_deleted
            batch = self.db.batch()
            for snapshot in page:
                batch.delete(snapshot.reference)
            total_deleted += len(page)
            batch.commit()

    def test_subcollection_single_write(self):
        """Test writing single log to subcollection."""
        job_id = self._create_test_job()
        log_id = self._append_log_to_subcollection(job_id, "test", "Single log message")

        time.sleep(0.1)
        logs = self._get_logs_from_subcollection(job_id)

        assert len(logs) == 1
        entry = logs[0]
        assert entry["message"] == "Single log message"
        assert entry["worker"] == "test"
        assert entry["id"] == log_id
        print("Single write to subcollection works")

    def test_subcollection_ttl_field_is_set(self):
        """Test TTL expiry field is set correctly."""
        job_id = self._create_test_job()
        self._append_log_to_subcollection(job_id, "test", "Log with TTL", ttl_days=7)

        time.sleep(0.1)
        logs = self._get_logs_from_subcollection(job_id)
        assert len(logs) == 1

        ttl_expiry = logs[0]["ttl_expiry"]
        assert ttl_expiry is not None

        # The stored expiry should land roughly 7 days from now.
        expected_ttl = datetime.now(timezone.utc) + timedelta(days=7)
        if hasattr(ttl_expiry, 'timestamp'):
            # Firestore datetime object
            diff = abs((ttl_expiry.replace(tzinfo=timezone.utc) - expected_ttl).total_seconds())
        else:
            diff = 0  # If already datetime
        assert diff < 60, f"TTL expiry should be ~7 days from now, diff was {diff}s"
        print("TTL field is set correctly")

    def test_subcollection_sequential_writes(self):
        """Test multiple sequential writes to subcollection."""
        job_id = self._create_test_job()
        for i in range(20):
            self._append_log_to_subcollection(job_id, "test", f"Log {i}")

        time.sleep(0.2)
        logs = self._get_logs_from_subcollection(job_id)

        assert len(logs) == 20, f"Expected 20 logs, got {len(logs)}"
        print(f"Sequential writes: {len(logs)} logs created")

    def test_subcollection_concurrent_writes(self):
        """Test concurrent writes to subcollection - no race conditions."""
        job_id = self._create_test_job()
        num_writes = 50

        def write_log(index):
            self._append_log_to_subcollection(job_id, "worker", f"Concurrent Log {index}")

        with ThreadPoolExecutor(max_workers=10) as executor:
            pending = [executor.submit(write_log, i) for i in range(num_writes)]
            for future in as_completed(pending):
                future.result()

        time.sleep(0.5)
        logs = self._get_logs_from_subcollection(job_id)

        assert len(logs) == num_writes, f"Expected {num_writes} logs, got {len(logs)}"
        print(f"Concurrent writes: All {num_writes} logs preserved")

    def test_subcollection_large_volume(self):
        """Test large volume of logs - demonstrates no 1MB limit."""
        job_id = self._create_test_job()
        num_logs = 500  # Would exceed 1MB in embedded array

        # Submit the writes in concurrent batches of 50.
        batch_size = 50
        for batch_start in range(0, num_logs, batch_size):
            batch_end = min(batch_start + batch_size, num_logs)
            with ThreadPoolExecutor(max_workers=10) as executor:
                pending = [
                    executor.submit(
                        self._append_log_to_subcollection,
                        job_id,
                        "stress",
                        f"Large volume test log {i} - " + "x" * 500  # ~500 bytes per log
                    )
                    for i in range(batch_start, batch_end)
                ]
                for future in as_completed(pending):
                    future.result()

        time.sleep(1)
        count = self._count_logs_in_subcollection(job_id)

        assert count == num_logs, f"Expected {num_logs} logs, got {count}"
        print(f"Large volume: {count} logs created (~{count * 500 / 1024:.1f}KB would exceed 1MB if embedded)")

    def test_subcollection_filter_by_worker(self):
        """Test filtering logs by worker type."""
        job_id = self._create_test_job()

        # Interleave logs from three different workers.
        for i in range(10):
            self._append_log_to_subcollection(job_id, "audio", f"Audio log {i}")
            self._append_log_to_subcollection(job_id, "lyrics", f"Lyrics log {i}")
            self._append_log_to_subcollection(job_id, "video", f"Video log {i}")

        time.sleep(0.3)

        audio_logs = self._get_logs_from_subcollection(job_id, worker="audio")
        lyrics_logs = self._get_logs_from_subcollection(job_id, worker="lyrics")
        all_logs = self._get_logs_from_subcollection(job_id)

        assert len(audio_logs) == 10, f"Expected 10 audio logs, got {len(audio_logs)}"
        assert len(lyrics_logs) == 10, f"Expected 10 lyrics logs, got {len(lyrics_logs)}"
        assert len(all_logs) == 30, f"Expected 30 total logs, got {len(all_logs)}"
        print("Worker filtering works correctly")

    def test_subcollection_ordered_by_timestamp(self):
        """Test logs are returned in timestamp order."""
        job_id = self._create_test_job()

        # A small delay between writes guarantees distinct timestamps.
        for i in range(5):
            self._append_log_to_subcollection(job_id, "test", f"Log {i}")
            time.sleep(0.02)

        logs = self._get_logs_from_subcollection(job_id)

        # Timestamps must come back non-decreasing.
        for i in range(len(logs) - 1):
            assert logs[i]["timestamp"] <= logs[i + 1]["timestamp"], \
                f"Logs not in order at index {i}"
        print("Logs are ordered by timestamp")

    def test_subcollection_delete_all_logs(self):
        """Test deleting all logs in subcollection."""
        job_id = self._create_test_job()

        for i in range(25):
            self._append_log_to_subcollection(job_id, "test", f"Log {i}")

        time.sleep(0.2)
        count_before = self._count_logs_in_subcollection(job_id)
        assert count_before == 25

        deleted = self._delete_logs_subcollection(job_id)

        time.sleep(0.2)
        count_after = self._count_logs_in_subcollection(job_id)

        assert deleted == 25, f"Expected to delete 25 logs, deleted {deleted}"
        assert count_after == 0, f"Expected 0 logs after delete, got {count_after}"
        print(f"Deleted {deleted} logs from subcollection")

    def test_subcollection_concurrent_workers_interleaved(self):
        """Test simulating audio and lyrics workers writing concurrently."""
        job_id = self._create_test_job()
        logs_per_worker = 20

        def run_worker(worker_name, template):
            # Each simulated worker writes its own stream of log messages.
            for i in range(logs_per_worker):
                self._append_log_to_subcollection(job_id, worker_name, template.format(i))
                time.sleep(0.005)

        threads = [
            threading.Thread(target=run_worker, args=("audio", "Audio processing step {}")),
            threading.Thread(target=run_worker, args=("lyrics", "Lyrics processing step {}")),
            threading.Thread(target=run_worker, args=("video", "Video encoding step {}")),
        ]

        for t in threads:
            t.start()
        for t in threads:
            t.join()

        time.sleep(0.5)

        audio_logs = self._get_logs_from_subcollection(job_id, worker="audio")
        lyrics_logs = self._get_logs_from_subcollection(job_id, worker="lyrics")
        video_logs = self._get_logs_from_subcollection(job_id, worker="video")
        total = self._count_logs_in_subcollection(job_id)

        assert len(audio_logs) == logs_per_worker
        assert len(lyrics_logs) == logs_per_worker
        assert len(video_logs) == logs_per_worker
        assert total == logs_per_worker * 3

        print(f"Concurrent workers: {len(audio_logs)} audio + {len(lyrics_logs)} lyrics + {len(video_logs)} video = {total}")

    def test_subcollection_job_deletion_cleans_up_logs(self):
        """Test that deleting job document doesn't orphan logs."""
        job_id = self._create_test_job()

        for i in range(10):
            self._append_log_to_subcollection(job_id, "test", f"Log {i}")

        time.sleep(0.2)
        assert self._count_logs_in_subcollection(job_id) == 10

        # Logs must be removed before the parent doc in real code, because
        # Firestore does not cascade-delete subcollections.
        deleted = self._delete_logs_subcollection(job_id)
        assert deleted == 10

        self.db.collection(self.collection).document(job_id).delete()

        time.sleep(0.2)
        assert self._count_logs_in_subcollection(job_id) == 0
        print("Job deletion with log cleanup works correctly")
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
class TestSubcollectionVsEmbeddedArray:
    """Compare subcollection approach with embedded array."""

    @pytest.fixture(autouse=True)
    def setup_firestore(self):
        """Set up Firestore client for each test."""
        from google.cloud import firestore
        self.db = firestore.Client(project="test-project")
        self.collection = "test-comparison-jobs"
        yield

    def _create_test_job(self, use_array: bool):
        """Create test job; id suffix records which storage style it exercises."""
        suffix = 'arr' if use_array else 'sub'
        job_id = f"test-cmp-{int(time.time() * 1000)}-{suffix}"
        self.db.collection(self.collection).document(job_id).set({
            "job_id": job_id,
            "status": "pending",
            "created_at": datetime.now(timezone.utc),
            "worker_logs": []
        })
        return job_id

    def _append_log_array(self, job_id: str, message: str):
        """Add log to the embedded worker_logs array on the job document."""
        from google.cloud import firestore
        entry = {
            "timestamp": datetime.now(timezone.utc).isoformat() + "Z",
            "level": "INFO",
            "worker": "test",
            "message": message
        }
        self.db.collection(self.collection).document(job_id).update({
            "worker_logs": firestore.ArrayUnion([entry])
        })

    def _append_log_subcollection(self, job_id: str, message: str):
        """Add log to the jobs/{job_id}/logs subcollection."""
        import uuid
        created = datetime.now(timezone.utc)
        logs_ref = self.db.collection(self.collection).document(job_id).collection("logs")
        logs_ref.document(str(uuid.uuid4())).set({
            "timestamp": created,
            "level": "INFO",
            "worker": "test",
            "message": message,
            "ttl_expiry": created + timedelta(days=30)
        })

    def test_document_size_comparison(self):
        """Compare document sizes between approaches."""
        array_job_id = self._create_test_job(use_array=True)
        sub_job_id = self._create_test_job(use_array=False)

        num_logs = 100
        large_message = "x" * 5000  # 5KB per log

        print("\nAdding logs to both approaches...")
        for i in range(num_logs):
            self._append_log_array(array_job_id, f"Array log {i}: {large_message}")
            self._append_log_subcollection(sub_job_id, f"Sub log {i}: {large_message}")

        time.sleep(0.5)

        # Fetch both job documents and inspect their embedded arrays.
        array_doc = self.db.collection(self.collection).document(array_job_id).get()
        sub_doc = self.db.collection(self.collection).document(sub_job_id).get()

        array_logs = array_doc.to_dict().get("worker_logs", [])
        sub_logs = sub_doc.to_dict().get("worker_logs", [])

        print(f"\nEmbedded array: {len(array_logs)} logs in document")
        print(f"Subcollection: {len(sub_logs)} logs in document (0 expected)")

        # Every log lands inside the main document with the embedded array...
        assert len(array_logs) == num_logs

        # ...while the subcollection approach leaves the main document empty.
        assert len(sub_logs) == 0

        # The subcollection holds the logs separately from the job document.
        sub_count = self.db.collection(self.collection).document(sub_job_id).collection("logs").count().get()[0][0].value
        assert sub_count == num_logs

        print(f"Subcollection logs stored separately: {sub_count}")
        print("Subcollection approach keeps main document small")
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
# Import-time banner confirming the module loaded.
_READY_MESSAGE = "Worker logs subcollection integration tests ready"
print(_READY_MESSAGE)
|