karaoke-gen 0.90.1__py3-none-any.whl → 0.99.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- backend/.coveragerc +20 -0
- backend/.gitignore +37 -0
- backend/Dockerfile +43 -0
- backend/Dockerfile.base +74 -0
- backend/README.md +242 -0
- backend/__init__.py +0 -0
- backend/api/__init__.py +0 -0
- backend/api/dependencies.py +457 -0
- backend/api/routes/__init__.py +0 -0
- backend/api/routes/admin.py +835 -0
- backend/api/routes/audio_search.py +913 -0
- backend/api/routes/auth.py +348 -0
- backend/api/routes/file_upload.py +2112 -0
- backend/api/routes/health.py +409 -0
- backend/api/routes/internal.py +435 -0
- backend/api/routes/jobs.py +1629 -0
- backend/api/routes/review.py +652 -0
- backend/api/routes/themes.py +162 -0
- backend/api/routes/users.py +1513 -0
- backend/config.py +172 -0
- backend/main.py +157 -0
- backend/middleware/__init__.py +5 -0
- backend/middleware/audit_logging.py +124 -0
- backend/models/__init__.py +0 -0
- backend/models/job.py +519 -0
- backend/models/requests.py +123 -0
- backend/models/theme.py +153 -0
- backend/models/user.py +254 -0
- backend/models/worker_log.py +164 -0
- backend/pyproject.toml +29 -0
- backend/quick-check.sh +93 -0
- backend/requirements.txt +29 -0
- backend/run_tests.sh +60 -0
- backend/services/__init__.py +0 -0
- backend/services/audio_analysis_service.py +243 -0
- backend/services/audio_editing_service.py +278 -0
- backend/services/audio_search_service.py +702 -0
- backend/services/auth_service.py +630 -0
- backend/services/credential_manager.py +792 -0
- backend/services/discord_service.py +172 -0
- backend/services/dropbox_service.py +301 -0
- backend/services/email_service.py +1093 -0
- backend/services/encoding_interface.py +454 -0
- backend/services/encoding_service.py +502 -0
- backend/services/firestore_service.py +512 -0
- backend/services/flacfetch_client.py +573 -0
- backend/services/gce_encoding/README.md +72 -0
- backend/services/gce_encoding/__init__.py +22 -0
- backend/services/gce_encoding/main.py +589 -0
- backend/services/gce_encoding/requirements.txt +16 -0
- backend/services/gdrive_service.py +356 -0
- backend/services/job_logging.py +258 -0
- backend/services/job_manager.py +853 -0
- backend/services/job_notification_service.py +271 -0
- backend/services/langfuse_preloader.py +98 -0
- backend/services/local_encoding_service.py +590 -0
- backend/services/local_preview_encoding_service.py +407 -0
- backend/services/lyrics_cache_service.py +216 -0
- backend/services/metrics.py +413 -0
- backend/services/nltk_preloader.py +122 -0
- backend/services/packaging_service.py +287 -0
- backend/services/rclone_service.py +106 -0
- backend/services/spacy_preloader.py +65 -0
- backend/services/storage_service.py +209 -0
- backend/services/stripe_service.py +371 -0
- backend/services/structured_logging.py +254 -0
- backend/services/template_service.py +330 -0
- backend/services/theme_service.py +469 -0
- backend/services/tracing.py +543 -0
- backend/services/user_service.py +721 -0
- backend/services/worker_service.py +558 -0
- backend/services/youtube_service.py +112 -0
- backend/services/youtube_upload_service.py +445 -0
- backend/tests/__init__.py +4 -0
- backend/tests/conftest.py +224 -0
- backend/tests/emulator/__init__.py +7 -0
- backend/tests/emulator/conftest.py +109 -0
- backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
- backend/tests/emulator/test_emulator_integration.py +356 -0
- backend/tests/emulator/test_style_loading_direct.py +436 -0
- backend/tests/emulator/test_worker_logs_direct.py +229 -0
- backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
- backend/tests/requirements-test.txt +10 -0
- backend/tests/requirements.txt +6 -0
- backend/tests/test_admin_email_endpoints.py +411 -0
- backend/tests/test_api_integration.py +460 -0
- backend/tests/test_api_routes.py +93 -0
- backend/tests/test_audio_analysis_service.py +294 -0
- backend/tests/test_audio_editing_service.py +386 -0
- backend/tests/test_audio_search.py +1398 -0
- backend/tests/test_audio_services.py +378 -0
- backend/tests/test_auth_firestore.py +231 -0
- backend/tests/test_config_extended.py +68 -0
- backend/tests/test_credential_manager.py +377 -0
- backend/tests/test_dependencies.py +54 -0
- backend/tests/test_discord_service.py +244 -0
- backend/tests/test_distribution_services.py +820 -0
- backend/tests/test_dropbox_service.py +472 -0
- backend/tests/test_email_service.py +492 -0
- backend/tests/test_emulator_integration.py +322 -0
- backend/tests/test_encoding_interface.py +412 -0
- backend/tests/test_file_upload.py +1739 -0
- backend/tests/test_flacfetch_client.py +632 -0
- backend/tests/test_gdrive_service.py +524 -0
- backend/tests/test_instrumental_api.py +431 -0
- backend/tests/test_internal_api.py +343 -0
- backend/tests/test_job_creation_regression.py +583 -0
- backend/tests/test_job_manager.py +356 -0
- backend/tests/test_job_manager_notifications.py +329 -0
- backend/tests/test_job_notification_service.py +443 -0
- backend/tests/test_jobs_api.py +283 -0
- backend/tests/test_local_encoding_service.py +423 -0
- backend/tests/test_local_preview_encoding_service.py +567 -0
- backend/tests/test_main.py +87 -0
- backend/tests/test_models.py +918 -0
- backend/tests/test_packaging_service.py +382 -0
- backend/tests/test_requests.py +201 -0
- backend/tests/test_routes_jobs.py +282 -0
- backend/tests/test_routes_review.py +337 -0
- backend/tests/test_services.py +556 -0
- backend/tests/test_services_extended.py +112 -0
- backend/tests/test_spacy_preloader.py +119 -0
- backend/tests/test_storage_service.py +448 -0
- backend/tests/test_style_upload.py +261 -0
- backend/tests/test_template_service.py +295 -0
- backend/tests/test_theme_service.py +516 -0
- backend/tests/test_unicode_sanitization.py +522 -0
- backend/tests/test_upload_api.py +256 -0
- backend/tests/test_validate.py +156 -0
- backend/tests/test_video_worker_orchestrator.py +847 -0
- backend/tests/test_worker_log_subcollection.py +509 -0
- backend/tests/test_worker_logging.py +365 -0
- backend/tests/test_workers.py +1116 -0
- backend/tests/test_workers_extended.py +178 -0
- backend/tests/test_youtube_service.py +247 -0
- backend/tests/test_youtube_upload_service.py +568 -0
- backend/utils/test_data.py +27 -0
- backend/validate.py +173 -0
- backend/version.py +27 -0
- backend/workers/README.md +597 -0
- backend/workers/__init__.py +11 -0
- backend/workers/audio_worker.py +618 -0
- backend/workers/lyrics_worker.py +683 -0
- backend/workers/render_video_worker.py +483 -0
- backend/workers/screens_worker.py +535 -0
- backend/workers/style_helper.py +198 -0
- backend/workers/video_worker.py +1277 -0
- backend/workers/video_worker_orchestrator.py +701 -0
- backend/workers/worker_logging.py +278 -0
- karaoke_gen/instrumental_review/static/index.html +7 -4
- karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
- karaoke_gen/utils/__init__.py +163 -8
- karaoke_gen/video_background_processor.py +9 -4
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/METADATA +1 -1
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/RECORD +196 -46
- lyrics_transcriber/correction/agentic/agent.py +17 -6
- lyrics_transcriber/correction/agentic/providers/config.py +9 -5
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -93
- lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
- lyrics_transcriber/correction/anchor_sequence.py +151 -37
- lyrics_transcriber/correction/corrector.py +192 -130
- lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
- lyrics_transcriber/correction/operations.py +24 -9
- lyrics_transcriber/correction/phrase_analyzer.py +18 -0
- lyrics_transcriber/frontend/package-lock.json +2 -2
- lyrics_transcriber/frontend/package.json +1 -1
- lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
- lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
- lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
- lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
- lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
- lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
- lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
- lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
- lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
- lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
- lyrics_transcriber/frontend/src/theme.ts +42 -15
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/frontend/vite.config.js +5 -0
- lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
- lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
- lyrics_transcriber/frontend/web_assets/index.html +6 -2
- lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
- lyrics_transcriber/output/generator.py +17 -3
- lyrics_transcriber/output/video.py +60 -95
- lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/WHEEL +0 -0
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/entry_points.txt +0 -0
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/licenses/LICENSE +0 -0
backend/services/firestore_service.py
@@ -0,0 +1,512 @@
+"""
+Firestore database operations for job management.
+"""
+import logging
+from typing import Optional, Dict, Any, List
+from datetime import datetime
+from google.cloud import firestore
+from google.cloud.firestore_v1 import FieldFilter
+
+from backend.config import settings
+from backend.models.job import Job, JobStatus, TimelineEvent
+from backend.models.worker_log import WorkerLogEntry
+
+
+logger = logging.getLogger(__name__)
+
+
+class FirestoreService:
+    """Service for Firestore database operations."""
+
+    def __init__(self):
+        """Initialize Firestore client."""
+        self.db = firestore.Client(project=settings.google_cloud_project)
+        self.collection = settings.firestore_collection
+        self.tokens_collection = "auth_tokens"  # Collection for access tokens
+
+    def create_job(self, job: Job) -> None:
+        """Create a new job in Firestore."""
+        try:
+            doc_ref = self.db.collection(self.collection).document(job.job_id)
+            doc_ref.set(job.model_dump(mode='json'))
+            logger.info(f"Created job {job.job_id} in Firestore")
+        except Exception as e:
+            logger.error(f"Error creating job {job.job_id}: {e}")
+            raise
+
+    def get_job(self, job_id: str) -> Optional[Job]:
+        """Get a job by ID."""
+        try:
+            doc_ref = self.db.collection(self.collection).document(job_id)
+            doc = doc_ref.get()
+
+            logger.debug(f"Fetching job {job_id} from collection {self.collection}")
+            logger.debug(f"Document exists: {doc.exists}")
+
+            if not doc.exists:
+                return None
+
+            data = doc.to_dict()
+            logger.debug(f"Document data keys: {list(data.keys()) if data else 'None'}")
+            logger.debug(f"Document data: {data}")
+
+            return Job(**data)
+        except Exception as e:
+            logger.error(f"Error getting job {job_id}: {e}")
+            raise
+
+    def update_job(self, job_id: str, updates: Dict[str, Any]) -> None:
+        """Update a job with partial data."""
+        try:
+            doc_ref = self.db.collection(self.collection).document(job_id)
+
+            # Add updated_at timestamp
+            updates['updated_at'] = datetime.utcnow()
+
+            doc_ref.update(updates)
+            logger.info(f"Updated job {job_id} in Firestore")
+        except Exception as e:
+            logger.error(f"Error updating job {job_id}: {e}")
+            raise
+
+    def update_job_status(
+        self,
+        job_id: str,
+        status: JobStatus,
+        progress: Optional[int] = None,
+        message: Optional[str] = None,
+        **additional_fields
+    ) -> None:
+        """Update job status and add timeline event."""
+        try:
+            doc_ref = self.db.collection(self.collection).document(job_id)
+
+            # Create timeline event
+            timeline_event = TimelineEvent(
+                status=status.value,
+                timestamp=datetime.utcnow().isoformat(),
+                progress=progress,
+                message=message
+            )
+
+            # Prepare updates
+            updates = {
+                'status': status.value,
+                'updated_at': datetime.utcnow(),
+                'timeline': firestore.ArrayUnion([timeline_event.model_dump(mode='json')])
+            }
+
+            if progress is not None:
+                updates['progress'] = progress
+
+            # Add any additional fields
+            updates.update(additional_fields)
+
+            doc_ref.update(updates)
+            logger.info(f"Updated job {job_id} status to {status.value}")
+        except Exception as e:
+            logger.error(f"Error updating job status {job_id}: {e}")
+            raise
+
+    def list_jobs(
+        self,
+        status: Optional[JobStatus] = None,
+        environment: Optional[str] = None,
+        client_id: Optional[str] = None,
+        created_after: Optional[datetime] = None,
+        created_before: Optional[datetime] = None,
+        user_email: Optional[str] = None,
+        limit: int = 100
+    ) -> List[Job]:
+        """
+        List jobs with optional filters.
+
+        Args:
+            status: Filter by job status
+            environment: Filter by request_metadata.environment (test/production/development)
+            client_id: Filter by request_metadata.client_id
+            created_after: Filter jobs created after this datetime
+            created_before: Filter jobs created before this datetime
+            user_email: Filter by user_email (owner of the job)
+            limit: Maximum number of jobs to return
+
+        Returns:
+            List of Job objects matching filters, ordered by created_at descending
+        """
+        try:
+            query = self.db.collection(self.collection)
+
+            if status:
+                query = query.where(filter=FieldFilter('status', '==', status.value))
+
+            # Filter by request_metadata fields using dot notation
+            if environment:
+                query = query.where(filter=FieldFilter('request_metadata.environment', '==', environment))
+
+            if client_id:
+                query = query.where(filter=FieldFilter('request_metadata.client_id', '==', client_id))
+
+            # Filter by user_email (job owner)
+            if user_email:
+                query = query.where(filter=FieldFilter('user_email', '==', user_email.lower()))
+
+            # Date range filters
+            if created_after:
+                query = query.where(filter=FieldFilter('created_at', '>=', created_after))
+
+            if created_before:
+                query = query.where(filter=FieldFilter('created_at', '<=', created_before))
+
+            query = query.order_by('created_at', direction=firestore.Query.DESCENDING).limit(limit)
+
+            docs = query.stream()
+            jobs = [Job(**doc.to_dict()) for doc in docs]
+
+            return jobs
+        except Exception as e:
+            logger.error(f"Error listing jobs: {e}")
+            raise
+
+    def delete_jobs_by_filter(
+        self,
+        environment: Optional[str] = None,
+        client_id: Optional[str] = None,
+        status: Optional[JobStatus] = None,
+        created_before: Optional[datetime] = None,
+    ) -> int:
+        """
+        Delete multiple jobs matching filter criteria.
+
+        CAUTION: This is a destructive operation. Use carefully.
+
+        Args:
+            environment: Delete jobs with this environment (e.g., "test")
+            client_id: Delete jobs from this client
+            status: Delete jobs with this status
+            created_before: Delete jobs created before this datetime
+
+        Returns:
+            Number of jobs deleted
+        """
+        try:
+            query = self.db.collection(self.collection)
+
+            if environment:
+                query = query.where(filter=FieldFilter('request_metadata.environment', '==', environment))
+
+            if client_id:
+                query = query.where(filter=FieldFilter('request_metadata.client_id', '==', client_id))
+
+            if status:
+                query = query.where(filter=FieldFilter('status', '==', status.value))
+
+            if created_before:
+                query = query.where(filter=FieldFilter('created_at', '<=', created_before))
+
+            # Get matching documents
+            docs = list(query.stream())
+            deleted_count = 0
+
+            # Delete in batches
+            batch = self.db.batch()
+            batch_count = 0
+
+            for doc in docs:
+                batch.delete(doc.reference)
+                batch_count += 1
+                deleted_count += 1
+
+                # Firestore batch limit is 500
+                if batch_count >= 500:
+                    batch.commit()
+                    batch = self.db.batch()
+                    batch_count = 0
+
+            # Commit any remaining deletes
+            if batch_count > 0:
+                batch.commit()
+
+            logger.info(f"Deleted {deleted_count} jobs matching filter criteria")
+            return deleted_count
+
+        except Exception as e:
+            logger.error(f"Error deleting jobs by filter: {e}")
+            raise
+
+    def delete_job(self, job_id: str) -> None:
+        """Delete a job from Firestore."""
+        try:
+            doc_ref = self.db.collection(self.collection).document(job_id)
+            doc_ref.delete()
+            logger.info(f"Deleted job {job_id} from Firestore")
+        except Exception as e:
+            logger.error(f"Error deleting job {job_id}: {e}")
+            raise
+
+    def append_worker_log(self, job_id: str, log_entry: Dict[str, Any]) -> None:
+        """
+        Atomically append a log entry to worker_logs using ArrayUnion.
+
+        This avoids the race condition of read-modify-write when multiple
+        workers are logging concurrently.
+
+        DEPRECATED: Use append_log_to_subcollection() instead to avoid
+        the 1MB document size limit.
+
+        Args:
+            job_id: Job ID
+            log_entry: Log entry dict with timestamp, level, worker, message
+        """
+        try:
+            doc_ref = self.db.collection(self.collection).document(job_id)
+            doc_ref.update({
+                'worker_logs': firestore.ArrayUnion([log_entry]),
+                'updated_at': datetime.utcnow()
+            })
+            # Don't log every append - too spammy
+        except Exception as e:
+            # Log but don't raise - logging shouldn't break workers
+            logger.debug(f"Error appending worker log for job {job_id}: {e}")
+
+    # ============================================
+    # Worker Log Subcollection Methods
+    # ============================================
+    # These methods store logs in a subcollection (jobs/{job_id}/logs)
+    # instead of an embedded array to avoid the 1MB document size limit.
+
+    def append_log_to_subcollection(self, job_id: str, log_entry: WorkerLogEntry) -> None:
+        """
+        Append a log entry to the logs subcollection.
+
+        Stores logs at: jobs/{job_id}/logs/{log_id}
+
+        This approach avoids the 1MB document size limit by storing each
+        log entry as a separate document in a subcollection.
+
+        Args:
+            job_id: Job ID
+            log_entry: WorkerLogEntry instance
+        """
+        try:
+            # Ensure job_id is set on the log entry
+            log_entry.job_id = job_id
+
+            # Get subcollection reference
+            logs_ref = self.db.collection(self.collection).document(job_id).collection("logs")
+
+            # Add document with auto-generated ID or use log_entry.id
+            doc_ref = logs_ref.document(log_entry.id)
+            doc_ref.set(log_entry.to_dict())
+
+            # Don't log every append - too spammy
+        except Exception as e:
+            # Log but don't raise - logging shouldn't break workers
+            logger.debug(f"Error appending log to subcollection for job {job_id}: {e}")
+
+    def get_logs_from_subcollection(
+        self,
+        job_id: str,
+        limit: int = 500,
+        since_timestamp: Optional[datetime] = None,
+        worker: Optional[str] = None,
+        offset: int = 0
+    ) -> List[WorkerLogEntry]:
+        """
+        Get log entries from the logs subcollection.
+
+        Args:
+            job_id: Job ID
+            limit: Maximum number of logs to return (default 500)
+            since_timestamp: Return only logs after this timestamp
+            worker: Filter by worker name (optional)
+            offset: Number of logs to skip (for pagination)
+
+        Returns:
+            List of WorkerLogEntry instances, ordered by timestamp ascending
+        """
+        try:
+            # Get subcollection reference
+            logs_ref = self.db.collection(self.collection).document(job_id).collection("logs")
+
+            # Build query
+            query = logs_ref.order_by("timestamp", direction=firestore.Query.ASCENDING)
+
+            if since_timestamp:
+                query = query.where(filter=FieldFilter("timestamp", ">", since_timestamp))
+
+            if worker:
+                query = query.where(filter=FieldFilter("worker", "==", worker))
+
+            # Apply offset and limit
+            if offset > 0:
+                query = query.offset(offset)
+
+            query = query.limit(limit)
+
+            # Execute query
+            docs = query.stream()
+            logs = [WorkerLogEntry.from_dict(doc.to_dict()) for doc in docs]
+
+            return logs
+
+        except Exception as e:
+            logger.error(f"Error getting logs from subcollection for job {job_id}: {e}")
+            return []
+
+    def get_logs_count_from_subcollection(self, job_id: str) -> int:
+        """
+        Get the total count of log entries in the subcollection.
+
+        Args:
+            job_id: Job ID
+
+        Returns:
+            Total count of log entries
+        """
+        try:
+            # Get subcollection reference
+            logs_ref = self.db.collection(self.collection).document(job_id).collection("logs")
+
+            # Use aggregation query for efficient counting
+            count_query = logs_ref.count()
+            result = count_query.get()
+
+            # Result is a list of AggregationResult, we want the first one's count
+            if result and len(result) > 0:
+                return result[0][0].value
+            return 0
+
+        except Exception as e:
+            logger.error(f"Error counting logs for job {job_id}: {e}")
+            return 0
+
+    def delete_logs_subcollection(self, job_id: str, batch_size: int = 500) -> int:
+        """
+        Delete all log entries in the logs subcollection.
+
+        This is used when deleting a job to clean up its logs.
+
+        Args:
+            job_id: Job ID
+            batch_size: Number of documents to delete per batch
+
+        Returns:
+            Number of logs deleted
+        """
+        try:
+            logs_ref = self.db.collection(self.collection).document(job_id).collection("logs")
+            deleted_count = 0
+
+            while True:
+                # Get a batch of documents
+                docs = logs_ref.limit(batch_size).stream()
+                deleted_in_batch = 0
+
+                # Delete in a batch
+                batch = self.db.batch()
+                for doc in docs:
+                    batch.delete(doc.reference)
+                    deleted_in_batch += 1
+
+                if deleted_in_batch == 0:
+                    break
+
+                batch.commit()
+                deleted_count += deleted_in_batch
+
+                # If we deleted less than batch_size, we're done
+                if deleted_in_batch < batch_size:
+                    break
+
+            if deleted_count > 0:
+                logger.info(f"Deleted {deleted_count} logs for job {job_id}")
+
+            return deleted_count
+
+        except Exception as e:
+            logger.error(f"Error deleting logs subcollection for job {job_id}: {e}")
+            return 0
+
+    # ============================================
+    # Token Management Methods
+    # ============================================
+
+    def create_token(self, token: str, token_data: Dict[str, Any]) -> None:
+        """Create a new access token in Firestore."""
+        try:
+            doc_ref = self.db.collection(self.tokens_collection).document(token)
+            doc_ref.set(token_data)
+            logger.info(f"Created token in Firestore")
+        except Exception as e:
+            logger.error(f"Error creating token: {e}")
+            raise
+
+    def get_token(self, token: str) -> Optional[Dict[str, Any]]:
+        """Get token data by token string."""
+        try:
+            doc_ref = self.db.collection(self.tokens_collection).document(token)
+            doc = doc_ref.get()
+
+            if not doc.exists:
+                return None
+
+            return doc.to_dict()
+        except Exception as e:
+            logger.error(f"Error getting token: {e}")
+            return None
+
+    def update_token(self, token: str, updates: Dict[str, Any]) -> None:
+        """Update token data."""
+        try:
+            doc_ref = self.db.collection(self.tokens_collection).document(token)
+            doc_ref.update(updates)
+            logger.info(f"Updated token in Firestore")
+        except Exception as e:
+            logger.error(f"Error updating token: {e}")
+            raise
+
+    def increment_token_usage(self, token: str, job_id: str) -> None:
+        """Increment token usage count and add job to history."""
+        try:
+            doc_ref = self.db.collection(self.tokens_collection).document(token)
+
+            # Use Firestore transaction to ensure atomic increment
+            @firestore.transactional
+            def update_in_transaction(transaction, doc_ref):
+                snapshot = doc_ref.get(transaction=transaction)
+                if not snapshot.exists:
+                    raise ValueError("Token not found")
+
+                data = snapshot.to_dict()
+                current_usage = data.get("usage_count", 0)
+                jobs = data.get("jobs", [])
+
+                # Increment usage
+                transaction.update(doc_ref, {
+                    "usage_count": current_usage + 1,
+                    "last_used": datetime.utcnow(),
+                    "jobs": firestore.ArrayUnion([{
+                        "job_id": job_id,
+                        "created_at": datetime.utcnow()
+                    }])
+                })
+
+            transaction = self.db.transaction()
+            update_in_transaction(transaction, doc_ref)
+
+            logger.info(f"Incremented token usage for job {job_id}")
+        except Exception as e:
+            logger.error(f"Error incrementing token usage: {e}")
+            raise
+
+    def list_tokens(self) -> List[Dict[str, Any]]:
+        """List all tokens (admin only)."""
+        try:
+            docs = self.db.collection(self.tokens_collection).stream()
+            tokens = [doc.to_dict() for doc in docs]
+            logger.info(f"Retrieved {len(tokens)} tokens from Firestore")
+            return tokens
+        except Exception as e:
+            logger.error(f"Error listing tokens: {e}")
+            return []
+
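
For orientation, the sketch below shows how the new FirestoreService might be driven from worker code. It is not part of the package: the JobStatus member and the constructor fields for Job and WorkerLogEntry are assumptions inferred from how this diff uses them (job.job_id, status.value, log_entry.id / log_entry.to_dict()).

    # Hypothetical usage sketch; model fields below are assumed, not confirmed.
    from backend.models.job import Job, JobStatus
    from backend.models.worker_log import WorkerLogEntry
    from backend.services.firestore_service import FirestoreService

    service = FirestoreService()

    # Create the job document; the Firestore doc ID is job.job_id.
    job = Job(job_id="job-123", user_email="user@example.com")  # assumed fields
    service.create_job(job)

    # Each status update also appends a TimelineEvent via ArrayUnion, so
    # concurrent writers cannot clobber each other's timeline entries.
    service.update_job_status(
        "job-123",
        JobStatus.PROCESSING,  # assumed enum member
        progress=10,
        message="separating stems",
    )

    # Worker logs go to the jobs/{job_id}/logs subcollection, one document
    # per entry, keeping the parent job document under the 1MB size limit.
    entry = WorkerLogEntry(level="INFO", worker="audio_worker",
                           message="separation complete")  # assumed fields
    service.append_log_to_subcollection("job-123", entry)

    # Read back the job and a page of its logs.
    job = service.get_job("job-123")
    logs = service.get_logs_from_subcollection("job-123", limit=100)

Note the deliberate asymmetry in error handling: job mutations raise on failure, while the log-append paths swallow exceptions, so logging can never take a worker down.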
|
430
|
+
# ============================================
|
|
431
|
+
# Token Management Methods
|
|
432
|
+
# ============================================
|
|
433
|
+
|
|
434
|
+
def create_token(self, token: str, token_data: Dict[str, Any]) -> None:
|
|
435
|
+
"""Create a new access token in Firestore."""
|
|
436
|
+
try:
|
|
437
|
+
doc_ref = self.db.collection(self.tokens_collection).document(token)
|
|
438
|
+
doc_ref.set(token_data)
|
|
439
|
+
logger.info(f"Created token in Firestore")
|
|
440
|
+
except Exception as e:
|
|
441
|
+
logger.error(f"Error creating token: {e}")
|
|
442
|
+
raise
|
|
443
|
+
|
|
444
|
+
def get_token(self, token: str) -> Optional[Dict[str, Any]]:
|
|
445
|
+
"""Get token data by token string."""
|
|
446
|
+
try:
|
|
447
|
+
doc_ref = self.db.collection(self.tokens_collection).document(token)
|
|
448
|
+
doc = doc_ref.get()
|
|
449
|
+
|
|
450
|
+
if not doc.exists:
|
|
451
|
+
return None
|
|
452
|
+
|
|
453
|
+
return doc.to_dict()
|
|
454
|
+
except Exception as e:
|
|
455
|
+
logger.error(f"Error getting token: {e}")
|
|
456
|
+
return None
|
|
457
|
+
|
|
458
|
+
def update_token(self, token: str, updates: Dict[str, Any]) -> None:
|
|
459
|
+
"""Update token data."""
|
|
460
|
+
try:
|
|
461
|
+
doc_ref = self.db.collection(self.tokens_collection).document(token)
|
|
462
|
+
doc_ref.update(updates)
|
|
463
|
+
logger.info(f"Updated token in Firestore")
|
|
464
|
+
except Exception as e:
|
|
465
|
+
logger.error(f"Error updating token: {e}")
|
|
466
|
+
raise
|
|
467
|
+
|
|
468
|
+
def increment_token_usage(self, token: str, job_id: str) -> None:
|
|
469
|
+
"""Increment token usage count and add job to history."""
|
|
470
|
+
try:
|
|
471
|
+
doc_ref = self.db.collection(self.tokens_collection).document(token)
|
|
472
|
+
|
|
473
|
+
# Use Firestore transaction to ensure atomic increment
|
|
474
|
+
@firestore.transactional
|
|
475
|
+
def update_in_transaction(transaction, doc_ref):
|
|
476
|
+
snapshot = doc_ref.get(transaction=transaction)
|
|
477
|
+
if not snapshot.exists:
|
|
478
|
+
raise ValueError("Token not found")
|
|
479
|
+
|
|
480
|
+
data = snapshot.to_dict()
|
|
481
|
+
current_usage = data.get("usage_count", 0)
|
|
482
|
+
jobs = data.get("jobs", [])
|
|
483
|
+
|
|
484
|
+
# Increment usage
|
|
485
|
+
transaction.update(doc_ref, {
|
|
486
|
+
"usage_count": current_usage + 1,
|
|
487
|
+
"last_used": datetime.utcnow(),
|
|
488
|
+
"jobs": firestore.ArrayUnion([{
|
|
489
|
+
"job_id": job_id,
|
|
490
|
+
"created_at": datetime.utcnow()
|
|
491
|
+
}])
|
|
492
|
+
})
|
|
493
|
+
|
|
494
|
+
transaction = self.db.transaction()
|
|
495
|
+
update_in_transaction(transaction, doc_ref)
|
|
496
|
+
|
|
497
|
+
logger.info(f"Incremented token usage for job {job_id}")
|
|
498
|
+
except Exception as e:
|
|
499
|
+
logger.error(f"Error incrementing token usage: {e}")
|
|
500
|
+
raise
|
|
501
|
+
|
|
502
|
+
def list_tokens(self) -> List[Dict[str, Any]]:
|
|
503
|
+
"""List all tokens (admin only)."""
|
|
504
|
+
try:
|
|
505
|
+
docs = self.db.collection(self.tokens_collection).stream()
|
|
506
|
+
tokens = [doc.to_dict() for doc in docs]
|
|
507
|
+
logger.info(f"Retrieved {len(tokens)} tokens from Firestore")
|
|
508
|
+
return tokens
|
|
509
|
+
except Exception as e:
|
|
510
|
+
logger.error(f"Error listing tokens: {e}")
|
|
511
|
+
return []
|
|
512
|
+
|