karaoke-gen 0.96.0__py3-none-any.whl → 0.101.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- backend/api/routes/admin.py +696 -92
- backend/api/routes/audio_search.py +29 -8
- backend/api/routes/file_upload.py +99 -22
- backend/api/routes/health.py +65 -0
- backend/api/routes/internal.py +6 -0
- backend/api/routes/jobs.py +28 -1
- backend/api/routes/review.py +13 -6
- backend/api/routes/tenant.py +120 -0
- backend/api/routes/users.py +472 -51
- backend/main.py +31 -2
- backend/middleware/__init__.py +7 -1
- backend/middleware/tenant.py +192 -0
- backend/models/job.py +19 -3
- backend/models/tenant.py +208 -0
- backend/models/user.py +18 -0
- backend/services/email_service.py +253 -6
- backend/services/encoding_service.py +128 -31
- backend/services/firestore_service.py +6 -0
- backend/services/job_manager.py +44 -2
- backend/services/langfuse_preloader.py +98 -0
- backend/services/nltk_preloader.py +122 -0
- backend/services/spacy_preloader.py +65 -0
- backend/services/stripe_service.py +133 -11
- backend/services/tenant_service.py +285 -0
- backend/services/user_service.py +85 -7
- backend/tests/emulator/conftest.py +22 -1
- backend/tests/emulator/test_made_for_you_integration.py +167 -0
- backend/tests/test_admin_job_files.py +337 -0
- backend/tests/test_admin_job_reset.py +384 -0
- backend/tests/test_admin_job_update.py +326 -0
- backend/tests/test_email_service.py +233 -0
- backend/tests/test_impersonation.py +223 -0
- backend/tests/test_job_creation_regression.py +4 -0
- backend/tests/test_job_manager.py +171 -9
- backend/tests/test_jobs_api.py +11 -1
- backend/tests/test_made_for_you.py +2086 -0
- backend/tests/test_models.py +139 -0
- backend/tests/test_spacy_preloader.py +119 -0
- backend/tests/test_tenant_api.py +350 -0
- backend/tests/test_tenant_middleware.py +345 -0
- backend/tests/test_tenant_models.py +406 -0
- backend/tests/test_tenant_service.py +418 -0
- backend/utils/test_data.py +27 -0
- backend/workers/screens_worker.py +16 -6
- backend/workers/video_worker.py +8 -3
- {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.101.0.dist-info}/METADATA +1 -1
- {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.101.0.dist-info}/RECORD +58 -39
- lyrics_transcriber/correction/agentic/agent.py +17 -6
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -43
- lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
- lyrics_transcriber/correction/anchor_sequence.py +151 -37
- lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
- lyrics_transcriber/correction/phrase_analyzer.py +18 -0
- lyrics_transcriber/frontend/src/api.ts +13 -5
- lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +90 -57
- {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.101.0.dist-info}/WHEEL +0 -0
- {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.101.0.dist-info}/entry_points.txt +0 -0
- {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.101.0.dist-info}/licenses/LICENSE +0 -0
backend/api/routes/admin.py
CHANGED
@@ -15,11 +15,13 @@ from fastapi import APIRouter, Depends, HTTPException
 from pydantic import BaseModel
 
 from backend.api.dependencies import require_admin
-from backend.services.auth_service import UserType
+from backend.services.auth_service import UserType, AuthResult
 from backend.services.user_service import get_user_service, UserService, USERS_COLLECTION
 from backend.services.job_manager import JobManager
 from backend.services.flacfetch_client import get_flacfetch_client, FlacfetchServiceError
+from backend.services.storage_service import StorageService
 from backend.models.job import JobStatus
+from backend.utils.test_data import is_test_email
 from karaoke_gen.utils import sanitize_filename
 
 
@@ -55,18 +57,91 @@ class AdminStatsOverview(BaseModel):
     total_beta_testers: int
 
 
+class FileInfo(BaseModel):
+    """Information about a single file with signed download URL."""
+    name: str
+    path: str  # GCS path (gs://bucket/...)
+    download_url: str  # Signed URL for download
+    category: str  # e.g., "stems", "lyrics", "finals"
+    file_key: str  # e.g., "instrumental_clean", "lrc"
+
+
+class JobFilesResponse(BaseModel):
+    """Response containing all files for a job with signed download URLs."""
+    job_id: str
+    artist: Optional[str]
+    title: Optional[str]
+    files: List[FileInfo]
+    total_files: int
+
+
+class JobUpdateRequest(BaseModel):
+    """Request model for updating job fields."""
+    # Editable text fields
+    artist: Optional[str] = None
+    title: Optional[str] = None
+    user_email: Optional[str] = None
+    theme_id: Optional[str] = None
+    brand_prefix: Optional[str] = None
+    discord_webhook_url: Optional[str] = None
+    youtube_description: Optional[str] = None
+    youtube_description_template: Optional[str] = None
+    customer_email: Optional[str] = None
+    customer_notes: Optional[str] = None
+
+    # Editable boolean fields
+    enable_cdg: Optional[bool] = None
+    enable_txt: Optional[bool] = None
+    enable_youtube_upload: Optional[bool] = None
+    non_interactive: Optional[bool] = None
+    prep_only: Optional[bool] = None
+
+
+class JobUpdateResponse(BaseModel):
+    """Response from job update endpoint."""
+    status: str
+    job_id: str
+    updated_fields: List[str]
+    message: str
+
+
+# Fields that are allowed to be updated via PATCH endpoint
+EDITABLE_JOB_FIELDS = {
+    "artist",
+    "title",
+    "user_email",
+    "theme_id",
+    "brand_prefix",
+    "discord_webhook_url",
+    "youtube_description",
+    "youtube_description_template",
+    "customer_email",
+    "customer_notes",
+    "enable_cdg",
+    "enable_txt",
+    "enable_youtube_upload",
+    "non_interactive",
+    "prep_only",
+}
+
+
 # =============================================================================
 # Admin Stats Endpoints
 # =============================================================================
 
 @router.get("/stats/overview", response_model=AdminStatsOverview)
 async def get_admin_stats_overview(
+    exclude_test: bool = True,
     auth_data: Tuple[str, UserType, int] = Depends(require_admin),
     user_service: UserService = Depends(get_user_service),
 ):
     """
     Get overview statistics for admin dashboard.
 
+    Args:
+        exclude_test: If True (default), exclude test data (users with @inbox.testmail.app emails
+            and jobs created by test users) from all counts.
+
     Includes:
     - User counts (total, active in 7d, active in 30d)
     - Job counts (total, by status, recent)
@@ -81,48 +156,10 @@ async def get_admin_stats_overview(
     seven_days_ago = now - timedelta(days=7)
     thirty_days_ago = now - timedelta(days=30)
 
-    # Helper function to get count using aggregation
-    def get_count(query) -> int:
-        try:
-            agg_query = aggregation.AggregationQuery(query)
-            agg_query.count(alias="count")
-            results = agg_query.get()
-            return results[0][0].value if results else 0
-        except Exception as e:
-            logger.warning(f"Aggregation query failed: {e}")
-            return 0
-
-    # User statistics
     users_collection = db.collection(USERS_COLLECTION)
-
-    total_users = get_count(users_collection)
-
-    active_users_7d = get_count(
-        users_collection.where(filter=FieldFilter("last_login_at", ">=", seven_days_ago))
-    )
-
-    active_users_30d = get_count(
-        users_collection.where(filter=FieldFilter("last_login_at", ">=", thirty_days_ago))
-    )
-
-    total_beta_testers = get_count(
-        users_collection.where(filter=FieldFilter("is_beta_tester", "==", True))
-    )
-
-    # Job statistics
     jobs_collection = db.collection("jobs")
 
-    total_jobs = get_count(jobs_collection)
-
-    jobs_last_7d = get_count(
-        jobs_collection.where(filter=FieldFilter("created_at", ">=", seven_days_ago))
-    )
-
-    jobs_last_30d = get_count(
-        jobs_collection.where(filter=FieldFilter("created_at", ">=", thirty_days_ago))
-    )
-
-    # Jobs by status - map multiple statuses to simplified categories
+    # Jobs by status category mapping
     processing_statuses = [
         "downloading", "downloading_audio", "searching_audio", "awaiting_audio_selection",
         "separating_stage1", "separating_stage2", "transcribing", "correcting",
@@ -131,63 +168,166 @@ async def get_admin_stats_overview(
         "uploading", "notifying"
     ]
 
-
-
-
-
-
-
-
-
-
-
-
-
-    )
-
-
-
-
-
-
-
-
-
-    )
-
-
-
-
+    # Limits for streaming queries - these are safety limits to prevent memory issues
+    # If hit, stats may be incomplete so we log a warning
+    USERS_STREAM_LIMIT = 2000
+    JOBS_STREAM_LIMIT = 10000
+
+    if exclude_test:
+        # When excluding test data, we must stream and filter in Python
+        # because Firestore doesn't support "not ends with" queries
+
+        # Stream all users and filter
+        all_users = []
+        users_fetched = 0
+        for doc in users_collection.limit(USERS_STREAM_LIMIT).stream():
+            users_fetched += 1
+            user_data = doc.to_dict()
+            email = user_data.get("email", "")
+            if not is_test_email(email):
+                all_users.append(user_data)
+
+        if users_fetched >= USERS_STREAM_LIMIT:
+            logger.warning(f"Users stream hit limit ({USERS_STREAM_LIMIT}), stats may be incomplete")
+
+        # Calculate user stats from filtered list
+        total_users = len(all_users)
+        active_users_7d = sum(
+            1 for u in all_users
+            if u.get("last_login_at") and _normalize_datetime(u["last_login_at"]) >= seven_days_ago
+        )
+        active_users_30d = sum(
+            1 for u in all_users
+            if u.get("last_login_at") and _normalize_datetime(u["last_login_at"]) >= thirty_days_ago
+        )
+        total_beta_testers = sum(1 for u in all_users if u.get("is_beta_tester"))
 
-
-
-
-    try:
-        # Get all users and sum recent credit transactions
-        users_docs = users_collection.limit(500).stream()
-        for user_doc in users_docs:
-            user_data = user_doc.to_dict()
+        # Calculate credits from filtered users
+        total_credits_issued_30d = 0
+        for user_data in all_users:
             transactions = user_data.get("credit_transactions", [])
             for txn in transactions:
-                txn_date = txn.get("created_at")
-                if txn_date:
-
-                    if
-
-
-
-
-
-
-
-
+                txn_date = _normalize_datetime(txn.get("created_at"))
+                if txn_date and txn_date >= thirty_days_ago:
+                    amount = txn.get("amount", 0)
+                    if amount > 0:
+                        total_credits_issued_30d += amount
+
+        # Stream all jobs and filter by user_email
+        all_jobs = []
+        jobs_fetched = 0
+        for doc in jobs_collection.limit(JOBS_STREAM_LIMIT).stream():
+            jobs_fetched += 1
+            job_data = doc.to_dict()
+            user_email = job_data.get("user_email", "")
+            if not is_test_email(user_email):
+                all_jobs.append(job_data)
+
+        if jobs_fetched >= JOBS_STREAM_LIMIT:
+            logger.warning(f"Jobs stream hit limit ({JOBS_STREAM_LIMIT}), stats may be incomplete")
+
+        # Calculate job stats from filtered list
+        total_jobs = len(all_jobs)
+        jobs_last_7d = sum(
+            1 for j in all_jobs
+            if j.get("created_at") and _normalize_datetime(j["created_at"]) >= seven_days_ago
+        )
+        jobs_last_30d = sum(
+            1 for j in all_jobs
+            if j.get("created_at") and _normalize_datetime(j["created_at"]) >= thirty_days_ago
+        )
+
+        # Jobs by status
+        jobs_by_status = JobsByStatusResponse(
+            pending=sum(1 for j in all_jobs if j.get("status") == "pending"),
+            processing=sum(1 for j in all_jobs if j.get("status") in processing_statuses),
+            awaiting_review=sum(1 for j in all_jobs if j.get("status") in ["awaiting_review", "in_review"]),
+            awaiting_instrumental=sum(1 for j in all_jobs if j.get("status") == "awaiting_instrumental_selection"),
+            complete=sum(1 for j in all_jobs if j.get("status") in ["complete", "prep_complete"]),
+            failed=sum(1 for j in all_jobs if j.get("status") == "failed"),
+            cancelled=sum(1 for j in all_jobs if j.get("status") == "cancelled"),
+        )
+    else:
+        # When including test data, use efficient aggregation queries
+        def get_count(query) -> int:
+            try:
+                agg_query = aggregation.AggregationQuery(query)
+                agg_query.count(alias="count")
+                results = agg_query.get()
+                return results[0][0].value if results else 0
+            except Exception as e:
+                logger.warning(f"Aggregation query failed: {e}")
+                return 0
+
+        # User statistics
+        total_users = get_count(users_collection)
+        active_users_7d = get_count(
+            users_collection.where(filter=FieldFilter("last_login_at", ">=", seven_days_ago))
+        )
+        active_users_30d = get_count(
+            users_collection.where(filter=FieldFilter("last_login_at", ">=", thirty_days_ago))
+        )
+        total_beta_testers = get_count(
+            users_collection.where(filter=FieldFilter("is_beta_tester", "==", True))
+        )
+
+        # Job statistics
+        total_jobs = get_count(jobs_collection)
+        jobs_last_7d = get_count(
+            jobs_collection.where(filter=FieldFilter("created_at", ">=", seven_days_ago))
+        )
+        jobs_last_30d = get_count(
+            jobs_collection.where(filter=FieldFilter("created_at", ">=", thirty_days_ago))
+        )
+
+        # Jobs by status
+        jobs_by_status = JobsByStatusResponse(
+            pending=get_count(
+                jobs_collection.where(filter=FieldFilter("status", "==", "pending"))
+            ),
+            processing=sum(
+                get_count(jobs_collection.where(filter=FieldFilter("status", "==", status)))
+                for status in processing_statuses
+            ),
+            awaiting_review=get_count(
+                jobs_collection.where(filter=FieldFilter("status", "==", "awaiting_review"))
+            ) + get_count(
+                jobs_collection.where(filter=FieldFilter("status", "==", "in_review"))
+            ),
+            awaiting_instrumental=get_count(
+                jobs_collection.where(filter=FieldFilter("status", "==", "awaiting_instrumental_selection"))
+            ),
+            complete=get_count(
+                jobs_collection.where(filter=FieldFilter("status", "==", "complete"))
+            ) + get_count(
+                jobs_collection.where(filter=FieldFilter("status", "==", "prep_complete"))
+            ),
+            failed=get_count(
+                jobs_collection.where(filter=FieldFilter("status", "==", "failed"))
+            ),
+            cancelled=get_count(
+                jobs_collection.where(filter=FieldFilter("status", "==", "cancelled"))
+            ),
+        )
+
+        # Credit statistics - sum credits added in last 30 days
+        total_credits_issued_30d = 0
+        try:
+            users_fetched = 0
+            for user_doc in users_collection.limit(USERS_STREAM_LIMIT).stream():
+                users_fetched += 1
+                user_data = user_doc.to_dict()
+                transactions = user_data.get("credit_transactions", [])
+                for txn in transactions:
+                    txn_date = _normalize_datetime(txn.get("created_at"))
+                    if txn_date and txn_date >= thirty_days_ago:
                         amount = txn.get("amount", 0)
-                        if amount > 0:
+                        if amount > 0:
                             total_credits_issued_30d += amount
-
-
+            if users_fetched >= USERS_STREAM_LIMIT:
+                logger.warning(f"Credit calculation hit user limit ({USERS_STREAM_LIMIT}), total may be incomplete")
+        except Exception as e:
+            logger.warning(f"Error calculating credits: {e}")
 
     return AdminStatsOverview(
         total_users=total_users,
@@ -202,6 +342,21 @@ async def get_admin_stats_overview(
     )
 
 
+def _normalize_datetime(dt_value) -> Optional[datetime]:
+    """Normalize datetime values from Firestore (can be datetime or ISO string)."""
+    if dt_value is None:
+        return None
+    if isinstance(dt_value, datetime):
+        return dt_value.replace(tzinfo=None)
+    if isinstance(dt_value, str):
+        try:
+            parsed = datetime.fromisoformat(dt_value.replace("Z", "+00:00"))
+            return parsed.replace(tzinfo=None)
+        except Exception:
+            return None
+    return None
+
+
 # =============================================================================
 # Audio Search Management Models
 # =============================================================================
@@ -273,6 +428,7 @@ class CacheStatsResponse(BaseModel):
 async def list_audio_searches(
     limit: int = 50,
     status_filter: Optional[str] = None,
+    exclude_test: bool = True,
     auth_data: Tuple[str, UserType, int] = Depends(require_admin),
     user_service: UserService = Depends(get_user_service),
 ):
@@ -287,6 +443,7 @@ async def list_audio_searches(
     Args:
         limit: Maximum number of jobs to return (default 50)
         status_filter: Optional filter by job status (e.g., 'awaiting_audio_selection')
+        exclude_test: If True (default), exclude jobs from test users
     """
     from google.cloud.firestore_v1 import FieldFilter
 
@@ -306,6 +463,11 @@ async def list_audio_searches(
 
     for doc in query.stream():
         data = doc.to_dict()
+
+        # Filter out test users if exclude_test is True
+        if exclude_test and is_test_email(data.get("user_email", "")):
+            continue
+
         state_data = data.get("state_data", {})
         audio_results = state_data.get("audio_search_results", [])
 
@@ -609,6 +771,374 @@ class SendCompletionEmailResponse(BaseModel):
     message: str
 
 
+# =============================================================================
+# Job Files Endpoint
+# =============================================================================
+
+def _extract_files_recursive(
+    file_urls: Dict[str, Any],
+    storage: StorageService,
+    category: str = "",
+    expiration_minutes: int = 120,
+) -> List[FileInfo]:
+    """
+    Recursively extract files from nested file_urls structure.
+
+    Only includes entries that are GCS paths (gs://...).
+    Skips non-GCS entries like YouTube URLs.
+
+    Args:
+        file_urls: Dictionary of file URLs (may be nested)
+        storage: StorageService instance for generating signed URLs
+        category: Current category name (for nested calls)
+        expiration_minutes: How long signed URLs should be valid
+
+    Returns:
+        List of FileInfo objects with signed download URLs
+    """
+    files = []
+
+    for key, value in file_urls.items():
+        if isinstance(value, dict):
+            # Nested structure - recurse with key as category
+            nested_files = _extract_files_recursive(
+                value,
+                storage,
+                category=key if not category else f"{category}.{key}",
+                expiration_minutes=expiration_minutes,
+            )
+            files.extend(nested_files)
+        elif isinstance(value, str) and value.startswith("gs://"):
+            # GCS path - generate signed URL
+            try:
+                signed_url = storage.generate_signed_url(value, expiration_minutes=expiration_minutes)
+                # Extract filename from path
+                name = value.split("/")[-1] if "/" in value else value
+                files.append(FileInfo(
+                    name=name,
+                    path=value,
+                    download_url=signed_url,
+                    category=category,
+                    file_key=key,
+                ))
+            except Exception as e:
+                # Log but don't fail - file might not exist
+                logger.warning(f"Failed to generate signed URL for {value}: {e}")
+        # Skip non-GCS values (e.g., youtube URLs, video IDs)
+
+    return files
+
+
+@router.get("/jobs/{job_id}/files", response_model=JobFilesResponse)
+async def get_job_files(
+    job_id: str,
+    auth_data: Tuple[str, UserType, int] = Depends(require_admin),
+):
+    """
+    Get all files for a job with signed download URLs.
+
+    Returns a list of all files associated with the job, including:
+    - Input audio file
+    - Stem separation results (vocals, instrumentals, etc.)
+    - Lyrics files (LRC, ASS, corrections JSON)
+    - Screen files (title, end screens)
+    - Video files (with/without vocals)
+    - Final output files (various formats)
+    - Package files (CDG, TXT zips)
+
+    Each file includes a signed URL that's valid for 2 hours.
+    Non-GCS entries (like YouTube URLs) are excluded.
+
+    Requires admin authentication.
+    """
+    job_manager = JobManager()
+    job = job_manager.get_job(job_id)
+
+    if not job:
+        raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
+
+    # Extract all files with signed URLs
+    storage = StorageService()
+    file_urls = job.file_urls or {}
+
+    files = _extract_files_recursive(file_urls, storage)
+
+    return JobFilesResponse(
+        job_id=job.job_id,
+        artist=job.artist,
+        title=job.title,
+        files=files,
+        total_files=len(files),
+    )
+
+
+@router.patch("/jobs/{job_id}", response_model=JobUpdateResponse)
+async def update_job(
+    job_id: str,
+    request: Dict[str, Any],
+    auth_data: AuthResult = Depends(require_admin),
+):
+    """
+    Update editable fields of a job (admin only).
+
+    This endpoint allows admins to update certain job fields without
+    affecting the job's processing state. It's useful for:
+    - Correcting artist/title typos
+    - Changing user assignment
+    - Updating delivery settings (email, theme, etc.)
+
+    Editable fields:
+    - artist, title: Track metadata
+    - user_email: Job owner
+    - theme_id: Visual theme
+    - enable_cdg, enable_txt, enable_youtube_upload: Output options
+    - customer_email, customer_notes: Made-for-you order info
+    - brand_prefix: Brand code prefix
+    - non_interactive, prep_only: Workflow options
+    - discord_webhook_url: Notification URL
+    - youtube_description, youtube_description_template: YouTube settings
+
+    Non-editable fields (will return 400 error):
+    - job_id, status, progress: System-managed
+    - created_at, updated_at: Timestamps
+    - state_data, file_urls, timeline: Processing state
+    - worker_logs, worker_ids: Audit/tracking data
+
+    For status changes, use the reset endpoint instead.
+    """
+    admin_email = auth_data.user_email or "unknown"
+
+    # Check for non-editable fields in request
+    non_editable_fields = set(request.keys()) - EDITABLE_JOB_FIELDS
+    if non_editable_fields:
+        raise HTTPException(
+            status_code=400,
+            detail=f"The following fields are not editable: {', '.join(sorted(non_editable_fields))}. "
+                   f"Editable fields are: {', '.join(sorted(EDITABLE_JOB_FIELDS))}"
+        )
+
+    # Filter to only include provided fields (non-None values)
+    updates = {k: v for k, v in request.items() if v is not None}
+
+    if not updates:
+        raise HTTPException(
+            status_code=400,
+            detail="No valid fields provided for update. "
+                   f"Editable fields are: {', '.join(sorted(EDITABLE_JOB_FIELDS))}"
+        )
+
+    job_manager = JobManager()
+    job = job_manager.get_job(job_id)
+
+    if not job:
+        raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
+
+    # Perform the update
+    success = job_manager.update_job(job_id, updates)
+
+    if not success:
+        raise HTTPException(
+            status_code=500,
+            detail="Failed to update job. Please try again."
+        )
+
+    # Log the admin action
+    logger.info(
+        f"Admin {admin_email} updated job {job_id}. "
+        f"Updated fields: {list(updates.keys())}"
+    )
+
+    return JobUpdateResponse(
+        status="success",
+        job_id=job_id,
+        updated_fields=list(updates.keys()),
+        message=f"Successfully updated {len(updates)} field(s)",
+    )
+
+
+# =============================================================================
+# Job Reset Endpoint
+# =============================================================================
+
+class JobResetRequest(BaseModel):
+    """Request model for resetting a job to a specific state."""
+    target_state: str
+
+
+class JobResetResponse(BaseModel):
+    """Response from job reset endpoint."""
+    status: str
+    job_id: str
+    previous_status: str
+    new_status: str
+    message: str
+    cleared_data: List[str]
+
+
+# States that are allowed as reset targets
+ALLOWED_RESET_STATES = {
+    "pending",
+    "awaiting_audio_selection",
+    "awaiting_review",
+    "awaiting_instrumental_selection",
+}
+
+# State data keys to clear for each reset target
+# Keys not in this mapping are preserved
+STATE_DATA_CLEAR_KEYS = {
+    "pending": [
+        "audio_search_results",
+        "audio_search_count",
+        "remote_search_id",
+        "audio_selection",
+        "review_complete",
+        "corrected_lyrics",
+        "instrumental_selection",
+        "video_progress",
+        "render_progress",
+        "screens_progress",
+    ],
+    "awaiting_audio_selection": [
+        "audio_selection",
+        "review_complete",
+        "corrected_lyrics",
+        "instrumental_selection",
+        "video_progress",
+        "render_progress",
+        "screens_progress",
+    ],
+    "awaiting_review": [
+        "review_complete",
+        "corrected_lyrics",
+        "instrumental_selection",
+        "video_progress",
+        "render_progress",
+        "screens_progress",
+    ],
+    "awaiting_instrumental_selection": [
+        "instrumental_selection",
+        "video_progress",
+        "render_progress",
+        "screens_progress",
+    ],
+}
+
+
+@router.post("/jobs/{job_id}/reset", response_model=JobResetResponse)
+async def reset_job(
+    job_id: str,
+    request: JobResetRequest,
+    auth_data: AuthResult = Depends(require_admin),
+    user_service: UserService = Depends(get_user_service),
+):
+    """
+    Reset a job to a specific state for re-processing (admin only).
+
+    This endpoint allows admins to reset a job back to specific workflow
+    checkpoints to re-do parts of the processing. This is useful for:
+    - Re-running audio search after flacfetch updates
+    - Re-reviewing lyrics after corrections
+    - Re-selecting instrumental after hearing the result
+    - Restarting a failed job from the beginning
+
+    Allowed target states:
+    - pending: Restart from the beginning (clears all processing data)
+    - awaiting_audio_selection: Re-select audio source
+    - awaiting_review: Re-review lyrics (preserves audio stems)
+    - awaiting_instrumental_selection: Re-select instrumental (preserves review)
+
+    State data is cleared based on the target state to ensure a clean
+    re-processing from that point forward.
+    """
+    admin_email = auth_data.user_email or "unknown"
+    target_state = request.target_state.lower()
+
+    # Validate target state
+    if target_state not in ALLOWED_RESET_STATES:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid target state '{target_state}'. "
+                   f"Allowed states are: {', '.join(sorted(ALLOWED_RESET_STATES))}"
+        )
+
+    job_manager = JobManager()
+    job = job_manager.get_job(job_id)
+
+    if not job:
+        raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
+
+    previous_status = job.status
+
+    # Build update payload
+    updates = {
+        "status": target_state,
+        "progress": 0,
+        "message": f"Job reset to {target_state} by admin",
+        "updated_at": datetime.utcnow().isoformat(),
+    }
+
+    # Clear state data keys based on target state
+    keys_to_clear = STATE_DATA_CLEAR_KEYS.get(target_state, [])
+    cleared_keys = []
+    current_state_data = job.state_data or {}
+
+    for key in keys_to_clear:
+        if key in current_state_data:
+            cleared_keys.append(key)
+
+    # Add timeline event
+    timeline_event = {
+        "status": target_state,
+        "timestamp": datetime.utcnow().isoformat(),
+        "message": f"Admin reset from {previous_status} to {target_state}",
+    }
+
+    # Perform the update with state_data clearing
+    # We need to set the cleared keys to DELETE_FIELD
+    success = job_manager.update_job(job_id, updates)
+
+    if not success:
+        raise HTTPException(
+            status_code=500,
+            detail="Failed to reset job. Please try again."
+        )
+
+    # Clear the state data keys separately using direct Firestore update
+    from google.cloud.firestore_v1 import DELETE_FIELD, ArrayUnion
+
+    job_ref = user_service.db.collection("jobs").document(job_id)
+
+    if cleared_keys:
+        clear_updates = {}
+        for key in cleared_keys:
+            clear_updates[f"state_data.{key}"] = DELETE_FIELD
+
+        # Add timeline event
+        clear_updates["timeline"] = ArrayUnion([timeline_event])
+
+        job_ref.update(clear_updates)
+    else:
+        # Just add timeline event
+        job_ref.update({
+            "timeline": ArrayUnion([timeline_event])
+        })
+
+    # Log the admin action
+    logger.info(
+        f"Admin {admin_email} reset job {job_id} from {previous_status} to {target_state}. "
+        f"Cleared state_data keys: {cleared_keys}"
+    )
+
+    return JobResetResponse(
+        status="success",
+        job_id=job_id,
+        previous_status=previous_status,
+        new_status=target_state,
+        message=f"Job reset from {previous_status} to {target_state}",
+        cleared_data=cleared_keys,
+    )
+
+
 @router.get("/jobs/{job_id}/completion-message", response_model=CompletionMessageResponse)
 async def get_job_completion_message(
     job_id: str,
@@ -740,3 +1270,77 @@ async def send_job_completion_email(
             status_code=500,
             detail="Failed to send email. Check email service configuration."
         )
+
+
+# =============================================================================
+# User Impersonation
+# =============================================================================
+
+class ImpersonateUserResponse(BaseModel):
+    """Response from impersonate user endpoint."""
+    session_token: str
+    user_email: str
+    message: str
+
+
+@router.post("/users/{email}/impersonate", response_model=ImpersonateUserResponse)
+async def impersonate_user(
+    email: str,
+    auth_data: Tuple[str, UserType, int] = Depends(require_admin),
+    user_service: UserService = Depends(get_user_service),
+):
+    """
+    Create a session token to impersonate a user (admin only).
+
+    This allows admins to view the application exactly as a specific user would see it.
+    The admin's original session remains valid and can be restored client-side.
+
+    Security:
+    - Only admins can impersonate
+    - Creates a real session (auditable in Firestore)
+    - Impersonation is logged for security audit
+
+    Args:
+        email: Email of the user to impersonate
+
+    Returns:
+        session_token: A valid session token for the target user
+        user_email: The impersonated user's email
+        message: Success message
+    """
+    admin_email = auth_data[0]
+    target_email = email.lower()
+
+    # Cannot impersonate yourself
+    if target_email == admin_email.lower():
+        raise HTTPException(
+            status_code=400,
+            detail="Cannot impersonate yourself"
+        )
+
+    # Verify target user exists
+    target_user = user_service.get_user(target_email)
+    if not target_user:
+        raise HTTPException(
+            status_code=404,
+            detail=f"User {target_email} not found"
+        )
+
+    # Create a real session for the target user
+    session = user_service.create_session(
+        user_email=target_email,
+        ip_address=None,  # Not tracking IP for impersonation
+        user_agent=f"Impersonation by {admin_email}",
+    )
+
+    # Log impersonation for audit trail
+    logger.info(
+        f"IMPERSONATION: Admin {admin_email} started impersonating user {target_email}. "
+        f"Session token prefix: {session.token[:12]}..."
+    )
+
+    return ImpersonateUserResponse(
+        session_token=session.token,
+        user_email=target_email,
+        message=f"Now impersonating {target_email}",
+    )