karaoke-gen 0.96.0__py3-none-any.whl → 0.101.0__py3-none-any.whl

This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Files changed (58)
  1. backend/api/routes/admin.py +696 -92
  2. backend/api/routes/audio_search.py +29 -8
  3. backend/api/routes/file_upload.py +99 -22
  4. backend/api/routes/health.py +65 -0
  5. backend/api/routes/internal.py +6 -0
  6. backend/api/routes/jobs.py +28 -1
  7. backend/api/routes/review.py +13 -6
  8. backend/api/routes/tenant.py +120 -0
  9. backend/api/routes/users.py +472 -51
  10. backend/main.py +31 -2
  11. backend/middleware/__init__.py +7 -1
  12. backend/middleware/tenant.py +192 -0
  13. backend/models/job.py +19 -3
  14. backend/models/tenant.py +208 -0
  15. backend/models/user.py +18 -0
  16. backend/services/email_service.py +253 -6
  17. backend/services/encoding_service.py +128 -31
  18. backend/services/firestore_service.py +6 -0
  19. backend/services/job_manager.py +44 -2
  20. backend/services/langfuse_preloader.py +98 -0
  21. backend/services/nltk_preloader.py +122 -0
  22. backend/services/spacy_preloader.py +65 -0
  23. backend/services/stripe_service.py +133 -11
  24. backend/services/tenant_service.py +285 -0
  25. backend/services/user_service.py +85 -7
  26. backend/tests/emulator/conftest.py +22 -1
  27. backend/tests/emulator/test_made_for_you_integration.py +167 -0
  28. backend/tests/test_admin_job_files.py +337 -0
  29. backend/tests/test_admin_job_reset.py +384 -0
  30. backend/tests/test_admin_job_update.py +326 -0
  31. backend/tests/test_email_service.py +233 -0
  32. backend/tests/test_impersonation.py +223 -0
  33. backend/tests/test_job_creation_regression.py +4 -0
  34. backend/tests/test_job_manager.py +171 -9
  35. backend/tests/test_jobs_api.py +11 -1
  36. backend/tests/test_made_for_you.py +2086 -0
  37. backend/tests/test_models.py +139 -0
  38. backend/tests/test_spacy_preloader.py +119 -0
  39. backend/tests/test_tenant_api.py +350 -0
  40. backend/tests/test_tenant_middleware.py +345 -0
  41. backend/tests/test_tenant_models.py +406 -0
  42. backend/tests/test_tenant_service.py +418 -0
  43. backend/utils/test_data.py +27 -0
  44. backend/workers/screens_worker.py +16 -6
  45. backend/workers/video_worker.py +8 -3
  46. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.101.0.dist-info}/METADATA +1 -1
  47. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.101.0.dist-info}/RECORD +58 -39
  48. lyrics_transcriber/correction/agentic/agent.py +17 -6
  49. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -43
  50. lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
  51. lyrics_transcriber/correction/anchor_sequence.py +151 -37
  52. lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
  53. lyrics_transcriber/correction/phrase_analyzer.py +18 -0
  54. lyrics_transcriber/frontend/src/api.ts +13 -5
  55. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +90 -57
  56. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.101.0.dist-info}/WHEEL +0 -0
  57. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.101.0.dist-info}/entry_points.txt +0 -0
  58. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.101.0.dist-info}/licenses/LICENSE +0 -0
@@ -15,11 +15,13 @@ from fastapi import APIRouter, Depends, HTTPException
  from pydantic import BaseModel
 
  from backend.api.dependencies import require_admin
- from backend.services.auth_service import UserType
+ from backend.services.auth_service import UserType, AuthResult
  from backend.services.user_service import get_user_service, UserService, USERS_COLLECTION
  from backend.services.job_manager import JobManager
  from backend.services.flacfetch_client import get_flacfetch_client, FlacfetchServiceError
+ from backend.services.storage_service import StorageService
  from backend.models.job import JobStatus
+ from backend.utils.test_data import is_test_email
  from karaoke_gen.utils import sanitize_filename
 
 
@@ -55,18 +57,91 @@ class AdminStatsOverview(BaseModel):
      total_beta_testers: int
 
 
+ class FileInfo(BaseModel):
+     """Information about a single file with signed download URL."""
+     name: str
+     path: str  # GCS path (gs://bucket/...)
+     download_url: str  # Signed URL for download
+     category: str  # e.g., "stems", "lyrics", "finals"
+     file_key: str  # e.g., "instrumental_clean", "lrc"
+
+
+ class JobFilesResponse(BaseModel):
+     """Response containing all files for a job with signed download URLs."""
+     job_id: str
+     artist: Optional[str]
+     title: Optional[str]
+     files: List[FileInfo]
+     total_files: int
+
+
+ class JobUpdateRequest(BaseModel):
+     """Request model for updating job fields."""
+     # Editable text fields
+     artist: Optional[str] = None
+     title: Optional[str] = None
+     user_email: Optional[str] = None
+     theme_id: Optional[str] = None
+     brand_prefix: Optional[str] = None
+     discord_webhook_url: Optional[str] = None
+     youtube_description: Optional[str] = None
+     youtube_description_template: Optional[str] = None
+     customer_email: Optional[str] = None
+     customer_notes: Optional[str] = None
+
+     # Editable boolean fields
+     enable_cdg: Optional[bool] = None
+     enable_txt: Optional[bool] = None
+     enable_youtube_upload: Optional[bool] = None
+     non_interactive: Optional[bool] = None
+     prep_only: Optional[bool] = None
+
+
+ class JobUpdateResponse(BaseModel):
+     """Response from job update endpoint."""
+     status: str
+     job_id: str
+     updated_fields: List[str]
+     message: str
+
+
+ # Fields that are allowed to be updated via PATCH endpoint
+ EDITABLE_JOB_FIELDS = {
+     "artist",
+     "title",
+     "user_email",
+     "theme_id",
+     "brand_prefix",
+     "discord_webhook_url",
+     "youtube_description",
+     "youtube_description_template",
+     "customer_email",
+     "customer_notes",
+     "enable_cdg",
+     "enable_txt",
+     "enable_youtube_upload",
+     "non_interactive",
+     "prep_only",
+ }
+
+
  # =============================================================================
  # Admin Stats Endpoints
  # =============================================================================
 
  @router.get("/stats/overview", response_model=AdminStatsOverview)
  async def get_admin_stats_overview(
+     exclude_test: bool = True,
      auth_data: Tuple[str, UserType, int] = Depends(require_admin),
      user_service: UserService = Depends(get_user_service),
  ):
      """
      Get overview statistics for admin dashboard.
 
+     Args:
+         exclude_test: If True (default), exclude test data (users with @inbox.testmail.app emails
+             and jobs created by test users) from all counts.
+
      Includes:
      - User counts (total, active in 7d, active in 30d)
      - Job counts (total, by status, recent)
@@ -81,48 +156,10 @@ async def get_admin_stats_overview(
      seven_days_ago = now - timedelta(days=7)
      thirty_days_ago = now - timedelta(days=30)
 
-     # Helper function to get count using aggregation
-     def get_count(query) -> int:
-         try:
-             agg_query = aggregation.AggregationQuery(query)
-             agg_query.count(alias="count")
-             results = agg_query.get()
-             return results[0][0].value if results else 0
-         except Exception as e:
-             logger.warning(f"Aggregation query failed: {e}")
-             return 0
-
-     # User statistics
      users_collection = db.collection(USERS_COLLECTION)
-
-     total_users = get_count(users_collection)
-
-     active_users_7d = get_count(
-         users_collection.where(filter=FieldFilter("last_login_at", ">=", seven_days_ago))
-     )
-
-     active_users_30d = get_count(
-         users_collection.where(filter=FieldFilter("last_login_at", ">=", thirty_days_ago))
-     )
-
-     total_beta_testers = get_count(
-         users_collection.where(filter=FieldFilter("is_beta_tester", "==", True))
-     )
-
-     # Job statistics
      jobs_collection = db.collection("jobs")
 
-     total_jobs = get_count(jobs_collection)
-
-     jobs_last_7d = get_count(
-         jobs_collection.where(filter=FieldFilter("created_at", ">=", seven_days_ago))
-     )
-
-     jobs_last_30d = get_count(
-         jobs_collection.where(filter=FieldFilter("created_at", ">=", thirty_days_ago))
-     )
-
-     # Jobs by status - map multiple statuses to simplified categories
+     # Jobs by status category mapping
      processing_statuses = [
          "downloading", "downloading_audio", "searching_audio", "awaiting_audio_selection",
          "separating_stage1", "separating_stage2", "transcribing", "correcting",
@@ -131,63 +168,166 @@ async def get_admin_stats_overview(
          "uploading", "notifying"
      ]
 
-     jobs_by_status = JobsByStatusResponse(
-         pending=get_count(
-             jobs_collection.where(filter=FieldFilter("status", "==", "pending"))
-         ),
-         processing=sum(
-             get_count(jobs_collection.where(filter=FieldFilter("status", "==", status)))
-             for status in processing_statuses
-         ),
-         awaiting_review=get_count(
-             jobs_collection.where(filter=FieldFilter("status", "==", "awaiting_review"))
-         ) + get_count(
-             jobs_collection.where(filter=FieldFilter("status", "==", "in_review"))
-         ),
-         awaiting_instrumental=get_count(
-             jobs_collection.where(filter=FieldFilter("status", "==", "awaiting_instrumental_selection"))
-         ),
-         complete=get_count(
-             jobs_collection.where(filter=FieldFilter("status", "==", "complete"))
-         ) + get_count(
-             jobs_collection.where(filter=FieldFilter("status", "==", "prep_complete"))
-         ),
-         failed=get_count(
-             jobs_collection.where(filter=FieldFilter("status", "==", "failed"))
-         ),
-         cancelled=get_count(
-             jobs_collection.where(filter=FieldFilter("status", "==", "cancelled"))
-         ),
-     )
+     # Limits for streaming queries - these are safety limits to prevent memory issues
+     # If hit, stats may be incomplete so we log a warning
+     USERS_STREAM_LIMIT = 2000
+     JOBS_STREAM_LIMIT = 10000
+
+     if exclude_test:
+         # When excluding test data, we must stream and filter in Python
+         # because Firestore doesn't support "not ends with" queries
+
+         # Stream all users and filter
+         all_users = []
+         users_fetched = 0
+         for doc in users_collection.limit(USERS_STREAM_LIMIT).stream():
+             users_fetched += 1
+             user_data = doc.to_dict()
+             email = user_data.get("email", "")
+             if not is_test_email(email):
+                 all_users.append(user_data)
+
+         if users_fetched >= USERS_STREAM_LIMIT:
+             logger.warning(f"Users stream hit limit ({USERS_STREAM_LIMIT}), stats may be incomplete")
+
+         # Calculate user stats from filtered list
+         total_users = len(all_users)
+         active_users_7d = sum(
+             1 for u in all_users
+             if u.get("last_login_at") and _normalize_datetime(u["last_login_at"]) >= seven_days_ago
+         )
+         active_users_30d = sum(
+             1 for u in all_users
+             if u.get("last_login_at") and _normalize_datetime(u["last_login_at"]) >= thirty_days_ago
+         )
+         total_beta_testers = sum(1 for u in all_users if u.get("is_beta_tester"))
 
-     # Credit statistics - sum credits added in last 30 days
-     # This is more expensive, so we'll just estimate from users
-     total_credits_issued_30d = 0
-     try:
-         # Get all users and sum recent credit transactions
-         users_docs = users_collection.limit(500).stream()
-         for user_doc in users_docs:
-             user_data = user_doc.to_dict()
+         # Calculate credits from filtered users
+         total_credits_issued_30d = 0
+         for user_data in all_users:
              transactions = user_data.get("credit_transactions", [])
             for txn in transactions:
-                 txn_date = txn.get("created_at")
-                 if txn_date:
-                     # Handle both datetime and string formats
-                     if isinstance(txn_date, str):
-                         try:
-                             txn_date = datetime.fromisoformat(txn_date.replace("Z", "+00:00"))
-                         except Exception:
-                             continue
-                     if isinstance(txn_date, datetime):
-                         txn_date = txn_date.replace(tzinfo=None)
-                     else:
-                         continue
-                     if txn_date >= thirty_days_ago:
+                 txn_date = _normalize_datetime(txn.get("created_at"))
+                 if txn_date and txn_date >= thirty_days_ago:
+                     amount = txn.get("amount", 0)
+                     if amount > 0:
+                         total_credits_issued_30d += amount
+
+         # Stream all jobs and filter by user_email
+         all_jobs = []
+         jobs_fetched = 0
+         for doc in jobs_collection.limit(JOBS_STREAM_LIMIT).stream():
+             jobs_fetched += 1
+             job_data = doc.to_dict()
+             user_email = job_data.get("user_email", "")
+             if not is_test_email(user_email):
+                 all_jobs.append(job_data)
+
+         if jobs_fetched >= JOBS_STREAM_LIMIT:
+             logger.warning(f"Jobs stream hit limit ({JOBS_STREAM_LIMIT}), stats may be incomplete")
+
+         # Calculate job stats from filtered list
+         total_jobs = len(all_jobs)
+         jobs_last_7d = sum(
+             1 for j in all_jobs
+             if j.get("created_at") and _normalize_datetime(j["created_at"]) >= seven_days_ago
+         )
+         jobs_last_30d = sum(
+             1 for j in all_jobs
+             if j.get("created_at") and _normalize_datetime(j["created_at"]) >= thirty_days_ago
+         )
+
+         # Jobs by status
+         jobs_by_status = JobsByStatusResponse(
+             pending=sum(1 for j in all_jobs if j.get("status") == "pending"),
+             processing=sum(1 for j in all_jobs if j.get("status") in processing_statuses),
+             awaiting_review=sum(1 for j in all_jobs if j.get("status") in ["awaiting_review", "in_review"]),
+             awaiting_instrumental=sum(1 for j in all_jobs if j.get("status") == "awaiting_instrumental_selection"),
+             complete=sum(1 for j in all_jobs if j.get("status") in ["complete", "prep_complete"]),
+             failed=sum(1 for j in all_jobs if j.get("status") == "failed"),
+             cancelled=sum(1 for j in all_jobs if j.get("status") == "cancelled"),
+         )
+     else:
+         # When including test data, use efficient aggregation queries
+         def get_count(query) -> int:
+             try:
+                 agg_query = aggregation.AggregationQuery(query)
+                 agg_query.count(alias="count")
+                 results = agg_query.get()
+                 return results[0][0].value if results else 0
+             except Exception as e:
+                 logger.warning(f"Aggregation query failed: {e}")
+                 return 0
+
+         # User statistics
+         total_users = get_count(users_collection)
+         active_users_7d = get_count(
+             users_collection.where(filter=FieldFilter("last_login_at", ">=", seven_days_ago))
+         )
+         active_users_30d = get_count(
+             users_collection.where(filter=FieldFilter("last_login_at", ">=", thirty_days_ago))
+         )
+         total_beta_testers = get_count(
+             users_collection.where(filter=FieldFilter("is_beta_tester", "==", True))
+         )
+
+         # Job statistics
+         total_jobs = get_count(jobs_collection)
+         jobs_last_7d = get_count(
+             jobs_collection.where(filter=FieldFilter("created_at", ">=", seven_days_ago))
+         )
+         jobs_last_30d = get_count(
+             jobs_collection.where(filter=FieldFilter("created_at", ">=", thirty_days_ago))
+         )
+
+         # Jobs by status
+         jobs_by_status = JobsByStatusResponse(
+             pending=get_count(
+                 jobs_collection.where(filter=FieldFilter("status", "==", "pending"))
+             ),
+             processing=sum(
+                 get_count(jobs_collection.where(filter=FieldFilter("status", "==", status)))
+                 for status in processing_statuses
+             ),
+             awaiting_review=get_count(
+                 jobs_collection.where(filter=FieldFilter("status", "==", "awaiting_review"))
+             ) + get_count(
+                 jobs_collection.where(filter=FieldFilter("status", "==", "in_review"))
+             ),
+             awaiting_instrumental=get_count(
+                 jobs_collection.where(filter=FieldFilter("status", "==", "awaiting_instrumental_selection"))
+             ),
+             complete=get_count(
+                 jobs_collection.where(filter=FieldFilter("status", "==", "complete"))
+             ) + get_count(
+                 jobs_collection.where(filter=FieldFilter("status", "==", "prep_complete"))
+             ),
+             failed=get_count(
+                 jobs_collection.where(filter=FieldFilter("status", "==", "failed"))
+             ),
+             cancelled=get_count(
+                 jobs_collection.where(filter=FieldFilter("status", "==", "cancelled"))
+             ),
+         )
+
+         # Credit statistics - sum credits added in last 30 days
+         total_credits_issued_30d = 0
+         try:
+             users_fetched = 0
+             for user_doc in users_collection.limit(USERS_STREAM_LIMIT).stream():
+                 users_fetched += 1
+                 user_data = user_doc.to_dict()
+                 transactions = user_data.get("credit_transactions", [])
+                 for txn in transactions:
+                     txn_date = _normalize_datetime(txn.get("created_at"))
+                     if txn_date and txn_date >= thirty_days_ago:
                          amount = txn.get("amount", 0)
-                         if amount > 0:  # Only count additions, not deductions
+                         if amount > 0:
                              total_credits_issued_30d += amount
-     except Exception as e:
-         logger.warning(f"Error calculating credits: {e}")
+             if users_fetched >= USERS_STREAM_LIMIT:
+                 logger.warning(f"Credit calculation hit user limit ({USERS_STREAM_LIMIT}), total may be incomplete")
+         except Exception as e:
+             logger.warning(f"Error calculating credits: {e}")
 
      return AdminStatsOverview(
          total_users=total_users,
@@ -202,6 +342,21 @@ async def get_admin_stats_overview(
      )
 
 
+ def _normalize_datetime(dt_value) -> Optional[datetime]:
+     """Normalize datetime values from Firestore (can be datetime or ISO string)."""
+     if dt_value is None:
+         return None
+     if isinstance(dt_value, datetime):
+         return dt_value.replace(tzinfo=None)
+     if isinstance(dt_value, str):
+         try:
+             parsed = datetime.fromisoformat(dt_value.replace("Z", "+00:00"))
+             return parsed.replace(tzinfo=None)
+         except Exception:
+             return None
+     return None
+
+
  # =============================================================================
  # Audio Search Management Models
  # =============================================================================
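_normalize_datetime exists because Firestore can hand back either timezone-aware datetime objects or ISO-8601 strings for the same field, while the cutoffs above (seven_days_ago, thirty_days_ago) are naive datetimes; comparing aware to naive raises TypeError. A standalone sketch of the behavior, mirroring the helper with stdlib only:

from datetime import datetime, timezone

def _normalize_datetime(dt_value):
    # Mirror of the helper in the diff above, reproduced here for illustration.
    if dt_value is None:
        return None
    if isinstance(dt_value, datetime):
        return dt_value.replace(tzinfo=None)
    if isinstance(dt_value, str):
        try:
            return datetime.fromisoformat(dt_value.replace("Z", "+00:00")).replace(tzinfo=None)
        except Exception:
            return None
    return None

aware = datetime(2024, 5, 1, 12, 0, tzinfo=timezone.utc)
print(_normalize_datetime(aware))                   # 2024-05-01 12:00:00 (naive)
print(_normalize_datetime("2024-05-01T12:00:00Z"))  # 2024-05-01 12:00:00 (naive)
print(_normalize_datetime("not-a-date"))            # None

One caveat: replace(tzinfo=None) drops the offset without converting, so a non-UTC timestamp would keep its local wall time; Firestore timestamps are UTC in practice, so the naive comparisons hold.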
@@ -273,6 +428,7 @@ class CacheStatsResponse(BaseModel):
  async def list_audio_searches(
      limit: int = 50,
      status_filter: Optional[str] = None,
+     exclude_test: bool = True,
      auth_data: Tuple[str, UserType, int] = Depends(require_admin),
      user_service: UserService = Depends(get_user_service),
  ):
@@ -287,6 +443,7 @@ async def list_audio_searches(
      Args:
          limit: Maximum number of jobs to return (default 50)
          status_filter: Optional filter by job status (e.g., 'awaiting_audio_selection')
+         exclude_test: If True (default), exclude jobs from test users
      """
      from google.cloud.firestore_v1 import FieldFilter
 
@@ -306,6 +463,11 @@
 
      for doc in query.stream():
          data = doc.to_dict()
+
+         # Filter out test users if exclude_test is True
+         if exclude_test and is_test_email(data.get("user_email", "")):
+             continue
+
          state_data = data.get("state_data", {})
          audio_results = state_data.get("audio_search_results", [])
 
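The is_test_email helper comes from the new backend/utils/test_data.py (+27 lines in this release) and is not shown in this diff. Based on the stats docstring above, which defines test users by their @inbox.testmail.app addresses, it plausibly reduces to a suffix check; a hypothetical sketch, not the actual implementation:

# Hypothetical reconstruction -- the real backend/utils/test_data.py is not
# included in this diff; only the @inbox.testmail.app convention is documented.
TEST_EMAIL_SUFFIX = "@inbox.testmail.app"  # assumed constant name

def is_test_email(email: str) -> bool:
    """Return True if the address belongs to a synthetic test account."""
    return bool(email) and email.strip().lower().endswith(TEST_EMAIL_SUFFIX)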
@@ -609,6 +771,374 @@ class SendCompletionEmailResponse(BaseModel):
      message: str
 
 
+ # =============================================================================
+ # Job Files Endpoint
+ # =============================================================================
+
+ def _extract_files_recursive(
+     file_urls: Dict[str, Any],
+     storage: StorageService,
+     category: str = "",
+     expiration_minutes: int = 120,
+ ) -> List[FileInfo]:
+     """
+     Recursively extract files from nested file_urls structure.
+
+     Only includes entries that are GCS paths (gs://...).
+     Skips non-GCS entries like YouTube URLs.
+
+     Args:
+         file_urls: Dictionary of file URLs (may be nested)
+         storage: StorageService instance for generating signed URLs
+         category: Current category name (for nested calls)
+         expiration_minutes: How long signed URLs should be valid
+
+     Returns:
+         List of FileInfo objects with signed download URLs
+     """
+     files = []
+
+     for key, value in file_urls.items():
+         if isinstance(value, dict):
+             # Nested structure - recurse with key as category
+             nested_files = _extract_files_recursive(
+                 value,
+                 storage,
+                 category=key if not category else f"{category}.{key}",
+                 expiration_minutes=expiration_minutes,
+             )
+             files.extend(nested_files)
+         elif isinstance(value, str) and value.startswith("gs://"):
+             # GCS path - generate signed URL
+             try:
+                 signed_url = storage.generate_signed_url(value, expiration_minutes=expiration_minutes)
+                 # Extract filename from path
+                 name = value.split("/")[-1] if "/" in value else value
+                 files.append(FileInfo(
+                     name=name,
+                     path=value,
+                     download_url=signed_url,
+                     category=category,
+                     file_key=key,
+                 ))
+             except Exception as e:
+                 # Log but don't fail - file might not exist
+                 logger.warning(f"Failed to generate signed URL for {value}: {e}")
+         # Skip non-GCS values (e.g., youtube URLs, video IDs)
+
+     return files
+
+
+ @router.get("/jobs/{job_id}/files", response_model=JobFilesResponse)
+ async def get_job_files(
+     job_id: str,
+     auth_data: Tuple[str, UserType, int] = Depends(require_admin),
+ ):
+     """
+     Get all files for a job with signed download URLs.
+
+     Returns a list of all files associated with the job, including:
+     - Input audio file
+     - Stem separation results (vocals, instrumentals, etc.)
+     - Lyrics files (LRC, ASS, corrections JSON)
+     - Screen files (title, end screens)
+     - Video files (with/without vocals)
+     - Final output files (various formats)
+     - Package files (CDG, TXT zips)
+
+     Each file includes a signed URL that's valid for 2 hours.
+     Non-GCS entries (like YouTube URLs) are excluded.
+
+     Requires admin authentication.
+     """
+     job_manager = JobManager()
+     job = job_manager.get_job(job_id)
+
+     if not job:
+         raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
+
+     # Extract all files with signed URLs
+     storage = StorageService()
+     file_urls = job.file_urls or {}
+
+     files = _extract_files_recursive(file_urls, storage)
+
+     return JobFilesResponse(
+         job_id=job.job_id,
+         artist=job.artist,
+         title=job.title,
+         files=files,
+         total_files=len(files),
+     )
+
+
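A sketch of how a client might consume the new files endpoint. The base URL, the /api/admin prefix, and the bearer-token header below are illustrative assumptions; only the route path and the response shape come from the diff:

import requests

BASE = "https://example.invalid/api/admin"  # hypothetical deployment URL/prefix
HEADERS = {"Authorization": "Bearer <admin-session-token>"}  # assumed auth scheme

resp = requests.get(f"{BASE}/jobs/abc123/files", headers=HEADERS)
resp.raise_for_status()
payload = resp.json()
print(payload["total_files"])
for f in payload["files"]:
    # category/file_key mirror the nested file_urls keys, e.g. "stems" / "instrumental_clean"
    print(f["category"], f["file_key"], f["download_url"][:60])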
+ @router.patch("/jobs/{job_id}", response_model=JobUpdateResponse)
+ async def update_job(
+     job_id: str,
+     request: Dict[str, Any],
+     auth_data: AuthResult = Depends(require_admin),
+ ):
+     """
+     Update editable fields of a job (admin only).
+
+     This endpoint allows admins to update certain job fields without
+     affecting the job's processing state. It's useful for:
+     - Correcting artist/title typos
+     - Changing user assignment
+     - Updating delivery settings (email, theme, etc.)
+
+     Editable fields:
+     - artist, title: Track metadata
+     - user_email: Job owner
+     - theme_id: Visual theme
+     - enable_cdg, enable_txt, enable_youtube_upload: Output options
+     - customer_email, customer_notes: Made-for-you order info
+     - brand_prefix: Brand code prefix
+     - non_interactive, prep_only: Workflow options
+     - discord_webhook_url: Notification URL
+     - youtube_description, youtube_description_template: YouTube settings
+
+     Non-editable fields (will return 400 error):
+     - job_id, status, progress: System-managed
+     - created_at, updated_at: Timestamps
+     - state_data, file_urls, timeline: Processing state
+     - worker_logs, worker_ids: Audit/tracking data
+
+     For status changes, use the reset endpoint instead.
+     """
+     admin_email = auth_data.user_email or "unknown"
+
+     # Check for non-editable fields in request
+     non_editable_fields = set(request.keys()) - EDITABLE_JOB_FIELDS
+     if non_editable_fields:
+         raise HTTPException(
+             status_code=400,
+             detail=f"The following fields are not editable: {', '.join(sorted(non_editable_fields))}. "
+                    f"Editable fields are: {', '.join(sorted(EDITABLE_JOB_FIELDS))}"
+         )
+
+     # Filter to only include provided fields (non-None values)
+     updates = {k: v for k, v in request.items() if v is not None}
+
+     if not updates:
+         raise HTTPException(
+             status_code=400,
+             detail="No valid fields provided for update. "
+                    f"Editable fields are: {', '.join(sorted(EDITABLE_JOB_FIELDS))}"
+         )
+
+     job_manager = JobManager()
+     job = job_manager.get_job(job_id)
+
+     if not job:
+         raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
+
+     # Perform the update
+     success = job_manager.update_job(job_id, updates)
+
+     if not success:
+         raise HTTPException(
+             status_code=500,
+             detail="Failed to update job. Please try again."
+         )
+
+     # Log the admin action
+     logger.info(
+         f"Admin {admin_email} updated job {job_id}. "
+         f"Updated fields: {list(updates.keys())}"
+     )
+
+     return JobUpdateResponse(
+         status="success",
+         job_id=job_id,
+         updated_fields=list(updates.keys()),
+         message=f"Successfully updated {len(updates)} field(s)",
+     )
+
+
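Driving the PATCH endpoint might look like the sketch below (same hypothetical base URL and auth as the earlier example). Note that because the handler takes a raw Dict and drops None values, a field can be changed but never cleared to null through this endpoint.

import requests

BASE = "https://example.invalid/api/admin"
HEADERS = {"Authorization": "Bearer <admin-session-token>"}

# Only keys in EDITABLE_JOB_FIELDS are accepted; any other key returns 400.
resp = requests.patch(
    f"{BASE}/jobs/abc123",
    headers=HEADERS,
    json={"artist": "Example Artist", "title": "Example Title", "enable_cdg": True},
)
print(resp.json())  # {"status": "success", "updated_fields": [...], ...}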
+ # =============================================================================
+ # Job Reset Endpoint
+ # =============================================================================
+
+ class JobResetRequest(BaseModel):
+     """Request model for resetting a job to a specific state."""
+     target_state: str
+
+
+ class JobResetResponse(BaseModel):
+     """Response from job reset endpoint."""
+     status: str
+     job_id: str
+     previous_status: str
+     new_status: str
+     message: str
+     cleared_data: List[str]
+
+
+ # States that are allowed as reset targets
+ ALLOWED_RESET_STATES = {
+     "pending",
+     "awaiting_audio_selection",
+     "awaiting_review",
+     "awaiting_instrumental_selection",
+ }
+
+ # State data keys to clear for each reset target
+ # Keys not in this mapping are preserved
+ STATE_DATA_CLEAR_KEYS = {
+     "pending": [
+         "audio_search_results",
+         "audio_search_count",
+         "remote_search_id",
+         "audio_selection",
+         "review_complete",
+         "corrected_lyrics",
+         "instrumental_selection",
+         "video_progress",
+         "render_progress",
+         "screens_progress",
+     ],
+     "awaiting_audio_selection": [
+         "audio_selection",
+         "review_complete",
+         "corrected_lyrics",
+         "instrumental_selection",
+         "video_progress",
+         "render_progress",
+         "screens_progress",
+     ],
+     "awaiting_review": [
+         "review_complete",
+         "corrected_lyrics",
+         "instrumental_selection",
+         "video_progress",
+         "render_progress",
+         "screens_progress",
+     ],
+     "awaiting_instrumental_selection": [
+         "instrumental_selection",
+         "video_progress",
+         "render_progress",
+         "screens_progress",
+     ],
+ }
+
+
+ @router.post("/jobs/{job_id}/reset", response_model=JobResetResponse)
+ async def reset_job(
+     job_id: str,
+     request: JobResetRequest,
+     auth_data: AuthResult = Depends(require_admin),
+     user_service: UserService = Depends(get_user_service),
+ ):
+     """
+     Reset a job to a specific state for re-processing (admin only).
+
+     This endpoint allows admins to reset a job back to specific workflow
+     checkpoints to re-do parts of the processing. This is useful for:
+     - Re-running audio search after flacfetch updates
+     - Re-reviewing lyrics after corrections
+     - Re-selecting instrumental after hearing the result
+     - Restarting a failed job from the beginning
+
+     Allowed target states:
+     - pending: Restart from the beginning (clears all processing data)
+     - awaiting_audio_selection: Re-select audio source
+     - awaiting_review: Re-review lyrics (preserves audio stems)
+     - awaiting_instrumental_selection: Re-select instrumental (preserves review)
+
+     State data is cleared based on the target state to ensure a clean
+     re-processing from that point forward.
+     """
+     admin_email = auth_data.user_email or "unknown"
+     target_state = request.target_state.lower()
+
+     # Validate target state
+     if target_state not in ALLOWED_RESET_STATES:
+         raise HTTPException(
+             status_code=400,
+             detail=f"Invalid target state '{target_state}'. "
+                    f"Allowed states are: {', '.join(sorted(ALLOWED_RESET_STATES))}"
+         )
+
+     job_manager = JobManager()
+     job = job_manager.get_job(job_id)
+
+     if not job:
+         raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
+
+     previous_status = job.status
+
+     # Build update payload
+     updates = {
+         "status": target_state,
+         "progress": 0,
+         "message": f"Job reset to {target_state} by admin",
+         "updated_at": datetime.utcnow().isoformat(),
+     }
+
+     # Clear state data keys based on target state
+     keys_to_clear = STATE_DATA_CLEAR_KEYS.get(target_state, [])
+     cleared_keys = []
+     current_state_data = job.state_data or {}
+
+     for key in keys_to_clear:
+         if key in current_state_data:
+             cleared_keys.append(key)
+
+     # Add timeline event
+     timeline_event = {
+         "status": target_state,
+         "timestamp": datetime.utcnow().isoformat(),
+         "message": f"Admin reset from {previous_status} to {target_state}",
+     }
+
+     # Perform the update with state_data clearing
+     # We need to set the cleared keys to DELETE_FIELD
+     success = job_manager.update_job(job_id, updates)
+
+     if not success:
+         raise HTTPException(
+             status_code=500,
+             detail="Failed to reset job. Please try again."
+         )
+
+     # Clear the state data keys separately using direct Firestore update
+     from google.cloud.firestore_v1 import DELETE_FIELD, ArrayUnion
+
+     job_ref = user_service.db.collection("jobs").document(job_id)
+
+     if cleared_keys:
+         clear_updates = {}
+         for key in cleared_keys:
+             clear_updates[f"state_data.{key}"] = DELETE_FIELD
+
+         # Add timeline event
+         clear_updates["timeline"] = ArrayUnion([timeline_event])
+
+         job_ref.update(clear_updates)
+     else:
+         # Just add timeline event
+         job_ref.update({
+             "timeline": ArrayUnion([timeline_event])
+         })
+
+     # Log the admin action
+     logger.info(
+         f"Admin {admin_email} reset job {job_id} from {previous_status} to {target_state}. "
+         f"Cleared state_data keys: {cleared_keys}"
+     )
+
+     return JobResetResponse(
+         status="success",
+         job_id=job_id,
+         previous_status=previous_status,
+         new_status=target_state,
+         message=f"Job reset from {previous_status} to {target_state}",
+         cleared_data=cleared_keys,
+     )
+
+
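The reset targets form a hierarchy: each later checkpoint's clear-list is a suffix of the "pending" list, so resetting further back always clears at least as much state. A sketch of invoking the endpoint, under the same hypothetical client assumptions as the earlier examples:

import requests

BASE = "https://example.invalid/api/admin"
HEADERS = {"Authorization": "Bearer <admin-session-token>"}

# Send the job back to lyrics review; per STATE_DATA_CLEAR_KEYS the review,
# instrumental selection, and render progress are cleared while stems are kept.
resp = requests.post(
    f"{BASE}/jobs/abc123/reset",
    headers=HEADERS,
    json={"target_state": "awaiting_review"},
)
body = resp.json()
print(body["previous_status"], "->", body["new_status"], "cleared:", body["cleared_data"])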
  @router.get("/jobs/{job_id}/completion-message", response_model=CompletionMessageResponse)
  async def get_job_completion_message(
      job_id: str,
@@ -740,3 +1270,77 @@ async def send_job_completion_email(
          status_code=500,
          detail="Failed to send email. Check email service configuration."
      )
+
+
+ # =============================================================================
+ # User Impersonation
+ # =============================================================================
+
+ class ImpersonateUserResponse(BaseModel):
+     """Response from impersonate user endpoint."""
+     session_token: str
+     user_email: str
+     message: str
+
+
+ @router.post("/users/{email}/impersonate", response_model=ImpersonateUserResponse)
+ async def impersonate_user(
+     email: str,
+     auth_data: Tuple[str, UserType, int] = Depends(require_admin),
+     user_service: UserService = Depends(get_user_service),
+ ):
+     """
+     Create a session token to impersonate a user (admin only).
+
+     This allows admins to view the application exactly as a specific user would see it.
+     The admin's original session remains valid and can be restored client-side.
+
+     Security:
+     - Only admins can impersonate
+     - Creates a real session (auditable in Firestore)
+     - Impersonation is logged for security audit
+
+     Args:
+         email: Email of the user to impersonate
+
+     Returns:
+         session_token: A valid session token for the target user
+         user_email: The impersonated user's email
+         message: Success message
+     """
+     admin_email = auth_data[0]
+     target_email = email.lower()
+
+     # Cannot impersonate yourself
+     if target_email == admin_email.lower():
+         raise HTTPException(
+             status_code=400,
+             detail="Cannot impersonate yourself"
+         )
+
+     # Verify target user exists
+     target_user = user_service.get_user(target_email)
+     if not target_user:
+         raise HTTPException(
+             status_code=404,
+             detail=f"User {target_email} not found"
+         )
+
+     # Create a real session for the target user
+     session = user_service.create_session(
+         user_email=target_email,
+         ip_address=None,  # Not tracking IP for impersonation
+         user_agent=f"Impersonation by {admin_email}",
+     )
+
+     # Log impersonation for audit trail
+     logger.info(
+         f"IMPERSONATION: Admin {admin_email} started impersonating user {target_email}. "
+         f"Session token prefix: {session.token[:12]}..."
+     )
+
+     return ImpersonateUserResponse(
+         session_token=session.token,
+         user_email=target_email,
+         message=f"Now impersonating {target_email}",
+     )
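To close, a sketch of the impersonation flow from an admin client, under the same hypothetical URL and auth assumptions as above. Ending impersonation is purely a client-side token swap; nothing in this endpoint invalidates the minted session, which matches the docstring's "restored client-side":

import requests

BASE = "https://example.invalid/api/admin"
ADMIN_HEADERS = {"Authorization": "Bearer <admin-session-token>"}

# Mint a real session for the target user; the admin's own session stays valid.
resp = requests.post(f"{BASE}/users/someone@example.com/impersonate", headers=ADMIN_HEADERS)
token = resp.json()["session_token"]

# Act as the user, then simply switch back to the admin token when done.
user_headers = {"Authorization": f"Bearer {token}"}
me = requests.get("https://example.invalid/api/users/me", headers=user_headers)  # assumed route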