karaoke-gen 0.90.1__py3-none-any.whl → 0.99.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. backend/.coveragerc +20 -0
  2. backend/.gitignore +37 -0
  3. backend/Dockerfile +43 -0
  4. backend/Dockerfile.base +74 -0
  5. backend/README.md +242 -0
  6. backend/__init__.py +0 -0
  7. backend/api/__init__.py +0 -0
  8. backend/api/dependencies.py +457 -0
  9. backend/api/routes/__init__.py +0 -0
  10. backend/api/routes/admin.py +835 -0
  11. backend/api/routes/audio_search.py +913 -0
  12. backend/api/routes/auth.py +348 -0
  13. backend/api/routes/file_upload.py +2112 -0
  14. backend/api/routes/health.py +409 -0
  15. backend/api/routes/internal.py +435 -0
  16. backend/api/routes/jobs.py +1629 -0
  17. backend/api/routes/review.py +652 -0
  18. backend/api/routes/themes.py +162 -0
  19. backend/api/routes/users.py +1513 -0
  20. backend/config.py +172 -0
  21. backend/main.py +157 -0
  22. backend/middleware/__init__.py +5 -0
  23. backend/middleware/audit_logging.py +124 -0
  24. backend/models/__init__.py +0 -0
  25. backend/models/job.py +519 -0
  26. backend/models/requests.py +123 -0
  27. backend/models/theme.py +153 -0
  28. backend/models/user.py +254 -0
  29. backend/models/worker_log.py +164 -0
  30. backend/pyproject.toml +29 -0
  31. backend/quick-check.sh +93 -0
  32. backend/requirements.txt +29 -0
  33. backend/run_tests.sh +60 -0
  34. backend/services/__init__.py +0 -0
  35. backend/services/audio_analysis_service.py +243 -0
  36. backend/services/audio_editing_service.py +278 -0
  37. backend/services/audio_search_service.py +702 -0
  38. backend/services/auth_service.py +630 -0
  39. backend/services/credential_manager.py +792 -0
  40. backend/services/discord_service.py +172 -0
  41. backend/services/dropbox_service.py +301 -0
  42. backend/services/email_service.py +1093 -0
  43. backend/services/encoding_interface.py +454 -0
  44. backend/services/encoding_service.py +502 -0
  45. backend/services/firestore_service.py +512 -0
  46. backend/services/flacfetch_client.py +573 -0
  47. backend/services/gce_encoding/README.md +72 -0
  48. backend/services/gce_encoding/__init__.py +22 -0
  49. backend/services/gce_encoding/main.py +589 -0
  50. backend/services/gce_encoding/requirements.txt +16 -0
  51. backend/services/gdrive_service.py +356 -0
  52. backend/services/job_logging.py +258 -0
  53. backend/services/job_manager.py +853 -0
  54. backend/services/job_notification_service.py +271 -0
  55. backend/services/langfuse_preloader.py +98 -0
  56. backend/services/local_encoding_service.py +590 -0
  57. backend/services/local_preview_encoding_service.py +407 -0
  58. backend/services/lyrics_cache_service.py +216 -0
  59. backend/services/metrics.py +413 -0
  60. backend/services/nltk_preloader.py +122 -0
  61. backend/services/packaging_service.py +287 -0
  62. backend/services/rclone_service.py +106 -0
  63. backend/services/spacy_preloader.py +65 -0
  64. backend/services/storage_service.py +209 -0
  65. backend/services/stripe_service.py +371 -0
  66. backend/services/structured_logging.py +254 -0
  67. backend/services/template_service.py +330 -0
  68. backend/services/theme_service.py +469 -0
  69. backend/services/tracing.py +543 -0
  70. backend/services/user_service.py +721 -0
  71. backend/services/worker_service.py +558 -0
  72. backend/services/youtube_service.py +112 -0
  73. backend/services/youtube_upload_service.py +445 -0
  74. backend/tests/__init__.py +4 -0
  75. backend/tests/conftest.py +224 -0
  76. backend/tests/emulator/__init__.py +7 -0
  77. backend/tests/emulator/conftest.py +109 -0
  78. backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
  79. backend/tests/emulator/test_emulator_integration.py +356 -0
  80. backend/tests/emulator/test_style_loading_direct.py +436 -0
  81. backend/tests/emulator/test_worker_logs_direct.py +229 -0
  82. backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
  83. backend/tests/requirements-test.txt +10 -0
  84. backend/tests/requirements.txt +6 -0
  85. backend/tests/test_admin_email_endpoints.py +411 -0
  86. backend/tests/test_api_integration.py +460 -0
  87. backend/tests/test_api_routes.py +93 -0
  88. backend/tests/test_audio_analysis_service.py +294 -0
  89. backend/tests/test_audio_editing_service.py +386 -0
  90. backend/tests/test_audio_search.py +1398 -0
  91. backend/tests/test_audio_services.py +378 -0
  92. backend/tests/test_auth_firestore.py +231 -0
  93. backend/tests/test_config_extended.py +68 -0
  94. backend/tests/test_credential_manager.py +377 -0
  95. backend/tests/test_dependencies.py +54 -0
  96. backend/tests/test_discord_service.py +244 -0
  97. backend/tests/test_distribution_services.py +820 -0
  98. backend/tests/test_dropbox_service.py +472 -0
  99. backend/tests/test_email_service.py +492 -0
  100. backend/tests/test_emulator_integration.py +322 -0
  101. backend/tests/test_encoding_interface.py +412 -0
  102. backend/tests/test_file_upload.py +1739 -0
  103. backend/tests/test_flacfetch_client.py +632 -0
  104. backend/tests/test_gdrive_service.py +524 -0
  105. backend/tests/test_instrumental_api.py +431 -0
  106. backend/tests/test_internal_api.py +343 -0
  107. backend/tests/test_job_creation_regression.py +583 -0
  108. backend/tests/test_job_manager.py +356 -0
  109. backend/tests/test_job_manager_notifications.py +329 -0
  110. backend/tests/test_job_notification_service.py +443 -0
  111. backend/tests/test_jobs_api.py +283 -0
  112. backend/tests/test_local_encoding_service.py +423 -0
  113. backend/tests/test_local_preview_encoding_service.py +567 -0
  114. backend/tests/test_main.py +87 -0
  115. backend/tests/test_models.py +918 -0
  116. backend/tests/test_packaging_service.py +382 -0
  117. backend/tests/test_requests.py +201 -0
  118. backend/tests/test_routes_jobs.py +282 -0
  119. backend/tests/test_routes_review.py +337 -0
  120. backend/tests/test_services.py +556 -0
  121. backend/tests/test_services_extended.py +112 -0
  122. backend/tests/test_spacy_preloader.py +119 -0
  123. backend/tests/test_storage_service.py +448 -0
  124. backend/tests/test_style_upload.py +261 -0
  125. backend/tests/test_template_service.py +295 -0
  126. backend/tests/test_theme_service.py +516 -0
  127. backend/tests/test_unicode_sanitization.py +522 -0
  128. backend/tests/test_upload_api.py +256 -0
  129. backend/tests/test_validate.py +156 -0
  130. backend/tests/test_video_worker_orchestrator.py +847 -0
  131. backend/tests/test_worker_log_subcollection.py +509 -0
  132. backend/tests/test_worker_logging.py +365 -0
  133. backend/tests/test_workers.py +1116 -0
  134. backend/tests/test_workers_extended.py +178 -0
  135. backend/tests/test_youtube_service.py +247 -0
  136. backend/tests/test_youtube_upload_service.py +568 -0
  137. backend/utils/test_data.py +27 -0
  138. backend/validate.py +173 -0
  139. backend/version.py +27 -0
  140. backend/workers/README.md +597 -0
  141. backend/workers/__init__.py +11 -0
  142. backend/workers/audio_worker.py +618 -0
  143. backend/workers/lyrics_worker.py +683 -0
  144. backend/workers/render_video_worker.py +483 -0
  145. backend/workers/screens_worker.py +535 -0
  146. backend/workers/style_helper.py +198 -0
  147. backend/workers/video_worker.py +1277 -0
  148. backend/workers/video_worker_orchestrator.py +701 -0
  149. backend/workers/worker_logging.py +278 -0
  150. karaoke_gen/instrumental_review/static/index.html +7 -4
  151. karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
  152. karaoke_gen/utils/__init__.py +163 -8
  153. karaoke_gen/video_background_processor.py +9 -4
  154. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/METADATA +1 -1
  155. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/RECORD +196 -46
  156. lyrics_transcriber/correction/agentic/agent.py +17 -6
  157. lyrics_transcriber/correction/agentic/providers/config.py +9 -5
  158. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -93
  159. lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
  160. lyrics_transcriber/correction/anchor_sequence.py +151 -37
  161. lyrics_transcriber/correction/corrector.py +192 -130
  162. lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
  163. lyrics_transcriber/correction/operations.py +24 -9
  164. lyrics_transcriber/correction/phrase_analyzer.py +18 -0
  165. lyrics_transcriber/frontend/package-lock.json +2 -2
  166. lyrics_transcriber/frontend/package.json +1 -1
  167. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
  168. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
  169. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
  170. lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
  171. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
  172. lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
  173. lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
  174. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
  175. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
  176. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
  177. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
  178. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  179. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
  180. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
  181. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
  182. lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
  183. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
  184. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
  185. lyrics_transcriber/frontend/src/theme.ts +42 -15
  186. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  187. lyrics_transcriber/frontend/vite.config.js +5 -0
  188. lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
  189. lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
  190. lyrics_transcriber/frontend/web_assets/index.html +6 -2
  191. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
  192. lyrics_transcriber/output/generator.py +17 -3
  193. lyrics_transcriber/output/video.py +60 -95
  194. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
  195. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/WHEEL +0 -0
  196. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/entry_points.txt +0 -0
  197. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/licenses/LICENSE +0 -0
backend/api/routes/admin.py (new file)
@@ -0,0 +1,835 @@
1
+ """
2
+ Admin API routes for dashboard statistics and management.
3
+
4
+ Handles:
5
+ - Dashboard overview statistics
6
+ - System-wide metrics
7
+ - Admin-only operations
8
+ - Audio search cache management
9
+ """
10
+ import logging
11
+ from datetime import datetime, timedelta
12
+ from typing import Tuple, List, Optional, Any, Dict
13
+
14
+ from fastapi import APIRouter, Depends, HTTPException
15
+ from pydantic import BaseModel
16
+
17
+ from backend.api.dependencies import require_admin
18
+ from backend.services.auth_service import UserType
19
+ from backend.services.user_service import get_user_service, UserService, USERS_COLLECTION
20
+ from backend.services.job_manager import JobManager
21
+ from backend.services.flacfetch_client import get_flacfetch_client, FlacfetchServiceError
22
+ from backend.models.job import JobStatus
23
+ from backend.utils.test_data import is_test_email
24
+ from karaoke_gen.utils import sanitize_filename
25
+
26
+
27
+ logger = logging.getLogger(__name__)
28
+ router = APIRouter(prefix="/admin", tags=["admin"])
29
+
30
+
31
+ # =============================================================================
32
+ # Response Models
33
+ # =============================================================================
34
+
35
+ class JobsByStatusResponse(BaseModel):
36
+ """Breakdown of jobs by status."""
37
+ pending: int = 0
38
+ processing: int = 0
39
+ awaiting_review: int = 0
40
+ awaiting_instrumental: int = 0
41
+ complete: int = 0
42
+ failed: int = 0
43
+ cancelled: int = 0
44
+
45
+
46
+ class AdminStatsOverview(BaseModel):
47
+ """Overview statistics for admin dashboard."""
48
+ total_users: int
49
+ active_users_7d: int
50
+ active_users_30d: int
51
+ total_jobs: int
52
+ jobs_last_7d: int
53
+ jobs_last_30d: int
54
+ jobs_by_status: JobsByStatusResponse
55
+ total_credits_issued_30d: int
56
+ total_beta_testers: int
57
+
58
+
59
+ # =============================================================================
60
+ # Admin Stats Endpoints
61
+ # =============================================================================
62
+
63
+ @router.get("/stats/overview", response_model=AdminStatsOverview)
64
+ async def get_admin_stats_overview(
65
+ exclude_test: bool = True,
66
+ auth_data: Tuple[str, UserType, int] = Depends(require_admin),
67
+ user_service: UserService = Depends(get_user_service),
68
+ ):
69
+ """
70
+ Get overview statistics for admin dashboard.
71
+
72
+ Args:
73
+ exclude_test: If True (default), exclude test data (users with @inbox.testmail.app emails
74
+ and jobs created by test users) from all counts.
75
+
76
+ Includes:
77
+ - User counts (total, active in 7d, active in 30d)
78
+ - Job counts (total, by status, recent)
79
+ - Credit statistics
80
+ - Beta program stats
81
+ """
82
+ from google.cloud.firestore_v1 import FieldFilter
83
+ from google.cloud.firestore_v1 import aggregation
84
+
85
+ db = user_service.db
86
+ now = datetime.utcnow()
87
+ seven_days_ago = now - timedelta(days=7)
88
+ thirty_days_ago = now - timedelta(days=30)
89
+
90
+ users_collection = db.collection(USERS_COLLECTION)
91
+ jobs_collection = db.collection("jobs")
92
+
93
+ # Jobs by status category mapping
94
+ processing_statuses = [
95
+ "downloading", "downloading_audio", "searching_audio", "awaiting_audio_selection",
96
+ "separating_stage1", "separating_stage2", "transcribing", "correcting",
97
+ "generating_screens", "applying_padding", "rendering_video",
98
+ "instrumental_selected", "generating_video", "encoding", "packaging",
99
+ "uploading", "notifying"
100
+ ]
101
+
102
+ # Limits for streaming queries - these are safety limits to prevent memory issues
103
+ # If hit, stats may be incomplete so we log a warning
104
+ USERS_STREAM_LIMIT = 2000
105
+ JOBS_STREAM_LIMIT = 10000
106
+
107
+ if exclude_test:
108
+ # When excluding test data, we must stream and filter in Python
109
+ # because Firestore doesn't support "not ends with" queries
110
+
111
+ # Stream all users and filter
112
+ all_users = []
113
+ users_fetched = 0
114
+ for doc in users_collection.limit(USERS_STREAM_LIMIT).stream():
115
+ users_fetched += 1
116
+ user_data = doc.to_dict()
117
+ email = user_data.get("email", "")
118
+ if not is_test_email(email):
119
+ all_users.append(user_data)
120
+
121
+ if users_fetched >= USERS_STREAM_LIMIT:
122
+ logger.warning(f"Users stream hit limit ({USERS_STREAM_LIMIT}), stats may be incomplete")
123
+
124
+ # Calculate user stats from filtered list
125
+ total_users = len(all_users)
126
+ active_users_7d = sum(
127
+ 1 for u in all_users
128
+ if u.get("last_login_at") and _normalize_datetime(u["last_login_at"]) >= seven_days_ago
129
+ )
130
+ active_users_30d = sum(
131
+ 1 for u in all_users
132
+ if u.get("last_login_at") and _normalize_datetime(u["last_login_at"]) >= thirty_days_ago
133
+ )
134
+ total_beta_testers = sum(1 for u in all_users if u.get("is_beta_tester"))
135
+
136
+ # Calculate credits from filtered users
137
+ total_credits_issued_30d = 0
138
+ for user_data in all_users:
139
+ transactions = user_data.get("credit_transactions", [])
140
+ for txn in transactions:
141
+ txn_date = _normalize_datetime(txn.get("created_at"))
142
+ if txn_date and txn_date >= thirty_days_ago:
143
+ amount = txn.get("amount", 0)
144
+ if amount > 0:
145
+ total_credits_issued_30d += amount
146
+
147
+ # Stream all jobs and filter by user_email
148
+ all_jobs = []
149
+ jobs_fetched = 0
150
+ for doc in jobs_collection.limit(JOBS_STREAM_LIMIT).stream():
151
+ jobs_fetched += 1
152
+ job_data = doc.to_dict()
153
+ user_email = job_data.get("user_email", "")
154
+ if not is_test_email(user_email):
155
+ all_jobs.append(job_data)
156
+
157
+ if jobs_fetched >= JOBS_STREAM_LIMIT:
158
+ logger.warning(f"Jobs stream hit limit ({JOBS_STREAM_LIMIT}), stats may be incomplete")
159
+
160
+ # Calculate job stats from filtered list
161
+ total_jobs = len(all_jobs)
162
+ jobs_last_7d = sum(
163
+ 1 for j in all_jobs
164
+ if j.get("created_at") and _normalize_datetime(j["created_at"]) >= seven_days_ago
165
+ )
166
+ jobs_last_30d = sum(
167
+ 1 for j in all_jobs
168
+ if j.get("created_at") and _normalize_datetime(j["created_at"]) >= thirty_days_ago
169
+ )
170
+
171
+ # Jobs by status
172
+ jobs_by_status = JobsByStatusResponse(
173
+ pending=sum(1 for j in all_jobs if j.get("status") == "pending"),
174
+ processing=sum(1 for j in all_jobs if j.get("status") in processing_statuses),
175
+ awaiting_review=sum(1 for j in all_jobs if j.get("status") in ["awaiting_review", "in_review"]),
176
+ awaiting_instrumental=sum(1 for j in all_jobs if j.get("status") == "awaiting_instrumental_selection"),
177
+ complete=sum(1 for j in all_jobs if j.get("status") in ["complete", "prep_complete"]),
178
+ failed=sum(1 for j in all_jobs if j.get("status") == "failed"),
179
+ cancelled=sum(1 for j in all_jobs if j.get("status") == "cancelled"),
180
+ )
181
+ else:
182
+ # When including test data, use efficient aggregation queries
183
+ def get_count(query) -> int:
184
+ try:
185
+ agg_query = aggregation.AggregationQuery(query)
186
+ agg_query.count(alias="count")
187
+ results = agg_query.get()
188
+ return results[0][0].value if results else 0
189
+ except Exception as e:
190
+ logger.warning(f"Aggregation query failed: {e}")
191
+ return 0
192
+
193
+ # User statistics
194
+ total_users = get_count(users_collection)
195
+ active_users_7d = get_count(
196
+ users_collection.where(filter=FieldFilter("last_login_at", ">=", seven_days_ago))
197
+ )
198
+ active_users_30d = get_count(
199
+ users_collection.where(filter=FieldFilter("last_login_at", ">=", thirty_days_ago))
200
+ )
201
+ total_beta_testers = get_count(
202
+ users_collection.where(filter=FieldFilter("is_beta_tester", "==", True))
203
+ )
204
+
205
+ # Job statistics
206
+ total_jobs = get_count(jobs_collection)
207
+ jobs_last_7d = get_count(
208
+ jobs_collection.where(filter=FieldFilter("created_at", ">=", seven_days_ago))
209
+ )
210
+ jobs_last_30d = get_count(
211
+ jobs_collection.where(filter=FieldFilter("created_at", ">=", thirty_days_ago))
212
+ )
213
+
214
+ # Jobs by status
215
+ jobs_by_status = JobsByStatusResponse(
216
+ pending=get_count(
217
+ jobs_collection.where(filter=FieldFilter("status", "==", "pending"))
218
+ ),
219
+ processing=sum(
220
+ get_count(jobs_collection.where(filter=FieldFilter("status", "==", status)))
221
+ for status in processing_statuses
222
+ ),
223
+ awaiting_review=get_count(
224
+ jobs_collection.where(filter=FieldFilter("status", "==", "awaiting_review"))
225
+ ) + get_count(
226
+ jobs_collection.where(filter=FieldFilter("status", "==", "in_review"))
227
+ ),
228
+ awaiting_instrumental=get_count(
229
+ jobs_collection.where(filter=FieldFilter("status", "==", "awaiting_instrumental_selection"))
230
+ ),
231
+ complete=get_count(
232
+ jobs_collection.where(filter=FieldFilter("status", "==", "complete"))
233
+ ) + get_count(
234
+ jobs_collection.where(filter=FieldFilter("status", "==", "prep_complete"))
235
+ ),
236
+ failed=get_count(
237
+ jobs_collection.where(filter=FieldFilter("status", "==", "failed"))
238
+ ),
239
+ cancelled=get_count(
240
+ jobs_collection.where(filter=FieldFilter("status", "==", "cancelled"))
241
+ ),
242
+ )
243
+
244
+ # Credit statistics - sum credits added in last 30 days
245
+ total_credits_issued_30d = 0
246
+ try:
247
+ users_fetched = 0
248
+ for user_doc in users_collection.limit(USERS_STREAM_LIMIT).stream():
249
+ users_fetched += 1
250
+ user_data = user_doc.to_dict()
251
+ transactions = user_data.get("credit_transactions", [])
252
+ for txn in transactions:
253
+ txn_date = _normalize_datetime(txn.get("created_at"))
254
+ if txn_date and txn_date >= thirty_days_ago:
255
+ amount = txn.get("amount", 0)
256
+ if amount > 0:
257
+ total_credits_issued_30d += amount
258
+ if users_fetched >= USERS_STREAM_LIMIT:
259
+ logger.warning(f"Credit calculation hit user limit ({USERS_STREAM_LIMIT}), total may be incomplete")
260
+ except Exception as e:
261
+ logger.warning(f"Error calculating credits: {e}")
262
+
263
+ return AdminStatsOverview(
264
+ total_users=total_users,
265
+ active_users_7d=active_users_7d,
266
+ active_users_30d=active_users_30d,
267
+ total_jobs=total_jobs,
268
+ jobs_last_7d=jobs_last_7d,
269
+ jobs_last_30d=jobs_last_30d,
270
+ jobs_by_status=jobs_by_status,
271
+ total_credits_issued_30d=total_credits_issued_30d,
272
+ total_beta_testers=total_beta_testers,
273
+ )
274
+
275
+
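For illustration, the overview endpoint added above could be exercised with a short client script. This is only a sketch: the base URL, the environment variable names, and the bearer-token authentication scheme are assumptions for the example, not details taken from this diff.

import os
import requests

BASE_URL = os.environ.get("KARAOKE_BACKEND_URL", "http://localhost:8000")  # assumed deployment URL
TOKEN = os.environ["KARAOKE_ADMIN_TOKEN"]  # assumed admin credential

# exclude_test defaults to True, so test users and their jobs are filtered out of the counts.
resp = requests.get(
    f"{BASE_URL}/admin/stats/overview",
    params={"exclude_test": "true"},
    headers={"Authorization": f"Bearer {TOKEN}"},
    timeout=30,
)
resp.raise_for_status()
overview = resp.json()
print(overview["total_users"], overview["active_users_7d"], overview["jobs_by_status"])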
276
+ def _normalize_datetime(dt_value) -> Optional[datetime]:
277
+ """Normalize datetime values from Firestore (can be datetime or ISO string)."""
278
+ if dt_value is None:
279
+ return None
280
+ if isinstance(dt_value, datetime):
281
+ return dt_value.replace(tzinfo=None)
282
+ if isinstance(dt_value, str):
283
+ try:
284
+ parsed = datetime.fromisoformat(dt_value.replace("Z", "+00:00"))
285
+ return parsed.replace(tzinfo=None)
286
+ except Exception:
287
+ return None
288
+ return None
289
+
290
+
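To make the normalization above concrete, here is a small usage sketch. It assumes the backend package and its dependencies are importable; the values are illustrative.

from datetime import datetime, timezone
from backend.api.routes.admin import _normalize_datetime  # module added in this release

# Timezone-aware datetimes come back as naive values (tzinfo dropped):
aware = datetime(2024, 5, 1, 12, 0, tzinfo=timezone.utc)
assert _normalize_datetime(aware) == datetime(2024, 5, 1, 12, 0)

# ISO-8601 strings, including a trailing "Z", are parsed and stripped the same way:
assert _normalize_datetime("2024-05-01T12:00:00Z") == datetime(2024, 5, 1, 12, 0)

# Missing or unparseable values collapse to None, so callers can filter on truthiness:
assert _normalize_datetime(None) is None
assert _normalize_datetime("not-a-date") is None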
291
+ # =============================================================================
292
+ # Audio Search Management Models
293
+ # =============================================================================
294
+
295
+ class AudioSearchResultSummary(BaseModel):
296
+ """Summary of a single audio search result."""
297
+ index: int
298
+ provider: str
299
+ artist: str
300
+ title: str
301
+ is_lossless: bool
302
+ quality: Optional[str] = None
303
+ seeders: Optional[int] = None
304
+
305
+
306
+ class AudioSearchJobSummary(BaseModel):
307
+ """Summary of a job with audio search results."""
308
+ job_id: str
309
+ status: str
310
+ user_email: Optional[str] = None
311
+ audio_search_artist: Optional[str] = None
312
+ audio_search_title: Optional[str] = None
313
+ created_at: Optional[datetime] = None
314
+ results_count: int
315
+ results_summary: List[AudioSearchResultSummary]
316
+ has_lossless: bool
317
+ providers: List[str]
318
+
319
+
320
+ class AudioSearchListResponse(BaseModel):
321
+ """Response for listing audio search jobs."""
322
+ jobs: List[AudioSearchJobSummary]
323
+ total: int
324
+
325
+
326
+ class ClearSearchCacheResponse(BaseModel):
327
+ """Response for clearing search cache."""
328
+ status: str
329
+ job_id: str
330
+ message: str
331
+ previous_status: str
332
+ new_status: str
333
+ results_cleared: int
334
+ flacfetch_cache_cleared: bool = False
335
+ flacfetch_error: Optional[str] = None
336
+
337
+
338
+ class ClearAllCacheResponse(BaseModel):
339
+ """Response for clearing all flacfetch cache."""
340
+ status: str
341
+ message: str
342
+ deleted_count: int
343
+
344
+
345
+ class CacheStatsResponse(BaseModel):
346
+ """Response for cache statistics."""
347
+ count: int
348
+ total_size_bytes: int
349
+ oldest_entry: Optional[str] = None
350
+ newest_entry: Optional[str] = None
351
+ configured: bool
352
+
353
+
354
+ # =============================================================================
355
+ # Audio Search Management Endpoints
356
+ # =============================================================================
357
+
358
+ @router.get("/audio-searches", response_model=AudioSearchListResponse)
359
+ async def list_audio_searches(
360
+ limit: int = 50,
361
+ status_filter: Optional[str] = None,
362
+ exclude_test: bool = True,
363
+ auth_data: Tuple[str, UserType, int] = Depends(require_admin),
364
+ user_service: UserService = Depends(get_user_service),
365
+ ):
366
+ """
367
+ List jobs with audio search results.
368
+
369
+ Returns jobs that have cached audio search results, useful for:
370
+ - Monitoring search activity
371
+ - Identifying stale cached results
372
+ - Clearing cache for specific jobs
373
+
374
+ Args:
375
+ limit: Maximum number of jobs to return (default 50)
376
+ status_filter: Optional filter by job status (e.g., 'awaiting_audio_selection')
377
+ exclude_test: If True (default), exclude jobs from test users
378
+ """
379
+ from google.cloud.firestore_v1 import FieldFilter
380
+
381
+ db = user_service.db
382
+ jobs_collection = db.collection("jobs")
383
+
384
+ # Query jobs - we'll filter for those with audio_search_results in Python
385
+ # since Firestore can't query for existence of nested fields efficiently
386
+ query = jobs_collection.order_by("created_at", direction="DESCENDING").limit(500)
387
+
388
+ if status_filter:
389
+ query = jobs_collection.where(
390
+ filter=FieldFilter("status", "==", status_filter)
391
+ ).order_by("created_at", direction="DESCENDING").limit(500)
392
+
393
+ jobs_with_searches = []
394
+
395
+ for doc in query.stream():
396
+ data = doc.to_dict()
397
+
398
+ # Filter out test users if exclude_test is True
399
+ if exclude_test and is_test_email(data.get("user_email", "")):
400
+ continue
401
+
402
+ state_data = data.get("state_data", {})
403
+ audio_results = state_data.get("audio_search_results", [])
404
+
405
+ if not audio_results:
406
+ continue
407
+
408
+ # Compute has_lossless and providers from ALL results (not just first 10)
409
+ has_lossless = any(r.get("is_lossless", False) for r in audio_results)
410
+ providers = {r.get("provider", "Unknown") for r in audio_results}
411
+
412
+ # Build summary from first 10 results only
413
+ results_summary = []
414
+ for r in audio_results[:10]:
415
+ results_summary.append(AudioSearchResultSummary(
416
+ index=r.get("index", 0),
417
+ provider=r.get("provider", "Unknown"),
418
+ artist=r.get("artist", ""),
419
+ title=r.get("title", ""),
420
+ is_lossless=r.get("is_lossless", False),
421
+ quality=r.get("quality"),
422
+ seeders=r.get("seeders"),
423
+ ))
424
+
425
+ jobs_with_searches.append(AudioSearchJobSummary(
426
+ job_id=doc.id,
427
+ status=data.get("status", "unknown"),
428
+ user_email=data.get("user_email"),
429
+ audio_search_artist=data.get("audio_search_artist"),
430
+ audio_search_title=data.get("audio_search_title"),
431
+ created_at=data.get("created_at"),
432
+ results_count=len(audio_results),
433
+ results_summary=results_summary,
434
+ has_lossless=has_lossless,
435
+ providers=sorted(providers),
436
+ ))
437
+
438
+ if len(jobs_with_searches) >= limit:
439
+ break
440
+
441
+ return AudioSearchListResponse(
442
+ jobs=jobs_with_searches,
443
+ total=len(jobs_with_searches),
444
+ )
445
+
446
+
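As a usage sketch for the listing endpoint above (same caveats as before: base URL, environment variables, and bearer-token auth are assumptions), an admin script could surface cached searches that found no lossless source and are therefore candidates for a cache clear:

import os
import requests

BASE_URL = os.environ.get("KARAOKE_BACKEND_URL", "http://localhost:8000")  # assumed
TOKEN = os.environ["KARAOKE_ADMIN_TOKEN"]  # assumed

resp = requests.get(
    f"{BASE_URL}/admin/audio-searches",
    params={"limit": 100, "status_filter": "awaiting_audio_selection"},
    headers={"Authorization": f"Bearer {TOKEN}"},
    timeout=30,
)
resp.raise_for_status()

for job in resp.json()["jobs"]:
    if not job["has_lossless"]:
        # These jobs could be reset via POST /admin/audio-searches/{job_id}/clear-cache.
        providers = ", ".join(job["providers"])
        print(f"{job['job_id']}: {job['audio_search_artist']} - {job['audio_search_title']} "
              f"({job['results_count']} results, providers: {providers})")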
447
+ @router.post("/audio-searches/{job_id}/clear-cache", response_model=ClearSearchCacheResponse)
448
+ async def clear_audio_search_cache(
449
+ job_id: str,
450
+ auth_data: Tuple[str, UserType, int] = Depends(require_admin),
451
+ user_service: UserService = Depends(get_user_service),
452
+ ):
453
+ """
454
+ Clear the cached audio search results for a job.
455
+
456
+ This will:
457
+ 1. Remove the cached search results from job.state_data
458
+ 2. Reset the job status to 'pending' so a new search can be performed
459
+ 3. Clear the flacfetch GCS cache for this artist/title (if available)
460
+
461
+ Use this when:
462
+ - Cached results are stale (e.g., flacfetch was updated)
463
+ - User wants to search again with different terms
464
+ - Results appear incomplete or incorrect
465
+ """
466
+ job_manager = JobManager()
467
+ job = job_manager.get_job(job_id)
468
+ if not job:
469
+ raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
470
+
471
+ # Get current state
472
+ state_data = job.state_data or {}
473
+ audio_results = state_data.get("audio_search_results", [])
474
+ results_count = len(audio_results)
475
+ previous_status = job.status
476
+
477
+ if not audio_results:
478
+ raise HTTPException(
479
+ status_code=400,
480
+ detail=f"Job {job_id} has no cached audio search results"
481
+ )
482
+
483
+ # Validate job status - only allow cache clear for appropriate states
484
+ # Don't allow clearing cache for jobs that are actively processing or complete
485
+ forbidden_statuses = {
486
+ "downloading", "downloading_audio", "searching_audio",
487
+ "separating_stage1", "separating_stage2", "transcribing", "correcting",
488
+ "generating_screens", "applying_padding", "rendering_video",
489
+ "generating_video", "encoding", "packaging", "uploading",
490
+ "complete", "prep_complete",
491
+ }
492
+ if previous_status in forbidden_statuses:
493
+ raise HTTPException(
494
+ status_code=400,
495
+ detail=f"Cannot clear cache for job in '{previous_status}' state. "
496
+ f"Only jobs in pending, awaiting_audio_selection, failed, or cancelled states can have cache cleared."
497
+ )
498
+
499
+ # Clear the cache by removing the keys
500
+ db = user_service.db
501
+ job_ref = db.collection("jobs").document(job_id)
502
+
503
+ from google.cloud.firestore_v1 import DELETE_FIELD
504
+
505
+ # Update job: clear cache and reset status
506
+ job_ref.update({
507
+ "state_data.audio_search_results": DELETE_FIELD,
508
+ "state_data.audio_search_count": DELETE_FIELD,
509
+ "state_data.remote_search_id": DELETE_FIELD,
510
+ "status": "pending",
511
+ "progress": 0,
512
+ "message": "Audio search cache cleared by admin. Ready for new search.",
513
+ "updated_at": datetime.utcnow(),
514
+ })
515
+
516
+ # Also clear flacfetch's GCS cache if we have artist/title
517
+ flacfetch_cache_cleared = False
518
+ flacfetch_error = None
519
+ artist = job.audio_search_artist
520
+ title = job.audio_search_title
521
+
522
+ if artist and title:
523
+ flacfetch_client = get_flacfetch_client()
524
+ if flacfetch_client:
525
+ try:
526
+ flacfetch_cache_cleared = await flacfetch_client.clear_search_cache(artist, title)
527
+ logger.info(
528
+ f"Cleared flacfetch cache for '{artist}' - '{title}': "
529
+ f"{'deleted' if flacfetch_cache_cleared else 'no entry found'}"
530
+ )
531
+ except FlacfetchServiceError as e:
532
+ flacfetch_error = str(e)
533
+ logger.warning(f"Failed to clear flacfetch cache: {e}")
534
+ else:
535
+ flacfetch_error = "flacfetch client not configured"
536
+ logger.debug("Skipping flacfetch cache clear - client not configured")
537
+ else:
538
+ flacfetch_error = "missing artist or title"
539
+ logger.debug(f"Skipping flacfetch cache clear - missing artist ({artist}) or title ({title})")
540
+
541
+ logger.info(
542
+ f"Admin {auth_data[0]} cleared audio search cache for job {job_id}. "
543
+ f"Cleared {results_count} results. Status changed from {previous_status} to pending. "
544
+ f"Flacfetch cache cleared: {flacfetch_cache_cleared}"
545
+ )
546
+
547
+ message = f"Cleared {results_count} cached search results. Job reset to pending."
548
+ if flacfetch_cache_cleared:
549
+ message += " Flacfetch cache also cleared."
550
+ elif flacfetch_error:
551
+ message += f" Note: flacfetch cache not cleared ({flacfetch_error})."
552
+
553
+ return ClearSearchCacheResponse(
554
+ status="success",
555
+ job_id=job_id,
556
+ message=message,
557
+ previous_status=previous_status,
558
+ new_status="pending",
559
+ results_cleared=results_count,
560
+ flacfetch_cache_cleared=flacfetch_cache_cleared,
561
+ flacfetch_error=flacfetch_error,
562
+ )
563
+
564
+
565
+ @router.post("/jobs/{job_id}/reset-worker-state")
566
+ async def reset_worker_state(
567
+ job_id: str,
568
+ auth_data: Tuple[str, UserType, int] = Depends(require_admin),
569
+ ):
570
+ """
571
+ Reset stale worker progress state for a job.
572
+
573
+ This clears the video_progress, render_progress, and screens_progress
574
+ from state_data, allowing workers to be re-triggered.
575
+
576
+ Use this when a job is stuck because worker progress shows 'running'
577
+ from a previous failed attempt.
578
+ """
579
+ from backend.services.job_manager import JobManager
580
+
581
+ job_manager = JobManager()
582
+ job = job_manager.get_job(job_id)
583
+
584
+ if not job:
585
+ raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
586
+
587
+ # Reset worker progress states
588
+ job_manager.update_state_data(job_id, 'video_progress', {'stage': 'pending'})
589
+ job_manager.update_state_data(job_id, 'render_progress', {'stage': 'pending'})
590
+ job_manager.update_state_data(job_id, 'screens_progress', {'stage': 'pending'})
591
+
592
+ logger.info(f"Admin {auth_data[0]} reset worker state for job {job_id}")
593
+
594
+ return {
595
+ "status": "success",
596
+ "job_id": job_id,
597
+ "message": "Worker progress states reset to pending"
598
+ }
599
+
600
+
601
+ @router.delete("/cache", response_model=ClearAllCacheResponse)
602
+ async def clear_all_flacfetch_cache(
603
+ auth_data: Tuple[str, UserType, int] = Depends(require_admin),
604
+ ):
605
+ """
606
+ Clear the entire flacfetch search cache.
607
+
608
+ This will delete all cached search results from flacfetch's GCS cache.
609
+ Use with caution - this will cause all subsequent searches to hit
610
+ the trackers fresh.
611
+
612
+ Note: This does NOT clear Firestore job.state_data caches, only the
613
+ flacfetch-side GCS cache.
614
+ """
615
+ flacfetch_client = get_flacfetch_client()
616
+ if not flacfetch_client:
617
+ raise HTTPException(
618
+ status_code=503,
619
+ detail="flacfetch client not configured"
620
+ )
621
+
622
+ try:
623
+ deleted_count = await flacfetch_client.clear_all_cache()
624
+ logger.info(
625
+ f"Admin {auth_data[0]} cleared all flacfetch cache. "
626
+ f"Deleted {deleted_count} entries."
627
+ )
628
+ return ClearAllCacheResponse(
629
+ status="success",
630
+ message=f"Cleared {deleted_count} cache entries from flacfetch.",
631
+ deleted_count=deleted_count,
632
+ )
633
+ except FlacfetchServiceError as e:
634
+ logger.error(f"Failed to clear all flacfetch cache: {e}")
635
+ raise HTTPException(
636
+ status_code=502,
637
+ detail=f"Failed to clear flacfetch cache: {e}"
638
+ )
639
+
640
+
641
+ @router.get("/cache/stats", response_model=CacheStatsResponse)
642
+ async def get_flacfetch_cache_stats(
643
+ auth_data: Tuple[str, UserType, int] = Depends(require_admin),
644
+ ):
645
+ """
646
+ Get statistics about the flacfetch search cache.
647
+
648
+ Returns information about:
649
+ - Number of cached entries
650
+ - Total size in bytes
651
+ - Oldest and newest cache entries
652
+ - Whether cache is configured
653
+ """
654
+ flacfetch_client = get_flacfetch_client()
655
+ if not flacfetch_client:
656
+ raise HTTPException(
657
+ status_code=503,
658
+ detail="flacfetch client not configured"
659
+ )
660
+
661
+ try:
662
+ stats = await flacfetch_client.get_cache_stats()
663
+ return CacheStatsResponse(
664
+ count=stats.get("count", 0),
665
+ total_size_bytes=stats.get("total_size_bytes", 0),
666
+ oldest_entry=stats.get("oldest_entry"),
667
+ newest_entry=stats.get("newest_entry"),
668
+ configured=stats.get("configured", True),
669
+ )
670
+ except FlacfetchServiceError as e:
671
+ logger.error(f"Failed to get flacfetch cache stats: {e}")
672
+ raise HTTPException(
673
+ status_code=502,
674
+ detail=f"Failed to get cache stats: {e}"
675
+ )
676
+
677
+
678
+ # =============================================================================
679
+ # Job Completion Message Endpoints (for admin copy/send functionality)
680
+ # =============================================================================
681
+
682
+ class CompletionMessageResponse(BaseModel):
683
+ """Response containing the rendered completion message."""
684
+ job_id: str
685
+ message: str
686
+ subject: str
687
+ youtube_url: Optional[str] = None
688
+ dropbox_url: Optional[str] = None
689
+
690
+
691
+ class SendCompletionEmailRequest(BaseModel):
692
+ """Request to send a completion email."""
693
+ to_email: str
694
+ cc_admin: bool = True
695
+
696
+
697
+ class SendCompletionEmailResponse(BaseModel):
698
+ """Response from sending a completion email."""
699
+ success: bool
700
+ job_id: str
701
+ to_email: str
702
+ message: str
703
+
704
+
705
+ @router.get("/jobs/{job_id}/completion-message", response_model=CompletionMessageResponse)
706
+ async def get_job_completion_message(
707
+ job_id: str,
708
+ auth_data: Tuple[str, UserType, int] = Depends(require_admin),
709
+ ):
710
+ """
711
+ Get the rendered completion message for a job.
712
+
713
+ Returns the plain text message that would be sent to the user,
714
+ rendered using the job completion template with the job's details.
715
+
716
+ This is useful for:
717
+ - Copying the message to clipboard (e.g., for Fiverr)
718
+ - Previewing the email before sending
719
+
720
+ Requires admin authentication.
721
+ """
722
+ from backend.services.job_notification_service import get_job_notification_service
723
+
724
+ job_manager = JobManager()
725
+ job = job_manager.get_job(job_id)
726
+
727
+ if not job:
728
+ raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
729
+
730
+ # Get youtube, dropbox URLs, and brand_code from state_data (may be None)
731
+ state_data = job.state_data or {}
732
+ youtube_url = state_data.get('youtube_url')
733
+ dropbox_url = state_data.get('dropbox_link')
734
+ brand_code = state_data.get('brand_code')
735
+
736
+ # Render the completion message
737
+ notification_service = get_job_notification_service()
738
+ message = notification_service.get_completion_message(
739
+ job_id=job.job_id,
740
+ user_name=None, # Use default "there"
741
+ artist=job.artist,
742
+ title=job.title,
743
+ youtube_url=youtube_url,
744
+ dropbox_url=dropbox_url,
745
+ )
746
+
747
+ # Build subject: "NOMAD-1178: Artist - Title (Your karaoke video is ready!)"
748
+ # Sanitize artist/title to handle Unicode characters (curly quotes, em dashes, etc.)
749
+ # that cause email header encoding issues (MIME headers use latin-1)
750
+ safe_artist = sanitize_filename(job.artist) if job.artist else None
751
+ safe_title = sanitize_filename(job.title) if job.title else None
752
+ if brand_code and safe_artist and safe_title:
753
+ subject = f"{brand_code}: {safe_artist} - {safe_title} (Your karaoke video is ready!)"
754
+ elif safe_artist and safe_title:
755
+ subject = f"{safe_artist} - {safe_title} (Your karaoke video is ready!)"
756
+ else:
757
+ subject = "Your karaoke video is ready!"
758
+
759
+ return CompletionMessageResponse(
760
+ job_id=job_id,
761
+ message=message,
762
+ subject=subject,
763
+ youtube_url=youtube_url,
764
+ dropbox_url=dropbox_url,
765
+ )
766
+
767
+
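The artist/title sanitization above is easier to see with a small standard-library demonstration: an ASCII-only subject survives as a plain header, while Unicode punctuation such as curly quotes or an em dash is turned into an RFC 2047 encoded-word, the sort of transformation the sanitized subject sidesteps. The sample strings are illustrative only.

from email.header import Header

# Plain ASCII subject: emitted unchanged.
print(Header("NOMAD-1178: Artist - Title (Your karaoke video is ready!)").encode())

# Curly quotes and an em dash: emitted as an RFC 2047 encoded-word instead.
print(Header("Artist \u2014 \u201cTitle\u201d").encode())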
768
+ @router.post("/jobs/{job_id}/send-completion-email", response_model=SendCompletionEmailResponse)
769
+ async def send_job_completion_email(
770
+ job_id: str,
771
+ request: SendCompletionEmailRequest,
772
+ auth_data: Tuple[str, UserType, int] = Depends(require_admin),
773
+ ):
774
+ """
775
+ Send a completion email for a job to a specified email address.
776
+
777
+ This allows admins to manually send (or re-send) completion emails,
778
+ useful for:
779
+ - Sending to customers who didn't have an email on file
780
+ - Re-sending if the original email was lost
781
+ - Sending to alternate email addresses
782
+
783
+ Requires admin authentication.
784
+ """
785
+ from backend.services.job_notification_service import get_job_notification_service
786
+ from backend.services.email_service import get_email_service
787
+
788
+ job_manager = JobManager()
789
+ job = job_manager.get_job(job_id)
790
+
791
+ if not job:
792
+ raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
793
+
794
+ # Get youtube, dropbox URLs, and brand_code from state_data (may be None)
795
+ state_data = job.state_data or {}
796
+ youtube_url = state_data.get('youtube_url')
797
+ dropbox_url = state_data.get('dropbox_link')
798
+ brand_code = state_data.get('brand_code')
799
+
800
+ # Render the completion message
801
+ notification_service = get_job_notification_service()
802
+ message = notification_service.get_completion_message(
803
+ job_id=job.job_id,
804
+ user_name=None, # Use default "there"
805
+ artist=job.artist,
806
+ title=job.title,
807
+ youtube_url=youtube_url,
808
+ dropbox_url=dropbox_url,
809
+ )
810
+
811
+ # Send the email
812
+ email_service = get_email_service()
813
+ success = email_service.send_job_completion(
814
+ to_email=request.to_email,
815
+ message_content=message,
816
+ artist=job.artist,
817
+ title=job.title,
818
+ brand_code=brand_code,
819
+ cc_admin=request.cc_admin,
820
+ )
821
+
822
+ if success:
823
+ logger.info(f"Admin sent completion email for job {job_id} to {request.to_email}")
824
+ return SendCompletionEmailResponse(
825
+ success=True,
826
+ job_id=job_id,
827
+ to_email=request.to_email,
828
+ message=f"Completion email sent to {request.to_email}",
829
+ )
830
+ else:
831
+ logger.error(f"Failed to send completion email for job {job_id}")
832
+ raise HTTPException(
833
+ status_code=500,
834
+ detail="Failed to send email. Check email service configuration."
835
+ )
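Finally, the two completion-message endpoints above pair naturally: preview the rendered subject and message, then send it to an alternate address. Another hedged sketch with the same assumed base URL, environment variables, and auth scheme; the job ID and recipient address are placeholders.

import os
import requests

BASE_URL = os.environ.get("KARAOKE_BACKEND_URL", "http://localhost:8000")  # assumed
TOKEN = os.environ["KARAOKE_ADMIN_TOKEN"]  # assumed
HEADERS = {"Authorization": f"Bearer {TOKEN}"}
JOB_ID = "example-job-id"  # placeholder

# Preview the rendered message and subject line for the job.
preview = requests.get(f"{BASE_URL}/admin/jobs/{JOB_ID}/completion-message", headers=HEADERS, timeout=30)
preview.raise_for_status()
print(preview.json()["subject"])
print(preview.json()["message"])

# Send (or re-send) the completion email to an alternate address, CC'ing the admin.
send = requests.post(
    f"{BASE_URL}/admin/jobs/{JOB_ID}/send-completion-email",
    json={"to_email": "customer@example.com", "cc_admin": True},
    headers=HEADERS,
    timeout=30,
)
send.raise_for_status()
print(send.json()["message"])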