karaoke-gen 0.96.0__py3-none-any.whl → 0.99.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. backend/api/routes/admin.py +184 -91
  2. backend/api/routes/audio_search.py +16 -6
  3. backend/api/routes/file_upload.py +57 -21
  4. backend/api/routes/health.py +65 -0
  5. backend/api/routes/jobs.py +19 -0
  6. backend/api/routes/users.py +543 -44
  7. backend/main.py +25 -1
  8. backend/services/encoding_service.py +128 -31
  9. backend/services/job_manager.py +12 -1
  10. backend/services/langfuse_preloader.py +98 -0
  11. backend/services/nltk_preloader.py +122 -0
  12. backend/services/spacy_preloader.py +65 -0
  13. backend/services/stripe_service.py +96 -0
  14. backend/tests/emulator/conftest.py +22 -1
  15. backend/tests/test_job_manager.py +25 -8
  16. backend/tests/test_jobs_api.py +11 -1
  17. backend/tests/test_spacy_preloader.py +119 -0
  18. backend/utils/test_data.py +27 -0
  19. backend/workers/screens_worker.py +16 -6
  20. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.99.3.dist-info}/METADATA +1 -1
  21. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.99.3.dist-info}/RECORD +30 -25
  22. lyrics_transcriber/correction/agentic/agent.py +17 -6
  23. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -43
  24. lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
  25. lyrics_transcriber/correction/anchor_sequence.py +151 -37
  26. lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
  27. lyrics_transcriber/correction/phrase_analyzer.py +18 -0
  28. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.99.3.dist-info}/WHEEL +0 -0
  29. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.99.3.dist-info}/entry_points.txt +0 -0
  30. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.99.3.dist-info}/licenses/LICENSE +0 -0
@@ -20,6 +20,7 @@ from backend.services.user_service import get_user_service, UserService, USERS_C
20
20
  from backend.services.job_manager import JobManager
21
21
  from backend.services.flacfetch_client import get_flacfetch_client, FlacfetchServiceError
22
22
  from backend.models.job import JobStatus
23
+ from backend.utils.test_data import is_test_email
23
24
  from karaoke_gen.utils import sanitize_filename
24
25
 
25
26
 
@@ -61,12 +62,17 @@ class AdminStatsOverview(BaseModel):
61
62
 
62
63
  @router.get("/stats/overview", response_model=AdminStatsOverview)
63
64
  async def get_admin_stats_overview(
65
+ exclude_test: bool = True,
64
66
  auth_data: Tuple[str, UserType, int] = Depends(require_admin),
65
67
  user_service: UserService = Depends(get_user_service),
66
68
  ):
67
69
  """
68
70
  Get overview statistics for admin dashboard.
69
71
 
72
+ Args:
73
+ exclude_test: If True (default), exclude test data (users with @inbox.testmail.app emails
74
+ and jobs created by test users) from all counts.
75
+
70
76
  Includes:
71
77
  - User counts (total, active in 7d, active in 30d)
72
78
  - Job counts (total, by status, recent)
@@ -81,48 +87,10 @@ async def get_admin_stats_overview(
81
87
  seven_days_ago = now - timedelta(days=7)
82
88
  thirty_days_ago = now - timedelta(days=30)
83
89
 
84
- # Helper function to get count using aggregation
85
- def get_count(query) -> int:
86
- try:
87
- agg_query = aggregation.AggregationQuery(query)
88
- agg_query.count(alias="count")
89
- results = agg_query.get()
90
- return results[0][0].value if results else 0
91
- except Exception as e:
92
- logger.warning(f"Aggregation query failed: {e}")
93
- return 0
94
-
95
- # User statistics
96
90
  users_collection = db.collection(USERS_COLLECTION)
97
-
98
- total_users = get_count(users_collection)
99
-
100
- active_users_7d = get_count(
101
- users_collection.where(filter=FieldFilter("last_login_at", ">=", seven_days_ago))
102
- )
103
-
104
- active_users_30d = get_count(
105
- users_collection.where(filter=FieldFilter("last_login_at", ">=", thirty_days_ago))
106
- )
107
-
108
- total_beta_testers = get_count(
109
- users_collection.where(filter=FieldFilter("is_beta_tester", "==", True))
110
- )
111
-
112
- # Job statistics
113
91
  jobs_collection = db.collection("jobs")
114
92
 
115
- total_jobs = get_count(jobs_collection)
116
-
117
- jobs_last_7d = get_count(
118
- jobs_collection.where(filter=FieldFilter("created_at", ">=", seven_days_ago))
119
- )
120
-
121
- jobs_last_30d = get_count(
122
- jobs_collection.where(filter=FieldFilter("created_at", ">=", thirty_days_ago))
123
- )
124
-
125
- # Jobs by status - map multiple statuses to simplified categories
93
+ # Jobs by status category mapping
126
94
  processing_statuses = [
127
95
  "downloading", "downloading_audio", "searching_audio", "awaiting_audio_selection",
128
96
  "separating_stage1", "separating_stage2", "transcribing", "correcting",
@@ -131,63 +99,166 @@ async def get_admin_stats_overview(
131
99
  "uploading", "notifying"
132
100
  ]
133
101
 
134
- jobs_by_status = JobsByStatusResponse(
135
- pending=get_count(
136
- jobs_collection.where(filter=FieldFilter("status", "==", "pending"))
137
- ),
138
- processing=sum(
139
- get_count(jobs_collection.where(filter=FieldFilter("status", "==", status)))
140
- for status in processing_statuses
141
- ),
142
- awaiting_review=get_count(
143
- jobs_collection.where(filter=FieldFilter("status", "==", "awaiting_review"))
144
- ) + get_count(
145
- jobs_collection.where(filter=FieldFilter("status", "==", "in_review"))
146
- ),
147
- awaiting_instrumental=get_count(
148
- jobs_collection.where(filter=FieldFilter("status", "==", "awaiting_instrumental_selection"))
149
- ),
150
- complete=get_count(
151
- jobs_collection.where(filter=FieldFilter("status", "==", "complete"))
152
- ) + get_count(
153
- jobs_collection.where(filter=FieldFilter("status", "==", "prep_complete"))
154
- ),
155
- failed=get_count(
156
- jobs_collection.where(filter=FieldFilter("status", "==", "failed"))
157
- ),
158
- cancelled=get_count(
159
- jobs_collection.where(filter=FieldFilter("status", "==", "cancelled"))
160
- ),
161
- )
102
+ # Limits for streaming queries - these are safety limits to prevent memory issues
103
+ # If hit, stats may be incomplete so we log a warning
104
+ USERS_STREAM_LIMIT = 2000
105
+ JOBS_STREAM_LIMIT = 10000
106
+
107
+ if exclude_test:
108
+ # When excluding test data, we must stream and filter in Python
109
+ # because Firestore doesn't support "not ends with" queries
110
+
111
+ # Stream all users and filter
112
+ all_users = []
113
+ users_fetched = 0
114
+ for doc in users_collection.limit(USERS_STREAM_LIMIT).stream():
115
+ users_fetched += 1
116
+ user_data = doc.to_dict()
117
+ email = user_data.get("email", "")
118
+ if not is_test_email(email):
119
+ all_users.append(user_data)
120
+
121
+ if users_fetched >= USERS_STREAM_LIMIT:
122
+ logger.warning(f"Users stream hit limit ({USERS_STREAM_LIMIT}), stats may be incomplete")
123
+
124
+ # Calculate user stats from filtered list
125
+ total_users = len(all_users)
126
+ active_users_7d = sum(
127
+ 1 for u in all_users
128
+ if u.get("last_login_at") and _normalize_datetime(u["last_login_at"]) >= seven_days_ago
129
+ )
130
+ active_users_30d = sum(
131
+ 1 for u in all_users
132
+ if u.get("last_login_at") and _normalize_datetime(u["last_login_at"]) >= thirty_days_ago
133
+ )
134
+ total_beta_testers = sum(1 for u in all_users if u.get("is_beta_tester"))
162
135
 
163
- # Credit statistics - sum credits added in last 30 days
164
- # This is more expensive, so we'll just estimate from users
165
- total_credits_issued_30d = 0
166
- try:
167
- # Get all users and sum recent credit transactions
168
- users_docs = users_collection.limit(500).stream()
169
- for user_doc in users_docs:
170
- user_data = user_doc.to_dict()
136
+ # Calculate credits from filtered users
137
+ total_credits_issued_30d = 0
138
+ for user_data in all_users:
171
139
  transactions = user_data.get("credit_transactions", [])
172
140
  for txn in transactions:
173
- txn_date = txn.get("created_at")
174
- if txn_date:
175
- # Handle both datetime and string formats
176
- if isinstance(txn_date, str):
177
- try:
178
- txn_date = datetime.fromisoformat(txn_date.replace("Z", "+00:00"))
179
- except Exception:
180
- continue
181
- if isinstance(txn_date, datetime):
182
- txn_date = txn_date.replace(tzinfo=None)
183
- else:
184
- continue
185
- if txn_date >= thirty_days_ago:
141
+ txn_date = _normalize_datetime(txn.get("created_at"))
142
+ if txn_date and txn_date >= thirty_days_ago:
143
+ amount = txn.get("amount", 0)
144
+ if amount > 0:
145
+ total_credits_issued_30d += amount
146
+
147
+ # Stream all jobs and filter by user_email
148
+ all_jobs = []
149
+ jobs_fetched = 0
150
+ for doc in jobs_collection.limit(JOBS_STREAM_LIMIT).stream():
151
+ jobs_fetched += 1
152
+ job_data = doc.to_dict()
153
+ user_email = job_data.get("user_email", "")
154
+ if not is_test_email(user_email):
155
+ all_jobs.append(job_data)
156
+
157
+ if jobs_fetched >= JOBS_STREAM_LIMIT:
158
+ logger.warning(f"Jobs stream hit limit ({JOBS_STREAM_LIMIT}), stats may be incomplete")
159
+
160
+ # Calculate job stats from filtered list
161
+ total_jobs = len(all_jobs)
162
+ jobs_last_7d = sum(
163
+ 1 for j in all_jobs
164
+ if j.get("created_at") and _normalize_datetime(j["created_at"]) >= seven_days_ago
165
+ )
166
+ jobs_last_30d = sum(
167
+ 1 for j in all_jobs
168
+ if j.get("created_at") and _normalize_datetime(j["created_at"]) >= thirty_days_ago
169
+ )
170
+
171
+ # Jobs by status
172
+ jobs_by_status = JobsByStatusResponse(
173
+ pending=sum(1 for j in all_jobs if j.get("status") == "pending"),
174
+ processing=sum(1 for j in all_jobs if j.get("status") in processing_statuses),
175
+ awaiting_review=sum(1 for j in all_jobs if j.get("status") in ["awaiting_review", "in_review"]),
176
+ awaiting_instrumental=sum(1 for j in all_jobs if j.get("status") == "awaiting_instrumental_selection"),
177
+ complete=sum(1 for j in all_jobs if j.get("status") in ["complete", "prep_complete"]),
178
+ failed=sum(1 for j in all_jobs if j.get("status") == "failed"),
179
+ cancelled=sum(1 for j in all_jobs if j.get("status") == "cancelled"),
180
+ )
181
+ else:
182
+ # When including test data, use efficient aggregation queries
183
+ def get_count(query) -> int:
184
+ try:
185
+ agg_query = aggregation.AggregationQuery(query)
186
+ agg_query.count(alias="count")
187
+ results = agg_query.get()
188
+ return results[0][0].value if results else 0
189
+ except Exception as e:
190
+ logger.warning(f"Aggregation query failed: {e}")
191
+ return 0
192
+
193
+ # User statistics
194
+ total_users = get_count(users_collection)
195
+ active_users_7d = get_count(
196
+ users_collection.where(filter=FieldFilter("last_login_at", ">=", seven_days_ago))
197
+ )
198
+ active_users_30d = get_count(
199
+ users_collection.where(filter=FieldFilter("last_login_at", ">=", thirty_days_ago))
200
+ )
201
+ total_beta_testers = get_count(
202
+ users_collection.where(filter=FieldFilter("is_beta_tester", "==", True))
203
+ )
204
+
205
+ # Job statistics
206
+ total_jobs = get_count(jobs_collection)
207
+ jobs_last_7d = get_count(
208
+ jobs_collection.where(filter=FieldFilter("created_at", ">=", seven_days_ago))
209
+ )
210
+ jobs_last_30d = get_count(
211
+ jobs_collection.where(filter=FieldFilter("created_at", ">=", thirty_days_ago))
212
+ )
213
+
214
+ # Jobs by status
215
+ jobs_by_status = JobsByStatusResponse(
216
+ pending=get_count(
217
+ jobs_collection.where(filter=FieldFilter("status", "==", "pending"))
218
+ ),
219
+ processing=sum(
220
+ get_count(jobs_collection.where(filter=FieldFilter("status", "==", status)))
221
+ for status in processing_statuses
222
+ ),
223
+ awaiting_review=get_count(
224
+ jobs_collection.where(filter=FieldFilter("status", "==", "awaiting_review"))
225
+ ) + get_count(
226
+ jobs_collection.where(filter=FieldFilter("status", "==", "in_review"))
227
+ ),
228
+ awaiting_instrumental=get_count(
229
+ jobs_collection.where(filter=FieldFilter("status", "==", "awaiting_instrumental_selection"))
230
+ ),
231
+ complete=get_count(
232
+ jobs_collection.where(filter=FieldFilter("status", "==", "complete"))
233
+ ) + get_count(
234
+ jobs_collection.where(filter=FieldFilter("status", "==", "prep_complete"))
235
+ ),
236
+ failed=get_count(
237
+ jobs_collection.where(filter=FieldFilter("status", "==", "failed"))
238
+ ),
239
+ cancelled=get_count(
240
+ jobs_collection.where(filter=FieldFilter("status", "==", "cancelled"))
241
+ ),
242
+ )
243
+
244
+ # Credit statistics - sum credits added in last 30 days
245
+ total_credits_issued_30d = 0
246
+ try:
247
+ users_fetched = 0
248
+ for user_doc in users_collection.limit(USERS_STREAM_LIMIT).stream():
249
+ users_fetched += 1
250
+ user_data = user_doc.to_dict()
251
+ transactions = user_data.get("credit_transactions", [])
252
+ for txn in transactions:
253
+ txn_date = _normalize_datetime(txn.get("created_at"))
254
+ if txn_date and txn_date >= thirty_days_ago:
186
255
  amount = txn.get("amount", 0)
187
- if amount > 0: # Only count additions, not deductions
256
+ if amount > 0:
188
257
  total_credits_issued_30d += amount
189
- except Exception as e:
190
- logger.warning(f"Error calculating credits: {e}")
258
+ if users_fetched >= USERS_STREAM_LIMIT:
259
+ logger.warning(f"Credit calculation hit user limit ({USERS_STREAM_LIMIT}), total may be incomplete")
260
+ except Exception as e:
261
+ logger.warning(f"Error calculating credits: {e}")
191
262
 
192
263
  return AdminStatsOverview(
193
264
  total_users=total_users,
@@ -202,6 +273,21 @@ async def get_admin_stats_overview(
202
273
  )
203
274
 
204
275
 
276
+ def _normalize_datetime(dt_value) -> Optional[datetime]:
277
+ """Normalize datetime values from Firestore (can be datetime or ISO string)."""
278
+ if dt_value is None:
279
+ return None
280
+ if isinstance(dt_value, datetime):
281
+ return dt_value.replace(tzinfo=None)
282
+ if isinstance(dt_value, str):
283
+ try:
284
+ parsed = datetime.fromisoformat(dt_value.replace("Z", "+00:00"))
285
+ return parsed.replace(tzinfo=None)
286
+ except Exception:
287
+ return None
288
+ return None
289
+
290
+
205
291
  # =============================================================================
206
292
  # Audio Search Management Models
207
293
  # =============================================================================
@@ -273,6 +359,7 @@ class CacheStatsResponse(BaseModel):
273
359
  async def list_audio_searches(
274
360
  limit: int = 50,
275
361
  status_filter: Optional[str] = None,
362
+ exclude_test: bool = True,
276
363
  auth_data: Tuple[str, UserType, int] = Depends(require_admin),
277
364
  user_service: UserService = Depends(get_user_service),
278
365
  ):
@@ -287,6 +374,7 @@ async def list_audio_searches(
287
374
  Args:
288
375
  limit: Maximum number of jobs to return (default 50)
289
376
  status_filter: Optional filter by job status (e.g., 'awaiting_audio_selection')
377
+ exclude_test: If True (default), exclude jobs from test users
290
378
  """
291
379
  from google.cloud.firestore_v1 import FieldFilter
292
380
 
@@ -306,6 +394,11 @@ async def list_audio_searches(
306
394
 
307
395
  for doc in query.stream():
308
396
  data = doc.to_dict()
397
+
398
+ # Filter out test users if exclude_test is True
399
+ if exclude_test and is_test_email(data.get("user_email", "")):
400
+ continue
401
+
309
402
  state_data = data.get("state_data", {})
310
403
  audio_results = state_data.get("audio_search_results", [])
311
404
 
@@ -33,6 +33,7 @@ from backend.services.audio_search_service import (
33
33
  NoResultsError,
34
34
  DownloadError,
35
35
  )
36
+ from backend.services.theme_service import get_theme_service
36
37
  from backend.config import get_settings
37
38
  from backend.version import VERSION
38
39
  from backend.api.dependencies import require_auth
@@ -534,9 +535,18 @@ async def search_audio(
534
535
  # Extract request metadata
535
536
  request_metadata = extract_request_metadata(request, created_from="audio_search")
536
537
 
538
+ # Apply default theme if none specified
539
+ # This ensures all karaoke videos use the Nomad theme by default
540
+ effective_theme_id = body.theme_id
541
+ if effective_theme_id is None:
542
+ theme_service = get_theme_service()
543
+ effective_theme_id = theme_service.get_default_theme_id()
544
+ if effective_theme_id:
545
+ logger.info(f"Applying default theme: {effective_theme_id}")
546
+
537
547
  # Resolve CDG/TXT defaults based on theme
538
548
  resolved_cdg, resolved_txt = _resolve_cdg_txt_defaults(
539
- body.theme_id, body.enable_cdg, body.enable_txt
549
+ effective_theme_id, body.enable_cdg, body.enable_txt
540
550
  )
541
551
 
542
552
  # Use authenticated user's email
@@ -552,7 +562,7 @@ async def search_audio(
552
562
  job_create = JobCreate(
553
563
  artist=effective_display_artist, # Display value for title screens, filenames
554
564
  title=effective_display_title, # Display value for title screens, filenames
555
- theme_id=body.theme_id,
565
+ theme_id=effective_theme_id,
556
566
  color_overrides=body.color_overrides or {},
557
567
  enable_cdg=resolved_cdg,
558
568
  enable_txt=resolved_txt,
@@ -591,11 +601,11 @@ async def search_audio(
591
601
  # If theme is set and no custom style files are being uploaded, prepare theme style now
592
602
  # This copies the theme's style_params.json to the job folder so LyricsTranscriber
593
603
  # can access the style configuration for preview videos
594
- if body.theme_id and not body.style_files:
604
+ if effective_theme_id and not body.style_files:
595
605
  from backend.api.routes.file_upload import _prepare_theme_for_job
596
606
  try:
597
607
  style_params_path, theme_style_assets, youtube_desc = _prepare_theme_for_job(
598
- job_id, body.theme_id, body.color_overrides
608
+ job_id, effective_theme_id, body.color_overrides
599
609
  )
600
610
  theme_update = {
601
611
  'style_params_gcs_path': style_params_path,
@@ -604,9 +614,9 @@ async def search_audio(
604
614
  if youtube_desc and not effective_youtube_description:
605
615
  theme_update['youtube_description_template'] = youtube_desc
606
616
  job_manager.update_job(job_id, theme_update)
607
- logger.info(f"Applied theme '{body.theme_id}' to job {job_id}")
617
+ logger.info(f"Applied theme '{effective_theme_id}' to job {job_id}")
608
618
  except Exception as e:
609
- logger.warning(f"Failed to prepare theme '{body.theme_id}' for job {job_id}: {e}")
619
+ logger.warning(f"Failed to prepare theme '{effective_theme_id}' for job {job_id}: {e}")
610
620
  # Continue without theme - job can still be processed with defaults
611
621
 
612
622
  # Handle style file uploads if provided
@@ -562,9 +562,18 @@ async def upload_and_create_job(
562
562
  detail=f"Invalid color_overrides JSON: {e}"
563
563
  )
564
564
 
565
+ # Apply default theme if none specified
566
+ # This ensures all karaoke videos use the Nomad theme by default
567
+ effective_theme_id = theme_id
568
+ if effective_theme_id is None:
569
+ theme_service = get_theme_service()
570
+ effective_theme_id = theme_service.get_default_theme_id()
571
+ if effective_theme_id:
572
+ logger.info(f"Applying default theme: {effective_theme_id}")
573
+
565
574
  # Resolve CDG/TXT defaults based on theme
566
575
  resolved_cdg, resolved_txt = _resolve_cdg_txt_defaults(
567
- theme_id, enable_cdg, enable_txt
576
+ effective_theme_id, enable_cdg, enable_txt
568
577
  )
569
578
 
570
579
  # Check if any custom style files are being uploaded (overrides theme)
@@ -596,7 +605,7 @@ async def upload_and_create_job(
596
605
  artist=artist,
597
606
  title=title,
598
607
  filename=file.filename,
599
- theme_id=theme_id,
608
+ theme_id=effective_theme_id,
600
609
  color_overrides=parsed_color_overrides,
601
610
  enable_cdg=resolved_cdg,
602
611
  enable_txt=resolved_txt,
@@ -638,16 +647,16 @@ async def upload_and_create_job(
638
647
  theme_style_params_path = None
639
648
  theme_style_assets = {}
640
649
  theme_youtube_desc = None
641
- if theme_id and not has_custom_style_files:
650
+ if effective_theme_id and not has_custom_style_files:
642
651
  try:
643
652
  theme_style_params_path, theme_style_assets, theme_youtube_desc = _prepare_theme_for_job(
644
- job_id, theme_id, parsed_color_overrides or None
653
+ job_id, effective_theme_id, parsed_color_overrides or None
645
654
  )
646
- logger.info(f"Applied theme '{theme_id}' to job {job_id}")
655
+ logger.info(f"Applied theme '{effective_theme_id}' to job {job_id}")
647
656
  except HTTPException:
648
657
  raise # Re-raise validation errors (e.g., theme not found)
649
658
  except Exception as e:
650
- logger.warning(f"Failed to prepare theme '{theme_id}' for job {job_id}: {e}")
659
+ logger.warning(f"Failed to prepare theme '{effective_theme_id}' for job {job_id}: {e}")
651
660
  # Continue without theme - job can still be processed with defaults
652
661
 
653
662
  # Upload main audio file to GCS
@@ -1097,9 +1106,18 @@ async def create_job_with_upload_urls(
1097
1106
  # Get original audio filename
1098
1107
  audio_file = audio_files[0]
1099
1108
 
1109
+ # Apply default theme if none specified
1110
+ # This ensures all karaoke videos use the Nomad theme by default
1111
+ effective_theme_id = body.theme_id
1112
+ if effective_theme_id is None:
1113
+ theme_service = get_theme_service()
1114
+ effective_theme_id = theme_service.get_default_theme_id()
1115
+ if effective_theme_id:
1116
+ logger.info(f"Applying default theme: {effective_theme_id}")
1117
+
1100
1118
  # Resolve CDG/TXT defaults based on theme
1101
1119
  resolved_cdg, resolved_txt = _resolve_cdg_txt_defaults(
1102
- body.theme_id, body.enable_cdg, body.enable_txt
1120
+ effective_theme_id, body.enable_cdg, body.enable_txt
1103
1121
  )
1104
1122
 
1105
1123
  # Check if style_params is being uploaded (overrides theme)
@@ -1113,7 +1131,7 @@ async def create_job_with_upload_urls(
1113
1131
  artist=body.artist,
1114
1132
  title=body.title,
1115
1133
  filename=audio_file.filename,
1116
- theme_id=body.theme_id,
1134
+ theme_id=effective_theme_id,
1117
1135
  color_overrides=body.color_overrides or {},
1118
1136
  enable_cdg=resolved_cdg,
1119
1137
  enable_txt=resolved_txt,
@@ -1145,9 +1163,9 @@ async def create_job_with_upload_urls(
1145
1163
  logger.info(f"Created job {job_id} for {body.artist} - {body.title} (signed URL upload flow)")
1146
1164
 
1147
1165
  # If theme is set and no style_params uploaded, prepare theme style now
1148
- if body.theme_id and not has_style_params_upload:
1166
+ if effective_theme_id and not has_style_params_upload:
1149
1167
  style_params_path, style_assets, youtube_desc = _prepare_theme_for_job(
1150
- job_id, body.theme_id, body.color_overrides
1168
+ job_id, effective_theme_id, body.color_overrides
1151
1169
  )
1152
1170
  # Update job with theme style data
1153
1171
  update_data = {
@@ -1157,7 +1175,7 @@ async def create_job_with_upload_urls(
1157
1175
  if youtube_desc and not body.youtube_description:
1158
1176
  update_data['youtube_description_template'] = youtube_desc
1159
1177
  job_manager.update_job(job_id, update_data)
1160
- logger.info(f"Applied theme '{body.theme_id}' to job {job_id}")
1178
+ logger.info(f"Applied theme '{effective_theme_id}' to job {job_id}")
1161
1179
 
1162
1180
  # Generate signed upload URLs for each file
1163
1181
  upload_urls = []
@@ -1521,9 +1539,18 @@ async def create_job_from_url(
1521
1539
  artist = body.artist
1522
1540
  title = body.title
1523
1541
 
1542
+ # Apply default theme if none specified
1543
+ # This ensures all karaoke videos use the Nomad theme by default
1544
+ effective_theme_id = body.theme_id
1545
+ if effective_theme_id is None:
1546
+ theme_service = get_theme_service()
1547
+ effective_theme_id = theme_service.get_default_theme_id()
1548
+ if effective_theme_id:
1549
+ logger.info(f"Applying default theme: {effective_theme_id}")
1550
+
1524
1551
  # Resolve CDG/TXT defaults based on theme
1525
1552
  resolved_cdg, resolved_txt = _resolve_cdg_txt_defaults(
1526
- body.theme_id, body.enable_cdg, body.enable_txt
1553
+ effective_theme_id, body.enable_cdg, body.enable_txt
1527
1554
  )
1528
1555
 
1529
1556
  # Prefer authenticated user's email over request body
@@ -1535,7 +1562,7 @@ async def create_job_from_url(
1535
1562
  artist=artist,
1536
1563
  title=title,
1537
1564
  filename=None, # No file uploaded
1538
- theme_id=body.theme_id,
1565
+ theme_id=effective_theme_id,
1539
1566
  color_overrides=body.color_overrides or {},
1540
1567
  enable_cdg=resolved_cdg,
1541
1568
  enable_txt=resolved_txt,
@@ -1564,9 +1591,9 @@ async def create_job_from_url(
1564
1591
  metrics.record_job_created(job_id, source="url")
1565
1592
 
1566
1593
  # If theme is set, prepare theme style now
1567
- if body.theme_id:
1594
+ if effective_theme_id:
1568
1595
  style_params_path, style_assets, youtube_desc = _prepare_theme_for_job(
1569
- job_id, body.theme_id, body.color_overrides
1596
+ job_id, effective_theme_id, body.color_overrides
1570
1597
  )
1571
1598
  # Update job with theme style data
1572
1599
  update_data = {
@@ -1576,7 +1603,7 @@ async def create_job_from_url(
1576
1603
  if youtube_desc and not body.youtube_description:
1577
1604
  update_data['youtube_description_template'] = youtube_desc
1578
1605
  job_manager.update_job(job_id, update_data)
1579
- logger.info(f"Applied theme '{body.theme_id}' to job {job_id}")
1606
+ logger.info(f"Applied theme '{effective_theme_id}' to job {job_id}")
1580
1607
 
1581
1608
  logger.info(f"Created URL-based job {job_id} for URL: {body.url}")
1582
1609
  if artist:
@@ -1768,9 +1795,18 @@ async def create_finalise_only_job(
1768
1795
  # Extract request metadata
1769
1796
  request_metadata = extract_request_metadata(request, created_from="finalise_only_upload")
1770
1797
 
1798
+ # Apply default theme if none specified
1799
+ # This ensures all karaoke videos use the Nomad theme by default
1800
+ effective_theme_id = body.theme_id
1801
+ if effective_theme_id is None:
1802
+ theme_service = get_theme_service()
1803
+ effective_theme_id = theme_service.get_default_theme_id()
1804
+ if effective_theme_id:
1805
+ logger.info(f"Applying default theme: {effective_theme_id}")
1806
+
1771
1807
  # Resolve CDG/TXT defaults based on theme
1772
1808
  resolved_cdg, resolved_txt = _resolve_cdg_txt_defaults(
1773
- body.theme_id, body.enable_cdg, body.enable_txt
1809
+ effective_theme_id, body.enable_cdg, body.enable_txt
1774
1810
  )
1775
1811
 
1776
1812
  # Check if style_params is being uploaded (overrides theme)
@@ -1784,7 +1820,7 @@ async def create_finalise_only_job(
1784
1820
  artist=body.artist,
1785
1821
  title=body.title,
1786
1822
  filename="finalise_only", # No single audio file - using prep outputs
1787
- theme_id=body.theme_id,
1823
+ theme_id=effective_theme_id,
1788
1824
  color_overrides=body.color_overrides or {},
1789
1825
  enable_cdg=resolved_cdg,
1790
1826
  enable_txt=resolved_txt,
@@ -1808,9 +1844,9 @@ async def create_finalise_only_job(
1808
1844
  logger.info(f"Created finalise-only job {job_id} for {body.artist} - {body.title}")
1809
1845
 
1810
1846
  # If theme is set and no style_params uploaded, prepare theme style now
1811
- if body.theme_id and not has_style_params_upload:
1847
+ if effective_theme_id and not has_style_params_upload:
1812
1848
  style_params_path, style_assets, youtube_desc = _prepare_theme_for_job(
1813
- job_id, body.theme_id, body.color_overrides
1849
+ job_id, effective_theme_id, body.color_overrides
1814
1850
  )
1815
1851
  # Update job with theme style data
1816
1852
  update_data = {
@@ -1820,7 +1856,7 @@ async def create_finalise_only_job(
1820
1856
  if youtube_desc and not body.youtube_description:
1821
1857
  update_data['youtube_description_template'] = youtube_desc
1822
1858
  job_manager.update_job(job_id, update_data)
1823
- logger.info(f"Applied theme '{body.theme_id}' to job {job_id}")
1859
+ logger.info(f"Applied theme '{effective_theme_id}' to job {job_id}")
1824
1860
 
1825
1861
  # Generate signed upload URLs for each file
1826
1862
  upload_urls = []