karaoke-gen 0.96.0__py3-none-any.whl → 0.99.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. backend/api/routes/admin.py +184 -91
  2. backend/api/routes/audio_search.py +16 -6
  3. backend/api/routes/file_upload.py +57 -21
  4. backend/api/routes/health.py +65 -0
  5. backend/api/routes/jobs.py +19 -0
  6. backend/api/routes/users.py +543 -44
  7. backend/main.py +25 -1
  8. backend/services/encoding_service.py +128 -31
  9. backend/services/job_manager.py +12 -1
  10. backend/services/langfuse_preloader.py +98 -0
  11. backend/services/nltk_preloader.py +122 -0
  12. backend/services/spacy_preloader.py +65 -0
  13. backend/services/stripe_service.py +96 -0
  14. backend/tests/emulator/conftest.py +22 -1
  15. backend/tests/test_job_manager.py +25 -8
  16. backend/tests/test_jobs_api.py +11 -1
  17. backend/tests/test_spacy_preloader.py +119 -0
  18. backend/utils/test_data.py +27 -0
  19. backend/workers/screens_worker.py +16 -6
  20. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.99.3.dist-info}/METADATA +1 -1
  21. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.99.3.dist-info}/RECORD +30 -25
  22. lyrics_transcriber/correction/agentic/agent.py +17 -6
  23. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -43
  24. lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
  25. lyrics_transcriber/correction/anchor_sequence.py +151 -37
  26. lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
  27. lyrics_transcriber/correction/phrase_analyzer.py +18 -0
  28. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.99.3.dist-info}/WHEEL +0 -0
  29. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.99.3.dist-info}/entry_points.txt +0 -0
  30. {karaoke_gen-0.96.0.dist-info → karaoke_gen-0.99.3.dist-info}/licenses/LICENSE +0 -0
@@ -13,6 +13,9 @@ from backend.services.flacfetch_client import get_flacfetch_client
13
13
  from backend.services.email_service import get_email_service
14
14
  from backend.services.stripe_service import get_stripe_service
15
15
  from backend.services.encoding_service import get_encoding_service
16
+ from backend.services.spacy_preloader import get_preloaded_model, is_model_preloaded
17
+ from backend.services.nltk_preloader import get_preloaded_cmudict, is_cmudict_preloaded
18
+ from backend.services.langfuse_preloader import get_preloaded_langfuse_handler, is_langfuse_preloaded, is_langfuse_configured
16
19
 
17
20
  router = APIRouter()
18
21
  logger = logging.getLogger(__name__)
@@ -334,6 +337,68 @@ async def detailed_health_check() -> Dict[str, Any]:
334
337
  }
335
338
 
336
339
 
340
+ @router.get("/health/preload-status")
341
+ async def preload_status() -> Dict[str, Any]:
342
+ """
343
+ Check status of preloaded resources for performance optimization.
344
+
345
+ Use this endpoint to verify that NLTK, SpaCy, and Langfuse resources
346
+ were successfully preloaded at container startup. If any show as
347
+ not preloaded, check Cloud Run startup logs for errors.
348
+
349
+ Expected state after successful deployment:
350
+ - spacy.preloaded: true
351
+ - nltk.preloaded: true
352
+ - langfuse.preloaded: true (if configured) or configured: false
353
+ """
354
+ # SpaCy status
355
+ spacy_model = get_preloaded_model("en_core_web_sm")
356
+ spacy_status = {
357
+ "preloaded": is_model_preloaded("en_core_web_sm"),
358
+ "model": "en_core_web_sm",
359
+ }
360
+ if spacy_model:
361
+ spacy_status["vocab_size"] = len(spacy_model.vocab)
362
+
363
+ # NLTK status
364
+ cmudict = get_preloaded_cmudict()
365
+ nltk_status = {
366
+ "preloaded": is_cmudict_preloaded(),
367
+ "resource": "cmudict",
368
+ }
369
+ if cmudict:
370
+ nltk_status["entries"] = len(cmudict)
371
+
372
+ # Langfuse status
373
+ langfuse_handler = get_preloaded_langfuse_handler()
374
+ langfuse_status = {
375
+ "configured": is_langfuse_configured(),
376
+ "preloaded": is_langfuse_preloaded(),
377
+ }
378
+ if langfuse_handler:
379
+ langfuse_status["handler_type"] = type(langfuse_handler).__name__
380
+
381
+ # Overall status
382
+ all_preloaded = (
383
+ spacy_status["preloaded"]
384
+ and nltk_status["preloaded"]
385
+ and (langfuse_status["preloaded"] or not langfuse_status["configured"])
386
+ )
387
+
388
+ return {
389
+ "status": "ok" if all_preloaded else "degraded",
390
+ "message": "All resources preloaded" if all_preloaded else "Some resources not preloaded - check startup logs",
391
+ "spacy": spacy_status,
392
+ "nltk": nltk_status,
393
+ "langfuse": langfuse_status,
394
+ "performance_impact": {
395
+ "spacy_preload": "Saves ~60s on first lyrics correction",
396
+ "nltk_preload": "Saves ~100-150s on SyllablesMatchHandler init",
397
+ "langfuse_preload": "Saves ~200s on AgenticCorrector init",
398
+ }
399
+ }
400
+
401
+
337
402
  @router.get("/readiness")
338
403
  async def readiness_check() -> Dict[str, str]:
339
404
  """Readiness check endpoint for Cloud Run."""
@@ -25,10 +25,12 @@ from backend.models.requests import (
25
25
  from backend.services.job_manager import JobManager
26
26
  from backend.services.worker_service import get_worker_service
27
27
  from backend.services.storage_service import StorageService
28
+ from backend.services.theme_service import get_theme_service
28
29
  from backend.config import get_settings
29
30
  from backend.api.dependencies import require_admin, require_auth, require_instrumental_auth
30
31
  from backend.services.auth_service import UserType, AuthResult
31
32
  from backend.services.metrics import metrics
33
+ from backend.utils.test_data import is_test_email
32
34
 
33
35
 
34
36
  logger = logging.getLogger(__name__)
@@ -92,11 +94,22 @@ async def create_job(
92
94
  settings = get_settings()
93
95
  effective_enable_youtube_upload = request.enable_youtube_upload if request.enable_youtube_upload is not None else settings.default_enable_youtube_upload
94
96
 
97
+ # Apply default theme - all jobs require a theme
98
+ theme_service = get_theme_service()
99
+ effective_theme_id = theme_service.get_default_theme_id()
100
+ if not effective_theme_id:
101
+ raise HTTPException(
102
+ status_code=422,
103
+ detail="No default theme configured. Please contact support or specify a theme_id."
104
+ )
105
+ logger.info(f"Applying default theme: {effective_theme_id}")
106
+
95
107
  # Create job with all preferences
96
108
  job_create = JobCreate(
97
109
  url=str(request.url),
98
110
  artist=request.artist,
99
111
  title=request.title,
112
+ theme_id=effective_theme_id, # Required - all jobs must have a theme
100
113
  enable_cdg=request.enable_cdg,
101
114
  enable_txt=request.enable_txt,
102
115
  enable_youtube_upload=effective_enable_youtube_upload,
@@ -182,6 +195,7 @@ async def list_jobs(
182
195
  client_id: Optional[str] = None,
183
196
  created_after: Optional[str] = None,
184
197
  created_before: Optional[str] = None,
198
+ exclude_test: bool = True,
185
199
  limit: int = 100,
186
200
  auth_result: AuthResult = Depends(require_auth)
187
201
  ) -> List[Job]:
@@ -196,6 +210,7 @@ async def list_jobs(
196
210
  client_id: Filter by request_metadata.client_id (customer identifier)
197
211
  created_after: Filter jobs created after this ISO datetime (e.g., 2024-01-01T00:00:00Z)
198
212
  created_before: Filter jobs created before this ISO datetime
213
+ exclude_test: If True (default), exclude jobs from test users (admin only)
199
214
  limit: Maximum number of jobs to return (default 100)
200
215
 
201
216
  Returns:
@@ -242,6 +257,10 @@ async def list_jobs(
242
257
  limit=limit
243
258
  )
244
259
 
260
+ # Filter out test user jobs if exclude_test is True (admin only)
261
+ if exclude_test and auth_result.is_admin:
262
+ jobs = [j for j in jobs if not is_test_email(j.user_email or "")]
263
+
245
264
  logger.debug(f"Listed {len(jobs)} jobs for user={auth_result.user_email}, admin={auth_result.is_admin}")
246
265
  return jobs
247
266
  except HTTPException: