karaoke-gen 0.90.1__py3-none-any.whl → 0.99.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. backend/.coveragerc +20 -0
  2. backend/.gitignore +37 -0
  3. backend/Dockerfile +43 -0
  4. backend/Dockerfile.base +74 -0
  5. backend/README.md +242 -0
  6. backend/__init__.py +0 -0
  7. backend/api/__init__.py +0 -0
  8. backend/api/dependencies.py +457 -0
  9. backend/api/routes/__init__.py +0 -0
  10. backend/api/routes/admin.py +835 -0
  11. backend/api/routes/audio_search.py +913 -0
  12. backend/api/routes/auth.py +348 -0
  13. backend/api/routes/file_upload.py +2112 -0
  14. backend/api/routes/health.py +409 -0
  15. backend/api/routes/internal.py +435 -0
  16. backend/api/routes/jobs.py +1629 -0
  17. backend/api/routes/review.py +652 -0
  18. backend/api/routes/themes.py +162 -0
  19. backend/api/routes/users.py +1513 -0
  20. backend/config.py +172 -0
  21. backend/main.py +157 -0
  22. backend/middleware/__init__.py +5 -0
  23. backend/middleware/audit_logging.py +124 -0
  24. backend/models/__init__.py +0 -0
  25. backend/models/job.py +519 -0
  26. backend/models/requests.py +123 -0
  27. backend/models/theme.py +153 -0
  28. backend/models/user.py +254 -0
  29. backend/models/worker_log.py +164 -0
  30. backend/pyproject.toml +29 -0
  31. backend/quick-check.sh +93 -0
  32. backend/requirements.txt +29 -0
  33. backend/run_tests.sh +60 -0
  34. backend/services/__init__.py +0 -0
  35. backend/services/audio_analysis_service.py +243 -0
  36. backend/services/audio_editing_service.py +278 -0
  37. backend/services/audio_search_service.py +702 -0
  38. backend/services/auth_service.py +630 -0
  39. backend/services/credential_manager.py +792 -0
  40. backend/services/discord_service.py +172 -0
  41. backend/services/dropbox_service.py +301 -0
  42. backend/services/email_service.py +1093 -0
  43. backend/services/encoding_interface.py +454 -0
  44. backend/services/encoding_service.py +502 -0
  45. backend/services/firestore_service.py +512 -0
  46. backend/services/flacfetch_client.py +573 -0
  47. backend/services/gce_encoding/README.md +72 -0
  48. backend/services/gce_encoding/__init__.py +22 -0
  49. backend/services/gce_encoding/main.py +589 -0
  50. backend/services/gce_encoding/requirements.txt +16 -0
  51. backend/services/gdrive_service.py +356 -0
  52. backend/services/job_logging.py +258 -0
  53. backend/services/job_manager.py +853 -0
  54. backend/services/job_notification_service.py +271 -0
  55. backend/services/langfuse_preloader.py +98 -0
  56. backend/services/local_encoding_service.py +590 -0
  57. backend/services/local_preview_encoding_service.py +407 -0
  58. backend/services/lyrics_cache_service.py +216 -0
  59. backend/services/metrics.py +413 -0
  60. backend/services/nltk_preloader.py +122 -0
  61. backend/services/packaging_service.py +287 -0
  62. backend/services/rclone_service.py +106 -0
  63. backend/services/spacy_preloader.py +65 -0
  64. backend/services/storage_service.py +209 -0
  65. backend/services/stripe_service.py +371 -0
  66. backend/services/structured_logging.py +254 -0
  67. backend/services/template_service.py +330 -0
  68. backend/services/theme_service.py +469 -0
  69. backend/services/tracing.py +543 -0
  70. backend/services/user_service.py +721 -0
  71. backend/services/worker_service.py +558 -0
  72. backend/services/youtube_service.py +112 -0
  73. backend/services/youtube_upload_service.py +445 -0
  74. backend/tests/__init__.py +4 -0
  75. backend/tests/conftest.py +224 -0
  76. backend/tests/emulator/__init__.py +7 -0
  77. backend/tests/emulator/conftest.py +109 -0
  78. backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
  79. backend/tests/emulator/test_emulator_integration.py +356 -0
  80. backend/tests/emulator/test_style_loading_direct.py +436 -0
  81. backend/tests/emulator/test_worker_logs_direct.py +229 -0
  82. backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
  83. backend/tests/requirements-test.txt +10 -0
  84. backend/tests/requirements.txt +6 -0
  85. backend/tests/test_admin_email_endpoints.py +411 -0
  86. backend/tests/test_api_integration.py +460 -0
  87. backend/tests/test_api_routes.py +93 -0
  88. backend/tests/test_audio_analysis_service.py +294 -0
  89. backend/tests/test_audio_editing_service.py +386 -0
  90. backend/tests/test_audio_search.py +1398 -0
  91. backend/tests/test_audio_services.py +378 -0
  92. backend/tests/test_auth_firestore.py +231 -0
  93. backend/tests/test_config_extended.py +68 -0
  94. backend/tests/test_credential_manager.py +377 -0
  95. backend/tests/test_dependencies.py +54 -0
  96. backend/tests/test_discord_service.py +244 -0
  97. backend/tests/test_distribution_services.py +820 -0
  98. backend/tests/test_dropbox_service.py +472 -0
  99. backend/tests/test_email_service.py +492 -0
  100. backend/tests/test_emulator_integration.py +322 -0
  101. backend/tests/test_encoding_interface.py +412 -0
  102. backend/tests/test_file_upload.py +1739 -0
  103. backend/tests/test_flacfetch_client.py +632 -0
  104. backend/tests/test_gdrive_service.py +524 -0
  105. backend/tests/test_instrumental_api.py +431 -0
  106. backend/tests/test_internal_api.py +343 -0
  107. backend/tests/test_job_creation_regression.py +583 -0
  108. backend/tests/test_job_manager.py +356 -0
  109. backend/tests/test_job_manager_notifications.py +329 -0
  110. backend/tests/test_job_notification_service.py +443 -0
  111. backend/tests/test_jobs_api.py +283 -0
  112. backend/tests/test_local_encoding_service.py +423 -0
  113. backend/tests/test_local_preview_encoding_service.py +567 -0
  114. backend/tests/test_main.py +87 -0
  115. backend/tests/test_models.py +918 -0
  116. backend/tests/test_packaging_service.py +382 -0
  117. backend/tests/test_requests.py +201 -0
  118. backend/tests/test_routes_jobs.py +282 -0
  119. backend/tests/test_routes_review.py +337 -0
  120. backend/tests/test_services.py +556 -0
  121. backend/tests/test_services_extended.py +112 -0
  122. backend/tests/test_spacy_preloader.py +119 -0
  123. backend/tests/test_storage_service.py +448 -0
  124. backend/tests/test_style_upload.py +261 -0
  125. backend/tests/test_template_service.py +295 -0
  126. backend/tests/test_theme_service.py +516 -0
  127. backend/tests/test_unicode_sanitization.py +522 -0
  128. backend/tests/test_upload_api.py +256 -0
  129. backend/tests/test_validate.py +156 -0
  130. backend/tests/test_video_worker_orchestrator.py +847 -0
  131. backend/tests/test_worker_log_subcollection.py +509 -0
  132. backend/tests/test_worker_logging.py +365 -0
  133. backend/tests/test_workers.py +1116 -0
  134. backend/tests/test_workers_extended.py +178 -0
  135. backend/tests/test_youtube_service.py +247 -0
  136. backend/tests/test_youtube_upload_service.py +568 -0
  137. backend/utils/test_data.py +27 -0
  138. backend/validate.py +173 -0
  139. backend/version.py +27 -0
  140. backend/workers/README.md +597 -0
  141. backend/workers/__init__.py +11 -0
  142. backend/workers/audio_worker.py +618 -0
  143. backend/workers/lyrics_worker.py +683 -0
  144. backend/workers/render_video_worker.py +483 -0
  145. backend/workers/screens_worker.py +535 -0
  146. backend/workers/style_helper.py +198 -0
  147. backend/workers/video_worker.py +1277 -0
  148. backend/workers/video_worker_orchestrator.py +701 -0
  149. backend/workers/worker_logging.py +278 -0
  150. karaoke_gen/instrumental_review/static/index.html +7 -4
  151. karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
  152. karaoke_gen/utils/__init__.py +163 -8
  153. karaoke_gen/video_background_processor.py +9 -4
  154. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/METADATA +1 -1
  155. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/RECORD +196 -46
  156. lyrics_transcriber/correction/agentic/agent.py +17 -6
  157. lyrics_transcriber/correction/agentic/providers/config.py +9 -5
  158. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -93
  159. lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
  160. lyrics_transcriber/correction/anchor_sequence.py +151 -37
  161. lyrics_transcriber/correction/corrector.py +192 -130
  162. lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
  163. lyrics_transcriber/correction/operations.py +24 -9
  164. lyrics_transcriber/correction/phrase_analyzer.py +18 -0
  165. lyrics_transcriber/frontend/package-lock.json +2 -2
  166. lyrics_transcriber/frontend/package.json +1 -1
  167. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
  168. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
  169. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
  170. lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
  171. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
  172. lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
  173. lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
  174. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
  175. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
  176. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
  177. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
  178. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  179. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
  180. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
  181. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
  182. lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
  183. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
  184. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
  185. lyrics_transcriber/frontend/src/theme.ts +42 -15
  186. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  187. lyrics_transcriber/frontend/vite.config.js +5 -0
  188. lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
  189. lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
  190. lyrics_transcriber/frontend/web_assets/index.html +6 -2
  191. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
  192. lyrics_transcriber/output/generator.py +17 -3
  193. lyrics_transcriber/output/video.py +60 -95
  194. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
  195. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/WHEEL +0 -0
  196. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/entry_points.txt +0 -0
  197. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/licenses/LICENSE +0 -0
@@ -268,12 +268,26 @@ class LyricsCorrector:
268
268
  _adapt = None
269
269
  _ModelRouter = None
270
270
 
271
+ # Pre-initialized agentic corrector (created once, reused for all gaps)
272
+ _agentic_agent = None
273
+
271
274
  if use_agentic_env:
272
275
  try:
273
276
  from lyrics_transcriber.correction.agentic.agent import AgenticCorrector as _AgenticCorrector
274
277
  from lyrics_transcriber.correction.agentic.adapter import adapt_proposals_to_word_corrections as _adapt
275
278
  from lyrics_transcriber.correction.agentic.router import ModelRouter as _ModelRouter
276
279
  self.logger.info("🤖 Agentic modules imported successfully - running in AGENTIC-ONLY mode")
280
+
281
+ # Create agent ONCE and reuse for all gaps (avoids repeated model initialization)
282
+ _router = _ModelRouter()
283
+ model_id = _router.choose_model("gap", uncertainty=0.5) # Use default uncertainty
284
+ self.logger.info(f"🤖 Creating single AgenticCorrector with model: {model_id}")
285
+ _agentic_agent = _AgenticCorrector.from_model(
286
+ model=model_id,
287
+ session_id=session_id,
288
+ cache_dir=str(self._cache_dir)
289
+ )
290
+ self.logger.info("🤖 AgenticCorrector initialized and ready for all gaps")
277
291
  except Exception as e:
278
292
  self.logger.error(f"🤖 Failed to import agentic modules but USE_AGENTIC_AI=1: {e}")
279
293
  raise RuntimeError(f"Agentic AI correction is enabled but required modules could not be imported: {e}") from e
@@ -443,145 +457,193 @@ class LyricsCorrector:
443
457
  sys.exit(0)
444
458
  # === END TEMPORARY CODE ===
445
459
 
446
- for i, gap in enumerate(gap_sequences, 1):
447
- # Check deadline before processing each gap (agentic mode only)
448
- # This allows us to abort early and return uncorrected results for human review
449
- if deadline and use_agentic_env and time.time() > deadline:
450
- self.logger.warning(
451
- f"⏰ AGENTIC TIMEOUT: Deadline exceeded after processing {i-1}/{len(gap_sequences)} gaps. "
452
- "Skipping remaining gaps - human review will correct any issues."
453
- )
454
- # Break out of loop - continue with whatever corrections we have (likely none)
455
- break
460
+ # AGENTIC-ONLY MODE: Process all gaps in parallel for better performance
461
+ if use_agentic_env:
462
+ from concurrent.futures import ThreadPoolExecutor, as_completed
463
+ from lyrics_transcriber.correction.agentic.providers.config import ProviderConfig
456
464
 
457
- self.logger.info(f"Processing gap {i}/{len(gap_sequences)} at position {gap.transcription_position}")
465
+ # Get parallel processing config
466
+ _config = ProviderConfig.from_env()
467
+ max_workers = _config.max_parallel_gaps
468
+ self.logger.info(f"🤖 Processing {len(gap_sequences)} gaps in parallel (max_workers={max_workers})")
458
469
 
459
- # Get the actual words for logging
460
- gap_words = [word_map[word_id] for word_id in gap.transcribed_word_ids]
461
- self.logger.debug(f"Gap text: '{' '.join(w.text for w in gap_words)}'")
470
+ # Pre-compute shared data structures once (not per-gap)
471
+ all_transcribed_words = []
472
+ for seg in segments:
473
+ all_transcribed_words.extend(seg.words)
474
+ word_position = {w.id: idx for idx, w in enumerate(all_transcribed_words)}
462
475
 
463
- # AGENTIC-ONLY MODE: Use agentic correction exclusively
464
- if use_agentic_env:
465
- self.logger.info(f"🤖 Attempting agentic correction for gap {i}/{len(gap_sequences)}")
476
+ # Build reference contexts once (same for all gaps)
477
+ reference_contexts = {}
478
+ for source, lyrics_data in self.reference_lyrics.items():
479
+ if lyrics_data and lyrics_data.segments:
480
+ ref_words = []
481
+ for seg in lyrics_data.segments:
482
+ ref_words.extend([w.text for w in seg.words])
483
+ reference_contexts[source] = " ".join(ref_words)
484
+
485
+ # Get artist and title once
486
+ artist = metadata.get("artist") if metadata else None
487
+ title = metadata.get("title") if metadata else None
488
+
489
+ # Prepare all gap inputs upfront
490
+ gap_inputs = []
491
+ for i, gap in enumerate(gap_sequences, 1):
492
+ # Prepare gap words data
493
+ gap_words_data = []
494
+ for word_id in gap.transcribed_word_ids:
495
+ if word_id in word_map:
496
+ word = word_map[word_id]
497
+ gap_words_data.append({
498
+ "id": word_id,
499
+ "text": word.text,
500
+ "start_time": getattr(word, 'start_time', 0),
501
+ "end_time": getattr(word, 'end_time', 0)
502
+ })
503
+
504
+ # Compute context words
505
+ gap_positions = [word_position[wid] for wid in gap.transcribed_word_ids if wid in word_position]
506
+ preceding_words = ""
507
+ following_words = ""
508
+
509
+ if gap_positions:
510
+ first_gap_pos = min(gap_positions)
511
+ last_gap_pos = max(gap_positions)
512
+
513
+ # Get 10 words before
514
+ start_pos = max(0, first_gap_pos - 10)
515
+ preceding_list = [all_transcribed_words[idx].text for idx in range(start_pos, first_gap_pos) if idx < len(all_transcribed_words)]
516
+ preceding_words = " ".join(preceding_list)
517
+
518
+ # Get 10 words after
519
+ end_pos = min(len(all_transcribed_words), last_gap_pos + 11)
520
+ following_list = [all_transcribed_words[idx].text for idx in range(last_gap_pos + 1, end_pos) if idx < len(all_transcribed_words)]
521
+ following_words = " ".join(following_list)
522
+
523
+ gap_inputs.append({
524
+ 'index': i,
525
+ 'gap': gap,
526
+ 'gap_id': f"gap_{i}",
527
+ 'gap_words': gap_words_data,
528
+ 'preceding_words': preceding_words,
529
+ 'following_words': following_words,
530
+ 'reference_contexts': reference_contexts,
531
+ 'artist': artist,
532
+ 'title': title
533
+ })
534
+
535
+ # Function to process a single gap (runs in thread pool)
536
+ def process_single_gap(gap_input):
537
+ """Process a single gap and return proposals. Thread-safe."""
538
+ idx = gap_input['index']
466
539
  try:
467
- # Prepare gap data for classification-first workflow
468
- gap_words_data = []
469
- for word_id in gap.transcribed_word_ids:
470
- if word_id in word_map:
471
- word = word_map[word_id]
472
- gap_words_data.append({
473
- "id": word_id,
474
- "text": word.text,
475
- "start_time": getattr(word, 'start_time', 0),
476
- "end_time": getattr(word, 'end_time', 0)
477
- })
478
-
479
- # Get context words
480
- all_transcribed_words = []
481
- for seg in segments:
482
- all_transcribed_words.extend(seg.words)
483
- word_position = {w.id: idx for idx, w in enumerate(all_transcribed_words)}
484
-
485
- gap_positions = [word_position[wid] for wid in gap.transcribed_word_ids if wid in word_position]
486
- preceding_words = ""
487
- following_words = ""
488
-
489
- if gap_positions:
490
- first_gap_pos = min(gap_positions)
491
- last_gap_pos = max(gap_positions)
492
-
493
- # Get 10 words before
494
- start_pos = max(0, first_gap_pos - 10)
495
- preceding_list = [all_transcribed_words[idx].text for idx in range(start_pos, first_gap_pos) if idx < len(all_transcribed_words)]
496
- preceding_words = " ".join(preceding_list)
497
-
498
- # Get 10 words after
499
- end_pos = min(len(all_transcribed_words), last_gap_pos + 11)
500
- following_list = [all_transcribed_words[idx].text for idx in range(last_gap_pos + 1, end_pos) if idx < len(all_transcribed_words)]
501
- following_words = " ".join(following_list)
502
-
503
- # Get reference contexts from all sources
504
- reference_contexts = {}
505
- for source, lyrics_data in self.reference_lyrics.items():
506
- if lyrics_data and lyrics_data.segments:
507
- ref_words = []
508
- for seg in lyrics_data.segments:
509
- ref_words.extend([w.text for w in seg.words])
510
- # For now, use full text (handlers will extract relevant portions)
511
- reference_contexts[source] = " ".join(ref_words)
512
-
513
- # Get artist and title from metadata
514
- artist = metadata.get("artist") if metadata else None
515
- title = metadata.get("title") if metadata else None
516
-
517
- # Choose model via router
518
- _router = _ModelRouter()
519
- uncertainty = 0.3 if len(gap_words_data) <= 2 else 0.7
520
- model_id = _router.choose_model("gap", uncertainty)
521
- self.logger.debug(f"🤖 Router selected model: {model_id}")
522
-
523
- # Create agent and use new classification-first workflow
524
- self.logger.debug(f"🤖 Creating AgenticCorrector with model: {model_id}")
525
- _agent = _AgenticCorrector.from_model(
526
- model=model_id,
527
- session_id=session_id,
528
- cache_dir=str(self._cache_dir)
529
- )
530
-
531
- # Use new propose_for_gap method
532
- self.logger.debug(f"🤖 Calling agent.propose_for_gap() for gap {i}")
533
- _proposals = _agent.propose_for_gap(
534
- gap_id=f"gap_{i}",
535
- gap_words=gap_words_data,
536
- preceding_words=preceding_words,
537
- following_words=following_words,
538
- reference_contexts=reference_contexts,
539
- artist=artist,
540
- title=title
540
+ proposals = _agentic_agent.propose_for_gap(
541
+ gap_id=gap_input['gap_id'],
542
+ gap_words=gap_input['gap_words'],
543
+ preceding_words=gap_input['preceding_words'],
544
+ following_words=gap_input['following_words'],
545
+ reference_contexts=gap_input['reference_contexts'],
546
+ artist=gap_input['artist'],
547
+ title=gap_input['title']
541
548
  )
542
- self.logger.debug(f"🤖 Agent returned {len(_proposals) if _proposals else 0} proposals")
543
- _agentic_corrections = _adapt(_proposals, word_map, linear_position_map) if _proposals else []
544
- self.logger.debug(f"🤖 Adapter returned {len(_agentic_corrections)} corrections")
545
-
546
- if _agentic_corrections:
547
- self.logger.info(f"🤖 Applying {len(_agentic_corrections)} agentic corrections for gap {i}")
548
- affected_word_ids = [w.id for w in self._get_affected_words(gap, segments)]
549
- affected_segment_ids = [s.id for s in self._get_affected_segments(gap, segments)]
550
- updated_segments = self._apply_corrections_to_segments(self._get_affected_segments(gap, segments), _agentic_corrections)
551
- for correction in _agentic_corrections:
552
- if correction.word_id and correction.corrected_word_id:
553
- word_id_map[correction.word_id] = correction.corrected_word_id
554
- for old_seg, new_seg in zip(self._get_affected_segments(gap, segments), updated_segments):
555
- segment_id_map[old_seg.id] = new_seg.id
556
- step = CorrectionStep(
557
- handler_name="AgenticCorrector",
558
- affected_word_ids=affected_word_ids,
559
- affected_segment_ids=affected_segment_ids,
560
- corrections=_agentic_corrections,
561
- segments_before=self._get_affected_segments(gap, segments),
562
- segments_after=updated_segments,
563
- created_word_ids=[w.id for w in self._get_new_words(updated_segments, affected_word_ids)],
564
- deleted_word_ids=[id for id in affected_word_ids if not self._word_exists(id, updated_segments)],
549
+ return {'index': idx, 'gap': gap_input['gap'], 'proposals': proposals, 'error': None}
550
+ except Exception as e:
551
+ return {'index': idx, 'gap': gap_input['gap'], 'proposals': None, 'error': str(e)}
552
+
553
+ # Process gaps in parallel
554
+ results = [None] * len(gap_inputs)
555
+ completed_count = 0
556
+ errors = []
557
+
558
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
559
+ # Submit all tasks
560
+ future_to_input = {executor.submit(process_single_gap, g): g for g in gap_inputs}
561
+
562
+ # Collect results as they complete
563
+ for future in as_completed(future_to_input):
564
+ # Check deadline
565
+ if deadline and time.time() > deadline:
566
+ self.logger.warning(
567
+ f"⏰ AGENTIC TIMEOUT: Deadline exceeded after processing {completed_count}/{len(gap_sequences)} gaps. "
568
+ "Cancelling remaining gaps - human review will correct any issues."
565
569
  )
566
- correction_steps.append(step)
567
- all_corrections.extend(_agentic_corrections)
568
- # Log corrections made
569
- for correction in _agentic_corrections:
570
- self.logger.info(
571
- f"Made correction: '{correction.original_word}' -> '{correction.corrected_word}' "
572
- f"(confidence: {correction.confidence:.2f}, reason: {correction.reason})"
573
- )
570
+ # Cancel remaining futures (use list() to avoid mutating dict during iteration)
571
+ for f in list(future_to_input.keys()):
572
+ f.cancel()
573
+ break
574
+
575
+ result = future.result()
576
+ idx = result['index'] - 1 # Convert 1-based to 0-based
577
+ results[idx] = result
578
+ completed_count += 1
579
+
580
+ if result['error']:
581
+ errors.append(f"Gap {result['index']}: {result['error']}")
582
+ self.logger.error(f"🤖 Gap {result['index']} failed: {result['error']}")
574
583
  else:
575
- self.logger.info(f"🤖 No agentic corrections needed for gap {i}")
576
-
577
- except Exception as e:
578
- # In agentic-only mode, fail fast instead of falling back
579
- self.logger.error(f"🤖 Agentic correction failed for gap {i}: {e}", exc_info=True)
580
- raise RuntimeError(f"Agentic AI correction failed for gap {i}: {e}") from e
581
-
582
- # Skip rule-based handlers completely in agentic mode
584
+ proposal_count = len(result['proposals']) if result['proposals'] else 0
585
+ self.logger.info(f"🤖 Gap {result['index']}/{len(gap_sequences)} completed ({proposal_count} proposals)")
586
+
587
+ self.logger.info(f"🤖 Parallel processing complete: {completed_count}/{len(gap_sequences)} gaps processed")
588
+
589
+ # If any errors occurred, fail fast
590
+ if errors:
591
+ raise RuntimeError(f"Agentic AI correction failed for {len(errors)} gaps: {'; '.join(errors)}")
592
+
593
+ # Apply corrections sequentially (must be in order due to segment modifications)
594
+ for result in results:
595
+ if result is None:
596
+ continue # Skipped due to deadline
597
+
598
+ i = result['index']
599
+ gap = result['gap']
600
+ _proposals = result['proposals']
601
+
602
+ _agentic_corrections = _adapt(_proposals, word_map, linear_position_map) if _proposals else []
603
+
604
+ if _agentic_corrections:
605
+ self.logger.info(f"🤖 Applying {len(_agentic_corrections)} agentic corrections for gap {i}")
606
+ affected_word_ids = [w.id for w in self._get_affected_words(gap, segments)]
607
+ affected_segment_ids = [s.id for s in self._get_affected_segments(gap, segments)]
608
+ updated_segments = self._apply_corrections_to_segments(self._get_affected_segments(gap, segments), _agentic_corrections)
609
+ for correction in _agentic_corrections:
610
+ if correction.word_id and correction.corrected_word_id:
611
+ word_id_map[correction.word_id] = correction.corrected_word_id
612
+ for old_seg, new_seg in zip(self._get_affected_segments(gap, segments), updated_segments):
613
+ segment_id_map[old_seg.id] = new_seg.id
614
+ step = CorrectionStep(
615
+ handler_name="AgenticCorrector",
616
+ affected_word_ids=affected_word_ids,
617
+ affected_segment_ids=affected_segment_ids,
618
+ corrections=_agentic_corrections,
619
+ segments_before=self._get_affected_segments(gap, segments),
620
+ segments_after=updated_segments,
621
+ created_word_ids=[w.id for w in self._get_new_words(updated_segments, affected_word_ids)],
622
+ deleted_word_ids=[id for id in affected_word_ids if not self._word_exists(id, updated_segments)],
623
+ )
624
+ correction_steps.append(step)
625
+ all_corrections.extend(_agentic_corrections)
626
+ # Log corrections made
627
+ for correction in _agentic_corrections:
628
+ self.logger.info(
629
+ f"Made correction: '{correction.original_word}' -> '{correction.corrected_word}' "
630
+ f"(confidence: {correction.confidence:.2f}, reason: {correction.reason})"
631
+ )
632
+ else:
633
+ self.logger.debug(f"🤖 No agentic corrections needed for gap {i}")
634
+
635
+ # RULE-BASED MODE: Process gaps sequentially
636
+ for i, gap in enumerate(gap_sequences, 1):
637
+ # Skip if we already processed in agentic mode
638
+ if use_agentic_env:
583
639
  continue
584
640
 
641
+ self.logger.info(f"Processing gap {i}/{len(gap_sequences)} at position {gap.transcription_position}")
642
+
643
+ # Get the actual words for logging
644
+ gap_words = [word_map[word_id] for word_id in gap.transcribed_word_ids]
645
+ self.logger.debug(f"Gap text: '{' '.join(w.text for w in gap_words)}'")
646
+
585
647
  # RULE-BASED MODE: Try each handler in order
586
648
  for handler in self.handlers:
587
649
  handler_name = handler.__class__.__name__
@@ -1,6 +1,7 @@
1
1
  from typing import List, Tuple, Dict, Any, Optional
2
2
  import spacy
3
3
  import logging
4
+ import time
4
5
  import pyphen
5
6
  import nltk
6
7
  from nltk.corpus import cmudict
@@ -11,6 +12,15 @@ from lyrics_transcriber.types import GapSequence, WordCorrection
11
12
  from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
12
13
  from lyrics_transcriber.correction.handlers.word_operations import WordOperations
13
14
 
15
+ # Try to import preloaders (may not exist in standalone library usage)
16
+ try:
17
+ from backend.services.spacy_preloader import get_preloaded_model
18
+ from backend.services.nltk_preloader import get_preloaded_cmudict
19
+
20
+ _HAS_PRELOADER = True
21
+ except ImportError:
22
+ _HAS_PRELOADER = False
23
+
14
24
 
15
25
  class SyllablesMatchHandler(GapCorrectionHandler):
16
26
  """Handles gaps where number of syllables in reference text matches number of syllables in transcription."""
@@ -18,11 +28,27 @@ class SyllablesMatchHandler(GapCorrectionHandler):
18
28
  def __init__(self, logger: Optional[logging.Logger] = None):
19
29
  super().__init__(logger)
20
30
  self.logger = logger or logging.getLogger(__name__)
31
+ init_start = time.time()
21
32
 
22
33
  # Marking SpacySyllables as used to prevent unused import warning
23
34
  _ = SpacySyllables
24
35
 
25
- # Load spacy model with syllables pipeline
36
+ # Try to use preloaded model first (avoids 60+ second load on Cloud Run)
37
+ if _HAS_PRELOADER:
38
+ preloaded = get_preloaded_model("en_core_web_sm")
39
+ if preloaded is not None:
40
+ self.logger.info("Using preloaded SpaCy model for syllable analysis")
41
+ self.nlp = preloaded
42
+ # Add syllables component if not already present
43
+ if "syllables" not in self.nlp.pipe_names:
44
+ self.nlp.add_pipe("syllables", after="tagger")
45
+ self._init_nltk_resources()
46
+ init_elapsed = time.time() - init_start
47
+ self.logger.info(f"Initialized SyllablesMatchHandler in {init_elapsed:.2f}s (preloaded)")
48
+ return
49
+
50
+ # Fall back to loading model directly
51
+ self.logger.info("Loading SpaCy model for syllable analysis (not preloaded)...")
26
52
  try:
27
53
  self.nlp = spacy.load("en_core_web_sm")
28
54
  except OSError:
@@ -43,10 +69,26 @@ class SyllablesMatchHandler(GapCorrectionHandler):
43
69
  if "syllables" not in self.nlp.pipe_names:
44
70
  self.nlp.add_pipe("syllables", after="tagger")
45
71
 
72
+ self._init_nltk_resources()
73
+ init_elapsed = time.time() - init_start
74
+ self.logger.info(f"Initialized SyllablesMatchHandler in {init_elapsed:.2f}s (lazy loaded)")
75
+
76
+ def _init_nltk_resources(self):
77
+ """Initialize NLTK resources (Pyphen and CMU dictionary)."""
78
+
46
79
  # Initialize Pyphen for English
47
80
  self.dic = pyphen.Pyphen(lang="en_US")
48
81
 
49
- # Initialize NLTK's CMU dictionary
82
+ # Try to use preloaded cmudict first (avoids 50-100+ second download on Cloud Run)
83
+ if _HAS_PRELOADER:
84
+ preloaded_cmudict = get_preloaded_cmudict()
85
+ if preloaded_cmudict is not None:
86
+ self.logger.debug("Using preloaded NLTK cmudict")
87
+ self.cmudict = preloaded_cmudict
88
+ return
89
+
90
+ # Fall back to loading directly
91
+ self.logger.info("Loading NLTK cmudict (not preloaded)...")
50
92
  try:
51
93
  self.cmudict = cmudict.dict()
52
94
  except LookupError:
@@ -274,11 +274,12 @@ class CorrectionOperations:
274
274
  audio_filepath: str,
275
275
  artist: Optional[str] = None,
276
276
  title: Optional[str] = None,
277
- logger: Optional[logging.Logger] = None
277
+ logger: Optional[logging.Logger] = None,
278
+ ass_only: bool = False,
278
279
  ) -> Dict[str, Any]:
279
280
  """
280
281
  Generate a preview video with current corrections.
281
-
282
+
282
283
  Args:
283
284
  correction_result: Current correction result
284
285
  updated_data: Updated correction data for preview
@@ -287,10 +288,12 @@ class CorrectionOperations:
287
288
  artist: Optional artist name
288
289
  title: Optional title
289
290
  logger: Optional logger instance
290
-
291
+ ass_only: If True, generate only ASS subtitles without video encoding.
292
+ Useful when video encoding is offloaded to external service.
293
+
291
294
  Returns:
292
- Dict with status, preview_hash, and video_path
293
-
295
+ Dict with status, preview_hash, and video_path (or ass_path if ass_only)
296
+
294
297
  Raises:
295
298
  ValueError: If preview video generation fails
296
299
  """
@@ -338,15 +341,27 @@ class CorrectionOperations:
338
341
  audio_filepath=audio_filepath,
339
342
  artist=artist,
340
343
  title=title,
344
+ ass_only=ass_only,
341
345
  )
342
-
346
+
347
+ # When ass_only, we only need the ASS file (video encoding done externally)
348
+ if ass_only:
349
+ if not preview_outputs.ass:
350
+ raise ValueError("Preview ASS generation failed")
351
+ logger.info(f"Generated preview ASS: {preview_outputs.ass}")
352
+ return {
353
+ "status": "success",
354
+ "preview_hash": preview_hash,
355
+ "ass_path": preview_outputs.ass,
356
+ }
357
+
343
358
  if not preview_outputs.video:
344
359
  raise ValueError("Preview video generation failed")
345
-
360
+
346
361
  logger.info(f"Generated preview video: {preview_outputs.video}")
347
-
362
+
348
363
  return {
349
364
  "status": "success",
350
365
  "preview_hash": preview_hash,
351
- "video_path": preview_outputs.video
366
+ "video_path": preview_outputs.video,
352
367
  }
@@ -5,6 +5,14 @@ import logging
5
5
  from lyrics_transcriber.correction.text_utils import clean_text
6
6
  from lyrics_transcriber.types import PhraseType, PhraseScore
7
7
 
8
+ # Try to import preloader (may not exist in standalone library usage)
9
+ try:
10
+ from backend.services.spacy_preloader import get_preloaded_model
11
+
12
+ _HAS_PRELOADER = True
13
+ except ImportError:
14
+ _HAS_PRELOADER = False
15
+
8
16
 
9
17
  class PhraseAnalyzer:
10
18
  """Language-agnostic phrase analyzer using spaCy"""
@@ -17,6 +25,16 @@ class PhraseAnalyzer:
17
25
  language_code: spaCy language model to use
18
26
  """
19
27
  self.logger = logger
28
+
29
+ # Try to use preloaded model first (avoids 60+ second load on Cloud Run)
30
+ if _HAS_PRELOADER:
31
+ preloaded = get_preloaded_model(language_code)
32
+ if preloaded is not None:
33
+ self.logger.info(f"Using preloaded SpaCy model: {language_code}")
34
+ self.nlp = preloaded
35
+ return
36
+
37
+ # Fall back to loading model directly
20
38
  self.logger.info(f"Initializing PhraseAnalyzer with language model: {language_code}")
21
39
  try:
22
40
  self.nlp = spacy.load(language_code)
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "lyrics-transcriber-frontend",
3
- "version": "0.83.0",
3
+ "version": "0.84.0",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "lyrics-transcriber-frontend",
9
- "version": "0.83.0",
9
+ "version": "0.84.0",
10
10
  "dependencies": {
11
11
  "@emotion/react": "^11.14.0",
12
12
  "@emotion/styled": "^11.14.0",
@@ -2,7 +2,7 @@
2
2
  "name": "lyrics-transcriber-frontend",
3
3
  "private": true,
4
4
  "homepage": "https://nomadkaraoke.github.io/lyrics-transcriber-frontend",
5
- "version": "0.83.0",
5
+ "version": "0.86.0",
6
6
  "type": "module",
7
7
  "scripts": {
8
8
  "dev": "vite",
@@ -98,7 +98,7 @@ export const AIFeedbackModal: React.FC<Props> = ({ isOpen, onClose, onSubmit, su
98
98
  onClick={() =>
99
99
  onSubmit({ reviewerAction, finalText: finalText || undefined, reasonCategory, reasonDetail: reasonDetail || undefined })
100
100
  }
101
- style={{ background: '#f97316', color: '#fff', border: 'none', borderRadius: 4, padding: '6px 12px', cursor: 'pointer' }}
101
+ style={{ background: '#ff7acc', color: '#fff', border: 'none', borderRadius: 4, padding: '6px 12px', cursor: 'pointer' }}
102
102
  >
103
103
  Submit
104
104
  </button>
@@ -43,21 +43,21 @@ const WordContainer = styled(Box, {
43
43
  '50%': { opacity: 0.5 }
44
44
  },
45
45
  '&:hover': {
46
- backgroundColor: 'rgba(34, 197, 94, 0.35)' // green tint hover for dark mode
46
+ backgroundColor: 'rgba(34, 197, 94, 0.35)' // green tint hover - works for both modes
47
47
  }
48
48
  }))
49
49
 
50
- const OriginalWordLabel = styled(Box)({
50
+ const OriginalWordLabel = styled(Box)(({ theme }) => ({
51
51
  position: 'absolute',
52
52
  top: '-14px',
53
53
  left: '0',
54
54
  fontSize: '0.6rem',
55
- color: '#888888', // slate-400 for dark mode
55
+ color: theme.palette.text.secondary, // Theme-aware text color
56
56
  textDecoration: 'line-through',
57
57
  opacity: 0.7,
58
58
  whiteSpace: 'nowrap',
59
59
  pointerEvents: 'none'
60
- })
60
+ }))
61
61
 
62
62
  const ActionsContainer = styled(Box)({
63
63
  display: 'inline-flex',
@@ -72,10 +72,14 @@ const ActionButton = styled(IconButton)(({ theme }) => ({
72
72
  minHeight: '20px',
73
73
  width: '20px',
74
74
  height: '20px',
75
- backgroundColor: 'rgba(30, 41, 59, 0.9)', // slate-800 with opacity for dark mode
76
- border: '1px solid rgba(248, 250, 252, 0.1)', // light border for dark mode
75
+ backgroundColor: theme.palette.mode === 'dark'
76
+ ? 'rgba(30, 41, 59, 0.9)' // slate-800 with opacity for dark mode
77
+ : 'rgba(241, 245, 249, 0.9)', // slate-100 for light mode
78
+ border: `1px solid ${theme.palette.divider}`,
77
79
  '&:hover': {
78
- backgroundColor: 'rgba(51, 65, 85, 1)', // slate-700 for dark mode
80
+ backgroundColor: theme.palette.mode === 'dark'
81
+ ? 'rgba(51, 65, 85, 1)' // slate-700 for dark mode
82
+ : 'rgba(226, 232, 240, 1)', // slate-200 for light mode
79
83
  transform: 'scale(1.1)'
80
84
  },
81
85
  '& .MuiSvgIcon-root': {