karaoke-gen 0.82.0__py3-none-any.whl → 0.86.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to a supported public registry. The information in this diff is provided for informational purposes only and reflects the changes between the two package versions exactly as they appear in that registry.
@@ -786,8 +786,8 @@
786
786
 
787
787
  if (waveformRes.ok) {
788
788
  waveformData = await waveformRes.json();
789
- // API returns duration_seconds, not duration
790
- duration = waveformData.duration_seconds || 0;
789
+ // API may return duration_seconds (cloud) or duration (local)
790
+ duration = waveformData.duration_seconds || waveformData.duration || 0;
791
791
  }
792
792
 
793
793
  // Set initial selection based on recommendation
@@ -3,6 +3,7 @@ import re
3
3
  import logging
4
4
  import shutil
5
5
  import json
6
+ from typing import Optional
6
7
  from lyrics_transcriber import LyricsTranscriber, OutputConfig, TranscriberConfig, LyricsConfig
7
8
  from lyrics_transcriber.core.controller import LyricsControllerResult
8
9
  from dotenv import load_dotenv
@@ -252,10 +253,11 @@ class LyricsProcessor:
252
253
  "See README.md 'Transcription Providers' section for detailed setup instructions."
253
254
  )
254
255
 
255
- def transcribe_lyrics(self, input_audio_wav, artist, title, track_output_dir, lyrics_artist=None, lyrics_title=None):
256
+ def transcribe_lyrics(self, input_audio_wav, artist, title, track_output_dir, lyrics_artist=None, lyrics_title=None,
257
+ agentic_deadline: Optional[float] = None):
256
258
  """
257
259
  Transcribe lyrics for a track.
258
-
260
+
259
261
  Args:
260
262
  input_audio_wav: Path to the audio file
261
263
  artist: Original artist name (used for filename generation)
@@ -263,7 +265,9 @@ class LyricsProcessor:
263
265
  track_output_dir: Output directory path
264
266
  lyrics_artist: Artist name for lyrics processing (defaults to artist if None)
265
267
  lyrics_title: Title for lyrics processing (defaults to title if None)
266
-
268
+ agentic_deadline: Optional Unix timestamp. If agentic correction is still
269
+ running after this time, it will abort and return uncorrected results.
270
+
267
271
  Raises:
268
272
  ValueError: If transcription is enabled but no providers are configured
269
273
  """
@@ -423,8 +427,8 @@ class LyricsProcessor:
423
427
  logger=self.logger,
424
428
  )
425
429
 
426
- # Process and get results
427
- results: LyricsControllerResult = transcriber.process()
430
+ # Process and get results (pass deadline for agentic timeout)
431
+ results: LyricsControllerResult = transcriber.process(agentic_deadline=agentic_deadline)
428
432
  self.logger.info(f"Transcriber Results Filepaths:")
429
433
  for key, value in results.__dict__.items():
430
434
  if key.endswith("_filepath"):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: karaoke-gen
3
- Version: 0.82.0
3
+ Version: 0.86.5
4
4
  Summary: Generate karaoke videos with synchronized lyrics. Handles the entire process from downloading audio and lyrics to creating the final video with title screens.
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -42,7 +42,7 @@ Requires-Dist: kbputils (>=0.0.16,<0.0.17)
42
42
  Requires-Dist: langchain (>=0.3.0)
43
43
  Requires-Dist: langchain-anthropic (>=0.2.0)
44
44
  Requires-Dist: langchain-core (>=0.3.0)
45
- Requires-Dist: langchain-google-vertexai (>=3.1.1)
45
+ Requires-Dist: langchain-google-genai (>=2.0.0)
46
46
  Requires-Dist: langchain-ollama (>=0.2.0)
47
47
  Requires-Dist: langchain-openai (>=0.2.0)
48
48
  Requires-Dist: langfuse (>=3.0.0)
@@ -8,12 +8,12 @@ karaoke_gen/instrumental_review/analyzer.py,sha256=Heg8TbrwM4g5IV7bavmO6EfVD4M0U
8
8
  karaoke_gen/instrumental_review/editor.py,sha256=_DGTjKMk5WhoGtLGtTvHzU522LJyQQ_DSY1r8fULuiA,11568
9
9
  karaoke_gen/instrumental_review/models.py,sha256=cUSb_JheJK0cGdKx9f59-9sRvRrhrgdTdKBzQN3lHto,5226
10
10
  karaoke_gen/instrumental_review/server.py,sha256=Ick90X77t2EeMRwtx2U08sSybadQyWH7G0tDG-4JqP4,19377
11
- karaoke_gen/instrumental_review/static/index.html,sha256=1lzo_W5B4HxNStWPiVaP4I6ctqDkXAABJkQmojvBDqc,63235
11
+ karaoke_gen/instrumental_review/static/index.html,sha256=anhmEGAhL0rV25q5V8GEnWxs2RnF7qA3dADwvGWCd88,63277
12
12
  karaoke_gen/instrumental_review/waveform.py,sha256=Q6LBPZrJAD6mzZ7TmRf3Tf4gwYhUYTHumJKytLs3hSg,12940
13
13
  karaoke_gen/karaoke_finalise/__init__.py,sha256=HqZ7TIhgt_tYZ-nb_NNCaejWAcF_aK-7wJY5TaW_keM,46
14
14
  karaoke_gen/karaoke_finalise/karaoke_finalise.py,sha256=Wn1KcdRyINT63UxKUPT9uB-bsrFVih0Im_cjXtequS0,93534
15
15
  karaoke_gen/karaoke_gen.py,sha256=84n2SE0MixJr01_btLmm5cVdf35hJvp7W638b8TKR-Q,65734
16
- karaoke_gen/lyrics_processor.py,sha256=9BtL2uJa4Ekrodj2w_SXSeOraVKCB2kzYuHcGHTFpo8,23979
16
+ karaoke_gen/lyrics_processor.py,sha256=jXEjkQVFauojKXhs3cXytnpfS2ig1o9iHON0-I8RwPw,24297
17
17
  karaoke_gen/metadata.py,sha256=SZW6TuUpkGGU98gRdjPfrR8F4vWXjnfCSGry2XD5_A4,6689
18
18
  karaoke_gen/pipeline/__init__.py,sha256=-MZnba4qobr1qGDamG9CieLl2pWCZMEB5_Yur62RKeM,2106
19
19
  karaoke_gen/pipeline/base.py,sha256=yg4LIm7Mc9ER0zCmZcUv4huEkotSSXK_0OAFio-TSNI,6235
@@ -45,11 +45,11 @@ lyrics_transcriber/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
45
45
  lyrics_transcriber/cli/cli_main.py,sha256=F72ENLTj934bXjHAUbRm0toCK73qnuJhwEm9agBVKHQ,11596
46
46
  lyrics_transcriber/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
47
  lyrics_transcriber/core/config.py,sha256=_X_d1wSYTJjSquqbODYCwPdOYpnSR9KERwvr_jkdYls,2056
48
- lyrics_transcriber/core/controller.py,sha256=dUJvnehr9_Mv3Syj_TWZQsQVsDD1w8AdF5_1xISA2cw,31661
48
+ lyrics_transcriber/core/controller.py,sha256=zRjdxOrJEaa2depvzZvwVQiEFmf8Ew3Aek89O1HUEas,32223
49
49
  lyrics_transcriber/correction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
50
  lyrics_transcriber/correction/agentic/__init__.py,sha256=p7PHiebuvRs8RDlPDs-9gLZKzXG5KfWg3fFCdDhY6pE,222
51
51
  lyrics_transcriber/correction/agentic/adapter.py,sha256=Z0JBTAA7xlSdctCHqO9nBMl78C4XmqsLKKtS6BvNZNI,2912
52
- lyrics_transcriber/correction/agentic/agent.py,sha256=KZjAKaxv6HXkNFNGDc-hXM7He3ew5kp9Q_OR43SrfJo,12310
52
+ lyrics_transcriber/correction/agentic/agent.py,sha256=GV6TkrIQBhibJllXDnp9zBBmaf_vnoSVuJZmm6WVhS0,12722
53
53
  lyrics_transcriber/correction/agentic/feedback/aggregator.py,sha256=323t8LDbE26ni83woyN7uVMSuSQhnqTgwJc-d-KuDbs,273
54
54
  lyrics_transcriber/correction/agentic/feedback/collector.py,sha256=HT-2cAP_bx7Iv-0-tpZv534do111g0FlTUt2XaKoUtA,415
55
55
  lyrics_transcriber/correction/agentic/feedback/retention.py,sha256=dUCUsKPCzHVQxiLLBXcdfAZ5NqiG25go0Z6GFXeK0vY,881
@@ -84,13 +84,13 @@ lyrics_transcriber/correction/agentic/prompts/langfuse_prompts.py,sha256=hjQhyY_
84
84
  lyrics_transcriber/correction/agentic/providers/__init__.py,sha256=PS7C4sKDfa6S9lSo33GXIRamCLsv0Jn7u0GtXuhiRD4,95
85
85
  lyrics_transcriber/correction/agentic/providers/base.py,sha256=bExuntMLLInMmWWNzN81_ScWQJhNYbtlF3wZYhlX-qw,1059
86
86
  lyrics_transcriber/correction/agentic/providers/circuit_breaker.py,sha256=D3Jg4YHqvy4gzlxfkALa7PztyYQpJb8NwJAonMS0TSI,4694
87
- lyrics_transcriber/correction/agentic/providers/config.py,sha256=w6-fkapEy3BgoFIsRfZ44XUCV4zuicFSNoSoVAe5lYE,3282
87
+ lyrics_transcriber/correction/agentic/providers/config.py,sha256=NnGigthJSWMz_d99qh-ClQaVqjODRoYrwTHVftQOlR8,4156
88
88
  lyrics_transcriber/correction/agentic/providers/constants.py,sha256=cXLzKTyFVt9q6wQd_gWcv3EZ5Sm27AOAz6NyPapcess,695
89
89
  lyrics_transcriber/correction/agentic/providers/health.py,sha256=F8pHY5BQYvylGRDGXUHplcAJooAyiqVLRhBl4kHC1H8,710
90
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py,sha256=H3C3BNjAixfkOJojxWXv-P-svlgj5rJEJdk0zPIjh7E,8540
91
- lyrics_transcriber/correction/agentic/providers/model_factory.py,sha256=CeVDblf1HdphtUHVn3Cgl07YAeUuSxTjEHHFJN8Frj0,8257
90
+ lyrics_transcriber/correction/agentic/providers/langchain_bridge.py,sha256=yX5JGGALDCFgji34gZ924GePsfjELOPqgxD6Cx7kKEg,12915
91
+ lyrics_transcriber/correction/agentic/providers/model_factory.py,sha256=90EjVwoKTWo8jXTrroI7GXM9AU-_ACx9g_fHB4vnR2w,9919
92
92
  lyrics_transcriber/correction/agentic/providers/response_cache.py,sha256=Byr7fQJsgUMFlsvHeVCxTiFjjnbsg3KIlEmEEtAo-Gw,7047
93
- lyrics_transcriber/correction/agentic/providers/response_parser.py,sha256=a8pdUYKBS5X72gck3u1ndFYB__UN0UijAdxNhbHp8ZQ,3809
93
+ lyrics_transcriber/correction/agentic/providers/response_parser.py,sha256=c2KypM-yHbIXXakHV5s-qh8fl8FhssLPVo3pJbyAiG4,4301
94
94
  lyrics_transcriber/correction/agentic/providers/retry_executor.py,sha256=hX21Zwy2cSECAw7k13ndEinWRqwjo4xYoSCQ2B2CUf0,3912
95
95
  lyrics_transcriber/correction/agentic/router.py,sha256=akP28A0lftmsnSyMOW6k7iTC1pv4LEgilXhIkcfJzlE,1437
96
96
  lyrics_transcriber/correction/agentic/workflows/__init__.py,sha256=OsBExAbIIKxJgX6FKXFOgcUjIG9AWJQV_fESZVdO8mo,77
@@ -98,7 +98,7 @@ lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py,sha256=gMu
98
98
  lyrics_transcriber/correction/agentic/workflows/correction_graph.py,sha256=kgZKnz0h9cG1EfhW7BSSl-kSpQtJrRM_S86kAniXfE4,1815
99
99
  lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py,sha256=KsKLD3AP66YYmXfUn-mVZjERYLtU1Zs4a-7CB2zDfas,596
100
100
  lyrics_transcriber/correction/anchor_sequence.py,sha256=5tl4Cjiw5UlLbEb1Oy-g3ebKCinXSwohdaCB9-rTMtI,43798
101
- lyrics_transcriber/correction/corrector.py,sha256=qW6GwOOLM8zxYtYMmGy9Rzk_4mJzdpGiCXW3LQFXn14,38362
101
+ lyrics_transcriber/correction/corrector.py,sha256=2yVFUHzqEXZ7aeJjm6durF6WtrhYVTm6nqOQn-dtNI4,40545
102
102
  lyrics_transcriber/correction/feedback/__init__.py,sha256=i1gd0Vb4qvlzZQ3lqA3fJjt288YP7f-MBPwOzZ7Rjh4,68
103
103
  lyrics_transcriber/correction/feedback/schemas.py,sha256=OiF_WUqcqiEKIoburYM8kWAIundy82PQE7ImsdP8UCk,4416
104
104
  lyrics_transcriber/correction/feedback/store.py,sha256=T4IDzf1eRA9n-wdLLrLyAW1ELYgXwK9RikJgX_B3fN8,8788
@@ -286,8 +286,8 @@ lyrics_transcriber/transcribers/whisper.py,sha256=YcCB1ic9H6zL1GS0jD0emu8-qlcH0Q
286
286
  lyrics_transcriber/types.py,sha256=UJjaxhVd2o14AG4G8ToU598p0JeYdiTFjpG38jGCoYQ,27917
287
287
  lyrics_transcriber/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
288
288
  lyrics_transcriber/utils/word_utils.py,sha256=-cMGpj9UV4F6IsoDKAV2i1aiqSO8eI91HMAm_igtVMk,958
289
- karaoke_gen-0.82.0.dist-info/METADATA,sha256=k51l0dhnVIM5kn6lFbTCLi1oKbcV_2iSulloMXVkU0Q,23077
290
- karaoke_gen-0.82.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
291
- karaoke_gen-0.82.0.dist-info/entry_points.txt,sha256=xIyLe7K84ZyjO8L0_AmNectz93QjGSs5AkApMtlAd4g,160
292
- karaoke_gen-0.82.0.dist-info/licenses/LICENSE,sha256=81R_4XwMZDODHD7JcZeUR8IiCU8AD7Ajl6bmwR9tYDk,1074
293
- karaoke_gen-0.82.0.dist-info/RECORD,,
289
+ karaoke_gen-0.86.5.dist-info/METADATA,sha256=Bv8ezXed1IakQYY4hFXnar8dOd31kb3RUjQUPkcdVzs,23074
290
+ karaoke_gen-0.86.5.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
291
+ karaoke_gen-0.86.5.dist-info/entry_points.txt,sha256=xIyLe7K84ZyjO8L0_AmNectz93QjGSs5AkApMtlAd4g,160
292
+ karaoke_gen-0.86.5.dist-info/licenses/LICENSE,sha256=81R_4XwMZDODHD7JcZeUR8IiCU8AD7Ajl6bmwR9tYDk,1074
293
+ karaoke_gen-0.86.5.dist-info/RECORD,,
@@ -289,8 +289,13 @@ class LyricsTranscriber:
289
289
  """Initialize output generation service."""
290
290
  return OutputGenerator(config=self.output_config, logger=self.logger)
291
291
 
292
- def process(self) -> LyricsControllerResult:
293
- """Main processing method that orchestrates the entire workflow."""
292
+ def process(self, agentic_deadline: Optional[float] = None) -> LyricsControllerResult:
293
+ """Main processing method that orchestrates the entire workflow.
294
+
295
+ Args:
296
+ agentic_deadline: Optional Unix timestamp. If agentic correction is still
297
+ running after this time, it will abort and return uncorrected results.
298
+ """
294
299
 
295
300
  self.logger.info(f"LyricsTranscriber controller beginning processing for {self.artist} - {self.title}")
296
301
 
@@ -390,7 +395,7 @@ class LyricsTranscriber:
390
395
 
391
396
  # Step 3: Process and correct lyrics if enabled AND we have transcription results
392
397
  if self.output_config.run_correction and self.results.transcription_results:
393
- self.correct_lyrics()
398
+ self.correct_lyrics(agentic_deadline=agentic_deadline)
394
399
  elif self.output_config.run_correction:
395
400
  self.logger.info("Skipping lyrics correction - no transcription results available")
396
401
 
@@ -494,8 +499,13 @@ class LyricsTranscriber:
494
499
  else:
495
500
  self.logger.debug(" - LocalWhisper: DISABLED (enable_local_whisper=False)")
496
501
 
497
- def correct_lyrics(self) -> None:
498
- """Run lyrics correction using transcription and internet lyrics."""
502
+ def correct_lyrics(self, agentic_deadline: Optional[float] = None) -> None:
503
+ """Run lyrics correction using transcription and internet lyrics.
504
+
505
+ Args:
506
+ agentic_deadline: Optional Unix timestamp. If agentic correction is still
507
+ running after this time, it will abort and return uncorrected results.
508
+ """
499
509
  self.logger.info("Starting lyrics correction process")
500
510
 
501
511
  # Check if we have reference lyrics to work with
@@ -553,6 +563,7 @@ class LyricsTranscriber:
553
563
  transcription_results=self.results.transcription_results,
554
564
  lyrics_results=self.results.lyrics_results,
555
565
  metadata=metadata,
566
+ agentic_deadline=agentic_deadline,
556
567
  )
557
568
 
558
569
  # Store corrected results
@@ -3,6 +3,7 @@ from __future__ import annotations
3
3
  import logging
4
4
  import os
5
5
  import json
6
+ import time
6
7
  from typing import Dict, Any, List, Optional
7
8
 
8
9
  from .providers.base import BaseAIProvider
@@ -120,7 +121,7 @@ class AgenticCorrector:
120
121
  title: Optional[str] = None
121
122
  ) -> Optional[GapClassification]:
122
123
  """Classify a gap using the AI provider.
123
-
124
+
124
125
  Args:
125
126
  gap_id: Unique identifier for the gap
126
127
  gap_text: The text of the gap
@@ -129,10 +130,13 @@ class AgenticCorrector:
129
130
  reference_contexts: Dictionary of reference lyrics from each source
130
131
  artist: Song artist name
131
132
  title: Song title
132
-
133
+
133
134
  Returns:
134
135
  GapClassification object or None if classification fails
135
136
  """
137
+ logger.info(f"🤖 Classifying gap {gap_id} ({len(gap_text)} chars)")
138
+ start_time = time.time()
139
+
136
140
  # Build classification prompt
137
141
  prompt = build_classification_prompt(
138
142
  gap_text=gap_text,
@@ -143,7 +147,7 @@ class AgenticCorrector:
143
147
  title=title,
144
148
  gap_id=gap_id
145
149
  )
146
-
150
+
147
151
  # Call AI provider to get classification
148
152
  try:
149
153
  data = self._provider.generate_correction_proposals(
@@ -151,17 +155,25 @@ class AgenticCorrector:
151
155
  schema=GapClassification.model_json_schema(),
152
156
  session_id=self._session_id
153
157
  )
154
-
158
+
159
+ elapsed = time.time() - start_time
160
+
155
161
  # Extract first result
156
162
  if data and len(data) > 0:
157
163
  item = data[0]
158
164
  if isinstance(item, dict) and "error" not in item:
159
165
  classification = GapClassification.model_validate(item)
160
- logger.debug(f"🤖 Classified gap {gap_id} as {classification.category} (confidence: {classification.confidence})")
166
+ logger.info(
167
+ f"🤖 Classified gap {gap_id} as {classification.category} "
168
+ f"(confidence: {classification.confidence:.2f}) in {elapsed:.2f}s"
169
+ )
161
170
  return classification
171
+ else:
172
+ logger.warning(f"🤖 Classification returned error for gap {gap_id}: {item}")
162
173
  except Exception as e:
163
- logger.warning(f"🤖 Failed to classify gap {gap_id}: {e}")
164
-
174
+ elapsed = time.time() - start_time
175
+ logger.warning(f"🤖 Failed to classify gap {gap_id} after {elapsed:.2f}s: {e}")
176
+
165
177
  return None
166
178
 
167
179
  def propose_for_gap(
@@ -23,13 +23,23 @@ class ProviderConfig:
23
23
  gcp_project_id: Optional[str] = None
24
24
  gcp_location: str = "global"
25
25
 
26
- request_timeout_seconds: float = 30.0
26
+ # Timeout increased to 120s to handle Vertex AI connection establishment
27
+ # and potential network latency. The 499 "operation cancelled" errors seen
28
+ # at ~60s suggest internal timeouts; 120s provides headroom.
29
+ request_timeout_seconds: float = 120.0
27
30
  max_retries: int = 2
28
- retry_backoff_base_seconds: float = 0.2
31
+ # Backoff increased from 0.2s to 2.0s base - if a request times out,
32
+ # retrying immediately is unlikely to help. Give the service time to recover.
33
+ retry_backoff_base_seconds: float = 2.0
29
34
  retry_backoff_factor: float = 2.0
30
35
  circuit_breaker_failure_threshold: int = 3
31
36
  circuit_breaker_open_seconds: int = 60
32
37
 
38
+ # Initialization timeouts - fail fast instead of hanging forever
39
+ # These are separate from request_timeout to catch connection establishment issues
40
+ initialization_timeout_seconds: float = 30.0 # Model creation + warm-up
41
+ warmup_timeout_seconds: float = 15.0 # Just the warm-up call
42
+
33
43
  @staticmethod
34
44
  def from_env(cache_dir: Optional[str] = None) -> "ProviderConfig":
35
45
  """Create config from environment variables.
@@ -53,12 +63,14 @@ class ProviderConfig:
53
63
  cache_dir=cache_dir,
54
64
  gcp_project_id=os.getenv("GOOGLE_CLOUD_PROJECT") or os.getenv("GCP_PROJECT_ID"),
55
65
  gcp_location=os.getenv("GCP_LOCATION", "global"),
56
- request_timeout_seconds=float(os.getenv("AGENTIC_TIMEOUT_SECONDS", "30.0")),
66
+ request_timeout_seconds=float(os.getenv("AGENTIC_TIMEOUT_SECONDS", "120.0")),
57
67
  max_retries=int(os.getenv("AGENTIC_MAX_RETRIES", "2")),
58
- retry_backoff_base_seconds=float(os.getenv("AGENTIC_BACKOFF_BASE_SECONDS", "0.2")),
68
+ retry_backoff_base_seconds=float(os.getenv("AGENTIC_BACKOFF_BASE_SECONDS", "2.0")),
59
69
  retry_backoff_factor=float(os.getenv("AGENTIC_BACKOFF_FACTOR", "2.0")),
60
70
  circuit_breaker_failure_threshold=int(os.getenv("AGENTIC_CIRCUIT_THRESHOLD", "3")),
61
71
  circuit_breaker_open_seconds=int(os.getenv("AGENTIC_CIRCUIT_OPEN_SECONDS", "60")),
72
+ initialization_timeout_seconds=float(os.getenv("AGENTIC_INIT_TIMEOUT_SECONDS", "30.0")),
73
+ warmup_timeout_seconds=float(os.getenv("AGENTIC_WARMUP_TIMEOUT_SECONDS", "15.0")),
62
74
  )
63
75
 
64
76
  def validate_environment(self, logger: Optional[object] = None) -> None:
@@ -13,6 +13,8 @@ from __future__ import annotations
13
13
 
14
14
  import logging
15
15
  import os
16
+ import time
17
+ from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
16
18
  from typing import List, Dict, Any, Optional
17
19
  from datetime import datetime
18
20
 
@@ -33,6 +35,14 @@ from .constants import (
33
35
 
34
36
  logger = logging.getLogger(__name__)
35
37
 
38
+ # Error constant for initialization timeout
39
+ INIT_TIMEOUT_ERROR = "initialization_timeout"
40
+
41
+
42
+ class InitializationTimeoutError(Exception):
43
+ """Raised when model initialization exceeds the configured timeout."""
44
+ pass
45
+
36
46
 
37
47
  class LangChainBridge(BaseAIProvider):
38
48
  """Provider bridge using LangChain ChatModels with reliability patterns.
@@ -87,6 +97,7 @@ class LangChainBridge(BaseAIProvider):
87
97
 
88
98
  # Lazy-initialized chat model
89
99
  self._chat_model: Optional[Any] = None
100
+ self._warmed_up: bool = False
90
101
 
91
102
  def name(self) -> str:
92
103
  """Return provider name for logging."""
@@ -130,13 +141,45 @@ class LangChainBridge(BaseAIProvider):
130
141
  "until": open_until
131
142
  }]
132
143
 
133
- # Step 2: Get or create chat model
144
+ # Step 2: Get or create chat model with initialization timeout
134
145
  if not self._chat_model:
146
+ timeout = self._config.initialization_timeout_seconds
147
+ logger.info(f"🤖 Initializing model {self._model} with {timeout}s timeout...")
148
+ init_start = time.time()
149
+
135
150
  try:
136
- self._chat_model = self._factory.create_chat_model(
137
- self._model,
138
- self._config
139
- )
151
+ # Use ThreadPoolExecutor for cross-platform timeout
152
+ with ThreadPoolExecutor(max_workers=1) as executor:
153
+ future = executor.submit(
154
+ self._factory.create_chat_model,
155
+ self._model,
156
+ self._config
157
+ )
158
+ try:
159
+ self._chat_model = future.result(timeout=timeout)
160
+ except FuturesTimeoutError:
161
+ raise InitializationTimeoutError(
162
+ f"Model initialization timed out after {timeout}s. "
163
+ f"This may indicate network issues or service unavailability."
164
+ ) from None
165
+
166
+ init_elapsed = time.time() - init_start
167
+ logger.info(f"🤖 Model created in {init_elapsed:.2f}s, starting warm-up...")
168
+
169
+ # Warm up the model to establish connection before real work
170
+ self._warm_up_model()
171
+
172
+ total_elapsed = time.time() - init_start
173
+ logger.info(f"🤖 Model initialization complete in {total_elapsed:.2f}s")
174
+
175
+ except InitializationTimeoutError as e:
176
+ self._circuit_breaker.record_failure(self._model)
177
+ logger.exception("🤖 Model initialization timeout")
178
+ return [{
179
+ "error": INIT_TIMEOUT_ERROR,
180
+ "message": str(e),
181
+ "timeout_seconds": timeout
182
+ }]
140
183
  except Exception as e:
141
184
  self._circuit_breaker.record_failure(self._model)
142
185
  logger.error(f"🤖 Failed to initialize chat model: {e}")
@@ -146,24 +189,27 @@ class LangChainBridge(BaseAIProvider):
146
189
  }]
147
190
 
148
191
  # Step 3: Execute with retry logic
149
- logger.debug(
150
- f"🤖 [LangChain] Sending prompt to {self._model}: "
151
- f"{prompt[:PROMPT_LOG_LENGTH]}..."
192
+ logger.info(
193
+ f"🤖 [LangChain] Sending prompt to {self._model} ({len(prompt)} chars)"
152
194
  )
153
-
195
+ logger.debug(f"🤖 [LangChain] Prompt preview: {prompt[:PROMPT_LOG_LENGTH]}...")
196
+
197
+ invoke_start = time.time()
154
198
  result = self._executor.execute_with_retry(
155
199
  operation=lambda: self._invoke_model(prompt),
156
200
  operation_name=f"invoke_{self._model}"
157
201
  )
158
-
202
+ invoke_elapsed = time.time() - invoke_start
203
+
159
204
  # Step 4: Handle result and update circuit breaker
160
205
  if result.success:
161
206
  self._circuit_breaker.record_success(self._model)
162
-
207
+
163
208
  logger.info(
164
- f"🤖 [LangChain] Got response from {self._model}: "
165
- f"{result.value[:RESPONSE_LOG_LENGTH]}..."
209
+ f"🤖 [LangChain] Got response from {self._model} in {invoke_elapsed:.2f}s "
210
+ f"({len(result.value)} chars)"
166
211
  )
212
+ logger.debug(f"🤖 [LangChain] Response preview: {result.value[:RESPONSE_LOG_LENGTH]}...")
167
213
 
168
214
  # Step 5: Cache the raw response for future use
169
215
  self._cache.set(
@@ -225,3 +271,47 @@ class LangChainBridge(BaseAIProvider):
225
271
 
226
272
  return content
227
273
 
274
+ def _warm_up_model(self) -> None:
275
+ """Send a lightweight request to warm up the model connection.
276
+
277
+ This helps establish the REST connection and potentially warm up any
278
+ server-side resources before processing real correction requests.
279
+ The warm-up uses a timeout to fail fast if the service is unresponsive.
280
+ """
281
+ if self._warmed_up:
282
+ return
283
+
284
+ timeout = self._config.warmup_timeout_seconds
285
+ # Use print with flush=True for visibility when output is redirected
286
+ print(f"🔥 Warming up {self._model} connection (timeout: {timeout}s)...", flush=True)
287
+ logger.info(f"🔥 Warming up {self._model} connection (timeout: {timeout}s)...")
288
+
289
+ warmup_start = time.time()
290
+ try:
291
+ from langchain_core.messages import HumanMessage
292
+
293
+ # Minimal prompt that requires almost no processing
294
+ warm_up_prompt = 'Respond with exactly: {"status":"ready"}'
295
+
296
+ # Use ThreadPoolExecutor for timeout on warm-up call
297
+ with ThreadPoolExecutor(max_workers=1) as executor:
298
+ future = executor.submit(
299
+ self._chat_model.invoke,
300
+ [HumanMessage(content=warm_up_prompt)]
301
+ )
302
+ try:
303
+ future.result(timeout=timeout)
304
+ except FuturesTimeoutError:
305
+ raise TimeoutError(f"Warm-up timed out after {timeout}s") from None
306
+
307
+ elapsed = time.time() - warmup_start
308
+ self._warmed_up = True
309
+ print(f"🔥 Warm-up complete for {self._model} in {elapsed:.2f}s", flush=True)
310
+ logger.info(f"🔥 Warm-up complete for {self._model} in {elapsed:.2f}s")
311
+ except Exception as e:
312
+ elapsed = time.time() - warmup_start
313
+ # Don't fail the actual request if warm-up fails
314
+ # Just log and continue - the real request might still work
315
+ print(f"🔥 Warm-up failed for {self._model} after {elapsed:.2f}s: {e} (continuing anyway)", flush=True)
316
+ logger.warning(f"🔥 Warm-up failed for {self._model} after {elapsed:.2f}s: {e} (continuing anyway)")
317
+
@@ -3,12 +3,19 @@ from __future__ import annotations
3
3
 
4
4
  import logging
5
5
  import os
6
+ import time
6
7
  from typing import Any, Optional, List
7
8
 
8
9
  from .config import ProviderConfig
9
10
 
10
11
  logger = logging.getLogger(__name__)
11
12
 
13
+ # Error message constant for TRY003 compliance
14
+ GOOGLE_API_KEY_MISSING_ERROR = (
15
+ "GOOGLE_API_KEY environment variable is required for Google/Gemini models. "
16
+ "Get an API key from https://aistudio.google.com/app/apikey"
17
+ )
18
+
12
19
 
13
20
  class ModelFactory:
14
21
  """Creates and configures LangChain ChatModels with observability.
@@ -203,22 +210,56 @@ class ModelFactory:
203
210
  def _create_vertexai_model(
204
211
  self, model_name: str, callbacks: List[Any], config: ProviderConfig
205
212
  ) -> Any:
206
- """Create ChatVertexAI model for Google Gemini via Vertex AI.
213
+ """Create ChatGoogleGenerativeAI model for Google Gemini.
214
+
215
+ Uses the unified langchain-google-genai package which supports both:
216
+ - Vertex AI backend (service account / ADC auth) - when project is set
217
+ - Google AI Studio backend (API key auth) - when only api_key is set
207
218
 
208
- Uses Application Default Credentials (ADC) for authentication.
209
- In Cloud Run, this uses the service account automatically.
210
- Locally, run: gcloud auth application-default login
219
+ On Cloud Run, ADC (Application Default Credentials) are used automatically
220
+ when the project parameter is provided, using the service account attached
221
+ to the Cloud Run service.
222
+
223
+ This is a REST-based API that avoids the gRPC connection issues
224
+ seen with the deprecated langchain-google-vertexai package.
211
225
  """
212
- from langchain_google_vertexai import ChatVertexAI
226
+ from langchain_google_genai import ChatGoogleGenerativeAI
213
227
 
214
- model = ChatVertexAI(
215
- model=model_name,
216
- project=config.gcp_project_id,
217
- location=config.gcp_location,
218
- timeout=config.request_timeout_seconds,
219
- max_retries=config.max_retries,
220
- callbacks=callbacks,
221
- )
222
- logger.debug(f"🤖 Created Vertex AI model: {model_name} (project={config.gcp_project_id})")
228
+ start_time = time.time()
229
+
230
+ # Determine authentication method
231
+ api_key = config.google_api_key
232
+ project = config.gcp_project_id
233
+
234
+ # Prefer Vertex AI (service account) if project is set, otherwise require API key
235
+ if not project and not api_key:
236
+ raise ValueError(GOOGLE_API_KEY_MISSING_ERROR)
237
+
238
+ if project:
239
+ logger.info(f"🤖 Creating Google Gemini model via Vertex AI (project={project}): {model_name}")
240
+ else:
241
+ logger.info(f"🤖 Creating Google Gemini model via AI Studio API: {model_name}")
242
+
243
+ # Build kwargs - only include api_key if set (otherwise ADC is used)
244
+ model_kwargs = {
245
+ "model": model_name,
246
+ "convert_system_message_to_human": True, # Gemini doesn't support system messages
247
+ "max_retries": config.max_retries,
248
+ "timeout": config.request_timeout_seconds,
249
+ "callbacks": callbacks,
250
+ }
251
+
252
+ # Add project to trigger Vertex AI backend with ADC
253
+ if project:
254
+ model_kwargs["project"] = project
255
+
256
+ # Add API key if available (can be used with or without project)
257
+ if api_key:
258
+ model_kwargs["google_api_key"] = api_key
259
+
260
+ model = ChatGoogleGenerativeAI(**model_kwargs)
261
+
262
+ elapsed = time.time() - start_time
263
+ logger.info(f"🤖 Google Gemini model created in {elapsed:.2f}s: {model_name}")
223
264
  return model
224
265
 
@@ -51,22 +51,34 @@ class ResponseParser:
51
51
 
52
52
  def _attempt_json_fix(self, content: str) -> str:
53
53
  """Attempt to fix common JSON formatting issues.
54
-
54
+
55
55
  Args:
56
56
  content: Raw JSON string
57
-
57
+
58
58
  Returns:
59
59
  Fixed JSON string (or original if no fixes applied)
60
60
  """
61
+ import re
62
+
63
+ # Fix 0: Strip markdown code fences (```json ... ``` or ``` ... ```)
64
+ # Models often wrap JSON in markdown code blocks
65
+ fixed = content.strip()
66
+ if fixed.startswith("```"):
67
+ # Remove opening fence (with optional language identifier)
68
+ fixed = re.sub(r'^```\w*\s*\n?', '', fixed)
69
+ # Remove closing fence
70
+ fixed = re.sub(r'\n?```\s*$', '', fixed)
71
+ fixed = fixed.strip()
72
+ logger.debug("🤖 Stripped markdown code fences from response")
73
+
61
74
  # Fix 1: Replace invalid escape sequences like \' with '
62
75
  # (JSON only allows \", \\, \/, \b, \f, \n, \r, \t)
63
- fixed = content.replace("\\'", "'")
64
-
76
+ fixed = fixed.replace("\\'", "'")
77
+
65
78
  # Fix 2: Remove any trailing commas before } or ]
66
- import re
67
79
  fixed = re.sub(r',\s*}', '}', fixed)
68
80
  fixed = re.sub(r',\s*]', ']', fixed)
69
-
81
+
70
82
  return fixed
71
83
 
72
84
  def _normalize_json_response(self, data: Any) -> List[Dict[str, Any]]:
@@ -4,6 +4,7 @@ from pathlib import Path
4
4
  from copy import deepcopy
5
5
  import os
6
6
  import shortuuid
7
+ import time
7
8
 
8
9
  from lyrics_transcriber.correction.handlers.levenshtein import LevenshteinHandler
9
10
  from lyrics_transcriber.correction.handlers.no_space_punct_match import NoSpacePunctuationMatchHandler
@@ -107,8 +108,24 @@ class LyricsCorrector:
107
108
  transcription_results: List[TranscriptionResult],
108
109
  lyrics_results: Dict[str, LyricsData],
109
110
  metadata: Optional[Dict[str, Any]] = None,
111
+ agentic_deadline: Optional[float] = None,
110
112
  ) -> CorrectionResult:
111
- """Execute the correction process."""
113
+ """Execute the correction process.
114
+
115
+ Args:
116
+ transcription_results: List of transcription results to correct.
117
+ lyrics_results: Dictionary of lyrics data from various sources.
118
+ metadata: Optional metadata including artist, title, audio file hash.
119
+ agentic_deadline: Optional Unix timestamp (from time.time()). If agentic
120
+ correction is still running after this time, it will abort and return
121
+ uncorrected results for human review.
122
+
123
+ Note:
124
+ The deadline is checked between gap iterations, not during LLM processing.
125
+ A single long-running LLM call may exceed the deadline. The caller should
126
+ wrap this method with an outer timeout (e.g., asyncio.wait_for) as a safety
127
+ net for hung operations.
128
+ """
112
129
  # Optional agentic routing flag from environment; default off for safety
113
130
  agentic_enabled = os.getenv("USE_AGENTIC_AI", "").lower() in {"1", "true", "yes"}
114
131
  self.logger.info(f"🤖 AGENTIC MODE: {'ENABLED' if agentic_enabled else 'DISABLED'} (USE_AGENTIC_AI={os.getenv('USE_AGENTIC_AI', 'NOT_SET')})")
@@ -132,9 +149,9 @@ class LyricsCorrector:
132
149
  # Store anchor sequences for use in correction handlers
133
150
  self._anchor_sequences = anchor_sequences
134
151
 
135
- # Process corrections with metadata
152
+ # Process corrections with metadata and optional deadline for agentic timeout
136
153
  corrections, corrected_segments, correction_steps, word_id_map, segment_id_map = self._process_corrections(
137
- primary_transcription.segments, gap_sequences, metadata=metadata
154
+ primary_transcription.segments, gap_sequences, metadata=metadata, deadline=agentic_deadline
138
155
  )
139
156
 
140
157
  # Calculate correction ratio
@@ -178,10 +195,22 @@ class LyricsCorrector:
178
195
  return leading_space + new_word.strip() + trailing_space
179
196
 
180
197
  def _process_corrections(
181
- self, segments: List[LyricsSegment], gap_sequences: List[GapSequence], metadata: Optional[Dict[str, Any]] = None
198
+ self, segments: List[LyricsSegment], gap_sequences: List[GapSequence], metadata: Optional[Dict[str, Any]] = None,
199
+ deadline: Optional[float] = None
182
200
  ) -> Tuple[List[WordCorrection], List[LyricsSegment], List[CorrectionStep], Dict[str, str], Dict[str, str]]:
183
201
  """Process corrections using handlers.
184
202
 
203
+ Args:
204
+ segments: List of lyrics segments to process.
205
+ gap_sequences: List of gap sequences to correct.
206
+ metadata: Optional metadata including artist, title, audio file hash.
207
+ deadline: Optional Unix timestamp (from time.time()). When agentic mode is
208
+ enabled and this deadline is exceeded, remaining gaps are skipped and
209
+ the method returns with whatever corrections have been made (likely none).
210
+
211
+ Returns:
212
+ Tuple of (corrections, corrected_segments, correction_steps, word_id_map, segment_id_map).
213
+
185
214
  The correction flow works as follows:
186
215
  1. First pass: Process all gaps
187
216
  - Iterate through each gap sequence
@@ -415,6 +444,16 @@ class LyricsCorrector:
415
444
  # === END TEMPORARY CODE ===
416
445
 
417
446
  for i, gap in enumerate(gap_sequences, 1):
447
+ # Check deadline before processing each gap (agentic mode only)
448
+ # This allows us to abort early and return uncorrected results for human review
449
+ if deadline and use_agentic_env and time.time() > deadline:
450
+ self.logger.warning(
451
+ f"⏰ AGENTIC TIMEOUT: Deadline exceeded after processing {i-1}/{len(gap_sequences)} gaps. "
452
+ "Skipping remaining gaps - human review will correct any issues."
453
+ )
454
+ # Break out of loop - continue with whatever corrections we have (likely none)
455
+ break
456
+
418
457
  self.logger.info(f"Processing gap {i}/{len(gap_sequences)} at position {gap.transcription_position}")
419
458
 
420
459
  # Get the actual words for logging