karaoke-gen 0.81.1__py3-none-any.whl → 0.86.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -786,8 +786,8 @@
786
786
 
787
787
  if (waveformRes.ok) {
788
788
  waveformData = await waveformRes.json();
789
- // API returns duration_seconds, not duration
790
- duration = waveformData.duration_seconds || 0;
789
+ // API may return duration_seconds (cloud) or duration (local)
790
+ duration = waveformData.duration_seconds || waveformData.duration || 0;
791
791
  }
792
792
 
793
793
  // Set initial selection based on recommendation
@@ -3,6 +3,7 @@ import re
3
3
  import logging
4
4
  import shutil
5
5
  import json
6
+ from typing import Optional
6
7
  from lyrics_transcriber import LyricsTranscriber, OutputConfig, TranscriberConfig, LyricsConfig
7
8
  from lyrics_transcriber.core.controller import LyricsControllerResult
8
9
  from dotenv import load_dotenv
@@ -252,10 +253,11 @@ class LyricsProcessor:
252
253
  "See README.md 'Transcription Providers' section for detailed setup instructions."
253
254
  )
254
255
 
255
- def transcribe_lyrics(self, input_audio_wav, artist, title, track_output_dir, lyrics_artist=None, lyrics_title=None):
256
+ def transcribe_lyrics(self, input_audio_wav, artist, title, track_output_dir, lyrics_artist=None, lyrics_title=None,
257
+ agentic_deadline: Optional[float] = None):
256
258
  """
257
259
  Transcribe lyrics for a track.
258
-
260
+
259
261
  Args:
260
262
  input_audio_wav: Path to the audio file
261
263
  artist: Original artist name (used for filename generation)
@@ -263,7 +265,9 @@ class LyricsProcessor:
263
265
  track_output_dir: Output directory path
264
266
  lyrics_artist: Artist name for lyrics processing (defaults to artist if None)
265
267
  lyrics_title: Title for lyrics processing (defaults to title if None)
266
-
268
+ agentic_deadline: Optional Unix timestamp. If agentic correction is still
269
+ running after this time, it will abort and return uncorrected results.
270
+
267
271
  Raises:
268
272
  ValueError: If transcription is enabled but no providers are configured
269
273
  """
@@ -423,8 +427,8 @@ class LyricsProcessor:
423
427
  logger=self.logger,
424
428
  )
425
429
 
426
- # Process and get results
427
- results: LyricsControllerResult = transcriber.process()
430
+ # Process and get results (pass deadline for agentic timeout)
431
+ results: LyricsControllerResult = transcriber.process(agentic_deadline=agentic_deadline)
428
432
  self.logger.info(f"Transcriber Results Filepaths:")
429
433
  for key, value in results.__dict__.items():
430
434
  if key.endswith("_filepath"):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: karaoke-gen
3
- Version: 0.81.1
3
+ Version: 0.86.5
4
4
  Summary: Generate karaoke videos with synchronized lyrics. Handles the entire process from downloading audio and lyrics to creating the final video with title screens.
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -42,7 +42,7 @@ Requires-Dist: kbputils (>=0.0.16,<0.0.17)
42
42
  Requires-Dist: langchain (>=0.3.0)
43
43
  Requires-Dist: langchain-anthropic (>=0.2.0)
44
44
  Requires-Dist: langchain-core (>=0.3.0)
45
- Requires-Dist: langchain-google-vertexai (>=2.0.0)
45
+ Requires-Dist: langchain-google-genai (>=2.0.0)
46
46
  Requires-Dist: langchain-ollama (>=0.2.0)
47
47
  Requires-Dist: langchain-openai (>=0.2.0)
48
48
  Requires-Dist: langfuse (>=3.0.0)
@@ -8,12 +8,12 @@ karaoke_gen/instrumental_review/analyzer.py,sha256=Heg8TbrwM4g5IV7bavmO6EfVD4M0U
8
8
  karaoke_gen/instrumental_review/editor.py,sha256=_DGTjKMk5WhoGtLGtTvHzU522LJyQQ_DSY1r8fULuiA,11568
9
9
  karaoke_gen/instrumental_review/models.py,sha256=cUSb_JheJK0cGdKx9f59-9sRvRrhrgdTdKBzQN3lHto,5226
10
10
  karaoke_gen/instrumental_review/server.py,sha256=Ick90X77t2EeMRwtx2U08sSybadQyWH7G0tDG-4JqP4,19377
11
- karaoke_gen/instrumental_review/static/index.html,sha256=1lzo_W5B4HxNStWPiVaP4I6ctqDkXAABJkQmojvBDqc,63235
11
+ karaoke_gen/instrumental_review/static/index.html,sha256=anhmEGAhL0rV25q5V8GEnWxs2RnF7qA3dADwvGWCd88,63277
12
12
  karaoke_gen/instrumental_review/waveform.py,sha256=Q6LBPZrJAD6mzZ7TmRf3Tf4gwYhUYTHumJKytLs3hSg,12940
13
13
  karaoke_gen/karaoke_finalise/__init__.py,sha256=HqZ7TIhgt_tYZ-nb_NNCaejWAcF_aK-7wJY5TaW_keM,46
14
14
  karaoke_gen/karaoke_finalise/karaoke_finalise.py,sha256=Wn1KcdRyINT63UxKUPT9uB-bsrFVih0Im_cjXtequS0,93534
15
15
  karaoke_gen/karaoke_gen.py,sha256=84n2SE0MixJr01_btLmm5cVdf35hJvp7W638b8TKR-Q,65734
16
- karaoke_gen/lyrics_processor.py,sha256=9BtL2uJa4Ekrodj2w_SXSeOraVKCB2kzYuHcGHTFpo8,23979
16
+ karaoke_gen/lyrics_processor.py,sha256=jXEjkQVFauojKXhs3cXytnpfS2ig1o9iHON0-I8RwPw,24297
17
17
  karaoke_gen/metadata.py,sha256=SZW6TuUpkGGU98gRdjPfrR8F4vWXjnfCSGry2XD5_A4,6689
18
18
  karaoke_gen/pipeline/__init__.py,sha256=-MZnba4qobr1qGDamG9CieLl2pWCZMEB5_Yur62RKeM,2106
19
19
  karaoke_gen/pipeline/base.py,sha256=yg4LIm7Mc9ER0zCmZcUv4huEkotSSXK_0OAFio-TSNI,6235
@@ -45,11 +45,11 @@ lyrics_transcriber/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
45
45
  lyrics_transcriber/cli/cli_main.py,sha256=F72ENLTj934bXjHAUbRm0toCK73qnuJhwEm9agBVKHQ,11596
46
46
  lyrics_transcriber/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
47
  lyrics_transcriber/core/config.py,sha256=_X_d1wSYTJjSquqbODYCwPdOYpnSR9KERwvr_jkdYls,2056
48
- lyrics_transcriber/core/controller.py,sha256=dUJvnehr9_Mv3Syj_TWZQsQVsDD1w8AdF5_1xISA2cw,31661
48
+ lyrics_transcriber/core/controller.py,sha256=zRjdxOrJEaa2depvzZvwVQiEFmf8Ew3Aek89O1HUEas,32223
49
49
  lyrics_transcriber/correction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
50
  lyrics_transcriber/correction/agentic/__init__.py,sha256=p7PHiebuvRs8RDlPDs-9gLZKzXG5KfWg3fFCdDhY6pE,222
51
51
  lyrics_transcriber/correction/agentic/adapter.py,sha256=Z0JBTAA7xlSdctCHqO9nBMl78C4XmqsLKKtS6BvNZNI,2912
52
- lyrics_transcriber/correction/agentic/agent.py,sha256=KZjAKaxv6HXkNFNGDc-hXM7He3ew5kp9Q_OR43SrfJo,12310
52
+ lyrics_transcriber/correction/agentic/agent.py,sha256=GV6TkrIQBhibJllXDnp9zBBmaf_vnoSVuJZmm6WVhS0,12722
53
53
  lyrics_transcriber/correction/agentic/feedback/aggregator.py,sha256=323t8LDbE26ni83woyN7uVMSuSQhnqTgwJc-d-KuDbs,273
54
54
  lyrics_transcriber/correction/agentic/feedback/collector.py,sha256=HT-2cAP_bx7Iv-0-tpZv534do111g0FlTUt2XaKoUtA,415
55
55
  lyrics_transcriber/correction/agentic/feedback/retention.py,sha256=dUCUsKPCzHVQxiLLBXcdfAZ5NqiG25go0Z6GFXeK0vY,881
@@ -75,29 +75,30 @@ lyrics_transcriber/correction/agentic/models/observability_metrics.py,sha256=xGd
75
75
  lyrics_transcriber/correction/agentic/models/schemas.py,sha256=skWXqGkJnv9NvmvjktBqrH_4Ohyzg2x0ZMsVINbXKdg,2141
76
76
  lyrics_transcriber/correction/agentic/models/utils.py,sha256=tX7flxCB4aLrgZWkHuEt7Gr8kaLkMsXzTdWSec6Xsts,580
77
77
  lyrics_transcriber/correction/agentic/observability/__init__.py,sha256=RuaepVsltWdaF1aF_YmNVJTJ6_bbNDFo3Sp-ruBvyHA,85
78
- lyrics_transcriber/correction/agentic/observability/langfuse_integration.py,sha256=GvgisZyy5_tDeC7Hd8SDWbd-9aAqYnMTd52uNeQ2p-I,1221
78
+ lyrics_transcriber/correction/agentic/observability/langfuse_integration.py,sha256=5oBfoFT-QExZttD2wlIzXRhgSglNElFFwz8Et36vZos,7014
79
79
  lyrics_transcriber/correction/agentic/observability/metrics.py,sha256=Js_m6ljdI6Xgd9X9eHtboCsf9gjYsN1zOv3_XSwjgKk,1907
80
80
  lyrics_transcriber/correction/agentic/observability/performance.py,sha256=ekjzgL65gfs1SpKR_befu1wdWZU9xDlcafJm8htSvks,328
81
- lyrics_transcriber/correction/agentic/prompts/__init__.py,sha256=YPgEN82oygmT_pfIj2RpZM-WOLoFv6rBAPKeIRstXuI,48
82
- lyrics_transcriber/correction/agentic/prompts/classifier.py,sha256=pKbL4Cyj0-c_Ot9IxfKBOL7PnL0ZfUvKPLZwOMr-NDo,9730
81
+ lyrics_transcriber/correction/agentic/prompts/__init__.py,sha256=riiZ-f4jlvq4QjtyCpmv-sSzfcLy7O99pMBwV1H5Usc,605
82
+ lyrics_transcriber/correction/agentic/prompts/classifier.py,sha256=FwUSL59Y-5q9J1CDW8iyzyiajcy4-uq5MzfWu0If_Yo,11899
83
+ lyrics_transcriber/correction/agentic/prompts/langfuse_prompts.py,sha256=hjQhyY_GBuZt_oY9DacutXvA9dJCZksRY2fKmveJm_A,10898
83
84
  lyrics_transcriber/correction/agentic/providers/__init__.py,sha256=PS7C4sKDfa6S9lSo33GXIRamCLsv0Jn7u0GtXuhiRD4,95
84
85
  lyrics_transcriber/correction/agentic/providers/base.py,sha256=bExuntMLLInMmWWNzN81_ScWQJhNYbtlF3wZYhlX-qw,1059
85
86
  lyrics_transcriber/correction/agentic/providers/circuit_breaker.py,sha256=D3Jg4YHqvy4gzlxfkALa7PztyYQpJb8NwJAonMS0TSI,4694
86
- lyrics_transcriber/correction/agentic/providers/config.py,sha256=2dy9zynj8hU3LdRkb2RmKSOztsX4_Ay23EU-RfUGCrM,3206
87
- lyrics_transcriber/correction/agentic/providers/constants.py,sha256=aDIEsDvNQLEGlGk8klAaRxJmdldGBDFqwYLuCmlYoNM,692
87
+ lyrics_transcriber/correction/agentic/providers/config.py,sha256=NnGigthJSWMz_d99qh-ClQaVqjODRoYrwTHVftQOlR8,4156
88
+ lyrics_transcriber/correction/agentic/providers/constants.py,sha256=cXLzKTyFVt9q6wQd_gWcv3EZ5Sm27AOAz6NyPapcess,695
88
89
  lyrics_transcriber/correction/agentic/providers/health.py,sha256=F8pHY5BQYvylGRDGXUHplcAJooAyiqVLRhBl4kHC1H8,710
89
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py,sha256=hderNRLrSZn49LrGBrgdCvBP5E7tPAugjaw7TFbb0JY,7957
90
- lyrics_transcriber/correction/agentic/providers/model_factory.py,sha256=iKbpMEeTyhPN8n9abVf645TfovnFEz3ia1g6XLHqp4s,8613
90
+ lyrics_transcriber/correction/agentic/providers/langchain_bridge.py,sha256=yX5JGGALDCFgji34gZ924GePsfjELOPqgxD6Cx7kKEg,12915
91
+ lyrics_transcriber/correction/agentic/providers/model_factory.py,sha256=90EjVwoKTWo8jXTrroI7GXM9AU-_ACx9g_fHB4vnR2w,9919
91
92
  lyrics_transcriber/correction/agentic/providers/response_cache.py,sha256=Byr7fQJsgUMFlsvHeVCxTiFjjnbsg3KIlEmEEtAo-Gw,7047
92
- lyrics_transcriber/correction/agentic/providers/response_parser.py,sha256=a8pdUYKBS5X72gck3u1ndFYB__UN0UijAdxNhbHp8ZQ,3809
93
+ lyrics_transcriber/correction/agentic/providers/response_parser.py,sha256=c2KypM-yHbIXXakHV5s-qh8fl8FhssLPVo3pJbyAiG4,4301
93
94
  lyrics_transcriber/correction/agentic/providers/retry_executor.py,sha256=hX21Zwy2cSECAw7k13ndEinWRqwjo4xYoSCQ2B2CUf0,3912
94
- lyrics_transcriber/correction/agentic/router.py,sha256=_JtnXgcIdui6qeN9x0EawThDGZavAwfpbtEJAYVlQTY,1334
95
+ lyrics_transcriber/correction/agentic/router.py,sha256=akP28A0lftmsnSyMOW6k7iTC1pv4LEgilXhIkcfJzlE,1437
95
96
  lyrics_transcriber/correction/agentic/workflows/__init__.py,sha256=OsBExAbIIKxJgX6FKXFOgcUjIG9AWJQV_fESZVdO8mo,77
96
97
  lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py,sha256=gMuLTUxkgYaciMsI4yrZSC3wi--7V_PgaDNE-Vd6FE8,575
97
98
  lyrics_transcriber/correction/agentic/workflows/correction_graph.py,sha256=kgZKnz0h9cG1EfhW7BSSl-kSpQtJrRM_S86kAniXfE4,1815
98
99
  lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py,sha256=KsKLD3AP66YYmXfUn-mVZjERYLtU1Zs4a-7CB2zDfas,596
99
100
  lyrics_transcriber/correction/anchor_sequence.py,sha256=5tl4Cjiw5UlLbEb1Oy-g3ebKCinXSwohdaCB9-rTMtI,43798
100
- lyrics_transcriber/correction/corrector.py,sha256=e8N7Yys6MCmz8PbHkkl7KuxH1m3MWlH1vwCa1r3YcqA,40223
101
+ lyrics_transcriber/correction/corrector.py,sha256=2yVFUHzqEXZ7aeJjm6durF6WtrhYVTm6nqOQn-dtNI4,40545
101
102
  lyrics_transcriber/correction/feedback/__init__.py,sha256=i1gd0Vb4qvlzZQ3lqA3fJjt288YP7f-MBPwOzZ7Rjh4,68
102
103
  lyrics_transcriber/correction/feedback/schemas.py,sha256=OiF_WUqcqiEKIoburYM8kWAIundy82PQE7ImsdP8UCk,4416
103
104
  lyrics_transcriber/correction/feedback/store.py,sha256=T4IDzf1eRA9n-wdLLrLyAW1ELYgXwK9RikJgX_B3fN8,8788
@@ -105,8 +106,6 @@ lyrics_transcriber/correction/handlers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JC
105
106
  lyrics_transcriber/correction/handlers/base.py,sha256=ZXYMFgbCmlD62dpqdFwFPlcePdHKEFrABffnG_Mu5mI,1687
106
107
  lyrics_transcriber/correction/handlers/extend_anchor.py,sha256=IADgdPmEMokUQhh6mP-wQWLYf6GfWTvJbBjOk08A-aw,6384
107
108
  lyrics_transcriber/correction/handlers/levenshtein.py,sha256=hMERQHVgiUDSHtamYrAjqZ3qMMok4VmQ_MYM2-nrX6w,7864
108
- lyrics_transcriber/correction/handlers/llm.py,sha256=ufqHtohdU5dUXE3DikzbloAWGVgMu1wnw6P4WHRmpdk,14580
109
- lyrics_transcriber/correction/handlers/llm_providers.py,sha256=MV-KCRseccg-DEimMS0D2bXJ2xhy59r2n8UZjICUoEY,2067
110
109
  lyrics_transcriber/correction/handlers/no_space_punct_match.py,sha256=jY2fa547Qc8B63xIhF9VyWMaq5jds6E6wBqyVq6KANw,7057
111
110
  lyrics_transcriber/correction/handlers/relaxed_word_count_match.py,sha256=x4k__6gav4-STk_TycLcg5Sw4x2vUFAj5fWmOv7Yd_w,3911
112
111
  lyrics_transcriber/correction/handlers/repeat.py,sha256=1PJADW44egYh7N9D2fN-gDIusWVglFjGHrCZuTQYNpA,4313
@@ -287,8 +286,8 @@ lyrics_transcriber/transcribers/whisper.py,sha256=YcCB1ic9H6zL1GS0jD0emu8-qlcH0Q
287
286
  lyrics_transcriber/types.py,sha256=UJjaxhVd2o14AG4G8ToU598p0JeYdiTFjpG38jGCoYQ,27917
288
287
  lyrics_transcriber/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
289
288
  lyrics_transcriber/utils/word_utils.py,sha256=-cMGpj9UV4F6IsoDKAV2i1aiqSO8eI91HMAm_igtVMk,958
290
- karaoke_gen-0.81.1.dist-info/METADATA,sha256=zA3O3rRKeXu_LX28aeN3knlhx3WcpXD8Ozf4s_LT2C0,23077
291
- karaoke_gen-0.81.1.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
292
- karaoke_gen-0.81.1.dist-info/entry_points.txt,sha256=xIyLe7K84ZyjO8L0_AmNectz93QjGSs5AkApMtlAd4g,160
293
- karaoke_gen-0.81.1.dist-info/licenses/LICENSE,sha256=81R_4XwMZDODHD7JcZeUR8IiCU8AD7Ajl6bmwR9tYDk,1074
294
- karaoke_gen-0.81.1.dist-info/RECORD,,
289
+ karaoke_gen-0.86.5.dist-info/METADATA,sha256=Bv8ezXed1IakQYY4hFXnar8dOd31kb3RUjQUPkcdVzs,23074
290
+ karaoke_gen-0.86.5.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
291
+ karaoke_gen-0.86.5.dist-info/entry_points.txt,sha256=xIyLe7K84ZyjO8L0_AmNectz93QjGSs5AkApMtlAd4g,160
292
+ karaoke_gen-0.86.5.dist-info/licenses/LICENSE,sha256=81R_4XwMZDODHD7JcZeUR8IiCU8AD7Ajl6bmwR9tYDk,1074
293
+ karaoke_gen-0.86.5.dist-info/RECORD,,
@@ -289,8 +289,13 @@ class LyricsTranscriber:
289
289
  """Initialize output generation service."""
290
290
  return OutputGenerator(config=self.output_config, logger=self.logger)
291
291
 
292
- def process(self) -> LyricsControllerResult:
293
- """Main processing method that orchestrates the entire workflow."""
292
+ def process(self, agentic_deadline: Optional[float] = None) -> LyricsControllerResult:
293
+ """Main processing method that orchestrates the entire workflow.
294
+
295
+ Args:
296
+ agentic_deadline: Optional Unix timestamp. If agentic correction is still
297
+ running after this time, it will abort and return uncorrected results.
298
+ """
294
299
 
295
300
  self.logger.info(f"LyricsTranscriber controller beginning processing for {self.artist} - {self.title}")
296
301
 
@@ -390,7 +395,7 @@ class LyricsTranscriber:
390
395
 
391
396
  # Step 3: Process and correct lyrics if enabled AND we have transcription results
392
397
  if self.output_config.run_correction and self.results.transcription_results:
393
- self.correct_lyrics()
398
+ self.correct_lyrics(agentic_deadline=agentic_deadline)
394
399
  elif self.output_config.run_correction:
395
400
  self.logger.info("Skipping lyrics correction - no transcription results available")
396
401
 
@@ -494,8 +499,13 @@ class LyricsTranscriber:
494
499
  else:
495
500
  self.logger.debug(" - LocalWhisper: DISABLED (enable_local_whisper=False)")
496
501
 
497
- def correct_lyrics(self) -> None:
498
- """Run lyrics correction using transcription and internet lyrics."""
502
+ def correct_lyrics(self, agentic_deadline: Optional[float] = None) -> None:
503
+ """Run lyrics correction using transcription and internet lyrics.
504
+
505
+ Args:
506
+ agentic_deadline: Optional Unix timestamp. If agentic correction is still
507
+ running after this time, it will abort and return uncorrected results.
508
+ """
499
509
  self.logger.info("Starting lyrics correction process")
500
510
 
501
511
  # Check if we have reference lyrics to work with
@@ -553,6 +563,7 @@ class LyricsTranscriber:
553
563
  transcription_results=self.results.transcription_results,
554
564
  lyrics_results=self.results.lyrics_results,
555
565
  metadata=metadata,
566
+ agentic_deadline=agentic_deadline,
556
567
  )
557
568
 
558
569
  # Store corrected results
@@ -3,6 +3,7 @@ from __future__ import annotations
3
3
  import logging
4
4
  import os
5
5
  import json
6
+ import time
6
7
  from typing import Dict, Any, List, Optional
7
8
 
8
9
  from .providers.base import BaseAIProvider
@@ -120,7 +121,7 @@ class AgenticCorrector:
120
121
  title: Optional[str] = None
121
122
  ) -> Optional[GapClassification]:
122
123
  """Classify a gap using the AI provider.
123
-
124
+
124
125
  Args:
125
126
  gap_id: Unique identifier for the gap
126
127
  gap_text: The text of the gap
@@ -129,10 +130,13 @@ class AgenticCorrector:
129
130
  reference_contexts: Dictionary of reference lyrics from each source
130
131
  artist: Song artist name
131
132
  title: Song title
132
-
133
+
133
134
  Returns:
134
135
  GapClassification object or None if classification fails
135
136
  """
137
+ logger.info(f"🤖 Classifying gap {gap_id} ({len(gap_text)} chars)")
138
+ start_time = time.time()
139
+
136
140
  # Build classification prompt
137
141
  prompt = build_classification_prompt(
138
142
  gap_text=gap_text,
@@ -143,7 +147,7 @@ class AgenticCorrector:
143
147
  title=title,
144
148
  gap_id=gap_id
145
149
  )
146
-
150
+
147
151
  # Call AI provider to get classification
148
152
  try:
149
153
  data = self._provider.generate_correction_proposals(
@@ -151,17 +155,25 @@ class AgenticCorrector:
151
155
  schema=GapClassification.model_json_schema(),
152
156
  session_id=self._session_id
153
157
  )
154
-
158
+
159
+ elapsed = time.time() - start_time
160
+
155
161
  # Extract first result
156
162
  if data and len(data) > 0:
157
163
  item = data[0]
158
164
  if isinstance(item, dict) and "error" not in item:
159
165
  classification = GapClassification.model_validate(item)
160
- logger.debug(f"🤖 Classified gap {gap_id} as {classification.category} (confidence: {classification.confidence})")
166
+ logger.info(
167
+ f"🤖 Classified gap {gap_id} as {classification.category} "
168
+ f"(confidence: {classification.confidence:.2f}) in {elapsed:.2f}s"
169
+ )
161
170
  return classification
171
+ else:
172
+ logger.warning(f"🤖 Classification returned error for gap {gap_id}: {item}")
162
173
  except Exception as e:
163
- logger.warning(f"🤖 Failed to classify gap {gap_id}: {e}")
164
-
174
+ elapsed = time.time() - start_time
175
+ logger.warning(f"🤖 Failed to classify gap {gap_id} after {elapsed:.2f}s: {e}")
176
+
165
177
  return None
166
178
 
167
179
  def propose_for_gap(
@@ -1,28 +1,115 @@
1
- from typing import Optional, Dict, Any
1
+ """LangFuse integration for agentic correction observability and prompt management.
2
+
3
+ This module provides:
4
+ - Client initialization with fail-fast behavior when configured
5
+ - Metrics recording for observability
6
+ - Prompt fetching for dynamic prompt management
7
+ - Dataset fetching for few-shot examples
8
+ """
9
+
10
+ from typing import Optional, Dict, Any, List
2
11
  import os
3
- import threading
12
+ import logging
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # Module-level client singleton
17
+ _langfuse_client: Optional[Any] = None
18
+ _client_initialized: bool = False
19
+
20
+
21
+ class LangFuseConfigError(Exception):
22
+ """Raised when LangFuse is configured but initialization fails."""
23
+ pass
24
+
4
25
 
26
+ def is_langfuse_configured() -> bool:
27
+ """Check if LangFuse credentials are configured in environment."""
28
+ public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
29
+ secret_key = os.getenv("LANGFUSE_SECRET_KEY")
30
+ return bool(public_key and secret_key)
5
31
 
6
- def setup_langfuse(client_name: str = "agentic-corrector") -> Optional[object]:
32
+
33
+ def setup_langfuse() -> Optional[object]:
7
34
  """Initialize Langfuse client if keys are present; return client or None.
8
35
 
9
36
  This avoids hard dependency at import time; caller can check for None and
10
37
  no-op if observability is not configured.
38
+
39
+ Note: This function does NOT fail fast - use get_langfuse_client() for
40
+ fail-fast behavior when LangFuse is required.
11
41
  """
12
42
  secret = os.getenv("LANGFUSE_SECRET_KEY")
13
43
  public = os.getenv("LANGFUSE_PUBLIC_KEY")
14
- host = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")
44
+ host = os.getenv("LANGFUSE_HOST", "https://us.cloud.langfuse.com")
15
45
  if not (secret and public):
16
46
  return None
17
47
  try:
18
48
  from langfuse import Langfuse # type: ignore
19
49
 
20
- client = Langfuse(secret_key=secret, public_key=public, host=host, sdk_integration=client_name)
50
+ client = Langfuse(secret_key=secret, public_key=public, host=host)
21
51
  return client
22
52
  except Exception:
23
53
  return None
24
54
 
25
55
 
56
+ def get_langfuse_client() -> Optional[Any]:
57
+ """Get or create the LangFuse client singleton.
58
+
59
+ Unlike setup_langfuse(), this function implements fail-fast behavior:
60
+ if LangFuse keys are configured but initialization fails, it raises
61
+ an exception rather than returning None.
62
+
63
+ Returns:
64
+ Langfuse client instance, or None if not configured
65
+
66
+ Raises:
67
+ LangFuseConfigError: If keys are set but initialization fails
68
+ """
69
+ global _langfuse_client, _client_initialized
70
+
71
+ if _client_initialized:
72
+ return _langfuse_client
73
+
74
+ secret = os.getenv("LANGFUSE_SECRET_KEY")
75
+ public = os.getenv("LANGFUSE_PUBLIC_KEY")
76
+ host = os.getenv("LANGFUSE_HOST", "https://us.cloud.langfuse.com")
77
+
78
+ if not (secret and public):
79
+ logger.debug("LangFuse keys not configured, client disabled")
80
+ _client_initialized = True
81
+ return None
82
+
83
+ try:
84
+ from langfuse import Langfuse
85
+
86
+ _langfuse_client = Langfuse(
87
+ secret_key=secret,
88
+ public_key=public,
89
+ host=host,
90
+ )
91
+ _client_initialized = True
92
+ logger.info(f"LangFuse client initialized (host: {host})")
93
+ return _langfuse_client
94
+
95
+ except Exception as e:
96
+ # Fail fast - if keys are set, we expect LangFuse to work
97
+ raise LangFuseConfigError(
98
+ f"LangFuse keys are set but initialization failed: {e}\n"
99
+ f"Check:\n"
100
+ f" - LANGFUSE_PUBLIC_KEY: {public[:10] if public else 'not set'}...\n"
101
+ f" - LANGFUSE_SECRET_KEY: {'set' if secret else 'not set'}\n"
102
+ f" - LANGFUSE_HOST: {host}"
103
+ ) from e
104
+
105
+
106
+ def reset_langfuse_client() -> None:
107
+ """Reset the global LangFuse client (for testing)."""
108
+ global _langfuse_client, _client_initialized
109
+ _langfuse_client = None
110
+ _client_initialized = False
111
+
112
+
26
113
  def record_metrics(client: Optional[object], name: str, metrics: Dict[str, Any]) -> None:
27
114
  """Record custom metrics to Langfuse if initialized."""
28
115
  if client is None:
@@ -33,3 +120,89 @@ def record_metrics(client: Optional[object], name: str, metrics: Dict[str, Any])
33
120
  except Exception:
34
121
  # Swallow observability errors to never impact core flow
35
122
  pass
123
+
124
+
125
+ def fetch_prompt(name: str, client: Optional[Any] = None, label: Optional[str] = "production") -> Any:
126
+ """Fetch a prompt template from LangFuse.
127
+
128
+ Args:
129
+ name: The prompt name in LangFuse
130
+ client: Optional pre-initialized client. If None, uses get_langfuse_client()
131
+ label: Prompt label to fetch (default: "production"). If the labeled version
132
+ is not found, falls back to version 1.
133
+
134
+ Returns:
135
+ LangFuse prompt object
136
+
137
+ Raises:
138
+ LangFuseConfigError: If LangFuse is not configured
139
+ RuntimeError: If prompt fetch fails
140
+ """
141
+ if client is None:
142
+ client = get_langfuse_client()
143
+
144
+ if client is None:
145
+ raise LangFuseConfigError(
146
+ f"Cannot fetch prompt '{name}': LangFuse is not configured. "
147
+ f"Set LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY."
148
+ )
149
+
150
+ try:
151
+ # Try to fetch with the specified label (default: production)
152
+ prompt = client.get_prompt(name, label=label)
153
+ logger.debug(f"Fetched prompt '{name}' (label={label}) from LangFuse")
154
+ return prompt
155
+ except Exception as label_error:
156
+ # If labeled version not found, try fetching version 1 as fallback
157
+ # This handles newly created prompts that haven't been promoted yet
158
+ try:
159
+ prompt = client.get_prompt(name, version=1)
160
+ logger.warning(
161
+ f"Prompt '{name}' label '{label}' not found, using version 1. "
162
+ f"Consider promoting this prompt in LangFuse UI."
163
+ )
164
+ return prompt
165
+ except Exception as version_error:
166
+ raise RuntimeError(
167
+ f"Failed to fetch prompt '{name}' from LangFuse: "
168
+ f"Label '{label}' error: {label_error}, "
169
+ f"Version 1 fallback error: {version_error}"
170
+ ) from version_error
171
+
172
+
173
+ def fetch_dataset(name: str, client: Optional[Any] = None) -> List[Dict[str, Any]]:
174
+ """Fetch a dataset from LangFuse and return its items.
175
+
176
+ Args:
177
+ name: The dataset name in LangFuse
178
+ client: Optional pre-initialized client. If None, uses get_langfuse_client()
179
+
180
+ Returns:
181
+ List of dataset item inputs (the actual example data)
182
+
183
+ Raises:
184
+ LangFuseConfigError: If LangFuse is not configured
185
+ RuntimeError: If dataset fetch fails
186
+ """
187
+ if client is None:
188
+ client = get_langfuse_client()
189
+
190
+ if client is None:
191
+ raise LangFuseConfigError(
192
+ f"Cannot fetch dataset '{name}': LangFuse is not configured. "
193
+ f"Set LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY."
194
+ )
195
+
196
+ try:
197
+ dataset = client.get_dataset(name)
198
+ items = []
199
+ for item in dataset.items:
200
+ if hasattr(item, 'input') and item.input:
201
+ items.append(item.input)
202
+
203
+ logger.debug(f"Fetched {len(items)} items from dataset '{name}'")
204
+ return items
205
+ except Exception as e:
206
+ raise RuntimeError(
207
+ f"Failed to fetch dataset '{name}' from LangFuse: {e}"
208
+ ) from e
@@ -1,2 +1,25 @@
1
1
  """Prompt templates for agentic correction."""
2
2
 
3
+ from .classifier import (
4
+ build_classification_prompt,
5
+ build_classification_prompt_hardcoded,
6
+ get_hardcoded_examples,
7
+ )
8
+ from .langfuse_prompts import (
9
+ LangFusePromptService,
10
+ LangFusePromptError,
11
+ LangFuseDatasetError,
12
+ get_prompt_service,
13
+ reset_prompt_service,
14
+ )
15
+
16
+ __all__ = [
17
+ "build_classification_prompt",
18
+ "build_classification_prompt_hardcoded",
19
+ "get_hardcoded_examples",
20
+ "LangFusePromptService",
21
+ "LangFusePromptError",
22
+ "LangFuseDatasetError",
23
+ "get_prompt_service",
24
+ "reset_prompt_service",
25
+ ]
@@ -1,23 +1,35 @@
1
- """Gap classification prompt builder for agentic correction."""
1
+ """Gap classification prompt builder for agentic correction.
2
+
3
+ This module provides two modes of operation:
4
+ 1. LangFuse mode: Prompts and examples fetched from LangFuse for dynamic iteration
5
+ 2. Hardcoded mode: Fallback for local development when LangFuse is not configured
6
+
7
+ The main entry point is `build_classification_prompt()` which automatically
8
+ selects the appropriate mode based on LangFuse configuration.
9
+ """
2
10
 
3
11
  from typing import Dict, List, Optional
4
12
  import yaml
5
13
  import os
14
+ import logging
6
15
  from pathlib import Path
7
16
 
17
+ logger = logging.getLogger(__name__)
18
+
8
19
 
9
20
  def load_few_shot_examples() -> Dict[str, List[Dict]]:
10
21
  """Load few-shot examples from examples.yaml if it exists."""
11
22
  examples_path = Path(__file__).parent / "examples.yaml"
12
-
23
+
13
24
  if not examples_path.exists():
14
25
  return get_hardcoded_examples()
15
-
26
+
16
27
  try:
17
28
  with open(examples_path, 'r') as f:
18
29
  data = yaml.safe_load(f)
19
30
  return data.get('examples_by_category', {})
20
- except Exception:
31
+ except Exception as e:
32
+ logger.warning(f"Failed to load examples.yaml, using hardcoded examples: {e}")
21
33
  return get_hardcoded_examples()
22
34
 
23
35
 
@@ -122,7 +134,12 @@ def build_classification_prompt(
122
134
  gap_id: Optional[str] = None
123
135
  ) -> str:
124
136
  """Build a prompt for classifying a gap in the transcription.
125
-
137
+
138
+ This function automatically selects between LangFuse and hardcoded prompts:
139
+ - If LangFuse is configured (LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY set),
140
+ fetches the prompt template and examples from LangFuse.
141
+ - Otherwise, uses hardcoded prompts for local development.
142
+
126
143
  Args:
127
144
  gap_text: The text of the gap that needs classification
128
145
  preceding_words: Text immediately before the gap
@@ -131,7 +148,50 @@ def build_classification_prompt(
131
148
  artist: Song artist name for context
132
149
  title: Song title for context
133
150
  gap_id: Identifier for the gap
134
-
151
+
152
+ Returns:
153
+ Formatted prompt string for the LLM
154
+
155
+ Raises:
156
+ LangFusePromptError: If LangFuse is configured but prompt fetch fails
157
+ """
158
+ from .langfuse_prompts import get_prompt_service
159
+
160
+ service = get_prompt_service()
161
+ return service.get_classification_prompt(
162
+ gap_text=gap_text,
163
+ preceding_words=preceding_words,
164
+ following_words=following_words,
165
+ reference_contexts=reference_contexts,
166
+ artist=artist,
167
+ title=title,
168
+ gap_id=gap_id
169
+ )
170
+
171
+
172
+ def build_classification_prompt_hardcoded(
173
+ gap_text: str,
174
+ preceding_words: str,
175
+ following_words: str,
176
+ reference_contexts: Dict[str, str],
177
+ artist: Optional[str] = None,
178
+ title: Optional[str] = None,
179
+ gap_id: Optional[str] = None
180
+ ) -> str:
181
+ """Build a prompt for classifying a gap using hardcoded templates.
182
+
183
+ This is the fallback implementation used when LangFuse is not configured.
184
+ It is also used as the source of truth for migrating prompts to LangFuse.
185
+
186
+ Args:
187
+ gap_text: The text of the gap that needs classification
188
+ preceding_words: Text immediately before the gap
189
+ following_words: Text immediately after the gap
190
+ reference_contexts: Dictionary of reference lyrics from each source
191
+ artist: Song artist name for context
192
+ title: Song title for context
193
+ gap_id: Identifier for the gap
194
+
135
195
  Returns:
136
196
  Formatted prompt string for the LLM
137
197
  """