karaoke-gen 0.81.1__py3-none-any.whl → 0.82.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in the public registry.
karaoke_gen-0.81.1.dist-info/METADATA → karaoke_gen-0.82.0.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: karaoke-gen
-Version: 0.81.1
+Version: 0.82.0
 Summary: Generate karaoke videos with synchronized lyrics. Handles the entire process from downloading audio and lyrics to creating the final video with title screens.
 License: MIT
 License-File: LICENSE
@@ -42,7 +42,7 @@ Requires-Dist: kbputils (>=0.0.16,<0.0.17)
 Requires-Dist: langchain (>=0.3.0)
 Requires-Dist: langchain-anthropic (>=0.2.0)
 Requires-Dist: langchain-core (>=0.3.0)
-Requires-Dist: langchain-google-vertexai (>=2.0.0)
+Requires-Dist: langchain-google-vertexai (>=3.1.1)
 Requires-Dist: langchain-ollama (>=0.2.0)
 Requires-Dist: langchain-openai (>=0.2.0)
 Requires-Dist: langfuse (>=3.0.0)
karaoke_gen-0.81.1.dist-info/RECORD → karaoke_gen-0.82.0.dist-info/RECORD
@@ -75,29 +75,30 @@ lyrics_transcriber/correction/agentic/models/observability_metrics.py,sha256=xGd
 lyrics_transcriber/correction/agentic/models/schemas.py,sha256=skWXqGkJnv9NvmvjktBqrH_4Ohyzg2x0ZMsVINbXKdg,2141
 lyrics_transcriber/correction/agentic/models/utils.py,sha256=tX7flxCB4aLrgZWkHuEt7Gr8kaLkMsXzTdWSec6Xsts,580
 lyrics_transcriber/correction/agentic/observability/__init__.py,sha256=RuaepVsltWdaF1aF_YmNVJTJ6_bbNDFo3Sp-ruBvyHA,85
-lyrics_transcriber/correction/agentic/observability/langfuse_integration.py,sha256=GvgisZyy5_tDeC7Hd8SDWbd-9aAqYnMTd52uNeQ2p-I,1221
+lyrics_transcriber/correction/agentic/observability/langfuse_integration.py,sha256=5oBfoFT-QExZttD2wlIzXRhgSglNElFFwz8Et36vZos,7014
 lyrics_transcriber/correction/agentic/observability/metrics.py,sha256=Js_m6ljdI6Xgd9X9eHtboCsf9gjYsN1zOv3_XSwjgKk,1907
 lyrics_transcriber/correction/agentic/observability/performance.py,sha256=ekjzgL65gfs1SpKR_befu1wdWZU9xDlcafJm8htSvks,328
-lyrics_transcriber/correction/agentic/prompts/__init__.py,sha256=YPgEN82oygmT_pfIj2RpZM-WOLoFv6rBAPKeIRstXuI,48
-lyrics_transcriber/correction/agentic/prompts/classifier.py,sha256=pKbL4Cyj0-c_Ot9IxfKBOL7PnL0ZfUvKPLZwOMr-NDo,9730
+lyrics_transcriber/correction/agentic/prompts/__init__.py,sha256=riiZ-f4jlvq4QjtyCpmv-sSzfcLy7O99pMBwV1H5Usc,605
+lyrics_transcriber/correction/agentic/prompts/classifier.py,sha256=FwUSL59Y-5q9J1CDW8iyzyiajcy4-uq5MzfWu0If_Yo,11899
+lyrics_transcriber/correction/agentic/prompts/langfuse_prompts.py,sha256=hjQhyY_GBuZt_oY9DacutXvA9dJCZksRY2fKmveJm_A,10898
 lyrics_transcriber/correction/agentic/providers/__init__.py,sha256=PS7C4sKDfa6S9lSo33GXIRamCLsv0Jn7u0GtXuhiRD4,95
 lyrics_transcriber/correction/agentic/providers/base.py,sha256=bExuntMLLInMmWWNzN81_ScWQJhNYbtlF3wZYhlX-qw,1059
 lyrics_transcriber/correction/agentic/providers/circuit_breaker.py,sha256=D3Jg4YHqvy4gzlxfkALa7PztyYQpJb8NwJAonMS0TSI,4694
-lyrics_transcriber/correction/agentic/providers/config.py,sha256=2dy9zynj8hU3LdRkb2RmKSOztsX4_Ay23EU-RfUGCrM,3206
-lyrics_transcriber/correction/agentic/providers/constants.py,sha256=aDIEsDvNQLEGlGk8klAaRxJmdldGBDFqwYLuCmlYoNM,692
+lyrics_transcriber/correction/agentic/providers/config.py,sha256=w6-fkapEy3BgoFIsRfZ44XUCV4zuicFSNoSoVAe5lYE,3282
+lyrics_transcriber/correction/agentic/providers/constants.py,sha256=cXLzKTyFVt9q6wQd_gWcv3EZ5Sm27AOAz6NyPapcess,695
 lyrics_transcriber/correction/agentic/providers/health.py,sha256=F8pHY5BQYvylGRDGXUHplcAJooAyiqVLRhBl4kHC1H8,710
-lyrics_transcriber/correction/agentic/providers/langchain_bridge.py,sha256=hderNRLrSZn49LrGBrgdCvBP5E7tPAugjaw7TFbb0JY,7957
-lyrics_transcriber/correction/agentic/providers/model_factory.py,sha256=iKbpMEeTyhPN8n9abVf645TfovnFEz3ia1g6XLHqp4s,8613
+lyrics_transcriber/correction/agentic/providers/langchain_bridge.py,sha256=H3C3BNjAixfkOJojxWXv-P-svlgj5rJEJdk0zPIjh7E,8540
+lyrics_transcriber/correction/agentic/providers/model_factory.py,sha256=CeVDblf1HdphtUHVn3Cgl07YAeUuSxTjEHHFJN8Frj0,8257
 lyrics_transcriber/correction/agentic/providers/response_cache.py,sha256=Byr7fQJsgUMFlsvHeVCxTiFjjnbsg3KIlEmEEtAo-Gw,7047
 lyrics_transcriber/correction/agentic/providers/response_parser.py,sha256=a8pdUYKBS5X72gck3u1ndFYB__UN0UijAdxNhbHp8ZQ,3809
 lyrics_transcriber/correction/agentic/providers/retry_executor.py,sha256=hX21Zwy2cSECAw7k13ndEinWRqwjo4xYoSCQ2B2CUf0,3912
-lyrics_transcriber/correction/agentic/router.py,sha256=_JtnXgcIdui6qeN9x0EawThDGZavAwfpbtEJAYVlQTY,1334
+lyrics_transcriber/correction/agentic/router.py,sha256=akP28A0lftmsnSyMOW6k7iTC1pv4LEgilXhIkcfJzlE,1437
 lyrics_transcriber/correction/agentic/workflows/__init__.py,sha256=OsBExAbIIKxJgX6FKXFOgcUjIG9AWJQV_fESZVdO8mo,77
 lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py,sha256=gMuLTUxkgYaciMsI4yrZSC3wi--7V_PgaDNE-Vd6FE8,575
 lyrics_transcriber/correction/agentic/workflows/correction_graph.py,sha256=kgZKnz0h9cG1EfhW7BSSl-kSpQtJrRM_S86kAniXfE4,1815
 lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py,sha256=KsKLD3AP66YYmXfUn-mVZjERYLtU1Zs4a-7CB2zDfas,596
 lyrics_transcriber/correction/anchor_sequence.py,sha256=5tl4Cjiw5UlLbEb1Oy-g3ebKCinXSwohdaCB9-rTMtI,43798
-lyrics_transcriber/correction/corrector.py,sha256=e8N7Yys6MCmz8PbHkkl7KuxH1m3MWlH1vwCa1r3YcqA,40223
+lyrics_transcriber/correction/corrector.py,sha256=qW6GwOOLM8zxYtYMmGy9Rzk_4mJzdpGiCXW3LQFXn14,38362
 lyrics_transcriber/correction/feedback/__init__.py,sha256=i1gd0Vb4qvlzZQ3lqA3fJjt288YP7f-MBPwOzZ7Rjh4,68
 lyrics_transcriber/correction/feedback/schemas.py,sha256=OiF_WUqcqiEKIoburYM8kWAIundy82PQE7ImsdP8UCk,4416
 lyrics_transcriber/correction/feedback/store.py,sha256=T4IDzf1eRA9n-wdLLrLyAW1ELYgXwK9RikJgX_B3fN8,8788
@@ -105,8 +106,6 @@ lyrics_transcriber/correction/handlers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JC
 lyrics_transcriber/correction/handlers/base.py,sha256=ZXYMFgbCmlD62dpqdFwFPlcePdHKEFrABffnG_Mu5mI,1687
 lyrics_transcriber/correction/handlers/extend_anchor.py,sha256=IADgdPmEMokUQhh6mP-wQWLYf6GfWTvJbBjOk08A-aw,6384
 lyrics_transcriber/correction/handlers/levenshtein.py,sha256=hMERQHVgiUDSHtamYrAjqZ3qMMok4VmQ_MYM2-nrX6w,7864
-lyrics_transcriber/correction/handlers/llm.py,sha256=ufqHtohdU5dUXE3DikzbloAWGVgMu1wnw6P4WHRmpdk,14580
-lyrics_transcriber/correction/handlers/llm_providers.py,sha256=MV-KCRseccg-DEimMS0D2bXJ2xhy59r2n8UZjICUoEY,2067
 lyrics_transcriber/correction/handlers/no_space_punct_match.py,sha256=jY2fa547Qc8B63xIhF9VyWMaq5jds6E6wBqyVq6KANw,7057
 lyrics_transcriber/correction/handlers/relaxed_word_count_match.py,sha256=x4k__6gav4-STk_TycLcg5Sw4x2vUFAj5fWmOv7Yd_w,3911
 lyrics_transcriber/correction/handlers/repeat.py,sha256=1PJADW44egYh7N9D2fN-gDIusWVglFjGHrCZuTQYNpA,4313
@@ -287,8 +286,8 @@ lyrics_transcriber/transcribers/whisper.py,sha256=YcCB1ic9H6zL1GS0jD0emu8-qlcH0Q
 lyrics_transcriber/types.py,sha256=UJjaxhVd2o14AG4G8ToU598p0JeYdiTFjpG38jGCoYQ,27917
 lyrics_transcriber/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lyrics_transcriber/utils/word_utils.py,sha256=-cMGpj9UV4F6IsoDKAV2i1aiqSO8eI91HMAm_igtVMk,958
-karaoke_gen-0.81.1.dist-info/METADATA,sha256=zA3O3rRKeXu_LX28aeN3knlhx3WcpXD8Ozf4s_LT2C0,23077
-karaoke_gen-0.81.1.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
-karaoke_gen-0.81.1.dist-info/entry_points.txt,sha256=xIyLe7K84ZyjO8L0_AmNectz93QjGSs5AkApMtlAd4g,160
-karaoke_gen-0.81.1.dist-info/licenses/LICENSE,sha256=81R_4XwMZDODHD7JcZeUR8IiCU8AD7Ajl6bmwR9tYDk,1074
-karaoke_gen-0.81.1.dist-info/RECORD,,
+karaoke_gen-0.82.0.dist-info/METADATA,sha256=k51l0dhnVIM5kn6lFbTCLi1oKbcV_2iSulloMXVkU0Q,23077
+karaoke_gen-0.82.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+karaoke_gen-0.82.0.dist-info/entry_points.txt,sha256=xIyLe7K84ZyjO8L0_AmNectz93QjGSs5AkApMtlAd4g,160
+karaoke_gen-0.82.0.dist-info/licenses/LICENSE,sha256=81R_4XwMZDODHD7JcZeUR8IiCU8AD7Ajl6bmwR9tYDk,1074
+karaoke_gen-0.82.0.dist-info/RECORD,,
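
Beyond the version bump, the only dependency change is raising langchain-google-vertexai from >=2.0.0 to >=3.1.1, which lines up with the Gemini 3 handling added in the source changes below. A sketch for sanity-checking the pins in an installed environment (assumes the packaging library is available):

    from importlib.metadata import version
    from packaging.version import Version

    # Hypothetical post-upgrade check; names match the diff above.
    assert version("karaoke-gen") == "0.82.0"
    assert Version(version("langchain-google-vertexai")) >= Version("3.1.1")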
lyrics_transcriber/correction/agentic/observability/langfuse_integration.py
@@ -1,28 +1,115 @@
-from typing import Optional, Dict, Any
+"""LangFuse integration for agentic correction observability and prompt management.
+
+This module provides:
+- Client initialization with fail-fast behavior when configured
+- Metrics recording for observability
+- Prompt fetching for dynamic prompt management
+- Dataset fetching for few-shot examples
+"""
+
+from typing import Optional, Dict, Any, List
 import os
-import threading
+import logging
+
+logger = logging.getLogger(__name__)
+
+# Module-level client singleton
+_langfuse_client: Optional[Any] = None
+_client_initialized: bool = False
+
+
+class LangFuseConfigError(Exception):
+    """Raised when LangFuse is configured but initialization fails."""
+    pass
+
 
+def is_langfuse_configured() -> bool:
+    """Check if LangFuse credentials are configured in environment."""
+    public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
+    secret_key = os.getenv("LANGFUSE_SECRET_KEY")
+    return bool(public_key and secret_key)
 
-def setup_langfuse(client_name: str = "agentic-corrector") -> Optional[object]:
+
+def setup_langfuse() -> Optional[object]:
     """Initialize Langfuse client if keys are present; return client or None.
 
     This avoids hard dependency at import time; caller can check for None and
     no-op if observability is not configured.
+
+    Note: This function does NOT fail fast - use get_langfuse_client() for
+    fail-fast behavior when LangFuse is required.
     """
     secret = os.getenv("LANGFUSE_SECRET_KEY")
     public = os.getenv("LANGFUSE_PUBLIC_KEY")
-    host = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")
+    host = os.getenv("LANGFUSE_HOST", "https://us.cloud.langfuse.com")
     if not (secret and public):
         return None
     try:
         from langfuse import Langfuse  # type: ignore
 
-        client = Langfuse(secret_key=secret, public_key=public, host=host, sdk_integration=client_name)
+        client = Langfuse(secret_key=secret, public_key=public, host=host)
         return client
     except Exception:
         return None
 
 
+def get_langfuse_client() -> Optional[Any]:
+    """Get or create the LangFuse client singleton.
+
+    Unlike setup_langfuse(), this function implements fail-fast behavior:
+    if LangFuse keys are configured but initialization fails, it raises
+    an exception rather than returning None.
+
+    Returns:
+        Langfuse client instance, or None if not configured
+
+    Raises:
+        LangFuseConfigError: If keys are set but initialization fails
+    """
+    global _langfuse_client, _client_initialized
+
+    if _client_initialized:
+        return _langfuse_client
+
+    secret = os.getenv("LANGFUSE_SECRET_KEY")
+    public = os.getenv("LANGFUSE_PUBLIC_KEY")
+    host = os.getenv("LANGFUSE_HOST", "https://us.cloud.langfuse.com")
+
+    if not (secret and public):
+        logger.debug("LangFuse keys not configured, client disabled")
+        _client_initialized = True
+        return None
+
+    try:
+        from langfuse import Langfuse
+
+        _langfuse_client = Langfuse(
+            secret_key=secret,
+            public_key=public,
+            host=host,
+        )
+        _client_initialized = True
+        logger.info(f"LangFuse client initialized (host: {host})")
+        return _langfuse_client
+
+    except Exception as e:
+        # Fail fast - if keys are set, we expect LangFuse to work
+        raise LangFuseConfigError(
+            f"LangFuse keys are set but initialization failed: {e}\n"
+            f"Check:\n"
+            f"  - LANGFUSE_PUBLIC_KEY: {public[:10] if public else 'not set'}...\n"
+            f"  - LANGFUSE_SECRET_KEY: {'set' if secret else 'not set'}\n"
+            f"  - LANGFUSE_HOST: {host}"
+        ) from e
+
+
+def reset_langfuse_client() -> None:
+    """Reset the global LangFuse client (for testing)."""
+    global _langfuse_client, _client_initialized
+    _langfuse_client = None
+    _client_initialized = False
+
+
 def record_metrics(client: Optional[object], name: str, metrics: Dict[str, Any]) -> None:
     """Record custom metrics to Langfuse if initialized."""
     if client is None:
@@ -33,3 +120,89 @@ def record_metrics(client: Optional[object], name: str, metrics: Dict[str, Any])
     except Exception:
         # Swallow observability errors to never impact core flow
         pass
+
+
+def fetch_prompt(name: str, client: Optional[Any] = None, label: Optional[str] = "production") -> Any:
+    """Fetch a prompt template from LangFuse.
+
+    Args:
+        name: The prompt name in LangFuse
+        client: Optional pre-initialized client. If None, uses get_langfuse_client()
+        label: Prompt label to fetch (default: "production"). If the labeled version
+            is not found, falls back to version 1.
+
+    Returns:
+        LangFuse prompt object
+
+    Raises:
+        LangFuseConfigError: If LangFuse is not configured
+        RuntimeError: If prompt fetch fails
+    """
+    if client is None:
+        client = get_langfuse_client()
+
+    if client is None:
+        raise LangFuseConfigError(
+            f"Cannot fetch prompt '{name}': LangFuse is not configured. "
+            f"Set LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY."
+        )
+
+    try:
+        # Try to fetch with the specified label (default: production)
+        prompt = client.get_prompt(name, label=label)
+        logger.debug(f"Fetched prompt '{name}' (label={label}) from LangFuse")
+        return prompt
+    except Exception as label_error:
+        # If labeled version not found, try fetching version 1 as fallback
+        # This handles newly created prompts that haven't been promoted yet
+        try:
+            prompt = client.get_prompt(name, version=1)
+            logger.warning(
+                f"Prompt '{name}' label '{label}' not found, using version 1. "
+                f"Consider promoting this prompt in LangFuse UI."
+            )
+            return prompt
+        except Exception as version_error:
+            raise RuntimeError(
+                f"Failed to fetch prompt '{name}' from LangFuse: "
+                f"Label '{label}' error: {label_error}, "
+                f"Version 1 fallback error: {version_error}"
+            ) from version_error
+
+
+def fetch_dataset(name: str, client: Optional[Any] = None) -> List[Dict[str, Any]]:
+    """Fetch a dataset from LangFuse and return its items.
+
+    Args:
+        name: The dataset name in LangFuse
+        client: Optional pre-initialized client. If None, uses get_langfuse_client()
+
+    Returns:
+        List of dataset item inputs (the actual example data)
+
+    Raises:
+        LangFuseConfigError: If LangFuse is not configured
+        RuntimeError: If dataset fetch fails
+    """
+    if client is None:
+        client = get_langfuse_client()
+
+    if client is None:
+        raise LangFuseConfigError(
+            f"Cannot fetch dataset '{name}': LangFuse is not configured. "
+            f"Set LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY."
+        )
+
+    try:
+        dataset = client.get_dataset(name)
+        items = []
+        for item in dataset.items:
+            if hasattr(item, 'input') and item.input:
+                items.append(item.input)
+
+        logger.debug(f"Fetched {len(items)} items from dataset '{name}'")
+        return items
+    except Exception as e:
+        raise RuntimeError(
+            f"Failed to fetch dataset '{name}' from LangFuse: {e}"
+        ) from e
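
The module grows from a single best-effort setup_langfuse() helper into a small API with an explicit fail-fast path. A usage sketch of the new surface (function names come from the diff above; the control flow shown is illustrative):

    from lyrics_transcriber.correction.agentic.observability.langfuse_integration import (
        LangFuseConfigError,
        fetch_prompt,
        get_langfuse_client,
        is_langfuse_configured,
    )

    if is_langfuse_configured():
        client = get_langfuse_client()  # raises LangFuseConfigError on bad keys
        prompt = fetch_prompt("gap-classifier", client=client)  # falls back to version 1
    else:
        client = None  # observability becomes a no-op

Because get_langfuse_client() caches both the success and the not-configured outcome in module globals, tests that change the environment need reset_langfuse_client() between cases.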
lyrics_transcriber/correction/agentic/prompts/__init__.py
@@ -1,2 +1,25 @@
 """Prompt templates for agentic correction."""
 
+from .classifier import (
+    build_classification_prompt,
+    build_classification_prompt_hardcoded,
+    get_hardcoded_examples,
+)
+from .langfuse_prompts import (
+    LangFusePromptService,
+    LangFusePromptError,
+    LangFuseDatasetError,
+    get_prompt_service,
+    reset_prompt_service,
+)
+
+__all__ = [
+    "build_classification_prompt",
+    "build_classification_prompt_hardcoded",
+    "get_hardcoded_examples",
+    "LangFusePromptService",
+    "LangFusePromptError",
+    "LangFuseDatasetError",
+    "get_prompt_service",
+    "reset_prompt_service",
+]
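
The package __init__ now re-exports both the dispatcher and the service layer. A sketch of the local-development path, where no LangFuse keys are set and the hardcoded template is used (the gap values are invented; the dispatch itself is shown in the classifier diff that follows):

    import os
    from lyrics_transcriber.correction.agentic.prompts import (
        build_classification_prompt,
        reset_prompt_service,
    )

    # No LangFuse keys -> the service falls back to the hardcoded template.
    os.environ.pop("LANGFUSE_PUBLIC_KEY", None)
    os.environ.pop("LANGFUSE_SECRET_KEY", None)
    reset_prompt_service()  # the singleton caches its mode at construction

    prompt = build_classification_prompt(
        gap_text="crimson tide",               # hypothetical example values
        preceding_words="rolling like a",
        following_words="over the hills",
        reference_contexts={"genius": "rolling like a crimson tide over the hills"},
        artist="Example Artist",
        title="Example Song",
        gap_id="gap-0",
    )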
lyrics_transcriber/correction/agentic/prompts/classifier.py
@@ -1,23 +1,35 @@
-"""Gap classification prompt builder for agentic correction."""
+"""Gap classification prompt builder for agentic correction.
+
+This module provides two modes of operation:
+1. LangFuse mode: Prompts and examples fetched from LangFuse for dynamic iteration
+2. Hardcoded mode: Fallback for local development when LangFuse is not configured
+
+The main entry point is `build_classification_prompt()` which automatically
+selects the appropriate mode based on LangFuse configuration.
+"""
 
 from typing import Dict, List, Optional
 import yaml
 import os
+import logging
 from pathlib import Path
 
+logger = logging.getLogger(__name__)
+
 
 def load_few_shot_examples() -> Dict[str, List[Dict]]:
     """Load few-shot examples from examples.yaml if it exists."""
     examples_path = Path(__file__).parent / "examples.yaml"
-
+
     if not examples_path.exists():
         return get_hardcoded_examples()
-
+
     try:
         with open(examples_path, 'r') as f:
             data = yaml.safe_load(f)
         return data.get('examples_by_category', {})
-    except Exception:
+    except Exception as e:
+        logger.warning(f"Failed to load examples.yaml, using hardcoded examples: {e}")
         return get_hardcoded_examples()
 
 
@@ -122,7 +134,12 @@ def build_classification_prompt(
     gap_id: Optional[str] = None
 ) -> str:
     """Build a prompt for classifying a gap in the transcription.
-
+
+    This function automatically selects between LangFuse and hardcoded prompts:
+    - If LangFuse is configured (LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY set),
+      fetches the prompt template and examples from LangFuse.
+    - Otherwise, uses hardcoded prompts for local development.
+
     Args:
         gap_text: The text of the gap that needs classification
         preceding_words: Text immediately before the gap
@@ -131,7 +148,50 @@ def build_classification_prompt(
         artist: Song artist name for context
         title: Song title for context
         gap_id: Identifier for the gap
-
+
+    Returns:
+        Formatted prompt string for the LLM
+
+    Raises:
+        LangFusePromptError: If LangFuse is configured but prompt fetch fails
+    """
+    from .langfuse_prompts import get_prompt_service
+
+    service = get_prompt_service()
+    return service.get_classification_prompt(
+        gap_text=gap_text,
+        preceding_words=preceding_words,
+        following_words=following_words,
+        reference_contexts=reference_contexts,
+        artist=artist,
+        title=title,
+        gap_id=gap_id
+    )
+
+
+def build_classification_prompt_hardcoded(
+    gap_text: str,
+    preceding_words: str,
+    following_words: str,
+    reference_contexts: Dict[str, str],
+    artist: Optional[str] = None,
+    title: Optional[str] = None,
+    gap_id: Optional[str] = None
+) -> str:
+    """Build a prompt for classifying a gap using hardcoded templates.
+
+    This is the fallback implementation used when LangFuse is not configured.
+    It is also used as the source of truth for migrating prompts to LangFuse.
+
+    Args:
+        gap_text: The text of the gap that needs classification
+        preceding_words: Text immediately before the gap
+        following_words: Text immediately after the gap
+        reference_contexts: Dictionary of reference lyrics from each source
+        artist: Song artist name for context
+        title: Song title for context
+        gap_id: Identifier for the gap
+
     Returns:
         Formatted prompt string for the LLM
     """
lyrics_transcriber/correction/agentic/prompts/langfuse_prompts.py
@@ -0,0 +1,298 @@
+"""LangFuse prompt management for agentic correction.
+
+This module provides prompt fetching from LangFuse, enabling dynamic prompt
+iteration without code redeployment.
+"""
+
+from typing import Dict, List, Optional, Any
+import logging
+import os
+
+logger = logging.getLogger(__name__)
+
+
+class LangFusePromptError(Exception):
+    """Raised when LangFuse prompt fetching fails."""
+    pass
+
+
+class LangFuseDatasetError(Exception):
+    """Raised when LangFuse dataset fetching fails."""
+    pass
+
+
+class LangFusePromptService:
+    """Fetches prompts and datasets from LangFuse for agentic correction.
+
+    This service handles:
+    - Fetching prompt templates from LangFuse
+    - Fetching few-shot examples from LangFuse datasets
+    - Compiling prompts with dynamic variables
+    - Fail-fast behavior when LangFuse is configured but unavailable
+
+    When LangFuse keys are not configured, falls back to hardcoded prompts
+    for local development.
+    """
+
+    # Prompt and dataset names in LangFuse
+    CLASSIFIER_PROMPT_NAME = "gap-classifier"
+    EXAMPLES_DATASET_NAME = "gap-classifier-examples"
+
+    def __init__(self, client: Optional[Any] = None):
+        """Initialize the prompt service.
+
+        Args:
+            client: Optional pre-initialized Langfuse client (for testing).
+                If None, will initialize from environment variables.
+        """
+        self._client = client
+        self._initialized = False
+        self._use_langfuse = self._should_use_langfuse()
+
+        if self._use_langfuse and client is None:
+            self._init_client()
+
+    def _should_use_langfuse(self) -> bool:
+        """Check if LangFuse credentials are configured."""
+        public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
+        secret_key = os.getenv("LANGFUSE_SECRET_KEY")
+        return bool(public_key and secret_key)
+
+    def _init_client(self) -> None:
+        """Initialize the Langfuse client using the shared singleton."""
+        from ..observability.langfuse_integration import get_langfuse_client, LangFuseConfigError
+
+        try:
+            self._client = get_langfuse_client()
+            if self._client:
+                self._initialized = True
+                logger.info("LangFuse prompt service initialized")
+            else:
+                logger.debug("LangFuse keys not configured, will use hardcoded prompts")
+        except LangFuseConfigError as e:
+            # Re-raise as RuntimeError for consistent error handling
+            raise RuntimeError(str(e)) from e
+
+    def get_classification_prompt(
+        self,
+        gap_text: str,
+        preceding_words: str,
+        following_words: str,
+        reference_contexts: Dict[str, str],
+        artist: Optional[str] = None,
+        title: Optional[str] = None,
+        gap_id: Optional[str] = None
+    ) -> str:
+        """Fetch and compile the gap classification prompt.
+
+        If LangFuse is configured, fetches the prompt template and examples
+        from LangFuse. Otherwise, falls back to hardcoded prompts.
+
+        Args:
+            gap_text: The text of the gap that needs classification
+            preceding_words: Text immediately before the gap
+            following_words: Text immediately after the gap
+            reference_contexts: Dictionary of reference lyrics from each source
+            artist: Song artist name for context
+            title: Song title for context
+            gap_id: Identifier for the gap
+
+        Returns:
+            Compiled prompt string ready for LLM
+
+        Raises:
+            LangFusePromptError: If LangFuse is configured but prompt fetch fails
+        """
+        if not self._use_langfuse:
+            # Fall back to hardcoded prompt for development
+            from .classifier import build_classification_prompt_hardcoded
+            return build_classification_prompt_hardcoded(
+                gap_text=gap_text,
+                preceding_words=preceding_words,
+                following_words=following_words,
+                reference_contexts=reference_contexts,
+                artist=artist,
+                title=title,
+                gap_id=gap_id
+            )
+
+        # Fetch from LangFuse
+        try:
+            prompt_template = self._fetch_prompt(self.CLASSIFIER_PROMPT_NAME)
+            examples = self._fetch_examples()
+
+            # Build component strings
+            song_context = self._build_song_context(artist, title)
+            examples_text = self._format_examples(examples)
+            references_text = self._format_references(reference_contexts)
+
+            # Compile the prompt with variables
+            compiled = prompt_template.compile(
+                song_context=song_context,
+                examples_text=examples_text,
+                gap_id=gap_id or "unknown",
+                preceding_words=preceding_words,
+                gap_text=gap_text,
+                following_words=following_words,
+                references_text=references_text
+            )
+
+            logger.debug(f"Compiled LangFuse prompt for gap {gap_id}")
+            return compiled
+
+        except Exception as e:
+            raise LangFusePromptError(
+                f"Failed to fetch/compile prompt from LangFuse: {e}"
+            ) from e
+
+    def _fetch_prompt(self, name: str, label: str = "production") -> Any:
+        """Fetch a prompt template from LangFuse.
+
+        Args:
+            name: The prompt name in LangFuse
+            label: Prompt label to fetch (default: "production"). Falls back to
+                version 1 if labeled version not found.
+
+        Returns:
+            LangFuse prompt object
+
+        Raises:
+            LangFusePromptError: If fetch fails
+        """
+        if not self._client:
+            raise LangFusePromptError("LangFuse client not initialized")
+
+        try:
+            # Try to fetch with the specified label (default: production)
+            prompt = self._client.get_prompt(name, label=label)
+            logger.debug(f"Fetched prompt '{name}' (label={label}) from LangFuse")
+            return prompt
+        except Exception as label_error:
+            # If labeled version not found, try fetching version 1 as fallback
+            # This handles newly created prompts that haven't been promoted yet
+            try:
+                prompt = self._client.get_prompt(name, version=1)
+                logger.warning(
+                    f"Prompt '{name}' label '{label}' not found, using version 1. "
+                    f"Consider promoting this prompt in LangFuse UI."
+                )
+                return prompt
+            except Exception as version_error:
+                raise LangFusePromptError(
+                    f"Failed to fetch prompt '{name}' from LangFuse: "
+                    f"Label '{label}' error: {label_error}, "
+                    f"Version 1 fallback error: {version_error}"
+                ) from version_error
+
+    def _fetch_examples(self) -> List[Dict[str, Any]]:
+        """Fetch few-shot examples from LangFuse dataset.
+
+        Returns:
+            List of example dictionaries
+
+        Raises:
+            LangFuseDatasetError: If fetch fails
+        """
+        if not self._client:
+            raise LangFuseDatasetError("LangFuse client not initialized")
+
+        try:
+            dataset = self._client.get_dataset(self.EXAMPLES_DATASET_NAME)
+            examples = []
+            for item in dataset.items:
+                # Dataset items have 'input' field with the example data
+                if hasattr(item, 'input') and item.input:
+                    examples.append(item.input)
+
+            logger.debug(f"Fetched {len(examples)} examples from LangFuse dataset")
+            return examples
+        except Exception as e:
+            raise LangFuseDatasetError(
+                f"Failed to fetch dataset '{self.EXAMPLES_DATASET_NAME}' from LangFuse: {e}"
+            ) from e
+
+    def _build_song_context(self, artist: Optional[str], title: Optional[str]) -> str:
+        """Build song context section for the prompt."""
+        if artist and title:
+            return (
+                f"\n## Song Context\n\n"
+                f"**Artist:** {artist}\n"
+                f"**Title:** {title}\n\n"
+                f"Note: The song title and artist name may help identify proper nouns "
+                f"or unusual words that could be mis-heard.\n"
+            )
+        return ""
+
+    def _format_examples(self, examples: List[Dict[str, Any]]) -> str:
+        """Format few-shot examples for inclusion in prompt.
+
+        Args:
+            examples: List of example dictionaries from LangFuse dataset
+
+        Returns:
+            Formatted examples string
+        """
+        if not examples:
+            return ""
+
+        # Group examples by category
+        examples_by_category: Dict[str, List[Dict]] = {}
+        for ex in examples:
+            category = ex.get("category", "unknown")
+            if category not in examples_by_category:
+                examples_by_category[category] = []
+            examples_by_category[category].append(ex)
+
+        # Build formatted text
+        text = "## Example Classifications\n\n"
+        for category, category_examples in examples_by_category.items():
+            text += f"### {category.upper().replace('_', ' ')}\n\n"
+            for ex in category_examples[:2]:  # Limit to 2 examples per category
+                text += f"**Gap:** {ex.get('gap_text', '')}\n"
+                text += f"**Context:** ...{ex.get('preceding', '')}... [GAP] ...{ex.get('following', '')}...\n"
+                if 'reference' in ex:
+                    text += f"**Reference:** {ex['reference']}\n"
+                text += f"**Reasoning:** {ex.get('reasoning', '')}\n"
+                text += f"**Action:** {ex.get('action', '')}\n\n"
+
+        return text
+
+    def _format_references(self, reference_contexts: Dict[str, str]) -> str:
+        """Format reference lyrics for inclusion in prompt.
+
+        Args:
+            reference_contexts: Dictionary of reference lyrics from each source
+
+        Returns:
+            Formatted references string
+        """
+        if not reference_contexts:
+            return ""
+
+        text = "## Available Reference Lyrics\n\n"
+        for source, context in reference_contexts.items():
+            text += f"**{source.upper()}:** {context}\n\n"
+
+        return text
+
+
+# Module-level singleton for convenience
+_prompt_service: Optional[LangFusePromptService] = None
+
+
+def get_prompt_service() -> LangFusePromptService:
+    """Get or create the global prompt service instance.
+
+    Returns:
+        LangFusePromptService singleton instance
+    """
+    global _prompt_service
+    if _prompt_service is None:
+        _prompt_service = LangFusePromptService()
+    return _prompt_service
+
+
+def reset_prompt_service() -> None:
+    """Reset the global prompt service instance (for testing)."""
+    global _prompt_service
+    _prompt_service = None
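
The constructor's client parameter allows injecting a stand-in for tests. A sketch with fake objects (everything prefixed _Fake is hypothetical); note that _should_use_langfuse() still reads the environment, so dummy keys are needed to exercise the LangFuse branch:

    import os
    from lyrics_transcriber.correction.agentic.prompts import LangFusePromptService

    os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-test"   # dummy values for the mode check
    os.environ["LANGFUSE_SECRET_KEY"] = "sk-test"

    class _FakePrompt:
        def compile(self, **variables):
            return f"classify gap {variables['gap_id']}: {variables['gap_text']}"

    class _FakeItem:
        input = {"category": "sound_alike", "gap_text": "knight", "preceding": "a dark",
                 "following": "rides out", "reasoning": "homophone", "action": "replace"}

    class _FakeDataset:
        items = [_FakeItem()]

    class _FakeClient:
        def get_prompt(self, name, **kwargs):
            return _FakePrompt()

        def get_dataset(self, name):
            return _FakeDataset()

    service = LangFusePromptService(client=_FakeClient())
    print(service.get_classification_prompt(
        gap_text="knight",
        preceding_words="a dark",
        following_words="rides out",
        reference_contexts={"genius": "a dark night rides out"},
        gap_id="gap-0",
    ))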
lyrics_transcriber/correction/agentic/providers/config.py
@@ -19,8 +19,9 @@ class ProviderConfig:
     cache_dir: str
 
     # GCP/Vertex AI settings
+    # Note: Gemini 3 models require 'global' location (not regional like us-central1)
     gcp_project_id: Optional[str] = None
-    gcp_location: str = "us-central1"
+    gcp_location: str = "global"
 
     request_timeout_seconds: float = 30.0
     max_retries: int = 2
@@ -51,7 +52,7 @@ class ProviderConfig:
             privacy_mode=os.getenv("PRIVACY_MODE", "false").lower() in {"1", "true", "yes"},
             cache_dir=cache_dir,
             gcp_project_id=os.getenv("GOOGLE_CLOUD_PROJECT") or os.getenv("GCP_PROJECT_ID"),
-            gcp_location=os.getenv("GCP_LOCATION", "us-central1"),
+            gcp_location=os.getenv("GCP_LOCATION", "global"),
             request_timeout_seconds=float(os.getenv("AGENTIC_TIMEOUT_SECONDS", "30.0")),
             max_retries=int(os.getenv("AGENTIC_MAX_RETRIES", "2")),
             retry_backoff_base_seconds=float(os.getenv("AGENTIC_BACKOFF_BASE_SECONDS", "0.2")),
lyrics_transcriber/correction/agentic/providers/constants.py
@@ -8,7 +8,7 @@ RESPONSE_LOG_LENGTH = 500  # Characters to log from responses
 MODEL_SPEC_FORMAT = "provider/model"  # Expected format for model identifiers
 
 # Default Langfuse host
-DEFAULT_LANGFUSE_HOST = "https://cloud.langfuse.com"
+DEFAULT_LANGFUSE_HOST = "https://us.cloud.langfuse.com"
 
 # Raw response indicator
 RAW_RESPONSE_KEY = "raw"  # Key used to wrap unparsed responses
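
Two environment-driven defaults move in this release: GCP_LOCATION (per the new comment, Gemini 3 models require the 'global' Vertex AI location rather than a regional one) and the default Langfuse host (now the US cloud region). Both remain overridable; a sketch of pinning the pre-0.82.0 values explicitly (illustrative only):

    import os

    os.environ["GCP_LOCATION"] = "us-central1"                   # old default; now "global"
    os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com"   # old default; now us.cloud.langfuse.com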
lyrics_transcriber/correction/agentic/providers/langchain_bridge.py
@@ -187,26 +187,41 @@ class LangChainBridge(BaseAIProvider):
 
     def _invoke_model(self, prompt: str) -> str:
         """Invoke the chat model with a prompt.
-
+
         This is a simple wrapper that can be passed to the retry executor.
-
+
         Args:
             prompt: The prompt to send
-
+
         Returns:
             Response content as string
-
+
         Raises:
             Exception: Any error from the model invocation
         """
         from langchain_core.messages import HumanMessage
-
+
         # Prepare config with session_id in metadata (Langfuse format)
         config = {}
         if hasattr(self, '_session_id') and self._session_id:
             config["metadata"] = {"langfuse_session_id": self._session_id}
             logger.debug(f"🤖 [LangChain] Invoking with session_id: {self._session_id}")
-
+
         response = self._chat_model.invoke([HumanMessage(content=prompt)], config=config)
-        return response.content
+        content = response.content
+
+        # Handle multimodal response format from Gemini 3+ models
+        # Response can be a list of content parts: [{'type': 'text', 'text': '...'}]
+        if isinstance(content, list):
+            # Extract text from the first text content part
+            for part in content:
+                if isinstance(part, dict) and part.get('type') == 'text':
+                    return part.get('text', '')
+            # Fallback: concatenate all text parts
+            return ''.join(
+                part.get('text', '') if isinstance(part, dict) else str(part)
+                for part in content
+            )
+
+        return content
 
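The bridge previously assumed response.content was a plain string; newer langchain-google-vertexai releases can return a list of content parts for Gemini 3 models. A standalone sketch of the same normalization, with toy inputs:

    from typing import Any

    def normalize_content(content: Any) -> str:
        # Mirrors the list-handling added to LangChainBridge._invoke_model.
        if isinstance(content, list):
            for part in content:
                if isinstance(part, dict) and part.get("type") == "text":
                    return part.get("text", "")
            return "".join(
                part.get("text", "") if isinstance(part, dict) else str(part)
                for part in content
            )
        return content

    assert normalize_content('{"corrections": []}') == '{"corrections": []}'
    assert normalize_content([{"type": "text", "text": "hello"}]) == "hello"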
lyrics_transcriber/correction/agentic/providers/model_factory.py
@@ -100,19 +100,10 @@ class ModelFactory:
             return
 
         try:
-            from langfuse import Langfuse
             from langfuse.langchain import CallbackHandler
-
-            # Initialize Langfuse client first (this is required!)
-            langfuse_client = Langfuse(
-                public_key=public_key,
-                secret_key=secret_key,
-                host=os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com"),
-            )
-
-            # Then create callback handler with the same public_key
-            # The handler will use the initialized client
-            self._langfuse_handler = CallbackHandler(public_key=public_key)
+
+            # CallbackHandler auto-discovers credentials from environment variables
+            self._langfuse_handler = CallbackHandler()
             logger.info(f"🤖 Langfuse callback handler initialized for {model_spec}")
         except Exception as e:
             # If Langfuse keys are set, we MUST fail fast
@@ -224,6 +215,7 @@ class ModelFactory:
                 model=model_name,
                 project=config.gcp_project_id,
                 location=config.gcp_location,
+                timeout=config.request_timeout_seconds,
                 max_retries=config.max_retries,
                 callbacks=callbacks,
             )
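
Two factory changes here: the Langfuse CallbackHandler is now constructed bare, since the langfuse 3.x LangChain handler discovers credentials from the environment, and the configured request timeout is now passed through to ChatVertexAI. Roughly (a sketch, assuming the keys are already exported):

    import os
    from langfuse.langchain import CallbackHandler

    # Assumes LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY are set in the environment.
    os.environ.setdefault("LANGFUSE_HOST", "https://us.cloud.langfuse.com")
    handler = CallbackHandler()  # no keys passed; discovered from the environment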
lyrics_transcriber/correction/agentic/router.py
@@ -6,6 +6,7 @@ from typing import Dict, Any
 from .providers.config import ProviderConfig
 
 # Default model for cloud deployments - Gemini 3 Flash via Vertex AI
+# Note: Gemini 3 models require 'global' location (not regional like us-central1)
 DEFAULT_CLOUD_MODEL = "vertexai/gemini-3-flash-preview"
 
 
@@ -33,7 +34,7 @@ class ModelRouter:
         if self._config.privacy_mode:
             return "ollama/llama3.2:latest"
 
-        # Default to Gemini 3 Flash for all cases (fast, cost-effective)
+        # Default to Gemini 3 Flash for all cases (fast, cost-effective, latest capabilities)
        return DEFAULT_CLOUD_MODEL
lyrics_transcriber/correction/corrector.py
@@ -6,7 +6,6 @@ import os
 import shortuuid
 
 from lyrics_transcriber.correction.handlers.levenshtein import LevenshteinHandler
-from lyrics_transcriber.correction.handlers.llm import LLMHandler
 from lyrics_transcriber.correction.handlers.no_space_punct_match import NoSpacePunctuationMatchHandler
 from lyrics_transcriber.correction.handlers.relaxed_word_count_match import RelaxedWordCountMatchHandler
 from lyrics_transcriber.correction.handlers.repeat import RepeatCorrectionHandler
@@ -27,7 +26,6 @@ from lyrics_transcriber.correction.anchor_sequence import AnchorSequenceFinder
 from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
 from lyrics_transcriber.correction.handlers.extend_anchor import ExtendAnchorHandler
 from lyrics_transcriber.utils.word_utils import WordUtils
-from lyrics_transcriber.correction.handlers.llm_providers import OllamaProvider, OpenAIProvider
 
 
 class LyricsCorrector:
@@ -57,60 +55,18 @@ class LyricsCorrector:
         ]
 
         # Create all handlers but respect enabled_handlers if provided
+        # Note: Legacy LLMHandler removed - use AgenticCorrector via USE_AGENTIC_AI=1 instead
         all_handlers = [
             ("ExtendAnchorHandler", ExtendAnchorHandler(logger=self.logger)),
             ("WordCountMatchHandler", WordCountMatchHandler(logger=self.logger)),
             ("SyllablesMatchHandler", SyllablesMatchHandler(logger=self.logger)),
             ("RelaxedWordCountMatchHandler", RelaxedWordCountMatchHandler(logger=self.logger)),
             ("NoSpacePunctuationMatchHandler", NoSpacePunctuationMatchHandler(logger=self.logger)),
-            (
-                "LLMHandler_Ollama_R17B",
-                LLMHandler(
-                    provider=OllamaProvider(model="deepseek-r1:7b", logger=self.logger),
-                    name="LLMHandler_Ollama_R17B",
-                    logger=self.logger,
-                    cache_dir=self._cache_dir,
-                ),
-            ),
             ("RepeatCorrectionHandler", RepeatCorrectionHandler(logger=self.logger)),
             ("SoundAlikeHandler", SoundAlikeHandler(logger=self.logger)),
             ("LevenshteinHandler", LevenshteinHandler(logger=self.logger)),
         ]
 
-        # Add OpenRouter handlers only if API key is available
-        if os.getenv("OPENROUTER_API_KEY"):
-            openrouter_handlers = [
-                (
-                    "LLMHandler_OpenRouter_Sonnet",
-                    LLMHandler(
-                        provider=OpenAIProvider(
-                            model="anthropic/claude-3-sonnet",
-                            api_key=os.getenv("OPENROUTER_API_KEY"),
-                            base_url="https://openrouter.ai/api/v1",
-                            logger=self.logger,
-                        ),
-                        name="LLMHandler_OpenRouter_Sonnet",
-                        logger=self.logger,
-                        cache_dir=self._cache_dir,
-                    ),
-                ),
-                (
-                    "LLMHandler_OpenRouter_R1",
-                    LLMHandler(
-                        provider=OpenAIProvider(
-                            model="deepseek/deepseek-r1",
-                            api_key=os.getenv("OPENROUTER_API_KEY"),
-                            base_url="https://openrouter.ai/api/v1",
-                            logger=self.logger,
-                        ),
-                        name="LLMHandler_OpenRouter_R1",
-                        logger=self.logger,
-                        cache_dir=self._cache_dir,
-                    ),
-                ),
-            ]
-            all_handlers.extend(openrouter_handlers)
-
         # Store all handler information
         self.all_handlers = [
             {
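
With these removals the default handler chain is purely deterministic; LLM-based gap correction now lives behind the agentic path, opted into via an environment flag (flag name taken from the new comment above; the code that consumes it is outside this diff):

    import os

    os.environ["USE_AGENTIC_AI"] = "1"  # route gap correction through AgenticCorrector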
lyrics_transcriber/correction/handlers/llm.py (deleted)
@@ -1,293 +0,0 @@
-from typing import List, Optional, Tuple, Dict, Any, Union
-import logging
-import json
-from datetime import datetime
-from pathlib import Path
-
-from lyrics_transcriber.types import GapSequence, WordCorrection
-from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
-from lyrics_transcriber.correction.handlers.word_operations import WordOperations
-from lyrics_transcriber.correction.handlers.llm_providers import LLMProvider
-
-
-class LLMHandler(GapCorrectionHandler):
-    """Uses an LLM to analyze and correct gaps by comparing with reference lyrics."""
-
-    def __init__(
-        self, provider: LLMProvider, name: str, logger: Optional[logging.Logger] = None, cache_dir: Optional[Union[str, Path]] = None
-    ):
-        super().__init__(logger)
-        self.logger = logger or logging.getLogger(__name__)
-        self.provider = provider
-        self.name = name
-        self.cache_dir = Path(cache_dir) if cache_dir else None
-
-    def _format_prompt(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> str:
-        """Format the prompt for the LLM with context about the gap and reference lyrics."""
-        word_map = data.get("word_map", {})
-        metadata = data.get("metadata", {}) if data else {}
-
-        if not word_map:
-            self.logger.error("No word_map provided in data")
-            return ""
-
-        # Format transcribed words with their IDs
-        transcribed_words = [{"id": word_id, "text": word_map[word_id].text} for word_id in gap.transcribed_word_ids if word_id in word_map]
-
-        prompt = (
-            "You are a lyrics correction expert. You will be given transcribed lyrics that may contain errors "
-            "and reference lyrics from multiple sources. Your task is to analyze each word in the transcribed text "
-            "and suggest specific corrections based on the reference lyrics.\n\n"
-            "Each word has a unique ID. When suggesting corrections, you must specify the ID of the word being corrected. "
-            "This ensures accuracy in applying your corrections.\n\n"
-            "For each correction, specify:\n"
-            "1. The word ID being corrected\n"
-            "2. The correction type ('replace', 'split', 'combine', or 'delete')\n"
-            "3. The corrected text\n"
-            "4. Your confidence level\n"
-            "5. The reason for the correction\n\n"
-        )
-
-        # Add song context if available
-        if metadata and metadata.get("artist") and metadata.get("title"):
-            prompt += f"Song: {metadata['title']}\nArtist: {metadata['artist']}\n\n"
-
-        # Format transcribed words with IDs
-        prompt += "Transcribed words:\n"
-        for word in transcribed_words:
-            prompt += f"- ID: {word['id']}, Text: '{word['text']}'\n"
-
-        prompt += "\nReference lyrics from different sources:\n"
-
-        # Add each reference source with words and their IDs
-        for source, word_ids in gap.reference_word_ids.items():
-            reference_words = [{"id": word_id, "text": word_map[word_id].text} for word_id in word_ids if word_id in word_map]
-            prompt += f"\n{source} immediate context:\n"
-            for word in reference_words:
-                prompt += f"- ID: {word['id']}, Text: '{word['text']}'\n"
-
-            # Add full lyrics if available
-            if metadata and metadata.get("full_reference_texts", {}).get(source):
-                prompt += f"\nFull {source} lyrics:\n{metadata['full_reference_texts'][source]}\n"
-
-        # Add context about surrounding anchors if available
-        if gap.preceding_anchor_id:
-            preceding_anchor = next((a.anchor for a in data.get("anchor_sequences", []) if a.anchor.id == gap.preceding_anchor_id), None)
-            if preceding_anchor:
-                anchor_words = [
-                    {"id": word_id, "text": word_map[word_id].text}
-                    for word_id in preceding_anchor.transcribed_word_ids
-                    if word_id in word_map
-                ]
-                prompt += "\nPreceding correct words:\n"
-                for word in anchor_words:
-                    prompt += f"- ID: {word['id']}, Text: '{word['text']}'\n"
-
-        prompt += (
-            "\nProvide corrections in the following JSON format:\n"
-            "{\n"
-            '  "corrections": [\n'
-            "    {\n"
-            '      "word_id": "id_of_word_to_correct",\n'
-            '      "type": "replace|split|combine|delete",\n'
-            '      "corrected_text": "new text",\n'
-            '      "reference_word_id": "id_from_reference_lyrics",  // Optional, use when matching a specific reference word\n'
-            '      "confidence": 0.9,\n'
-            '      "reason": "explanation of correction"\n'
-            "    }\n"
-            "  ]\n"
-            "}\n\n"
-            "Important rules:\n"
-            "1. Always include the word_id for each correction\n"
-            "2. For 'split' type, corrected_text should contain the space-separated words\n"
-            "3. For 'combine' type, word_id should be the first word to combine\n"
-            "4. Include reference_word_id when the correction matches a specific reference word\n"
-            "5. Only suggest corrections when you're confident they improve the lyrics\n"
-            "6. Preserve any existing words that match the reference lyrics\n"
-            "7. Respond ONLY with the JSON object, no other text"
-        )
-
-        return prompt
-
-    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
-        """LLM handler can attempt to handle any gap with reference words."""
-        if not gap.reference_word_ids:
-            self.logger.debug("No reference words available")
-            return False, {}
-
-        return True, {}
-
-    def _write_debug_info(self, prompt: str, response: str, gap_index: int, audio_file_hash: Optional[str] = None) -> None:
-        """Write prompt and response to debug files."""
-        if not self.cache_dir:
-            self.logger.warning("No cache directory provided, skipping LLM debug output")
-            return
-
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        debug_dir = self.cache_dir / "llm_debug"
-        debug_dir.mkdir(exist_ok=True, parents=True)
-
-        hash_prefix = f"{audio_file_hash}_" if audio_file_hash else ""
-        filename = debug_dir / f"llm_debug_{hash_prefix}{gap_index}_{timestamp}.txt"
-
-        debug_content = "=== LLM PROMPT ===\n" f"{prompt}\n\n" "=== LLM RESPONSE ===\n" f"{response}\n"
-
-        try:
-            with open(filename, "w", encoding="utf-8") as f:
-                f.write(debug_content)
-        except IOError as e:
-            self.logger.error(f"Failed to write LLM debug file: {e}")
-
-    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
-        """Process the gap using the LLM and create corrections based on its response."""
-        if not data or "word_map" not in data:
-            self.logger.error("No word_map provided in data")
-            return []
-
-        word_map = data["word_map"]
-        transcribed_words = [word_map[word_id].text for word_id in gap.transcribed_word_ids if word_id in word_map]
-
-        # Calculate reference positions using the centralized method
-        reference_positions = (
-            WordOperations.calculate_reference_positions(gap, anchor_sequences=data.get("anchor_sequences", [])) or {}
-        )  # Ensure empty dict if None
-
-        prompt = self._format_prompt(gap, data)
-        if not prompt:
-            return []
-
-        # Get a unique index for this gap based on its position
-        gap_index = gap.transcription_position
-
-        try:
-            self.logger.debug(f"Processing gap words: {transcribed_words}")
-            self.logger.debug(f"Reference word IDs: {gap.reference_word_ids}")
-
-            response = self.provider.generate_response(prompt)
-
-            # Write debug info to files
-            self._write_debug_info(prompt, response, gap_index, audio_file_hash=data.get("audio_file_hash"))
-
-            try:
-                corrections_data = json.loads(response)
-            except json.JSONDecodeError as e:
-                self.logger.error(f"Failed to parse LLM response as JSON: {e}")
-                self.logger.error(f"Raw response content: {response}")
-                return []
-
-            # Check if corrections exist and are non-empty
-            if not corrections_data.get("corrections"):
-                self.logger.debug("No corrections suggested by LLM")
-                return []
-
-            corrections = []
-            for correction in corrections_data["corrections"]:
-                # Validate word_id exists in gap
-                if correction["word_id"] not in gap.transcribed_word_ids:
-                    self.logger.error(f"LLM suggested correction for word_id {correction['word_id']} which is not in the gap")
-                    continue
-
-                # Get original word from word map
-                original_word = word_map[correction["word_id"]]
-                position = gap.transcription_position + gap.transcribed_word_ids.index(correction["word_id"])
-
-                self.logger.debug(f"Processing correction: {correction}")
-
-                if correction["type"] == "replace":
-                    self.logger.debug(
-                        f"Creating replacement: '{original_word.text}' -> '{correction['corrected_text']}' " f"at position {position}"
-                    )
-                    corrections.append(
-                        WordOperations.create_word_replacement_correction(
-                            original_word=original_word.text,
-                            corrected_word=correction["corrected_text"],
-                            original_position=position,
-                            source="LLM",
-                            confidence=correction["confidence"],
-                            reason=correction["reason"],
-                            handler=self.name,
-                            reference_positions=reference_positions,
-                            original_word_id=correction["word_id"],
-                            corrected_word_id=correction.get("reference_word_id"),
-                        )
-                    )
-                elif correction["type"] == "split":
-                    split_words = correction["corrected_text"].split()
-                    self.logger.debug(f"Creating split: '{original_word.text}' -> {split_words} " f"at position {position}")
-
-                    # Get reference word IDs if provided
-                    reference_word_ids = correction.get("reference_word_ids", [None] * len(split_words))
-
-                    corrections.extend(
-                        WordOperations.create_word_split_corrections(
-                            original_word=original_word.text,
-                            reference_words=split_words,
-                            original_position=position,
-                            source="LLM",
-                            confidence=correction["confidence"],
-                            reason=correction["reason"],
-                            handler=self.name,
-                            reference_positions=reference_positions,
-                            original_word_id=correction["word_id"],
-                            corrected_word_ids=reference_word_ids,
-                        )
-                    )
-                elif correction["type"] == "combine":
-                    # Get all word IDs to combine
-                    word_ids_to_combine = []
-                    current_idx = gap.transcribed_word_ids.index(correction["word_id"])
-                    words_needed = len(correction["corrected_text"].split())
-
-                    if current_idx + words_needed <= len(gap.transcribed_word_ids):
-                        word_ids_to_combine = gap.transcribed_word_ids[current_idx : current_idx + words_needed]
-                    else:
-                        self.logger.error(f"Not enough words available to combine at position {position}")
-                        continue
-
-                    words_to_combine = [word_map[word_id].text for word_id in word_ids_to_combine]
-
-                    self.logger.debug(
-                        f"Creating combine: {words_to_combine} -> '{correction['corrected_text']}' " f"at position {position}"
-                    )
-
-                    corrections.extend(
-                        WordOperations.create_word_combine_corrections(
-                            original_words=words_to_combine,
-                            reference_word=correction["corrected_text"],
-                            original_position=position,
-                            source="LLM",
-                            confidence=correction["confidence"],
-                            combine_reason=correction["reason"],
-                            delete_reason=f"Part of combining words: {correction['reason']}",
-                            handler=self.name,
-                            reference_positions=reference_positions,
-                            original_word_ids=word_ids_to_combine,
-                            corrected_word_id=correction.get("reference_word_id"),
-                        )
-                    )
-                elif correction["type"] == "delete":
-                    self.logger.debug(f"Creating deletion: '{original_word.text}' at position {position}")
-                    corrections.append(
-                        WordCorrection(
-                            original_word=original_word.text,
-                            corrected_word="",
-                            segment_index=0,
-                            original_position=position,
-                            confidence=correction["confidence"],
-                            source="LLM",
-                            reason=correction["reason"],
-                            alternatives={},
-                            is_deletion=True,
-                            handler=self.name,
-                            reference_positions=reference_positions,
-                            word_id=correction["word_id"],
-                            corrected_word_id=None,
-                        )
-                    )
-
-            self.logger.debug(f"Created {len(corrections)} corrections: {[f'{c.original_word}->{c.corrected_word}' for c in corrections]}")
-            return corrections
-
-        except Exception as e:
-            self.logger.error(f"Unexpected error in LLM handler: {e}")
-            return []
lyrics_transcriber/correction/handlers/llm_providers.py (deleted)
@@ -1,60 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Optional
-import logging
-from ollama import chat as ollama_chat
-import openai
-
-
-class LLMProvider(ABC):
-    """Abstract base class for LLM providers."""
-
-    def __init__(self, logger: Optional[logging.Logger] = None):
-        self.logger = logger or logging.getLogger(__name__)
-
-    @abstractmethod
-    def generate_response(self, prompt: str, **kwargs) -> str:
-        """Generate a response from the LLM.
-
-        Args:
-            prompt: The prompt to send to the LLM
-            **kwargs: Additional provider-specific parameters
-
-        Returns:
-            str: The LLM's response
-        """
-        pass
-
-
-class OllamaProvider(LLMProvider):
-    """Provider for local Ollama models."""
-
-    def __init__(self, model: str, logger: Optional[logging.Logger] = None):
-        super().__init__(logger)
-        self.model = model
-
-    def generate_response(self, prompt: str, **kwargs) -> str:
-        try:
-            response = ollama_chat(model=self.model, messages=[{"role": "user", "content": prompt}], format="json")
-            return response.message.content
-        except Exception as e:
-            self.logger.error(f"Error generating Ollama response: {e}")
-            raise
-
-
-class OpenAIProvider(LLMProvider):
-    """Provider for OpenAI-compatible APIs (including OpenRouter)."""
-
-    def __init__(self, model: str, api_key: str, base_url: Optional[str] = None, logger: Optional[logging.Logger] = None):
-        super().__init__(logger)
-        self.model = model
-        self.client = openai.OpenAI(api_key=api_key, base_url=base_url)
-
-    def generate_response(self, prompt: str, **kwargs) -> str:
-        try:
-            response = self.client.chat.completions.create(
-                model=self.model, messages=[{"role": "user", "content": prompt}], response_format={"type": "json_object"}, **kwargs
-            )
-            return response.choices[0].message.content
-        except Exception as e:
-            self.logger.error(f"Error generating OpenAI response: {e}")
-            raise