karaoke-gen 0.81.1__py3-none-any.whl → 0.82.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {karaoke_gen-0.81.1.dist-info → karaoke_gen-0.82.0.dist-info}/METADATA +2 -2
- {karaoke_gen-0.81.1.dist-info → karaoke_gen-0.82.0.dist-info}/RECORD +15 -16
- lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +178 -5
- lyrics_transcriber/correction/agentic/prompts/__init__.py +23 -0
- lyrics_transcriber/correction/agentic/prompts/classifier.py +66 -6
- lyrics_transcriber/correction/agentic/prompts/langfuse_prompts.py +298 -0
- lyrics_transcriber/correction/agentic/providers/config.py +3 -2
- lyrics_transcriber/correction/agentic/providers/constants.py +1 -1
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +22 -7
- lyrics_transcriber/correction/agentic/providers/model_factory.py +4 -12
- lyrics_transcriber/correction/agentic/router.py +2 -1
- lyrics_transcriber/correction/corrector.py +1 -45
- lyrics_transcriber/correction/handlers/llm.py +0 -293
- lyrics_transcriber/correction/handlers/llm_providers.py +0 -60
- {karaoke_gen-0.81.1.dist-info → karaoke_gen-0.82.0.dist-info}/WHEEL +0 -0
- {karaoke_gen-0.81.1.dist-info → karaoke_gen-0.82.0.dist-info}/entry_points.txt +0 -0
- {karaoke_gen-0.81.1.dist-info → karaoke_gen-0.82.0.dist-info}/licenses/LICENSE +0 -0
{karaoke_gen-0.81.1.dist-info → karaoke_gen-0.82.0.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: karaoke-gen
-Version: 0.81.1
+Version: 0.82.0
 Summary: Generate karaoke videos with synchronized lyrics. Handles the entire process from downloading audio and lyrics to creating the final video with title screens.
 License: MIT
 License-File: LICENSE
@@ -42,7 +42,7 @@ Requires-Dist: kbputils (>=0.0.16,<0.0.17)
 Requires-Dist: langchain (>=0.3.0)
 Requires-Dist: langchain-anthropic (>=0.2.0)
 Requires-Dist: langchain-core (>=0.3.0)
-Requires-Dist: langchain-google-vertexai (>=
+Requires-Dist: langchain-google-vertexai (>=3.1.1)
 Requires-Dist: langchain-ollama (>=0.2.0)
 Requires-Dist: langchain-openai (>=0.2.0)
 Requires-Dist: langfuse (>=3.0.0)
{karaoke_gen-0.81.1.dist-info → karaoke_gen-0.82.0.dist-info}/RECORD

@@ -75,29 +75,30 @@ lyrics_transcriber/correction/agentic/models/observability_metrics.py,sha256=xGd
 lyrics_transcriber/correction/agentic/models/schemas.py,sha256=skWXqGkJnv9NvmvjktBqrH_4Ohyzg2x0ZMsVINbXKdg,2141
 lyrics_transcriber/correction/agentic/models/utils.py,sha256=tX7flxCB4aLrgZWkHuEt7Gr8kaLkMsXzTdWSec6Xsts,580
 lyrics_transcriber/correction/agentic/observability/__init__.py,sha256=RuaepVsltWdaF1aF_YmNVJTJ6_bbNDFo3Sp-ruBvyHA,85
-lyrics_transcriber/correction/agentic/observability/langfuse_integration.py,sha256=
+lyrics_transcriber/correction/agentic/observability/langfuse_integration.py,sha256=5oBfoFT-QExZttD2wlIzXRhgSglNElFFwz8Et36vZos,7014
 lyrics_transcriber/correction/agentic/observability/metrics.py,sha256=Js_m6ljdI6Xgd9X9eHtboCsf9gjYsN1zOv3_XSwjgKk,1907
 lyrics_transcriber/correction/agentic/observability/performance.py,sha256=ekjzgL65gfs1SpKR_befu1wdWZU9xDlcafJm8htSvks,328
-lyrics_transcriber/correction/agentic/prompts/__init__.py,sha256=
-lyrics_transcriber/correction/agentic/prompts/classifier.py,sha256=
+lyrics_transcriber/correction/agentic/prompts/__init__.py,sha256=riiZ-f4jlvq4QjtyCpmv-sSzfcLy7O99pMBwV1H5Usc,605
+lyrics_transcriber/correction/agentic/prompts/classifier.py,sha256=FwUSL59Y-5q9J1CDW8iyzyiajcy4-uq5MzfWu0If_Yo,11899
+lyrics_transcriber/correction/agentic/prompts/langfuse_prompts.py,sha256=hjQhyY_GBuZt_oY9DacutXvA9dJCZksRY2fKmveJm_A,10898
 lyrics_transcriber/correction/agentic/providers/__init__.py,sha256=PS7C4sKDfa6S9lSo33GXIRamCLsv0Jn7u0GtXuhiRD4,95
 lyrics_transcriber/correction/agentic/providers/base.py,sha256=bExuntMLLInMmWWNzN81_ScWQJhNYbtlF3wZYhlX-qw,1059
 lyrics_transcriber/correction/agentic/providers/circuit_breaker.py,sha256=D3Jg4YHqvy4gzlxfkALa7PztyYQpJb8NwJAonMS0TSI,4694
-lyrics_transcriber/correction/agentic/providers/config.py,sha256=
-lyrics_transcriber/correction/agentic/providers/constants.py,sha256=
+lyrics_transcriber/correction/agentic/providers/config.py,sha256=w6-fkapEy3BgoFIsRfZ44XUCV4zuicFSNoSoVAe5lYE,3282
+lyrics_transcriber/correction/agentic/providers/constants.py,sha256=cXLzKTyFVt9q6wQd_gWcv3EZ5Sm27AOAz6NyPapcess,695
 lyrics_transcriber/correction/agentic/providers/health.py,sha256=F8pHY5BQYvylGRDGXUHplcAJooAyiqVLRhBl4kHC1H8,710
-lyrics_transcriber/correction/agentic/providers/langchain_bridge.py,sha256=
-lyrics_transcriber/correction/agentic/providers/model_factory.py,sha256=
+lyrics_transcriber/correction/agentic/providers/langchain_bridge.py,sha256=H3C3BNjAixfkOJojxWXv-P-svlgj5rJEJdk0zPIjh7E,8540
+lyrics_transcriber/correction/agentic/providers/model_factory.py,sha256=CeVDblf1HdphtUHVn3Cgl07YAeUuSxTjEHHFJN8Frj0,8257
 lyrics_transcriber/correction/agentic/providers/response_cache.py,sha256=Byr7fQJsgUMFlsvHeVCxTiFjjnbsg3KIlEmEEtAo-Gw,7047
 lyrics_transcriber/correction/agentic/providers/response_parser.py,sha256=a8pdUYKBS5X72gck3u1ndFYB__UN0UijAdxNhbHp8ZQ,3809
 lyrics_transcriber/correction/agentic/providers/retry_executor.py,sha256=hX21Zwy2cSECAw7k13ndEinWRqwjo4xYoSCQ2B2CUf0,3912
-lyrics_transcriber/correction/agentic/router.py,sha256=
+lyrics_transcriber/correction/agentic/router.py,sha256=akP28A0lftmsnSyMOW6k7iTC1pv4LEgilXhIkcfJzlE,1437
 lyrics_transcriber/correction/agentic/workflows/__init__.py,sha256=OsBExAbIIKxJgX6FKXFOgcUjIG9AWJQV_fESZVdO8mo,77
 lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py,sha256=gMuLTUxkgYaciMsI4yrZSC3wi--7V_PgaDNE-Vd6FE8,575
 lyrics_transcriber/correction/agentic/workflows/correction_graph.py,sha256=kgZKnz0h9cG1EfhW7BSSl-kSpQtJrRM_S86kAniXfE4,1815
 lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py,sha256=KsKLD3AP66YYmXfUn-mVZjERYLtU1Zs4a-7CB2zDfas,596
 lyrics_transcriber/correction/anchor_sequence.py,sha256=5tl4Cjiw5UlLbEb1Oy-g3ebKCinXSwohdaCB9-rTMtI,43798
-lyrics_transcriber/correction/corrector.py,sha256=
+lyrics_transcriber/correction/corrector.py,sha256=qW6GwOOLM8zxYtYMmGy9Rzk_4mJzdpGiCXW3LQFXn14,38362
 lyrics_transcriber/correction/feedback/__init__.py,sha256=i1gd0Vb4qvlzZQ3lqA3fJjt288YP7f-MBPwOzZ7Rjh4,68
 lyrics_transcriber/correction/feedback/schemas.py,sha256=OiF_WUqcqiEKIoburYM8kWAIundy82PQE7ImsdP8UCk,4416
 lyrics_transcriber/correction/feedback/store.py,sha256=T4IDzf1eRA9n-wdLLrLyAW1ELYgXwK9RikJgX_B3fN8,8788
@@ -105,8 +106,6 @@ lyrics_transcriber/correction/handlers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JC
 lyrics_transcriber/correction/handlers/base.py,sha256=ZXYMFgbCmlD62dpqdFwFPlcePdHKEFrABffnG_Mu5mI,1687
 lyrics_transcriber/correction/handlers/extend_anchor.py,sha256=IADgdPmEMokUQhh6mP-wQWLYf6GfWTvJbBjOk08A-aw,6384
 lyrics_transcriber/correction/handlers/levenshtein.py,sha256=hMERQHVgiUDSHtamYrAjqZ3qMMok4VmQ_MYM2-nrX6w,7864
-lyrics_transcriber/correction/handlers/llm.py,sha256=ufqHtohdU5dUXE3DikzbloAWGVgMu1wnw6P4WHRmpdk,14580
-lyrics_transcriber/correction/handlers/llm_providers.py,sha256=MV-KCRseccg-DEimMS0D2bXJ2xhy59r2n8UZjICUoEY,2067
 lyrics_transcriber/correction/handlers/no_space_punct_match.py,sha256=jY2fa547Qc8B63xIhF9VyWMaq5jds6E6wBqyVq6KANw,7057
 lyrics_transcriber/correction/handlers/relaxed_word_count_match.py,sha256=x4k__6gav4-STk_TycLcg5Sw4x2vUFAj5fWmOv7Yd_w,3911
 lyrics_transcriber/correction/handlers/repeat.py,sha256=1PJADW44egYh7N9D2fN-gDIusWVglFjGHrCZuTQYNpA,4313
@@ -287,8 +286,8 @@ lyrics_transcriber/transcribers/whisper.py,sha256=YcCB1ic9H6zL1GS0jD0emu8-qlcH0Q
 lyrics_transcriber/types.py,sha256=UJjaxhVd2o14AG4G8ToU598p0JeYdiTFjpG38jGCoYQ,27917
 lyrics_transcriber/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lyrics_transcriber/utils/word_utils.py,sha256=-cMGpj9UV4F6IsoDKAV2i1aiqSO8eI91HMAm_igtVMk,958
-karaoke_gen-0.
-karaoke_gen-0.
-karaoke_gen-0.
-karaoke_gen-0.
-karaoke_gen-0.
+karaoke_gen-0.82.0.dist-info/METADATA,sha256=k51l0dhnVIM5kn6lFbTCLi1oKbcV_2iSulloMXVkU0Q,23077
+karaoke_gen-0.82.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+karaoke_gen-0.82.0.dist-info/entry_points.txt,sha256=xIyLe7K84ZyjO8L0_AmNectz93QjGSs5AkApMtlAd4g,160
+karaoke_gen-0.82.0.dist-info/licenses/LICENSE,sha256=81R_4XwMZDODHD7JcZeUR8IiCU8AD7Ajl6bmwR9tYDk,1074
+karaoke_gen-0.82.0.dist-info/RECORD,,
lyrics_transcriber/correction/agentic/observability/langfuse_integration.py

@@ -1,28 +1,115 @@
-
+"""LangFuse integration for agentic correction observability and prompt management.
+
+This module provides:
+- Client initialization with fail-fast behavior when configured
+- Metrics recording for observability
+- Prompt fetching for dynamic prompt management
+- Dataset fetching for few-shot examples
+"""
+
+from typing import Optional, Dict, Any, List
 import os
-import
+import logging
+
+logger = logging.getLogger(__name__)
+
+# Module-level client singleton
+_langfuse_client: Optional[Any] = None
+_client_initialized: bool = False
+
+
+class LangFuseConfigError(Exception):
+    """Raised when LangFuse is configured but initialization fails."""
+    pass
+
 
+def is_langfuse_configured() -> bool:
+    """Check if LangFuse credentials are configured in environment."""
+    public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
+    secret_key = os.getenv("LANGFUSE_SECRET_KEY")
+    return bool(public_key and secret_key)
 
-
+
+def setup_langfuse() -> Optional[object]:
     """Initialize Langfuse client if keys are present; return client or None.
 
     This avoids hard dependency at import time; caller can check for None and
     no-op if observability is not configured.
+
+    Note: This function does NOT fail fast - use get_langfuse_client() for
+    fail-fast behavior when LangFuse is required.
     """
     secret = os.getenv("LANGFUSE_SECRET_KEY")
     public = os.getenv("LANGFUSE_PUBLIC_KEY")
-    host = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")
+    host = os.getenv("LANGFUSE_HOST", "https://us.cloud.langfuse.com")
     if not (secret and public):
         return None
     try:
         from langfuse import Langfuse  # type: ignore
 
-        client = Langfuse(secret_key=secret, public_key=public, host=host
+        client = Langfuse(secret_key=secret, public_key=public, host=host)
         return client
     except Exception:
         return None
 
 
+def get_langfuse_client() -> Optional[Any]:
+    """Get or create the LangFuse client singleton.
+
+    Unlike setup_langfuse(), this function implements fail-fast behavior:
+    if LangFuse keys are configured but initialization fails, it raises
+    an exception rather than returning None.
+
+    Returns:
+        Langfuse client instance, or None if not configured
+
+    Raises:
+        LangFuseConfigError: If keys are set but initialization fails
+    """
+    global _langfuse_client, _client_initialized
+
+    if _client_initialized:
+        return _langfuse_client
+
+    secret = os.getenv("LANGFUSE_SECRET_KEY")
+    public = os.getenv("LANGFUSE_PUBLIC_KEY")
+    host = os.getenv("LANGFUSE_HOST", "https://us.cloud.langfuse.com")
+
+    if not (secret and public):
+        logger.debug("LangFuse keys not configured, client disabled")
+        _client_initialized = True
+        return None
+
+    try:
+        from langfuse import Langfuse
+
+        _langfuse_client = Langfuse(
+            secret_key=secret,
+            public_key=public,
+            host=host,
+        )
+        _client_initialized = True
+        logger.info(f"LangFuse client initialized (host: {host})")
+        return _langfuse_client
+
+    except Exception as e:
+        # Fail fast - if keys are set, we expect LangFuse to work
+        raise LangFuseConfigError(
+            f"LangFuse keys are set but initialization failed: {e}\n"
+            f"Check:\n"
+            f"  - LANGFUSE_PUBLIC_KEY: {public[:10] if public else 'not set'}...\n"
+            f"  - LANGFUSE_SECRET_KEY: {'set' if secret else 'not set'}\n"
+            f"  - LANGFUSE_HOST: {host}"
+        ) from e
+
+
+def reset_langfuse_client() -> None:
+    """Reset the global LangFuse client (for testing)."""
+    global _langfuse_client, _client_initialized
+    _langfuse_client = None
+    _client_initialized = False
+
+
 def record_metrics(client: Optional[object], name: str, metrics: Dict[str, Any]) -> None:
     """Record custom metrics to Langfuse if initialized."""
     if client is None:
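A short sketch of the contract the new singleton accessor establishes, as caller-side code rather than package code: "not configured" yields None, while "configured but broken" raises, per the docstrings above.

    # Hypothetical caller: distinguish "not configured" (None) from
    # "configured but broken" (LangFuseConfigError), per the docstrings above.
    from lyrics_transcriber.correction.agentic.observability.langfuse_integration import (
        LangFuseConfigError,
        get_langfuse_client,
    )

    try:
        client = get_langfuse_client()
    except LangFuseConfigError as e:
        raise SystemExit(f"LangFuse is configured but unusable: {e}")

    if client is None:
        print("Observability disabled (no LANGFUSE_* keys set)")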
@@ -33,3 +120,89 @@ def record_metrics(client: Optional[object], name: str, metrics: Dict[str, Any])
     except Exception:
         # Swallow observability errors to never impact core flow
         pass
+
+
+def fetch_prompt(name: str, client: Optional[Any] = None, label: Optional[str] = "production") -> Any:
+    """Fetch a prompt template from LangFuse.
+
+    Args:
+        name: The prompt name in LangFuse
+        client: Optional pre-initialized client. If None, uses get_langfuse_client()
+        label: Prompt label to fetch (default: "production"). If the labeled version
+            is not found, falls back to version 1.
+
+    Returns:
+        LangFuse prompt object
+
+    Raises:
+        LangFuseConfigError: If LangFuse is not configured
+        RuntimeError: If prompt fetch fails
+    """
+    if client is None:
+        client = get_langfuse_client()
+
+    if client is None:
+        raise LangFuseConfigError(
+            f"Cannot fetch prompt '{name}': LangFuse is not configured. "
+            f"Set LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY."
+        )
+
+    try:
+        # Try to fetch with the specified label (default: production)
+        prompt = client.get_prompt(name, label=label)
+        logger.debug(f"Fetched prompt '{name}' (label={label}) from LangFuse")
+        return prompt
+    except Exception as label_error:
+        # If labeled version not found, try fetching version 1 as fallback
+        # This handles newly created prompts that haven't been promoted yet
+        try:
+            prompt = client.get_prompt(name, version=1)
+            logger.warning(
+                f"Prompt '{name}' label '{label}' not found, using version 1. "
+                f"Consider promoting this prompt in LangFuse UI."
+            )
+            return prompt
+        except Exception as version_error:
+            raise RuntimeError(
+                f"Failed to fetch prompt '{name}' from LangFuse: "
+                f"Label '{label}' error: {label_error}, "
+                f"Version 1 fallback error: {version_error}"
+            ) from version_error
+
+
+def fetch_dataset(name: str, client: Optional[Any] = None) -> List[Dict[str, Any]]:
+    """Fetch a dataset from LangFuse and return its items.
+
+    Args:
+        name: The dataset name in LangFuse
+        client: Optional pre-initialized client. If None, uses get_langfuse_client()
+
+    Returns:
+        List of dataset item inputs (the actual example data)
+
+    Raises:
+        LangFuseConfigError: If LangFuse is not configured
+        RuntimeError: If dataset fetch fails
+    """
+    if client is None:
+        client = get_langfuse_client()
+
+    if client is None:
+        raise LangFuseConfigError(
+            f"Cannot fetch dataset '{name}': LangFuse is not configured. "
+            f"Set LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY."
+        )
+
+    try:
+        dataset = client.get_dataset(name)
+        items = []
+        for item in dataset.items:
+            if hasattr(item, 'input') and item.input:
+                items.append(item.input)
+
+        logger.debug(f"Fetched {len(items)} items from dataset '{name}'")
+        return items
+    except Exception as e:
+        raise RuntimeError(
+            f"Failed to fetch dataset '{name}' from LangFuse: {e}"
+        ) from e
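A minimal sketch of the two fetch helpers together, again as hypothetical caller code; the names "gap-classifier" and "gap-classifier-examples" match the constants defined in langfuse_prompts.py further down.

    # Hypothetical caller code; both helpers wrap fetch failures in
    # RuntimeError after the version-1 fallback, per the docstrings above.
    from lyrics_transcriber.correction.agentic.observability.langfuse_integration import (
        fetch_dataset,
        fetch_prompt,
    )

    try:
        prompt = fetch_prompt("gap-classifier")  # "production" label by default
        examples = fetch_dataset("gap-classifier-examples")
    except RuntimeError as e:
        print(f"LangFuse fetch failed: {e}")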
lyrics_transcriber/correction/agentic/prompts/__init__.py

@@ -1,2 +1,25 @@
 """Prompt templates for agentic correction."""
 
+from .classifier import (
+    build_classification_prompt,
+    build_classification_prompt_hardcoded,
+    get_hardcoded_examples,
+)
+from .langfuse_prompts import (
+    LangFusePromptService,
+    LangFusePromptError,
+    LangFuseDatasetError,
+    get_prompt_service,
+    reset_prompt_service,
+)
+
+__all__ = [
+    "build_classification_prompt",
+    "build_classification_prompt_hardcoded",
+    "get_hardcoded_examples",
+    "LangFusePromptService",
+    "LangFusePromptError",
+    "LangFuseDatasetError",
+    "get_prompt_service",
+    "reset_prompt_service",
+]
lyrics_transcriber/correction/agentic/prompts/classifier.py

@@ -1,23 +1,35 @@
-"""Gap classification prompt builder for agentic correction.
+"""Gap classification prompt builder for agentic correction.
+
+This module provides two modes of operation:
+1. LangFuse mode: Prompts and examples fetched from LangFuse for dynamic iteration
+2. Hardcoded mode: Fallback for local development when LangFuse is not configured
+
+The main entry point is `build_classification_prompt()` which automatically
+selects the appropriate mode based on LangFuse configuration.
+"""
 
 from typing import Dict, List, Optional
 import yaml
 import os
+import logging
 from pathlib import Path
 
+logger = logging.getLogger(__name__)
+
 
 def load_few_shot_examples() -> Dict[str, List[Dict]]:
     """Load few-shot examples from examples.yaml if it exists."""
     examples_path = Path(__file__).parent / "examples.yaml"
-
+
     if not examples_path.exists():
         return get_hardcoded_examples()
-
+
     try:
         with open(examples_path, 'r') as f:
             data = yaml.safe_load(f)
         return data.get('examples_by_category', {})
-    except Exception:
+    except Exception as e:
+        logger.warning(f"Failed to load examples.yaml, using hardcoded examples: {e}")
         return get_hardcoded_examples()
 
 
@@ -122,7 +134,12 @@ def build_classification_prompt(
     gap_id: Optional[str] = None
 ) -> str:
     """Build a prompt for classifying a gap in the transcription.
-
+
+    This function automatically selects between LangFuse and hardcoded prompts:
+    - If LangFuse is configured (LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY set),
+      fetches the prompt template and examples from LangFuse.
+    - Otherwise, uses hardcoded prompts for local development.
+
     Args:
         gap_text: The text of the gap that needs classification
         preceding_words: Text immediately before the gap
@@ -131,7 +148,50 @@ def build_classification_prompt(
         artist: Song artist name for context
         title: Song title for context
         gap_id: Identifier for the gap
-
+
+    Returns:
+        Formatted prompt string for the LLM
+
+    Raises:
+        LangFusePromptError: If LangFuse is configured but prompt fetch fails
+    """
+    from .langfuse_prompts import get_prompt_service
+
+    service = get_prompt_service()
+    return service.get_classification_prompt(
+        gap_text=gap_text,
+        preceding_words=preceding_words,
+        following_words=following_words,
+        reference_contexts=reference_contexts,
+        artist=artist,
+        title=title,
+        gap_id=gap_id
+    )
+
+
+def build_classification_prompt_hardcoded(
+    gap_text: str,
+    preceding_words: str,
+    following_words: str,
+    reference_contexts: Dict[str, str],
+    artist: Optional[str] = None,
+    title: Optional[str] = None,
+    gap_id: Optional[str] = None
+) -> str:
+    """Build a prompt for classifying a gap using hardcoded templates.
+
+    This is the fallback implementation used when LangFuse is not configured.
+    It is also used as the source of truth for migrating prompts to LangFuse.
+
+    Args:
+        gap_text: The text of the gap that needs classification
+        preceding_words: Text immediately before the gap
+        following_words: Text immediately after the gap
+        reference_contexts: Dictionary of reference lyrics from each source
+        artist: Song artist name for context
+        title: Song title for context
+        gap_id: Identifier for the gap
+
     Returns:
         Formatted prompt string for the LLM
     """
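A quick sketch of calling the new entry point; this is illustrative caller code, and all gap values below are invented. With no LANGFUSE_* keys in the environment it resolves to the hardcoded builder.

    # Hypothetical inputs - the keyword names mirror the signature above.
    from lyrics_transcriber.correction.agentic.prompts import build_classification_prompt

    prompt = build_classification_prompt(
        gap_text="hold me closer tony danza",
        preceding_words="",
        following_words="",
        reference_contexts={"genius": "hold me closer tiny dancer"},
        artist="Elton John",   # optional song context
        title="Tiny Dancer",
        gap_id="gap-1",
    )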
lyrics_transcriber/correction/agentic/prompts/langfuse_prompts.py (new file)

@@ -0,0 +1,298 @@
+"""LangFuse prompt management for agentic correction.
+
+This module provides prompt fetching from LangFuse, enabling dynamic prompt
+iteration without code redeployment.
+"""
+
+from typing import Dict, List, Optional, Any
+import logging
+import os
+
+logger = logging.getLogger(__name__)
+
+
+class LangFusePromptError(Exception):
+    """Raised when LangFuse prompt fetching fails."""
+    pass
+
+
+class LangFuseDatasetError(Exception):
+    """Raised when LangFuse dataset fetching fails."""
+    pass
+
+
+class LangFusePromptService:
+    """Fetches prompts and datasets from LangFuse for agentic correction.
+
+    This service handles:
+    - Fetching prompt templates from LangFuse
+    - Fetching few-shot examples from LangFuse datasets
+    - Compiling prompts with dynamic variables
+    - Fail-fast behavior when LangFuse is configured but unavailable
+
+    When LangFuse keys are not configured, falls back to hardcoded prompts
+    for local development.
+    """
+
+    # Prompt and dataset names in LangFuse
+    CLASSIFIER_PROMPT_NAME = "gap-classifier"
+    EXAMPLES_DATASET_NAME = "gap-classifier-examples"
+
+    def __init__(self, client: Optional[Any] = None):
+        """Initialize the prompt service.
+
+        Args:
+            client: Optional pre-initialized Langfuse client (for testing).
+                If None, will initialize from environment variables.
+        """
+        self._client = client
+        self._initialized = False
+        self._use_langfuse = self._should_use_langfuse()
+
+        if self._use_langfuse and client is None:
+            self._init_client()
+
+    def _should_use_langfuse(self) -> bool:
+        """Check if LangFuse credentials are configured."""
+        public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
+        secret_key = os.getenv("LANGFUSE_SECRET_KEY")
+        return bool(public_key and secret_key)
+
+    def _init_client(self) -> None:
+        """Initialize the Langfuse client using the shared singleton."""
+        from ..observability.langfuse_integration import get_langfuse_client, LangFuseConfigError
+
+        try:
+            self._client = get_langfuse_client()
+            if self._client:
+                self._initialized = True
+                logger.info("LangFuse prompt service initialized")
+            else:
+                logger.debug("LangFuse keys not configured, will use hardcoded prompts")
+        except LangFuseConfigError as e:
+            # Re-raise as RuntimeError for consistent error handling
+            raise RuntimeError(str(e)) from e
+
+    def get_classification_prompt(
+        self,
+        gap_text: str,
+        preceding_words: str,
+        following_words: str,
+        reference_contexts: Dict[str, str],
+        artist: Optional[str] = None,
+        title: Optional[str] = None,
+        gap_id: Optional[str] = None
+    ) -> str:
+        """Fetch and compile the gap classification prompt.
+
+        If LangFuse is configured, fetches the prompt template and examples
+        from LangFuse. Otherwise, falls back to hardcoded prompts.
+
+        Args:
+            gap_text: The text of the gap that needs classification
+            preceding_words: Text immediately before the gap
+            following_words: Text immediately after the gap
+            reference_contexts: Dictionary of reference lyrics from each source
+            artist: Song artist name for context
+            title: Song title for context
+            gap_id: Identifier for the gap
+
+        Returns:
+            Compiled prompt string ready for LLM
+
+        Raises:
+            LangFusePromptError: If LangFuse is configured but prompt fetch fails
+        """
+        if not self._use_langfuse:
+            # Fall back to hardcoded prompt for development
+            from .classifier import build_classification_prompt_hardcoded
+            return build_classification_prompt_hardcoded(
+                gap_text=gap_text,
+                preceding_words=preceding_words,
+                following_words=following_words,
+                reference_contexts=reference_contexts,
+                artist=artist,
+                title=title,
+                gap_id=gap_id
+            )
+
+        # Fetch from LangFuse
+        try:
+            prompt_template = self._fetch_prompt(self.CLASSIFIER_PROMPT_NAME)
+            examples = self._fetch_examples()
+
+            # Build component strings
+            song_context = self._build_song_context(artist, title)
+            examples_text = self._format_examples(examples)
+            references_text = self._format_references(reference_contexts)
+
+            # Compile the prompt with variables
+            compiled = prompt_template.compile(
+                song_context=song_context,
+                examples_text=examples_text,
+                gap_id=gap_id or "unknown",
+                preceding_words=preceding_words,
+                gap_text=gap_text,
+                following_words=following_words,
+                references_text=references_text
+            )
+
+            logger.debug(f"Compiled LangFuse prompt for gap {gap_id}")
+            return compiled
+
+        except Exception as e:
+            raise LangFusePromptError(
+                f"Failed to fetch/compile prompt from LangFuse: {e}"
+            ) from e
+
+    def _fetch_prompt(self, name: str, label: str = "production") -> Any:
+        """Fetch a prompt template from LangFuse.
+
+        Args:
+            name: The prompt name in LangFuse
+            label: Prompt label to fetch (default: "production"). Falls back to
+                version 1 if labeled version not found.
+
+        Returns:
+            LangFuse prompt object
+
+        Raises:
+            LangFusePromptError: If fetch fails
+        """
+        if not self._client:
+            raise LangFusePromptError("LangFuse client not initialized")
+
+        try:
+            # Try to fetch with the specified label (default: production)
+            prompt = self._client.get_prompt(name, label=label)
+            logger.debug(f"Fetched prompt '{name}' (label={label}) from LangFuse")
+            return prompt
+        except Exception as label_error:
+            # If labeled version not found, try fetching version 1 as fallback
+            # This handles newly created prompts that haven't been promoted yet
+            try:
+                prompt = self._client.get_prompt(name, version=1)
+                logger.warning(
+                    f"Prompt '{name}' label '{label}' not found, using version 1. "
+                    f"Consider promoting this prompt in LangFuse UI."
+                )
+                return prompt
+            except Exception as version_error:
+                raise LangFusePromptError(
+                    f"Failed to fetch prompt '{name}' from LangFuse: "
+                    f"Label '{label}' error: {label_error}, "
+                    f"Version 1 fallback error: {version_error}"
+                ) from version_error
+
+    def _fetch_examples(self) -> List[Dict[str, Any]]:
+        """Fetch few-shot examples from LangFuse dataset.
+
+        Returns:
+            List of example dictionaries
+
+        Raises:
+            LangFuseDatasetError: If fetch fails
+        """
+        if not self._client:
+            raise LangFuseDatasetError("LangFuse client not initialized")
+
+        try:
+            dataset = self._client.get_dataset(self.EXAMPLES_DATASET_NAME)
+            examples = []
+            for item in dataset.items:
+                # Dataset items have 'input' field with the example data
+                if hasattr(item, 'input') and item.input:
+                    examples.append(item.input)
+
+            logger.debug(f"Fetched {len(examples)} examples from LangFuse dataset")
+            return examples
+        except Exception as e:
+            raise LangFuseDatasetError(
+                f"Failed to fetch dataset '{self.EXAMPLES_DATASET_NAME}' from LangFuse: {e}"
+            ) from e
+
+    def _build_song_context(self, artist: Optional[str], title: Optional[str]) -> str:
+        """Build song context section for the prompt."""
+        if artist and title:
+            return (
+                f"\n## Song Context\n\n"
+                f"**Artist:** {artist}\n"
+                f"**Title:** {title}\n\n"
+                f"Note: The song title and artist name may help identify proper nouns "
+                f"or unusual words that could be mis-heard.\n"
+            )
+        return ""
+
+    def _format_examples(self, examples: List[Dict[str, Any]]) -> str:
+        """Format few-shot examples for inclusion in prompt.
+
+        Args:
+            examples: List of example dictionaries from LangFuse dataset
+
+        Returns:
+            Formatted examples string
+        """
+        if not examples:
+            return ""
+
+        # Group examples by category
+        examples_by_category: Dict[str, List[Dict]] = {}
+        for ex in examples:
+            category = ex.get("category", "unknown")
+            if category not in examples_by_category:
+                examples_by_category[category] = []
+            examples_by_category[category].append(ex)
+
+        # Build formatted text
+        text = "## Example Classifications\n\n"
+        for category, category_examples in examples_by_category.items():
+            text += f"### {category.upper().replace('_', ' ')}\n\n"
+            for ex in category_examples[:2]:  # Limit to 2 examples per category
+                text += f"**Gap:** {ex.get('gap_text', '')}\n"
+                text += f"**Context:** ...{ex.get('preceding', '')}... [GAP] ...{ex.get('following', '')}...\n"
+                if 'reference' in ex:
+                    text += f"**Reference:** {ex['reference']}\n"
+                text += f"**Reasoning:** {ex.get('reasoning', '')}\n"
+                text += f"**Action:** {ex.get('action', '')}\n\n"
+
+        return text
+
+    def _format_references(self, reference_contexts: Dict[str, str]) -> str:
+        """Format reference lyrics for inclusion in prompt.
+
+        Args:
+            reference_contexts: Dictionary of reference lyrics from each source
+
+        Returns:
+            Formatted references string
+        """
+        if not reference_contexts:
+            return ""
+
+        text = "## Available Reference Lyrics\n\n"
+        for source, context in reference_contexts.items():
+            text += f"**{source.upper()}:** {context}\n\n"
+
+        return text
+
+
+# Module-level singleton for convenience
+_prompt_service: Optional[LangFusePromptService] = None
+
+
+def get_prompt_service() -> LangFusePromptService:
+    """Get or create the global prompt service instance.
+
+    Returns:
+        LangFusePromptService singleton instance
+    """
+    global _prompt_service
+    if _prompt_service is None:
+        _prompt_service = LangFusePromptService()
+    return _prompt_service
+
+
+def reset_prompt_service() -> None:
+    """Reset the global prompt service instance (for testing)."""
+    global _prompt_service
+    _prompt_service = None
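Because the constructor accepts an injected client, the service can be exercised without network access. A test-style sketch under that assumption (the stub classes below are invented; only the method names they imitate come from the code above):

    # Test-style sketch: inject a stub client. The env keys only gate mode
    # selection; with them set to dummy values, the stub's get_prompt and
    # get_dataset are used instead of real LangFuse calls.
    import os

    os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-test"   # dummy, never sent anywhere
    os.environ["LANGFUSE_SECRET_KEY"] = "sk-test"

    from lyrics_transcriber.correction.agentic.prompts import LangFusePromptService

    class StubPrompt:
        def compile(self, **variables):
            return "PROMPT[" + ", ".join(sorted(variables)) + "]"

    class StubClient:
        def get_prompt(self, name, label=None, version=None):
            return StubPrompt()
        def get_dataset(self, name):
            return type("Dataset", (), {"items": []})()

    service = LangFusePromptService(client=StubClient())
    text = service.get_classification_prompt(
        gap_text="wrecking nice beach",
        preceding_words="you could be",
        following_words="tonight",
        reference_contexts={"genius": "recognizing speech"},
    )
    print(text)  # compiled from the stub template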
lyrics_transcriber/correction/agentic/providers/config.py

@@ -19,8 +19,9 @@ class ProviderConfig:
     cache_dir: str
 
     # GCP/Vertex AI settings
+    # Note: Gemini 3 models require 'global' location (not regional like us-central1)
     gcp_project_id: Optional[str] = None
-    gcp_location: str = "
+    gcp_location: str = "global"
 
     request_timeout_seconds: float = 30.0
     max_retries: int = 2
@@ -51,7 +52,7 @@ class ProviderConfig:
             privacy_mode=os.getenv("PRIVACY_MODE", "false").lower() in {"1", "true", "yes"},
             cache_dir=cache_dir,
             gcp_project_id=os.getenv("GOOGLE_CLOUD_PROJECT") or os.getenv("GCP_PROJECT_ID"),
-            gcp_location=os.getenv("GCP_LOCATION", "
+            gcp_location=os.getenv("GCP_LOCATION", "global"),
             request_timeout_seconds=float(os.getenv("AGENTIC_TIMEOUT_SECONDS", "30.0")),
             max_retries=int(os.getenv("AGENTIC_MAX_RETRIES", "2")),
             retry_backoff_base_seconds=float(os.getenv("AGENTIC_BACKOFF_BASE_SECONDS", "0.2")),
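The environment variables read in the construction code above, collected in one place; the values are illustrative only.

    # Illustrative environment for Vertex AI-backed correction; these are the
    # variables the construction code shown above reads.
    import os

    os.environ["GOOGLE_CLOUD_PROJECT"] = "my-gcp-project"   # or GCP_PROJECT_ID
    os.environ["GCP_LOCATION"] = "global"                   # Gemini 3 requires 'global'
    os.environ["AGENTIC_TIMEOUT_SECONDS"] = "30.0"
    os.environ["AGENTIC_MAX_RETRIES"] = "2"
    os.environ["AGENTIC_BACKOFF_BASE_SECONDS"] = "0.2"
    os.environ["PRIVACY_MODE"] = "false"                    # "1"/"true"/"yes" enable it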
lyrics_transcriber/correction/agentic/providers/constants.py

@@ -8,7 +8,7 @@ RESPONSE_LOG_LENGTH = 500  # Characters to log from responses
 MODEL_SPEC_FORMAT = "provider/model"  # Expected format for model identifiers
 
 # Default Langfuse host
-DEFAULT_LANGFUSE_HOST = "https://cloud.langfuse.com"
+DEFAULT_LANGFUSE_HOST = "https://us.cloud.langfuse.com"
 
 # Raw response indicator
 RAW_RESPONSE_KEY = "raw"  # Key used to wrap unparsed responses
lyrics_transcriber/correction/agentic/providers/langchain_bridge.py

@@ -187,26 +187,41 @@ class LangChainBridge(BaseAIProvider):
 
     def _invoke_model(self, prompt: str) -> str:
         """Invoke the chat model with a prompt.
-
+
         This is a simple wrapper that can be passed to the retry executor.
-
+
         Args:
             prompt: The prompt to send
-
+
         Returns:
             Response content as string
-
+
         Raises:
             Exception: Any error from the model invocation
         """
         from langchain_core.messages import HumanMessage
-
+
         # Prepare config with session_id in metadata (Langfuse format)
         config = {}
         if hasattr(self, '_session_id') and self._session_id:
             config["metadata"] = {"langfuse_session_id": self._session_id}
             logger.debug(f"🤖 [LangChain] Invoking with session_id: {self._session_id}")
-
+
         response = self._chat_model.invoke([HumanMessage(content=prompt)], config=config)
-
+        content = response.content
+
+        # Handle multimodal response format from Gemini 3+ models
+        # Response can be a list of content parts: [{'type': 'text', 'text': '...'}]
+        if isinstance(content, list):
+            # Extract text from the first text content part
+            for part in content:
+                if isinstance(part, dict) and part.get('type') == 'text':
+                    return part.get('text', '')
+            # Fallback: concatenate all text parts
+            return ''.join(
+                part.get('text', '') if isinstance(part, dict) else str(part)
+                for part in content
+            )
+
+        return content
 
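To see what the new list-handling branch does in isolation, here is a standalone rendition of the same normalization applied to a made-up Gemini-style payload (not package code; the logic mirrors the hunk above line for line):

    # Standalone illustration of the content normalization added above.
    from typing import Any, List, Union

    def normalize_content(content: Union[str, List[Any]]) -> str:
        """Collapse a LangChain message content field to plain text."""
        if isinstance(content, list):
            # Prefer the first typed text part, as the bridge does
            for part in content:
                if isinstance(part, dict) and part.get('type') == 'text':
                    return part.get('text', '')
            # Fallback: concatenate whatever text can be recovered
            return ''.join(
                part.get('text', '') if isinstance(part, dict) else str(part)
                for part in content
            )
        return content

    # Gemini 3-style multimodal payload: a list of typed content parts.
    print(normalize_content([{'type': 'text', 'text': '{"category": "transcription_error"}'}]))
    print(normalize_content("plain string content"))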
lyrics_transcriber/correction/agentic/providers/model_factory.py

@@ -100,19 +100,10 @@ class ModelFactory:
             return
 
         try:
-            from langfuse import Langfuse
             from langfuse.langchain import CallbackHandler
-
-            #
-
-                public_key=public_key,
-                secret_key=secret_key,
-                host=os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com"),
-            )
-
-            # Then create callback handler with the same public_key
-            # The handler will use the initialized client
-            self._langfuse_handler = CallbackHandler(public_key=public_key)
+
+            # CallbackHandler auto-discovers credentials from environment variables
+            self._langfuse_handler = CallbackHandler()
             logger.info(f"🤖 Langfuse callback handler initialized for {model_spec}")
         except Exception as e:
             # If Langfuse keys are set, we MUST fail fast
@@ -224,6 +215,7 @@ class ModelFactory:
                 model=model_name,
                 project=config.gcp_project_id,
                 location=config.gcp_location,
+                timeout=config.request_timeout_seconds,
                 max_retries=config.max_retries,
                 callbacks=callbacks,
            )
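The timeout plumbing lands in what appears to be the Vertex AI model construction. A hedged sketch of an equivalent standalone call follows; the ChatVertexAI class name is an assumption based on the langchain-google-vertexai dependency bump in METADATA, and the project id is a placeholder.

    # Sketch only: the diff shows these keyword arguments being passed; the
    # class name is inferred from the dependency list, not stated in the diff.
    from langchain_google_vertexai import ChatVertexAI

    chat = ChatVertexAI(
        model="gemini-3-flash-preview",  # from DEFAULT_CLOUD_MODEL, provider prefix stripped
        project="my-gcp-project",        # placeholder project id
        location="global",               # Gemini 3 requires 'global', per the config comment
        timeout=30.0,                    # new in 0.82.0: request_timeout_seconds wired through
        max_retries=2,
    )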
lyrics_transcriber/correction/agentic/router.py

@@ -6,6 +6,7 @@ from typing import Dict, Any
 from .providers.config import ProviderConfig
 
 # Default model for cloud deployments - Gemini 3 Flash via Vertex AI
+# Note: Gemini 3 models require 'global' location (not regional like us-central1)
 DEFAULT_CLOUD_MODEL = "vertexai/gemini-3-flash-preview"
 
 
@@ -33,7 +34,7 @@ class ModelRouter:
         if self._config.privacy_mode:
             return "ollama/llama3.2:latest"
 
-        # Default to Gemini 3 Flash for all cases (fast, cost-effective)
+        # Default to Gemini 3 Flash for all cases (fast, cost-effective, latest capabilities)
         return DEFAULT_CLOUD_MODEL
 
 
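The routing rule is small enough to restate as a self-contained sketch. Only the privacy check and the default constant appear in the diff; the dataclass and function names below are illustrative, not the package's.

    # Self-contained rendition of the routing logic visible above.
    from dataclasses import dataclass

    DEFAULT_CLOUD_MODEL = "vertexai/gemini-3-flash-preview"

    @dataclass
    class Config:
        privacy_mode: bool = False

    def select_model(config: Config) -> str:
        if config.privacy_mode:
            # Keep inference local when privacy mode is on
            return "ollama/llama3.2:latest"
        # Default to Gemini 3 Flash (fast, cost-effective, latest capabilities)
        return DEFAULT_CLOUD_MODEL

    assert select_model(Config(privacy_mode=True)).startswith("ollama/")
    assert select_model(Config()) == DEFAULT_CLOUD_MODEL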
lyrics_transcriber/correction/corrector.py

@@ -6,7 +6,6 @@ import os
 import shortuuid
 
 from lyrics_transcriber.correction.handlers.levenshtein import LevenshteinHandler
-from lyrics_transcriber.correction.handlers.llm import LLMHandler
 from lyrics_transcriber.correction.handlers.no_space_punct_match import NoSpacePunctuationMatchHandler
 from lyrics_transcriber.correction.handlers.relaxed_word_count_match import RelaxedWordCountMatchHandler
 from lyrics_transcriber.correction.handlers.repeat import RepeatCorrectionHandler
@@ -27,7 +26,6 @@ from lyrics_transcriber.correction.anchor_sequence import AnchorSequenceFinder
 from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
 from lyrics_transcriber.correction.handlers.extend_anchor import ExtendAnchorHandler
 from lyrics_transcriber.utils.word_utils import WordUtils
-from lyrics_transcriber.correction.handlers.llm_providers import OllamaProvider, OpenAIProvider
 
 
 class LyricsCorrector:
@@ -57,60 +55,18 @@ class LyricsCorrector:
         ]
 
         # Create all handlers but respect enabled_handlers if provided
+        # Note: Legacy LLMHandler removed - use AgenticCorrector via USE_AGENTIC_AI=1 instead
         all_handlers = [
             ("ExtendAnchorHandler", ExtendAnchorHandler(logger=self.logger)),
             ("WordCountMatchHandler", WordCountMatchHandler(logger=self.logger)),
             ("SyllablesMatchHandler", SyllablesMatchHandler(logger=self.logger)),
             ("RelaxedWordCountMatchHandler", RelaxedWordCountMatchHandler(logger=self.logger)),
             ("NoSpacePunctuationMatchHandler", NoSpacePunctuationMatchHandler(logger=self.logger)),
-            (
-                "LLMHandler_Ollama_R17B",
-                LLMHandler(
-                    provider=OllamaProvider(model="deepseek-r1:7b", logger=self.logger),
-                    name="LLMHandler_Ollama_R17B",
-                    logger=self.logger,
-                    cache_dir=self._cache_dir,
-                ),
-            ),
             ("RepeatCorrectionHandler", RepeatCorrectionHandler(logger=self.logger)),
             ("SoundAlikeHandler", SoundAlikeHandler(logger=self.logger)),
             ("LevenshteinHandler", LevenshteinHandler(logger=self.logger)),
         ]
 
-        # Add OpenRouter handlers only if API key is available
-        if os.getenv("OPENROUTER_API_KEY"):
-            openrouter_handlers = [
-                (
-                    "LLMHandler_OpenRouter_Sonnet",
-                    LLMHandler(
-                        provider=OpenAIProvider(
-                            model="anthropic/claude-3-sonnet",
-                            api_key=os.getenv("OPENROUTER_API_KEY"),
-                            base_url="https://openrouter.ai/api/v1",
-                            logger=self.logger,
-                        ),
-                        name="LLMHandler_OpenRouter_Sonnet",
-                        logger=self.logger,
-                        cache_dir=self._cache_dir,
-                    ),
-                ),
-                (
-                    "LLMHandler_OpenRouter_R1",
-                    LLMHandler(
-                        provider=OpenAIProvider(
-                            model="deepseek/deepseek-r1",
-                            api_key=os.getenv("OPENROUTER_API_KEY"),
-                            base_url="https://openrouter.ai/api/v1",
-                            logger=self.logger,
-                        ),
-                        name="LLMHandler_OpenRouter_R1",
-                        logger=self.logger,
-                        cache_dir=self._cache_dir,
-                    ),
-                ),
-            ]
-            all_handlers.extend(openrouter_handlers)
-
         # Store all handler information
         self.all_handlers = [
             {
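The new comment points at the agentic path as the replacement for the removed handlers. A heavily hedged sketch of opting in; only the USE_AGENTIC_AI=1 convention comes from the comment above, and the constructor arguments of LyricsCorrector are not shown in this diff.

    # Hypothetical opt-in to the agentic correction path.
    import os

    os.environ["USE_AGENTIC_AI"] = "1"

    from lyrics_transcriber.correction.corrector import LyricsCorrector

    corrector = LyricsCorrector()  # assumed default construction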
lyrics_transcriber/correction/handlers/llm.py (deleted)

@@ -1,293 +0,0 @@
-from typing import List, Optional, Tuple, Dict, Any, Union
-import logging
-import json
-from datetime import datetime
-from pathlib import Path
-
-from lyrics_transcriber.types import GapSequence, WordCorrection
-from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
-from lyrics_transcriber.correction.handlers.word_operations import WordOperations
-from lyrics_transcriber.correction.handlers.llm_providers import LLMProvider
-
-
-class LLMHandler(GapCorrectionHandler):
-    """Uses an LLM to analyze and correct gaps by comparing with reference lyrics."""
-
-    def __init__(
-        self, provider: LLMProvider, name: str, logger: Optional[logging.Logger] = None, cache_dir: Optional[Union[str, Path]] = None
-    ):
-        super().__init__(logger)
-        self.logger = logger or logging.getLogger(__name__)
-        self.provider = provider
-        self.name = name
-        self.cache_dir = Path(cache_dir) if cache_dir else None
-
-    def _format_prompt(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> str:
-        """Format the prompt for the LLM with context about the gap and reference lyrics."""
-        word_map = data.get("word_map", {})
-        metadata = data.get("metadata", {}) if data else {}
-
-        if not word_map:
-            self.logger.error("No word_map provided in data")
-            return ""
-
-        # Format transcribed words with their IDs
-        transcribed_words = [{"id": word_id, "text": word_map[word_id].text} for word_id in gap.transcribed_word_ids if word_id in word_map]
-
-        prompt = (
-            "You are a lyrics correction expert. You will be given transcribed lyrics that may contain errors "
-            "and reference lyrics from multiple sources. Your task is to analyze each word in the transcribed text "
-            "and suggest specific corrections based on the reference lyrics.\n\n"
-            "Each word has a unique ID. When suggesting corrections, you must specify the ID of the word being corrected. "
-            "This ensures accuracy in applying your corrections.\n\n"
-            "For each correction, specify:\n"
-            "1. The word ID being corrected\n"
-            "2. The correction type ('replace', 'split', 'combine', or 'delete')\n"
-            "3. The corrected text\n"
-            "4. Your confidence level\n"
-            "5. The reason for the correction\n\n"
-        )
-
-        # Add song context if available
-        if metadata and metadata.get("artist") and metadata.get("title"):
-            prompt += f"Song: {metadata['title']}\nArtist: {metadata['artist']}\n\n"
-
-        # Format transcribed words with IDs
-        prompt += "Transcribed words:\n"
-        for word in transcribed_words:
-            prompt += f"- ID: {word['id']}, Text: '{word['text']}'\n"
-
-        prompt += "\nReference lyrics from different sources:\n"
-
-        # Add each reference source with words and their IDs
-        for source, word_ids in gap.reference_word_ids.items():
-            reference_words = [{"id": word_id, "text": word_map[word_id].text} for word_id in word_ids if word_id in word_map]
-            prompt += f"\n{source} immediate context:\n"
-            for word in reference_words:
-                prompt += f"- ID: {word['id']}, Text: '{word['text']}'\n"
-
-            # Add full lyrics if available
-            if metadata and metadata.get("full_reference_texts", {}).get(source):
-                prompt += f"\nFull {source} lyrics:\n{metadata['full_reference_texts'][source]}\n"
-
-        # Add context about surrounding anchors if available
-        if gap.preceding_anchor_id:
-            preceding_anchor = next((a.anchor for a in data.get("anchor_sequences", []) if a.anchor.id == gap.preceding_anchor_id), None)
-            if preceding_anchor:
-                anchor_words = [
-                    {"id": word_id, "text": word_map[word_id].text}
-                    for word_id in preceding_anchor.transcribed_word_ids
-                    if word_id in word_map
-                ]
-                prompt += "\nPreceding correct words:\n"
-                for word in anchor_words:
-                    prompt += f"- ID: {word['id']}, Text: '{word['text']}'\n"
-
-        prompt += (
-            "\nProvide corrections in the following JSON format:\n"
-            "{\n"
-            '  "corrections": [\n'
-            "    {\n"
-            '      "word_id": "id_of_word_to_correct",\n'
-            '      "type": "replace|split|combine|delete",\n'
-            '      "corrected_text": "new text",\n'
-            '      "reference_word_id": "id_from_reference_lyrics",  // Optional, use when matching a specific reference word\n'
-            '      "confidence": 0.9,\n'
-            '      "reason": "explanation of correction"\n'
-            "    }\n"
-            "  ]\n"
-            "}\n\n"
-            "Important rules:\n"
-            "1. Always include the word_id for each correction\n"
-            "2. For 'split' type, corrected_text should contain the space-separated words\n"
-            "3. For 'combine' type, word_id should be the first word to combine\n"
-            "4. Include reference_word_id when the correction matches a specific reference word\n"
-            "5. Only suggest corrections when you're confident they improve the lyrics\n"
-            "6. Preserve any existing words that match the reference lyrics\n"
-            "7. Respond ONLY with the JSON object, no other text"
-        )
-
-        return prompt
-
-    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
-        """LLM handler can attempt to handle any gap with reference words."""
-        if not gap.reference_word_ids:
-            self.logger.debug("No reference words available")
-            return False, {}
-
-        return True, {}
-
-    def _write_debug_info(self, prompt: str, response: str, gap_index: int, audio_file_hash: Optional[str] = None) -> None:
-        """Write prompt and response to debug files."""
-        if not self.cache_dir:
-            self.logger.warning("No cache directory provided, skipping LLM debug output")
-            return
-
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        debug_dir = self.cache_dir / "llm_debug"
-        debug_dir.mkdir(exist_ok=True, parents=True)
-
-        hash_prefix = f"{audio_file_hash}_" if audio_file_hash else ""
-        filename = debug_dir / f"llm_debug_{hash_prefix}{gap_index}_{timestamp}.txt"
-
-        debug_content = "=== LLM PROMPT ===\n" f"{prompt}\n\n" "=== LLM RESPONSE ===\n" f"{response}\n"
-
-        try:
-            with open(filename, "w", encoding="utf-8") as f:
-                f.write(debug_content)
-        except IOError as e:
-            self.logger.error(f"Failed to write LLM debug file: {e}")
-
-    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
-        """Process the gap using the LLM and create corrections based on its response."""
-        if not data or "word_map" not in data:
-            self.logger.error("No word_map provided in data")
-            return []
-
-        word_map = data["word_map"]
-        transcribed_words = [word_map[word_id].text for word_id in gap.transcribed_word_ids if word_id in word_map]
-
-        # Calculate reference positions using the centralized method
-        reference_positions = (
-            WordOperations.calculate_reference_positions(gap, anchor_sequences=data.get("anchor_sequences", [])) or {}
-        )  # Ensure empty dict if None
-
-        prompt = self._format_prompt(gap, data)
-        if not prompt:
-            return []
-
-        # Get a unique index for this gap based on its position
-        gap_index = gap.transcription_position
-
-        try:
-            self.logger.debug(f"Processing gap words: {transcribed_words}")
-            self.logger.debug(f"Reference word IDs: {gap.reference_word_ids}")
-
-            response = self.provider.generate_response(prompt)
-
-            # Write debug info to files
-            self._write_debug_info(prompt, response, gap_index, audio_file_hash=data.get("audio_file_hash"))
-
-            try:
-                corrections_data = json.loads(response)
-            except json.JSONDecodeError as e:
-                self.logger.error(f"Failed to parse LLM response as JSON: {e}")
-                self.logger.error(f"Raw response content: {response}")
-                return []
-
-            # Check if corrections exist and are non-empty
-            if not corrections_data.get("corrections"):
-                self.logger.debug("No corrections suggested by LLM")
-                return []
-
-            corrections = []
-            for correction in corrections_data["corrections"]:
-                # Validate word_id exists in gap
-                if correction["word_id"] not in gap.transcribed_word_ids:
-                    self.logger.error(f"LLM suggested correction for word_id {correction['word_id']} which is not in the gap")
-                    continue
-
-                # Get original word from word map
-                original_word = word_map[correction["word_id"]]
-                position = gap.transcription_position + gap.transcribed_word_ids.index(correction["word_id"])
-
-                self.logger.debug(f"Processing correction: {correction}")
-
-                if correction["type"] == "replace":
-                    self.logger.debug(
-                        f"Creating replacement: '{original_word.text}' -> '{correction['corrected_text']}' " f"at position {position}"
-                    )
-                    corrections.append(
-                        WordOperations.create_word_replacement_correction(
-                            original_word=original_word.text,
-                            corrected_word=correction["corrected_text"],
-                            original_position=position,
-                            source="LLM",
-                            confidence=correction["confidence"],
-                            reason=correction["reason"],
-                            handler=self.name,
-                            reference_positions=reference_positions,
-                            original_word_id=correction["word_id"],
-                            corrected_word_id=correction.get("reference_word_id"),
-                        )
-                    )
-                elif correction["type"] == "split":
-                    split_words = correction["corrected_text"].split()
-                    self.logger.debug(f"Creating split: '{original_word.text}' -> {split_words} " f"at position {position}")
-
-                    # Get reference word IDs if provided
-                    reference_word_ids = correction.get("reference_word_ids", [None] * len(split_words))
-
-                    corrections.extend(
-                        WordOperations.create_word_split_corrections(
-                            original_word=original_word.text,
-                            reference_words=split_words,
-                            original_position=position,
-                            source="LLM",
-                            confidence=correction["confidence"],
-                            reason=correction["reason"],
-                            handler=self.name,
-                            reference_positions=reference_positions,
-                            original_word_id=correction["word_id"],
-                            corrected_word_ids=reference_word_ids,
-                        )
-                    )
-                elif correction["type"] == "combine":
-                    # Get all word IDs to combine
-                    word_ids_to_combine = []
-                    current_idx = gap.transcribed_word_ids.index(correction["word_id"])
-                    words_needed = len(correction["corrected_text"].split())
-
-                    if current_idx + words_needed <= len(gap.transcribed_word_ids):
-                        word_ids_to_combine = gap.transcribed_word_ids[current_idx : current_idx + words_needed]
-                    else:
-                        self.logger.error(f"Not enough words available to combine at position {position}")
-                        continue
-
-                    words_to_combine = [word_map[word_id].text for word_id in word_ids_to_combine]
-
-                    self.logger.debug(
-                        f"Creating combine: {words_to_combine} -> '{correction['corrected_text']}' " f"at position {position}"
-                    )
-
-                    corrections.extend(
-                        WordOperations.create_word_combine_corrections(
-                            original_words=words_to_combine,
-                            reference_word=correction["corrected_text"],
-                            original_position=position,
-                            source="LLM",
-                            confidence=correction["confidence"],
-                            combine_reason=correction["reason"],
-                            delete_reason=f"Part of combining words: {correction['reason']}",
-                            handler=self.name,
-                            reference_positions=reference_positions,
-                            original_word_ids=word_ids_to_combine,
-                            corrected_word_id=correction.get("reference_word_id"),
-                        )
-                    )
-                elif correction["type"] == "delete":
-                    self.logger.debug(f"Creating deletion: '{original_word.text}' at position {position}")
-                    corrections.append(
-                        WordCorrection(
-                            original_word=original_word.text,
-                            corrected_word="",
-                            segment_index=0,
-                            original_position=position,
-                            confidence=correction["confidence"],
-                            source="LLM",
-                            reason=correction["reason"],
-                            alternatives={},
-                            is_deletion=True,
-                            handler=self.name,
-                            reference_positions=reference_positions,
-                            word_id=correction["word_id"],
-                            corrected_word_id=None,
-                        )
-                    )
-
-            self.logger.debug(f"Created {len(corrections)} corrections: {[f'{c.original_word}->{c.corrected_word}' for c in corrections]}")
-            return corrections
-
-        except Exception as e:
-            self.logger.error(f"Unexpected error in LLM handler: {e}")
-            return []
lyrics_transcriber/correction/handlers/llm_providers.py (deleted)

@@ -1,60 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Optional
-import logging
-from ollama import chat as ollama_chat
-import openai
-
-
-class LLMProvider(ABC):
-    """Abstract base class for LLM providers."""
-
-    def __init__(self, logger: Optional[logging.Logger] = None):
-        self.logger = logger or logging.getLogger(__name__)
-
-    @abstractmethod
-    def generate_response(self, prompt: str, **kwargs) -> str:
-        """Generate a response from the LLM.
-
-        Args:
-            prompt: The prompt to send to the LLM
-            **kwargs: Additional provider-specific parameters
-
-        Returns:
-            str: The LLM's response
-        """
-        pass
-
-
-class OllamaProvider(LLMProvider):
-    """Provider for local Ollama models."""
-
-    def __init__(self, model: str, logger: Optional[logging.Logger] = None):
-        super().__init__(logger)
-        self.model = model
-
-    def generate_response(self, prompt: str, **kwargs) -> str:
-        try:
-            response = ollama_chat(model=self.model, messages=[{"role": "user", "content": prompt}], format="json")
-            return response.message.content
-        except Exception as e:
-            self.logger.error(f"Error generating Ollama response: {e}")
-            raise
-
-
-class OpenAIProvider(LLMProvider):
-    """Provider for OpenAI-compatible APIs (including OpenRouter)."""
-
-    def __init__(self, model: str, api_key: str, base_url: Optional[str] = None, logger: Optional[logging.Logger] = None):
-        super().__init__(logger)
-        self.model = model
-        self.client = openai.OpenAI(api_key=api_key, base_url=base_url)
-
-    def generate_response(self, prompt: str, **kwargs) -> str:
-        try:
-            response = self.client.chat.completions.create(
-                model=self.model, messages=[{"role": "user", "content": prompt}], response_format={"type": "json_object"}, **kwargs
-            )
-            return response.choices[0].message.content
-        except Exception as e:
-            self.logger.error(f"Error generating OpenAI response: {e}")
-            raise
Files without changes:
- {karaoke_gen-0.81.1.dist-info → karaoke_gen-0.82.0.dist-info}/WHEEL
- {karaoke_gen-0.81.1.dist-info → karaoke_gen-0.82.0.dist-info}/entry_points.txt
- {karaoke_gen-0.81.1.dist-info → karaoke_gen-0.82.0.dist-info}/licenses/LICENSE