karaoke-gen 0.76.20__py3-none-any.whl → 0.82.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- karaoke_gen/instrumental_review/static/index.html +179 -16
- karaoke_gen/karaoke_gen.py +5 -4
- karaoke_gen/lyrics_processor.py +25 -6
- {karaoke_gen-0.76.20.dist-info → karaoke_gen-0.82.0.dist-info}/METADATA +79 -3
- {karaoke_gen-0.76.20.dist-info → karaoke_gen-0.82.0.dist-info}/RECORD +33 -31
- lyrics_transcriber/core/config.py +8 -0
- lyrics_transcriber/core/controller.py +43 -1
- lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +178 -5
- lyrics_transcriber/correction/agentic/prompts/__init__.py +23 -0
- lyrics_transcriber/correction/agentic/prompts/classifier.py +66 -6
- lyrics_transcriber/correction/agentic/prompts/langfuse_prompts.py +298 -0
- lyrics_transcriber/correction/agentic/providers/config.py +7 -0
- lyrics_transcriber/correction/agentic/providers/constants.py +1 -1
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +22 -7
- lyrics_transcriber/correction/agentic/providers/model_factory.py +28 -13
- lyrics_transcriber/correction/agentic/router.py +18 -13
- lyrics_transcriber/correction/corrector.py +1 -45
- lyrics_transcriber/frontend/.gitignore +1 -0
- lyrics_transcriber/frontend/e2e/agentic-corrections.spec.ts +207 -0
- lyrics_transcriber/frontend/e2e/fixtures/agentic-correction-data.json +226 -0
- lyrics_transcriber/frontend/package.json +4 -1
- lyrics_transcriber/frontend/playwright.config.ts +1 -1
- lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +34 -30
- lyrics_transcriber/frontend/src/components/Header.tsx +141 -34
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +120 -3
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +11 -1
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +122 -35
- lyrics_transcriber/frontend/src/components/shared/types.ts +6 -0
- lyrics_transcriber/output/generator.py +50 -3
- lyrics_transcriber/transcribers/local_whisper.py +260 -0
- lyrics_transcriber/correction/handlers/llm.py +0 -293
- lyrics_transcriber/correction/handlers/llm_providers.py +0 -60
- {karaoke_gen-0.76.20.dist-info → karaoke_gen-0.82.0.dist-info}/WHEEL +0 -0
- {karaoke_gen-0.76.20.dist-info → karaoke_gen-0.82.0.dist-info}/entry_points.txt +0 -0
- {karaoke_gen-0.76.20.dist-info → karaoke_gen-0.82.0.dist-info}/licenses/LICENSE +0 -0
lyrics_transcriber/correction/agentic/prompts/langfuse_prompts.py (new file)

```diff
@@ -0,0 +1,298 @@
+"""LangFuse prompt management for agentic correction.
+
+This module provides prompt fetching from LangFuse, enabling dynamic prompt
+iteration without code redeployment.
+"""
+
+from typing import Dict, List, Optional, Any
+import logging
+import os
+
+logger = logging.getLogger(__name__)
+
+
+class LangFusePromptError(Exception):
+    """Raised when LangFuse prompt fetching fails."""
+    pass
+
+
+class LangFuseDatasetError(Exception):
+    """Raised when LangFuse dataset fetching fails."""
+    pass
+
+
+class LangFusePromptService:
+    """Fetches prompts and datasets from LangFuse for agentic correction.
+
+    This service handles:
+    - Fetching prompt templates from LangFuse
+    - Fetching few-shot examples from LangFuse datasets
+    - Compiling prompts with dynamic variables
+    - Fail-fast behavior when LangFuse is configured but unavailable
+
+    When LangFuse keys are not configured, falls back to hardcoded prompts
+    for local development.
+    """
+
+    # Prompt and dataset names in LangFuse
+    CLASSIFIER_PROMPT_NAME = "gap-classifier"
+    EXAMPLES_DATASET_NAME = "gap-classifier-examples"
+
+    def __init__(self, client: Optional[Any] = None):
+        """Initialize the prompt service.
+
+        Args:
+            client: Optional pre-initialized Langfuse client (for testing).
+                If None, will initialize from environment variables.
+        """
+        self._client = client
+        self._initialized = False
+        self._use_langfuse = self._should_use_langfuse()
+
+        if self._use_langfuse and client is None:
+            self._init_client()
+
+    def _should_use_langfuse(self) -> bool:
+        """Check if LangFuse credentials are configured."""
+        public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
+        secret_key = os.getenv("LANGFUSE_SECRET_KEY")
+        return bool(public_key and secret_key)
+
+    def _init_client(self) -> None:
+        """Initialize the Langfuse client using the shared singleton."""
+        from ..observability.langfuse_integration import get_langfuse_client, LangFuseConfigError
+
+        try:
+            self._client = get_langfuse_client()
+            if self._client:
+                self._initialized = True
+                logger.info("LangFuse prompt service initialized")
+            else:
+                logger.debug("LangFuse keys not configured, will use hardcoded prompts")
+        except LangFuseConfigError as e:
+            # Re-raise as RuntimeError for consistent error handling
+            raise RuntimeError(str(e)) from e
+
+    def get_classification_prompt(
+        self,
+        gap_text: str,
+        preceding_words: str,
+        following_words: str,
+        reference_contexts: Dict[str, str],
+        artist: Optional[str] = None,
+        title: Optional[str] = None,
+        gap_id: Optional[str] = None
+    ) -> str:
+        """Fetch and compile the gap classification prompt.
+
+        If LangFuse is configured, fetches the prompt template and examples
+        from LangFuse. Otherwise, falls back to hardcoded prompts.
+
+        Args:
+            gap_text: The text of the gap that needs classification
+            preceding_words: Text immediately before the gap
+            following_words: Text immediately after the gap
+            reference_contexts: Dictionary of reference lyrics from each source
+            artist: Song artist name for context
+            title: Song title for context
+            gap_id: Identifier for the gap
+
+        Returns:
+            Compiled prompt string ready for LLM
+
+        Raises:
+            LangFusePromptError: If LangFuse is configured but prompt fetch fails
+        """
+        if not self._use_langfuse:
+            # Fall back to hardcoded prompt for development
+            from .classifier import build_classification_prompt_hardcoded
+            return build_classification_prompt_hardcoded(
+                gap_text=gap_text,
+                preceding_words=preceding_words,
+                following_words=following_words,
+                reference_contexts=reference_contexts,
+                artist=artist,
+                title=title,
+                gap_id=gap_id
+            )
+
+        # Fetch from LangFuse
+        try:
+            prompt_template = self._fetch_prompt(self.CLASSIFIER_PROMPT_NAME)
+            examples = self._fetch_examples()
+
+            # Build component strings
+            song_context = self._build_song_context(artist, title)
+            examples_text = self._format_examples(examples)
+            references_text = self._format_references(reference_contexts)
+
+            # Compile the prompt with variables
+            compiled = prompt_template.compile(
+                song_context=song_context,
+                examples_text=examples_text,
+                gap_id=gap_id or "unknown",
+                preceding_words=preceding_words,
+                gap_text=gap_text,
+                following_words=following_words,
+                references_text=references_text
+            )
+
+            logger.debug(f"Compiled LangFuse prompt for gap {gap_id}")
+            return compiled
+
+        except Exception as e:
+            raise LangFusePromptError(
+                f"Failed to fetch/compile prompt from LangFuse: {e}"
+            ) from e
+
+    def _fetch_prompt(self, name: str, label: str = "production") -> Any:
+        """Fetch a prompt template from LangFuse.
+
+        Args:
+            name: The prompt name in LangFuse
+            label: Prompt label to fetch (default: "production"). Falls back to
+                version 1 if labeled version not found.
+
+        Returns:
+            LangFuse prompt object
+
+        Raises:
+            LangFusePromptError: If fetch fails
+        """
+        if not self._client:
+            raise LangFusePromptError("LangFuse client not initialized")
+
+        try:
+            # Try to fetch with the specified label (default: production)
+            prompt = self._client.get_prompt(name, label=label)
+            logger.debug(f"Fetched prompt '{name}' (label={label}) from LangFuse")
+            return prompt
+        except Exception as label_error:
+            # If labeled version not found, try fetching version 1 as fallback
+            # This handles newly created prompts that haven't been promoted yet
+            try:
+                prompt = self._client.get_prompt(name, version=1)
+                logger.warning(
+                    f"Prompt '{name}' label '{label}' not found, using version 1. "
+                    f"Consider promoting this prompt in LangFuse UI."
+                )
+                return prompt
+            except Exception as version_error:
+                raise LangFusePromptError(
+                    f"Failed to fetch prompt '{name}' from LangFuse: "
+                    f"Label '{label}' error: {label_error}, "
+                    f"Version 1 fallback error: {version_error}"
+                ) from version_error
+
+    def _fetch_examples(self) -> List[Dict[str, Any]]:
+        """Fetch few-shot examples from LangFuse dataset.
+
+        Returns:
+            List of example dictionaries
+
+        Raises:
+            LangFuseDatasetError: If fetch fails
+        """
+        if not self._client:
+            raise LangFuseDatasetError("LangFuse client not initialized")
+
+        try:
+            dataset = self._client.get_dataset(self.EXAMPLES_DATASET_NAME)
+            examples = []
+            for item in dataset.items:
+                # Dataset items have 'input' field with the example data
+                if hasattr(item, 'input') and item.input:
+                    examples.append(item.input)
+
+            logger.debug(f"Fetched {len(examples)} examples from LangFuse dataset")
+            return examples
+        except Exception as e:
+            raise LangFuseDatasetError(
+                f"Failed to fetch dataset '{self.EXAMPLES_DATASET_NAME}' from LangFuse: {e}"
+            ) from e
+
+    def _build_song_context(self, artist: Optional[str], title: Optional[str]) -> str:
+        """Build song context section for the prompt."""
+        if artist and title:
+            return (
+                f"\n## Song Context\n\n"
+                f"**Artist:** {artist}\n"
+                f"**Title:** {title}\n\n"
+                f"Note: The song title and artist name may help identify proper nouns "
+                f"or unusual words that could be mis-heard.\n"
+            )
+        return ""
+
+    def _format_examples(self, examples: List[Dict[str, Any]]) -> str:
+        """Format few-shot examples for inclusion in prompt.
+
+        Args:
+            examples: List of example dictionaries from LangFuse dataset
+
+        Returns:
+            Formatted examples string
+        """
+        if not examples:
+            return ""
+
+        # Group examples by category
+        examples_by_category: Dict[str, List[Dict]] = {}
+        for ex in examples:
+            category = ex.get("category", "unknown")
+            if category not in examples_by_category:
+                examples_by_category[category] = []
+            examples_by_category[category].append(ex)
+
+        # Build formatted text
+        text = "## Example Classifications\n\n"
+        for category, category_examples in examples_by_category.items():
+            text += f"### {category.upper().replace('_', ' ')}\n\n"
+            for ex in category_examples[:2]:  # Limit to 2 examples per category
+                text += f"**Gap:** {ex.get('gap_text', '')}\n"
+                text += f"**Context:** ...{ex.get('preceding', '')}... [GAP] ...{ex.get('following', '')}...\n"
+                if 'reference' in ex:
+                    text += f"**Reference:** {ex['reference']}\n"
+                text += f"**Reasoning:** {ex.get('reasoning', '')}\n"
+                text += f"**Action:** {ex.get('action', '')}\n\n"
+
+        return text
+
+    def _format_references(self, reference_contexts: Dict[str, str]) -> str:
+        """Format reference lyrics for inclusion in prompt.
+
+        Args:
+            reference_contexts: Dictionary of reference lyrics from each source
+
+        Returns:
+            Formatted references string
+        """
+        if not reference_contexts:
+            return ""
+
+        text = "## Available Reference Lyrics\n\n"
+        for source, context in reference_contexts.items():
+            text += f"**{source.upper()}:** {context}\n\n"
+
+        return text
+
+
+# Module-level singleton for convenience
+_prompt_service: Optional[LangFusePromptService] = None
+
+
+def get_prompt_service() -> LangFusePromptService:
+    """Get or create the global prompt service instance.
+
+    Returns:
+        LangFusePromptService singleton instance
+    """
+    global _prompt_service
+    if _prompt_service is None:
+        _prompt_service = LangFusePromptService()
+    return _prompt_service
+
+
+def reset_prompt_service() -> None:
+    """Reset the global prompt service instance (for testing)."""
+    global _prompt_service
+    _prompt_service = None
```
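For orientation, here is a minimal usage sketch of the service added above. The calling code and sample values are hypothetical; the names (`get_prompt_service`, `get_classification_prompt`, `LangFusePromptError`) come from the new module itself:

```python
# Hypothetical caller; API names taken from langfuse_prompts.py above.
from lyrics_transcriber.correction.agentic.prompts.langfuse_prompts import (
    LangFusePromptError,
    get_prompt_service,
)

service = get_prompt_service()  # singleton; uses LANGFUSE_* env vars when set
try:
    prompt = service.get_classification_prompt(
        gap_text="crown of horns",              # sample values for illustration
        preceding_words="you could have it all",
        following_words="my empire of dirt",
        reference_contexts={"genius": "I wear this crown of thorns"},
        artist="Nine Inch Nails",
        title="Hurt",
        gap_id="gap-001",
    )
except LangFusePromptError:
    # Fail-fast: LangFuse is configured but the prompt could not be fetched,
    # so don't silently continue with a stale hardcoded prompt.
    raise
```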
lyrics_transcriber/correction/agentic/providers/config.py

```diff
@@ -18,6 +18,11 @@ class ProviderConfig:
     privacy_mode: bool
     cache_dir: str
 
+    # GCP/Vertex AI settings
+    # Note: Gemini 3 models require 'global' location (not regional like us-central1)
+    gcp_project_id: Optional[str] = None
+    gcp_location: str = "global"
+
     request_timeout_seconds: float = 30.0
     max_retries: int = 2
     retry_backoff_base_seconds: float = 0.2
@@ -46,6 +51,8 @@ class ProviderConfig:
            openrouter_api_key=os.getenv("OPENROUTER_API_KEY"),
            privacy_mode=os.getenv("PRIVACY_MODE", "false").lower() in {"1", "true", "yes"},
            cache_dir=cache_dir,
+           gcp_project_id=os.getenv("GOOGLE_CLOUD_PROJECT") or os.getenv("GCP_PROJECT_ID"),
+           gcp_location=os.getenv("GCP_LOCATION", "global"),
            request_timeout_seconds=float(os.getenv("AGENTIC_TIMEOUT_SECONDS", "30.0")),
            max_retries=int(os.getenv("AGENTIC_MAX_RETRIES", "2")),
            retry_backoff_base_seconds=float(os.getenv("AGENTIC_BACKOFF_BASE_SECONDS", "0.2")),
```
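Assuming the second hunk above sits inside the usual environment-reading factory classmethod (its name is not shown in this diff; `from_env` below is a guess), configuration would flow roughly like this:

```python
# Sketch only: `ProviderConfig.from_env` is an assumed name; the hunk above
# shows only the body of the environment-reading factory.
import os

os.environ["GOOGLE_CLOUD_PROJECT"] = "my-gcp-project"  # or GCP_PROJECT_ID
os.environ["GCP_LOCATION"] = "global"  # Gemini 3 models require 'global'

from lyrics_transcriber.correction.agentic.providers.config import ProviderConfig

config = ProviderConfig.from_env()  # assumed factory name
print(config.gcp_project_id, config.gcp_location)  # my-gcp-project global
```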
lyrics_transcriber/correction/agentic/providers/constants.py

```diff
@@ -8,7 +8,7 @@ RESPONSE_LOG_LENGTH = 500  # Characters to log from responses
 MODEL_SPEC_FORMAT = "provider/model"  # Expected format for model identifiers
 
 # Default Langfuse host
-DEFAULT_LANGFUSE_HOST = "https://cloud.langfuse.com"
+DEFAULT_LANGFUSE_HOST = "https://us.cloud.langfuse.com"
 
 # Raw response indicator
 RAW_RESPONSE_KEY = "raw"  # Key used to wrap unparsed responses
```
lyrics_transcriber/correction/agentic/providers/langchain_bridge.py

```diff
@@ -187,26 +187,41 @@ class LangChainBridge(BaseAIProvider):
 
     def _invoke_model(self, prompt: str) -> str:
         """Invoke the chat model with a prompt.
-
+
         This is a simple wrapper that can be passed to the retry executor.
-
+
         Args:
             prompt: The prompt to send
-
+
         Returns:
             Response content as string
-
+
         Raises:
             Exception: Any error from the model invocation
         """
         from langchain_core.messages import HumanMessage
-
+
         # Prepare config with session_id in metadata (Langfuse format)
         config = {}
         if hasattr(self, '_session_id') and self._session_id:
             config["metadata"] = {"langfuse_session_id": self._session_id}
             logger.debug(f"🤖 [LangChain] Invoking with session_id: {self._session_id}")
-
+
         response = self._chat_model.invoke([HumanMessage(content=prompt)], config=config)
-
+        content = response.content
+
+        # Handle multimodal response format from Gemini 3+ models
+        # Response can be a list of content parts: [{'type': 'text', 'text': '...'}]
+        if isinstance(content, list):
+            # Extract text from the first text content part
+            for part in content:
+                if isinstance(part, dict) and part.get('type') == 'text':
+                    return part.get('text', '')
+            # Fallback: concatenate all text parts
+            return ''.join(
+                part.get('text', '') if isinstance(part, dict) else str(part)
+                for part in content
+            )
+
+        return content
 
```
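The list-handling branch added above can be read as a small pure function. This sketch restates it standalone, with a sample payload in the content-parts shape the comments describe:

```python
# Standalone restatement of the normalization logic added to _invoke_model:
# LangChain chat models may return response.content as a string or as a
# list of content parts (e.g. from Gemini 3+ models).
def normalize_content(content):
    if isinstance(content, list):
        # Prefer the first explicit text part
        for part in content:
            if isinstance(part, dict) and part.get('type') == 'text':
                return part.get('text', '')
        # Fallback: concatenate whatever text can be recovered
        return ''.join(
            part.get('text', '') if isinstance(part, dict) else str(part)
            for part in content
        )
    return content

# Multimodal content-parts payload collapses to a plain string
assert normalize_content([{'type': 'text', 'text': 'hello'}]) == 'hello'
# Plain string responses pass through unchanged
assert normalize_content('hello') == 'hello'
```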
lyrics_transcriber/correction/agentic/providers/model_factory.py

```diff
@@ -100,19 +100,10 @@ class ModelFactory:
             return
 
         try:
-            from langfuse import Langfuse
             from langfuse.langchain import CallbackHandler
-
-            #
-
-                public_key=public_key,
-                secret_key=secret_key,
-                host=os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com"),
-            )
-
-            # Then create callback handler with the same public_key
-            # The handler will use the initialized client
-            self._langfuse_handler = CallbackHandler(public_key=public_key)
+
+            # CallbackHandler auto-discovers credentials from environment variables
+            self._langfuse_handler = CallbackHandler()
             logger.info(f"🤖 Langfuse callback handler initialized for {model_spec}")
         except Exception as e:
             # If Langfuse keys are set, we MUST fail fast
@@ -155,6 +146,8 @@ class ModelFactory:
                 return self._create_openai_model(model_name, callbacks, config)
             elif provider == "anthropic":
                 return self._create_anthropic_model(model_name, callbacks, config)
+            elif provider in ("vertexai", "google"):
+                return self._create_vertexai_model(model_name, callbacks, config)
             else:
                 raise ValueError(f"Unsupported provider: {provider}")
         except ImportError as e:
@@ -197,7 +190,7 @@ class ModelFactory:
     ) -> Any:
         """Create ChatAnthropic model."""
         from langchain_anthropic import ChatAnthropic
-
+
         model = ChatAnthropic(
             model=model_name,
             timeout=config.request_timeout_seconds,
@@ -207,3 +200,25 @@ class ModelFactory:
         logger.debug(f"🤖 Created Anthropic model: {model_name}")
         return model
 
+    def _create_vertexai_model(
+        self, model_name: str, callbacks: List[Any], config: ProviderConfig
+    ) -> Any:
+        """Create ChatVertexAI model for Google Gemini via Vertex AI.
+
+        Uses Application Default Credentials (ADC) for authentication.
+        In Cloud Run, this uses the service account automatically.
+        Locally, run: gcloud auth application-default login
+        """
+        from langchain_google_vertexai import ChatVertexAI
+
+        model = ChatVertexAI(
+            model=model_name,
+            project=config.gcp_project_id,
+            location=config.gcp_location,
+            timeout=config.request_timeout_seconds,
+            max_retries=config.max_retries,
+            callbacks=callbacks,
+        )
+        logger.debug(f"🤖 Created Vertex AI model: {model_name} (project={config.gcp_project_id})")
+        return model
+
```
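A minimal sketch of what the new `_create_vertexai_model` path wires up, called directly. The kwargs mirror the diff above; the project ID is a placeholder, and it assumes the `langchain-google-vertexai` package is installed and ADC credentials are available:

```python
# Sketch: what _create_vertexai_model builds, used standalone.
# Assumes `gcloud auth application-default login` has been run locally.
from langchain_google_vertexai import ChatVertexAI
from langchain_core.messages import HumanMessage

chat = ChatVertexAI(
    model="gemini-3-flash-preview",  # Gemini 3 models require location="global"
    project="my-gcp-project",        # placeholder; comes from GOOGLE_CLOUD_PROJECT
    location="global",
    timeout=30.0,
    max_retries=2,
)
response = chat.invoke([HumanMessage(content="Classify this lyric gap...")])
print(response.content)
```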
lyrics_transcriber/correction/agentic/router.py

```diff
@@ -5,6 +5,10 @@ from typing import Dict, Any
 
 from .providers.config import ProviderConfig
 
+# Default model for cloud deployments - Gemini 3 Flash via Vertex AI
+# Note: Gemini 3 models require 'global' location (not regional like us-central1)
+DEFAULT_CLOUD_MODEL = "vertexai/gemini-3-flash-preview"
+
 
 class ModelRouter:
     """Rules-based routing by gap type/length/uncertainty (scaffold)."""
@@ -14,22 +18,23 @@ class ModelRouter:
 
     def choose_model(self, gap_type: str, uncertainty: float) -> str:
         """Choose appropriate model based on gap characteristics.
-
+
         Returns model identifier in format "provider/model" for LangChain:
-        - "
-        - "
+        - "vertexai/gemini-3-flash-preview" for Gemini via Vertex AI (default)
+        - "ollama/llama3.2:latest" for local Ollama models
+        - "openai/gpt-4" for OpenAI models
         - "anthropic/claude-3-sonnet-20240229" for Anthropic models
         """
-        #
+        # Check for explicit model override from environment
+        env_model = os.getenv("AGENTIC_AI_MODEL")
+        if env_model:
+            return env_model
+
+        # Privacy mode: use local Ollama
         if self._config.privacy_mode:
-
-
-
-
-            if uncertainty > 0.5:
-                return "anthropic/claude-3-sonnet-20240229"
-
-            # Default to GPT-4 for general cases
-            return "openai/gpt-4"
+            return "ollama/llama3.2:latest"
+
+        # Default to Gemini 3 Flash for all cases (fast, cost-effective, latest capabilities)
+        return DEFAULT_CLOUD_MODEL
 
 
```
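The routing precedence implemented above reduces to three ordered rules; this sketch restates it as a pure function (the `gap_type`/`uncertainty` parameters are dropped since the new logic no longer consults them):

```python
# Sketch of the routing precedence from the router.py diff above.
import os

DEFAULT_CLOUD_MODEL = "vertexai/gemini-3-flash-preview"

def choose_model(privacy_mode: bool) -> str:
    # 1. An explicit AGENTIC_AI_MODEL override always wins
    env_model = os.getenv("AGENTIC_AI_MODEL")
    if env_model:
        return env_model
    # 2. Privacy mode keeps inference local via Ollama
    if privacy_mode:
        return "ollama/llama3.2:latest"
    # 3. Otherwise the Gemini 3 Flash cloud default
    return DEFAULT_CLOUD_MODEL

assert choose_model(privacy_mode=True) == "ollama/llama3.2:latest"
os.environ["AGENTIC_AI_MODEL"] = "anthropic/claude-3-sonnet-20240229"
assert choose_model(privacy_mode=True) == "anthropic/claude-3-sonnet-20240229"
```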
lyrics_transcriber/correction/corrector.py

```diff
@@ -6,7 +6,6 @@ import os
 import shortuuid
 
 from lyrics_transcriber.correction.handlers.levenshtein import LevenshteinHandler
-from lyrics_transcriber.correction.handlers.llm import LLMHandler
 from lyrics_transcriber.correction.handlers.no_space_punct_match import NoSpacePunctuationMatchHandler
 from lyrics_transcriber.correction.handlers.relaxed_word_count_match import RelaxedWordCountMatchHandler
 from lyrics_transcriber.correction.handlers.repeat import RepeatCorrectionHandler
@@ -27,7 +26,6 @@ from lyrics_transcriber.correction.anchor_sequence import AnchorSequenceFinder
 from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
 from lyrics_transcriber.correction.handlers.extend_anchor import ExtendAnchorHandler
 from lyrics_transcriber.utils.word_utils import WordUtils
-from lyrics_transcriber.correction.handlers.llm_providers import OllamaProvider, OpenAIProvider
 
 
 class LyricsCorrector:
@@ -57,60 +55,18 @@ class LyricsCorrector:
         ]
 
         # Create all handlers but respect enabled_handlers if provided
+        # Note: Legacy LLMHandler removed - use AgenticCorrector via USE_AGENTIC_AI=1 instead
         all_handlers = [
             ("ExtendAnchorHandler", ExtendAnchorHandler(logger=self.logger)),
             ("WordCountMatchHandler", WordCountMatchHandler(logger=self.logger)),
             ("SyllablesMatchHandler", SyllablesMatchHandler(logger=self.logger)),
             ("RelaxedWordCountMatchHandler", RelaxedWordCountMatchHandler(logger=self.logger)),
             ("NoSpacePunctuationMatchHandler", NoSpacePunctuationMatchHandler(logger=self.logger)),
-            (
-                "LLMHandler_Ollama_R17B",
-                LLMHandler(
-                    provider=OllamaProvider(model="deepseek-r1:7b", logger=self.logger),
-                    name="LLMHandler_Ollama_R17B",
-                    logger=self.logger,
-                    cache_dir=self._cache_dir,
-                ),
-            ),
             ("RepeatCorrectionHandler", RepeatCorrectionHandler(logger=self.logger)),
             ("SoundAlikeHandler", SoundAlikeHandler(logger=self.logger)),
             ("LevenshteinHandler", LevenshteinHandler(logger=self.logger)),
         ]
 
-        # Add OpenRouter handlers only if API key is available
-        if os.getenv("OPENROUTER_API_KEY"):
-            openrouter_handlers = [
-                (
-                    "LLMHandler_OpenRouter_Sonnet",
-                    LLMHandler(
-                        provider=OpenAIProvider(
-                            model="anthropic/claude-3-sonnet",
-                            api_key=os.getenv("OPENROUTER_API_KEY"),
-                            base_url="https://openrouter.ai/api/v1",
-                            logger=self.logger,
-                        ),
-                        name="LLMHandler_OpenRouter_Sonnet",
-                        logger=self.logger,
-                        cache_dir=self._cache_dir,
-                    ),
-                ),
-                (
-                    "LLMHandler_OpenRouter_R1",
-                    LLMHandler(
-                        provider=OpenAIProvider(
-                            model="deepseek/deepseek-r1",
-                            api_key=os.getenv("OPENROUTER_API_KEY"),
-                            base_url="https://openrouter.ai/api/v1",
-                            logger=self.logger,
-                        ),
-                        name="LLMHandler_OpenRouter_R1",
-                        logger=self.logger,
-                        cache_dir=self._cache_dir,
-                    ),
-                ),
-            ]
-            all_handlers.extend(openrouter_handlers)
-
         # Store all handler information
         self.all_handlers = [
             {
```
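Opting into the replacement agentic path is environment-driven. This sketch shows only the flags named in the diff above; the `USE_AGENTIC_AI` flag and `AgenticCorrector` name come from the removal comment, and how the corrector consumes them internally is not shown in this diff:

```python
# Sketch: enabling the agentic path that replaces the deleted LLM handlers.
# USE_AGENTIC_AI and AGENTIC_AI_MODEL are the env vars named in the diffs above.
import os

os.environ["USE_AGENTIC_AI"] = "1"  # route gap correction to AgenticCorrector
os.environ["AGENTIC_AI_MODEL"] = "vertexai/gemini-3-flash-preview"  # optional override
# With neither LangFuse keys nor a cloud model configured, the hardcoded
# prompts and local Ollama path remain available for development.
```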