abstractcore 2.9.1__py3-none-any.whl → 2.11.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/__init__.py +7 -27
- abstractcore/apps/extractor.py +33 -100
- abstractcore/apps/intent.py +19 -0
- abstractcore/apps/judge.py +20 -1
- abstractcore/apps/summarizer.py +20 -1
- abstractcore/architectures/detection.py +34 -1
- abstractcore/architectures/response_postprocessing.py +313 -0
- abstractcore/assets/architecture_formats.json +38 -8
- abstractcore/assets/model_capabilities.json +781 -160
- abstractcore/compression/__init__.py +1 -2
- abstractcore/compression/glyph_processor.py +6 -4
- abstractcore/config/main.py +31 -19
- abstractcore/config/manager.py +389 -11
- abstractcore/config/vision_config.py +5 -5
- abstractcore/core/interface.py +151 -3
- abstractcore/core/session.py +16 -10
- abstractcore/download.py +1 -1
- abstractcore/embeddings/manager.py +20 -6
- abstractcore/endpoint/__init__.py +2 -0
- abstractcore/endpoint/app.py +458 -0
- abstractcore/mcp/client.py +3 -1
- abstractcore/media/__init__.py +52 -17
- abstractcore/media/auto_handler.py +42 -22
- abstractcore/media/base.py +44 -1
- abstractcore/media/capabilities.py +12 -33
- abstractcore/media/enrichment.py +105 -0
- abstractcore/media/handlers/anthropic_handler.py +19 -28
- abstractcore/media/handlers/local_handler.py +124 -70
- abstractcore/media/handlers/openai_handler.py +19 -31
- abstractcore/media/processors/__init__.py +4 -2
- abstractcore/media/processors/audio_processor.py +57 -0
- abstractcore/media/processors/office_processor.py +8 -3
- abstractcore/media/processors/pdf_processor.py +46 -3
- abstractcore/media/processors/text_processor.py +22 -24
- abstractcore/media/processors/video_processor.py +58 -0
- abstractcore/media/types.py +97 -4
- abstractcore/media/utils/image_scaler.py +20 -2
- abstractcore/media/utils/video_frames.py +219 -0
- abstractcore/media/vision_fallback.py +136 -22
- abstractcore/processing/__init__.py +32 -3
- abstractcore/processing/basic_deepsearch.py +15 -10
- abstractcore/processing/basic_intent.py +3 -2
- abstractcore/processing/basic_judge.py +3 -2
- abstractcore/processing/basic_summarizer.py +1 -1
- abstractcore/providers/__init__.py +3 -1
- abstractcore/providers/anthropic_provider.py +95 -8
- abstractcore/providers/base.py +1516 -81
- abstractcore/providers/huggingface_provider.py +546 -69
- abstractcore/providers/lmstudio_provider.py +35 -923
- abstractcore/providers/mlx_provider.py +382 -35
- abstractcore/providers/model_capabilities.py +5 -1
- abstractcore/providers/ollama_provider.py +99 -15
- abstractcore/providers/openai_compatible_provider.py +406 -180
- abstractcore/providers/openai_provider.py +188 -44
- abstractcore/providers/openrouter_provider.py +76 -0
- abstractcore/providers/registry.py +61 -5
- abstractcore/providers/streaming.py +138 -33
- abstractcore/providers/vllm_provider.py +92 -817
- abstractcore/server/app.py +461 -13
- abstractcore/server/audio_endpoints.py +139 -0
- abstractcore/server/vision_endpoints.py +1319 -0
- abstractcore/structured/handler.py +316 -41
- abstractcore/tools/common_tools.py +5501 -2012
- abstractcore/tools/comms_tools.py +1641 -0
- abstractcore/tools/core.py +37 -7
- abstractcore/tools/handler.py +4 -9
- abstractcore/tools/parser.py +49 -2
- abstractcore/tools/tag_rewriter.py +2 -1
- abstractcore/tools/telegram_tdlib.py +407 -0
- abstractcore/tools/telegram_tools.py +261 -0
- abstractcore/utils/cli.py +1085 -72
- abstractcore/utils/token_utils.py +2 -0
- abstractcore/utils/truncation.py +29 -0
- abstractcore/utils/version.py +3 -4
- abstractcore/utils/vlm_token_calculator.py +12 -2
- abstractcore-2.11.2.dist-info/METADATA +562 -0
- abstractcore-2.11.2.dist-info/RECORD +133 -0
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/WHEEL +1 -1
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/entry_points.txt +1 -0
- abstractcore-2.9.1.dist-info/METADATA +0 -1190
- abstractcore-2.9.1.dist-info/RECORD +0 -119
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/top_level.txt +0 -0
abstractcore/providers/openai_compatible_provider.py

@@ -23,11 +23,71 @@ try:
 except ImportError:
     PYDANTIC_AVAILABLE = False
     BaseModel = None
+
+
+def _inline_json_schema_refs(schema: Dict[str, Any]) -> Dict[str, Any]:
+    """Inline local $defs/$ref references in a JSON Schema dict.
+
+    Some OpenAI-compatible servers only partially support `$defs`/`$ref` inside
+    `response_format: {type:'json_schema'}`. Inlining keeps schemas simple and
+    improves compatibility for structured outputs.
+    """
+
+    defs = schema.get("$defs")
+    if not isinstance(defs, dict) or not defs:
+        return schema
+
+    def _resolve(node: Any, *, seen: set[str]) -> Any:
+        if isinstance(node, dict):
+            ref = node.get("$ref")
+            if isinstance(ref, str) and ref.startswith("#/$defs/"):
+                key = ref[len("#/$defs/"):]
+                target = defs.get(key)
+                if isinstance(key, str) and key and isinstance(target, dict):
+                    if key in seen:
+                        return node
+                    seen.add(key)
+                    resolved_target = _resolve(dict(target), seen=seen)
+                    seen.remove(key)
+                    if isinstance(resolved_target, dict):
+                        merged: Dict[str, Any] = dict(resolved_target)
+                        for k, v in node.items():
+                            if k == "$ref":
+                                continue
+                            merged[k] = _resolve(v, seen=seen)
+                        return merged
+
+            out: Dict[str, Any] = {}
+            for k, v in node.items():
+                if k == "$defs":
+                    continue
+                out[k] = _resolve(v, seen=seen)
+            return out
+
+        if isinstance(node, list):
+            return [_resolve(x, seen=seen) for x in node]
+
+        return node
+
+    try:
+        base = {k: v for k, v in schema.items() if k != "$defs"}
+        inlined = _resolve(base, seen=set())
+        return inlined if isinstance(inlined, dict) and inlined else schema
+    except Exception:
+        return schema
 from .base import BaseProvider
+from ..architectures.response_postprocessing import extract_reasoning_from_message
 from ..core.types import GenerateResponse
-from ..exceptions import
-
-
+from ..exceptions import (
+    ProviderAPIError,
+    ModelNotFoundError,
+    AuthenticationError,
+    RateLimitError,
+    InvalidRequestError,
+    format_model_error,
+)
+from ..tools import UniversalToolHandler
+from ..utils.truncation import preview_text


 class OpenAICompatibleProvider(BaseProvider):
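
The new `_inline_json_schema_refs` helper exists because Pydantic emits nested models as `$defs` entries referenced via `$ref`, and some OpenAI-compatible servers reject or mishandle those references inside `response_format`. A minimal sketch of the shape it flattens; the `Person`/`Address` models below are illustrative, not from the package:

```python
# Illustrative only: the $defs/$ref shape Pydantic emits for nested models,
# which _inline_json_schema_refs flattens before it is sent as response_format.
from pydantic import BaseModel


class Address(BaseModel):
    city: str


class Person(BaseModel):
    name: str
    address: Address


schema = Person.model_json_schema()
print(schema["properties"]["address"])       # {'$ref': '#/$defs/Address'}
print("Address" in schema.get("$defs", {}))  # True

# After inlining, the $ref above is replaced by the Address sub-schema itself
# and the top-level "$defs" key is dropped, so simpler servers can consume it.
```

This only matters on the structured-output path further down in this file, i.e. when a call provides a `response_model` and the provider builds a `response_format` payload from its JSON schema.
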
@@ -46,40 +106,40 @@ class OpenAICompatibleProvider(BaseProvider):
     Usage:
         # Basic usage
         llm = create_llm("openai-compatible",
-                         base_url="http://
+                         base_url="http://127.0.0.1:1234/v1",
                          model="llama-3.1-8b")

         # With API key (optional for many local servers)
         llm = create_llm("openai-compatible",
-                         base_url="http://
+                         base_url="http://127.0.0.1:1234/v1",
                          model="my-model",
                          api_key="your-key")

         # Environment variable configuration
-        export OPENAI_COMPATIBLE_BASE_URL="http://
+        export OPENAI_COMPATIBLE_BASE_URL="http://127.0.0.1:1234/v1"
         export OPENAI_COMPATIBLE_API_KEY="your-key"  # Optional
         llm = create_llm("openai-compatible", model="my-model")
     """

+    PROVIDER_ID = "openai-compatible"
+    PROVIDER_DISPLAY_NAME = "OpenAI-compatible server"
+    BASE_URL_ENV_VAR = "OPENAI_COMPATIBLE_BASE_URL"
+    API_KEY_ENV_VAR = "OPENAI_COMPATIBLE_API_KEY"
+    DEFAULT_BASE_URL = "http://localhost:1234/v1"
+
     def __init__(self, model: str = "default", base_url: Optional[str] = None,
                  api_key: Optional[str] = None, **kwargs):
         super().__init__(model, **kwargs)
-        self.provider =
+        self.provider = self.PROVIDER_ID

         # Initialize tool handler
         self.tool_handler = UniversalToolHandler(model)

-
-        self.base_url = (
-            base_url or
-            os.getenv("OPENAI_COMPATIBLE_BASE_URL") or
-            "http://localhost:8080/v1"
-        ).rstrip('/')
+        self.base_url = self._resolve_base_url(base_url)

-
-        # Priority: parameter > OPENAI_COMPATIBLE_API_KEY > None
-        self.api_key = api_key or os.getenv("OPENAI_COMPATIBLE_API_KEY")
+        self.api_key = self._resolve_api_key(api_key)

+        # #[WARNING:TIMEOUT]
         # Get timeout value - None means unlimited timeout
         timeout_value = getattr(self, '_timeout', None)
         # Validate timeout if provided (None is allowed for unlimited)
@@ -102,7 +162,7 @@ class OpenAICompatibleProvider(BaseProvider):
                 fallback_timeout = None
                 self.client = httpx.Client(timeout=fallback_timeout)
         except Exception:
-            raise RuntimeError(f"Failed to create HTTP client for
+            raise RuntimeError(f"Failed to create HTTP client for {self.PROVIDER_DISPLAY_NAME}: {e}")

         self._async_client = None  # Lazy-loaded async client

@@ -122,13 +182,130 @@ class OpenAICompatibleProvider(BaseProvider):
     def _get_headers(self) -> Dict[str, str]:
         """Get HTTP headers with optional API key authentication."""
         headers = {"Content-Type": "application/json"}
-        # Only add Authorization header if api_key is provided and
-        if self.api_key
-
+        # Only add Authorization header if api_key is provided and meaningful.
+        api_key = None if self.api_key is None else str(self.api_key).strip()
+        if api_key and api_key.upper() != "EMPTY":
+            headers["Authorization"] = f"Bearer {api_key}"
         return headers

+    def _mutate_payload(self, payload: Dict[str, Any], **kwargs) -> Dict[str, Any]:
+        """Provider-specific payload hook (default: no-op)."""
+        return payload
+
+    def _resolve_base_url(self, base_url: Optional[str]) -> str:
+        """Resolve base URL with parameter > env var > default precedence."""
+        if base_url is not None:
+            resolved = str(base_url).strip()
+            if not resolved:
+                raise ValueError("base_url cannot be empty")
+            return resolved.rstrip("/")
+
+        env_var = getattr(self, "BASE_URL_ENV_VAR", None)
+        env_val = os.getenv(env_var) if isinstance(env_var, str) and env_var else None
+        if isinstance(env_val, str) and env_val.strip():
+            return env_val.strip().rstrip("/")
+
+        default = getattr(self, "DEFAULT_BASE_URL", None) or ""
+        return str(default).strip().rstrip("/")
+
+    def _resolve_api_key(self, api_key: Optional[str]) -> Optional[str]:
+        """Resolve API key with parameter > env var > config fallback."""
+        if api_key is not None:
+            # Allow callers to explicitly disable auth by passing an empty string.
+            return api_key
+
+        env_var = getattr(self, "API_KEY_ENV_VAR", None)
+        env_val = os.getenv(env_var) if isinstance(env_var, str) and env_var else None
+        if env_val is not None:
+            return env_val
+
+        return self._get_api_key_from_config()
+
+    def _get_api_key_from_config(self) -> Optional[str]:
+        """Optional config-manager fallback for subclasses (default: none)."""
+        return None
+
+    def _extract_error_detail(self, response: Optional[httpx.Response]) -> Optional[str]:
+        """Extract a useful error message from an HTTPX response, if possible."""
+        if response is None:
+            return None
+
+        try:
+            data = response.json()
+            if isinstance(data, dict):
+                err = data.get("error")
+                if isinstance(err, dict):
+                    for k in ("message", "error", "detail"):
+                        v = err.get(k)
+                        if isinstance(v, str) and v.strip():
+                            return v.strip()
+                for k in ("message", "detail"):
+                    v = data.get(k)
+                    if isinstance(v, str) and v.strip():
+                        return v.strip()
+            # If it's JSON but not a dict, stringify it.
+            if data is not None:
+                return json.dumps(data, ensure_ascii=False)
+        except Exception:
+            pass
+
+        try:
+            text = response.text
+            if isinstance(text, str) and text.strip():
+                # Bound size to avoid dumping huge error bodies.
+                body = text.strip()
+                return preview_text(body, max_chars=2000)
+        except Exception:
+            pass
+
+        return None
+
+    def _raise_for_status(self, response: httpx.Response, *, request_url: Optional[str] = None) -> None:
+        """Raise rich provider exceptions on HTTP errors."""
+        status_code = getattr(response, "status_code", None)
+        if status_code is None:
+            # Unit tests sometimes stub the HTTP response with only `.raise_for_status()`/`.json()`.
+            # Treat as success if `.raise_for_status()` does not raise.
+            raise_for_status = getattr(response, "raise_for_status", None)
+            if callable(raise_for_status):
+                raise_for_status()
+            return
+
+        if int(status_code) < 400:
+            return
+
+        detail = self._extract_error_detail(response)
+        prefix = f"{self.PROVIDER_DISPLAY_NAME} API error ({status_code})"
+        msg = f"{prefix}: {detail}" if detail else prefix
+
+        status = int(status_code)
+        if status in (401, 403):
+            raise AuthenticationError(msg)
+        if status == 429:
+            raise RateLimitError(msg)
+        if status == 400:
+            # Many OpenAI-compatible servers use 400 for schema/model errors.
+            if detail and ("model" in detail.lower()) and ("not found" in detail.lower()):
+                self._raise_model_not_found()
+            raise InvalidRequestError(msg)
+        if status == 404:
+            # Could be endpoint misconfiguration (missing /v1) or an unknown model.
+            if detail and ("model" in detail.lower()) and ("not found" in detail.lower()):
+                self._raise_model_not_found()
+            raise ProviderAPIError(msg if request_url is None else f"{msg} [{request_url}]")
+
+        raise ProviderAPIError(msg if request_url is None else f"{msg} [{request_url}]")
+
+    def _raise_model_not_found(self) -> None:
+        """Raise ModelNotFoundError with a best-effort available-model list."""
+        try:
+            available_models = self.list_available_models(base_url=self.base_url)
+        except Exception:
+            available_models = []
+        raise ModelNotFoundError(format_model_error(self.PROVIDER_DISPLAY_NAME, self.model, available_models))
+
     def _validate_model(self):
-        """Validate that the model exists on the
+        """Validate that the model exists on the server (best-effort)."""
         # Skip validation for "default" placeholder (used by registry for model listing)
         if self.model == "default":
             return
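
Much of this release's churn elsewhere in the package (`lmstudio_provider.py` -923, `vllm_provider.py` -817) comes from those providers delegating to this shared base class instead of duplicating it. A hedged sketch, not code from the diff, of how a concrete provider could plug into the new class attributes and hooks; the `MyServerProvider` name, env vars, and `extra_body` kwarg are illustrative:

```python
# Hedged sketch: specializing the refactored OpenAI-compatible base class.
from typing import Any, Dict, Optional


class MyServerProvider(OpenAICompatibleProvider):
    PROVIDER_ID = "my-server"
    PROVIDER_DISPLAY_NAME = "My local server"
    BASE_URL_ENV_VAR = "MY_SERVER_BASE_URL"       # checked by _resolve_base_url
    API_KEY_ENV_VAR = "MY_SERVER_API_KEY"         # checked by _resolve_api_key
    DEFAULT_BASE_URL = "http://localhost:8000/v1"

    def _get_api_key_from_config(self) -> Optional[str]:
        # Optional fallback when neither the parameter nor the env var is set.
        return None

    def _mutate_payload(self, payload: Dict[str, Any], **kwargs) -> Dict[str, Any]:
        # Attach server-specific request extensions without touching shared logic.
        extra_body = kwargs.get("extra_body")
        if isinstance(extra_body, dict):
            payload.update(extra_body)
        return payload
```
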
@@ -137,12 +314,12 @@ class OpenAICompatibleProvider(BaseProvider):
             # Use base_url as-is (should include /v1) for model discovery
             available_models = self.list_available_models(base_url=self.base_url)
             if available_models and self.model not in available_models:
-                error_message = format_model_error(
+                error_message = format_model_error(self.PROVIDER_DISPLAY_NAME, self.model, available_models)
                 raise ModelNotFoundError(error_message)
         except httpx.ConnectError:
             # Server not running - will fail later when trying to generate
             if hasattr(self, 'logger'):
-                self.logger.debug(f"
+                self.logger.debug(f"{self.PROVIDER_DISPLAY_NAME} not accessible at {self.base_url} - model validation skipped")
             pass
         except ModelNotFoundError:
             # Re-raise model not found errors
@@ -153,7 +330,7 @@ class OpenAICompatibleProvider(BaseProvider):
                 self.logger.debug(f"Model validation failed with error: {e} - continuing anyway")
             pass

-    def
+    def unload_model(self, model_name: str) -> None:
         """
         Close HTTP client connection.

@@ -226,6 +403,8 @@ class OpenAICompatibleProvider(BaseProvider):
         if messages:
             chat_messages.extend(messages)

+        media_enrichment = None
+
         # Handle media content regardless of prompt (media can be used with messages too)
         if media:
             # Get the last user message content to combine with media
@@ -245,6 +424,7 @@ class OpenAICompatibleProvider(BaseProvider):

                 # Create multimodal message combining text and processed media
                 multimodal_message = media_handler.create_multimodal_message(user_message_text, processed_media)
+                media_enrichment = getattr(media_handler, "media_enrichment", None)

                 # For OpenAI-compatible servers, we might get a string (embedded text) or dict (structured)
                 if isinstance(multimodal_message, str):
@@ -263,7 +443,7 @@ class OpenAICompatibleProvider(BaseProvider):
                 else:
                     chat_messages.append(multimodal_message)
             except ImportError:
-                self.logger.warning("Media processing not available. Install with: pip install abstractcore[media]")
+                self.logger.warning("Media processing not available. Install with: pip install \"abstractcore[media]\"")
                 if user_message_text:
                     chat_messages.append({
                         "role": "user",
@@ -292,11 +472,16 @@ class OpenAICompatibleProvider(BaseProvider):
             "model": self.model,
             "messages": chat_messages,
             "stream": stream,
-            "temperature":
+            "temperature": generation_kwargs.get("temperature", self.temperature),
             "max_tokens": max_output_tokens,
             "top_p": kwargs.get("top_p", 0.9),
         }

+        # Prompt caching (best-effort): pass through `prompt_cache_key` when provided.
+        prompt_cache_key = kwargs.get("prompt_cache_key")
+        if isinstance(prompt_cache_key, str) and prompt_cache_key.strip():
+            payload["prompt_cache_key"] = prompt_cache_key.strip()
+
         # Native tools (OpenAI-compatible): send structured tools/tool_choice when supported.
         if tools and self.tool_handler.supports_native:
             payload["tools"] = self.tool_handler.prepare_tools_for_native(tools)
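
The `prompt_cache_key` pass-through is best-effort: the provider forwards the key verbatim and leaves it to the server whether to reuse a cached prefix. A hedged usage sketch, assuming the public `generate()` entry point forwards extra keyword arguments the way the payload construction above reads them:

```python
# Hedged sketch: reusing a server-side prompt cache across related calls.
llm = create_llm("openai-compatible",
                 base_url="http://127.0.0.1:1234/v1",
                 model="llama-3.1-8b")

# Both requests carry prompt_cache_key="doc-1234" in the payload, so servers
# that implement prompt caching can reuse the shared prefix between them.
first = llm.generate("Summarize chapter 1 of the report.", prompt_cache_key="doc-1234")
second = llm.generate("Summarize chapter 2 of the report.", prompt_cache_key="doc-1234")
```
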
@@ -312,7 +497,7 @@ class OpenAICompatibleProvider(BaseProvider):
             payload["repetition_penalty"] = kwargs["repetition_penalty"]

         # Add seed if provided (many servers support seed via OpenAI-compatible API)
-        seed_value =
+        seed_value = generation_kwargs.get("seed")
         if seed_value is not None:
             payload["seed"] = seed_value

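
Together with `temperature`, the `seed` pass-through gives a reproducibility knob on servers that honor it. A brief hedged sketch, assuming `seed` and `temperature` are forwarded like the other sampling kwargs above; identical outputs are still server-dependent:

```python
# Hedged sketch: requesting best-effort deterministic sampling.
reply_a = llm.generate("Name three prime numbers.", temperature=0.0, seed=42)
reply_b = llm.generate("Name three prime numbers.", temperature=0.0, seed=42)
```
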
@@ -320,6 +505,8 @@ class OpenAICompatibleProvider(BaseProvider):
         # Many servers support native structured outputs using the response_format parameter
         if response_model and PYDANTIC_AVAILABLE:
             json_schema = response_model.model_json_schema()
+            if isinstance(json_schema, dict) and json_schema:
+                json_schema = _inline_json_schema_refs(json_schema)
             payload["response_format"] = {
                 "type": "json_schema",
                 "json_schema": {
@@ -328,11 +515,18 @@ class OpenAICompatibleProvider(BaseProvider):
                 }
             }

+        # Provider-specific request extensions (vLLM extra_body, OpenRouter headers, etc.)
+        payload = self._mutate_payload(payload, **kwargs)
+
         if stream:
             # Return streaming response - BaseProvider will handle tag rewriting via UnifiedStreamProcessor
             return self._stream_generate(payload)
         else:
             response = self._single_generate(payload)
+            if media_enrichment:
+                from ..media.enrichment import merge_enrichment_metadata
+
+                response.metadata = merge_enrichment_metadata(response.metadata, media_enrichment)

             # Execute tools if enabled and tools are present
             if self.execute_tools and tools and self.tool_handler.supports_prompted and response.content:
@@ -355,7 +549,7 @@ class OpenAICompatibleProvider(BaseProvider):
                 json=payload,
                 headers=self._get_headers()
             )
-
+            self._raise_for_status(response, request_url=request_url)
             gen_time = round((time.time() - start_time) * 1000, 1)

             result = response.json()
@@ -368,6 +562,11 @@ class OpenAICompatibleProvider(BaseProvider):
                     message = {}

                 content = message.get("content", "")
+                reasoning = extract_reasoning_from_message(
+                    message,
+                    architecture_format=self.architecture_config,
+                    model_capabilities=self.model_capabilities,
+                )
                 tool_calls = message.get("tool_calls")
                 if tool_calls is None:
                     # Some servers surface tool calls at the choice level.
@@ -375,24 +574,29 @@ class OpenAICompatibleProvider(BaseProvider):
                 finish_reason = choice.get("finish_reason", "stop")
             else:
                 content = "No response generated"
+                reasoning = None
                 tool_calls = None
                 finish_reason = "error"

             # Extract usage info
             usage = result.get("usage", {})

+            metadata: Dict[str, Any] = {
+                "_provider_request": {
+                    "url": request_url,
+                    "payload": payload,
+                }
+            }
+            if isinstance(reasoning, str) and reasoning.strip():
+                metadata["reasoning"] = reasoning
+
             return GenerateResponse(
                 content=content,
                 model=self.model,
                 finish_reason=finish_reason,
                 raw_response=result,
                 tool_calls=tool_calls if isinstance(tool_calls, list) else None,
-                metadata=
-                    "_provider_request": {
-                        "url": request_url,
-                        "payload": payload,
-                    }
-                },
+                metadata=metadata,
                 usage={
                     "input_tokens": usage.get("prompt_tokens", 0),
                     "output_tokens": usage.get("completion_tokens", 0),
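
With `extract_reasoning_from_message` wired in, reasoning text emitted by thinking models is no longer dropped; it is surfaced under `metadata["reasoning"]` on the returned response. A hedged sketch of reading it back, assuming the `GenerateResponse` returned by `generate()` exposes the `metadata` and `content` attributes used in the constructor call above:

```python
# Hedged sketch: separating reasoning from the final answer after this change.
response = llm.generate("Solve 17 * 23 and explain briefly.")

reasoning = (response.metadata or {}).get("reasoning")
if reasoning:
    print("reasoning:", reasoning)
print("answer:", response.content)
```
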
@@ -407,76 +611,72 @@ class OpenAICompatibleProvider(BaseProvider):
         except AttributeError as e:
             # Handle None type errors specifically
             if "'NoneType'" in str(e):
-                raise ProviderAPIError(f"
+                raise ProviderAPIError(f"{self.PROVIDER_DISPLAY_NAME} not properly initialized: {str(e)}")
             else:
-                raise ProviderAPIError(f"
+                raise ProviderAPIError(f"{self.PROVIDER_DISPLAY_NAME} configuration error: {str(e)}")
         except Exception as e:
             error_str = str(e).lower()
-            if (
-
-
-                    available_models = self.list_available_models(base_url=self.base_url)
-                    error_message = format_model_error("OpenAI-compatible server", self.model, available_models)
-                    raise ModelNotFoundError(error_message)
-                except Exception:
-                    # If model discovery also fails, provide a generic error
-                    raise ModelNotFoundError(f"Model '{self.model}' not found on OpenAI-compatible server and could not fetch available models")
-            else:
-                raise
+            if ("not found" in error_str) and ("model" in error_str):
+                self._raise_model_not_found()
+            raise

     def _stream_generate(self, payload: Dict[str, Any]) -> Iterator[GenerateResponse]:
         """Generate streaming response"""
[… removed lines truncated in this diff view …]
+        request_url = f"{self.base_url}/chat/completions"
+
+        with self.client.stream(
+            "POST",
+            request_url,
+            json=payload,
+            headers=self._get_headers()
+        ) as response:
+            self._raise_for_status(response, request_url=request_url)
+
+            for line in response.iter_lines():
+                if line:
+                    # Decode bytes to string if necessary
+                    if isinstance(line, bytes):
+                        line = line.decode('utf-8')
+                    line = line.strip()
+
+                    if line.startswith("data: "):
+                        data = line[6:]  # Remove "data: " prefix
+
+                        if data == "[DONE]":
+                            break
+
+                        try:
+                            chunk = json.loads(data)
+
+                            if "choices" in chunk and len(chunk["choices"]) > 0:
+                                choice = chunk["choices"][0]
+                                delta = choice.get("delta", {})
+                                if not isinstance(delta, dict):
+                                    delta = {}
+                                content = delta.get("content", "")
+                                reasoning = extract_reasoning_from_message(
+                                    delta,
+                                    architecture_format=self.architecture_config,
+                                    model_capabilities=self.model_capabilities,
+                                )
+                                tool_calls = delta.get("tool_calls") or choice.get("tool_calls")
+                                finish_reason = choice.get("finish_reason")
+
+                                metadata = {}
+                                if isinstance(reasoning, str) and reasoning.strip():
+                                    metadata["reasoning"] = reasoning
+
+                                yield GenerateResponse(
+                                    content=content,
+                                    model=self.model,
+                                    finish_reason=finish_reason,
+                                    tool_calls=tool_calls if isinstance(tool_calls, list) else None,
+                                    metadata=metadata or None,
+                                    raw_response=chunk
+                                )
+
+                        except json.JSONDecodeError:
+                            continue

     async def _agenerate_internal(self,
                                   prompt: str,
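
The rewritten `_stream_generate` consumes standard OpenAI-style server-sent events. A self-contained sketch of the frame format it parses; the sample chunk is illustrative, not captured from a real server:

```python
# Illustrative SSE frame and the parsing steps mirrored by _stream_generate.
import json

sse_line = 'data: {"choices": [{"delta": {"content": "Hel"}, "finish_reason": null}]}'

if sse_line.startswith("data: "):
    data = sse_line[6:]            # strip the "data: " prefix
    if data != "[DONE]":           # "[DONE]" terminates the stream
        chunk = json.loads(data)
        delta = chunk["choices"][0].get("delta", {})
        print(delta.get("content", ""))   # -> "Hel"
```
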
@@ -542,7 +742,7 @@ class OpenAICompatibleProvider(BaseProvider):
                 else:
                     chat_messages.append(multimodal_message)
             except ImportError:
-                self.logger.warning("Media processing not available. Install with: pip install abstractcore[media]")
+                self.logger.warning("Media processing not available. Install with: pip install \"abstractcore[media]\"")
                 if user_message_text:
                     chat_messages.append({"role": "user", "content": user_message_text})
         except Exception as e:
@@ -562,7 +762,7 @@ class OpenAICompatibleProvider(BaseProvider):
             "model": self.model,
             "messages": chat_messages,
             "stream": stream,
-            "temperature":
+            "temperature": generation_kwargs.get("temperature", self.temperature),
             "max_tokens": max_output_tokens,
             "top_p": kwargs.get("top_p", 0.9),
         }
@@ -581,13 +781,15 @@ class OpenAICompatibleProvider(BaseProvider):
             payload["repetition_penalty"] = kwargs["repetition_penalty"]

         # Add seed if provided
-        seed_value =
+        seed_value = generation_kwargs.get("seed")
         if seed_value is not None:
             payload["seed"] = seed_value

         # Add structured output support
         if response_model and PYDANTIC_AVAILABLE:
             json_schema = response_model.model_json_schema()
+            if isinstance(json_schema, dict) and json_schema:
+                json_schema = _inline_json_schema_refs(json_schema)
             payload["response_format"] = {
                 "type": "json_schema",
                 "json_schema": {
@@ -596,6 +798,9 @@ class OpenAICompatibleProvider(BaseProvider):
                 }
             }

+        # Provider-specific request extensions (vLLM extra_body, OpenRouter headers, etc.)
+        payload = self._mutate_payload(payload, **kwargs)
+
         if stream:
             return self._async_stream_generate(payload)
         else:
@@ -618,7 +823,7 @@ class OpenAICompatibleProvider(BaseProvider):
                 json=payload,
                 headers=self._get_headers()
             )
-
+            self._raise_for_status(response, request_url=request_url)
             gen_time = round((time.time() - start_time) * 1000, 1)

             result = response.json()
@@ -626,26 +831,45 @@ class OpenAICompatibleProvider(BaseProvider):
             # Extract response from OpenAI format
             if "choices" in result and len(result["choices"]) > 0:
                 choice = result["choices"][0]
-
+                message = choice.get("message") or {}
+                if not isinstance(message, dict):
+                    message = {}
+
+                content = message.get("content", "")
+                reasoning = extract_reasoning_from_message(
+                    message,
+                    architecture_format=self.architecture_config,
+                    model_capabilities=self.model_capabilities,
+                )
+                tool_calls = message.get("tool_calls")
+                if tool_calls is None:
+                    tool_calls = choice.get("tool_calls")
                 finish_reason = choice.get("finish_reason", "stop")
             else:
                 content = "No response generated"
+                reasoning = None
+                tool_calls = None
                 finish_reason = "error"

             # Extract usage info
             usage = result.get("usage", {})

+            metadata: Dict[str, Any] = {
+                "_provider_request": {
+                    "url": request_url,
+                    "payload": payload,
+                }
+            }
+            if isinstance(reasoning, str) and reasoning.strip():
+                metadata["reasoning"] = reasoning
+
             return GenerateResponse(
                 content=content,
                 model=self.model,
                 finish_reason=finish_reason,
                 raw_response=result,
-
-
-                    "url": request_url,
-                    "payload": payload,
-                    }
-                },
+                tool_calls=tool_calls if isinstance(tool_calls, list) else None,
+                metadata=metadata,
                 usage={
                     "input_tokens": usage.get("prompt_tokens", 0),
                     "output_tokens": usage.get("completion_tokens", 0),
@@ -656,64 +880,72 @@ class OpenAICompatibleProvider(BaseProvider):
                 gen_time=gen_time
             )

+        except (ModelNotFoundError, AuthenticationError, RateLimitError, InvalidRequestError, ProviderAPIError):
+            raise
         except Exception as e:
             error_str = str(e).lower()
-            if (
-
-
-                    error_message = format_model_error("OpenAI-compatible server", self.model, available_models)
-                    raise ModelNotFoundError(error_message)
-                except Exception:
-                    raise ModelNotFoundError(f"Model '{self.model}' not found on OpenAI-compatible server")
-            else:
-                raise ProviderAPIError(f"OpenAI-compatible server API error: {str(e)}")
+            if ("not found" in error_str) and ("model" in error_str):
+                self._raise_model_not_found()
+            raise

     async def _async_stream_generate(self, payload: Dict[str, Any]) -> AsyncIterator[GenerateResponse]:
         """Native async streaming response generation."""
[… removed lines truncated in this diff view …]
+        request_url = f"{self.base_url}/chat/completions"
+
+        async with self.async_client.stream(
+            "POST",
+            request_url,
+            json=payload,
+            headers=self._get_headers()
+        ) as response:
+            self._raise_for_status(response, request_url=request_url)
+
+            async for line in response.aiter_lines():
+                if line:
+                    line = line.strip()
+
+                    if line.startswith("data: "):
+                        data = line[6:]  # Remove "data: " prefix
+
+                        if data == "[DONE]":
+                            break
+
+                        try:
+                            chunk = json.loads(data)
+
+                            if "choices" in chunk and len(chunk["choices"]) > 0:
+                                choice = chunk["choices"][0]
+                                delta = choice.get("delta", {})
+                                if not isinstance(delta, dict):
+                                    delta = {}
+                                content = delta.get("content", "")
+                                reasoning = extract_reasoning_from_message(
+                                    delta,
+                                    architecture_format=self.architecture_config,
+                                    model_capabilities=self.model_capabilities,
+                                )
+                                tool_calls = delta.get("tool_calls") or choice.get("tool_calls")
+                                finish_reason = choice.get("finish_reason")
+
+                                metadata = {}
+                                if isinstance(reasoning, str) and reasoning.strip():
+                                    metadata["reasoning"] = reasoning
+
+                                yield GenerateResponse(
+                                    content=content,
+                                    model=self.model,
+                                    finish_reason=finish_reason,
+                                    tool_calls=tool_calls if isinstance(tool_calls, list) else None,
+                                    metadata=metadata or None,
+                                    raw_response=chunk
+                                )
+
+                        except json.JSONDecodeError:
+                            continue
+
+    def supports_prompt_cache(self) -> bool:
+        """Best-effort: forward `prompt_cache_key` to OpenAI-compatible servers that support it."""
+        return True

     def get_capabilities(self) -> List[str]:
         """Get OpenAI-compatible server capabilities"""
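
A practical consequence of the reworked error handling in these hunks: callers now see typed exceptions instead of bare re-raises or generic `ProviderAPIError` strings. A hedged sketch of the resulting error surface; the exception names match the imports added at the top of this file, while the `llm.generate("ping")` call assumes the public generate entry point:

```python
# Hedged sketch: handling the typed errors raised via _raise_for_status.
from abstractcore.exceptions import (
    AuthenticationError, InvalidRequestError, ModelNotFoundError,
    ProviderAPIError, RateLimitError,
)

try:
    reply = llm.generate("ping")
except AuthenticationError:
    ...  # 401/403 - missing or rejected API key
except RateLimitError:
    ...  # 429 - back off and retry
except (ModelNotFoundError, InvalidRequestError):
    ...  # unknown model or malformed request (400/404)
except ProviderAPIError as exc:
    ...  # other HTTP failures; message includes status and request URL
```
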
@@ -765,24 +997,14 @@ class OpenAICompatibleProvider(BaseProvider):
         except Exception:
             pass  # Best effort - don't fail the operation

-    def _normalize_model_name(self, model_name: str) -> str:
-        """Remove common provider prefixes from model name."""
-        for prefix in ["openai-compatible/", "lmstudio/", "qwen/", "ollama/", "huggingface/"]:
-            if model_name.startswith(prefix):
-                model_name = model_name[len(prefix):]
-        return model_name
-
     def _get_media_handler_for_model(self, model_name: str):
         """Get appropriate media handler based on model vision capabilities."""
         from ..media.handlers import OpenAIMediaHandler, LocalMediaHandler

-        # Normalize model name by removing provider prefixes
-        clean_model_name = self._normalize_model_name(model_name)
-
         # Determine if model supports vision
         try:
             from ..architectures.detection import supports_vision
-            use_vision_handler = supports_vision(
+            use_vision_handler = supports_vision(model_name)
         except Exception as e:
             self.logger.debug(f"Vision detection failed: {e}, defaulting to LocalMediaHandler")
             use_vision_handler = False
@@ -790,10 +1012,10 @@ class OpenAICompatibleProvider(BaseProvider):
         # Create appropriate handler
         if use_vision_handler:
             handler = OpenAIMediaHandler(self.model_capabilities, model_name=model_name)
-            self.logger.debug(f"Using OpenAIMediaHandler for vision model: {
+            self.logger.debug(f"Using OpenAIMediaHandler for vision model: {model_name}")
         else:
-            handler = LocalMediaHandler(
-            self.logger.debug(f"Using LocalMediaHandler for model: {
+            handler = LocalMediaHandler(self.provider, self.model_capabilities, model_name=model_name)
+            self.logger.debug(f"Using LocalMediaHandler for model: {model_name}")

         return handler

@@ -835,10 +1057,12 @@ class OpenAICompatibleProvider(BaseProvider):

                 return models
             else:
-                self.
+                detail = self._extract_error_detail(response)
+                suffix = f": {detail}" if detail else ""
+                self.logger.warning(f"{self.PROVIDER_DISPLAY_NAME} /models returned {response.status_code}{suffix}")
                 return []
         except Exception as e:
-            self.logger.warning(f"Failed to list models from
+            self.logger.warning(f"Failed to list models from {self.PROVIDER_DISPLAY_NAME}: {e}")
             return []

     def embed(self, input_text: Union[str, List[str]], **kwargs) -> Dict[str, Any]:
@@ -879,7 +1103,7 @@ class OpenAICompatibleProvider(BaseProvider):
                 json=payload,
                 headers=self._get_headers()
             )
-            response.
+            self._raise_for_status(response, request_url=f"{self.base_url}/embeddings")

             # Server returns OpenAI-compatible format
             result = response.json()
@@ -889,6 +1113,8 @@ class OpenAICompatibleProvider(BaseProvider):

             return result

+        except (ModelNotFoundError, AuthenticationError, RateLimitError, InvalidRequestError, ProviderAPIError):
+            raise
         except Exception as e:
             self.logger.error(f"Failed to generate embeddings: {e}")
-            raise ProviderAPIError(f"
+            raise ProviderAPIError(f"{self.PROVIDER_DISPLAY_NAME} embedding error: {str(e)}")