abstractcore-2.9.1-py3-none-any.whl → abstractcore-2.11.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. abstractcore/__init__.py +7 -27
  2. abstractcore/apps/extractor.py +33 -100
  3. abstractcore/apps/intent.py +19 -0
  4. abstractcore/apps/judge.py +20 -1
  5. abstractcore/apps/summarizer.py +20 -1
  6. abstractcore/architectures/detection.py +34 -1
  7. abstractcore/architectures/response_postprocessing.py +313 -0
  8. abstractcore/assets/architecture_formats.json +38 -8
  9. abstractcore/assets/model_capabilities.json +781 -160
  10. abstractcore/compression/__init__.py +1 -2
  11. abstractcore/compression/glyph_processor.py +6 -4
  12. abstractcore/config/main.py +31 -19
  13. abstractcore/config/manager.py +389 -11
  14. abstractcore/config/vision_config.py +5 -5
  15. abstractcore/core/interface.py +151 -3
  16. abstractcore/core/session.py +16 -10
  17. abstractcore/download.py +1 -1
  18. abstractcore/embeddings/manager.py +20 -6
  19. abstractcore/endpoint/__init__.py +2 -0
  20. abstractcore/endpoint/app.py +458 -0
  21. abstractcore/mcp/client.py +3 -1
  22. abstractcore/media/__init__.py +52 -17
  23. abstractcore/media/auto_handler.py +42 -22
  24. abstractcore/media/base.py +44 -1
  25. abstractcore/media/capabilities.py +12 -33
  26. abstractcore/media/enrichment.py +105 -0
  27. abstractcore/media/handlers/anthropic_handler.py +19 -28
  28. abstractcore/media/handlers/local_handler.py +124 -70
  29. abstractcore/media/handlers/openai_handler.py +19 -31
  30. abstractcore/media/processors/__init__.py +4 -2
  31. abstractcore/media/processors/audio_processor.py +57 -0
  32. abstractcore/media/processors/office_processor.py +8 -3
  33. abstractcore/media/processors/pdf_processor.py +46 -3
  34. abstractcore/media/processors/text_processor.py +22 -24
  35. abstractcore/media/processors/video_processor.py +58 -0
  36. abstractcore/media/types.py +97 -4
  37. abstractcore/media/utils/image_scaler.py +20 -2
  38. abstractcore/media/utils/video_frames.py +219 -0
  39. abstractcore/media/vision_fallback.py +136 -22
  40. abstractcore/processing/__init__.py +32 -3
  41. abstractcore/processing/basic_deepsearch.py +15 -10
  42. abstractcore/processing/basic_intent.py +3 -2
  43. abstractcore/processing/basic_judge.py +3 -2
  44. abstractcore/processing/basic_summarizer.py +1 -1
  45. abstractcore/providers/__init__.py +3 -1
  46. abstractcore/providers/anthropic_provider.py +95 -8
  47. abstractcore/providers/base.py +1516 -81
  48. abstractcore/providers/huggingface_provider.py +546 -69
  49. abstractcore/providers/lmstudio_provider.py +35 -923
  50. abstractcore/providers/mlx_provider.py +382 -35
  51. abstractcore/providers/model_capabilities.py +5 -1
  52. abstractcore/providers/ollama_provider.py +99 -15
  53. abstractcore/providers/openai_compatible_provider.py +406 -180
  54. abstractcore/providers/openai_provider.py +188 -44
  55. abstractcore/providers/openrouter_provider.py +76 -0
  56. abstractcore/providers/registry.py +61 -5
  57. abstractcore/providers/streaming.py +138 -33
  58. abstractcore/providers/vllm_provider.py +92 -817
  59. abstractcore/server/app.py +461 -13
  60. abstractcore/server/audio_endpoints.py +139 -0
  61. abstractcore/server/vision_endpoints.py +1319 -0
  62. abstractcore/structured/handler.py +316 -41
  63. abstractcore/tools/common_tools.py +5501 -2012
  64. abstractcore/tools/comms_tools.py +1641 -0
  65. abstractcore/tools/core.py +37 -7
  66. abstractcore/tools/handler.py +4 -9
  67. abstractcore/tools/parser.py +49 -2
  68. abstractcore/tools/tag_rewriter.py +2 -1
  69. abstractcore/tools/telegram_tdlib.py +407 -0
  70. abstractcore/tools/telegram_tools.py +261 -0
  71. abstractcore/utils/cli.py +1085 -72
  72. abstractcore/utils/token_utils.py +2 -0
  73. abstractcore/utils/truncation.py +29 -0
  74. abstractcore/utils/version.py +3 -4
  75. abstractcore/utils/vlm_token_calculator.py +12 -2
  76. abstractcore-2.11.2.dist-info/METADATA +562 -0
  77. abstractcore-2.11.2.dist-info/RECORD +133 -0
  78. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/WHEEL +1 -1
  79. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/entry_points.txt +1 -0
  80. abstractcore-2.9.1.dist-info/METADATA +0 -1190
  81. abstractcore-2.9.1.dist-info/RECORD +0 -119
  82. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/licenses/LICENSE +0 -0
  83. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/top_level.txt +0 -0
abstractcore/providers/openai_compatible_provider.py

@@ -23,11 +23,71 @@ try:
 except ImportError:
     PYDANTIC_AVAILABLE = False
     BaseModel = None
+
+
+def _inline_json_schema_refs(schema: Dict[str, Any]) -> Dict[str, Any]:
+    """Inline local $defs/$ref references in a JSON Schema dict.
+
+    Some OpenAI-compatible servers only partially support `$defs`/`$ref` inside
+    `response_format: {type:'json_schema'}`. Inlining keeps schemas simple and
+    improves compatibility for structured outputs.
+    """
+
+    defs = schema.get("$defs")
+    if not isinstance(defs, dict) or not defs:
+        return schema
+
+    def _resolve(node: Any, *, seen: set[str]) -> Any:
+        if isinstance(node, dict):
+            ref = node.get("$ref")
+            if isinstance(ref, str) and ref.startswith("#/$defs/"):
+                key = ref[len("#/$defs/"):]
+                target = defs.get(key)
+                if isinstance(key, str) and key and isinstance(target, dict):
+                    if key in seen:
+                        return node
+                    seen.add(key)
+                    resolved_target = _resolve(dict(target), seen=seen)
+                    seen.remove(key)
+                    if isinstance(resolved_target, dict):
+                        merged: Dict[str, Any] = dict(resolved_target)
+                        for k, v in node.items():
+                            if k == "$ref":
+                                continue
+                            merged[k] = _resolve(v, seen=seen)
+                        return merged
+
+            out: Dict[str, Any] = {}
+            for k, v in node.items():
+                if k == "$defs":
+                    continue
+                out[k] = _resolve(v, seen=seen)
+            return out
+
+        if isinstance(node, list):
+            return [_resolve(x, seen=seen) for x in node]
+
+        return node
+
+    try:
+        base = {k: v for k, v in schema.items() if k != "$defs"}
+        inlined = _resolve(base, seen=set())
+        return inlined if isinstance(inlined, dict) and inlined else schema
+    except Exception:
+        return schema
+
 from .base import BaseProvider
+from ..architectures.response_postprocessing import extract_reasoning_from_message
 from ..core.types import GenerateResponse
-from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error, format_provider_error
-from ..tools import UniversalToolHandler, execute_tools
-from ..events import EventType
+from ..exceptions import (
+    ProviderAPIError,
+    ModelNotFoundError,
+    AuthenticationError,
+    RateLimitError,
+    InvalidRequestError,
+    format_model_error,
+)
+from ..tools import UniversalToolHandler
+from ..utils.truncation import preview_text
 
 
 class OpenAICompatibleProvider(BaseProvider):
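
Note: the `_inline_json_schema_refs` helper added above flattens a Pydantic-generated schema so nested models no longer rely on `$defs`/`$ref`. A minimal sketch of the transformation (the `Person`/`Address` models are hypothetical):

    from pydantic import BaseModel

    class Address(BaseModel):
        city: str

    class Person(BaseModel):
        name: str
        address: Address

    schema = Person.model_json_schema()
    # Before: schema["properties"]["address"] == {"$ref": "#/$defs/Address"}
    # and the Address definition lives under schema["$defs"].
    inlined = _inline_json_schema_refs(schema)
    # After: the Address object schema is embedded directly under "address",
    # and the top-level "$defs" key is dropped. Self-referencing definitions
    # are left untouched (the `seen` set stops infinite recursion).
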
@@ -46,40 +106,40 @@ class OpenAICompatibleProvider(BaseProvider):
     Usage:
         # Basic usage
         llm = create_llm("openai-compatible",
-                        base_url="http://localhost:8080/v1",
+                        base_url="http://127.0.0.1:1234/v1",
                         model="llama-3.1-8b")
 
         # With API key (optional for many local servers)
         llm = create_llm("openai-compatible",
-                        base_url="http://localhost:8080/v1",
+                        base_url="http://127.0.0.1:1234/v1",
                         model="my-model",
                         api_key="your-key")
 
         # Environment variable configuration
-        export OPENAI_COMPATIBLE_BASE_URL="http://localhost:8080/v1"
+        export OPENAI_COMPATIBLE_BASE_URL="http://127.0.0.1:1234/v1"
         export OPENAI_COMPATIBLE_API_KEY="your-key"  # Optional
         llm = create_llm("openai-compatible", model="my-model")
     """
 
+    PROVIDER_ID = "openai-compatible"
+    PROVIDER_DISPLAY_NAME = "OpenAI-compatible server"
+    BASE_URL_ENV_VAR = "OPENAI_COMPATIBLE_BASE_URL"
+    API_KEY_ENV_VAR = "OPENAI_COMPATIBLE_API_KEY"
+    DEFAULT_BASE_URL = "http://localhost:1234/v1"
+
     def __init__(self, model: str = "default", base_url: Optional[str] = None,
                  api_key: Optional[str] = None, **kwargs):
         super().__init__(model, **kwargs)
-        self.provider = "openai-compatible"
+        self.provider = self.PROVIDER_ID
 
         # Initialize tool handler
         self.tool_handler = UniversalToolHandler(model)
 
-        # Base URL priority: parameter > OPENAI_COMPATIBLE_BASE_URL > default
-        self.base_url = (
-            base_url or
-            os.getenv("OPENAI_COMPATIBLE_BASE_URL") or
-            "http://localhost:8080/v1"
-        ).rstrip('/')
+        self.base_url = self._resolve_base_url(base_url)
 
-        # API key: OPTIONAL (many local servers don't require authentication)
-        # Priority: parameter > OPENAI_COMPATIBLE_API_KEY > None
-        self.api_key = api_key or os.getenv("OPENAI_COMPATIBLE_API_KEY")
+        self.api_key = self._resolve_api_key(api_key)
 
+        # #[WARNING:TIMEOUT]
         # Get timeout value - None means unlimited timeout
         timeout_value = getattr(self, '_timeout', None)
         # Validate timeout if provided (None is allowed for unlimited)
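
Note: configuration now flows through the class constants above with parameter > environment variable > `DEFAULT_BASE_URL` precedence. A quick sketch, assuming `create_llm` is importable from the package root as the docstring implies:

    import os
    from abstractcore import create_llm  # import path assumed

    # An explicit parameter always wins:
    llm = create_llm("openai-compatible",
                     base_url="http://127.0.0.1:1234/v1",
                     model="llama-3.1-8b")

    # Otherwise OPENAI_COMPATIBLE_BASE_URL is consulted, then DEFAULT_BASE_URL:
    os.environ["OPENAI_COMPATIBLE_BASE_URL"] = "http://127.0.0.1:1234/v1"
    llm = create_llm("openai-compatible", model="llama-3.1-8b")
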
@@ -102,7 +162,7 @@ class OpenAICompatibleProvider(BaseProvider):
                 fallback_timeout = None
                 self.client = httpx.Client(timeout=fallback_timeout)
             except Exception:
-                raise RuntimeError(f"Failed to create HTTP client for OpenAI-compatible provider: {e}")
+                raise RuntimeError(f"Failed to create HTTP client for {self.PROVIDER_DISPLAY_NAME}: {e}")
 
         self._async_client = None  # Lazy-loaded async client
 
@@ -122,13 +182,130 @@ class OpenAICompatibleProvider(BaseProvider):
     def _get_headers(self) -> Dict[str, str]:
         """Get HTTP headers with optional API key authentication."""
         headers = {"Content-Type": "application/json"}
-        # Only add Authorization header if api_key is provided and truthy
-        if self.api_key:
-            headers["Authorization"] = f"Bearer {self.api_key}"
+        # Only add Authorization header if api_key is provided and meaningful.
+        api_key = None if self.api_key is None else str(self.api_key).strip()
+        if api_key and api_key.upper() != "EMPTY":
+            headers["Authorization"] = f"Bearer {api_key}"
         return headers
 
+    def _mutate_payload(self, payload: Dict[str, Any], **kwargs) -> Dict[str, Any]:
+        """Provider-specific payload hook (default: no-op)."""
+        return payload
+
+    def _resolve_base_url(self, base_url: Optional[str]) -> str:
+        """Resolve base URL with parameter > env var > default precedence."""
+        if base_url is not None:
+            resolved = str(base_url).strip()
+            if not resolved:
+                raise ValueError("base_url cannot be empty")
+            return resolved.rstrip("/")
+
+        env_var = getattr(self, "BASE_URL_ENV_VAR", None)
+        env_val = os.getenv(env_var) if isinstance(env_var, str) and env_var else None
+        if isinstance(env_val, str) and env_val.strip():
+            return env_val.strip().rstrip("/")
+
+        default = getattr(self, "DEFAULT_BASE_URL", None) or ""
+        return str(default).strip().rstrip("/")
+
+    def _resolve_api_key(self, api_key: Optional[str]) -> Optional[str]:
+        """Resolve API key with parameter > env var > config fallback."""
+        if api_key is not None:
+            # Allow callers to explicitly disable auth by passing an empty string.
+            return api_key
+
+        env_var = getattr(self, "API_KEY_ENV_VAR", None)
+        env_val = os.getenv(env_var) if isinstance(env_var, str) and env_var else None
+        if env_val is not None:
+            return env_val
+
+        return self._get_api_key_from_config()
+
+    def _get_api_key_from_config(self) -> Optional[str]:
+        """Optional config-manager fallback for subclasses (default: none)."""
+        return None
+
+    def _extract_error_detail(self, response: Optional[httpx.Response]) -> Optional[str]:
+        """Extract a useful error message from an HTTPX response, if possible."""
+        if response is None:
+            return None
+
+        try:
+            data = response.json()
+            if isinstance(data, dict):
+                err = data.get("error")
+                if isinstance(err, dict):
+                    for k in ("message", "error", "detail"):
+                        v = err.get(k)
+                        if isinstance(v, str) and v.strip():
+                            return v.strip()
+                for k in ("message", "detail"):
+                    v = data.get(k)
+                    if isinstance(v, str) and v.strip():
+                        return v.strip()
+            # If it's JSON but not a dict, stringify it.
+            if data is not None:
+                return json.dumps(data, ensure_ascii=False)
+        except Exception:
+            pass
+
+        try:
+            text = response.text
+            if isinstance(text, str) and text.strip():
+                # Bound size to avoid dumping huge error bodies.
+                body = text.strip()
+                return preview_text(body, max_chars=2000)
+        except Exception:
+            pass
+
+        return None
+
+    def _raise_for_status(self, response: httpx.Response, *, request_url: Optional[str] = None) -> None:
+        """Raise rich provider exceptions on HTTP errors."""
+        status_code = getattr(response, "status_code", None)
+        if status_code is None:
+            # Unit tests sometimes stub the HTTP response with only `.raise_for_status()`/`.json()`.
+            # Treat as success if `.raise_for_status()` does not raise.
+            raise_for_status = getattr(response, "raise_for_status", None)
+            if callable(raise_for_status):
+                raise_for_status()
+            return
+
+        if int(status_code) < 400:
+            return
+
+        detail = self._extract_error_detail(response)
+        prefix = f"{self.PROVIDER_DISPLAY_NAME} API error ({status_code})"
+        msg = f"{prefix}: {detail}" if detail else prefix
+
+        status = int(status_code)
+        if status in (401, 403):
+            raise AuthenticationError(msg)
+        if status == 429:
+            raise RateLimitError(msg)
+        if status == 400:
+            # Many OpenAI-compatible servers use 400 for schema/model errors.
+            if detail and ("model" in detail.lower()) and ("not found" in detail.lower()):
+                self._raise_model_not_found()
+            raise InvalidRequestError(msg)
+        if status == 404:
+            # Could be endpoint misconfiguration (missing /v1) or an unknown model.
+            if detail and ("model" in detail.lower()) and ("not found" in detail.lower()):
+                self._raise_model_not_found()
+            raise ProviderAPIError(msg if request_url is None else f"{msg} [{request_url}]")
+
+        raise ProviderAPIError(msg if request_url is None else f"{msg} [{request_url}]")
+
+    def _raise_model_not_found(self) -> None:
+        """Raise ModelNotFoundError with a best-effort available-model list."""
+        try:
+            available_models = self.list_available_models(base_url=self.base_url)
+        except Exception:
+            available_models = []
+        raise ModelNotFoundError(format_model_error(self.PROVIDER_DISPLAY_NAME, self.model, available_models))
+
     def _validate_model(self):
-        """Validate that the model exists on the OpenAI-compatible server"""
+        """Validate that the model exists on the server (best-effort)."""
         # Skip validation for "default" placeholder (used by registry for model listing)
        if self.model == "default":
            return
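
Note: with `_raise_for_status`, HTTP failures now surface as the typed exceptions imported at the top of the module instead of bare `httpx.HTTPStatusError`. A sketch of what a caller can now catch (`llm` is assumed to be a configured provider instance):

    from abstractcore.exceptions import (  # module path assumed from the relative imports above
        AuthenticationError, InvalidRequestError, ProviderAPIError, RateLimitError,
    )

    try:
        response = llm.generate("Hello")
    except AuthenticationError:   # 401/403
        ...
    except RateLimitError:        # 429
        ...
    except InvalidRequestError:   # 400 (bad schema or parameters)
        ...
    except ProviderAPIError:      # 404 and any other HTTP error
        ...

When a 400/404 body mentions a missing model, `_raise_model_not_found` raises `ModelNotFoundError` instead, with a best-effort list of available models.
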
@@ -137,12 +314,12 @@ class OpenAICompatibleProvider(BaseProvider):
             # Use base_url as-is (should include /v1) for model discovery
             available_models = self.list_available_models(base_url=self.base_url)
             if available_models and self.model not in available_models:
-                error_message = format_model_error("OpenAI-compatible server", self.model, available_models)
+                error_message = format_model_error(self.PROVIDER_DISPLAY_NAME, self.model, available_models)
                 raise ModelNotFoundError(error_message)
         except httpx.ConnectError:
             # Server not running - will fail later when trying to generate
             if hasattr(self, 'logger'):
-                self.logger.debug(f"OpenAI-compatible server not accessible at {self.base_url} - model validation skipped")
+                self.logger.debug(f"{self.PROVIDER_DISPLAY_NAME} not accessible at {self.base_url} - model validation skipped")
             pass
         except ModelNotFoundError:
             # Re-raise model not found errors
@@ -153,7 +330,7 @@ class OpenAICompatibleProvider(BaseProvider):
                 self.logger.debug(f"Model validation failed with error: {e} - continuing anyway")
             pass
 
-    def unload(self) -> None:
+    def unload_model(self, model_name: str) -> None:
         """
         Close HTTP client connection.
 
@@ -226,6 +403,8 @@ class OpenAICompatibleProvider(BaseProvider):
         if messages:
             chat_messages.extend(messages)
 
+        media_enrichment = None
+
         # Handle media content regardless of prompt (media can be used with messages too)
         if media:
             # Get the last user message content to combine with media
@@ -245,6 +424,7 @@ class OpenAICompatibleProvider(BaseProvider):
 
                 # Create multimodal message combining text and processed media
                 multimodal_message = media_handler.create_multimodal_message(user_message_text, processed_media)
+                media_enrichment = getattr(media_handler, "media_enrichment", None)
 
                 # For OpenAI-compatible servers, we might get a string (embedded text) or dict (structured)
                 if isinstance(multimodal_message, str):
@@ -263,7 +443,7 @@ class OpenAICompatibleProvider(BaseProvider):
                 else:
                     chat_messages.append(multimodal_message)
             except ImportError:
-                self.logger.warning("Media processing not available. Install with: pip install abstractcore[media]")
+                self.logger.warning("Media processing not available. Install with: pip install \"abstractcore[media]\"")
                 if user_message_text:
                     chat_messages.append({
                         "role": "user",
@@ -292,11 +472,16 @@ class OpenAICompatibleProvider(BaseProvider):
             "model": self.model,
             "messages": chat_messages,
             "stream": stream,
-            "temperature": kwargs.get("temperature", self.temperature),
+            "temperature": generation_kwargs.get("temperature", self.temperature),
             "max_tokens": max_output_tokens,
             "top_p": kwargs.get("top_p", 0.9),
         }
 
+        # Prompt caching (best-effort): pass through `prompt_cache_key` when provided.
+        prompt_cache_key = kwargs.get("prompt_cache_key")
+        if isinstance(prompt_cache_key, str) and prompt_cache_key.strip():
+            payload["prompt_cache_key"] = prompt_cache_key.strip()
+
         # Native tools (OpenAI-compatible): send structured tools/tool_choice when supported.
         if tools and self.tool_handler.supports_native:
             payload["tools"] = self.tool_handler.prepare_tools_for_native(tools)
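
Note: the `prompt_cache_key` passthrough above lets callers opt into server-side prompt caching per request. A sketch, assuming `generate()` forwards unrecognized keyword arguments into this payload builder:

    # Requests sharing a key can reuse the server's cached prompt prefix,
    # which helps when many calls share the same long system prompt.
    first = llm.generate("Summarize chapter 1", prompt_cache_key="doc-1234")
    second = llm.generate("Summarize chapter 2", prompt_cache_key="doc-1234")
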
@@ -312,7 +497,7 @@ class OpenAICompatibleProvider(BaseProvider):
             payload["repetition_penalty"] = kwargs["repetition_penalty"]
 
         # Add seed if provided (many servers support seed via OpenAI-compatible API)
-        seed_value = kwargs.get("seed", self.seed)
+        seed_value = generation_kwargs.get("seed")
         if seed_value is not None:
             payload["seed"] = seed_value
 
@@ -320,6 +505,8 @@ class OpenAICompatibleProvider(BaseProvider):
         # Many servers support native structured outputs using the response_format parameter
         if response_model and PYDANTIC_AVAILABLE:
             json_schema = response_model.model_json_schema()
+            if isinstance(json_schema, dict) and json_schema:
+                json_schema = _inline_json_schema_refs(json_schema)
             payload["response_format"] = {
                 "type": "json_schema",
                 "json_schema": {
@@ -328,11 +515,18 @@ class OpenAICompatibleProvider(BaseProvider):
                 }
             }
 
+        # Provider-specific request extensions (vLLM extra_body, OpenRouter headers, etc.)
+        payload = self._mutate_payload(payload, **kwargs)
+
         if stream:
             # Return streaming response - BaseProvider will handle tag rewriting via UnifiedStreamProcessor
             return self._stream_generate(payload)
         else:
             response = self._single_generate(payload)
+            if media_enrichment:
+                from ..media.enrichment import merge_enrichment_metadata
+
+                response.metadata = merge_enrichment_metadata(response.metadata, media_enrichment)
 
             # Execute tools if enabled and tools are present
             if self.execute_tools and tools and self.tool_handler.supports_prompted and response.content:
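
Note: `_mutate_payload` is the new extension hook that lets the slimmed-down vLLM/LM Studio subclasses (note their large deletions in the file list) adjust the request just before it is sent. A hypothetical override, sketched only to show the contract — take the payload dict, return a possibly modified payload dict:

    from typing import Any, Dict

    class MyVllmLikeProvider(OpenAICompatibleProvider):
        PROVIDER_ID = "my-vllm"
        PROVIDER_DISPLAY_NAME = "my vLLM server"

        def _mutate_payload(self, payload: Dict[str, Any], **kwargs) -> Dict[str, Any]:
            # Forward a server-specific sampling knob when the caller supplies it.
            if "top_k" in kwargs:
                payload["top_k"] = kwargs["top_k"]
            return payload
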
@@ -355,7 +549,7 @@ class OpenAICompatibleProvider(BaseProvider):
                 json=payload,
                 headers=self._get_headers()
             )
-            response.raise_for_status()
+            self._raise_for_status(response, request_url=request_url)
             gen_time = round((time.time() - start_time) * 1000, 1)
 
             result = response.json()
@@ -368,6 +562,11 @@ class OpenAICompatibleProvider(BaseProvider):
                     message = {}
 
                 content = message.get("content", "")
+                reasoning = extract_reasoning_from_message(
+                    message,
+                    architecture_format=self.architecture_config,
+                    model_capabilities=self.model_capabilities,
+                )
                 tool_calls = message.get("tool_calls")
                 if tool_calls is None:
                     # Some servers surface tool calls at the choice level.
@@ -375,24 +574,29 @@ class OpenAICompatibleProvider(BaseProvider):
                 finish_reason = choice.get("finish_reason", "stop")
             else:
                 content = "No response generated"
+                reasoning = None
                 tool_calls = None
                 finish_reason = "error"
 
             # Extract usage info
             usage = result.get("usage", {})
 
+            metadata: Dict[str, Any] = {
+                "_provider_request": {
+                    "url": request_url,
+                    "payload": payload,
+                }
+            }
+            if isinstance(reasoning, str) and reasoning.strip():
+                metadata["reasoning"] = reasoning
+
             return GenerateResponse(
                 content=content,
                 model=self.model,
                 finish_reason=finish_reason,
                 raw_response=result,
                 tool_calls=tool_calls if isinstance(tool_calls, list) else None,
-                metadata={
-                    "_provider_request": {
-                        "url": request_url,
-                        "payload": payload,
-                    }
-                },
+                metadata=metadata,
                 usage={
                     "input_tokens": usage.get("prompt_tokens", 0),
                     "output_tokens": usage.get("completion_tokens", 0),
@@ -407,76 +611,72 @@ class OpenAICompatibleProvider(BaseProvider):
         except AttributeError as e:
             # Handle None type errors specifically
             if "'NoneType'" in str(e):
-                raise ProviderAPIError(f"OpenAI-compatible provider not properly initialized: {str(e)}")
+                raise ProviderAPIError(f"{self.PROVIDER_DISPLAY_NAME} not properly initialized: {str(e)}")
             else:
-                raise ProviderAPIError(f"OpenAI-compatible provider configuration error: {str(e)}")
+                raise ProviderAPIError(f"{self.PROVIDER_DISPLAY_NAME} configuration error: {str(e)}")
         except Exception as e:
             error_str = str(e).lower()
-            if ('404' in error_str or 'not found' in error_str or 'model' in error_str) and ('not found' in error_str):
-                # Model not found - show available models
-                try:
-                    available_models = self.list_available_models(base_url=self.base_url)
-                    error_message = format_model_error("OpenAI-compatible server", self.model, available_models)
-                    raise ModelNotFoundError(error_message)
-                except Exception:
-                    # If model discovery also fails, provide a generic error
-                    raise ModelNotFoundError(f"Model '{self.model}' not found on OpenAI-compatible server and could not fetch available models")
-            else:
-                raise
+            if ("not found" in error_str) and ("model" in error_str):
+                self._raise_model_not_found()
+            raise
 
     def _stream_generate(self, payload: Dict[str, Any]) -> Iterator[GenerateResponse]:
         """Generate streaming response"""
-        try:
-            with self.client.stream(
-                "POST",
-                f"{self.base_url}/chat/completions",
-                json=payload,
-                headers=self._get_headers()
-            ) as response:
-                response.raise_for_status()
-
-                for line in response.iter_lines():
-                    if line:
-                        # Decode bytes to string if necessary
-                        if isinstance(line, bytes):
-                            line = line.decode('utf-8')
-                        line = line.strip()
-
-                        if line.startswith("data: "):
-                            data = line[6:]  # Remove "data: " prefix
-
-                            if data == "[DONE]":
-                                break
-
-                            try:
-                                chunk = json.loads(data)
-
-                                if "choices" in chunk and len(chunk["choices"]) > 0:
-                                    choice = chunk["choices"][0]
-                                    delta = choice.get("delta", {})
-                                    if not isinstance(delta, dict):
-                                        delta = {}
-                                    content = delta.get("content", "")
-                                    tool_calls = delta.get("tool_calls") or choice.get("tool_calls")
-                                    finish_reason = choice.get("finish_reason")
-
-                                    yield GenerateResponse(
-                                        content=content,
-                                        model=self.model,
-                                        finish_reason=finish_reason,
-                                        tool_calls=tool_calls if isinstance(tool_calls, list) else None,
-                                        raw_response=chunk
-                                    )
-
-                            except json.JSONDecodeError:
-                                continue
-
-        except Exception as e:
-            yield GenerateResponse(
-                content=f"Error: {str(e)}",
-                model=self.model,
-                finish_reason="error"
-            )
+        request_url = f"{self.base_url}/chat/completions"
+
+        with self.client.stream(
+            "POST",
+            request_url,
+            json=payload,
+            headers=self._get_headers()
+        ) as response:
+            self._raise_for_status(response, request_url=request_url)
+
+            for line in response.iter_lines():
+                if line:
+                    # Decode bytes to string if necessary
+                    if isinstance(line, bytes):
+                        line = line.decode('utf-8')
+                    line = line.strip()
+
+                    if line.startswith("data: "):
+                        data = line[6:]  # Remove "data: " prefix
+
+                        if data == "[DONE]":
+                            break
+
+                        try:
+                            chunk = json.loads(data)
+
+                            if "choices" in chunk and len(chunk["choices"]) > 0:
+                                choice = chunk["choices"][0]
+                                delta = choice.get("delta", {})
+                                if not isinstance(delta, dict):
+                                    delta = {}
+                                content = delta.get("content", "")
+                                reasoning = extract_reasoning_from_message(
+                                    delta,
+                                    architecture_format=self.architecture_config,
+                                    model_capabilities=self.model_capabilities,
+                                )
+                                tool_calls = delta.get("tool_calls") or choice.get("tool_calls")
+                                finish_reason = choice.get("finish_reason")
+
+                                metadata = {}
+                                if isinstance(reasoning, str) and reasoning.strip():
+                                    metadata["reasoning"] = reasoning
+
+                                yield GenerateResponse(
+                                    content=content,
+                                    model=self.model,
+                                    finish_reason=finish_reason,
+                                    tool_calls=tool_calls if isinstance(tool_calls, list) else None,
+                                    metadata=metadata or None,
+                                    raw_response=chunk
+                                )
+
+                        except json.JSONDecodeError:
+                            continue
 
     async def _agenerate_internal(self,
@@ -542,7 +742,7 @@ class OpenAICompatibleProvider(BaseProvider):
                 else:
                     chat_messages.append(multimodal_message)
             except ImportError:
-                self.logger.warning("Media processing not available. Install with: pip install abstractcore[media]")
+                self.logger.warning("Media processing not available. Install with: pip install \"abstractcore[media]\"")
                 if user_message_text:
                     chat_messages.append({"role": "user", "content": user_message_text})
         except Exception as e:
@@ -562,7 +762,7 @@ class OpenAICompatibleProvider(BaseProvider):
             "model": self.model,
             "messages": chat_messages,
             "stream": stream,
-            "temperature": kwargs.get("temperature", self.temperature),
+            "temperature": generation_kwargs.get("temperature", self.temperature),
             "max_tokens": max_output_tokens,
             "top_p": kwargs.get("top_p", 0.9),
         }
@@ -581,13 +781,15 @@ class OpenAICompatibleProvider(BaseProvider):
             payload["repetition_penalty"] = kwargs["repetition_penalty"]
 
         # Add seed if provided
-        seed_value = kwargs.get("seed", self.seed)
+        seed_value = generation_kwargs.get("seed")
         if seed_value is not None:
             payload["seed"] = seed_value
 
         # Add structured output support
         if response_model and PYDANTIC_AVAILABLE:
             json_schema = response_model.model_json_schema()
+            if isinstance(json_schema, dict) and json_schema:
+                json_schema = _inline_json_schema_refs(json_schema)
             payload["response_format"] = {
                 "type": "json_schema",
                 "json_schema": {
@@ -596,6 +798,9 @@ class OpenAICompatibleProvider(BaseProvider):
                 }
             }
 
+        # Provider-specific request extensions (vLLM extra_body, OpenRouter headers, etc.)
+        payload = self._mutate_payload(payload, **kwargs)
+
         if stream:
             return self._async_stream_generate(payload)
         else:
@@ -618,7 +823,7 @@ class OpenAICompatibleProvider(BaseProvider):
                 json=payload,
                 headers=self._get_headers()
             )
-            response.raise_for_status()
+            self._raise_for_status(response, request_url=request_url)
             gen_time = round((time.time() - start_time) * 1000, 1)
 
             result = response.json()
@@ -626,26 +831,45 @@ class OpenAICompatibleProvider(BaseProvider):
             # Extract response from OpenAI format
             if "choices" in result and len(result["choices"]) > 0:
                 choice = result["choices"][0]
-                content = choice.get("message", {}).get("content", "")
+                message = choice.get("message") or {}
+                if not isinstance(message, dict):
+                    message = {}
+
+                content = message.get("content", "")
+                reasoning = extract_reasoning_from_message(
+                    message,
+                    architecture_format=self.architecture_config,
+                    model_capabilities=self.model_capabilities,
+                )
+                tool_calls = message.get("tool_calls")
+                if tool_calls is None:
+                    tool_calls = choice.get("tool_calls")
                 finish_reason = choice.get("finish_reason", "stop")
             else:
                 content = "No response generated"
+                reasoning = None
+                tool_calls = None
                 finish_reason = "error"
 
             # Extract usage info
             usage = result.get("usage", {})
 
+            metadata: Dict[str, Any] = {
+                "_provider_request": {
+                    "url": request_url,
+                    "payload": payload,
+                }
+            }
+            if isinstance(reasoning, str) and reasoning.strip():
+                metadata["reasoning"] = reasoning
+
             return GenerateResponse(
                 content=content,
                 model=self.model,
                 finish_reason=finish_reason,
                 raw_response=result,
-                metadata={
-                    "_provider_request": {
-                        "url": request_url,
-                        "payload": payload,
-                    }
-                },
+                tool_calls=tool_calls if isinstance(tool_calls, list) else None,
+                metadata=metadata,
                 usage={
                     "input_tokens": usage.get("prompt_tokens", 0),
                     "output_tokens": usage.get("completion_tokens", 0),
@@ -656,64 +880,72 @@ class OpenAICompatibleProvider(BaseProvider):
                 gen_time=gen_time
             )
 
+        except (ModelNotFoundError, AuthenticationError, RateLimitError, InvalidRequestError, ProviderAPIError):
+            raise
         except Exception as e:
             error_str = str(e).lower()
-            if ('404' in error_str or 'not found' in error_str or 'model' in error_str) and ('not found' in error_str):
-                try:
-                    available_models = self.list_available_models(base_url=self.base_url)
-                    error_message = format_model_error("OpenAI-compatible server", self.model, available_models)
-                    raise ModelNotFoundError(error_message)
-                except Exception:
-                    raise ModelNotFoundError(f"Model '{self.model}' not found on OpenAI-compatible server")
-            else:
-                raise ProviderAPIError(f"OpenAI-compatible server API error: {str(e)}")
+            if ("not found" in error_str) and ("model" in error_str):
+                self._raise_model_not_found()
+            raise
 
     async def _async_stream_generate(self, payload: Dict[str, Any]) -> AsyncIterator[GenerateResponse]:
         """Native async streaming response generation."""
-        try:
-            async with self.async_client.stream(
-                "POST",
-                f"{self.base_url}/chat/completions",
-                json=payload,
-                headers=self._get_headers()
-            ) as response:
-                response.raise_for_status()
-
-                async for line in response.aiter_lines():
-                    if line:
-                        line = line.strip()
-
-                        if line.startswith("data: "):
-                            data = line[6:]  # Remove "data: " prefix
-
-                            if data == "[DONE]":
-                                break
-
-                            try:
-                                chunk = json.loads(data)
-
-                                if "choices" in chunk and len(chunk["choices"]) > 0:
-                                    choice = chunk["choices"][0]
-                                    delta = choice.get("delta", {})
-                                    content = delta.get("content", "")
-                                    finish_reason = choice.get("finish_reason")
-
-                                    yield GenerateResponse(
-                                        content=content,
-                                        model=self.model,
-                                        finish_reason=finish_reason,
-                                        raw_response=chunk
-                                    )
-
-                            except json.JSONDecodeError:
-                                continue
-
-        except Exception as e:
-            yield GenerateResponse(
-                content=f"Error: {str(e)}",
-                model=self.model,
-                finish_reason="error"
-            )
+        request_url = f"{self.base_url}/chat/completions"
+
+        async with self.async_client.stream(
+            "POST",
+            request_url,
+            json=payload,
+            headers=self._get_headers()
+        ) as response:
+            self._raise_for_status(response, request_url=request_url)
+
+            async for line in response.aiter_lines():
+                if line:
+                    line = line.strip()
+
+                    if line.startswith("data: "):
+                        data = line[6:]  # Remove "data: " prefix
+
+                        if data == "[DONE]":
+                            break
+
+                        try:
+                            chunk = json.loads(data)
+
+                            if "choices" in chunk and len(chunk["choices"]) > 0:
+                                choice = chunk["choices"][0]
+                                delta = choice.get("delta", {})
+                                if not isinstance(delta, dict):
+                                    delta = {}
+                                content = delta.get("content", "")
+                                reasoning = extract_reasoning_from_message(
+                                    delta,
+                                    architecture_format=self.architecture_config,
+                                    model_capabilities=self.model_capabilities,
+                                )
+                                tool_calls = delta.get("tool_calls") or choice.get("tool_calls")
+                                finish_reason = choice.get("finish_reason")
+
+                                metadata = {}
+                                if isinstance(reasoning, str) and reasoning.strip():
+                                    metadata["reasoning"] = reasoning
+
+                                yield GenerateResponse(
+                                    content=content,
+                                    model=self.model,
+                                    finish_reason=finish_reason,
+                                    tool_calls=tool_calls if isinstance(tool_calls, list) else None,
+                                    metadata=metadata or None,
+                                    raw_response=chunk
+                                )
+
+                        except json.JSONDecodeError:
+                            continue
+
+    def supports_prompt_cache(self) -> bool:
+        """Best-effort: forward `prompt_cache_key` to OpenAI-compatible servers that support it."""
+        return True
 
     def get_capabilities(self) -> List[str]:
         """Get OpenAI-compatible server capabilities"""
@@ -765,24 +997,14 @@ class OpenAICompatibleProvider(BaseProvider):
         except Exception:
             pass  # Best effort - don't fail the operation
 
-    def _normalize_model_name(self, model_name: str) -> str:
-        """Remove common provider prefixes from model name."""
-        for prefix in ["openai-compatible/", "lmstudio/", "qwen/", "ollama/", "huggingface/"]:
-            if model_name.startswith(prefix):
-                model_name = model_name[len(prefix):]
-        return model_name
-
     def _get_media_handler_for_model(self, model_name: str):
         """Get appropriate media handler based on model vision capabilities."""
         from ..media.handlers import OpenAIMediaHandler, LocalMediaHandler
 
-        # Normalize model name by removing provider prefixes
-        clean_model_name = self._normalize_model_name(model_name)
-
         # Determine if model supports vision
         try:
             from ..architectures.detection import supports_vision
-            use_vision_handler = supports_vision(clean_model_name)
+            use_vision_handler = supports_vision(model_name)
         except Exception as e:
             self.logger.debug(f"Vision detection failed: {e}, defaulting to LocalMediaHandler")
             use_vision_handler = False
@@ -790,10 +1012,10 @@ class OpenAICompatibleProvider(BaseProvider):
         # Create appropriate handler
         if use_vision_handler:
             handler = OpenAIMediaHandler(self.model_capabilities, model_name=model_name)
-            self.logger.debug(f"Using OpenAIMediaHandler for vision model: {clean_model_name}")
+            self.logger.debug(f"Using OpenAIMediaHandler for vision model: {model_name}")
         else:
-            handler = LocalMediaHandler("openai-compatible", self.model_capabilities, model_name=model_name)
-            self.logger.debug(f"Using LocalMediaHandler for model: {clean_model_name}")
+            handler = LocalMediaHandler(self.provider, self.model_capabilities, model_name=model_name)
+            self.logger.debug(f"Using LocalMediaHandler for model: {model_name}")
 
         return handler
 
@@ -835,10 +1057,12 @@ class OpenAICompatibleProvider(BaseProvider):
 
                 return models
             else:
-                self.logger.warning(f"OpenAI-compatible server API returned status {response.status_code}")
+                detail = self._extract_error_detail(response)
+                suffix = f": {detail}" if detail else ""
+                self.logger.warning(f"{self.PROVIDER_DISPLAY_NAME} /models returned {response.status_code}{suffix}")
                 return []
         except Exception as e:
-            self.logger.warning(f"Failed to list models from OpenAI-compatible server: {e}")
+            self.logger.warning(f"Failed to list models from {self.PROVIDER_DISPLAY_NAME}: {e}")
             return []
 
     def embed(self, input_text: Union[str, List[str]], **kwargs) -> Dict[str, Any]:
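
Note: `embed()` posts to the server's OpenAI-style `/embeddings` route and, as the hunks below show, now re-raises the typed provider exceptions unchanged, only wrapping truly unexpected failures. A usage sketch, assuming the server exposes that route:

    result = llm.embed(["first sentence", "second sentence"])
    vectors = [item["embedding"] for item in result["data"]]  # OpenAI-compatible response shape
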
@@ -879,7 +1103,7 @@ class OpenAICompatibleProvider(BaseProvider):
                 json=payload,
                 headers=self._get_headers()
             )
-            response.raise_for_status()
+            self._raise_for_status(response, request_url=f"{self.base_url}/embeddings")
 
             # Server returns OpenAI-compatible format
             result = response.json()
@@ -889,6 +1113,8 @@ class OpenAICompatibleProvider(BaseProvider):
 
             return result
 
+        except (ModelNotFoundError, AuthenticationError, RateLimitError, InvalidRequestError, ProviderAPIError):
+            raise
         except Exception as e:
             self.logger.error(f"Failed to generate embeddings: {e}")
-            raise ProviderAPIError(f"OpenAI-compatible server embedding error: {str(e)}")
+            raise ProviderAPIError(f"{self.PROVIDER_DISPLAY_NAME} embedding error: {str(e)}")
+ raise ProviderAPIError(f"{self.PROVIDER_DISPLAY_NAME} embedding error: {str(e)}")